1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2014
4 *
5 * GC support for immutable non-GCed structures, also known as Compact
6 * Normal Forms (CNF for short). This provides the RTS support for
7 * the 'compact' package and the Data.Compact module.
8 *
9 * ---------------------------------------------------------------------------*/
10
11 #define _GNU_SOURCE
12
13 #include "PosixSource.h"
14 #include <string.h>
15 #include "Rts.h"
16 #include "RtsUtils.h"
17
18 #include "Capability.h"
19 #include "GC.h"
20 #include "Storage.h"
21 #include "CNF.h"
22 #include "Hash.h"
23 #include "HeapAlloc.h"
24 #include "BlockAlloc.h"
25
26 #ifdef HAVE_UNISTD_H
27 #include <unistd.h>
28 #endif
29 #ifdef HAVE_LIMITS_H
30 #include <limits.h>
31 #endif
32
33 /**
34 * Note [Compact Normal Forms]
35 *
36 * A Compact Normal Form is, at its essence, a chain of memory blocks (each a
37 * multiple of the block allocator's block size) containing other closures inside.
38 *
39 * Each block starts with a header, of type StgCompactNFDataBlock, that points
40 * to the first and to the next block in the chain. Right after the header
41 * in the first block we have a closure of type StgCompactNFData, which holds
42 * compact-wide metadata. This closure is the Compact# that Cmm and Haskell
43 * see, and it's mostly a regular Haskell closure.
44 *
45 * Blocks are appended to the chain automatically as needed, or manually with a
46 * compactResize() call, which also adjusts the size of automatically appended
47 * blocks.
48 *
49 * Objects can be appended to the block currently marked as the nursery, or to
50 * any of the later blocks if the nursery block is too full to fit the entire
51 * object. For each block in the chain (which can span multiple block allocator
52 * blocks), we use the bdescr of its first block to store how full it is.
53 * After an object is appended, it is scavenged for any outgoing pointers,
54 * and all objects it points to are appended, recursively, in a manner similar
55 * to a copying GC (see Note [Appending to a Compact] for further discussion).
56 *
57 * We also flag the bdescr of each block allocator block of a compact
58 * (including those that were obtained as the second or later block of a single
59 * allocGroup(n) call) with BF_COMPACT. This allows the GC to quickly
60 * recognize that a given pointer is in a compact region, and to trigger the
61 * CNF path.
62 *
63 * These two facts combined mean that in any compact block where some object
64 * begins, the bdescrs must be valid. For simplicity this is achieved by
65 * restricting the maximum size of a compact block to 252 block allocator
66 * blocks (so that the total, including the bdescrs, is one megablock).
67 *
68 * Compacts as a whole live in a special list in each generation, where the
69 * list is held through the bd->link field of the bdescr of the StgCompactNFData
70 * closure (as for large objects). They live in a different list than large
71 * objects because the operation to free them is different (all blocks in
72 * a compact must be freed individually), and the stats/sanity behavior is
73 * slightly different. This is also the reason that compacts allocate memory
74 * using a special function instead of just calling allocate().
75 *
76 * Compacts are also suitable for network or disk serialization, and to
77 * that end they support a pointer fixup operation, which adjusts pointers
78 * from a previous layout of the chain in memory to the new allocation.
79 * This works by constructing a temporary binary search table (in the C heap)
80 * of the old block addresses (which are known from the block header), and
81 * then searching for each pointer in the table, and adjusting it.
82 * It relies on ABI compatibility and static linking (or no ASLR) because it
83 * does not attempt to reconstruct info tables, and uses info tables to detect
84 * pointers. In practice this means only the exact same binary should be
85 * used.
86 */
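/* A rough sketch of the chain described above (field names as used later in
 * this file; box sizes not to scale):
 *
 *   first block (group)                  later block (group)
 *   +--------------------------+         +--------------------------+
 *   | StgCompactNFDataBlock    |   +---->| StgCompactNFDataBlock    |
 *   |   self / owner / next ---+---+     |   self / owner / next    |
 *   +--------------------------+         +--------------------------+
 *   | StgCompactNFData         |         | compacted closures ...   |
 *   |   nursery, last, ...     |         | free space               |
 *   +--------------------------+         +--------------------------+
 *   | compacted closures ...   |
 *   | free space               |
 *   +--------------------------+
 */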
87
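/* How compactAllocateBlockInternal() is asked to account for a new group of
 * blocks (see the switch in that function):
 *   ALLOCATE_NEW           - first block of a brand new compact
 *   ALLOCATE_APPEND        - an extra block for an existing compact
 *   ALLOCATE_IMPORT_NEW    - first block of a compact being imported; it is
 *                            kept on the compact_blocks_in_import list until
 *                            compactFixupPointers() hands it over to the GC
 *   ALLOCATE_IMPORT_APPEND - a later block of a compact being imported
 */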
88 typedef enum {
89 ALLOCATE_APPEND,
90 ALLOCATE_NEW,
91 ALLOCATE_IMPORT_NEW,
92 ALLOCATE_IMPORT_APPEND,
93 } AllocateOp;
94
95 static StgCompactNFDataBlock *
96 compactAllocateBlockInternal(Capability *cap,
97 StgWord aligned_size,
98 StgCompactNFDataBlock *first,
99 AllocateOp operation)
100 {
101 StgCompactNFDataBlock *self;
102 bdescr *block, *head;
103 uint32_t n_blocks;
104 generation *g;
105
106 n_blocks = aligned_size / BLOCK_SIZE;
107
108 // Attempting to allocate an object larger than maxHeapSize
109 // should definitely be disallowed. (bug #1791)
110 if ((RtsFlags.GcFlags.maxHeapSize > 0 &&
111 n_blocks >= RtsFlags.GcFlags.maxHeapSize) ||
112 n_blocks >= HS_INT32_MAX) // avoid overflow when
113 // calling allocGroup() below
114 {
115 heapOverflow();
116 // heapOverflow() doesn't exit (see #2592), but we aren't
117 // in a position to do a clean shutdown here: we
118 // either have to allocate the memory or exit now.
119 // Allocating the memory would be bad, because the user
120 // has requested that we not exceed maxHeapSize, so we
121 // just exit.
122 stg_exit(EXIT_HEAPOVERFLOW);
123 }
124
125 // It is imperative that first is the first block in the compact
126 // (or NULL if the compact does not exist yet)
127 // because the evacuate code does not update the generation of
128 // blocks other than the first (so we would get the statistics
129 // wrong and crash in Sanity)
130 if (first != NULL) {
131 block = Bdescr((P_)first);
132 g = block->gen;
133 } else {
134 g = g0;
135 }
136
137 ACQUIRE_SM_LOCK;
138 block = allocGroup(n_blocks);
139 switch (operation) {
140 case ALLOCATE_NEW:
141 ASSERT (first == NULL);
142 ASSERT (g == g0);
143 dbl_link_onto(block, &g0->compact_objects);
144 g->n_compact_blocks += block->blocks;
145 g->n_new_large_words += aligned_size / sizeof(StgWord);
146 break;
147
148 case ALLOCATE_IMPORT_NEW:
149 dbl_link_onto(block, &g0->compact_blocks_in_import);
150 /* fallthrough */
151 case ALLOCATE_IMPORT_APPEND:
152 ASSERT (first == NULL);
153 ASSERT (g == g0);
154 g->n_compact_blocks_in_import += block->blocks;
155 g->n_new_large_words += aligned_size / sizeof(StgWord);
156 break;
157
158 case ALLOCATE_APPEND:
159 g->n_compact_blocks += block->blocks;
160 if (g == g0)
161 g->n_new_large_words += aligned_size / sizeof(StgWord);
162 break;
163
164 default:
165 #ifdef DEBUG
166 ASSERT(!"code should not be reached");
167 #else
168 __builtin_unreachable();
169 #endif
170 }
171 RELEASE_SM_LOCK;
172
173 cap->total_allocated += aligned_size / sizeof(StgWord);
174
175 self = (StgCompactNFDataBlock*) block->start;
176 self->self = self;
177 self->next = NULL;
178
179 head = block;
180 initBdescr(head, g, g);
181 head->flags = BF_COMPACT;
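// Flag the bdescrs of the remaining blocks in the group as well, and point
// them back at the head, so that Bdescr() applied to any address inside the
// compact finds a BF_COMPACT bdescr (see Note [Compact Normal Forms]).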
182 for (block = head + 1, n_blocks --; n_blocks > 0; block++, n_blocks--) {
183 block->link = head;
184 block->blocks = 0;
185 block->flags = BF_COMPACT;
186 }
187
188 return self;
189 }
190
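/* The StgCompactNFData closure is laid out immediately after the
 * StgCompactNFDataBlock header of the first block (see compactNew() below),
 * so the next two helpers are inverses of each other.
 */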
191 static inline StgCompactNFDataBlock *
192 compactGetFirstBlock(StgCompactNFData *str)
193 {
194 return (StgCompactNFDataBlock*) ((W_)str - sizeof(StgCompactNFDataBlock));
195 }
196
197 static inline StgCompactNFData *
198 firstBlockGetCompact(StgCompactNFDataBlock *block)
199 {
200 return (StgCompactNFData*) ((W_)block + sizeof(StgCompactNFDataBlock));
201 }
202
203 static void
204 freeBlockChain(StgCompactNFDataBlock *block)
205 {
206 StgCompactNFDataBlock *next;
207 bdescr *bd;
208
209 for ( ; block; block = next) {
210 next = block->next;
211 bd = Bdescr((StgPtr)block);
212 ASSERT((bd->flags & BF_EVACUATED) == 0);
213 freeGroup(bd);
214 }
215 }
216
217 void
218 compactFree(StgCompactNFData *str)
219 {
220 StgCompactNFDataBlock *block;
221
222 block = compactGetFirstBlock(str);
223 freeBlockChain(block);
224 }
225
226 void
227 compactMarkKnown(StgCompactNFData *str)
228 {
229 bdescr *bd;
230 StgCompactNFDataBlock *block;
231
232 block = compactGetFirstBlock(str);
233 for ( ; block; block = block->next) {
234 bd = Bdescr((StgPtr)block);
235 bd->flags |= BF_KNOWN;
236 }
237 }
238
239 StgWord
240 countCompactBlocks(bdescr *outer)
241 {
242 StgCompactNFDataBlock *block;
243 W_ count;
244
245 count = 0;
246 while (outer) {
247 bdescr *inner;
248
249 block = (StgCompactNFDataBlock*)(outer->start);
250 do {
251 inner = Bdescr((P_)block);
252 ASSERT (inner->flags & BF_COMPACT);
253
254 count += inner->blocks;
255 block = block->next;
256 } while(block);
257
258 outer = outer->link;
259 }
260
261 return count;
262 }
263
264 StgCompactNFData *
265 compactNew (Capability *cap, StgWord size)
266 {
267 StgWord aligned_size;
268 StgCompactNFDataBlock *block;
269 StgCompactNFData *self;
270 bdescr *bd;
271
272 aligned_size = BLOCK_ROUND_UP(size + sizeof(StgCompactNFDataBlock)
273 + sizeof(StgCompactNFData));
274 if (aligned_size >= BLOCK_SIZE * BLOCKS_PER_MBLOCK)
275 aligned_size = BLOCK_SIZE * BLOCKS_PER_MBLOCK;
276
277 block = compactAllocateBlockInternal(cap, aligned_size, NULL,
278 ALLOCATE_NEW);
279
280 self = firstBlockGetCompact(block);
281 SET_INFO((StgClosure*)self, &stg_COMPACT_NFDATA_info);
282 self->totalDataW = aligned_size / sizeof(StgWord);
283 self->autoBlockW = aligned_size / sizeof(StgWord);
284 self->nursery = block;
285 self->last = block;
286
287 block->owner = self;
288
289 bd = Bdescr((P_)block);
290 bd->free = (StgPtr)((W_)self + sizeof(StgCompactNFData));
291 ASSERT (bd->free == (StgPtr)self + sizeofW(StgCompactNFData));
292
293 self->totalW = bd->blocks * BLOCK_SIZE_W;
294
295 return self;
296 }
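/* A worked example of the sizing above (a sketch, assuming the usual 4096-byte
 * blocks on a 64-bit platform): compactNew(cap, 4096) has to reserve 4096
 * bytes of payload plus the StgCompactNFDataBlock header and the
 * StgCompactNFData closure, so BLOCK_ROUND_UP() yields 8192 bytes and the
 * first group is an allocGroup(2); requests that would round up past
 * BLOCK_SIZE * BLOCKS_PER_MBLOCK are clamped to a single megablock group.
 */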
297
298 static StgCompactNFDataBlock *
299 compactAppendBlock (Capability *cap,
300 StgCompactNFData *str,
301 StgWord aligned_size)
302 {
303 StgCompactNFDataBlock *block;
304 bdescr *bd;
305
306 block = compactAllocateBlockInternal(cap, aligned_size,
307 compactGetFirstBlock(str),
308 ALLOCATE_APPEND);
309 block->owner = str;
310 block->next = NULL;
311
312 ASSERT (str->last->next == NULL);
313 str->last->next = block;
314 str->last = block;
315 if (str->nursery == NULL)
316 str->nursery = block;
317 str->totalDataW += aligned_size / sizeof(StgWord);
318
319 bd = Bdescr((P_)block);
320 bd->free = (StgPtr)((W_)block + sizeof(StgCompactNFDataBlock));
321 ASSERT (bd->free == (StgPtr)block + sizeofW(StgCompactNFDataBlock));
322
323 str->totalW += bd->blocks * BLOCK_SIZE_W;
324
325 return block;
326 }
327
328 void
329 compactResize (Capability *cap, StgCompactNFData *str, StgWord new_size)
330 {
331 StgWord aligned_size;
332
333 aligned_size = BLOCK_ROUND_UP(new_size + sizeof(StgCompactNFDataBlock));
334 if (aligned_size >= BLOCK_SIZE * BLOCKS_PER_MBLOCK)
335 aligned_size = BLOCK_SIZE * BLOCKS_PER_MBLOCK;
336
337 str->autoBlockW = aligned_size / sizeof(StgWord);
338
339 compactAppendBlock(cap, str, aligned_size);
340 }
341
342 /* Note [Appending to a Compact]
343
344 This is a simple reimplementation of the copying GC.
345 One could be tempted to reuse the actual GC code here, but one
346 would quickly find that it would bring in all the generational
347 GC complexity for no benefit at all.
348
349 Plus, we don't need to scavenge/evacuate all kinds of weird
350 objects here, just constructors and primitives. Thunks are
351 expected to be evaluated before appending by the API layer
352 (in Haskell, above the primop which is implemented here).
353 Also, we have a different policy for large objects: instead
354 of relinking to the new large object list, we fully copy
355 them inside the compact and scavenge them normally.
356
357 Note that if we allowed thunks and lazy evaluation the compact
358 would be a mutable object, which would create all sorts of
359 GC problems (besides, evaluating a thunk could exhaust the
360 compact space or yield an invalid object, and we would have
361 no way to signal that to the user)
362
363 Just like the real evacuate/scavenge pair, we need to handle
364 object loops. We would like to use the same strategy of rewriting objects
365 with forwarding pointers, but in a real GC the blocks from the
366 old space are dropped at the end (dropping all forwarding
367 pointers at the same time), which we can't do here as we don't
368 know all the pointers to the objects being evacuated. Also, when running
369 in parallel we don't know which other threads are evaluating the
370 thunks that we have just corrupted.
371
372 So instead we use a hash table of "visited" objects, and add
373 the pointer as we copy it. To reduce the overhead, we also offer
374 a version of the API that does not preserve sharing (TODO).
375
376 You might be tempted to replace the original objects with an StgInd
377 pointing to the copy in the compact, but you would be wrong: the Haskell
378 code assumes that objects in the heap only become more evaluated
379 (thunks to blackholes to indirections to actual objects), and in
380 particular it assumes that if a pointer is tagged the object
381 is directly referenced and its fields can be read directly,
382 without entering the closure.
383
384 FIXME: any better idea than the hash table?
385 */
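/* A concrete illustration of why the "visited" table matters: appending
 * something like
 *
 *     let xs = [1,2,3] in (xs, xs)
 *
 * with sharing enabled copies the list once and makes both components of the
 * pair point at that single copy; with hash == NULL the list is simply copied
 * twice (still a valid result, just larger), and a cyclic structure could not
 * be appended at all. This is why compactAppend() only omits the table when
 * the caller explicitly asks for the non-sharing variant.
 */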
386
387 static void
388 unroll_memcpy(StgPtr to, StgPtr from, StgWord size)
389 {
390 for (; size > 0; size--)
391 *(to++) = *(from++);
392 }
393
394 static rtsBool
395 allocate_in_compact (StgCompactNFDataBlock *block, StgWord sizeW, StgPtr *at)
396 {
397 bdescr *bd;
398 StgPtr top;
399 StgPtr free;
400
401 bd = Bdescr((StgPtr)block);
402 top = bd->start + BLOCK_SIZE_W * bd->blocks;
403 if (bd->free + sizeW > top)
404 return rtsFalse;
405
406 free = bd->free;
407 bd->free += sizeW;
408 *at = free;
409
410 return rtsTrue;
411 }
412
413 static rtsBool
414 block_is_full (StgCompactNFDataBlock *block)
415 {
416 bdescr *bd;
417 StgPtr top;
418 StgWord sizeW;
419
420 bd = Bdescr((StgPtr)block);
421 top = bd->start + BLOCK_SIZE_W * bd->blocks;
422
423 // We consider a block full if we could not fit
424 // an entire closure with 7 payload items
425 // (this leaves a slop of 64 bytes at most, but
426 // it avoids leaving a block almost empty to fit
427 // a large byte array, while at the same time
428 // it avoids trying to allocate a large closure
429 // in a chain of almost empty blocks)
430 sizeW = sizeofW(StgHeader) + 7;
431 return (bd->free + sizeW > top);
432 }
433
434 static rtsBool
435 allocate_loop (Capability *cap,
436 StgCompactNFData *str,
437 StgWord sizeW,
438 StgPtr *at)
439 {
440 StgCompactNFDataBlock *block;
441 StgWord next_size;
442
443 // try the nursery first
444 retry:
445 if (str->nursery != NULL) {
446 if (allocate_in_compact(str->nursery, sizeW, at))
447 return rtsTrue;
448
449 if (block_is_full (str->nursery)) {
450 str->nursery = str->nursery->next;
451 goto retry;
452 }
453
454 // try subsequent blocks
455 block = str->nursery->next;
456 while (block != NULL) {
457 if (allocate_in_compact(block, sizeW, at))
458 return rtsTrue;
459
460 block = block->next;
461 }
462 }
463
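// The existing chain is full, so grow the compact: new blocks are at least
// autoBlockW words (as set by compactNew()/compactResize()) but large enough
// for the closure being copied, and never more than a megablock; if even a
// megablock-sized group cannot hold the closure plus its block header, give
// up and let the caller report the failure.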
464 next_size = stg_max(str->autoBlockW * sizeof(StgWord),
465 BLOCK_ROUND_UP(sizeW * sizeof(StgWord)));
466 if (next_size >= BLOCKS_PER_MBLOCK * BLOCK_SIZE)
467 next_size = BLOCKS_PER_MBLOCK * BLOCK_SIZE;
468 if (next_size < sizeW * sizeof(StgWord) + sizeof(StgCompactNFDataBlock))
469 return rtsFalse;
470
471 block = compactAppendBlock(cap, str, next_size);
472 ASSERT (str->nursery != NULL);
473 return allocate_in_compact(block, sizeW, at);
474 }
475
476 static void
477 copy_tag (Capability *cap,
478 StgCompactNFData *str,
479 HashTable *hash,
480 StgClosure **p,
481 StgClosure *from,
482 StgWord tag)
483 {
484 StgPtr to;
485 StgWord sizeW;
486
487 sizeW = closure_sizeW(from);
488
489 if (!allocate_loop(cap, str, sizeW, &to)) {
490 barf("Failed to copy object in compact, object too large\n");
491 return;
492 }
493
494 // unroll memcpy for small sizes because we can
495 // benefit from known alignment
496 // (32 extracted from my magic hat)
497 if (sizeW < 32)
498 unroll_memcpy(to, (StgPtr)from, sizeW);
499 else
500 memcpy(to, from, sizeW * sizeof(StgWord));
501
502 if (hash != NULL)
503 insertHashTable(hash, (StgWord)from, to);
504
505 *p = TAG_CLOSURE(tag, (StgClosure*)to);
506 }
507
508 STATIC_INLINE rtsBool
509 object_in_compact (StgCompactNFData *str, StgClosure *p)
510 {
511 bdescr *bd;
512
513 if (!HEAP_ALLOCED(p))
514 return rtsFalse;
515
516 bd = Bdescr((P_)p);
517 return (bd->flags & BF_COMPACT) != 0 &&
518 objectGetCompact(p) == str;
519 }
520
521 static void
522 simple_evacuate (Capability *cap,
523 StgCompactNFData *str,
524 HashTable *hash,
525 StgClosure **p)
526 {
527 StgWord tag;
528 StgClosure *from;
529 void *already;
530
531 from = *p;
532 tag = GET_CLOSURE_TAG(from);
533 from = UNTAG_CLOSURE(from);
534
535 // If the object referenced is already in this compact
536 // (for example by reappending an object that was obtained
537 // by compactGetRoot) then do nothing
538 if (object_in_compact(str, from))
539 return;
540
541 switch (get_itbl(from)->type) {
542 case BLACKHOLE:
543 // If tag == 0, the indirectee is the TSO that claimed the thunk
544 //
545 // Not useful and not NFData
546 from = ((StgInd*)from)->indirectee;
547 if (GET_CLOSURE_TAG(from) == 0) {
548 debugBelch("Claimed but not updated BLACKHOLE in Compact,"
549 " not normal form");
550 return;
551 }
552
553 *p = from;
554 return simple_evacuate(cap, str, hash, p);
555
556 case IND:
557 case IND_STATIC:
558 // follow chains of indirections, don't evacuate them
559 from = ((StgInd*)from)->indirectee;
560 *p = from;
561 // Evac.c uses a goto, but let's rely on a smart compiler
562 // and get readable code instead
563 return simple_evacuate(cap, str, hash, p);
564
565 default:
566 // This object was evacuated already, return the existing
567 // pointer
568 if (hash != NULL &&
569 (already = lookupHashTable (hash, (StgWord)from))) {
570 *p = TAG_CLOSURE(tag, (StgClosure*)already);
571 return;
572 }
573
574 copy_tag(cap, str, hash, p, from, tag);
575 }
576 }
577
578 static void
579 simple_scavenge_mut_arr_ptrs (Capability *cap,
580 StgCompactNFData *str,
581 HashTable *hash,
582 StgMutArrPtrs *a)
583 {
584 StgPtr p, q;
585
586 p = (StgPtr)&a->payload[0];
587 q = (StgPtr)&a->payload[a->ptrs];
588 for (; p < q; p++) {
589 simple_evacuate(cap, str, hash, (StgClosure**)p);
590 }
591 }
592
593 static void
594 simple_scavenge_block (Capability *cap,
595 StgCompactNFData *str,
596 StgCompactNFDataBlock *block,
597 HashTable *hash,
598 StgPtr p)
599 {
600 const StgInfoTable *info;
601 bdescr *bd = Bdescr((P_)block);
602
603 while (p < bd->free) {
604 ASSERT (LOOKS_LIKE_CLOSURE_PTR(p));
605 info = get_itbl((StgClosure*)p);
606
607 switch (info->type) {
608 case CONSTR_1_0:
609 simple_evacuate(cap, str, hash, &((StgClosure*)p)->payload[0]);
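/* fallthrough */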
610 case CONSTR_0_1:
611 p += sizeofW(StgClosure) + 1;
612 break;
613
614 case CONSTR_2_0:
615 simple_evacuate(cap, str, hash, &((StgClosure*)p)->payload[1]);
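/* fallthrough */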
616 case CONSTR_1_1:
617 simple_evacuate(cap, str, hash, &((StgClosure*)p)->payload[0]);
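/* fallthrough */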
618 case CONSTR_0_2:
619 p += sizeofW(StgClosure) + 2;
620 break;
621
622 case CONSTR:
623 case PRIM:
624 case CONSTR_NOCAF_STATIC:
625 case CONSTR_STATIC:
626 {
627 StgPtr end;
628
629 end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs;
630 for (p = (P_)((StgClosure *)p)->payload; p < end; p++) {
631 simple_evacuate(cap, str, hash, (StgClosure **)p);
632 }
633 p += info->layout.payload.nptrs;
634 break;
635 }
636
637 case ARR_WORDS:
638 p += arr_words_sizeW((StgArrBytes*)p);
639 break;
640
641 case MUT_ARR_PTRS_FROZEN:
642 case MUT_ARR_PTRS_FROZEN0:
643 simple_scavenge_mut_arr_ptrs(cap, str, hash, (StgMutArrPtrs*)p);
644 p += mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
645 break;
646
647 case SMALL_MUT_ARR_PTRS_FROZEN:
648 case SMALL_MUT_ARR_PTRS_FROZEN0:
649 {
650 uint32_t i;
651 StgSmallMutArrPtrs *arr = (StgSmallMutArrPtrs*)p;
652
653 for (i = 0; i < arr->ptrs; i++)
654 simple_evacuate(cap, str, hash, &arr->payload[i]);
655
656 p += sizeofW(StgSmallMutArrPtrs) + arr->ptrs;
657 break;
658 }
659
660 case IND:
661 case BLACKHOLE:
662 case IND_STATIC:
663 // They get short-circuited by simple_evacuate()
664 barf("IND/BLACKHOLE in Compact");
665 break;
666
667 default:
668 barf("Invalid non-NFData closure in Compact\n");
669 }
670 }
671 }
672
673 static void
674 scavenge_loop (Capability *cap,
675 StgCompactNFData *str,
676 StgCompactNFDataBlock *first_block,
677 HashTable *hash,
678 StgPtr p)
679 {
680 // Scavenge the first block
681 simple_scavenge_block(cap, str, first_block, hash, p);
682
683 // Note: simple_scavenge_block can change str->last, which
684 // changes this loop condition as we iterate through the chain
685 while (first_block != str->last) {
686 // we can't allocate in blocks that were already scavenged
687 // so push the nursery forward
688 if (str->nursery == first_block)
689 str->nursery = str->nursery->next;
690
691 first_block = first_block->next;
692 simple_scavenge_block(cap, str, first_block, hash,
693 (P_)first_block + sizeofW(StgCompactNFDataBlock));
694 }
695 }
696
697 #ifdef DEBUG
698 static rtsBool
699 objectIsWHNFData (StgClosure *what)
700 {
701 switch (get_itbl(what)->type) {
702 case CONSTR:
703 case CONSTR_1_0:
704 case CONSTR_0_1:
705 case CONSTR_2_0:
706 case CONSTR_1_1:
707 case CONSTR_0_2:
708 case CONSTR_STATIC:
709 case CONSTR_NOCAF_STATIC:
710 case ARR_WORDS:
711 case MUT_ARR_PTRS_FROZEN:
712 case MUT_ARR_PTRS_FROZEN0:
713 case SMALL_MUT_ARR_PTRS_FROZEN:
714 case SMALL_MUT_ARR_PTRS_FROZEN0:
715 return rtsTrue;
716
717 case IND:
718 case BLACKHOLE:
719 return objectIsWHNFData(UNTAG_CLOSURE(((StgInd*)what)->indirectee));
720
721 default:
722 return rtsFalse;
723 }
724 }
725
726 static rtsBool
727 verify_mut_arr_ptrs (StgCompactNFData *str,
728 StgMutArrPtrs *a)
729 {
730 StgPtr p, q;
731
732 p = (StgPtr)&a->payload[0];
733 q = (StgPtr)&a->payload[a->ptrs];
734 for (; p < q; p++) {
735 if (!object_in_compact(str, UNTAG_CLOSURE(*(StgClosure**)p)))
736 return rtsFalse;
737 }
738
739 return rtsTrue;
740 }
741
742 static rtsBool
743 verify_consistency_block (StgCompactNFData *str, StgCompactNFDataBlock *block)
744 {
745 bdescr *bd;
746 StgPtr p;
747 const StgInfoTable *info;
748 StgClosure *q;
749
750 p = (P_)firstBlockGetCompact(block);
751 bd = Bdescr((P_)block);
752 while (p < bd->free) {
753 q = (StgClosure*)p;
754
755 if (!LOOKS_LIKE_CLOSURE_PTR(q))
756 return rtsFalse;
757
758 info = get_itbl(q);
759 switch (info->type) {
760 case CONSTR_1_0:
761 if (!object_in_compact(str, UNTAG_CLOSURE(q->payload[0])))
762 return rtsFalse;
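/* fallthrough */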
763 case CONSTR_0_1:
764 p += sizeofW(StgClosure) + 1;
765 break;
766
767 case CONSTR_2_0:
768 if (!object_in_compact(str, UNTAG_CLOSURE(q->payload[1])))
769 return rtsFalse;
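/* fallthrough */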
770 case CONSTR_1_1:
771 if (!object_in_compact(str, UNTAG_CLOSURE(q->payload[0])))
772 return rtsFalse;
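/* fallthrough */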
773 case CONSTR_0_2:
774 p += sizeofW(StgClosure) + 2;
775 break;
776
777 case CONSTR:
778 case PRIM:
779 case CONSTR_STATIC:
780 case CONSTR_NOCAF_STATIC:
781 {
782 uint32_t i;
783
784 for (i = 0; i < info->layout.payload.ptrs; i++)
785 if (!object_in_compact(str, UNTAG_CLOSURE(q->payload[i])))
786 return rtsFalse;
787
788 p += sizeofW(StgClosure) + info->layout.payload.ptrs +
789 info->layout.payload.nptrs;
790 break;
791 }
792
793 case ARR_WORDS:
794 p += arr_words_sizeW((StgArrBytes*)p);
795 break;
796
797 case MUT_ARR_PTRS_FROZEN:
798 case MUT_ARR_PTRS_FROZEN0:
799 if (!verify_mut_arr_ptrs(str, (StgMutArrPtrs*)p))
800 return rtsFalse;
801 p += mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
802 break;
803
804 case SMALL_MUT_ARR_PTRS_FROZEN:
805 case SMALL_MUT_ARR_PTRS_FROZEN0:
806 {
807 uint32_t i;
808 StgSmallMutArrPtrs *arr = (StgSmallMutArrPtrs*)p;
809
810 for (i = 0; i < arr->ptrs; i++)
811 if (!object_in_compact(str, UNTAG_CLOSURE(arr->payload[i])))
812 return rtsFalse;
813
814 p += sizeofW(StgSmallMutArrPtrs) + arr->ptrs;
815 break;
816 }
817
818 case COMPACT_NFDATA:
819 p += sizeofW(StgCompactNFData);
820 break;
821
822 default:
823 return rtsFalse;
824 }
825 }
826
827 return rtsTrue;
828 }
829
830 static rtsBool
831 verify_consistency_loop (StgCompactNFData *str)
832 {
833 StgCompactNFDataBlock *block;
834
835 block = compactGetFirstBlock(str);
836 do {
837 if (!verify_consistency_block(str, block))
838 return rtsFalse;
839 block = block->next;
840 } while (block && block->owner);
841
842 return rtsTrue;
843 }
844 #endif
845
846
847 StgPtr
848 compactAppend (Capability *cap,
849 StgCompactNFData *str,
850 StgClosure *what,
851 StgWord share)
852 {
853 StgClosure *root;
854 StgClosure *tagged_root;
855 HashTable *hash;
856 StgCompactNFDataBlock *evaced_block;
857
858 ASSERT(objectIsWHNFData(UNTAG_CLOSURE(what)));
859
860 tagged_root = what;
861 simple_evacuate(cap, str, NULL, &tagged_root);
862
863 root = UNTAG_CLOSURE(tagged_root);
864 evaced_block = objectGetCompactBlock(root);
865
866 if (share) {
867 hash = allocHashTable ();
868 insertHashTable(hash, (StgWord)UNTAG_CLOSURE(what), root);
869 } else
870 hash = NULL;
871
872 scavenge_loop(cap, str, evaced_block, hash, (P_)root);
873
874 if (share)
875 freeHashTable(hash, NULL);
876
877 ASSERT(verify_consistency_loop(str));
878
879 return (StgPtr)tagged_root;
880 }
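/* A minimal sketch of how the entry points in this file fit together, as if
 * called directly from C (in reality they are reached through the Cmm
 * primops; 'what' stands for some fully evaluated closure and the sizes are
 * arbitrary):
 *
 *     StgCompactNFData *str = compactNew(cap, 4096);
 *     StgPtr root = compactAppend(cap, str, what, 1);   // 1 = preserve sharing
 *     ASSERT(compactContains(str, root));
 *     compactResize(cap, str, 1024 * 1024);             // bigger auto blocks
 */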
881
882 StgWord
883 compactContains (StgCompactNFData *str, StgPtr what)
884 {
885 bdescr *bd;
886
887 // This check is the reason why this needs to be
888 // implemented in C instead of (possibly faster) Cmm
889 if (!HEAP_ALLOCED (what))
890 return 0;
891
892 // Note that we don't care about tags, they are eaten
893 // away by the Bdescr operation anyway
894 bd = Bdescr((P_)what);
895 return (bd->flags & BF_COMPACT) != 0 &&
896 (str == NULL || objectGetCompact((StgClosure*)what) == str);
897 }
898
899 StgCompactNFDataBlock *
900 compactAllocateBlock(Capability *cap,
901 StgWord size,
902 StgCompactNFDataBlock *previous)
903 {
904 StgWord aligned_size;
905 StgCompactNFDataBlock *block;
906 bdescr *bd;
907
908 aligned_size = BLOCK_ROUND_UP(size);
909
910 // We do not link the new object into the generation ever
911 // - we cannot let the GC know about this object until we're done
912 // importing it and we have fixed up all info tables and stuff
913 //
914 // but we do update n_compact_blocks_in_import, otherwise memInventory()
915 // in Sanity will think we have a memory leak, because it compares
916 // the blocks it knows about with the blocks obtained from the
917 // block allocator
918 // (if by chance a memory leak does happen due to a bug somewhere
919 // else, memInventory will also report that all compact blocks
920 // associated with this compact are leaked - but they are not really:
921 // we have a pointer to them and we're not losing track of it, it's
922 // just that we can't use the GC until we're done with the import)
923 //
924 // (That btw means that the high level import code must be careful
925 // not to lose the pointer, so don't use the primops directly
926 // unless you know what you're doing!)
927
928 // Other trickery: we pass NULL as first, which means our blocks
929 // are always in generation 0
930 // This is correct because the GC has never seen the blocks, so
931 // it has had no chance of promoting them
932
933 block = compactAllocateBlockInternal(cap, aligned_size, NULL,
934 previous != NULL ? ALLOCATE_IMPORT_APPEND : ALLOCATE_IMPORT_NEW);
935 if (previous != NULL)
936 previous->next = block;
937
938 bd = Bdescr((P_)block);
939 bd->free = (P_)((W_)bd->start + size);
940
941 return block;
942 }
943
944 STATIC_INLINE rtsBool
945 any_needs_fixup(StgCompactNFDataBlock *block)
946 {
947 // ->next pointers are always valid, even if some blocks were
948 // not allocated where we want them, because compactAllocateBlock()
949 // will take care to adjust them
950
951 do {
952 if (block->self != block)
953 return rtsTrue;
954 block = block->next;
955 } while (block && block->owner);
956
957 return rtsFalse;
958 }
959
960 #ifdef DEBUG
961 static void
962 spew_failing_pointer(StgWord *fixup_table, uint32_t count, StgWord address)
963 {
964 uint32_t i;
965 StgWord key, value;
966 StgCompactNFDataBlock *block;
967 bdescr *bd;
968 StgWord size;
969
970 debugBelch("Failed to adjust 0x%" FMT_HexWord ". Block dump follows...\n",
971 address);
972
973 for (i = 0; i < count; i++) {
974 key = fixup_table [2 * i];
975 value = fixup_table [2 * i + 1];
976
977 block = (StgCompactNFDataBlock*)value;
978 bd = Bdescr((P_)block);
979 size = (W_)bd->free - (W_)bd->start;
980
981 debugBelch("%" FMT_Word32 ": was 0x%" FMT_HexWord "-0x%" FMT_HexWord
982 ", now 0x%" FMT_HexWord "-0x%" FMT_HexWord "\n", i, key,
983 key+size, value, value+size);
984 }
985 }
986 #endif
987
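/* The fixup table built by build_fixup_table() below is a flat array of
 * (old block address, new block address) pairs, sorted by old address; for
 * example (made-up addresses, one pair per block in the chain):
 *
 *     fixup_table = { 0x4200000, 0x7f41000,    // block 1: old self -> new
 *                     0x4600000, 0x7f82000 }   // block 2: old self -> new
 *
 * find_pointer() binary-searches the keys for the block whose old address
 * range contains the pointer, and fixup_one_pointer() then rebases the
 * pointer by the difference between the old and new block addresses.
 */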
988 STATIC_INLINE StgCompactNFDataBlock *
989 find_pointer(StgWord *fixup_table, uint32_t count, StgClosure *q)
990 {
991 StgWord address = (W_)q;
992 uint32_t a, b, c;
993 StgWord key, value;
994 bdescr *bd;
995
996 a = 0;
997 b = count;
998 while (a < b-1) {
999 c = (a+b)/2;
1000
1001 key = fixup_table[c * 2];
1002 value = fixup_table[c * 2 + 1];
1003
1004 if (key > address)
1005 b = c;
1006 else
1007 a = c;
1008 }
1009
1010 // three cases here: 0, 1 or 2 blocks to check
1011 for ( ; a < b; a++) {
1012 key = fixup_table[a * 2];
1013 value = fixup_table[a * 2 + 1];
1014
1015 if (key > address)
1016 goto fail;
1017
1018 bd = Bdescr((P_)value);
1019
1020 if (key + bd->blocks * BLOCK_SIZE <= address)
1021 goto fail;
1022
1023 return (StgCompactNFDataBlock*)value;
1024 }
1025
1026 fail:
1027 // We should never get here
1028
1029 #ifdef DEBUG
1030 spew_failing_pointer(fixup_table, count, address);
1031 #endif
1032 return NULL;
1033 }
1034
1035 static rtsBool
1036 fixup_one_pointer(StgWord *fixup_table, uint32_t count, StgClosure **p)
1037 {
1038 StgWord tag;
1039 StgClosure *q;
1040 StgCompactNFDataBlock *block;
1041
1042 q = *p;
1043 tag = GET_CLOSURE_TAG(q);
1044 q = UNTAG_CLOSURE(q);
1045
1046 block = find_pointer(fixup_table, count, q);
1047 if (block == NULL)
1048 return rtsFalse;
1049 if (block == block->self)
1050 return rtsTrue;
1051
1052 q = (StgClosure*)((W_)q - (W_)block->self + (W_)block);
1053 *p = TAG_CLOSURE(tag, q);
1054
1055 return rtsTrue;
1056 }
1057
1058 static rtsBool
1059 fixup_mut_arr_ptrs (StgWord *fixup_table,
1060 uint32_t count,
1061 StgMutArrPtrs *a)
1062 {
1063 StgPtr p, q;
1064
1065 p = (StgPtr)&a->payload[0];
1066 q = (StgPtr)&a->payload[a->ptrs];
1067 for (; p < q; p++) {
1068 if (!fixup_one_pointer(fixup_table, count, (StgClosure**)p))
1069 return rtsFalse;
1070 }
1071
1072 return rtsTrue;
1073 }
1074
1075 static rtsBool
1076 fixup_block(StgCompactNFDataBlock *block, StgWord *fixup_table, uint32_t count)
1077 {
1078 const StgInfoTable *info;
1079 bdescr *bd;
1080 StgPtr p;
1081
1082 bd = Bdescr((P_)block);
1083 p = bd->start + sizeofW(StgCompactNFDataBlock);
1084 while (p < bd->free) {
1085 ASSERT (LOOKS_LIKE_CLOSURE_PTR(p));
1086 info = get_itbl((StgClosure*)p);
1087
1088 switch (info->type) {
1089 case CONSTR_1_0:
1090 if (!fixup_one_pointer(fixup_table, count,
1091 &((StgClosure*)p)->payload[0]))
1092 return rtsFalse;
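/* fallthrough */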
1093 case CONSTR_0_1:
1094 p += sizeofW(StgClosure) + 1;
1095 break;
1096
1097 case CONSTR_2_0:
1098 if (!fixup_one_pointer(fixup_table, count,
1099 &((StgClosure*)p)->payload[1]))
1100 return rtsFalse;
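/* fallthrough */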
1101 case CONSTR_1_1:
1102 if (!fixup_one_pointer(fixup_table, count,
1103 &((StgClosure*)p)->payload[0]))
1104 return rtsFalse;
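/* fallthrough */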
1105 case CONSTR_0_2:
1106 p += sizeofW(StgClosure) + 2;
1107 break;
1108
1109 case CONSTR:
1110 case PRIM:
1111 case CONSTR_STATIC:
1112 case CONSTR_NOCAF_STATIC:
1113 {
1114 StgPtr end;
1115
1116 end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs;
1117 for (p = (P_)((StgClosure *)p)->payload; p < end; p++) {
1118 if (!fixup_one_pointer(fixup_table, count, (StgClosure **)p))
1119 return rtsFalse;
1120 }
1121 p += info->layout.payload.nptrs;
1122 break;
1123 }
1124
1125 case ARR_WORDS:
1126 p += arr_words_sizeW((StgArrBytes*)p);
1127 break;
1128
1129 case MUT_ARR_PTRS_FROZEN:
1130 case MUT_ARR_PTRS_FROZEN0:
1131 fixup_mut_arr_ptrs(fixup_table, count, (StgMutArrPtrs*)p);
1132 p += mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
1133 break;
1134
1135 case SMALL_MUT_ARR_PTRS_FROZEN:
1136 case SMALL_MUT_ARR_PTRS_FROZEN0:
1137 {
1138 uint32_t i;
1139 StgSmallMutArrPtrs *arr = (StgSmallMutArrPtrs*)p;
1140
1141 for (i = 0; i < arr->ptrs; i++) {
1142 if (!fixup_one_pointer(fixup_table, count,
1143 &arr->payload[i]))
1144 return rtsFalse;
1145 }
1146
1147 p += sizeofW(StgSmallMutArrPtrs) + arr->ptrs;
1148 break;
1149 }
1150
1151 case COMPACT_NFDATA:
1152 if (p == (bd->start + sizeofW(StgCompactNFDataBlock))) {
1153 // Ignore the COMPACT_NFDATA header
1154 // (it will be fixed up later)
1155 p += sizeofW(StgCompactNFData);
1156 break;
1157 }
1158
1159 // fall through
1160
1161 default:
1162 debugBelch("Invalid non-NFData closure (type %d) in Compact\n",
1163 info->type);
1164 return rtsFalse;
1165 }
1166 }
1167
1168 return rtsTrue;
1169 }
1170
1171 static int
1172 cmp_fixup_table_item (const void *e1, const void *e2)
1173 {
1174 const StgWord *w1 = e1;
1175 const StgWord *w2 = e2;
1176
1177 return (*w1 > *w2) - (*w1 < *w2);
1178 }
1179
1180 static StgWord *
1181 build_fixup_table (StgCompactNFDataBlock *block, uint32_t *pcount)
1182 {
1183 uint32_t count;
1184 StgCompactNFDataBlock *tmp;
1185 StgWord *table;
1186
1187 count = 0;
1188 tmp = block;
1189 do {
1190 count++;
1191 tmp = tmp->next;
1192 } while(tmp && tmp->owner);
1193
1194 table = stgMallocBytes(sizeof(StgWord) * 2 * count, "build_fixup_table");
1195
1196 count = 0;
1197 do {
1198 table[count * 2] = (W_)block->self;
1199 table[count * 2 + 1] = (W_)block;
1200 count++;
1201 block = block->next;
1202 } while(block && block->owner);
1203
1204 qsort(table, count, sizeof(StgWord) * 2, cmp_fixup_table_item);
1205
1206 *pcount = count;
1207 return table;
1208 }
1209
1210 static rtsBool
1211 fixup_loop(StgCompactNFDataBlock *block, StgClosure **proot)
1212 {
1213 StgWord *table;
1214 rtsBool ok;
1215 uint32_t count;
1216
1217 table = build_fixup_table (block, &count);
1218
1219 do {
1220 if (!fixup_block(block, table, count)) {
1221 ok = rtsFalse;
1222 goto out;
1223 }
1224
1225 block = block->next;
1226 } while(block && block->owner);
1227
1228 ok = fixup_one_pointer(table, count, proot);
1229
1230 out:
1231 stgFree(table);
1232 return ok;
1233 }
1234
1235 static void
1236 fixup_early(StgCompactNFData *str, StgCompactNFDataBlock *block)
1237 {
1238 StgCompactNFDataBlock *last;
1239
1240 do {
1241 last = block;
1242 block = block->next;
1243 } while(block);
1244
1245 str->last = last;
1246 }
1247
1248 static void
1249 fixup_late(StgCompactNFData *str, StgCompactNFDataBlock *block)
1250 {
1251 StgCompactNFDataBlock *nursery;
1252 bdescr *bd;
1253 StgWord totalW;
1254 StgWord totalDataW;
1255
1256 nursery = block;
1257 totalW = 0;
1258 totalDataW = 0;
1259 do {
1260 block->self = block;
1261
1262 bd = Bdescr((P_)block);
1263 totalW += bd->blocks * BLOCK_SIZE_W;
1264
1265 if (block->owner != NULL) {
1266 if (bd->free != bd->start)
1267 nursery = block;
1268 block->owner = str;
1269 totalDataW += bd->blocks * BLOCK_SIZE_W;
1270 }
1271
1272 block = block->next;
1273 } while(block);
1274
1275 str->nursery = nursery;
1276 str->totalW = totalW;
1277 str->totalDataW = totalDataW;
1278 }
1279
1280 static StgClosure *
1281 maybe_fixup_internal_pointers (StgCompactNFDataBlock *block,
1282 StgClosure *root)
1283 {
1284 rtsBool ok;
1285 StgClosure **proot;
1286
1287 // Check for fast path
1288 if (!any_needs_fixup(block))
1289 return root;
1290
1291 debugBelch("Compact imported at the wrong address, will fix up"
1292 " internal pointers\n");
1293
1294 // take the address of root so that fixup_loop() can update it in place
1295 proot = &root;
1296
1297 ok = fixup_loop(block, proot);
1298 if (!ok)
1299 *proot = NULL;
1300
1301 return *proot;
1302 }
1303
1304 StgPtr
1305 compactFixupPointers(StgCompactNFData *str,
1306 StgClosure *root)
1307 {
1308 StgCompactNFDataBlock *block;
1309 bdescr *bd;
1310 StgWord total_blocks;
1311
1312 block = compactGetFirstBlock(str);
1313
1314 fixup_early(str, block);
1315
1316 root = maybe_fixup_internal_pointers(block, root);
1317
1318 // Do the late fixup even if we did not manage to fix up all
1319 // internal pointers: we need it for GC and Sanity
1320 fixup_late(str, block);
1321
1322 // Now we're ready to let the GC, Sanity, the profiler
1323 // etc. know about this object
1324 bd = Bdescr((P_)block);
1325
1326 total_blocks = str->totalW / BLOCK_SIZE_W;
1327
1328 ACQUIRE_SM_LOCK;
1329 ASSERT (bd->gen == g0);
1330 ASSERT (g0->n_compact_blocks_in_import >= total_blocks);
1331 g0->n_compact_blocks_in_import -= total_blocks;
1332 g0->n_compact_blocks += total_blocks;
1333 dbl_link_remove(bd, &g0->compact_blocks_in_import);
1334 dbl_link_onto(bd, &g0->compact_objects);
1335 RELEASE_SM_LOCK;
1336
1337 #ifdef DEBUG
1338 if (root)
1339 verify_consistency_loop(str);
1340 #endif
1341
1342 return (StgPtr)root;
1343 }