Add optional eager black-holing, with new flag -feager-blackholing
[ghc.git] / rts / RetainerProfile.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2001
4 * Author: Sungwoo Park
5 *
6 * Retainer profiling.
7 *
8 * ---------------------------------------------------------------------------*/
9
10 #ifdef PROFILING
11
12 // Turn off inlining when debugging - it obfuscates things
13 #ifdef DEBUG
14 #define INLINE
15 #else
16 #define INLINE inline
17 #endif
18
19 #include "Rts.h"
20 #include "RtsUtils.h"
21 #include "RetainerProfile.h"
22 #include "RetainerSet.h"
23 #include "Schedule.h"
24 #include "Printer.h"
25 #include "RtsFlags.h"
26 #include "Weak.h"
27 #include "Sanity.h"
28 #include "Profiling.h"
29 #include "Stats.h"
30 #include "ProfHeap.h"
31 #include "Apply.h"
32
33 /*
34 Note: what to change in order to plug-in a new retainer profiling scheme?
35 (1) type retainer in ../includes/StgRetainerProf.h
36 (2) retainer function R(), i.e., getRetainerFrom()
37 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
38 in RetainerSet.h, if needed.
39 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
40 */
41
42 /* -----------------------------------------------------------------------------
43 * Declarations...
44 * -------------------------------------------------------------------------- */
45
46 static nat retainerGeneration; // generation
47
48 static nat numObjectVisited; // total number of objects visited
49 static nat timesAnyObjectVisited; // number of times any objects are visited
50
51 /*
52 The rs field in the profile header of any object points to its retainer
53 set in an indirect way: if flip is 0, it points to the retainer set;
54 if flip is 1, it points to the next byte after the retainer set (even
55 for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
56 pointer. See retainerSetOf().
57 */
58
59 StgWord flip = 0; // flip bit
60 // must be 0 if DEBUG_RETAINER is on (for static closures)
61
62 #define setRetainerSetToNull(c) \
63 (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)
64
65 static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
66 static void retainClosure(StgClosure *, StgClosure *, retainer);
67 #ifdef DEBUG_RETAINER
68 static void belongToHeap(StgPtr p);
69 #endif
70
71 #ifdef DEBUG_RETAINER
72 /*
73 cStackSize records how many times retainStack() has been invoked recursively,
74 that is, the number of activation records for retainStack() on the C stack.
75 maxCStackSize records its max value.
76 Invariants:
77 cStackSize <= maxCStackSize
78 */
79 static nat cStackSize, maxCStackSize;
80
81 static nat sumOfNewCost; // sum of the cost of each object, computed
82 // when the object is first visited
83 static nat sumOfNewCostExtra; // for those objects not visited during
84 // retainer profiling, e.g., MUT_VAR
85 static nat costArray[N_CLOSURE_TYPES];
86
87 nat sumOfCostLinear; // sum of the costs of all object, computed
88 // when linearly traversing the heap after
89 // retainer profiling
90 nat costArrayLinear[N_CLOSURE_TYPES];
91 #endif
92
93 /* -----------------------------------------------------------------------------
94 * Retainer stack - header
95 * Note:
96 * Although the retainer stack implementation could be separated *
97 * from the retainer profiling engine, there does not seem to be
98 * any advantage in doing that; retainer stack is an integral part
99 * of retainer profiling engine and cannot be use elsewhere at
100 * all.
101 * -------------------------------------------------------------------------- */
102
103 typedef enum {
104 posTypeStep,
105 posTypePtrs,
106 posTypeSRT,
107 posTypeLargeSRT,
108 } nextPosType;
109
110 typedef union {
111 // fixed layout or layout specified by a field in the closure
112 StgWord step;
113
114 // layout.payload
115 struct {
116 // See StgClosureInfo in InfoTables.h
117 #if SIZEOF_VOID_P == 8
118 StgWord32 pos;
119 StgWord32 ptrs;
120 #else
121 StgWord16 pos;
122 StgWord16 ptrs;
123 #endif
124 StgPtr payload;
125 } ptrs;
126
127 // SRT
128 struct {
129 StgClosure **srt;
130 StgWord srt_bitmap;
131 } srt;
132
133 // Large SRT
134 struct {
135 StgLargeSRT *srt;
136 StgWord offset;
137 } large_srt;
138
139 } nextPos;
140
141 typedef struct {
142 nextPosType type;
143 nextPos next;
144 } stackPos;
145
146 typedef struct {
147 StgClosure *c;
148 retainer c_child_r;
149 stackPos info;
150 } stackElement;
151
152 /*
153 Invariants:
154 firstStack points to the first block group.
155 currentStack points to the block group currently being used.
156 currentStack->free == stackLimit.
157 stackTop points to the topmost byte in the stack of currentStack.
158 Unless the whole stack is empty, stackTop must point to the topmost
159 object (or byte) in the whole stack. Thus, it is only when the whole stack
160 is empty that stackTop == stackLimit (not during the execution of push()
161 and pop()).
162 stackBottom == currentStack->start.
163 stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
164 Note:
165 When a current stack becomes empty, stackTop is set to point to
166 the topmost element on the previous block group so as to satisfy
167 the invariants described above.
168 */
169 static bdescr *firstStack = NULL;
170 static bdescr *currentStack;
171 static stackElement *stackBottom, *stackTop, *stackLimit;
172
173 /*
174 currentStackBoundary is used to mark the current stack chunk.
175 If stackTop == currentStackBoundary, it means that the current stack chunk
176 is empty. It is the responsibility of the user to keep currentStackBoundary
177 valid all the time if it is to be employed.
178 */
179 static stackElement *currentStackBoundary;
180
181 /*
182 stackSize records the current size of the stack.
183 maxStackSize records its high water mark.
184 Invariants:
185 stackSize <= maxStackSize
186 Note:
187 stackSize is just an estimate measure of the depth of the graph. The reason
188 is that some heap objects have only a single child and may not result
189 in a new element being pushed onto the stack. Therefore, at the end of
190 retainer profiling, maxStackSize + maxCStackSize is some value no greater
191 than the actual depth of the graph.
192 */
193 #ifdef DEBUG_RETAINER
194 static int stackSize, maxStackSize;
195 #endif
196
197 // number of blocks allocated for one stack
198 #define BLOCKS_IN_STACK 1
199
200 /* -----------------------------------------------------------------------------
201 * Add a new block group to the stack.
202 * Invariants:
203 * currentStack->link == s.
204 * -------------------------------------------------------------------------- */
205 static INLINE void
206 newStackBlock( bdescr *bd )
207 {
208 currentStack = bd;
209 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
210 stackBottom = (stackElement *)bd->start;
211 stackLimit = (stackElement *)stackTop;
212 bd->free = (StgPtr)stackLimit;
213 }
214
215 /* -----------------------------------------------------------------------------
216 * Return to the previous block group.
217 * Invariants:
218 * s->link == currentStack.
219 * -------------------------------------------------------------------------- */
220 static INLINE void
221 returnToOldStack( bdescr *bd )
222 {
223 currentStack = bd;
224 stackTop = (stackElement *)bd->free;
225 stackBottom = (stackElement *)bd->start;
226 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
227 bd->free = (StgPtr)stackLimit;
228 }
229
230 /* -----------------------------------------------------------------------------
231 * Initializes the traverse stack.
232 * -------------------------------------------------------------------------- */
233 static void
234 initializeTraverseStack( void )
235 {
236 if (firstStack != NULL) {
237 freeChain(firstStack);
238 }
239
240 firstStack = allocGroup(BLOCKS_IN_STACK);
241 firstStack->link = NULL;
242 firstStack->u.back = NULL;
243
244 newStackBlock(firstStack);
245 }
246
247 /* -----------------------------------------------------------------------------
248 * Frees all the block groups in the traverse stack.
249 * Invariants:
250 * firstStack != NULL
251 * -------------------------------------------------------------------------- */
252 static void
253 closeTraverseStack( void )
254 {
255 freeChain(firstStack);
256 firstStack = NULL;
257 }
258
259 /* -----------------------------------------------------------------------------
260 * Returns rtsTrue if the whole stack is empty.
261 * -------------------------------------------------------------------------- */
262 static INLINE rtsBool
263 isEmptyRetainerStack( void )
264 {
265 return (firstStack == currentStack) && stackTop == stackLimit;
266 }
267
268 /* -----------------------------------------------------------------------------
269 * Returns size of stack
270 * -------------------------------------------------------------------------- */
#ifdef DEBUG
lnat
retainerStackBlocks( void )
{
    // Total number of blocks across every chunk of the traverse stack.
    lnat total = 0;
    bdescr *chunk;

    for (chunk = firstStack; chunk != NULL; chunk = chunk->link) {
        total += chunk->blocks;
    }

    return total;
}
#endif
284
285 /* -----------------------------------------------------------------------------
286 * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
287 * i.e., if the current stack chunk is empty.
288 * -------------------------------------------------------------------------- */
289 static INLINE rtsBool
290 isOnBoundary( void )
291 {
292 return stackTop == currentStackBoundary;
293 }
294
295 /* -----------------------------------------------------------------------------
296 * Initializes *info from ptrs and payload.
297 * Invariants:
298 * payload[] begins with ptrs pointers followed by non-pointers.
299 * -------------------------------------------------------------------------- */
300 static INLINE void
301 init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
302 {
303 info->type = posTypePtrs;
304 info->next.ptrs.pos = 0;
305 info->next.ptrs.ptrs = ptrs;
306 info->next.ptrs.payload = payload;
307 }
308
309 /* -----------------------------------------------------------------------------
310 * Find the next object from *info.
311 * -------------------------------------------------------------------------- */
312 static INLINE StgClosure *
313 find_ptrs( stackPos *info )
314 {
315 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
316 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
317 } else {
318 return NULL;
319 }
320 }
321
322 /* -----------------------------------------------------------------------------
323 * Initializes *info from SRT information stored in *infoTable.
324 * -------------------------------------------------------------------------- */
325 static INLINE void
326 init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
327 {
328 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
329 info->type = posTypeLargeSRT;
330 info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
331 info->next.large_srt.offset = 0;
332 } else {
333 info->type = posTypeSRT;
334 info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
335 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
336 }
337 }
338
339 static INLINE void
340 init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
341 {
342 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
343 info->type = posTypeLargeSRT;
344 info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
345 info->next.large_srt.offset = 0;
346 } else {
347 info->type = posTypeSRT;
348 info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
349 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
350 }
351 }
352
353 /* -----------------------------------------------------------------------------
354 * Find the next object from *info.
355 * -------------------------------------------------------------------------- */
356 static INLINE StgClosure *
357 find_srt( stackPos *info )
358 {
359 StgClosure *c;
360 StgWord bitmap;
361
362 if (info->type == posTypeSRT) {
363 // Small SRT bitmap
364 bitmap = info->next.srt.srt_bitmap;
365 while (bitmap != 0) {
366 if ((bitmap & 1) != 0) {
367 #if defined(__PIC__) && defined(mingw32_TARGET_OS)
368 if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
369 c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
370 else
371 c = *(info->next.srt.srt);
372 #else
373 c = *(info->next.srt.srt);
374 #endif
375 bitmap = bitmap >> 1;
376 info->next.srt.srt++;
377 info->next.srt.srt_bitmap = bitmap;
378 return c;
379 }
380 bitmap = bitmap >> 1;
381 info->next.srt.srt++;
382 }
383 // bitmap is now zero...
384 return NULL;
385 }
386 else {
387 // Large SRT bitmap
388 nat i = info->next.large_srt.offset;
389 StgWord bitmap;
390
391 // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
392 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
393 bitmap = bitmap >> (i % BITS_IN(StgWord));
394 while (i < info->next.large_srt.srt->l.size) {
395 if ((bitmap & 1) != 0) {
396 c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
397 i++;
398 info->next.large_srt.offset = i;
399 return c;
400 }
401 i++;
402 if (i % BITS_IN(W_) == 0) {
403 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
404 } else {
405 bitmap = bitmap >> 1;
406 }
407 }
408 // reached the end of this bitmap.
409 info->next.large_srt.offset = i;
410 return NULL;
411 }
412 }
413
414 /* -----------------------------------------------------------------------------
415 * push() pushes a stackElement representing the next child of *c
416 * onto the traverse stack. If *c has no child, *first_child is set
417 * to NULL and nothing is pushed onto the stack. If *c has only one
418 * child, *c_chlid is set to that child and nothing is pushed onto
419 * the stack. If *c has more than two children, *first_child is set
420 * to the first child and a stackElement representing the second
421 * child is pushed onto the stack.
422
423 * Invariants:
424 * *c_child_r is the most recent retainer of *c's children.
425 * *c is not any of TSO, AP, PAP, AP_STACK, which means that
426 * there cannot be any stack objects.
427 * Note: SRTs are considered to be children as well.
428 * -------------------------------------------------------------------------- */
/*
 * See the contract in the block comment above: push() stores the first
 * child of *c in *first_child and, when *c has further children still to
 * be visited, records the traversal position for them in a stackElement
 * pushed onto the traverse stack.  se is built on the C stack and copied
 * into the traverse stack only when a push is actually needed.
 */
static INLINE void
push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
{
    stackElement se;      // element to be pushed (copied onto the stack below)
    bdescr *nbd;          // Next Block Descriptor

#ifdef DEBUG_RETAINER
    // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    // Stack-carrying closures are handled by retainStack(), never here.
    ASSERT(get_itbl(c)->type != TSO);
    ASSERT(get_itbl(c)->type != AP_STACK);

    //
    // fill in se
    //

    se.c = c;
    se.c_child_r = c_child_r;

    // fill in se.info
    switch (get_itbl(c)->type) {
        // no child, no SRT
    case CONSTR_0_1:
    case CONSTR_0_2:
    case CAF_BLACKHOLE:
    case BLACKHOLE:
    case ARR_WORDS:
        *first_child = NULL;
        return;

        // one child (fixed), no SRT
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
        *first_child = ((StgMutVar *)c)->var;
        return;
    case THUNK_SELECTOR:
        *first_child = ((StgSelector *)c)->selectee;
        return;
    case IND_PERM:
    case IND_OLDGEN_PERM:
    case IND_OLDGEN:
        *first_child = ((StgInd *)c)->indirectee;
        return;
    case CONSTR_1_0:
    case CONSTR_1_1:
        *first_child = c->payload[0];
        return;

        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;

        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
    case CONSTR_2_0:
        *first_child = c->payload[0];         // return the first pointer
        // se.info.type = posTypeStep;
        // se.info.next.step = 2;             // 2 = second
        break;

        // three children (fixed), no SRT
        // need to push a stackElement
    case MVAR_CLEAN:
    case MVAR_DIRTY:
        // head must be TSO and the head of a linked list of TSOs.
        // Shoule it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;                // 2 = second
        break;

        // three children (fixed), no SRT
    case WEAK:
        *first_child = ((StgWeak *)c)->key;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;
        break;

        // layout.payload.ptrs, no SRT
    case CONSTR:
    case STABLE_NAME:
    case BCO:
    case CONSTR_STATIC:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;   // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // layout.payload.ptrs, SRT
    case FUN:           // *c is a heap object.
    case FUN_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;

    case THUNK:
    case THUNK_2_0:
        // NB: thunk payload lives after the (profiled) thunk header.
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
    case FUN_1_0:
    case FUN_1_1:
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;

    case THUNK_1_0:
    case THUNK_1_1:
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;

    case FUN_STATIC:    // *c is a heap object.
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        // fall through
    case FUN_0_1:
    case FUN_0_2:
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

        // SRT only
    case THUNK_STATIC:
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        // fall through
    case THUNK_0_1:
    case THUNK_0_2:
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

    case TVAR_WATCH_QUEUE:
        *first_child = (StgClosure *)((StgTVarWatchQueue *)c)->closure;
        se.info.next.step = 2;                // 2 = second
        break;
    case TVAR:
        *first_child = (StgClosure *)((StgTVar *)c)->current_value;
        break;
    case TREC_HEADER:
        *first_child = (StgClosure *)((StgTRecHeader *)c)->enclosing_trec;
        break;
    case TREC_CHUNK:
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
        se.info.next.step = 0;  // entry no.
        break;

        // cannot appear
    case PAP:
    case AP:
    case AP_STACK:
    case TSO:
    case IND_STATIC:
    case CONSTR_NOCAF_STATIC:
        // stack objects
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
        // invalid objects
    case IND:
    case BLOCKED_FETCH:
    case FETCH_ME:
    case FETCH_ME_BQ:
    case RBH:
    case REMOTE_REF:
    case INVALID_OBJECT:
    default:
        barf("Invalid object *c in push()");
        return;
    }

    // The current chunk is full: record its top and switch to the next
    // chunk, allocating a fresh one if we have never been this deep before.
    if (stackTop - 1 < stackBottom) {
#ifdef DEBUG_RETAINER
        // debugBelch("push() to the next stack.\n");
#endif
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
        currentStack->free = (StgPtr)stackTop;

        if (currentStack->link == NULL) {
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
            nbd->u.back = currentStack;
            currentStack->link = nbd;
        } else
            nbd = currentStack->link;

        newStackBlock(nbd);
    }

    // adjust stackTop (acutal push)
    stackTop--;
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.

    // ToDo: The line below leads to the warning:
    //   warning: 'se.info.type' may be used uninitialized in this function
    // This is caused by the fact that there are execution paths through the
    // large switch statement above where some cases do not initialize this
    // field. Is this really harmless? Can we avoid the warning?
    *stackTop = se;

#ifdef DEBUG_RETAINER
    stackSize++;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    // ASSERT(stackSize >= 0);
    // debugBelch("stackSize = %d\n", stackSize);
#endif
}
673
674 /* -----------------------------------------------------------------------------
675 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
676 * Invariants:
677 * stackTop cannot be equal to stackLimit unless the whole stack is
678 * empty, in which case popOff() is not allowed.
679 * Note:
680 * You can think of popOffReal() as a part of popOff() which is
681 * executed at the end of popOff() in necessary. Since popOff() is
682 * likely to be executed quite often while popOffReal() is not, we
683 * separate popOffReal() from popOff(), which is declared as an
684 * INLINE function (for the sake of execution speed). popOffReal()
685 * is called only within popOff() and nowhere else.
686 * -------------------------------------------------------------------------- */
/*
 * Slow path of popOff(): the current chunk has become empty, so either
 * the whole stack is empty (first chunk) or we switch back to the
 * previous chunk.  Called only from popOff().
 */
static void
popOffReal(void)
{
    bdescr *pbd;    // Previous Block Descriptor

#ifdef DEBUG_RETAINER
    // debugBelch("pop() to the previous stack.\n");
#endif

    // Precondition: popping the top element empties the current chunk.
    ASSERT(stackTop + 1 == stackLimit);
    ASSERT(stackBottom == (stackElement *)currentStack->start);

    if (firstStack == currentStack) {
        // The stack is completely empty.
        stackTop++;
        ASSERT(stackTop == stackLimit);
#ifdef DEBUG_RETAINER
        stackSize--;
        // NOTE(review): this maxStackSize update follows a decrement, so it
        // can never raise the maximum — it looks copy-pasted from push().
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
          ASSERT(stackSize >= 0);
          debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
    currentStack->free = (StgPtr)stackLimit;

    // find the previous block descriptor
    pbd = currentStack->u.back;
    ASSERT(pbd != NULL);

    returnToOldStack(pbd);

#ifdef DEBUG_RETAINER
    stackSize--;
    // NOTE(review): dead max update after a decrement; see above.
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    /*
      ASSERT(stackSize >= 0);
      debugBelch("stackSize = %d\n", stackSize);
    */
#endif
}
733
734 static INLINE void
735 popOff(void) {
736 #ifdef DEBUG_RETAINER
737 // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
738 #endif
739
740 ASSERT(stackTop != stackLimit);
741 ASSERT(!isEmptyRetainerStack());
742
743 // <= (instead of <) is wrong!
744 if (stackTop + 1 < stackLimit) {
745 stackTop++;
746 #ifdef DEBUG_RETAINER
747 stackSize--;
748 if (stackSize > maxStackSize) maxStackSize = stackSize;
749 /*
750 ASSERT(stackSize >= 0);
751 debugBelch("stackSize = %d\n", stackSize);
752 */
753 #endif
754 return;
755 }
756
757 popOffReal();
758 }
759
760 /* -----------------------------------------------------------------------------
761 * Finds the next object to be considered for retainer profiling and store
762 * its pointer to *c.
763 * Test if the topmost stack element indicates that more objects are left,
764 * and if so, retrieve the first object and store its pointer to *c. Also,
765 * set *cp and *r appropriately, both of which are stored in the stack element.
766 * The topmost stack element then is overwritten so as for it to now denote
767 * the next object.
768 * If the topmost stack element indicates no more objects are left, pop
769 * off the stack element until either an object can be retrieved or
770 * the current stack chunk becomes empty, indicated by rtsTrue returned by
771 * isOnBoundary(), in which case *c is set to NULL.
772 * Note:
773 * It is okay to call this function even when the current stack chunk
774 * is empty.
775 * -------------------------------------------------------------------------- */
/*
 * See the contract in the block comment above: deliver the next child
 * denoted by the topmost stack element into *c (NULL when the current
 * chunk is depleted), with the child's parent in *cp and the parent's
 * retainer in *r.  Elements with no children left are popped off until
 * a child is found or the chunk boundary is reached.
 */
static INLINE void
pop( StgClosure **c, StgClosure **cp, retainer *r )
{
    stackElement *se;

#ifdef DEBUG_RETAINER
    // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    do {
        if (isOnBoundary()) {     // if the current stack chunk is depleted
            *c = NULL;
            return;
        }

        se = stackTop;

        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

            // three children (fixed), no SRT
            // need to push a stackElement
        case MVAR_CLEAN:
        case MVAR_DIRTY:
            // se.info.next.step counts which of the remaining children
            // (tail = 2, value = 3) is delivered next.
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;             // move to the next step
                // no popOff
            } else {
                *c = ((StgMVar *)se->c)->value;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
                // no popOff
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR_WATCH_QUEUE:
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgTVarWatchQueue *)se->c)->next_queue_entry;
                se->info.next.step++;             // move to the next step
                // no popOff
            } else {
                *c = (StgClosure *)((StgTVarWatchQueue *)se->c)->prev_queue_entry;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR:
            *c = (StgClosure *)((StgTVar *)se->c)->first_watch_queue_entry;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_HEADER:
            *c = (StgClosure *)((StgTRecHeader *)se->c)->current_chunk;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow.  So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            TRecEntry *entry;
            nat entry_no = se->info.next.step >> 2;
            nat field_no = se->info.next.step & 3;
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                *c = NULL;
                popOff();
                return;
            }
            entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            *cp = se->c;
            *r = se->c_child_r;
            se->info.next.step++;
            return;
        }

        case CONSTR:
        case STABLE_NAME:
        case BCO:
        case CONSTR_STATIC:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
        case MUT_ARR_PTRS_FROZEN:
        case MUT_ARR_PTRS_FROZEN0:
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
                popOff();
                break;  // retry with the element that is now topmost
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // layout.payload.ptrs, SRT
        case FUN:         // always a heap object
        case FUN_2_0:
            // First exhaust the payload pointers, then switch to the SRT.
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
            if (*c != NULL) {
                *cp = se->c;
                *r = se->c_child_r;
                return;
            }
            popOff();
            break;

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case CAF_BLACKHOLE:
        case BLACKHOLE:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case IND_PERM:
        case IND_OLDGEN_PERM:
        case IND_OLDGEN:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
        case IND_STATIC:
        case CONSTR_NOCAF_STATIC:
            // stack objects
        case RET_DYN:
        case UPDATE_FRAME:
        case CATCH_FRAME:
        case STOP_FRAME:
        case RET_BCO:
        case RET_SMALL:
        case RET_BIG:
            // invalid objects
        case IND:
        case BLOCKED_FETCH:
        case FETCH_ME:
        case FETCH_ME_BQ:
        case RBH:
        case REMOTE_REF:
        case INVALID_OBJECT:
        default:
            barf("Invalid object *c in pop()");
            return;
        }
    } while (rtsTrue);
}
997
998 /* -----------------------------------------------------------------------------
999 * RETAINER PROFILING ENGINE
1000 * -------------------------------------------------------------------------- */
1001
1002 void
1003 initRetainerProfiling( void )
1004 {
1005 initializeAllRetainerSet();
1006 retainerGeneration = 0;
1007 }
1008
1009 /* -----------------------------------------------------------------------------
1010 * This function must be called before f-closing prof_file.
1011 * -------------------------------------------------------------------------- */
/* -----------------------------------------------------------------------------
 * This function must be called before f-closing prof_file.
 * -------------------------------------------------------------------------- */
void
endRetainerProfiling( void )
{
#ifdef SECOND_APPROACH
    // With the second approach, all collected retainer sets are written
    // out in one go at the very end of the run.
    outputAllRetainerSet(prof_file);
#endif
}
1019
/* -----------------------------------------------------------------------------
 * maybeInitRetainerSet(): make the retainer-set field of the closure *c
 * conform to the current flip bit.
 * Side effects:
 *   RSET(c) is initialized to NULL if its current value does not
 *   conform to flip.
 * Note:
 *   Even though this function has side effects, they CAN be ignored because
 *   subsequent calls to retainerSetOf() always result in the same return value
 *   and retainerSetOf() is the only way to retrieve the retainer set of a
 *   given closure.
 *   We have to perform an XOR (^) operation each time a closure is examined.
 *   The reason is that we do not know when a closure is visited last.
 * -------------------------------------------------------------------------- */
1034 static INLINE void
1035 maybeInitRetainerSet( StgClosure *c )
1036 {
1037 if (!isRetainerSetFieldValid(c)) {
1038 setRetainerSetToNull(c);
1039 }
1040 }
1041
1042 /* -----------------------------------------------------------------------------
1043 * Returns rtsTrue if *c is a retainer.
1044 * -------------------------------------------------------------------------- */
static INLINE rtsBool
isRetainer( StgClosure *c )
{
    // Classify the closure purely by its info-table type: retainers are
    // the objects that appear in retainer sets (roots, mutable objects,
    // and thunks); everything else is merely traversed through.
    switch (get_itbl(c)->type) {
    //
    //  True case
    //
    // TSOs MUST be retainers: they constitute the set of roots.
    case TSO:

    // mutable objects
    case MVAR_CLEAN:
    case MVAR_DIRTY:
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:

    // thunks are retainers.
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
    case AP:
    case AP_STACK:

    // Static thunks, or CAFS, are obviously retainers.
    case THUNK_STATIC:

    // WEAK objects are roots; there is separate code in which traversing
    // begins from WEAK objects.
    case WEAK:

    // Since the other mutvar-type things are retainers, seems
    // like the right thing to do:
    case TVAR:
        return rtsTrue;

    //
    // False case
    //

    // constructors
    case CONSTR:
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
    // functions
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
    // partial applications
    case PAP:
    // blackholes
    case CAF_BLACKHOLE:
    case BLACKHOLE:
    // indirection
    case IND_PERM:
    case IND_OLDGEN_PERM:
    case IND_OLDGEN:
    // static objects
    case CONSTR_STATIC:
    case FUN_STATIC:
    // misc
    case STABLE_NAME:
    case BCO:
    case ARR_WORDS:
    // STM
    case TVAR_WATCH_QUEUE:
    case TREC_HEADER:
    case TREC_CHUNK:
        return rtsFalse;

    //
    // Error case
    //
    // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop:
    // retainClosure() skips through it before this predicate is consulted.
    case IND_STATIC:
    // CONSTR_NOCAF_STATIC
    // cannot be *c, *cp, *r in the retainer profiling loop.
    case CONSTR_NOCAF_STATIC:
    // Stack objects are invalid because they are never treated as
    // legal objects during retainer profiling.
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
    // other cases
    case IND:
    case BLOCKED_FETCH:
    case FETCH_ME:
    case FETCH_ME_BQ:
    case RBH:
    case REMOTE_REF:
    case INVALID_OBJECT:
    default:
        barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
        return rtsFalse;
    }
}
1158
1159 /* -----------------------------------------------------------------------------
1160 * Returns the retainer function value for the closure *c, i.e., R(*c).
1161 * This function does NOT return the retainer(s) of *c.
1162 * Invariants:
1163 * *c must be a retainer.
1164 * Note:
1165 * Depending on the definition of this function, the maintenance of retainer
1166 * sets can be made easier. If most retainer sets are likely to be created
1167 * again across garbage collections, refreshAllRetainerSet() in
1168 * RetainerSet.c can simply do nothing.
1169 * If this is not the case, we can free all the retainer sets and
1170 * re-initialize the hash table.
1171 * See refreshAllRetainerSet() in RetainerSet.c.
1172 * -------------------------------------------------------------------------- */
static INLINE retainer
getRetainerFrom( StgClosure *c )
{
    ASSERT(isRetainer(c));

#if defined(RETAINER_SCHEME_INFO)
    // Retainer scheme 1: retainer = info table
    return get_itbl(c);
#elif defined(RETAINER_SCHEME_CCS)
    // Retainer scheme 2: retainer = cost centre stack
    return c->header.prof.ccs;
#elif defined(RETAINER_SCHEME_CC)
    // Retainer scheme 3: retainer = cost centre
    return c->header.prof.ccs->cc;
#endif
    // NOTE(review): if none of the RETAINER_SCHEME_* macros is defined,
    // control falls off the end of this non-void function (undefined
    // behaviour at the call site). Presumably the build always selects
    // exactly one scheme — confirm, or add an #error guard.
}
1189
1190 /* -----------------------------------------------------------------------------
1191 * Associates the retainer set *s with the closure *c, that is, *s becomes
1192 * the retainer set of *c.
1193 * Invariants:
1194 * c != NULL
1195 * s != NULL
1196 * -------------------------------------------------------------------------- */
1197 static INLINE void
1198 associate( StgClosure *c, RetainerSet *s )
1199 {
1200 // StgWord has the same size as pointers, so the following type
1201 // casting is okay.
1202 RSET(c) = (RetainerSet *)((StgWord)s | flip);
1203 }
1204
1205 /* -----------------------------------------------------------------------------
1206 Call retainClosure for each of the closures covered by a large bitmap.
1207 -------------------------------------------------------------------------- */
1208
1209 static void
1210 retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size,
1211 StgClosure *c, retainer c_child_r)
1212 {
1213 nat i, b;
1214 StgWord bitmap;
1215
1216 b = 0;
1217 bitmap = large_bitmap->bitmap[b];
1218 for (i = 0; i < size; ) {
1219 if ((bitmap & 1) == 0) {
1220 retainClosure((StgClosure *)*p, c, c_child_r);
1221 }
1222 i++;
1223 p++;
1224 if (i % BITS_IN(W_) == 0) {
1225 b++;
1226 bitmap = large_bitmap->bitmap[b];
1227 } else {
1228 bitmap = bitmap >> 1;
1229 }
1230 }
1231 }
1232
1233 static INLINE StgPtr
1234 retain_small_bitmap (StgPtr p, nat size, StgWord bitmap,
1235 StgClosure *c, retainer c_child_r)
1236 {
1237 while (size > 0) {
1238 if ((bitmap & 1) == 0) {
1239 retainClosure((StgClosure *)*p, c, c_child_r);
1240 }
1241 p++;
1242 bitmap = bitmap >> 1;
1243 size--;
1244 }
1245 return p;
1246 }
1247
1248 /* -----------------------------------------------------------------------------
1249 * Call retainClosure for each of the closures in an SRT.
1250 * ------------------------------------------------------------------------- */
1251
1252 static void
1253 retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
1254 {
1255 nat i, b, size;
1256 StgWord bitmap;
1257 StgClosure **p;
1258
1259 b = 0;
1260 p = (StgClosure **)srt->srt;
1261 size = srt->l.size;
1262 bitmap = srt->l.bitmap[b];
1263 for (i = 0; i < size; ) {
1264 if ((bitmap & 1) != 0) {
1265 retainClosure((StgClosure *)*p, c, c_child_r);
1266 }
1267 i++;
1268 p++;
1269 if (i % BITS_IN(W_) == 0) {
1270 b++;
1271 bitmap = srt->l.bitmap[b];
1272 } else {
1273 bitmap = bitmap >> 1;
1274 }
1275 }
1276 }
1277
1278 static INLINE void
1279 retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
1280 {
1281 nat bitmap;
1282 StgClosure **p;
1283
1284 bitmap = srt_bitmap;
1285 p = srt;
1286
1287 if (bitmap == (StgHalfWord)(-1)) {
1288 retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1289 return;
1290 }
1291
1292 while (bitmap != 0) {
1293 if ((bitmap & 1) != 0) {
1294 #ifdef ENABLE_WIN32_DLL_SUPPORT
1295 if ( (unsigned long)(*srt) & 0x1 ) {
1296 retainClosure(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1)),
1297 c, c_child_r);
1298 } else {
1299 retainClosure(*srt,c,c_child_r);
1300 }
1301 #else
1302 retainClosure(*srt,c,c_child_r);
1303 #endif
1304 }
1305 p++;
1306 bitmap = bitmap >> 1;
1307 }
1308 }
1309
1310 /* -----------------------------------------------------------------------------
1311 * Process all the objects in the stack chunk from stackStart to stackEnd
1312 * with *c and *c_child_r being their parent and their most recent retainer,
1313 * respectively. Treat stackOptionalFun as another child of *c if it is
1314 * not NULL.
1315 * Invariants:
1316 * *c is one of the following: TSO, AP_STACK.
1317 * If *c is TSO, c == c_child_r.
1318 * stackStart < stackEnd.
1319 * RSET(c) and RSET(c_child_r) are valid, i.e., their
1320 * interpretation conforms to the current value of flip (even when they
1321 * are interpreted to be NULL).
1322 * If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
1323 * or ThreadKilled, which means that its stack is ready to process.
1324 * Note:
1325 * This code was almost plagiarzied from GC.c! For each pointer,
1326 * retainClosure() is invoked instead of evacuate().
1327 * -------------------------------------------------------------------------- */
static void
retainStack( StgClosure *c, retainer c_child_r,
             StgPtr stackStart, StgPtr stackEnd )
{
    stackElement *oldStackBoundary;
    StgPtr p;
    StgRetInfoTable *info;
    StgWord32 bitmap;
    nat size;

#ifdef DEBUG_RETAINER
    // Track the depth of nested retainStack()/retainClosure() C calls.
    cStackSize++;
    if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
#endif

    /*
      Each invocation of retainStack() creates a new virtual
      stack. Since all such stacks share a single common stack, we
      record the current currentStackBoundary, which will be restored
      at the exit.
    */
    oldStackBoundary = currentStackBoundary;
    currentStackBoundary = stackTop;

#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
#endif

    // A TSO's stack may only be walked when the thread is in a runnable
    // state (see the invariants in the header comment).
    ASSERT(get_itbl(c)->type != TSO ||
           (((StgTSO *)c)->what_next != ThreadRelocated &&
            ((StgTSO *)c)->what_next != ThreadComplete &&
            ((StgTSO *)c)->what_next != ThreadKilled));

    // Walk the stack chunk frame by frame, dispatching on the return
    // info table of each frame, mirroring the scavenging code in GC.c.
    p = stackStart;
    while (p < stackEnd) {
        info = get_ret_itbl((StgClosure *)p);

        switch(info->i.type) {

        case UPDATE_FRAME:
            retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
            p += sizeofW(StgUpdateFrame);
            continue;

        case STOP_FRAME:
        case CATCH_FRAME:
        case CATCH_STM_FRAME:
        case CATCH_RETRY_FRAME:
        case ATOMICALLY_FRAME:
        case RET_SMALL:
            // Small-bitmap frames: the bitmap in the info table tells us
            // which payload words are pointers.
            bitmap = BITMAP_BITS(info->i.layout.bitmap);
            size = BITMAP_SIZE(info->i.layout.bitmap);
            p++;
            p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

        follow_srt:
            retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
            continue;

        case RET_BCO: {
            StgBCO *bco;

            p++;
            // The word after the header is the BCO itself; retain it,
            // then use its bitmap for the remaining payload.
            retainClosure((StgClosure *)*p, c, c_child_r);
            bco = (StgBCO *)*p;
            p++;
            size = BCO_BITMAP_SIZE(bco);
            retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
            p += size;
            continue;
        }

        // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
        case RET_BIG:
            size = GET_LARGE_BITMAP(&info->i)->size;
            p++;
            retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
                                size, c, c_child_r);
            p += size;
            // and don't forget to follow the SRT
            goto follow_srt;

        // Dynamic bitmap: the mask is stored on the stack
        case RET_DYN: {
            StgWord dyn;
            dyn = ((StgRetDyn *)p)->liveness;

            // traverse the bitmap first
            bitmap = RET_DYN_LIVENESS(dyn);
            p = (P_)&((StgRetDyn *)p)->payload[0];
            size = RET_DYN_BITMAP_SIZE;
            p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

            // skip over the non-ptr words
            p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;

            // follow the ptr words
            for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
                retainClosure((StgClosure *)*p, c, c_child_r);
                p++;
            }
            continue;
        }

        case RET_FUN: {
            StgRetFun *ret_fun = (StgRetFun *)p;
            StgFunInfoTable *fun_info;

            retainClosure(ret_fun->fun, c, c_child_r);
            // The payload layout is determined by the function's own
            // info table, not by the frame's.
            fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun));

            p = (P_)&ret_fun->payload;
            switch (fun_info->f.fun_type) {
            case ARG_GEN:
                bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
                size = BITMAP_SIZE(fun_info->f.b.bitmap);
                p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
                break;
            case ARG_GEN_BIG:
                size = GET_FUN_LARGE_BITMAP(fun_info)->size;
                retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
                                    size, c, c_child_r);
                p += size;
                break;
            default:
                // Standard argument pattern: look the bitmap up in the
                // static table indexed by fun_type.
                bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
                size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
                p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
                break;
            }
            goto follow_srt;
        }

        default:
            barf("Invalid object found in retainStack(): %d",
                 (int)(info->i.type));
        }
    }

    // restore currentStackBoundary
    currentStackBoundary = oldStackBoundary;
#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
#endif

#ifdef DEBUG_RETAINER
    cStackSize--;
#endif
}
1477
1478 /* ----------------------------------------------------------------------------
1479 * Call retainClosure for each of the children of a PAP/AP
1480 * ------------------------------------------------------------------------- */
1481
/* Retain the function and every pointer argument in a PAP/AP payload.
 * The payload layout is dictated by the function's info table, exactly
 * as for RET_FUN frames in retainStack(). Returns the pointer just past
 * the payload words processed. */
static INLINE StgPtr
retain_PAP_payload (StgClosure *pap,    /* NOT tagged */
                    retainer c_child_r, /* NOT tagged */
                    StgClosure *fun,    /* tagged */
                    StgClosure** payload, StgWord n_args)
{
    StgPtr p;
    StgWord bitmap;
    StgFunInfoTable *fun_info;

    retainClosure(fun, pap, c_child_r);
    fun = UNTAG_CLOSURE(fun);
    fun_info = get_fun_itbl(fun);
    // A PAP's fun field never points to another PAP.
    ASSERT(fun_info->i.type != PAP);

    p = (StgPtr)payload;

    switch (fun_info->f.fun_type) {
    case ARG_GEN:
        // Small bitmap stored directly in the function's info table.
        bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
        p = retain_small_bitmap(p, n_args, bitmap,
                                pap, c_child_r);
        break;
    case ARG_GEN_BIG:
        retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
                            n_args, pap, c_child_r);
        p += n_args;
        break;
    case ARG_BCO:
        // Byte-code object: the bitmap lives in the BCO itself.
        retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun),
                            n_args, pap, c_child_r);
        p += n_args;
        break;
    default:
        // Standard argument pattern from the static bitmap table.
        bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
        p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
        break;
    }
    return p;
}
1522
1523 /* -----------------------------------------------------------------------------
1524 * Compute the retainer set of *c0 and all its desecents by traversing.
1525 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
1526 * Invariants:
1527 * c0 = cp0 = r0 holds only for root objects.
1528 * RSET(cp0) and RSET(r0) are valid, i.e., their
1529 * interpretation conforms to the current value of flip (even when they
1530 * are interpreted to be NULL).
1531 * However, RSET(c0) may be corrupt, i.e., it may not conform to
1532 * the current value of flip. If it does not, during the execution
1533 * of this function, RSET(c0) must be initialized as well as all
1534 * its descendants.
1535 * Note:
1536 * stackTop must be the same at the beginning and the exit of this function.
1537 * *c0 can be TSO (as well as AP_STACK).
1538 * -------------------------------------------------------------------------- */
static void
retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
{
    // c = Current closure (possibly tagged)
    // cp = Current closure's Parent (NOT tagged)
    // r = current closures' most recent Retainer (NOT tagged)
    // c_child_r = current closure's children's most recent retainer
    // first_child = first child of c
    StgClosure *c, *cp, *first_child;
    RetainerSet *s, *retainerSetOfc;
    retainer r, c_child_r;
    StgWord typeOfc;

#ifdef DEBUG_RETAINER
    // StgPtr oldStackTop;
#endif

#ifdef DEBUG_RETAINER
    // oldStackTop = stackTop;
    // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
#endif

    // The traversal is iterative: pending children live on the explicit
    // traverse stack (push/pop), not on the C call stack.
    // (c, cp, r) = (c0, cp0, r0)
    c = c0;
    cp = cp0;
    r = r0;
    goto inner_loop;

loop:
    //debugBelch("loop");
    // pop to (c, cp, r);
    pop(&c, &cp, &r);

    if (c == NULL) {
        // Traverse stack exhausted: the whole subgraph of c0 is done.
#ifdef DEBUG_RETAINER
        // debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
#endif
        return;
    }

    //debugBelch("inner_loop");

inner_loop:
    c = UNTAG_CLOSURE(c);

    // c = current closure under consideration,
    // cp = current closure's parent,
    // r = current closure's most recent retainer
    //
    // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
    //   RSET(cp) and RSET(r) are valid.
    //   RSET(c) is valid only if c has been visited before.
    //
    // Loop invariants (on the relation between c, cp, and r)
    //   if cp is not a retainer, r belongs to RSET(cp).
    //   if cp is a retainer, r == cp.

    typeOfc = get_itbl(c)->type;

#ifdef DEBUG_RETAINER
    // Cost accounting on first visit (debugging only).
    switch (typeOfc) {
    case IND_STATIC:
    case CONSTR_NOCAF_STATIC:
    case CONSTR_STATIC:
    case THUNK_STATIC:
    case FUN_STATIC:
        break;
    default:
        if (retainerSetOf(c) == NULL) { // first visit?
            costArray[typeOfc] += cost(c);
            sumOfNewCost += cost(c);
        }
        break;
    }
#endif

    // special cases
    switch (typeOfc) {
    case TSO:
        // Finished/killed threads are ignored entirely.
        if (((StgTSO *)c)->what_next == ThreadComplete ||
            ((StgTSO *)c)->what_next == ThreadKilled) {
#ifdef DEBUG_RETAINER
            debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
#endif
            goto loop;
        }
        // Relocated threads: follow the link to the real TSO.
        if (((StgTSO *)c)->what_next == ThreadRelocated) {
#ifdef DEBUG_RETAINER
            debugBelch("ThreadRelocated encountered in retainClosure()\n");
#endif
            c = (StgClosure *)((StgTSO *)c)->_link;
            goto inner_loop;
        }
        break;

    case IND_STATIC:
        // We just skip IND_STATIC, so its retainer set is never computed.
        c = ((StgIndStatic *)c)->indirectee;
        goto inner_loop;
        // static objects with no pointers out, so goto loop.
    case CONSTR_NOCAF_STATIC:
        // It is not just enough not to compute the retainer set for *c; it is
        // mandatory because CONSTR_NOCAF_STATIC are not reachable from
        // scavenged_static_objects, the list from which is assumed to traverse
        // all static objects after major garbage collections.
        goto loop;
    case THUNK_STATIC:
    case FUN_STATIC:
        if (get_itbl(c)->srt_bitmap == 0) {
            // No need to compute the retainer set; no dynamic objects
            // are reachable from *c.
            //
            // Static objects: if we traverse all the live closures,
            // including static closures, during each heap census then
            // we will observe that some static closures appear and
            // disappear. eg. a closure may contain a pointer to a
            // static function 'f' which is not otherwise reachable
            // (it doesn't indirectly point to any CAFs, so it doesn't
            // appear in any SRTs), so we would find 'f' during
            // traversal. However on the next sweep there may be no
            // closures pointing to 'f'.
            //
            // We must therefore ignore static closures whose SRT is
            // empty, because these are exactly the closures that may
            // "appear". A closure with a non-empty SRT, and which is
            // still required, will always be reachable.
            //
            // But what about CONSTR_STATIC? Surely these may be able
            // to appear, and they don't have SRTs, so we can't
            // check. So for now, we're calling
            // resetStaticObjectForRetainerProfiling() from the
            // garbage collector to reset the retainer sets in all the
            // reachable static objects.
            goto loop;
        }
        // fall through: static thunk/function with a non-empty SRT is
        // processed like an ordinary closure below.
    default:
        break;
    }

    // The above objects are ignored in computing the average number of times
    // an object is visited.
    timesAnyObjectVisited++;

    // If this is the first visit to c, initialize its retainer set.
    maybeInitRetainerSet(c);
    retainerSetOfc = retainerSetOf(c);

    // Now compute s:
    //    isRetainer(cp) == rtsTrue => s == NULL
    //    isRetainer(cp) == rtsFalse => s == cp.retainer
    if (isRetainer(cp))
        s = NULL;
    else
        s = retainerSetOf(cp);

    // (c, cp, r, s) is available.

    // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
    if (retainerSetOfc == NULL) {
        // This is the first visit to *c.
        numObjectVisited++;

        if (s == NULL)
            associate(c, singleton(r));
        else
            // s is actually the retainer set of *c!
            associate(c, s);

        // compute c_child_r
        c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
    } else {
        // This is not the first visit to *c.
        if (isMember(r, retainerSetOfc))
            goto loop; // no need to process child

        if (s == NULL)
            associate(c, addElement(r, retainerSetOfc));
        else {
            // s is not NULL and cp is not a retainer. This means that
            // each time *cp is visited, so is *c. Thus, if s has
            // exactly one more element in its retainer set than c, s
            // is also the new retainer set for *c.
            if (s->num == retainerSetOfc->num + 1) {
                associate(c, s);
            }
            // Otherwise, just add R_r to the current retainer set of *c.
            else {
                associate(c, addElement(r, retainerSetOfc));
            }
        }

        if (isRetainer(c))
            goto loop; // no need to process child

        // compute c_child_r
        c_child_r = r;
    }

    // now, RSET() of all of *c, *cp, and *r is valid.
    // (c, c_child_r) are available.

    // process child

    // Special case closures: we process these all in one go rather
    // than attempting to save the current position, because doing so
    // would be hard.
    switch (typeOfc) {
    case TSO:
        retainStack(c, c_child_r,
                    ((StgTSO *)c)->sp,
                    ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
        goto loop;

    case PAP:
    {
        StgPAP *pap = (StgPAP *)c;
        retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
        goto loop;
    }

    case AP:
    {
        StgAP *ap = (StgAP *)c;
        retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
        goto loop;
    }

    case AP_STACK:
        retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
        retainStack(c, c_child_r,
                    (StgPtr)((StgAP_STACK *)c)->payload,
                    (StgPtr)((StgAP_STACK *)c)->payload +
                             ((StgAP_STACK *)c)->size);
        goto loop;
    }

    push(c, c_child_r, &first_child);

    // If first_child is null, c has no child.
    // If first_child is not null, the top stack element points to the next
    // object. push() may or may not push a stackElement on the stack.
    if (first_child == NULL)
        goto loop;

    // (c, cp, r) = (first_child, c, c_child_r)
    r = c_child_r;
    cp = c;
    c = first_child;
    goto inner_loop;
}
1789
1790 /* -----------------------------------------------------------------------------
1791 * Compute the retainer set for every object reachable from *tl.
1792 * -------------------------------------------------------------------------- */
1793 static void
1794 retainRoot(void *user STG_UNUSED, StgClosure **tl)
1795 {
1796 StgClosure *c;
1797
1798 // We no longer assume that only TSOs and WEAKs are roots; any closure can
1799 // be a root.
1800
1801 ASSERT(isEmptyRetainerStack());
1802 currentStackBoundary = stackTop;
1803
1804 c = UNTAG_CLOSURE(*tl);
1805 if (c != &stg_END_TSO_QUEUE_closure && isRetainer(c)) {
1806 retainClosure(c, c, getRetainerFrom(c));
1807 } else {
1808 retainClosure(c, c, CCS_SYSTEM);
1809 }
1810
1811 // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
1812 // *tl might be a TSO which is ThreadComplete, in which
1813 // case we ignore it for the purposes of retainer profiling.
1814 }
1815
1816 /* -----------------------------------------------------------------------------
1817 * Compute the retainer set for each of the objects in the heap.
1818 * -------------------------------------------------------------------------- */
static void
computeRetainerSet( void )
{
    StgWeak *weak;
    RetainerSet *rtl;
    nat g;
    StgPtr ml;
    bdescr *bd;
#ifdef DEBUG_RETAINER
    RetainerSet tmpRetainerSet;
#endif

    markCapabilities(retainRoot, NULL); // for scheduler roots

    // This function is called after a major GC, when key, value, and finalizer
    // all are guaranteed to be valid, or reachable.
    //
    // The following code assumes that WEAK objects are considered to be roots
    // for retainer profiling.
    for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
        // retainRoot((StgClosure *)weak);
        retainRoot(NULL, (StgClosure **)&weak);

    // Consider roots from the stable ptr table.
    markStablePtrTable(retainRoot, NULL);

    // The following code resets the rs field of each unvisited mutable
    // object (computing sumOfNewCostExtra and updating costArray[] when
    // debugging retainer profiler).
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        // NOT TRUE: even G0 has a block on its mutable list
        // ASSERT(g != 0 || (generations[g].mut_list == NULL));

        // Traversing through mut_list is necessary
        // because we can find MUT_VAR objects which have not been
        // visited during retainer profiling.
        for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
            for (ml = bd->start; ml < bd->free; ml++) {

                // Each word on a mut_list block is a pointer to a
                // mutable closure; conform its rs field to flip.
                maybeInitRetainerSet((StgClosure *)*ml);
                rtl = retainerSetOf((StgClosure *)*ml);

#ifdef DEBUG_RETAINER
                if (rtl == NULL) {
                    // first visit to *ml
                    // This is a violation of the interface rule!
                    RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);

                    switch (get_itbl((StgClosure *)ml)->type) {
                    case IND_STATIC:
                        // no cost involved
                        break;
                    case CONSTR_NOCAF_STATIC:
                    case CONSTR_STATIC:
                    case THUNK_STATIC:
                    case FUN_STATIC:
                        barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
                        break;
                    default:
                        // dynamic objects
                        costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
                        sumOfNewCostExtra += cost((StgClosure *)ml);
                        break;
                    }
                }
#endif
            }
        }
    }
}
1889
1890 /* -----------------------------------------------------------------------------
1891 * Traverse all static objects for which we compute retainer sets,
1892 * and reset their rs fields to NULL, which is accomplished by
1893 * invoking maybeInitRetainerSet(). This function must be called
1894 * before zeroing all objects reachable from scavenged_static_objects
1895 * in the case of major gabage collections. See GarbageCollect() in
1896 * GC.c.
1897 * Note:
1898 * The mut_once_list of the oldest generation must also be traversed?
1899 * Why? Because if the evacuation of an object pointed to by a static
1900 * indirection object fails, it is put back to the mut_once_list of
1901 * the oldest generation.
1902 * However, this is not necessary because any static indirection objects
1903 * are just traversed through to reach dynamic objects. In other words,
1904 * they are not taken into consideration in computing retainer sets.
1905 * -------------------------------------------------------------------------- */
1906 void
1907 resetStaticObjectForRetainerProfiling( StgClosure *static_objects )
1908 {
1909 #ifdef DEBUG_RETAINER
1910 nat count;
1911 #endif
1912 StgClosure *p;
1913
1914 #ifdef DEBUG_RETAINER
1915 count = 0;
1916 #endif
1917 p = static_objects;
1918 while (p != END_OF_STATIC_LIST) {
1919 #ifdef DEBUG_RETAINER
1920 count++;
1921 #endif
1922 switch (get_itbl(p)->type) {
1923 case IND_STATIC:
1924 // Since we do not compute the retainer set of any
1925 // IND_STATIC object, we don't have to reset its retainer
1926 // field.
1927 p = (StgClosure*)*IND_STATIC_LINK(p);
1928 break;
1929 case THUNK_STATIC:
1930 maybeInitRetainerSet(p);
1931 p = (StgClosure*)*THUNK_STATIC_LINK(p);
1932 break;
1933 case FUN_STATIC:
1934 maybeInitRetainerSet(p);
1935 p = (StgClosure*)*FUN_STATIC_LINK(p);
1936 break;
1937 case CONSTR_STATIC:
1938 maybeInitRetainerSet(p);
1939 p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
1940 break;
1941 default:
1942 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1943 p, get_itbl(p)->type);
1944 break;
1945 }
1946 }
1947 #ifdef DEBUG_RETAINER
1948 // debugBelch("count in scavenged_static_objects = %d\n", count);
1949 #endif
1950 }
1951
1952 /* -----------------------------------------------------------------------------
1953 * Perform retainer profiling.
1954 * N is the oldest generation being profilied, where the generations are
1955 * numbered starting at 0.
1956 * Invariants:
1957 * Note:
1958 * This function should be called only immediately after major garbage
1959 * collection.
1960 * ------------------------------------------------------------------------- */
void
retainerProfile(void)
{
#ifdef DEBUG_RETAINER
  nat i;
  nat totalHeapSize; // total raw heap size (computed by linear scanning)
#endif

#ifdef DEBUG_RETAINER
  debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
#endif

  stat_startRP();

  // We haven't flipped the bit yet.
#ifdef DEBUG_RETAINER
  debugBelch("Before traversing:\n");
  sumOfCostLinear = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArrayLinear[i] = 0;
  totalHeapSize = checkHeapSanityForRetainerProfiling();

  debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
  /*
  debugBelch("costArrayLinear[] = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    debugBelch("[%u:%u] ", i, costArrayLinear[i]);
  debugBelch("\n");
  */

  ASSERT(sumOfCostLinear == totalHeapSize);

/*
#define pcostArrayLinear(index) \
  if (costArrayLinear[index] > 0) \
    debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
  pcostArrayLinear(THUNK_STATIC);
  pcostArrayLinear(FUN_STATIC);
  pcostArrayLinear(CONSTR_STATIC);
  pcostArrayLinear(CONSTR_NOCAF_STATIC);
*/
#endif

  // Now we flips flip: every rs field in the heap instantly becomes
  // "invalid" w.r.t. the new flip value, so the traversal below sees
  // every object as unvisited without touching the whole heap first.
  flip = flip ^ 1;

#ifdef DEBUG_RETAINER
  stackSize = 0;
  maxStackSize = 0;
  cStackSize = 0;
  maxCStackSize = 0;
#endif
  numObjectVisited = 0;
  timesAnyObjectVisited = 0;

#ifdef DEBUG_RETAINER
  debugBelch("During traversing:\n");
  sumOfNewCost = 0;
  sumOfNewCostExtra = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArray[i] = 0;
#endif

  /*
    We initialize the traverse stack each time the retainer profiling is
    performed (because the traverse stack size varies on each retainer profiling
    and this operation is not costly anyhow). However, we just refresh the
    retainer sets.
   */
  initializeTraverseStack();
#ifdef DEBUG_RETAINER
  initializeAllRetainerSet();
#else
  refreshAllRetainerSet();
#endif
  computeRetainerSet();

#ifdef DEBUG_RETAINER
  debugBelch("After traversing:\n");
  sumOfCostLinear = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArrayLinear[i] = 0;
  totalHeapSize = checkHeapSanityForRetainerProfiling();

  debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
  ASSERT(sumOfCostLinear == totalHeapSize);

  // now, compare the two results
  /*
    Note:
      costArray[] must be exactly the same as costArrayLinear[].
      Known exceptions:
        1) Dead weak pointers, whose type is CONSTR. These objects are not
           reachable from any roots.
  */
  debugBelch("Comparison:\n");
  debugBelch("\tcostArrayLinear[] (must be empty) = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    if (costArray[i] != costArrayLinear[i])
      // nothing should be printed except MUT_VAR after major GCs
      debugBelch("[%u:%u] ", i, costArrayLinear[i]);
  debugBelch("\n");

  debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
  debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
  debugBelch("\tcostArray[] (must be empty) = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    if (costArray[i] != costArrayLinear[i])
      // nothing should be printed except MUT_VAR after major GCs
      debugBelch("[%u:%u] ", i, costArray[i]);
  debugBelch("\n");

  // only for major garbage collection
  ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
#endif

  // post-processing
  closeTraverseStack();
#ifdef DEBUG_RETAINER
  closeAllRetainerSet();
#else
  // Note that there is no post-processing for the retainer sets.
#endif
  retainerGeneration++;

  stat_endRP(
    retainerGeneration - 1,   // retainerGeneration has just been incremented!
#ifdef DEBUG_RETAINER
    maxCStackSize, maxStackSize,
#endif
    // NOTE(review): if no object was visited this division is by zero;
    // presumably a live program always has at least one root — confirm.
    (double)timesAnyObjectVisited / numObjectVisited);
}
2093
2094 /* -----------------------------------------------------------------------------
2095 * DEBUGGING CODE
2096 * -------------------------------------------------------------------------- */
2097
2098 #ifdef DEBUG_RETAINER
2099
// Heuristic sanity check (DEBUG_RETAINER only) that r plausibly points at a
// live closure: either a static closure, or heap-allocated in a block that
// has not been freed (BF_FREE clear), and whose first word is not
// 0xaaaaaaaa — presumably the DEBUG heap-scrub pattern; confirm against the
// block allocator's free-block poisoning.
#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
                           ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
                           ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
2103
2104 static nat
2105 sanityCheckHeapClosure( StgClosure *c )
2106 {
2107 StgInfoTable *info;
2108
2109 ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
2110 ASSERT(!closure_STATIC(c));
2111 ASSERT(LOOKS_LIKE_PTR(c));
2112
2113 if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
2114 if (get_itbl(c)->type == CONSTR &&
2115 !strcmp(GET_PROF_TYPE(get_itbl(c)), "DEAD_WEAK") &&
2116 !strcmp(GET_PROF_DESC(get_itbl(c)), "DEAD_WEAK")) {
2117 debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
2118 costArray[get_itbl(c)->type] += cost(c);
2119 sumOfNewCost += cost(c);
2120 } else
2121 debugBelch(
2122 "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
2123 flip, c, get_itbl(c)->type,
2124 get_itbl(c)->prof.closure_type, GET_PROF_DESC(get_itbl(c)),
2125 RSET(c));
2126 } else {
2127 // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
2128 }
2129
2130 return closure_sizeW(c);
2131 }
2132
2133 static nat
2134 heapCheck( bdescr *bd )
2135 {
2136 StgPtr p;
2137 static nat costSum, size;
2138
2139 costSum = 0;
2140 while (bd != NULL) {
2141 p = bd->start;
2142 while (p < bd->free) {
2143 size = sanityCheckHeapClosure((StgClosure *)p);
2144 sumOfCostLinear += size;
2145 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2146 p += size;
2147 // no need for slop check; I think slops are not used currently.
2148 }
2149 ASSERT(p == bd->free);
2150 costSum += bd->free - bd->start;
2151 bd = bd->link;
2152 }
2153
2154 return costSum;
2155 }
2156
2157 static nat
2158 smallObjectPoolCheck(void)
2159 {
2160 bdescr *bd;
2161 StgPtr p;
2162 static nat costSum, size;
2163
2164 bd = g0s0->blocks;
2165 costSum = 0;
2166
2167 // first block
2168 if (bd == NULL)
2169 return costSum;
2170
2171 p = bd->start;
2172 while (p < alloc_Hp) {
2173 size = sanityCheckHeapClosure((StgClosure *)p);
2174 sumOfCostLinear += size;
2175 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2176 p += size;
2177 }
2178 ASSERT(p == alloc_Hp);
2179 costSum += alloc_Hp - bd->start;
2180
2181 bd = bd->link;
2182 while (bd != NULL) {
2183 p = bd->start;
2184 while (p < bd->free) {
2185 size = sanityCheckHeapClosure((StgClosure *)p);
2186 sumOfCostLinear += size;
2187 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2188 p += size;
2189 }
2190 ASSERT(p == bd->free);
2191 costSum += bd->free - bd->start;
2192 bd = bd->link;
2193 }
2194
2195 return costSum;
2196 }
2197
2198 static nat
2199 chainCheck(bdescr *bd)
2200 {
2201 nat costSum, size;
2202
2203 costSum = 0;
2204 while (bd != NULL) {
2205 // bd->free - bd->start is not an accurate measurement of the
2206 // object size. Actually it is always zero, so we compute its
2207 // size explicitly.
2208 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2209 sumOfCostLinear += size;
2210 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
2211 costSum += size;
2212 bd = bd->link;
2213 }
2214
2215 return costSum;
2216 }
2217
2218 static nat
2219 checkHeapSanityForRetainerProfiling( void )
2220 {
2221 nat costSum, g, s;
2222
2223 costSum = 0;
2224 debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2225 if (RtsFlags.GcFlags.generations == 1) {
2226 costSum += heapCheck(g0s0->to_blocks);
2227 debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2228 costSum += chainCheck(g0s0->large_objects);
2229 debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2230 } else {
2231 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
2232 for (s = 0; s < generations[g].n_steps; s++) {
2233 /*
2234 After all live objects have been scavenged, the garbage
2235 collector may create some objects in
2236 scheduleFinalizers(). These objects are created throught
2237 allocate(), so the small object pool or the large object
2238 pool of the g0s0 may not be empty.
2239 */
2240 if (g == 0 && s == 0) {
2241 costSum += smallObjectPoolCheck();
2242 debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2243 costSum += chainCheck(generations[g].steps[s].large_objects);
2244 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2245 } else {
2246 costSum += heapCheck(generations[g].steps[s].blocks);
2247 debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2248 costSum += chainCheck(generations[g].steps[s].large_objects);
2249 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2250 }
2251 }
2252 }
2253
2254 return costSum;
2255 }
2256
2257 void
2258 findPointer(StgPtr p)
2259 {
2260 StgPtr q, r, e;
2261 bdescr *bd;
2262 nat g, s;
2263
2264 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2265 for (s = 0; s < generations[g].n_steps; s++) {
2266 // if (g == 0 && s == 0) continue;
2267 bd = generations[g].steps[s].blocks;
2268 for (; bd; bd = bd->link) {
2269 for (q = bd->start; q < bd->free; q++) {
2270 if (*q == (StgWord)p) {
2271 r = q;
2272 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2273 debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2274 // return;
2275 }
2276 }
2277 }
2278 bd = generations[g].steps[s].large_objects;
2279 for (; bd; bd = bd->link) {
2280 e = bd->start + cost((StgClosure *)bd->start);
2281 for (q = bd->start; q < e; q++) {
2282 if (*q == (StgWord)p) {
2283 r = q;
2284 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2285 debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
2286 // return;
2287 }
2288 }
2289 }
2290 }
2291 }
2292 }
2293
2294 static void
2295 belongToHeap(StgPtr p)
2296 {
2297 bdescr *bd;
2298 nat g, s;
2299
2300 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2301 for (s = 0; s < generations[g].n_steps; s++) {
2302 // if (g == 0 && s == 0) continue;
2303 bd = generations[g].steps[s].blocks;
2304 for (; bd; bd = bd->link) {
2305 if (bd->start <= p && p < bd->free) {
2306 debugBelch("Belongs to gen[%d], step[%d]", g, s);
2307 return;
2308 }
2309 }
2310 bd = generations[g].steps[s].large_objects;
2311 for (; bd; bd = bd->link) {
2312 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2313 debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
2314 return;
2315 }
2316 }
2317 }
2318 }
2319 }
2320 #endif /* DEBUG_RETAINER */
2321
2322 #endif /* PROFILING */