Pointer Tagging
[ghc.git] / rts / RetainerProfile.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2001
4 * Author: Sungwoo Park
5 *
6 * Retainer profiling.
7 *
8 * ---------------------------------------------------------------------------*/
9
10 #ifdef PROFILING
11
12 // Turn off inlining when debugging - it obfuscates things
13 #ifdef DEBUG
14 #define INLINE
15 #else
16 #define INLINE inline
17 #endif
18
19 #include "Rts.h"
20 #include "RtsUtils.h"
21 #include "RetainerProfile.h"
22 #include "RetainerSet.h"
23 #include "Schedule.h"
24 #include "Printer.h"
25 #include "RtsFlags.h"
26 #include "Weak.h"
27 #include "Sanity.h"
28 #include "Profiling.h"
29 #include "Stats.h"
30 #include "ProfHeap.h"
31 #include "Apply.h"
32
33 /*
34 Note: what to change in order to plug-in a new retainer profiling scheme?
35 (1) type retainer in ../includes/StgRetainerProf.h
36 (2) retainer function R(), i.e., getRetainerFrom()
37 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
38 in RetainerSet.h, if needed.
39 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
40 */
41
/* -----------------------------------------------------------------------------
 * Declarations...
 * -------------------------------------------------------------------------- */

static nat retainerGeneration;  // generation; bumped once per profiling pass

static nat numObjectVisited;        // total number of objects visited
static nat timesAnyObjectVisited;   // number of times any objects are visited

/*
  The rs field in the profile header of any object points to its retainer
  set in an indirect way: if flip is 0, it points to the retainer set;
  if flip is 1, it points to the next byte after the retainer set (even
  for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
  pointer. See retainerSetOf().
 */

StgWord flip = 0;     // flip bit
                      // must be 0 if DEBUG_RETAINER is on (for static closures)

// Mark c as having no retainer set yet, under the current flip convention.
#define setRetainerSetToNull(c)   \
  (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)

static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
static void retainClosure(StgClosure *, StgClosure *, retainer);
#ifdef DEBUG_RETAINER
static void belongToHeap(StgPtr p);
#endif

#ifdef DEBUG_RETAINER
/*
  cStackSize records how many times retainStack() has been invoked recursively,
  that is, the number of activation records for retainStack() on the C stack.
  maxCStackSize records its max value.
  Invariants:
    cStackSize <= maxCStackSize
 */
static nat cStackSize, maxCStackSize;

static nat sumOfNewCost;        // sum of the cost of each object, computed
                                // when the object is first visited
static nat sumOfNewCostExtra;   // for those objects not visited during
                                // retainer profiling, e.g., MUT_VAR
static nat costArray[N_CLOSURE_TYPES];

nat sumOfCostLinear;            // sum of the costs of all object, computed
                                // when linearly traversing the heap after
                                // retainer profiling
nat costArrayLinear[N_CLOSURE_TYPES];
#endif
92
/* -----------------------------------------------------------------------------
 * Retainer stack - header
 * Note:
 *   Although the retainer stack implementation could be separated
 *   from the retainer profiling engine, there does not seem to be
 *   any advantage in doing that; the retainer stack is an integral part
 *   of the retainer profiling engine and cannot be used elsewhere at
 *   all.
 * -------------------------------------------------------------------------- */
102
// Tag telling how the 'next' union below should be interpreted, i.e. how
// the next child of a suspended closure is located when traversal resumes.
typedef enum {
    posTypeStep,        // fixed layout: 'step' is the ordinal of the next child
    posTypePtrs,        // pointers-first payload: walk payload[pos..ptrs)
    posTypeSRT,         // small SRT guided by a one-word bitmap
    posTypeLargeSRT,    // large SRT guided by a multi-word bitmap
} nextPosType;

typedef union {
    // fixed layout or layout specified by a field in the closure
    StgWord step;

    // layout.payload
    struct {
        // See StgClosureInfo in InfoTables.h
#if SIZEOF_VOID_P == 8
        StgWord32 pos;      // index of the next pointer to return
        StgWord32 ptrs;     // total number of pointers in the payload
#else
        StgWord16 pos;
        StgWord16 ptrs;
#endif
        StgPtr payload;     // start of the pointers-first payload
    } ptrs;

    // SRT (static reference table, small variant)
    struct {
        StgClosure **srt;       // next SRT entry to consider
        StgWord srt_bitmap;     // remaining bitmap, shifted right as we go
    } srt;

    // Large SRT
    struct {
        StgLargeSRT *srt;
        StgWord offset;         // index of the next SRT entry to consider
    } large_srt;

} nextPos;

// Resume position inside a closure: discriminating tag plus union payload.
typedef struct {
    nextPosType type;
    nextPos next;
} stackPos;

// One traverse-stack element: a closure with children still to visit,
// the retainer those children inherit, and where to resume inside it.
typedef struct {
    StgClosure *c;
    retainer c_child_r;     // retainer of c's children
    stackPos info;          // resume position within c
} stackElement;
151
/*
  Invariants:
    firstStack points to the first block group.
    currentStack points to the block group currently being used.
    currentStack->free == stackLimit.
    stackTop points to the topmost byte in the stack of currentStack.
    Unless the whole stack is empty, stackTop must point to the topmost
    object (or byte) in the whole stack. Thus, it is only when the whole stack
    is empty that stackTop == stackLimit (not during the execution of push()
    and pop()).
    stackBottom == currentStack->start.
    stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
  Note:
    When a current stack becomes empty, stackTop is set to point to
    the topmost element on the previous block group so as to satisfy
    the invariants described above.
 */
static bdescr *firstStack = NULL;   // first block group of the traverse stack
static bdescr *currentStack;        // block group currently in use
static stackElement *stackBottom, *stackTop, *stackLimit;

/*
  currentStackBoundary is used to mark the current stack chunk.
  If stackTop == currentStackBoundary, it means that the current stack chunk
  is empty. It is the responsibility of the user to keep currentStackBoundary
  valid all the time if it is to be employed.
 */
static stackElement *currentStackBoundary;

/*
  stackSize records the current size of the stack.
  maxStackSize records its high water mark.
  Invariants:
    stackSize <= maxStackSize
  Note:
    stackSize is just an estimate measure of the depth of the graph. The reason
    is that some heap objects have only a single child and may not result
    in a new element being pushed onto the stack. Therefore, at the end of
    retainer profiling, maxStackSize + maxCStackSize is some value no greater
    than the actual depth of the graph.
 */
#ifdef DEBUG_RETAINER
static int stackSize, maxStackSize;
#endif

// number of blocks allocated for one stack chunk
#define BLOCKS_IN_STACK 1
199
200 /* -----------------------------------------------------------------------------
201 * Add a new block group to the stack.
202 * Invariants:
203 * currentStack->link == s.
204 * -------------------------------------------------------------------------- */
205 static INLINE void
206 newStackBlock( bdescr *bd )
207 {
208 currentStack = bd;
209 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
210 stackBottom = (stackElement *)bd->start;
211 stackLimit = (stackElement *)stackTop;
212 bd->free = (StgPtr)stackLimit;
213 }
214
215 /* -----------------------------------------------------------------------------
216 * Return to the previous block group.
217 * Invariants:
218 * s->link == currentStack.
219 * -------------------------------------------------------------------------- */
220 static INLINE void
221 returnToOldStack( bdescr *bd )
222 {
223 currentStack = bd;
224 stackTop = (stackElement *)bd->free;
225 stackBottom = (stackElement *)bd->start;
226 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
227 bd->free = (StgPtr)stackLimit;
228 }
229
230 /* -----------------------------------------------------------------------------
231 * Initializes the traverse stack.
232 * -------------------------------------------------------------------------- */
233 static void
234 initializeTraverseStack( void )
235 {
236 if (firstStack != NULL) {
237 freeChain(firstStack);
238 }
239
240 firstStack = allocGroup(BLOCKS_IN_STACK);
241 firstStack->link = NULL;
242 firstStack->u.back = NULL;
243
244 newStackBlock(firstStack);
245 }
246
247 /* -----------------------------------------------------------------------------
248 * Frees all the block groups in the traverse stack.
249 * Invariants:
250 * firstStack != NULL
251 * -------------------------------------------------------------------------- */
252 static void
253 closeTraverseStack( void )
254 {
255 freeChain(firstStack);
256 firstStack = NULL;
257 }
258
259 /* -----------------------------------------------------------------------------
260 * Returns rtsTrue if the whole stack is empty.
261 * -------------------------------------------------------------------------- */
262 static INLINE rtsBool
263 isEmptyRetainerStack( void )
264 {
265 return (firstStack == currentStack) && stackTop == stackLimit;
266 }
267
/* -----------------------------------------------------------------------------
 * Returns the total number of blocks used by the traverse stack.
 * -------------------------------------------------------------------------- */
#ifdef DEBUG
lnat
retainerStackBlocks( void )
{
    lnat total = 0;
    bdescr *bd = firstStack;

    while (bd != NULL) {
        total += bd->blocks;
        bd = bd->link;
    }

    return total;
}
#endif
284
285 /* -----------------------------------------------------------------------------
286 * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
287 * i.e., if the current stack chunk is empty.
288 * -------------------------------------------------------------------------- */
289 static INLINE rtsBool
290 isOnBoundary( void )
291 {
292 return stackTop == currentStackBoundary;
293 }
294
295 /* -----------------------------------------------------------------------------
296 * Initializes *info from ptrs and payload.
297 * Invariants:
298 * payload[] begins with ptrs pointers followed by non-pointers.
299 * -------------------------------------------------------------------------- */
300 static INLINE void
301 init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
302 {
303 info->type = posTypePtrs;
304 info->next.ptrs.pos = 0;
305 info->next.ptrs.ptrs = ptrs;
306 info->next.ptrs.payload = payload;
307 }
308
309 /* -----------------------------------------------------------------------------
310 * Find the next object from *info.
311 * -------------------------------------------------------------------------- */
312 static INLINE StgClosure *
313 find_ptrs( stackPos *info )
314 {
315 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
316 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
317 } else {
318 return NULL;
319 }
320 }
321
322 /* -----------------------------------------------------------------------------
323 * Initializes *info from SRT information stored in *infoTable.
324 * -------------------------------------------------------------------------- */
325 static INLINE void
326 init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
327 {
328 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
329 info->type = posTypeLargeSRT;
330 info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
331 info->next.large_srt.offset = 0;
332 } else {
333 info->type = posTypeSRT;
334 info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
335 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
336 }
337 }
338
339 static INLINE void
340 init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
341 {
342 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
343 info->type = posTypeLargeSRT;
344 info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
345 info->next.large_srt.offset = 0;
346 } else {
347 info->type = posTypeSRT;
348 info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
349 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
350 }
351 }
352
/* -----------------------------------------------------------------------------
 * Returns the next SRT entry designated by *info, advancing *info past it,
 * or NULL when the SRT is exhausted. Handles both the small (one-word
 * bitmap) and large (multi-word bitmap) representations.
 * -------------------------------------------------------------------------- */
static INLINE StgClosure *
find_srt( stackPos *info )
{
    StgClosure *c;
    StgWord bitmap;

    if (info->type == posTypeSRT) {
        // Small SRT bitmap: bit i set means srt[i] is a live entry.
        // The bitmap is shifted right as entries are consumed, so bit 0
        // always corresponds to the entry *info->next.srt.srt.
        bitmap = info->next.srt.srt_bitmap;
        while (bitmap != 0) {
            if ((bitmap & 1) != 0) {
#ifdef ENABLE_WIN32_DLL_SUPPORT
                // NOTE(review): on Win32 DLLs, a tagged (low-bit-set) SRT
                // entry is an indirection through the import table; the
                // parenthesization below looks suspect (the & ~0x1 applies
                // after dereference) — confirm against the DLL-linking docs.
                if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
                    c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
                else
                    c = *(info->next.srt.srt);
#else
                c = *(info->next.srt.srt);
#endif
                // Consume this entry: shift the bitmap, advance the cursor,
                // and persist both before returning.
                bitmap = bitmap >> 1;
                info->next.srt.srt++;
                info->next.srt.srt_bitmap = bitmap;
                return c;
            }
            // Dead entry: skip it.
            bitmap = bitmap >> 1;
            info->next.srt.srt++;
        }
        // bitmap is now zero...
        return NULL;
    }
    else {
        // Large SRT bitmap: indexed by absolute entry number i, with the
        // bitmap spread over multiple words.
        nat i = info->next.large_srt.offset;
        StgWord bitmap;

        // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
        bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
        bitmap = bitmap >> (i % BITS_IN(StgWord));
        while (i < info->next.large_srt.srt->l.size) {
            if ((bitmap & 1) != 0) {
                c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
                i++;
                info->next.large_srt.offset = i;
                return c;
            }
            i++;
            // Crossing a word boundary: reload the next bitmap word;
            // otherwise just shift to the next bit.
            if (i % BITS_IN(W_) == 0) {
                bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
            } else {
                bitmap = bitmap >> 1;
            }
        }
        // reached the end of this bitmap.
        info->next.large_srt.offset = i;
        return NULL;
    }
}
414
415 /* -----------------------------------------------------------------------------
416 * push() pushes a stackElement representing the next child of *c
417 * onto the traverse stack. If *c has no child, *first_child is set
418 * to NULL and nothing is pushed onto the stack. If *c has only one
419 * child, *c_chlid is set to that child and nothing is pushed onto
420 * the stack. If *c has more than two children, *first_child is set
421 * to the first child and a stackElement representing the second
422 * child is pushed onto the stack.
423
424 * Invariants:
425 * *c_child_r is the most recent retainer of *c's children.
426 * *c is not any of TSO, AP, PAP, AP_STACK, which means that
427 * there cannot be any stack objects.
428 * Note: SRTs are considered to be children as well.
429 * -------------------------------------------------------------------------- */
430 static INLINE void
431 push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
432 {
433 stackElement se;
434 bdescr *nbd; // Next Block Descriptor
435
436 #ifdef DEBUG_RETAINER
437 // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
438 #endif
439
440 ASSERT(get_itbl(c)->type != TSO);
441 ASSERT(get_itbl(c)->type != AP_STACK);
442
443 //
444 // fill in se
445 //
446
447 se.c = c;
448 se.c_child_r = c_child_r;
449
450 // fill in se.info
451 switch (get_itbl(c)->type) {
452 // no child, no SRT
453 case CONSTR_0_1:
454 case CONSTR_0_2:
455 case CAF_BLACKHOLE:
456 case BLACKHOLE:
457 case SE_BLACKHOLE:
458 case SE_CAF_BLACKHOLE:
459 case ARR_WORDS:
460 *first_child = NULL;
461 return;
462
463 // one child (fixed), no SRT
464 case MUT_VAR_CLEAN:
465 case MUT_VAR_DIRTY:
466 *first_child = ((StgMutVar *)c)->var;
467 return;
468 case THUNK_SELECTOR:
469 *first_child = ((StgSelector *)c)->selectee;
470 return;
471 case IND_PERM:
472 case IND_OLDGEN_PERM:
473 case IND_OLDGEN:
474 *first_child = ((StgInd *)c)->indirectee;
475 return;
476 case CONSTR_1_0:
477 case CONSTR_1_1:
478 *first_child = c->payload[0];
479 return;
480
481 // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
482 // of the next child. We do not write a separate initialization code.
483 // Also we do not have to initialize info.type;
484
485 // two children (fixed), no SRT
486 // need to push a stackElement, but nothing to store in se.info
487 case CONSTR_2_0:
488 *first_child = c->payload[0]; // return the first pointer
489 // se.info.type = posTypeStep;
490 // se.info.next.step = 2; // 2 = second
491 break;
492
493 // three children (fixed), no SRT
494 // need to push a stackElement
495 case MVAR:
496 // head must be TSO and the head of a linked list of TSOs.
497 // Shoule it be a child? Seems to be yes.
498 *first_child = (StgClosure *)((StgMVar *)c)->head;
499 // se.info.type = posTypeStep;
500 se.info.next.step = 2; // 2 = second
501 break;
502
503 // three children (fixed), no SRT
504 case WEAK:
505 *first_child = ((StgWeak *)c)->key;
506 // se.info.type = posTypeStep;
507 se.info.next.step = 2;
508 break;
509
510 // layout.payload.ptrs, no SRT
511 case CONSTR:
512 case STABLE_NAME:
513 case BCO:
514 case CONSTR_STATIC:
515 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
516 (StgPtr)c->payload);
517 *first_child = find_ptrs(&se.info);
518 if (*first_child == NULL)
519 return; // no child
520 break;
521
522 // StgMutArrPtr.ptrs, no SRT
523 case MUT_ARR_PTRS_CLEAN:
524 case MUT_ARR_PTRS_DIRTY:
525 case MUT_ARR_PTRS_FROZEN:
526 case MUT_ARR_PTRS_FROZEN0:
527 init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
528 (StgPtr)(((StgMutArrPtrs *)c)->payload));
529 *first_child = find_ptrs(&se.info);
530 if (*first_child == NULL)
531 return;
532 break;
533
534 // layout.payload.ptrs, SRT
535 case FUN: // *c is a heap object.
536 case FUN_2_0:
537 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
538 *first_child = find_ptrs(&se.info);
539 if (*first_child == NULL)
540 // no child from ptrs, so check SRT
541 goto fun_srt_only;
542 break;
543
544 case THUNK:
545 case THUNK_2_0:
546 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
547 (StgPtr)((StgThunk *)c)->payload);
548 *first_child = find_ptrs(&se.info);
549 if (*first_child == NULL)
550 // no child from ptrs, so check SRT
551 goto thunk_srt_only;
552 break;
553
554 // 1 fixed child, SRT
555 case FUN_1_0:
556 case FUN_1_1:
557 *first_child = c->payload[0];
558 ASSERT(*first_child != NULL);
559 init_srt_fun(&se.info, get_fun_itbl(c));
560 break;
561
562 case THUNK_1_0:
563 case THUNK_1_1:
564 *first_child = ((StgThunk *)c)->payload[0];
565 ASSERT(*first_child != NULL);
566 init_srt_thunk(&se.info, get_thunk_itbl(c));
567 break;
568
569 case FUN_STATIC: // *c is a heap object.
570 ASSERT(get_itbl(c)->srt_bitmap != 0);
571 case FUN_0_1:
572 case FUN_0_2:
573 fun_srt_only:
574 init_srt_fun(&se.info, get_fun_itbl(c));
575 *first_child = find_srt(&se.info);
576 if (*first_child == NULL)
577 return; // no child
578 break;
579
580 // SRT only
581 case THUNK_STATIC:
582 ASSERT(get_itbl(c)->srt_bitmap != 0);
583 case THUNK_0_1:
584 case THUNK_0_2:
585 thunk_srt_only:
586 init_srt_thunk(&se.info, get_thunk_itbl(c));
587 *first_child = find_srt(&se.info);
588 if (*first_child == NULL)
589 return; // no child
590 break;
591
592 case TVAR_WATCH_QUEUE:
593 *first_child = (StgClosure *)((StgTVarWatchQueue *)c)->closure;
594 se.info.next.step = 2; // 2 = second
595 break;
596 case TVAR:
597 *first_child = (StgClosure *)((StgTVar *)c)->current_value;
598 break;
599 case TREC_HEADER:
600 *first_child = (StgClosure *)((StgTRecHeader *)c)->enclosing_trec;
601 break;
602 case TREC_CHUNK:
603 *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
604 se.info.next.step = 0; // entry no.
605 break;
606
607 // cannot appear
608 case PAP:
609 case AP:
610 case AP_STACK:
611 case TSO:
612 case IND_STATIC:
613 case CONSTR_NOCAF_STATIC:
614 // stack objects
615 case UPDATE_FRAME:
616 case CATCH_FRAME:
617 case STOP_FRAME:
618 case RET_DYN:
619 case RET_BCO:
620 case RET_SMALL:
621 case RET_BIG:
622 // invalid objects
623 case IND:
624 case BLOCKED_FETCH:
625 case FETCH_ME:
626 case FETCH_ME_BQ:
627 case RBH:
628 case REMOTE_REF:
629 case EVACUATED:
630 case INVALID_OBJECT:
631 default:
632 barf("Invalid object *c in push()");
633 return;
634 }
635
636 if (stackTop - 1 < stackBottom) {
637 #ifdef DEBUG_RETAINER
638 // debugBelch("push() to the next stack.\n");
639 #endif
640 // currentStack->free is updated when the active stack is switched
641 // to the next stack.
642 currentStack->free = (StgPtr)stackTop;
643
644 if (currentStack->link == NULL) {
645 nbd = allocGroup(BLOCKS_IN_STACK);
646 nbd->link = NULL;
647 nbd->u.back = currentStack;
648 currentStack->link = nbd;
649 } else
650 nbd = currentStack->link;
651
652 newStackBlock(nbd);
653 }
654
655 // adjust stackTop (acutal push)
656 stackTop--;
657 // If the size of stackElement was huge, we would better replace the
658 // following statement by either a memcpy() call or a switch statement
659 // on the type of the element. Currently, the size of stackElement is
660 // small enough (5 words) that this direct assignment seems to be enough.
661
662 // ToDo: The line below leads to the warning:
663 // warning: 'se.info.type' may be used uninitialized in this function
664 // This is caused by the fact that there are execution paths through the
665 // large switch statement above where some cases do not initialize this
666 // field. Is this really harmless? Can we avoid the warning?
667 *stackTop = se;
668
669 #ifdef DEBUG_RETAINER
670 stackSize++;
671 if (stackSize > maxStackSize) maxStackSize = stackSize;
672 // ASSERT(stackSize >= 0);
673 // debugBelch("stackSize = %d\n", stackSize);
674 #endif
675 }
676
/* -----------------------------------------------------------------------------
 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
 * Invariants:
 *   stackTop cannot be equal to stackLimit unless the whole stack is
 *   empty, in which case popOff() is not allowed.
 * Note:
 *   You can think of popOffReal() as a part of popOff() which is
 *   executed at the end of popOff() if necessary. Since popOff() is
 *   likely to be executed quite often while popOffReal() is not, we
 *   separate popOffReal() from popOff(), which is declared as an
 *   INLINE function (for the sake of execution speed). popOffReal()
 *   is called only within popOff() and nowhere else.
 * -------------------------------------------------------------------------- */
static void
popOffReal(void)
{
    bdescr *pbd;    // Previous Block Descriptor

#ifdef DEBUG_RETAINER
    // debugBelch("pop() to the previous stack.\n");
#endif

    // Caller (popOff) guarantees this pop removes the last element of
    // the current block group.
    ASSERT(stackTop + 1 == stackLimit);
    ASSERT(stackBottom == (stackElement *)currentStack->start);

    if (firstStack == currentStack) {
        // The stack is completely empty.
        stackTop++;
        ASSERT(stackTop == stackLimit);
#ifdef DEBUG_RETAINER
        stackSize--;
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
        ASSERT(stackSize >= 0);
        debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
    currentStack->free = (StgPtr)stackLimit;

    // find the previous block descriptor
    pbd = currentStack->u.back;
    ASSERT(pbd != NULL);

    returnToOldStack(pbd);

#ifdef DEBUG_RETAINER
    stackSize--;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    /*
    ASSERT(stackSize >= 0);
    debugBelch("stackSize = %d\n", stackSize);
    */
#endif
}
736
// Pop one element off the traverse stack (fast path). Falls back to
// popOffReal() when this pop empties the current block group.
static INLINE void
popOff(void) {
#ifdef DEBUG_RETAINER
    // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    ASSERT(stackTop != stackLimit);
    ASSERT(!isEmptyRetainerStack());

    // <= (instead of <) is wrong!
    // Fast path: the element being popped is not the last one in this
    // block group, so only stackTop moves.
    if (stackTop + 1 < stackLimit) {
        stackTop++;
#ifdef DEBUG_RETAINER
        stackSize--;
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
        ASSERT(stackSize >= 0);
        debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    // Slow path: this pop empties the current block group.
    popOffReal();
}
762
/* -----------------------------------------------------------------------------
 * Finds the next object to be considered for retainer profiling and store
 * its pointer to *c.
 * Test if the topmost stack element indicates that more objects are left,
 * and if so, retrieve the first object and store its pointer to *c. Also,
 * set *cp and *r appropriately, both of which are stored in the stack element.
 * The topmost stack element then is overwritten so as for it to now denote
 * the next object.
 * If the topmost stack element indicates no more objects are left, pop
 * off the stack element until either an object can be retrieved or
 * the current stack chunk becomes empty, indicated by rtsTrue returned by
 * isOnBoundary(), in which case *c is set to NULL.
 * Note:
 *   It is okay to call this function even when the current stack chunk
 *   is empty.
 * -------------------------------------------------------------------------- */
static INLINE void
pop( StgClosure **c, StgClosure **cp, retainer *r )
{
    stackElement *se;

#ifdef DEBUG_RETAINER
    // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    // Loop until a child is produced (*c set, return) or the current
    // chunk is exhausted (*c == NULL, return).
    do {
        if (isOnBoundary()) {     // if the current stack chunk is depleted
            *c = NULL;
            return;
        }

        se = stackTop;

        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

            // three children (fixed), no SRT
            // need to push a stackElement
        case MVAR:
            // se.info.next.step == 2 -> deliver tail; otherwise (step 3)
            // deliver value, the last child, and pop the element.
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;   // move to the next step
                // no popOff
            } else {
                *c = ((StgMVar *)se->c)->value;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
                // no popOff
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR_WATCH_QUEUE:
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgTVarWatchQueue *)se->c)->next_queue_entry;
                se->info.next.step++;   // move to the next step
                // no popOff
            } else {
                *c = (StgClosure *)((StgTVarWatchQueue *)se->c)->prev_queue_entry;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR:
            *c = (StgClosure *)((StgTVar *)se->c)->first_watch_queue_entry;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_HEADER:
            *c = (StgClosure *)((StgTRecHeader *)se->c)->current_chunk;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow. So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            nat entry_no = se->info.next.step >> 2;
            nat field_no = se->info.next.step & 3;
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                // all entries visited
                *c = NULL;
                popOff();
                return;
            }
            TRecEntry *entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            *cp = se->c;
            *r = se->c_child_r;
            se->info.next.step++;
            return;
        }

        case CONSTR:
        case STABLE_NAME:
        case BCO:
        case CONSTR_STATIC:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
        case MUT_ARR_PTRS_FROZEN:
        case MUT_ARR_PTRS_FROZEN0:
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
                // this element is done; loop round to the next one
                popOff();
                break;
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // layout.payload.ptrs, SRT
        case FUN: // always a heap object
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                // pointers exhausted: switch this element to SRT mode
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
            if (*c != NULL) {
                *cp = se->c;
                *r = se->c_child_r;
                return;
            }
            // SRT exhausted; drop this element and continue
            popOff();
            break;

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case CAF_BLACKHOLE:
        case BLACKHOLE:
        case SE_BLACKHOLE:
        case SE_CAF_BLACKHOLE:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case IND_PERM:
        case IND_OLDGEN_PERM:
        case IND_OLDGEN:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
        case IND_STATIC:
        case CONSTR_NOCAF_STATIC:
            // stack objects
        case RET_DYN:
        case UPDATE_FRAME:
        case CATCH_FRAME:
        case STOP_FRAME:
        case RET_BCO:
        case RET_SMALL:
        case RET_BIG:
            // invalid objects
        case IND:
        case BLOCKED_FETCH:
        case FETCH_ME:
        case FETCH_ME_BQ:
        case RBH:
        case REMOTE_REF:
        case EVACUATED:
        case INVALID_OBJECT:
        default:
            // push() never creates elements for these types, so finding
            // one on the stack is a bug.
            barf("Invalid object *c in pop()");
            return;
        }
    } while (rtsTrue);
}
1001
1002 /* -----------------------------------------------------------------------------
1003 * RETAINER PROFILING ENGINE
1004 * -------------------------------------------------------------------------- */
1005
// Prepare global state for a fresh retainer-profiling run: initialize the
// retainer-set machinery and reset the generation counter.
void
initRetainerProfiling( void )
{
    initializeAllRetainerSet();
    retainerGeneration = 0;
}
1012
/* -----------------------------------------------------------------------------
 * This function must be called before f-closing prof_file.
 * -------------------------------------------------------------------------- */
void
endRetainerProfiling( void )
{
#ifdef SECOND_APPROACH
    // Dump every retainer set accumulated over the whole run.
    outputAllRetainerSet(prof_file);
#endif
}
1023
/* -----------------------------------------------------------------------------
 * Resets the retainer-set field of closure *c to NULL (under the current
 * flip convention) if its current value does not conform to flip, i.e. if
 * it is stale from a previous profiling pass.
 * Side effects:
 *   RSET(c) is initialized to NULL if its current value does not
 *   conform to flip.
 * Note:
 *   Even though this function has side effects, they CAN be ignored because
 *   subsequent calls to retainerSetOf() always result in the same return value
 *   and retainerSetOf() is the only way to retrieve retainerSet of a given
 *   closure.
 *   We have to perform an XOR (^) operation each time a closure is examined.
 *   The reason is that we do not know when a closure is visited last.
 * -------------------------------------------------------------------------- */
static INLINE void
maybeInitRetainerSet( StgClosure *c )
{
    if (!isRetainerSetFieldValid(c)) {
        setRetainerSetToNull(c);
    }
}
1045
1046 /* -----------------------------------------------------------------------------
1047 * Returns rtsTrue if *c is a retainer.
1048 * -------------------------------------------------------------------------- */
// Classify closure *c by its info-table type: rtsTrue if it acts as a
// retainer for retainer profiling, rtsFalse if it is merely traversed
// through, and barf() on types that must never reach this function.
static INLINE rtsBool
isRetainer( StgClosure *c )
{
    switch (get_itbl(c)->type) {
	//
	//  True case
	//
	// TSOs MUST be retainers: they constitute the set of roots.
    case TSO:

	// mutable objects
    case MVAR:
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:

	// thunks are retainers.
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
    case AP:
    case AP_STACK:

	// Static thunks, or CAFS, are obviously retainers.
    case THUNK_STATIC:

	// WEAK objects are roots; there is separate code in which traversing
	// begins from WEAK objects.
    case WEAK:

	// Since the other mutvar-type things are retainers, seems
	// like the right thing to do:
    case TVAR:
	return rtsTrue;

	//
	// False case
	//

	// constructors
    case CONSTR:
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
	// functions
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
	// partial applications
    case PAP:
	// blackholes
    case CAF_BLACKHOLE:
    case BLACKHOLE:
    case SE_BLACKHOLE:
    case SE_CAF_BLACKHOLE:
	// indirection
    case IND_PERM:
    case IND_OLDGEN_PERM:
    case IND_OLDGEN:
	// static objects
    case CONSTR_STATIC:
    case FUN_STATIC:
	// misc
    case STABLE_NAME:
    case BCO:
    case ARR_WORDS:
	// STM
    case TVAR_WATCH_QUEUE:
    case TREC_HEADER:
    case TREC_CHUNK:
	return rtsFalse;

	//
	// Error case
	//
	// IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
    case IND_STATIC:
	// CONSTR_NOCAF_STATIC
	// cannot be *c, *cp, *r in the retainer profiling loop.
    case CONSTR_NOCAF_STATIC:
	// Stack objects are invalid because they are never treated as
	// legal objects during retainer profiling.
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
	// other cases
    case IND:
    case BLOCKED_FETCH:
    case FETCH_ME:
    case FETCH_ME_BQ:
    case RBH:
    case REMOTE_REF:
    case EVACUATED:
    case INVALID_OBJECT:
    default:
	barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
	return rtsFalse;
    }
}
1164
1165 /* -----------------------------------------------------------------------------
1166 * Returns the retainer function value for the closure *c, i.e., R(*c).
1167 * This function does NOT return the retainer(s) of *c.
1168 * Invariants:
1169 * *c must be a retainer.
1170 * Note:
1171 * Depending on the definition of this function, the maintenance of retainer
1172 * sets can be made easier. If most retainer sets are likely to be created
1173 * again across garbage collections, refreshAllRetainerSet() in
1174 * RetainerSet.c can simply do nothing.
1175 * If this is not the case, we can free all the retainer sets and
1176 * re-initialize the hash table.
1177 * See refreshAllRetainerSet() in RetainerSet.c.
1178 * -------------------------------------------------------------------------- */
1179 static INLINE retainer
1180 getRetainerFrom( StgClosure *c )
1181 {
1182 ASSERT(isRetainer(c));
1183
1184 #if defined(RETAINER_SCHEME_INFO)
1185 // Retainer scheme 1: retainer = info table
1186 return get_itbl(c);
1187 #elif defined(RETAINER_SCHEME_CCS)
1188 // Retainer scheme 2: retainer = cost centre stack
1189 return c->header.prof.ccs;
1190 #elif defined(RETAINER_SCHEME_CC)
1191 // Retainer scheme 3: retainer = cost centre
1192 return c->header.prof.ccs->cc;
1193 #endif
1194 }
1195
1196 /* -----------------------------------------------------------------------------
1197 * Associates the retainer set *s with the closure *c, that is, *s becomes
1198 * the retainer set of *c.
1199 * Invariants:
1200 * c != NULL
1201 * s != NULL
1202 * -------------------------------------------------------------------------- */
// Store retainer set *s into the profiling header of *c, tagging the low
// bit with the current flip value; retainerSetOf() undoes the tagging.
static INLINE void
associate( StgClosure *c, RetainerSet *s )
{
    // StgWord has the same size as pointers, so the following type
    // casting is okay.
    RSET(c) = (RetainerSet *)((StgWord)s | flip);
}
1210
1211 /* -----------------------------------------------------------------------------
1212 Call retainClosure for each of the closures covered by a large bitmap.
1213 -------------------------------------------------------------------------- */
1214
1215 static void
1216 retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size,
1217 StgClosure *c, retainer c_child_r)
1218 {
1219 nat i, b;
1220 StgWord bitmap;
1221
1222 b = 0;
1223 bitmap = large_bitmap->bitmap[b];
1224 for (i = 0; i < size; ) {
1225 if ((bitmap & 1) == 0) {
1226 retainClosure((StgClosure *)*p, c, c_child_r);
1227 }
1228 i++;
1229 p++;
1230 if (i % BITS_IN(W_) == 0) {
1231 b++;
1232 bitmap = large_bitmap->bitmap[b];
1233 } else {
1234 bitmap = bitmap >> 1;
1235 }
1236 }
1237 }
1238
1239 static INLINE StgPtr
1240 retain_small_bitmap (StgPtr p, nat size, StgWord bitmap,
1241 StgClosure *c, retainer c_child_r)
1242 {
1243 while (size > 0) {
1244 if ((bitmap & 1) == 0) {
1245 retainClosure((StgClosure *)*p, c, c_child_r);
1246 }
1247 p++;
1248 bitmap = bitmap >> 1;
1249 size--;
1250 }
1251 return p;
1252 }
1253
1254 /* -----------------------------------------------------------------------------
1255 * Call retainClosure for each of the closures in an SRT.
1256 * ------------------------------------------------------------------------- */
1257
1258 static void
1259 retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
1260 {
1261 nat i, b, size;
1262 StgWord bitmap;
1263 StgClosure **p;
1264
1265 b = 0;
1266 p = (StgClosure **)srt->srt;
1267 size = srt->l.size;
1268 bitmap = srt->l.bitmap[b];
1269 for (i = 0; i < size; ) {
1270 if ((bitmap & 1) != 0) {
1271 retainClosure((StgClosure *)*p, c, c_child_r);
1272 }
1273 i++;
1274 p++;
1275 if (i % BITS_IN(W_) == 0) {
1276 b++;
1277 bitmap = srt->l.bitmap[b];
1278 } else {
1279 bitmap = bitmap >> 1;
1280 }
1281 }
1282 }
1283
1284 static INLINE void
1285 retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
1286 {
1287 nat bitmap;
1288 StgClosure **p;
1289
1290 bitmap = srt_bitmap;
1291 p = srt;
1292
1293 if (bitmap == (StgHalfWord)(-1)) {
1294 retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1295 return;
1296 }
1297
1298 while (bitmap != 0) {
1299 if ((bitmap & 1) != 0) {
1300 #ifdef ENABLE_WIN32_DLL_SUPPORT
1301 if ( (unsigned long)(*srt) & 0x1 ) {
1302 retainClosure(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1)),
1303 c, c_child_r);
1304 } else {
1305 retainClosure(*srt,c,c_child_r);
1306 }
1307 #else
1308 retainClosure(*srt,c,c_child_r);
1309 #endif
1310 }
1311 p++;
1312 bitmap = bitmap >> 1;
1313 }
1314 }
1315
1316 /* -----------------------------------------------------------------------------
1317 * Process all the objects in the stack chunk from stackStart to stackEnd
1318 * with *c and *c_child_r being their parent and their most recent retainer,
1319 * respectively. Treat stackOptionalFun as another child of *c if it is
1320 * not NULL.
1321 * Invariants:
1322 * *c is one of the following: TSO, AP_STACK.
1323 * If *c is TSO, c == c_child_r.
1324 * stackStart < stackEnd.
1325 * RSET(c) and RSET(c_child_r) are valid, i.e., their
1326 * interpretation conforms to the current value of flip (even when they
1327 * are interpreted to be NULL).
1328 * If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
1329 * or ThreadKilled, which means that its stack is ready to process.
1330 * Note:
 *    This code was almost plagiarized from GC.c! For each pointer,
1332 * retainClosure() is invoked instead of evacuate().
1333 * -------------------------------------------------------------------------- */
static void
retainStack( StgClosure *c, retainer c_child_r,
	     StgPtr stackStart, StgPtr stackEnd )
{
    stackElement *oldStackBoundary;
    StgPtr p;
    StgRetInfoTable *info;
    StgWord32 bitmap;
    nat size;

#ifdef DEBUG_RETAINER
    cStackSize++;
    if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
#endif

    /*
      Each invocation of retainStack() creates a new virtual
      stack. Since all such stacks share a single common stack, we
      record the current currentStackBoundary, which will be restored
      at the exit.
    */
    oldStackBoundary = currentStackBoundary;
    currentStackBoundary = stackTop;

#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
#endif

    // A TSO's stack must be ready to process (see invariants in the
    // header comment above this function).
    ASSERT(get_itbl(c)->type != TSO ||
	   (((StgTSO *)c)->what_next != ThreadRelocated &&
	    ((StgTSO *)c)->what_next != ThreadComplete &&
	    ((StgTSO *)c)->what_next != ThreadKilled));

    // Walk the stack chunk frame by frame, dispatching on frame type.
    p = stackStart;
    while (p < stackEnd) {
	info = get_ret_itbl((StgClosure *)p);

	switch(info->i.type) {

	case UPDATE_FRAME:
	    retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
	    p += sizeofW(StgUpdateFrame);
	    continue;

	case STOP_FRAME:
	case CATCH_FRAME:
	case CATCH_STM_FRAME:
	case CATCH_RETRY_FRAME:
	case ATOMICALLY_FRAME:
	case RET_SMALL:
	    // Small-bitmap frames: pointer words are identified by a
	    // bitmap held directly in the info table.
	    bitmap = BITMAP_BITS(info->i.layout.bitmap);
	    size = BITMAP_SIZE(info->i.layout.bitmap);
	    p++;
	    p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

	follow_srt:
	    retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
	    continue;

	case RET_BCO: {
	    StgBCO *bco;

	    // First word after the header is the BCO itself; the BCO's own
	    // bitmap then describes the rest of the frame.
	    p++;
	    retainClosure((StgClosure *)*p, c, c_child_r);
	    bco = (StgBCO *)*p;
	    p++;
	    size = BCO_BITMAP_SIZE(bco);
	    retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
	    p += size;
	    continue;
	}

	    // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
	case RET_BIG:
	    size = GET_LARGE_BITMAP(&info->i)->size;
	    p++;
	    retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
				size, c, c_child_r);
	    p += size;
	    // and don't forget to follow the SRT
	    goto follow_srt;

	    // Dynamic bitmap: the mask is stored on the stack
	case RET_DYN: {
	    StgWord dyn;
	    dyn = ((StgRetDyn *)p)->liveness;

	    // traverse the bitmap first
	    bitmap = RET_DYN_LIVENESS(dyn);
	    p = (P_)&((StgRetDyn *)p)->payload[0];
	    size = RET_DYN_BITMAP_SIZE;
	    p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

	    // skip over the non-ptr words
	    p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;

	    // follow the ptr words
	    for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
		retainClosure((StgClosure *)*p, c, c_child_r);
		p++;
	    }
	    continue;
	}

	case RET_FUN: {
	    StgRetFun *ret_fun = (StgRetFun *)p;
	    StgFunInfoTable *fun_info;

	    // Retain the function closure itself, then its arguments
	    // according to the function's argument bitmap.
	    retainClosure(ret_fun->fun, c, c_child_r);
	    fun_info = get_fun_itbl(ret_fun->fun);

	    p = (P_)&ret_fun->payload;
	    switch (fun_info->f.fun_type) {
	    case ARG_GEN:
		bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
		size = BITMAP_SIZE(fun_info->f.b.bitmap);
		p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
		break;
	    case ARG_GEN_BIG:
		size = GET_FUN_LARGE_BITMAP(fun_info)->size;
		retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
				    size, c, c_child_r);
		p += size;
		break;
	    default:
		// Standard argument patterns: look the bitmap up in the
		// static table indexed by fun_type.
		bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
		size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
		p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
		break;
	    }
	    goto follow_srt;
	}

	default:
	    barf("Invalid object found in retainStack(): %d",
		 (int)(info->i.type));
	}
    }

    // restore currentStackBoundary
    currentStackBoundary = oldStackBoundary;
#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
#endif

#ifdef DEBUG_RETAINER
    cStackSize--;
#endif
}
1483
1484 /* ----------------------------------------------------------------------------
1485 * Call retainClosure for each of the children of a PAP/AP
1486 * ------------------------------------------------------------------------- */
1487
// Retain the function of a PAP/AP and then its stored arguments, selecting
// the argument bitmap exactly as the stack-walking code does for RET_FUN
// frames.  Returns the pointer one past the last payload word examined.
static INLINE StgPtr
retain_PAP_payload (StgClosure *pap,    /* NOT tagged */
		    retainer c_child_r, /* NOT tagged */
		    StgClosure *fun,    /* tagged */
		    StgClosure** payload, StgWord n_args)
{
    StgPtr p;
    StgWord bitmap;
    StgFunInfoTable *fun_info;

    // The function closure is itself a child of the PAP/AP.
    retainClosure(fun, pap, c_child_r);
    fun = UNTAG_CLOSURE(fun);
    fun_info = get_fun_itbl(fun);
    ASSERT(fun_info->i.type != PAP);

    p = (StgPtr)payload;

    switch (fun_info->f.fun_type) {
    case ARG_GEN:
	// General case: small bitmap in the function's info table.
	bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
	p = retain_small_bitmap(p, n_args, bitmap,
				pap, c_child_r);
	break;
    case ARG_GEN_BIG:
	// General case with a large bitmap.
	retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
			    n_args, pap, c_child_r);
	p += n_args;
	break;
    case ARG_BCO:
	// Byte-code objects carry their own bitmap.
	retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun),
			    n_args, pap, c_child_r);
	p += n_args;
	break;
    default:
	// Standard argument patterns: static table indexed by fun_type.
	bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
	p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
	break;
    }
    return p;
}
1528
1529 /* -----------------------------------------------------------------------------
 *  Compute the retainer set of *c0 and all its descendants by traversing.
1531 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
1532 * Invariants:
1533 * c0 = cp0 = r0 holds only for root objects.
1534 * RSET(cp0) and RSET(r0) are valid, i.e., their
1535 * interpretation conforms to the current value of flip (even when they
1536 * are interpreted to be NULL).
1537 * However, RSET(c0) may be corrupt, i.e., it may not conform to
1538 * the current value of flip. If it does not, during the execution
1539 * of this function, RSET(c0) must be initialized as well as all
1540 * its descendants.
1541 * Note:
1542 * stackTop must be the same at the beginning and the exit of this function.
1543 * *c0 can be TSO (as well as AP_STACK).
1544 * -------------------------------------------------------------------------- */
static void
retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
{
    // c = Current closure (possibly tagged)
    // cp = Current closure's Parent (NOT tagged)
    // r = current closures' most recent Retainer (NOT tagged)
    // c_child_r = current closure's children's most recent retainer
    // first_child = first child of c
    StgClosure *c, *cp, *first_child;
    RetainerSet *s, *retainerSetOfc;
    retainer r, c_child_r;
    StgWord typeOfc;

#ifdef DEBUG_RETAINER
    // StgPtr oldStackTop;
#endif

#ifdef DEBUG_RETAINER
    // oldStackTop = stackTop;
    // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
#endif

    // (c, cp, r) = (c0, cp0, r0)
    c = c0;
    cp = cp0;
    r = r0;
    goto inner_loop;

loop:
    //debugBelch("loop");
    // pop to (c, cp, r);
    pop(&c, &cp, &r);

    // A NULL pop means the traverse stack segment belonging to this
    // invocation is exhausted: the traversal is complete.
    if (c == NULL) {
#ifdef DEBUG_RETAINER
	// debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
#endif
	return;
    }

    //debugBelch("inner_loop");

inner_loop:
    c = UNTAG_CLOSURE(c);

    // c  = current closure under consideration,
    // cp = current closure's parent,
    // r  = current closure's most recent retainer
    //
    // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
    //   RSET(cp) and RSET(r) are valid.
    //   RSET(c) is valid only if c has been visited before.
    //
    // Loop invariants (on the relation between c, cp, and r)
    //   if cp is not a retainer, r belongs to RSET(cp).
    //   if cp is a retainer, r == cp.

    typeOfc = get_itbl(c)->type;

#ifdef DEBUG_RETAINER
    switch (typeOfc) {
    case IND_STATIC:
    case CONSTR_NOCAF_STATIC:
    case CONSTR_STATIC:
    case THUNK_STATIC:
    case FUN_STATIC:
	break;
    default:
	if (retainerSetOf(c) == NULL) {   // first visit?
	    costArray[typeOfc] += cost(c);
	    sumOfNewCost += cost(c);
	}
	break;
    }
#endif

    // special cases
    switch (typeOfc) {
    case TSO:
	// Dead or finished threads are skipped entirely.
	if (((StgTSO *)c)->what_next == ThreadComplete ||
	    ((StgTSO *)c)->what_next == ThreadKilled) {
#ifdef DEBUG_RETAINER
	    debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
#endif
	    goto loop;
	}
	// Relocated threads: chase the link to the real TSO.
	if (((StgTSO *)c)->what_next == ThreadRelocated) {
#ifdef DEBUG_RETAINER
	    debugBelch("ThreadRelocated encountered in retainClosure()\n");
#endif
	    c = (StgClosure *)((StgTSO *)c)->link;
	    goto inner_loop;
	}
	break;

    case IND_STATIC:
	// We just skip IND_STATIC, so its retainer set is never computed.
	c = ((StgIndStatic *)c)->indirectee;
	goto inner_loop;
	// static objects with no pointers out, so goto loop.
    case CONSTR_NOCAF_STATIC:
	// It is not just enough not to compute the retainer set for *c; it is
	// mandatory because CONSTR_NOCAF_STATIC are not reachable from
	// scavenged_static_objects, the list from which is assumed to traverse
	// all static objects after major garbage collections.
	goto loop;
    case THUNK_STATIC:
    case FUN_STATIC:
	if (get_itbl(c)->srt_bitmap == 0) {
	    // No need to compute the retainer set; no dynamic objects
	    // are reachable from *c.
	    //
	    // Static objects: if we traverse all the live closures,
	    // including static closures, during each heap census then
	    // we will observe that some static closures appear and
	    // disappear.  eg. a closure may contain a pointer to a
	    // static function 'f' which is not otherwise reachable
	    // (it doesn't indirectly point to any CAFs, so it doesn't
	    // appear in any SRTs), so we would find 'f' during
	    // traversal.  However on the next sweep there may be no
	    // closures pointing to 'f'.
	    //
	    // We must therefore ignore static closures whose SRT is
	    // empty, because these are exactly the closures that may
	    // "appear".  A closure with a non-empty SRT, and which is
	    // still required, will always be reachable.
	    //
	    // But what about CONSTR_STATIC?  Surely these may be able
	    // to appear, and they don't have SRTs, so we can't
	    // check.  So for now, we're calling
	    // resetStaticObjectForRetainerProfiling() from the
	    // garbage collector to reset the retainer sets in all the
	    // reachable static objects.
	    goto loop;
	}
	// fallthrough: a static thunk/function with a non-empty SRT is
	// processed like any other closure.
    default:
	break;
    }

    // The above objects are ignored in computing the average number of times
    // an object is visited.
    timesAnyObjectVisited++;

    // If this is the first visit to c, initialize its retainer set.
    maybeInitRetainerSet(c);
    retainerSetOfc = retainerSetOf(c);

    // Now compute s:
    //    isRetainer(cp) == rtsTrue  => s == NULL
    //    isRetainer(cp) == rtsFalse => s == cp.retainer
    if (isRetainer(cp))
	s = NULL;
    else
	s = retainerSetOf(cp);

    // (c, cp, r, s) is available.

    // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
    if (retainerSetOfc == NULL) {
	// This is the first visit to *c.
	numObjectVisited++;

	if (s == NULL)
	    associate(c, singleton(r));
	else
	    // s is actually the retainer set of *c!
	    associate(c, s);

	// compute c_child_r
	c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
    } else {
	// This is not the first visit to *c.
	if (isMember(r, retainerSetOfc))
	    goto loop;          // no need to process child

	if (s == NULL)
	    associate(c, addElement(r, retainerSetOfc));
	else {
	    // s is not NULL and cp is not a retainer. This means that
	    // each time *cp is visited, so is *c. Thus, if s has
	    // exactly one more element in its retainer set than c, s
	    // is also the new retainer set for *c.
	    if (s->num == retainerSetOfc->num + 1) {
		associate(c, s);
	    }
	    // Otherwise, just add R_r to the current retainer set of *c.
	    else {
		associate(c, addElement(r, retainerSetOfc));
	    }
	}

	if (isRetainer(c))
	    goto loop;          // no need to process child

	// compute c_child_r
	c_child_r = r;
    }

    // now, RSET() of all of *c, *cp, and *r is valid.
    // (c, c_child_r) are available.

    // process child

    // Special case closures: we process these all in one go rather
    // than attempting to save the current position, because doing so
    // would be hard.
    switch (typeOfc) {
    case TSO:
	retainStack(c, c_child_r,
		    ((StgTSO *)c)->sp,
		    ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
	goto loop;

    case PAP:
    {
	StgPAP *pap = (StgPAP *)c;
	retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
	goto loop;
    }

    case AP:
    {
	StgAP *ap = (StgAP *)c;
	retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
	goto loop;
    }

    case AP_STACK:
	retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
	retainStack(c, c_child_r,
		    (StgPtr)((StgAP_STACK *)c)->payload,
		    (StgPtr)((StgAP_STACK *)c)->payload +
			     ((StgAP_STACK *)c)->size);
	goto loop;
    }

    push(c, c_child_r, &first_child);

    // If first_child is null, c has no child.
    // If first_child is not null, the top stack element points to the next
    // object. push() may or may not push a stackElement on the stack.
    if (first_child == NULL)
	goto loop;

    // (c, cp, r) = (first_child, c, c_child_r)
    r = c_child_r;
    cp = c;
    c = first_child;
    goto inner_loop;
}
1795
1796 /* -----------------------------------------------------------------------------
1797 * Compute the retainer set for every object reachable from *tl.
1798 * -------------------------------------------------------------------------- */
// Traverse everything reachable from the root *tl, seeding the traversal
// with the root's own retainer function value (or CCS_SYSTEM when the
// root is not itself a retainer).
static void
retainRoot( StgClosure **tl )
{
    StgClosure *c;

    // We no longer assume that only TSOs and WEAKs are roots; any closure can
    // be a root.

    ASSERT(isEmptyRetainerStack());
    currentStackBoundary = stackTop;

    c = UNTAG_CLOSURE(*tl);
    if (c != &stg_END_TSO_QUEUE_closure && isRetainer(c)) {
	// A retainer root retains itself.
	retainClosure(c, c, getRetainerFrom(c));
    } else {
	// Non-retainer roots are attributed to the system cost-centre stack.
	retainClosure(c, c, CCS_SYSTEM);
    }

    // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
    // *tl might be a TSO which is ThreadComplete, in which
    // case we ignore it for the purposes of retainer profiling.
}
1821
1822 /* -----------------------------------------------------------------------------
1823 * Compute the retainer set for each of the objects in the heap.
1824 * -------------------------------------------------------------------------- */
// Compute the retainer set of every reachable object: scheduler roots,
// weak pointers, stable pointers; then reset unvisited mutable-list
// entries so their rs fields conform to the current flip.
static void
computeRetainerSet( void )
{
    StgWeak *weak;
    RetainerSet *rtl;
    nat g;
    StgPtr ml;
    bdescr *bd;
#ifdef DEBUG_RETAINER
    RetainerSet tmpRetainerSet;
#endif

    GetRoots(retainRoot);	// for scheduler roots

    // This function is called after a major GC, when key, value, and finalizer
    // all are guaranteed to be valid, or reachable.
    //
    // The following code assumes that WEAK objects are considered to be roots
    // for retainer profiling.
    // NOTE(review): &weak is the address of the loop cursor, so retainRoot()
    // sees the current StgWeak closure through *tl -- confirm this is the
    // intent of the commented-out direct call below.
    for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
	// retainRoot((StgClosure *)weak);
	retainRoot((StgClosure **)&weak);

    // Consider roots from the stable ptr table.
    markStablePtrTable(retainRoot);

    // The following code resets the rs field of each unvisited mutable
    // object (computing sumOfNewCostExtra and updating costArray[] when
    // debugging retainer profiler).
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
	// NOT TRUE: even G0 has a block on its mutable list
	// ASSERT(g != 0 || (generations[g].mut_list == NULL));

	// Traversing through mut_list is necessary
	// because we can find MUT_VAR objects which have not been
	// visited during retainer profiling.
	for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
	    for (ml = bd->start; ml < bd->free; ml++) {

		maybeInitRetainerSet((StgClosure *)*ml);
		// rtl is only examined under DEBUG_RETAINER below.
		rtl = retainerSetOf((StgClosure *)*ml);

#ifdef DEBUG_RETAINER
		if (rtl == NULL) {
		    // first visit to *ml
		    // This is a violation of the interface rule!
		    RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);

		    switch (get_itbl((StgClosure *)ml)->type) {
		    case IND_STATIC:
			// no cost involved
			break;
		    case CONSTR_NOCAF_STATIC:
		    case CONSTR_STATIC:
		    case THUNK_STATIC:
		    case FUN_STATIC:
			barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
			break;
		    default:
			// dynamic objects
			costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
			sumOfNewCostExtra += cost((StgClosure *)ml);
			break;
		    }
		}
#endif
	    }
	}
    }
}
1895
1896 /* -----------------------------------------------------------------------------
1897 * Traverse all static objects for which we compute retainer sets,
1898 * and reset their rs fields to NULL, which is accomplished by
1899 * invoking maybeInitRetainerSet(). This function must be called
1900 * before zeroing all objects reachable from scavenged_static_objects
 *  in the case of major garbage collections. See GarbageCollect() in
1902 * GC.c.
1903 * Note:
1904 * The mut_once_list of the oldest generation must also be traversed?
1905 * Why? Because if the evacuation of an object pointed to by a static
1906 * indirection object fails, it is put back to the mut_once_list of
1907 * the oldest generation.
1908 * However, this is not necessary because any static indirection objects
1909 * are just traversed through to reach dynamic objects. In other words,
1910 * they are not taken into consideration in computing retainer sets.
1911 * -------------------------------------------------------------------------- */
1912 void
1913 resetStaticObjectForRetainerProfiling( void )
1914 {
1915 #ifdef DEBUG_RETAINER
1916 nat count;
1917 #endif
1918 StgClosure *p;
1919
1920 #ifdef DEBUG_RETAINER
1921 count = 0;
1922 #endif
1923 p = scavenged_static_objects;
1924 while (p != END_OF_STATIC_LIST) {
1925 #ifdef DEBUG_RETAINER
1926 count++;
1927 #endif
1928 switch (get_itbl(p)->type) {
1929 case IND_STATIC:
1930 // Since we do not compute the retainer set of any
1931 // IND_STATIC object, we don't have to reset its retainer
1932 // field.
1933 p = (StgClosure*)*IND_STATIC_LINK(p);
1934 break;
1935 case THUNK_STATIC:
1936 maybeInitRetainerSet(p);
1937 p = (StgClosure*)*THUNK_STATIC_LINK(p);
1938 break;
1939 case FUN_STATIC:
1940 maybeInitRetainerSet(p);
1941 p = (StgClosure*)*FUN_STATIC_LINK(p);
1942 break;
1943 case CONSTR_STATIC:
1944 maybeInitRetainerSet(p);
1945 p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
1946 break;
1947 default:
1948 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1949 p, get_itbl(p)->type);
1950 break;
1951 }
1952 }
1953 #ifdef DEBUG_RETAINER
1954 // debugBelch("count in scavenged_static_objects = %d\n", count);
1955 #endif
1956 }
1957
1958 /* -----------------------------------------------------------------------------
1959 * Perform retainer profiling.
 *  N is the oldest generation being profiled, where the generations are
1961 * numbered starting at 0.
1962 * Invariants:
1963 * Note:
1964 * This function should be called only immediately after major garbage
1965 * collection.
1966 * ------------------------------------------------------------------------- */
// Perform one complete retainer-profiling pass: flip the flip bit,
// recompute retainer sets for the whole heap, and record statistics.
void
retainerProfile(void)
{
#ifdef DEBUG_RETAINER
  nat i;
  nat totalHeapSize;        // total raw heap size (computed by linear scanning)
#endif

#ifdef DEBUG_RETAINER
  debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
#endif

  stat_startRP();

  // We haven't flipped the bit yet.
#ifdef DEBUG_RETAINER
  debugBelch("Before traversing:\n");
  sumOfCostLinear = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArrayLinear[i] = 0;
  totalHeapSize = checkHeapSanityForRetainerProfiling();

  debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
  /*
  debugBelch("costArrayLinear[] = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    debugBelch("[%u:%u] ", i, costArrayLinear[i]);
  debugBelch("\n");
  */

  ASSERT(sumOfCostLinear == totalHeapSize);

/*
#define pcostArrayLinear(index) \
  if (costArrayLinear[index] > 0) \
    debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
  pcostArrayLinear(THUNK_STATIC);
  pcostArrayLinear(FUN_STATIC);
  pcostArrayLinear(CONSTR_STATIC);
  pcostArrayLinear(CONSTR_NOCAF_STATIC);
*/
#endif

  // Now we flip the flip bit: every rs field that conformed to the old
  // value is thereby invalidated, i.e., treated as unvisited.
  flip = flip ^ 1;

#ifdef DEBUG_RETAINER
  stackSize = 0;
  maxStackSize = 0;
  cStackSize = 0;
  maxCStackSize = 0;
#endif
  numObjectVisited = 0;
  timesAnyObjectVisited = 0;

#ifdef DEBUG_RETAINER
  debugBelch("During traversing:\n");
  sumOfNewCost = 0;
  sumOfNewCostExtra = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArray[i] = 0;
#endif

  /*
    We initialize the traverse stack each time the retainer profiling is
    performed (because the traverse stack size varies on each retainer profiling
    and this operation is not costly anyhow). However, we just refresh the
    retainer sets.
   */
  initializeTraverseStack();
#ifdef DEBUG_RETAINER
  initializeAllRetainerSet();
#else
  refreshAllRetainerSet();
#endif
  computeRetainerSet();

#ifdef DEBUG_RETAINER
  debugBelch("After traversing:\n");
  sumOfCostLinear = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArrayLinear[i] = 0;
  totalHeapSize = checkHeapSanityForRetainerProfiling();

  debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
  ASSERT(sumOfCostLinear == totalHeapSize);

  // now, compare the two results
  /*
    Note:
      costArray[] must be exactly the same as costArrayLinear[].
      Known exceptions:
        1) Dead weak pointers, whose type is CONSTR. These objects are not
           reachable from any roots.
  */
  debugBelch("Comparison:\n");
  debugBelch("\tcostArrayLinear[] (must be empty) = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    if (costArray[i] != costArrayLinear[i])
      // nothing should be printed except MUT_VAR after major GCs
      debugBelch("[%u:%u] ", i, costArrayLinear[i]);
  debugBelch("\n");

  debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
  debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
  debugBelch("\tcostArray[] (must be empty) = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    if (costArray[i] != costArrayLinear[i])
      // nothing should be printed except MUT_VAR after major GCs
      debugBelch("[%u:%u] ", i, costArray[i]);
  debugBelch("\n");

  // only for major garbage collection
  ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
#endif

  // post-processing
  closeTraverseStack();
#ifdef DEBUG_RETAINER
  closeAllRetainerSet();
#else
  // Note that there is no post-processing for the retainer sets.
#endif
  retainerGeneration++;

  // NOTE(review): if numObjectVisited is 0 this divides by zero --
  // presumably impossible after a major GC with live roots; confirm.
  stat_endRP(
    retainerGeneration - 1,   // retainerGeneration has just been incremented!
#ifdef DEBUG_RETAINER
    maxCStackSize, maxStackSize,
#endif
    (double)timesAnyObjectVisited / numObjectVisited);
}
2099
2100 /* -----------------------------------------------------------------------------
2101 * DEBUGGING CODE
2102 * -------------------------------------------------------------------------- */
2103
2104 #ifdef DEBUG_RETAINER
2105
// LOOKS_LIKE_PTR(r): heuristic validity check on a closure pointer, used by
// the sanity-checking code below.  Accepts r when it is either a static
// closure, or a heap pointer into a block that has not been freed
// (BF_FREE clear), AND the first word it points at is not 0xaaaaaaaa
// (presumably the fill pattern written over dead memory in DEBUG builds —
// TODO confirm against the block allocator).
#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
                        ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
                        ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
2109
2110 static nat
2111 sanityCheckHeapClosure( StgClosure *c )
2112 {
2113 StgInfoTable *info;
2114
2115 ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
2116 ASSERT(!closure_STATIC(c));
2117 ASSERT(LOOKS_LIKE_PTR(c));
2118
2119 if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
2120 if (get_itbl(c)->type == CONSTR &&
2121 !strcmp(GET_PROF_TYPE(get_itbl(c)), "DEAD_WEAK") &&
2122 !strcmp(GET_PROF_DESC(get_itbl(c)), "DEAD_WEAK")) {
2123 debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
2124 costArray[get_itbl(c)->type] += cost(c);
2125 sumOfNewCost += cost(c);
2126 } else
2127 debugBelch(
2128 "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
2129 flip, c, get_itbl(c)->type,
2130 get_itbl(c)->prof.closure_type, GET_PROF_DESC(get_itbl(c)),
2131 RSET(c));
2132 } else {
2133 // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
2134 }
2135
2136 return closure_sizeW(c);
2137 }
2138
2139 static nat
2140 heapCheck( bdescr *bd )
2141 {
2142 StgPtr p;
2143 static nat costSum, size;
2144
2145 costSum = 0;
2146 while (bd != NULL) {
2147 p = bd->start;
2148 while (p < bd->free) {
2149 size = sanityCheckHeapClosure((StgClosure *)p);
2150 sumOfCostLinear += size;
2151 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2152 p += size;
2153 // no need for slop check; I think slops are not used currently.
2154 }
2155 ASSERT(p == bd->free);
2156 costSum += bd->free - bd->start;
2157 bd = bd->link;
2158 }
2159
2160 return costSum;
2161 }
2162
2163 static nat
2164 smallObjectPoolCheck(void)
2165 {
2166 bdescr *bd;
2167 StgPtr p;
2168 static nat costSum, size;
2169
2170 bd = small_alloc_list;
2171 costSum = 0;
2172
2173 // first block
2174 if (bd == NULL)
2175 return costSum;
2176
2177 p = bd->start;
2178 while (p < alloc_Hp) {
2179 size = sanityCheckHeapClosure((StgClosure *)p);
2180 sumOfCostLinear += size;
2181 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2182 p += size;
2183 }
2184 ASSERT(p == alloc_Hp);
2185 costSum += alloc_Hp - bd->start;
2186
2187 bd = bd->link;
2188 while (bd != NULL) {
2189 p = bd->start;
2190 while (p < bd->free) {
2191 size = sanityCheckHeapClosure((StgClosure *)p);
2192 sumOfCostLinear += size;
2193 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2194 p += size;
2195 }
2196 ASSERT(p == bd->free);
2197 costSum += bd->free - bd->start;
2198 bd = bd->link;
2199 }
2200
2201 return costSum;
2202 }
2203
2204 static nat
2205 chainCheck(bdescr *bd)
2206 {
2207 nat costSum, size;
2208
2209 costSum = 0;
2210 while (bd != NULL) {
2211 // bd->free - bd->start is not an accurate measurement of the
2212 // object size. Actually it is always zero, so we compute its
2213 // size explicitly.
2214 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2215 sumOfCostLinear += size;
2216 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
2217 costSum += size;
2218 bd = bd->link;
2219 }
2220
2221 return costSum;
2222 }
2223
2224 static nat
2225 checkHeapSanityForRetainerProfiling( void )
2226 {
2227 nat costSum, g, s;
2228
2229 costSum = 0;
2230 debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2231 if (RtsFlags.GcFlags.generations == 1) {
2232 costSum += heapCheck(g0s0->to_blocks);
2233 debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2234 costSum += chainCheck(g0s0->large_objects);
2235 debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2236 } else {
2237 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
2238 for (s = 0; s < generations[g].n_steps; s++) {
2239 /*
2240 After all live objects have been scavenged, the garbage
2241 collector may create some objects in
2242 scheduleFinalizers(). These objects are created throught
2243 allocate(), so the small object pool or the large object
2244 pool of the g0s0 may not be empty.
2245 */
2246 if (g == 0 && s == 0) {
2247 costSum += smallObjectPoolCheck();
2248 debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2249 costSum += chainCheck(generations[g].steps[s].large_objects);
2250 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2251 } else {
2252 costSum += heapCheck(generations[g].steps[s].blocks);
2253 debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2254 costSum += chainCheck(generations[g].steps[s].large_objects);
2255 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2256 }
2257 }
2258 }
2259
2260 return costSum;
2261 }
2262
2263 void
2264 findPointer(StgPtr p)
2265 {
2266 StgPtr q, r, e;
2267 bdescr *bd;
2268 nat g, s;
2269
2270 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2271 for (s = 0; s < generations[g].n_steps; s++) {
2272 // if (g == 0 && s == 0) continue;
2273 bd = generations[g].steps[s].blocks;
2274 for (; bd; bd = bd->link) {
2275 for (q = bd->start; q < bd->free; q++) {
2276 if (*q == (StgWord)p) {
2277 r = q;
2278 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2279 debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2280 // return;
2281 }
2282 }
2283 }
2284 bd = generations[g].steps[s].large_objects;
2285 for (; bd; bd = bd->link) {
2286 e = bd->start + cost((StgClosure *)bd->start);
2287 for (q = bd->start; q < e; q++) {
2288 if (*q == (StgWord)p) {
2289 r = q;
2290 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2291 debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
2292 // return;
2293 }
2294 }
2295 }
2296 }
2297 }
2298 }
2299
2300 static void
2301 belongToHeap(StgPtr p)
2302 {
2303 bdescr *bd;
2304 nat g, s;
2305
2306 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2307 for (s = 0; s < generations[g].n_steps; s++) {
2308 // if (g == 0 && s == 0) continue;
2309 bd = generations[g].steps[s].blocks;
2310 for (; bd; bd = bd->link) {
2311 if (bd->start <= p && p < bd->free) {
2312 debugBelch("Belongs to gen[%d], step[%d]", g, s);
2313 return;
2314 }
2315 }
2316 bd = generations[g].steps[s].large_objects;
2317 for (; bd; bd = bd->link) {
2318 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2319 debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
2320 return;
2321 }
2322 }
2323 }
2324 }
2325 }
2326 #endif /* DEBUG_RETAINER */
2327
2328 #endif /* PROFILING */