6ca09fc43e6be6e5d4c342a066a2d0bca10b90b2
[ghc.git] / rts / RetainerProfile.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2001
4 * Author: Sungwoo Park
5 *
6 * Retainer profiling.
7 *
8 * ---------------------------------------------------------------------------*/
9
10 #if defined(PROFILING)
11
12 // Turn off inlining when debugging - it obfuscates things
13 #if defined(DEBUG)
14 #define INLINE
15 #else
16 #define INLINE inline
17 #endif
18
19 #include "PosixSource.h"
20 #include "Rts.h"
21
22 #include "RtsUtils.h"
23 #include "RetainerProfile.h"
24 #include "RetainerSet.h"
25 #include "Schedule.h"
26 #include "Printer.h"
27 #include "Weak.h"
28 #include "sm/Sanity.h"
29 #include "Profiling.h"
30 #include "Stats.h"
31 #include "ProfHeap.h"
32 #include "Apply.h"
33 #include "Stable.h" /* markStableTables */
34 #include "sm/Storage.h" // for END_OF_STATIC_LIST
35
36 /*
37 Note: what to change in order to plug-in a new retainer profiling scheme?
38 (1) type retainer in ../includes/StgRetainerProf.h
39 (2) retainer function R(), i.e., getRetainerFrom()
40 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
41 in RetainerSet.h, if needed.
42 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
43 */
44
45 /* -----------------------------------------------------------------------------
46 * Declarations...
47 * -------------------------------------------------------------------------- */
48
static uint32_t retainerGeneration;  // generation

static uint32_t numObjectVisited;    // total number of objects visited
static uint32_t timesAnyObjectVisited;  // number of times any objects are
                                        // visited

/*
  The rs field in the profile header of any object points to its retainer
  set in an indirect way: if flip is 0, it points to the retainer set;
  if flip is 1, it points to the next byte after the retainer set (even
  for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
  pointer. See retainerSetOf().
 */

StgWord flip = 0;     // flip bit
                      // must be 0 if DEBUG_RETAINER is on (for static closures)

// Reset the retainer-set field of closure c to the encoded NULL,
// respecting the current flip bit (see the comment on 'flip' above).
#define setRetainerSetToNull(c)   \
  (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)

static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
static void retainClosure(StgClosure *, StgClosure *, retainer);
#if defined(DEBUG_RETAINER)
static void belongToHeap(StgPtr p);
#endif

#if defined(DEBUG_RETAINER)
/*
  cStackSize records how many times retainStack() has been invoked recursively,
  that is, the number of activation records for retainStack() on the C stack.
  maxCStackSize records its max value.
  Invariants:
    cStackSize <= maxCStackSize
 */
static uint32_t cStackSize, maxCStackSize;

static uint32_t sumOfNewCost;        // sum of the cost of each object, computed
                                     // when the object is first visited
static uint32_t sumOfNewCostExtra;   // for those objects not visited during
                                     // retainer profiling, e.g., MUT_VAR
static uint32_t costArray[N_CLOSURE_TYPES];

uint32_t sumOfCostLinear;            // sum of the costs of all object, computed
                                     // when linearly traversing the heap after
                                     // retainer profiling
uint32_t costArrayLinear[N_CLOSURE_TYPES];
#endif
96
97 /* -----------------------------------------------------------------------------
98 * Retainer stack - header
99 * Note:
 *   Although the retainer stack implementation could be separated
 *   from the retainer profiling engine, there does not seem to be
 *   any advantage in doing that; the retainer stack is an integral part
 *   of the retainer profiling engine and cannot be used elsewhere at
 *   all.
105 * -------------------------------------------------------------------------- */
106
// Kind of traversal cursor stored in a stackElement: selects which
// member of the nextPos union below is active.
typedef enum {
    posTypeStep,      // fixed layout: 'step' numbers the next child
    posTypePtrs,      // pointers-first layout: (pos, ptrs, payload)
    posTypeSRT,       // small SRT walked via an inline bitmap
    posTypeLargeSRT,  // large SRT walked via an out-of-line bitmap
} nextPosType;

// Cursor data for each nextPosType; exactly one member is meaningful.
typedef union {
    // fixed layout or layout specified by a field in the closure
    StgWord step;

    // layout.payload
    struct {
        // See StgClosureInfo in InfoTables.h
        StgHalfWord pos;      // index of the next pointer to hand out
        StgHalfWord ptrs;     // total number of pointers in payload[]
        StgPtr payload;       // start of the pointer section
    } ptrs;

    // SRT
    struct {
        StgClosure **srt;     // next SRT entry to examine
        StgWord srt_bitmap;   // remaining bitmap bits (shifted as we advance)
    } srt;

    // Large SRT
    struct {
        StgLargeSRT *srt;     // the SRT itself (bitmap + entry table)
        StgWord offset;       // index of the next entry to examine
    } large_srt;

} nextPos;

// A complete traversal cursor: discriminant plus position.
typedef struct {
    nextPosType type;
    nextPos next;
} stackPos;

// One entry of the traverse stack: a closure whose children are still
// being visited, the retainer to attribute those children to, and the
// cursor identifying the next child.
typedef struct {
    StgClosure *c;
    retainer c_child_r;
    stackPos info;
} stackElement;
150
151 /*
152 Invariants:
153 firstStack points to the first block group.
154 currentStack points to the block group currently being used.
155 currentStack->free == stackLimit.
156 stackTop points to the topmost byte in the stack of currentStack.
157 Unless the whole stack is empty, stackTop must point to the topmost
158 object (or byte) in the whole stack. Thus, it is only when the whole stack
159 is empty that stackTop == stackLimit (not during the execution of push()
160 and pop()).
161 stackBottom == currentStack->start.
162 stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
163 Note:
164 When a current stack becomes empty, stackTop is set to point to
165 the topmost element on the previous block group so as to satisfy
166 the invariants described above.
167 */
// Traverse-stack state; see the invariants in the comment block above.
static bdescr *firstStack = NULL;   // first block group of the chain
static bdescr *currentStack;        // block group currently in use
static stackElement *stackBottom, *stackTop, *stackLimit;

/*
  currentStackBoundary is used to mark the current stack chunk.
  If stackTop == currentStackBoundary, it means that the current stack chunk
  is empty. It is the responsibility of the user to keep currentStackBoundary
  valid all the time if it is to be employed.
 */
static stackElement *currentStackBoundary;

/*
  stackSize records the current size of the stack.
  maxStackSize records its high water mark.
  Invariants:
    stackSize <= maxStackSize
  Note:
    stackSize is just an estimate measure of the depth of the graph. The reason
    is that some heap objects have only a single child and may not result
    in a new element being pushed onto the stack. Therefore, at the end of
    retainer profiling, maxStackSize + maxCStackSize is some value no greater
    than the actual depth of the graph.
 */
#if defined(DEBUG_RETAINER)
static int stackSize, maxStackSize;
#endif
195
196 // number of blocks allocated for one stack
197 #define BLOCKS_IN_STACK 1
198
199 /* -----------------------------------------------------------------------------
200 * Add a new block group to the stack.
201 * Invariants:
202 * currentStack->link == s.
203 * -------------------------------------------------------------------------- */
204 static INLINE void
205 newStackBlock( bdescr *bd )
206 {
207 currentStack = bd;
208 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
209 stackBottom = (stackElement *)bd->start;
210 stackLimit = (stackElement *)stackTop;
211 bd->free = (StgPtr)stackLimit;
212 }
213
214 /* -----------------------------------------------------------------------------
215 * Return to the previous block group.
216 * Invariants:
217 * s->link == currentStack.
218 * -------------------------------------------------------------------------- */
219 static INLINE void
220 returnToOldStack( bdescr *bd )
221 {
222 currentStack = bd;
223 stackTop = (stackElement *)bd->free;
224 stackBottom = (stackElement *)bd->start;
225 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
226 bd->free = (StgPtr)stackLimit;
227 }
228
229 /* -----------------------------------------------------------------------------
230 * Initializes the traverse stack.
231 * -------------------------------------------------------------------------- */
232 static void
233 initializeTraverseStack( void )
234 {
235 if (firstStack != NULL) {
236 freeChain(firstStack);
237 }
238
239 firstStack = allocGroup(BLOCKS_IN_STACK);
240 firstStack->link = NULL;
241 firstStack->u.back = NULL;
242
243 newStackBlock(firstStack);
244 }
245
246 /* -----------------------------------------------------------------------------
247 * Frees all the block groups in the traverse stack.
248 * Invariants:
249 * firstStack != NULL
250 * -------------------------------------------------------------------------- */
251 static void
252 closeTraverseStack( void )
253 {
254 freeChain(firstStack);
255 firstStack = NULL;
256 }
257
258 /* -----------------------------------------------------------------------------
259 * Returns true if the whole stack is empty.
260 * -------------------------------------------------------------------------- */
261 static INLINE bool
262 isEmptyRetainerStack( void )
263 {
264 return (firstStack == currentStack) && stackTop == stackLimit;
265 }
266
267 /* -----------------------------------------------------------------------------
268 * Returns size of stack
269 * -------------------------------------------------------------------------- */
#if defined(DEBUG)
/*
 * Total number of blocks currently allocated for the traverse stack,
 * summed over every block group in the chain.
 */
W_
retainerStackBlocks( void )
{
    W_ total = 0;
    bdescr *group;

    for (group = firstStack; group != NULL; group = group->link) {
        total += group->blocks;
    }

    return total;
}
#endif
283
284 /* -----------------------------------------------------------------------------
285 * Returns true if stackTop is at the stack boundary of the current stack,
286 * i.e., if the current stack chunk is empty.
287 * -------------------------------------------------------------------------- */
288 static INLINE bool
289 isOnBoundary( void )
290 {
291 return stackTop == currentStackBoundary;
292 }
293
294 /* -----------------------------------------------------------------------------
295 * Initializes *info from ptrs and payload.
296 * Invariants:
297 * payload[] begins with ptrs pointers followed by non-pointers.
298 * -------------------------------------------------------------------------- */
299 static INLINE void
300 init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload )
301 {
302 info->type = posTypePtrs;
303 info->next.ptrs.pos = 0;
304 info->next.ptrs.ptrs = ptrs;
305 info->next.ptrs.payload = payload;
306 }
307
308 /* -----------------------------------------------------------------------------
309 * Find the next object from *info.
310 * -------------------------------------------------------------------------- */
311 static INLINE StgClosure *
312 find_ptrs( stackPos *info )
313 {
314 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
315 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
316 } else {
317 return NULL;
318 }
319 }
320
321 /* -----------------------------------------------------------------------------
322 * Initializes *info from SRT information stored in *infoTable.
323 * -------------------------------------------------------------------------- */
324 static INLINE void
325 init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable )
326 {
327 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
328 info->type = posTypeLargeSRT;
329 info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
330 info->next.large_srt.offset = 0;
331 } else {
332 info->type = posTypeSRT;
333 info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
334 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
335 }
336 }
337
338 static INLINE void
339 init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable )
340 {
341 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
342 info->type = posTypeLargeSRT;
343 info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
344 info->next.large_srt.offset = 0;
345 } else {
346 info->type = posTypeSRT;
347 info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
348 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
349 }
350 }
351
352 /* -----------------------------------------------------------------------------
353 * Find the next object from *info.
354 * -------------------------------------------------------------------------- */
static INLINE StgClosure *
find_srt( stackPos *info )
{
    StgClosure *c;
    StgWord bitmap;

    if (info->type == posTypeSRT) {
        // Small SRT bitmap: scan the remaining bits; a set bit means the
        // corresponding SRT entry is live and should be returned.
        bitmap = info->next.srt.srt_bitmap;
        while (bitmap != 0) {
            if ((bitmap & 1) != 0) {
#if defined(COMPILING_WINDOWS_DLL)
                // A tagged (odd) entry is an indirection through the DLL
                // import table; strip the tag bit and follow it.
                // NOTE(review): the parenthesization below applies the mask
                // after the dereference -- confirm against the DLL build.
                if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
                    c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
                else
                    c = *(info->next.srt.srt);
#else
                c = *(info->next.srt.srt);
#endif
                // Advance and save the cursor past this entry before
                // returning, so the next call resumes correctly.
                bitmap = bitmap >> 1;
                info->next.srt.srt++;
                info->next.srt.srt_bitmap = bitmap;
                return c;
            }
            bitmap = bitmap >> 1;
            info->next.srt.srt++;
        }
        // bitmap is now zero: this SRT is exhausted.
        return NULL;
    }
    else {
        // Large SRT bitmap: the bitmap lives out of line in the SRT itself,
        // one bit per entry, packed into words.
        uint32_t i = info->next.large_srt.offset;
        StgWord bitmap;

        // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
        bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
        bitmap = bitmap >> (i % BITS_IN(StgWord));
        while (i < info->next.large_srt.srt->l.size) {
            if ((bitmap & 1) != 0) {
                c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
                i++;
                info->next.large_srt.offset = i;   // save cursor before return
                return c;
            }
            i++;
            if (i % BITS_IN(W_) == 0) {
                // crossed a word boundary: load the next bitmap word
                bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
            } else {
                bitmap = bitmap >> 1;
            }
        }
        // reached the end of this bitmap.
        info->next.large_srt.offset = i;
        return NULL;
    }
}
412
413 /* -----------------------------------------------------------------------------
 * push() pushes a stackElement representing the next child of *c
 * onto the traverse stack. If *c has no child, *first_child is set
 * to NULL and nothing is pushed onto the stack. If *c has only one
 * child, *first_child is set to that child and nothing is pushed onto
 * the stack. If *c has two or more children, *first_child is set
 * to the first child and a stackElement representing the second
 * child is pushed onto the stack.
421
422 * Invariants:
423 * *c_child_r is the most recent retainer of *c's children.
424 * *c is not any of TSO, AP, PAP, AP_STACK, which means that
425 * there cannot be any stack objects.
426 * Note: SRTs are considered to be children as well.
427 * -------------------------------------------------------------------------- */
static INLINE void
push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
{
    stackElement se;
    bdescr *nbd;      // Next Block Descriptor

#if defined(DEBUG_RETAINER)
    // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    // Stack-carrying closures are handled elsewhere (retainStack()),
    // never pushed here -- see the invariants in the header comment.
    ASSERT(get_itbl(c)->type != TSO);
    ASSERT(get_itbl(c)->type != AP_STACK);

    //
    // fill in se
    //

    se.c = c;
    se.c_child_r = c_child_r;

    // fill in se.info according to the closure's layout
    switch (get_itbl(c)->type) {
        // no child, no SRT
    case CONSTR_0_1:
    case CONSTR_0_2:
    case ARR_WORDS:
    case COMPACT_NFDATA:
        *first_child = NULL;
        return;

        // one child (fixed), no SRT
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
        *first_child = ((StgMutVar *)c)->var;
        return;
    case THUNK_SELECTOR:
        *first_child = ((StgSelector *)c)->selectee;
        return;
    case BLACKHOLE:
        *first_child = ((StgInd *)c)->indirectee;
        return;
    case CONSTR_1_0:
    case CONSTR_1_1:
        *first_child = c->payload[0];
        return;

        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;

        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
    case CONSTR_2_0:
        *first_child = c->payload[0];       // return the first pointer
        // se.info.type = posTypeStep;
        // se.info.next.step = 2;           // 2 = second
        break;

        // three children (fixed), no SRT
        // need to push a stackElement
    case MVAR_CLEAN:
    case MVAR_DIRTY:
        // head must be TSO and the head of a linked list of TSOs.
        // Should it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;              // 2 = second
        break;

        // three children (fixed), no SRT
    case WEAK:
        *first_child = ((StgWeak *)c)->key;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;
        break;

        // layout.payload.ptrs, no SRT
    case TVAR:
    case CONSTR:
    case CONSTR_NOCAF:
    case PRIM:
    case MUT_PRIM:
    case BCO:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // StgMutArrPtr.ptrs, no SRT
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
    case SMALL_MUT_ARR_PTRS_FROZEN:
    case SMALL_MUT_ARR_PTRS_FROZEN0:
        init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // layout.payload.ptrs, SRT
    case FUN:           // *c is a heap object.
    case FUN_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;

    case THUNK:
    case THUNK_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
    case FUN_1_0:
    case FUN_1_1:
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;

    case THUNK_1_0:
    case THUNK_1_1:
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;

    case FUN_STATIC:    // *c is a heap object.
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        /* fallthrough */
    case FUN_0_1:
    case FUN_0_2:
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

        // SRT only
    case THUNK_STATIC:
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        /* fallthrough */
    case THUNK_0_1:
    case THUNK_0_2:
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

    case TREC_CHUNK:
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
        se.info.next.step = 0;  // entry no.
        break;

        // cannot appear
    case PAP:
    case AP:
    case AP_STACK:
    case TSO:
    case STACK:
    case IND_STATIC:
        // stack objects
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case UNDERFLOW_FRAME:
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
        // invalid objects
    case IND:
    case INVALID_OBJECT:
    default:
        barf("Invalid object *c in push()");
        return;
    }

    // The element would not fit in the current chunk: switch to (or
    // allocate) the next block group before pushing.
    if (stackTop - 1 < stackBottom) {
#if defined(DEBUG_RETAINER)
        // debugBelch("push() to the next stack.\n");
#endif
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
        currentStack->free = (StgPtr)stackTop;

        if (currentStack->link == NULL) {
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
            nbd->u.back = currentStack;
            currentStack->link = nbd;
        } else
            nbd = currentStack->link;   // reuse a previously allocated group

        newStackBlock(nbd);
    }

    // adjust stackTop (actual push)
    stackTop--;
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.

    // ToDo: The line below leads to the warning:
    //    warning: 'se.info.type' may be used uninitialized in this function
    // This is caused by the fact that there are execution paths through the
    // large switch statement above where some cases do not initialize this
    // field. Is this really harmless? Can we avoid the warning?
    *stackTop = se;

#if defined(DEBUG_RETAINER)
    stackSize++;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    // ASSERT(stackSize >= 0);
    // debugBelch("stackSize = %d\n", stackSize);
#endif
}
668
669 /* -----------------------------------------------------------------------------
670 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
671 * Invariants:
672 * stackTop cannot be equal to stackLimit unless the whole stack is
673 * empty, in which case popOff() is not allowed.
674 * Note:
 *   You can think of popOffReal() as a part of popOff() which is
 *   executed at the end of popOff() if necessary. Since popOff() is
 *   likely to be executed quite often while popOffReal() is not, we
 *   separate popOffReal() from popOff(), which is declared as an
 *   INLINE function (for the sake of execution speed). popOffReal()
 *   is called only within popOff() and nowhere else.
681 * -------------------------------------------------------------------------- */
static void
popOffReal(void)
{
    bdescr *pbd;    // Previous Block Descriptor

#if defined(DEBUG_RETAINER)
    // debugBelch("pop() to the previous stack.\n");
#endif

    // Only reached when the pop empties the current chunk.
    ASSERT(stackTop + 1 == stackLimit);
    ASSERT(stackBottom == (stackElement *)currentStack->start);

    if (firstStack == currentStack) {
        // The stack is completely empty: just consume the last element;
        // there is no previous block group to return to.
        stackTop++;
        ASSERT(stackTop == stackLimit);
#if defined(DEBUG_RETAINER)
        stackSize--;
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
        ASSERT(stackSize >= 0);
        debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
    currentStack->free = (StgPtr)stackLimit;

    // find the previous block descriptor
    pbd = currentStack->u.back;
    ASSERT(pbd != NULL);

    returnToOldStack(pbd);

#if defined(DEBUG_RETAINER)
    stackSize--;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    /*
    ASSERT(stackSize >= 0);
    debugBelch("stackSize = %d\n", stackSize);
    */
#endif
}
728
729 static INLINE void
730 popOff(void) {
731 #if defined(DEBUG_RETAINER)
732 // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
733 #endif
734
735 ASSERT(stackTop != stackLimit);
736 ASSERT(!isEmptyRetainerStack());
737
738 // <= (instead of <) is wrong!
739 if (stackTop + 1 < stackLimit) {
740 stackTop++;
741 #if defined(DEBUG_RETAINER)
742 stackSize--;
743 if (stackSize > maxStackSize) maxStackSize = stackSize;
744 /*
745 ASSERT(stackSize >= 0);
746 debugBelch("stackSize = %d\n", stackSize);
747 */
748 #endif
749 return;
750 }
751
752 popOffReal();
753 }
754
755 /* -----------------------------------------------------------------------------
756 * Finds the next object to be considered for retainer profiling and store
757 * its pointer to *c.
758 * Test if the topmost stack element indicates that more objects are left,
759 * and if so, retrieve the first object and store its pointer to *c. Also,
760 * set *cp and *r appropriately, both of which are stored in the stack element.
761 * The topmost stack element then is overwritten so as for it to now denote
762 * the next object.
763 * If the topmost stack element indicates no more objects are left, pop
764 * off the stack element until either an object can be retrieved or
765 * the current stack chunk becomes empty, indicated by true returned by
766 * isOnBoundary(), in which case *c is set to NULL.
767 * Note:
768 * It is okay to call this function even when the current stack chunk
769 * is empty.
770 * -------------------------------------------------------------------------- */
static INLINE void
pop( StgClosure **c, StgClosure **cp, retainer *r )
{
    stackElement *se;

#if defined(DEBUG_RETAINER)
    // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    // Loop until a child is produced or the current chunk is depleted;
    // exhausted elements are popped and the one underneath is retried.
    do {
        if (isOnBoundary()) {     // if the current stack chunk is depleted
            *c = NULL;
            return;
        }

        se = stackTop;

        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];   // the second (last) child
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

            // three children (fixed), no SRT
            // need to push a stackElement
        case MVAR_CLEAN:
        case MVAR_DIRTY:
            // step 2 yields the tail; step 3 yields the value (last child).
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;           // move to the next step
                // no popOff
            } else {
                *c = ((StgMVar *)se->c)->value;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // three children (fixed), no SRT
        case WEAK:
            // step 2 yields the value; step 3 yields the finalizer (last).
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
                // no popOff
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow.  So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            TRecEntry *entry;
            uint32_t entry_no = se->info.next.step >> 2;
            uint32_t field_no = se->info.next.step & 3;
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                // all entries visited
                *c = NULL;
                popOff();
                return;
            }
            entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            *cp = se->c;
            *r = se->c_child_r;
            se->info.next.step++;
            return;
        }

        case TVAR:
        case CONSTR:
        case PRIM:
        case MUT_PRIM:
        case BCO:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
        case MUT_ARR_PTRS_FROZEN:
        case MUT_ARR_PTRS_FROZEN0:
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
                // element exhausted: pop it and retry with the one below
                popOff();
                break;
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // layout.payload.ptrs, SRT
        case FUN:         // always a heap object
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                // pointers exhausted: switch this element to SRT walking
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                // pointers exhausted: switch this element to SRT walking
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
            if (*c != NULL) {
                *cp = se->c;
                *r = se->c_child_r;
                return;
            }
            // SRT exhausted: pop and retry with the element below
            popOff();
            break;

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
        case STACK:
        case IND_STATIC:
        case CONSTR_NOCAF:
            // stack objects
        case UPDATE_FRAME:
        case CATCH_FRAME:
        case UNDERFLOW_FRAME:
        case STOP_FRAME:
        case RET_BCO:
        case RET_SMALL:
        case RET_BIG:
            // invalid objects
        case IND:
        case INVALID_OBJECT:
        default:
            // These are never pushed by push(), so they can never be
            // found on the stack.
            barf("Invalid object *c in pop()");
            return;
        }
    } while (true);
}
957
958 /* -----------------------------------------------------------------------------
959 * RETAINER PROFILING ENGINE
960 * -------------------------------------------------------------------------- */
961
962 void
963 initRetainerProfiling( void )
964 {
965 initializeAllRetainerSet();
966 retainerGeneration = 0;
967 }
968
969 /* -----------------------------------------------------------------------------
970 * This function must be called before f-closing prof_file.
971 * -------------------------------------------------------------------------- */
void
endRetainerProfiling( void )
{
#if defined(SECOND_APPROACH)
    // Dump every accumulated retainer set; prof_file must still be open.
    outputAllRetainerSet(prof_file);
#endif
}
979
980 /* -----------------------------------------------------------------------------
981 * Returns the actual pointer to the retainer set of the closure *c.
982 * It may adjust RSET(c) subject to flip.
983 * Side effects:
984 * RSET(c) is initialized to NULL if its current value does not
985 * conform to flip.
986 * Note:
987 * Even though this function has side effects, they CAN be ignored because
988 * subsequent calls to retainerSetOf() always result in the same return value
989 * and retainerSetOf() is the only way to retrieve retainerSet of a given
990 * closure.
991 * We have to perform an XOR (^) operation each time a closure is examined.
992 * The reason is that we do not know when a closure is visited last.
993 * -------------------------------------------------------------------------- */
994 static INLINE void
995 maybeInitRetainerSet( StgClosure *c )
996 {
997 if (!isRetainerSetFieldValid(c)) {
998 setRetainerSetToNull(c);
999 }
1000 }
1001
1002 /* -----------------------------------------------------------------------------
1003 * Returns true if *c is a retainer.
1004 * -------------------------------------------------------------------------- */
static INLINE bool
isRetainer( StgClosure *c )
{
    // The case partition below IS the definition of "retainer" for this
    // profiling scheme; changing a case changes the profile semantics.
    switch (get_itbl(c)->type) {
        //
        //  True case
        //
        // TSOs MUST be retainers: they constitute the set of roots.
    case TSO:
    case STACK:

        // mutable objects
    case MUT_PRIM:
    case MVAR_CLEAN:
    case MVAR_DIRTY:
    case TVAR:
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:

        // thunks are retainers.
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
    case AP:
    case AP_STACK:

        // Static thunks, or CAFS, are obviously retainers.
    case THUNK_STATIC:

        // WEAK objects are roots; there is separate code in which traversing
        // begins from WEAK objects.
    case WEAK:
        return true;

        //
        // False case
        //

        // constructors
    case CONSTR:
    case CONSTR_NOCAF:
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
        // functions
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
        // partial applications
    case PAP:
        // indirection
        // IND_STATIC used to be an error, but at the moment it can happen
        // as isAlive doesn't look through IND_STATIC as it ignores static
        // closures. See trac #3956 for a program that hit this error.
    case IND_STATIC:
    case BLACKHOLE:
        // static objects
    case FUN_STATIC:
        // misc
    case PRIM:
    case BCO:
    case ARR_WORDS:
        // STM
    case TREC_CHUNK:
        // immutable arrays
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:
        return false;

        //
        // Error case
        //
        // Stack objects are invalid because they are never treated as
        // legal objects during retainer profiling.
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case UNDERFLOW_FRAME:
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
        // other cases
    case IND:
    case INVALID_OBJECT:
    default:
        barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
        return false;
    }
}
1105
1106 /* -----------------------------------------------------------------------------
1107 * Returns the retainer function value for the closure *c, i.e., R(*c).
1108 * This function does NOT return the retainer(s) of *c.
1109 * Invariants:
1110 * *c must be a retainer.
1111 * Note:
1112 * Depending on the definition of this function, the maintenance of retainer
1113 * sets can be made easier. If most retainer sets are likely to be created
1114 * again across garbage collections, refreshAllRetainerSet() in
1115 * RetainerSet.c can simply do nothing.
1116 * If this is not the case, we can free all the retainer sets and
1117 * re-initialize the hash table.
1118 * See refreshAllRetainerSet() in RetainerSet.c.
1119 * -------------------------------------------------------------------------- */
1120 static INLINE retainer
1121 getRetainerFrom( StgClosure *c )
1122 {
1123 ASSERT(isRetainer(c));
1124
1125 #if defined(RETAINER_SCHEME_INFO)
1126 // Retainer scheme 1: retainer = info table
1127 return get_itbl(c);
1128 #elif defined(RETAINER_SCHEME_CCS)
1129 // Retainer scheme 2: retainer = cost centre stack
1130 return c->header.prof.ccs;
1131 #elif defined(RETAINER_SCHEME_CC)
1132 // Retainer scheme 3: retainer = cost centre
1133 return c->header.prof.ccs->cc;
1134 #endif
1135 }
1136
1137 /* -----------------------------------------------------------------------------
1138 * Associates the retainer set *s with the closure *c, that is, *s becomes
1139 * the retainer set of *c.
1140 * Invariants:
1141 * c != NULL
1142 * s != NULL
1143 * -------------------------------------------------------------------------- */
static INLINE void
associate( StgClosure *c, RetainerSet *s )
{
    // StgWord has the same size as pointers, so the following type
    // casting is okay.
    // The low bit of the stored pointer is set to the current value of
    // flip, which marks the rs field as valid for this profiling pass
    // (see the flip discussion at the top of this file).  This assumes
    // retainer sets are at least 2-byte aligned so the low bit is free —
    // NOTE(review): confirm against the allocator in RetainerSet.c.
    RSET(c) = (RetainerSet *)((StgWord)s | flip);
}
1151
1152 /* -----------------------------------------------------------------------------
1153 Call retainClosure for each of the closures covered by a large bitmap.
1154 -------------------------------------------------------------------------- */
1155
1156 static void
1157 retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, uint32_t size,
1158 StgClosure *c, retainer c_child_r)
1159 {
1160 uint32_t i, b;
1161 StgWord bitmap;
1162
1163 b = 0;
1164 bitmap = large_bitmap->bitmap[b];
1165 for (i = 0; i < size; ) {
1166 if ((bitmap & 1) == 0) {
1167 retainClosure((StgClosure *)*p, c, c_child_r);
1168 }
1169 i++;
1170 p++;
1171 if (i % BITS_IN(W_) == 0) {
1172 b++;
1173 bitmap = large_bitmap->bitmap[b];
1174 } else {
1175 bitmap = bitmap >> 1;
1176 }
1177 }
1178 }
1179
1180 static INLINE StgPtr
1181 retain_small_bitmap (StgPtr p, uint32_t size, StgWord bitmap,
1182 StgClosure *c, retainer c_child_r)
1183 {
1184 while (size > 0) {
1185 if ((bitmap & 1) == 0) {
1186 retainClosure((StgClosure *)*p, c, c_child_r);
1187 }
1188 p++;
1189 bitmap = bitmap >> 1;
1190 size--;
1191 }
1192 return p;
1193 }
1194
1195 /* -----------------------------------------------------------------------------
1196 * Call retainClosure for each of the closures in an SRT.
1197 * ------------------------------------------------------------------------- */
1198
1199 static void
1200 retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
1201 {
1202 uint32_t i, b, size;
1203 StgWord bitmap;
1204 StgClosure **p;
1205
1206 b = 0;
1207 p = (StgClosure **)srt->srt;
1208 size = srt->l.size;
1209 bitmap = srt->l.bitmap[b];
1210 for (i = 0; i < size; ) {
1211 if ((bitmap & 1) != 0) {
1212 retainClosure((StgClosure *)*p, c, c_child_r);
1213 }
1214 i++;
1215 p++;
1216 if (i % BITS_IN(W_) == 0) {
1217 b++;
1218 bitmap = srt->l.bitmap[b];
1219 } else {
1220 bitmap = bitmap >> 1;
1221 }
1222 }
1223 }
1224
1225 static INLINE void
1226 retainSRT (StgClosure **srt, uint32_t srt_bitmap, StgClosure *c,
1227 retainer c_child_r)
1228 {
1229 uint32_t bitmap;
1230 StgClosure **p;
1231
1232 bitmap = srt_bitmap;
1233 p = srt;
1234
1235 if (bitmap == (StgHalfWord)(-1)) {
1236 retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1237 return;
1238 }
1239
1240 while (bitmap != 0) {
1241 if ((bitmap & 1) != 0) {
1242 #if defined(COMPILING_WINDOWS_DLL)
1243 if ( (unsigned long)(*srt) & 0x1 ) {
1244 retainClosure(* (StgClosure**) ((unsigned long) (*srt) & ~0x1),
1245 c, c_child_r);
1246 } else {
1247 retainClosure(*srt,c,c_child_r);
1248 }
1249 #else
1250 retainClosure(*srt,c,c_child_r);
1251 #endif
1252 }
1253 p++;
1254 bitmap = bitmap >> 1;
1255 }
1256 }
1257
1258 /* -----------------------------------------------------------------------------
1259 * Process all the objects in the stack chunk from stackStart to stackEnd
1260 * with *c and *c_child_r being their parent and their most recent retainer,
1261 * respectively. Treat stackOptionalFun as another child of *c if it is
1262 * not NULL.
1263 * Invariants:
1264 * *c is one of the following: TSO, AP_STACK.
1265 * If *c is TSO, c == c_child_r.
1266 * stackStart < stackEnd.
1267 * RSET(c) and RSET(c_child_r) are valid, i.e., their
1268 * interpretation conforms to the current value of flip (even when they
1269 * are interpreted to be NULL).
1270 * If *c is TSO, its state is not ThreadComplete,or ThreadKilled,
1271 * which means that its stack is ready to process.
1272 * Note:
 *  This code was almost plagiarized from GC.c! For each pointer,
1274 * retainClosure() is invoked instead of evacuate().
1275 * -------------------------------------------------------------------------- */
static void
retainStack( StgClosure *c, retainer c_child_r,
             StgPtr stackStart, StgPtr stackEnd )
{
    stackElement *oldStackBoundary;
    StgPtr p;
    const StgRetInfoTable *info;
    StgWord bitmap;
    uint32_t size;

#if defined(DEBUG_RETAINER)
    cStackSize++;
    if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
#endif

    /*
      Each invocation of retainStack() creates a new virtual
      stack. Since all such stacks share a single common stack, we
      record the current currentStackBoundary, which will be restored
      at the exit.
    */
    oldStackBoundary = currentStackBoundary;
    currentStackBoundary = stackTop;

#if defined(DEBUG_RETAINER)
    // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
#endif

    // NOTE(review): the AP_STACK case of retainClosure() also calls this
    // function with an AP_STACK closure, which this STACK-only assertion
    // would reject under DEBUG builds — confirm intended invariant.
    ASSERT(get_itbl(c)->type == STACK);

    // Walk stack frames from stackStart (the youngest) up to stackEnd,
    // dispatching on each frame's return info table.
    p = stackStart;
    while (p < stackEnd) {
        info = get_ret_itbl((StgClosure *)p);

        switch(info->i.type) {

        case UPDATE_FRAME:
            retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
            p += sizeofW(StgUpdateFrame);
            continue;

        case UNDERFLOW_FRAME:
        case STOP_FRAME:
        case CATCH_FRAME:
        case CATCH_STM_FRAME:
        case CATCH_RETRY_FRAME:
        case ATOMICALLY_FRAME:
        case RET_SMALL:
            // Small-bitmap frames: retain the pointer words of the payload,
            // then fall into the shared SRT-following code.
            bitmap = BITMAP_BITS(info->i.layout.bitmap);
            size = BITMAP_SIZE(info->i.layout.bitmap);
            p++;
            p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

        follow_srt:
            retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
            continue;

        case RET_BCO: {
            StgBCO *bco;

            // The word after the frame header is the BCO itself; retain it,
            // then walk its payload under the BCO's own bitmap.
            p++;
            retainClosure((StgClosure *)*p, c, c_child_r);
            bco = (StgBCO *)*p;
            p++;
            size = BCO_BITMAP_SIZE(bco);
            retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
            p += size;
            continue;
        }

            // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
        case RET_BIG:
            size = GET_LARGE_BITMAP(&info->i)->size;
            p++;
            retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
                                size, c, c_child_r);
            p += size;
            // and don't forget to follow the SRT
            goto follow_srt;

        case RET_FUN: {
            StgRetFun *ret_fun = (StgRetFun *)p;
            const StgFunInfoTable *fun_info;

            // Retain the function closure first; its info table then tells
            // us how the argument payload is laid out.
            retainClosure(ret_fun->fun, c, c_child_r);
            fun_info = get_fun_itbl(UNTAG_CONST_CLOSURE(ret_fun->fun));

            p = (P_)&ret_fun->payload;
            switch (fun_info->f.fun_type) {
            case ARG_GEN:
                bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
                size = BITMAP_SIZE(fun_info->f.b.bitmap);
                p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
                break;
            case ARG_GEN_BIG:
                size = GET_FUN_LARGE_BITMAP(fun_info)->size;
                retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
                                    size, c, c_child_r);
                p += size;
                break;
            default:
                // Standard argument pattern: bitmap comes from the static
                // stg_arg_bitmaps table indexed by fun_type.
                bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
                size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
                p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
                break;
            }
            goto follow_srt;
        }

        default:
            barf("Invalid object found in retainStack(): %d",
                 (int)(info->i.type));
        }
    }

    // restore currentStackBoundary
    currentStackBoundary = oldStackBoundary;
#if defined(DEBUG_RETAINER)
    // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
#endif

#if defined(DEBUG_RETAINER)
    cStackSize--;
#endif
}
1401
1402 /* ----------------------------------------------------------------------------
1403 * Call retainClosure for each of the children of a PAP/AP
1404 * ------------------------------------------------------------------------- */
1405
static INLINE StgPtr
retain_PAP_payload (StgClosure *pap,    /* NOT tagged */
                    retainer c_child_r, /* NOT tagged */
                    StgClosure *fun,    /* tagged */
                    StgClosure** payload, StgWord n_args)
{
    StgPtr p;
    StgWord bitmap;
    const StgFunInfoTable *fun_info;

    // Retain the function closure itself, then untag it so we can read
    // its info table to find the argument layout.
    retainClosure(fun, pap, c_child_r);
    fun = UNTAG_CLOSURE(fun);
    fun_info = get_fun_itbl(fun);
    ASSERT(fun_info->i.type != PAP);

    p = (StgPtr)payload;

    // The payload layout depends on the function's argument pattern;
    // only the first n_args words are valid, regardless of the bitmap's
    // full size.
    switch (fun_info->f.fun_type) {
    case ARG_GEN:
        bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
        p = retain_small_bitmap(p, n_args, bitmap,
                                pap, c_child_r);
        break;
    case ARG_GEN_BIG:
        retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
                            n_args, pap, c_child_r);
        p += n_args;
        break;
    case ARG_BCO:
        // Byte-code objects carry their own bitmap.
        retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun),
                            n_args, pap, c_child_r);
        p += n_args;
        break;
    default:
        // Standard argument pattern, looked up in stg_arg_bitmaps.
        bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
        p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
        break;
    }
    // Returns the pointer just past the last payload word processed.
    return p;
}
1446
1447 /* -----------------------------------------------------------------------------
 *  Compute the retainer set of *c0 and all its descendants by traversing.
1449 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
1450 * Invariants:
1451 * c0 = cp0 = r0 holds only for root objects.
1452 * RSET(cp0) and RSET(r0) are valid, i.e., their
1453 * interpretation conforms to the current value of flip (even when they
1454 * are interpreted to be NULL).
1455 * However, RSET(c0) may be corrupt, i.e., it may not conform to
1456 * the current value of flip. If it does not, during the execution
1457 * of this function, RSET(c0) must be initialized as well as all
1458 * its descendants.
1459 * Note:
1460 * stackTop must be the same at the beginning and the exit of this function.
1461 * *c0 can be TSO (as well as AP_STACK).
1462 * -------------------------------------------------------------------------- */
static void
retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
{
    // c = Current closure                          (possibly tagged)
    // cp = Current closure's Parent                (NOT tagged)
    // r = current closures' most recent Retainer   (NOT tagged)
    // c_child_r = current closure's children's most recent retainer
    // first_child = first child of c
    StgClosure *c, *cp, *first_child;
    RetainerSet *s, *retainerSetOfc;
    retainer r, c_child_r;
    StgWord typeOfc;

#if defined(DEBUG_RETAINER)
    // StgPtr oldStackTop;
#endif

#if defined(DEBUG_RETAINER)
    // oldStackTop = stackTop;
    // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
#endif

    // (c, cp, r) = (c0, cp0, r0)
    c = c0;
    cp = cp0;
    r = r0;
    goto inner_loop;

    // Outer loop: fetch the next pending (closure, parent, retainer) triple
    // from the traverse stack; a NULL closure means the stack is exhausted
    // and the traversal rooted at c0 is complete.
loop:
    //debugBelch("loop");
    // pop to (c, cp, r);
    pop(&c, &cp, &r);

    if (c == NULL) {
#if defined(DEBUG_RETAINER)
        // debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
#endif
        return;
    }

    //debugBelch("inner_loop");

    // Inner loop: process closure c directly, without going through the
    // traverse stack (used for the initial triple and for IND_STATIC
    // short-cuts).
inner_loop:
    c = UNTAG_CLOSURE(c);

    // c  = current closure under consideration,
    // cp = current closure's parent,
    // r  = current closure's most recent retainer
    //
    // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
    //   RSET(cp) and RSET(r) are valid.
    //   RSET(c) is valid only if c has been visited before.
    //
    // Loop invariants (on the relation between c, cp, and r)
    //   if cp is not a retainer, r belongs to RSET(cp).
    //   if cp is a retainer, r == cp.

    typeOfc = get_itbl(c)->type;

#if defined(DEBUG_RETAINER)
    switch (typeOfc) {
    case IND_STATIC:
    case CONSTR_NOCAF:
    case THUNK_STATIC:
    case FUN_STATIC:
        break;
    default:
        if (retainerSetOf(c) == NULL) {   // first visit?
            costArray[typeOfc] += cost(c);
            sumOfNewCost += cost(c);
        }
        break;
    }
#endif

    // special cases
    switch (typeOfc) {
    case TSO:
        // Finished/killed threads have no stack to process; skip them
        // entirely (their retainer set is never computed).
        if (((StgTSO *)c)->what_next == ThreadComplete ||
            ((StgTSO *)c)->what_next == ThreadKilled) {
#if defined(DEBUG_RETAINER)
            debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
#endif
            goto loop;
        }
        break;

    case IND_STATIC:
        // We just skip IND_STATIC, so its retainer set is never computed.
        c = ((StgIndStatic *)c)->indirectee;
        goto inner_loop;
        // static objects with no pointers out, so goto loop.
    case CONSTR_NOCAF:
        // It is not just enough not to compute the retainer set for *c; it is
        // mandatory because CONSTR_NOCAF are not reachable from
        // scavenged_static_objects, the list from which is assumed to traverse
        // all static objects after major garbage collections.
        goto loop;
    case THUNK_STATIC:
    case FUN_STATIC:
        if (get_itbl(c)->srt_bitmap == 0) {
            // No need to compute the retainer set; no dynamic objects
            // are reachable from *c.
            //
            // Static objects: if we traverse all the live closures,
            // including static closures, during each heap census then
            // we will observe that some static closures appear and
            // disappear.  eg. a closure may contain a pointer to a
            // static function 'f' which is not otherwise reachable
            // (it doesn't indirectly point to any CAFs, so it doesn't
            // appear in any SRTs), so we would find 'f' during
            // traversal.  However on the next sweep there may be no
            // closures pointing to 'f'.
            //
            // We must therefore ignore static closures whose SRT is
            // empty, because these are exactly the closures that may
            // "appear".  A closure with a non-empty SRT, and which is
            // still required, will always be reachable.
            //
            // But what about CONSTR?  Surely these may be able
            // to appear, and they don't have SRTs, so we can't
            // check.  So for now, we're calling
            // resetStaticObjectForRetainerProfiling() from the
            // garbage collector to reset the retainer sets in all the
            // reachable static objects.
            goto loop;
        }
        // fall through: a static thunk/fun with a non-empty SRT is
        // processed like any other closure below.
    default:
        break;
    }

    // The above objects are ignored in computing the average number of times
    // an object is visited.
    timesAnyObjectVisited++;

    // If this is the first visit to c, initialize its retainer set.
    maybeInitRetainerSet(c);
    retainerSetOfc = retainerSetOf(c);

    // Now compute s:
    //    isRetainer(cp) == true => s == NULL
    //    isRetainer(cp) == false => s == cp.retainer
    if (isRetainer(cp))
        s = NULL;
    else
        s = retainerSetOf(cp);

    // (c, cp, r, s) is available.

    // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
    if (retainerSetOfc == NULL) {
        // This is the first visit to *c.
        numObjectVisited++;

        if (s == NULL)
            associate(c, singleton(r));
        else
            // s is actually the retainer set of *c!
            associate(c, s);

        // compute c_child_r
        c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
    } else {
        // This is not the first visit to *c.
        if (isMember(r, retainerSetOfc))
            goto loop;          // no need to process child

        if (s == NULL)
            associate(c, addElement(r, retainerSetOfc));
        else {
            // s is not NULL and cp is not a retainer. This means that
            // each time *cp is visited, so is *c. Thus, if s has
            // exactly one more element in its retainer set than c, s
            // is also the new retainer set for *c.
            if (s->num == retainerSetOfc->num + 1) {
                associate(c, s);
            }
            // Otherwise, just add R_r to the current retainer set of *c.
            else {
                associate(c, addElement(r, retainerSetOfc));
            }
        }

        if (isRetainer(c))
            goto loop;          // no need to process child

        // compute c_child_r
        c_child_r = r;
    }

    // now, RSET() of all of *c, *cp, and *r is valid.
    // (c, c_child_r) are available.

    // process child

    // Special case closures: we process these all in one go rather
    // than attempting to save the current position, because doing so
    // would be hard.
    switch (typeOfc) {
    case STACK:
        retainStack(c, c_child_r,
                    ((StgStack *)c)->sp,
                    ((StgStack *)c)->stack + ((StgStack *)c)->stack_size);
        goto loop;

    case TSO:
    {
        StgTSO *tso = (StgTSO *)c;

        retainClosure((StgClosure*) tso->stackobj, c, c_child_r);
        retainClosure((StgClosure*) tso->blocked_exceptions, c, c_child_r);
        retainClosure((StgClosure*) tso->bq, c, c_child_r);
        retainClosure((StgClosure*) tso->trec, c, c_child_r);
        // block_info.closure is only a closure for these blocking reasons.
        if (   tso->why_blocked == BlockedOnMVar
               || tso->why_blocked == BlockedOnMVarRead
               || tso->why_blocked == BlockedOnBlackHole
               || tso->why_blocked == BlockedOnMsgThrowTo
            ) {
            retainClosure(tso->block_info.closure, c, c_child_r);
        }
        goto loop;
    }

    case PAP:
    {
        StgPAP *pap = (StgPAP *)c;
        retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
        goto loop;
    }

    case AP:
    {
        StgAP *ap = (StgAP *)c;
        retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
        goto loop;
    }

    case AP_STACK:
        retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
        retainStack(c, c_child_r,
                    (StgPtr)((StgAP_STACK *)c)->payload,
                    (StgPtr)((StgAP_STACK *)c)->payload +
                             ((StgAP_STACK *)c)->size);
        goto loop;
    }

    // Generic case: push c's children onto the traverse stack and get the
    // first one back.
    push(c, c_child_r, &first_child);

    // If first_child is null, c has no child.
    // If first_child is not null, the top stack element points to the next
    // object. push() may or may not push a stackElement on the stack.
    if (first_child == NULL)
        goto loop;

    // (c, cp, r) = (first_child, c, c_child_r)
    r = c_child_r;
    cp = c;
    c = first_child;
    goto inner_loop;
}
1723
1724 /* -----------------------------------------------------------------------------
1725 * Compute the retainer set for every object reachable from *tl.
1726 * -------------------------------------------------------------------------- */
static void
retainRoot(void *user STG_UNUSED, StgClosure **tl)
{
    StgClosure *c;

    // We no longer assume that only TSOs and WEAKs are roots; any closure can
    // be a root.

    // Each root starts with an empty traverse stack and a fresh stack
    // boundary.
    ASSERT(isEmptyRetainerStack());
    currentStackBoundary = stackTop;

    c = UNTAG_CLOSURE(*tl);
    maybeInitRetainerSet(c);
    if (c != &stg_END_TSO_QUEUE_closure && isRetainer(c)) {
        // A retainer root retains everything below it with its own
        // retainer function value.
        retainClosure(c, c, getRetainerFrom(c));
    } else {
        // Non-retainer roots (and the end-of-queue sentinel) are charged
        // to the SYSTEM cost-centre stack.
        retainClosure(c, c, CCS_SYSTEM);
    }

    // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
    // *tl might be a TSO which is ThreadComplete, in which
    // case we ignore it for the purposes of retainer profiling.
}
1750
1751 /* -----------------------------------------------------------------------------
1752 * Compute the retainer set for each of the objects in the heap.
1753 * -------------------------------------------------------------------------- */
static void
computeRetainerSet( void )
{
    StgWeak *weak;
    RetainerSet *rtl;
    uint32_t g, n;
    StgPtr ml;
    bdescr *bd;
#if defined(DEBUG_RETAINER)
    RetainerSet tmpRetainerSet;
#endif

    markCapabilities(retainRoot, NULL); // for scheduler roots

    // This function is called after a major GC, when key, value, and finalizer
    // all are guaranteed to be valid, or reachable.
    //
    // The following code assumes that WEAK objects are considered to be roots
    // for retainer profiling.
    for (n = 0; n < n_capabilities; n++) {
        // NB: after a GC, all nursery weak_ptr_lists have been migrated
        // to the global lists living in the generations
        ASSERT(capabilities[n]->weak_ptr_list_hd == NULL);
        ASSERT(capabilities[n]->weak_ptr_list_tl == NULL);
    }
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        for (weak = generations[g].weak_ptr_list; weak != NULL; weak = weak->link) {
            // retainRoot((StgClosure *)weak);
            retainRoot(NULL, (StgClosure **)&weak);
        }
    }

    // Consider roots from the stable ptr table.
    markStableTables(retainRoot, NULL);

    // The following code resets the rs field of each unvisited mutable
    // object (computing sumOfNewCostExtra and updating costArray[] when
    // debugging retainer profiler).
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        // NOT true: even G0 has a block on its mutable list
        // ASSERT(g != 0 || (generations[g].mut_list == NULL));

        // Traversing through mut_list is necessary
        // because we can find MUT_VAR objects which have not been
        // visited during retainer profiling.
        for (n = 0; n < n_capabilities; n++) {
            for (bd = capabilities[n]->mut_lists[g]; bd != NULL; bd = bd->link) {
                // Each word in a mut_list block is a pointer to a mutable
                // object, not an object itself.
                for (ml = bd->start; ml < bd->free; ml++) {

                    maybeInitRetainerSet((StgClosure *)*ml);
                    rtl = retainerSetOf((StgClosure *)*ml);

#if defined(DEBUG_RETAINER)
                    if (rtl == NULL) {
                        // first visit to *ml
                        // This is a violation of the interface rule!
                        RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);

                        switch (get_itbl((StgClosure *)ml)->type) {
                        case IND_STATIC:
                            // no cost involved
                            break;
                        case CONSTR_NOCAF:
                        case THUNK_STATIC:
                        case FUN_STATIC:
                            barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
                            break;
                        default:
                            // dynamic objects
                            costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
                            sumOfNewCostExtra += cost((StgClosure *)ml);
                            break;
                        }
                    }
#endif
                }
            }
        }
    }
}
1834
1835 /* -----------------------------------------------------------------------------
1836 * Traverse all static objects for which we compute retainer sets,
1837 * and reset their rs fields to NULL, which is accomplished by
1838 * invoking maybeInitRetainerSet(). This function must be called
1839 * before zeroing all objects reachable from scavenged_static_objects
1840 * in the case of major garbage collections. See GarbageCollect() in
1841 * GC.c.
1842 * Note:
1843 * The mut_once_list of the oldest generation must also be traversed?
1844 * Why? Because if the evacuation of an object pointed to by a static
1845 * indirection object fails, it is put back to the mut_once_list of
1846 * the oldest generation.
1847 * However, this is not necessary because any static indirection objects
1848 * are just traversed through to reach dynamic objects. In other words,
1849 * they are not taken into consideration in computing retainer sets.
1850 *
1851 * SDM (20/7/2011): I don't think this is doing anything sensible,
1852 * because it happens before retainerProfile() and at the beginning of
 * retainerProfile() we change the sense of 'flip'.  So all of the
1854 * calls to maybeInitRetainerSet() here are initialising retainer sets
1855 * with the wrong flip. Also, I don't see why this is necessary. I
1856 * added a maybeInitRetainerSet() call to retainRoot(), and that seems
1857 * to have fixed the assertion failure in retainerSetOf() I was
1858 * encountering.
1859 * -------------------------------------------------------------------------- */
1860 void
1861 resetStaticObjectForRetainerProfiling( StgClosure *static_objects )
1862 {
1863 #if defined(DEBUG_RETAINER)
1864 uint32_t count;
1865 #endif
1866 StgClosure *p;
1867
1868 #if defined(DEBUG_RETAINER)
1869 count = 0;
1870 #endif
1871 p = static_objects;
1872 while (p != END_OF_STATIC_OBJECT_LIST) {
1873 p = UNTAG_STATIC_LIST_PTR(p);
1874 #if defined(DEBUG_RETAINER)
1875 count++;
1876 #endif
1877 switch (get_itbl(p)->type) {
1878 case IND_STATIC:
1879 // Since we do not compute the retainer set of any
1880 // IND_STATIC object, we don't have to reset its retainer
1881 // field.
1882 p = (StgClosure*)*IND_STATIC_LINK(p);
1883 break;
1884 case THUNK_STATIC:
1885 maybeInitRetainerSet(p);
1886 p = (StgClosure*)*THUNK_STATIC_LINK(p);
1887 break;
1888 case FUN_STATIC:
1889 maybeInitRetainerSet(p);
1890 p = (StgClosure*)*FUN_STATIC_LINK(p);
1891 break;
1892 case CONSTR:
1893 case CONSTR_1_0:
1894 case CONSTR_2_0:
1895 case CONSTR_1_1:
1896 case CONSTR_NOCAF:
1897 maybeInitRetainerSet(p);
1898 p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
1899 break;
1900 default:
1901 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1902 p, get_itbl(p)->type);
1903 break;
1904 }
1905 }
1906 #if defined(DEBUG_RETAINER)
1907 // debugBelch("count in scavenged_static_objects = %d\n", count);
1908 #endif
1909 }
1910
1911 /* -----------------------------------------------------------------------------
1912 * Perform retainer profiling.
1913 * N is the oldest generation being profilied, where the generations are
1914 * numbered starting at 0.
1915 * Invariants:
1916 * Note:
1917 * This function should be called only immediately after major garbage
1918 * collection.
1919 * ------------------------------------------------------------------------- */
void
retainerProfile(void)
{
#if defined(DEBUG_RETAINER)
  uint32_t i;
  uint32_t totalHeapSize;        // total raw heap size (computed by linear scanning)
#endif

#if defined(DEBUG_RETAINER)
  debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
#endif

  stat_startRP();

  // We haven't flipped the bit yet.
#if defined(DEBUG_RETAINER)
  debugBelch("Before traversing:\n");
  sumOfCostLinear = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArrayLinear[i] = 0;
  totalHeapSize = checkHeapSanityForRetainerProfiling();

  debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
  /*
  debugBelch("costArrayLinear[] = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    debugBelch("[%u:%u] ", i, costArrayLinear[i]);
  debugBelch("\n");
  */

  ASSERT(sumOfCostLinear == totalHeapSize);

/*
#define pcostArrayLinear(index) \
  if (costArrayLinear[index] > 0) \
    debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
  pcostArrayLinear(THUNK_STATIC);
  pcostArrayLinear(FUN_STATIC);
  pcostArrayLinear(CONSTR_NOCAF);
*/
#endif

  // Now we flip the value of flip: every rs field that was valid becomes
  // stale, so all closures will be treated as unvisited by this pass.
  flip = flip ^ 1;

#if defined(DEBUG_RETAINER)
  stackSize = 0;
  maxStackSize = 0;
  cStackSize = 0;
  maxCStackSize = 0;
#endif
  numObjectVisited = 0;
  timesAnyObjectVisited = 0;

#if defined(DEBUG_RETAINER)
  debugBelch("During traversing:\n");
  sumOfNewCost = 0;
  sumOfNewCostExtra = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArray[i] = 0;
#endif

  /*
    We initialize the traverse stack each time the retainer profiling is
    performed (because the traverse stack size varies on each retainer profiling
    and this operation is not costly anyhow). However, we just refresh the
    retainer sets.
   */
  initializeTraverseStack();
#if defined(DEBUG_RETAINER)
  initializeAllRetainerSet();
#else
  refreshAllRetainerSet();
#endif
  computeRetainerSet();

#if defined(DEBUG_RETAINER)
  debugBelch("After traversing:\n");
  sumOfCostLinear = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArrayLinear[i] = 0;
  totalHeapSize = checkHeapSanityForRetainerProfiling();

  debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
  ASSERT(sumOfCostLinear == totalHeapSize);

  // now, compare the two results
  /*
    Note:
      costArray[] must be exactly the same as costArrayLinear[].
      Known exceptions:
        1) Dead weak pointers, whose type is CONSTR. These objects are not
           reachable from any roots.
  */
  debugBelch("Comparison:\n");
  debugBelch("\tcostArrayLinear[] (must be empty) = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    if (costArray[i] != costArrayLinear[i])
      // nothing should be printed except MUT_VAR after major GCs
      debugBelch("[%u:%u] ", i, costArrayLinear[i]);
  debugBelch("\n");

  debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
  debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
  debugBelch("\tcostArray[] (must be empty) = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    if (costArray[i] != costArrayLinear[i])
      // nothing should be printed except MUT_VAR after major GCs
      debugBelch("[%u:%u] ", i, costArray[i]);
  debugBelch("\n");

  // only for major garbage collection
  ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
#endif

  // post-processing
  closeTraverseStack();
#if defined(DEBUG_RETAINER)
  closeAllRetainerSet();
#else
  // Note that there is no post-processing for the retainer sets.
#endif
  retainerGeneration++;

  stat_endRP(
    retainerGeneration - 1,   // retainerGeneration has just been incremented!
#if defined(DEBUG_RETAINER)
    maxCStackSize, maxStackSize,
#endif
    (double)timesAnyObjectVisited / numObjectVisited);
}
2051
2052 /* -----------------------------------------------------------------------------
2053 * DEBUGGING CODE
2054 * -------------------------------------------------------------------------- */
2055
2056 #if defined(DEBUG_RETAINER)
2057
// Heuristic validity check on a pointer during retainer-profiling sanity
// checks: r must either look like a static closure, or be heap-allocated
// in a block that is not marked BF_FREE; additionally the first word it
// points to must not be 0xaaaaaaaaaaaaaaaa (presumably the debug
// allocator's freed-memory fill pattern — TODO confirm).
#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
        ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
        ((StgWord)(*(StgPtr)r)!=(StgWord)0xaaaaaaaaaaaaaaaaULL))
2061
/*
 * Sanity-check a single heap closure and verify that the retainer
 * profiler visited it in the current pass.
 *
 * An object counts as visited when the low bit of its retainer-set
 * field agrees with the global 'flip' (see the note on the rs field at
 * the top of this file).  Unvisited objects are reported via
 * debugBelch(); dead weak pointers (CONSTR closures whose profiling
 * type and description are both "DEAD_WEAK") are a known, tolerated
 * exception — their cost is accumulated into costArray[] and
 * sumOfNewCost instead.
 *
 * Returns the size of the closure in words, as given by closure_sizeW().
 */
static uint32_t
sanityCheckHeapClosure( StgClosure *c )
{
    ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
    ASSERT(LOOKS_LIKE_PTR(c));

    // Low bit of RSET(c) differs from flip => not visited this pass.
    if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
        if (get_itbl(c)->type == CONSTR &&
            !strcmp(GET_PROF_TYPE(get_itbl(c)), "DEAD_WEAK") &&
            !strcmp(GET_PROF_DESC(get_itbl(c)), "DEAD_WEAK")) {
            debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
            costArray[get_itbl(c)->type] += cost(c);
            sumOfNewCost += cost(c);
        } else
            debugBelch(
                "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
                flip, c, get_itbl(c)->type,
                get_itbl(c)->prof.closure_type, GET_PROF_DESC(get_itbl(c)),
                RSET(c));
    } else {
        // Visited as expected; nothing to report.
        // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
    }

    return closure_sizeW(c);
}
2087
2088 static uint32_t
2089 heapCheck( bdescr *bd )
2090 {
2091 StgPtr p;
2092 static uint32_t costSum, size;
2093
2094 costSum = 0;
2095 while (bd != NULL) {
2096 p = bd->start;
2097 while (p < bd->free) {
2098 size = sanityCheckHeapClosure((StgClosure *)p);
2099 sumOfCostLinear += size;
2100 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2101 p += size;
2102 // no need for slop check; I think slops are not used currently.
2103 }
2104 ASSERT(p == bd->free);
2105 costSum += bd->free - bd->start;
2106 bd = bd->link;
2107 }
2108
2109 return costSum;
2110 }
2111
2112 static uint32_t
2113 smallObjectPoolCheck(void)
2114 {
2115 bdescr *bd;
2116 StgPtr p;
2117 static uint32_t costSum, size;
2118
2119 bd = g0s0->blocks;
2120 costSum = 0;
2121
2122 // first block
2123 if (bd == NULL)
2124 return costSum;
2125
2126 p = bd->start;
2127 while (p < alloc_Hp) {
2128 size = sanityCheckHeapClosure((StgClosure *)p);
2129 sumOfCostLinear += size;
2130 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2131 p += size;
2132 }
2133 ASSERT(p == alloc_Hp);
2134 costSum += alloc_Hp - bd->start;
2135
2136 bd = bd->link;
2137 while (bd != NULL) {
2138 p = bd->start;
2139 while (p < bd->free) {
2140 size = sanityCheckHeapClosure((StgClosure *)p);
2141 sumOfCostLinear += size;
2142 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2143 p += size;
2144 }
2145 ASSERT(p == bd->free);
2146 costSum += bd->free - bd->start;
2147 bd = bd->link;
2148 }
2149
2150 return costSum;
2151 }
2152
2153 static uint32_t
2154 chainCheck(bdescr *bd)
2155 {
2156 uint32_t costSum, size;
2157
2158 costSum = 0;
2159 while (bd != NULL) {
2160 // bd->free - bd->start is not an accurate measurement of the
2161 // object size. Actually it is always zero, so we compute its
2162 // size explicitly.
2163 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2164 sumOfCostLinear += size;
2165 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
2166 costSum += size;
2167 bd = bd->link;
2168 }
2169
2170 return costSum;
2171 }
2172
2173 static uint32_t
2174 checkHeapSanityForRetainerProfiling( void )
2175 {
2176 uint32_t costSum, g, s;
2177
2178 costSum = 0;
2179 debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2180 if (RtsFlags.GcFlags.generations == 1) {
2181 costSum += heapCheck(g0s0->to_blocks);
2182 debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2183 costSum += chainCheck(g0s0->large_objects);
2184 debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2185 } else {
2186 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
2187 for (s = 0; s < generations[g].n_steps; s++) {
2188 /*
2189 After all live objects have been scavenged, the garbage
2190 collector may create some objects in
2191 scheduleFinalizers(). These objects are created through
2192 allocate(), so the small object pool or the large object
2193 pool of the g0s0 may not be empty.
2194 */
2195 if (g == 0 && s == 0) {
2196 costSum += smallObjectPoolCheck();
2197 debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2198 costSum += chainCheck(generations[g].steps[s].large_objects);
2199 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2200 } else {
2201 costSum += heapCheck(generations[g].steps[s].blocks);
2202 debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2203 costSum += chainCheck(generations[g].steps[s].large_objects);
2204 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2205 }
2206 }
2207 }
2208
2209 return costSum;
2210 }
2211
2212 void
2213 findPointer(StgPtr p)
2214 {
2215 StgPtr q, r, e;
2216 bdescr *bd;
2217 uint32_t g, s;
2218
2219 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2220 for (s = 0; s < generations[g].n_steps; s++) {
2221 // if (g == 0 && s == 0) continue;
2222 bd = generations[g].steps[s].blocks;
2223 for (; bd; bd = bd->link) {
2224 for (q = bd->start; q < bd->free; q++) {
2225 if (*q == (StgWord)p) {
2226 r = q;
2227 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2228 debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2229 // return;
2230 }
2231 }
2232 }
2233 bd = generations[g].steps[s].large_objects;
2234 for (; bd; bd = bd->link) {
2235 e = bd->start + cost((StgClosure *)bd->start);
2236 for (q = bd->start; q < e; q++) {
2237 if (*q == (StgWord)p) {
2238 r = q;
2239 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2240 debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
2241 // return;
2242 }
2243 }
2244 }
2245 }
2246 }
2247 }
2248
2249 static void
2250 belongToHeap(StgPtr p)
2251 {
2252 bdescr *bd;
2253 uint32_t g, s;
2254
2255 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2256 for (s = 0; s < generations[g].n_steps; s++) {
2257 // if (g == 0 && s == 0) continue;
2258 bd = generations[g].steps[s].blocks;
2259 for (; bd; bd = bd->link) {
2260 if (bd->start <= p && p < bd->free) {
2261 debugBelch("Belongs to gen[%d], step[%d]", g, s);
2262 return;
2263 }
2264 }
2265 bd = generations[g].steps[s].large_objects;
2266 for (; bd; bd = bd->link) {
2267 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2268 debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
2269 return;
2270 }
2271 }
2272 }
2273 }
2274 }
2275 #endif /* DEBUG_RETAINER */
2276
2277 #endif /* PROFILING */