Fix calculation in threadStackOverflow
[ghc.git] / rts / RetainerProfile.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2001
4 * Author: Sungwoo Park
5 *
6 * Retainer profiling.
7 *
8 * ---------------------------------------------------------------------------*/
9
10 #if defined(PROFILING)
11
12 // Turn off inlining when debugging - it obfuscates things
13 #if defined(DEBUG)
14 #define INLINE
15 #else
16 #define INLINE inline
17 #endif
18
19 #include "PosixSource.h"
20 #include "Rts.h"
21
22 #include "RtsUtils.h"
23 #include "RetainerProfile.h"
24 #include "RetainerSet.h"
25 #include "Schedule.h"
26 #include "Printer.h"
27 #include "Weak.h"
28 #include "sm/Sanity.h"
29 #include "Profiling.h"
30 #include "Stats.h"
31 #include "ProfHeap.h"
32 #include "Apply.h"
33 #include "Stable.h" /* markStableTables */
34 #include "sm/Storage.h" // for END_OF_STATIC_LIST
35
36 /* Note [What is a retainer?]
37 ~~~~~~~~~~~~~~~~~~~~~~~~~~
38 The definition of what sorts of things are counted as retainers is a bit hard to
39 pin down. Intuitively, we want to identify closures which will help the user
40 identify memory leaks due to thunks. In practice we also end up lumping mutable
41 objects in this group for reasons that have been lost to time.
42
43 The definition of retainer is implemented in isRetainer(), defined later in this
44 file.
45 */
46
47
48 /*
49 Note: what to change in order to plug-in a new retainer profiling scheme?
50 (1) type retainer in ../includes/StgRetainerProf.h
51 (2) retainer function R(), i.e., getRetainerFrom()
52 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
53 in RetainerSet.h, if needed.
54 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
55 */
56
57 /* -----------------------------------------------------------------------------
58 * Declarations...
59 * -------------------------------------------------------------------------- */
60
static uint32_t retainerGeneration;  // generation; incremented once per profiling pass

static uint32_t numObjectVisited;    // total number of objects visited
static uint32_t timesAnyObjectVisited;  // number of times any objects are
                                        // visited

/*
  The rs field in the profile header of any object points to its retainer
  set in an indirect way: if flip is 0, it points to the retainer set;
  if flip is 1, it points to the next byte after the retainer set (even
  for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
  pointer. See retainerSetOf().
 */

StgWord flip = 0;     // flip bit
                      // must be 0 if DEBUG_RETAINER is on (for static closures)

// Store a flip-adjusted NULL into the closure's retainer-set field,
// marking it "not yet visited in this pass".
#define setRetainerSetToNull(c)   \
  (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)

// Forward declarations for the mutually recursive traversal routines.
static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
static void retainClosure(StgClosure *, StgClosure *, retainer);
#if defined(DEBUG_RETAINER)
static void belongToHeap(StgPtr p);
#endif
86
87 #if defined(DEBUG_RETAINER)
88 /*
89 cStackSize records how many times retainStack() has been invoked recursively,
90 that is, the number of activation records for retainStack() on the C stack.
91 maxCStackSize records its max value.
92 Invariants:
93 cStackSize <= maxCStackSize
94 */
95 static uint32_t cStackSize, maxCStackSize;
96
97 static uint32_t sumOfNewCost; // sum of the cost of each object, computed
98 // when the object is first visited
99 static uint32_t sumOfNewCostExtra; // for those objects not visited during
100 // retainer profiling, e.g., MUT_VAR
101 static uint32_t costArray[N_CLOSURE_TYPES];
102
103 uint32_t sumOfCostLinear; // sum of the costs of all object, computed
104 // when linearly traversing the heap after
105 // retainer profiling
106 uint32_t costArrayLinear[N_CLOSURE_TYPES];
107 #endif
108
109 /* -----------------------------------------------------------------------------
110 * Retainer stack - header
111 * Note:
 * Although the retainer stack implementation could be separated
 * from the retainer profiling engine, there does not seem to be
 * any advantage in doing that; the retainer stack is an integral part
 * of the retainer profiling engine and cannot be used elsewhere at
116 * all.
117 * -------------------------------------------------------------------------- */
118
// Tag saying which member of the nextPos union below is currently active.
typedef enum {
    posTypeStep,      // fixed-layout closures: 'step' counts the next child
    posTypePtrs,      // pointers-first payload: 'ptrs' tracks the scan
    posTypeSRT,       // small (bitmap-in-word) SRT
    posTypeLargeSRT,  // out-of-line large SRT bitmap
} nextPosType;

// Cursor describing where we are within one closure's children.
typedef union {
    // fixed layout or layout specified by a field in the closure
    StgWord step;

    // layout.payload
    struct {
        // See StgClosureInfo in InfoTables.h
        StgHalfWord pos;    // index of the next pointer to visit
        StgHalfWord ptrs;   // total number of pointers in the payload
        StgPtr payload;     // base of the payload being scanned
    } ptrs;

    // SRT
    struct {
        StgClosure **srt;      // next SRT entry to examine
        StgWord srt_bitmap;    // remaining bits (shifted as we go)
    } srt;

    // Large SRT
    struct {
        StgLargeSRT *srt;
        StgWord offset;        // bit index of the next entry to examine
    } large_srt;

} nextPos;

// A typed position: which kind of cursor, plus the cursor itself.
typedef struct {
    nextPosType type;
    nextPos next;
} stackPos;

// One entry of the traverse stack: the closure whose children remain to
// be visited, the retainer of those children, and the position cursor.
typedef struct {
    StgClosure *c;
    retainer c_child_r;
    stackPos info;
} stackElement;
162
163 /*
164 Invariants:
165 firstStack points to the first block group.
166 currentStack points to the block group currently being used.
167 currentStack->free == stackLimit.
168 stackTop points to the topmost byte in the stack of currentStack.
169 Unless the whole stack is empty, stackTop must point to the topmost
170 object (or byte) in the whole stack. Thus, it is only when the whole stack
171 is empty that stackTop == stackLimit (not during the execution of push()
172 and pop()).
173 stackBottom == currentStack->start.
174 stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
175 Note:
176 When a current stack becomes empty, stackTop is set to point to
177 the topmost element on the previous block group so as to satisfy
178 the invariants described above.
179 */
180 static bdescr *firstStack = NULL;
181 static bdescr *currentStack;
182 static stackElement *stackBottom, *stackTop, *stackLimit;
183
184 /*
185 currentStackBoundary is used to mark the current stack chunk.
186 If stackTop == currentStackBoundary, it means that the current stack chunk
187 is empty. It is the responsibility of the user to keep currentStackBoundary
188 valid all the time if it is to be employed.
189 */
190 static stackElement *currentStackBoundary;
191
192 /*
193 stackSize records the current size of the stack.
194 maxStackSize records its high water mark.
195 Invariants:
196 stackSize <= maxStackSize
197 Note:
198 stackSize is just an estimate measure of the depth of the graph. The reason
199 is that some heap objects have only a single child and may not result
200 in a new element being pushed onto the stack. Therefore, at the end of
201 retainer profiling, maxStackSize + maxCStackSize is some value no greater
202 than the actual depth of the graph.
203 */
204 #if defined(DEBUG_RETAINER)
205 static int stackSize, maxStackSize;
206 #endif
207
208 // number of blocks allocated for one stack
209 #define BLOCKS_IN_STACK 1
210
211 /* -----------------------------------------------------------------------------
212 * Add a new block group to the stack.
213 * Invariants:
214 * currentStack->link == s.
215 * -------------------------------------------------------------------------- */
216 static INLINE void
217 newStackBlock( bdescr *bd )
218 {
219 currentStack = bd;
220 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
221 stackBottom = (stackElement *)bd->start;
222 stackLimit = (stackElement *)stackTop;
223 bd->free = (StgPtr)stackLimit;
224 }
225
226 /* -----------------------------------------------------------------------------
227 * Return to the previous block group.
228 * Invariants:
229 * s->link == currentStack.
230 * -------------------------------------------------------------------------- */
231 static INLINE void
232 returnToOldStack( bdescr *bd )
233 {
234 currentStack = bd;
235 stackTop = (stackElement *)bd->free;
236 stackBottom = (stackElement *)bd->start;
237 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
238 bd->free = (StgPtr)stackLimit;
239 }
240
241 /* -----------------------------------------------------------------------------
242 * Initializes the traverse stack.
243 * -------------------------------------------------------------------------- */
244 static void
245 initializeTraverseStack( void )
246 {
247 if (firstStack != NULL) {
248 freeChain(firstStack);
249 }
250
251 firstStack = allocGroup(BLOCKS_IN_STACK);
252 firstStack->link = NULL;
253 firstStack->u.back = NULL;
254
255 newStackBlock(firstStack);
256 }
257
258 /* -----------------------------------------------------------------------------
259 * Frees all the block groups in the traverse stack.
260 * Invariants:
261 * firstStack != NULL
262 * -------------------------------------------------------------------------- */
263 static void
264 closeTraverseStack( void )
265 {
266 freeChain(firstStack);
267 firstStack = NULL;
268 }
269
270 /* -----------------------------------------------------------------------------
271 * Returns true if the whole stack is empty.
272 * -------------------------------------------------------------------------- */
273 static INLINE bool
274 isEmptyRetainerStack( void )
275 {
276 return (firstStack == currentStack) && stackTop == stackLimit;
277 }
278
279 /* -----------------------------------------------------------------------------
280 * Returns size of stack
281 * -------------------------------------------------------------------------- */
#if defined(DEBUG)
W_
retainerStackBlocks( void )
{
    // Total number of blocks across the whole chain of stack chunks.
    W_ total = 0;

    for (bdescr *bd = firstStack; bd != NULL; bd = bd->link) {
        total += bd->blocks;
    }

    return total;
}
#endif
295
296 /* -----------------------------------------------------------------------------
297 * Returns true if stackTop is at the stack boundary of the current stack,
298 * i.e., if the current stack chunk is empty.
299 * -------------------------------------------------------------------------- */
300 static INLINE bool
301 isOnBoundary( void )
302 {
303 return stackTop == currentStackBoundary;
304 }
305
306 /* -----------------------------------------------------------------------------
307 * Initializes *info from ptrs and payload.
308 * Invariants:
309 * payload[] begins with ptrs pointers followed by non-pointers.
310 * -------------------------------------------------------------------------- */
311 static INLINE void
312 init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload )
313 {
314 info->type = posTypePtrs;
315 info->next.ptrs.pos = 0;
316 info->next.ptrs.ptrs = ptrs;
317 info->next.ptrs.payload = payload;
318 }
319
320 /* -----------------------------------------------------------------------------
321 * Find the next object from *info.
322 * -------------------------------------------------------------------------- */
323 static INLINE StgClosure *
324 find_ptrs( stackPos *info )
325 {
326 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
327 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
328 } else {
329 return NULL;
330 }
331 }
332
333 /* -----------------------------------------------------------------------------
334 * Initializes *info from SRT information stored in *infoTable.
335 * -------------------------------------------------------------------------- */
336 static INLINE void
337 init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable )
338 {
339 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
340 info->type = posTypeLargeSRT;
341 info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
342 info->next.large_srt.offset = 0;
343 } else {
344 info->type = posTypeSRT;
345 info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
346 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
347 }
348 }
349
350 static INLINE void
351 init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable )
352 {
353 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
354 info->type = posTypeLargeSRT;
355 info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
356 info->next.large_srt.offset = 0;
357 } else {
358 info->type = posTypeSRT;
359 info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
360 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
361 }
362 }
363
364 /* -----------------------------------------------------------------------------
365 * Find the next object from *info.
366 * -------------------------------------------------------------------------- */
static INLINE StgClosure *
find_srt( stackPos *info )
{
    StgClosure *c;
    StgWord bitmap;

    if (info->type == posTypeSRT) {
        // Small SRT bitmap
        bitmap = info->next.srt.srt_bitmap;
        while (bitmap != 0) {
            if ((bitmap & 1) != 0) {
#if defined(COMPILING_WINDOWS_DLL)
                // On Windows DLLs, a tagged SRT entry is an indirection
                // through the import table; strip the tag bit and follow it.
                if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
                    c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
                else
                    c = *(info->next.srt.srt);
#else
                c = *(info->next.srt.srt);
#endif
                // Save the position just past this entry so the next
                // call resumes after it.
                bitmap = bitmap >> 1;
                info->next.srt.srt++;
                info->next.srt.srt_bitmap = bitmap;
                return c;
            }
            bitmap = bitmap >> 1;
            info->next.srt.srt++;
        }
        // bitmap is now zero...
        return NULL;
    }
    else {
        // Large SRT bitmap
        uint32_t i = info->next.large_srt.offset;
        StgWord bitmap;

        // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
        // Reload the bitmap word for the saved offset and shift away
        // the bits already consumed on a previous call.
        bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
        bitmap = bitmap >> (i % BITS_IN(StgWord));
        while (i < info->next.large_srt.srt->l.size) {
            if ((bitmap & 1) != 0) {
                c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
                i++;
                info->next.large_srt.offset = i;
                return c;
            }
            i++;
            if (i % BITS_IN(W_) == 0) {
                // Crossed into the next word of the bitmap.
                bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
            } else {
                bitmap = bitmap >> 1;
            }
        }
        // reached the end of this bitmap.
        info->next.large_srt.offset = i;
        return NULL;
    }
}
424
425 /* -----------------------------------------------------------------------------
426 * push() pushes a stackElement representing the next child of *c
427 * onto the traverse stack. If *c has no child, *first_child is set
428 * to NULL and nothing is pushed onto the stack. If *c has only one
 * child, *first_child is set to that child and nothing is pushed onto
 * the stack. If *c has two or more children, *first_child is set
431 * to the first child and a stackElement representing the second
432 * child is pushed onto the stack.
433
434 * Invariants:
435 * *c_child_r is the most recent retainer of *c's children.
436 * *c is not any of TSO, AP, PAP, AP_STACK, which means that
437 * there cannot be any stack objects.
438 * Note: SRTs are considered to be children as well.
439 * -------------------------------------------------------------------------- */
static INLINE void
push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
{
    stackElement se;
    bdescr *nbd;      // Next Block Descriptor

#if defined(DEBUG_RETAINER)
    // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    // Stack-carrying closures (TSO, AP_STACK, ...) go through
    // retainStack() instead of being pushed here.
    ASSERT(get_itbl(c)->type != TSO);
    ASSERT(get_itbl(c)->type != AP_STACK);

    //
    // fill in se
    //

    se.c = c;
    se.c_child_r = c_child_r;

    // fill in se.info
    switch (get_itbl(c)->type) {
        // no child, no SRT
    case CONSTR_0_1:
    case CONSTR_0_2:
    case ARR_WORDS:
    case COMPACT_NFDATA:
        *first_child = NULL;
        return;

        // one child (fixed), no SRT
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
        *first_child = ((StgMutVar *)c)->var;
        return;
    case THUNK_SELECTOR:
        *first_child = ((StgSelector *)c)->selectee;
        return;
    case BLACKHOLE:
        *first_child = ((StgInd *)c)->indirectee;
        return;
    case CONSTR_1_0:
    case CONSTR_1_1:
        *first_child = c->payload[0];
        return;

        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;

        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
    case CONSTR_2_0:
        *first_child = c->payload[0];     // return the first pointer
        // se.info.type = posTypeStep;
        // se.info.next.step = 2;         // 2 = second
        break;

        // three children (fixed), no SRT
        // need to push a stackElement
    case MVAR_CLEAN:
    case MVAR_DIRTY:
        // head must be TSO and the head of a linked list of TSOs.
        // Should it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;            // 2 = second
        break;

        // three children (fixed), no SRT
    case WEAK:
        *first_child = ((StgWeak *)c)->key;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;
        break;

        // layout.payload.ptrs, no SRT
    case TVAR:
    case CONSTR:
    case CONSTR_NOCAF:
    case PRIM:
    case MUT_PRIM:
    case BCO:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;   // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // StgMutArrPtr.ptrs, no SRT
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
    case SMALL_MUT_ARR_PTRS_FROZEN:
    case SMALL_MUT_ARR_PTRS_FROZEN0:
        init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // layout.payload.ptrs, SRT
    case FUN:           // *c is a heap object.
    case FUN_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;

    case THUNK:
    case THUNK_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
    case FUN_1_0:
    case FUN_1_1:
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;

    case THUNK_1_0:
    case THUNK_1_1:
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;

    case FUN_STATIC:      // *c is a heap object.
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        // fallthrough
    case FUN_0_1:
    case FUN_0_2:
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

        // SRT only
    case THUNK_STATIC:
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        // fallthrough
    case THUNK_0_1:
    case THUNK_0_2:
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

    case TREC_CHUNK:
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
        se.info.next.step = 0;  // entry no.
        break;

        // cannot appear
    case PAP:
    case AP:
    case AP_STACK:
    case TSO:
    case STACK:
    case IND_STATIC:
        // stack objects
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case UNDERFLOW_FRAME:
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
        // invalid objects
    case IND:
    case INVALID_OBJECT:
    default:
        barf("Invalid object *c in push()");
        return;
    }

    // The current stack chunk is full: save it and switch to (or
    // allocate) the next chunk before performing the push.
    if (stackTop - 1 < stackBottom) {
#if defined(DEBUG_RETAINER)
        // debugBelch("push() to the next stack.\n");
#endif
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
        currentStack->free = (StgPtr)stackTop;

        if (currentStack->link == NULL) {
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
            nbd->u.back = currentStack;
            currentStack->link = nbd;
        } else
            nbd = currentStack->link;

        newStackBlock(nbd);
    }

    // adjust stackTop (actual push)
    stackTop--;
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.

    // ToDo: The line below leads to the warning:
    //    warning: 'se.info.type' may be used uninitialized in this function
    // This is caused by the fact that there are execution paths through the
    // large switch statement above where some cases do not initialize this
    // field. Is this really harmless? Can we avoid the warning?
    *stackTop = se;

#if defined(DEBUG_RETAINER)
    stackSize++;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    // ASSERT(stackSize >= 0);
    // debugBelch("stackSize = %d\n", stackSize);
#endif
}
680
681 /* -----------------------------------------------------------------------------
682 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
683 * Invariants:
684 * stackTop cannot be equal to stackLimit unless the whole stack is
685 * empty, in which case popOff() is not allowed.
686 * Note:
687 * You can think of popOffReal() as a part of popOff() which is
 * executed at the end of popOff() if necessary. Since popOff() is
689 * likely to be executed quite often while popOffReal() is not, we
690 * separate popOffReal() from popOff(), which is declared as an
691 * INLINE function (for the sake of execution speed). popOffReal()
692 * is called only within popOff() and nowhere else.
693 * -------------------------------------------------------------------------- */
static void
popOffReal(void)
{
    bdescr *pbd;      // Previous Block Descriptor

#if defined(DEBUG_RETAINER)
    // debugBelch("pop() to the previous stack.\n");
#endif

    // Only reached from popOff() when exactly one element remains in
    // the current chunk.
    ASSERT(stackTop + 1 == stackLimit);
    ASSERT(stackBottom == (stackElement *)currentStack->start);

    if (firstStack == currentStack) {
        // The stack is completely empty.
        stackTop++;
        ASSERT(stackTop == stackLimit);
#if defined(DEBUG_RETAINER)
        stackSize--;
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
          ASSERT(stackSize >= 0);
          debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
    currentStack->free = (StgPtr)stackLimit;

    // find the previous block descriptor
    pbd = currentStack->u.back;
    ASSERT(pbd != NULL);

    // Re-activate the previous chunk; its saved free pointer restores
    // stackTop to the topmost element stored there.
    returnToOldStack(pbd);

#if defined(DEBUG_RETAINER)
    stackSize--;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    /*
      ASSERT(stackSize >= 0);
      debugBelch("stackSize = %d\n", stackSize);
    */
#endif
}
740
static INLINE void
popOff(void) {
#if defined(DEBUG_RETAINER)
    // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    ASSERT(stackTop != stackLimit);
    ASSERT(!isEmptyRetainerStack());

    // <= (instead of <) is wrong!
    // Fast path: more than one element remains in this chunk, so just
    // bump stackTop without touching the block chain.
    if (stackTop + 1 < stackLimit) {
        stackTop++;
#if defined(DEBUG_RETAINER)
        stackSize--;
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
          ASSERT(stackSize >= 0);
          debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    // Last element of this chunk: may need to switch back to the
    // previous block group.
    popOffReal();
}
766
767 /* -----------------------------------------------------------------------------
768 * Finds the next object to be considered for retainer profiling and store
769 * its pointer to *c.
770 * Test if the topmost stack element indicates that more objects are left,
771 * and if so, retrieve the first object and store its pointer to *c. Also,
772 * set *cp and *r appropriately, both of which are stored in the stack element.
773 * The topmost stack element then is overwritten so as for it to now denote
774 * the next object.
775 * If the topmost stack element indicates no more objects are left, pop
776 * off the stack element until either an object can be retrieved or
777 * the current stack chunk becomes empty, indicated by true returned by
778 * isOnBoundary(), in which case *c is set to NULL.
779 * Note:
780 * It is okay to call this function even when the current stack chunk
781 * is empty.
782 * -------------------------------------------------------------------------- */
static INLINE void
pop( StgClosure **c, StgClosure **cp, retainer *r )
{
    stackElement *se;

#if defined(DEBUG_RETAINER)
    // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    // Loop until we either hand out a child (*c, with its parent *cp and
    // retainer *r) or exhaust the current stack chunk (*c = NULL).
    do {
        if (isOnBoundary()) {     // if the current stack chunk is depleted
            *c = NULL;
            return;
        }

        se = stackTop;

        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

            // three children (fixed), no SRT
            // need to push a stackElement
        case MVAR_CLEAN:
        case MVAR_DIRTY:
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;             // move to the next step
                // no popOff
            } else {
                *c = ((StgMVar *)se->c)->value;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
                // no popOff
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow.  So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            TRecEntry *entry;
            uint32_t entry_no = se->info.next.step >> 2;
            uint32_t field_no = se->info.next.step & 3;
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                // All entries visited.
                *c = NULL;
                popOff();
                return;
            }
            entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            *cp = se->c;
            *r = se->c_child_r;
            se->info.next.step++;
            return;
        }

        case TVAR:
        case CONSTR:
        case PRIM:
        case MUT_PRIM:
        case BCO:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
        case MUT_ARR_PTRS_FROZEN:
        case MUT_ARR_PTRS_FROZEN0:
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
                // This closure is exhausted; pop it and try the next
                // stack element (loop again).
                popOff();
                break;
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // layout.payload.ptrs, SRT
        case FUN:         // always a heap object
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                // Pointers exhausted: switch this element over to the SRT.
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                // Pointers exhausted: switch this element over to the SRT.
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
            if (*c != NULL) {
                *cp = se->c;
                *r = se->c_child_r;
                return;
            }
            popOff();
            break;

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
        case STACK:
        case IND_STATIC:
        case CONSTR_NOCAF:
            // stack objects
        case UPDATE_FRAME:
        case CATCH_FRAME:
        case UNDERFLOW_FRAME:
        case STOP_FRAME:
        case RET_BCO:
        case RET_SMALL:
        case RET_BIG:
            // invalid objects
        case IND:
        case INVALID_OBJECT:
        default:
            barf("Invalid object *c in pop()");
            return;
        }
    } while (true);
}
969
970 /* -----------------------------------------------------------------------------
971 * RETAINER PROFILING ENGINE
972 * -------------------------------------------------------------------------- */
973
974 void
975 initRetainerProfiling( void )
976 {
977 initializeAllRetainerSet();
978 retainerGeneration = 0;
979 }
980
981 /* -----------------------------------------------------------------------------
982 * This function must be called before f-closing prof_file.
983 * -------------------------------------------------------------------------- */
void
endRetainerProfiling( void )
{
#if defined(SECOND_APPROACH)
    // Dump every retainer set accumulated during the run to the .prof file.
    outputAllRetainerSet(prof_file);
#endif
}
991
992 /* -----------------------------------------------------------------------------
993 * Returns the actual pointer to the retainer set of the closure *c.
994 * It may adjust RSET(c) subject to flip.
995 * Side effects:
996 * RSET(c) is initialized to NULL if its current value does not
997 * conform to flip.
998 * Note:
999 * Even though this function has side effects, they CAN be ignored because
1000 * subsequent calls to retainerSetOf() always result in the same return value
1001 * and retainerSetOf() is the only way to retrieve retainerSet of a given
1002 * closure.
1003 * We have to perform an XOR (^) operation each time a closure is examined.
1004 * The reason is that we do not know when a closure is visited last.
1005 * -------------------------------------------------------------------------- */
1006 static INLINE void
1007 maybeInitRetainerSet( StgClosure *c )
1008 {
1009 if (!isRetainerSetFieldValid(c)) {
1010 setRetainerSetToNull(c);
1011 }
1012 }
1013
1014 /* -----------------------------------------------------------------------------
1015 * Returns true if *c is a retainer.
1016 * -------------------------------------------------------------------------- */
static INLINE bool
isRetainer( StgClosure *c )
{
    // See Note [What is a retainer?] near the top of this file.
    switch (get_itbl(c)->type) {
        //
        //  True case
        //
        // TSOs MUST be retainers: they constitute the set of roots.
    case TSO:
    case STACK:

        // mutable objects
    case MUT_PRIM:
    case MVAR_CLEAN:
    case MVAR_DIRTY:
    case TVAR:
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
    case BLOCKING_QUEUE:

        // thunks are retainers.
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
    case AP:
    case AP_STACK:

        // Static thunks, or CAFS, are obviously retainers.
    case THUNK_STATIC:

        // WEAK objects are roots; there is separate code in which traversing
        // begins from WEAK objects.
    case WEAK:
        return true;

        //
        // False case
        //

        // constructors
    case CONSTR:
    case CONSTR_NOCAF:
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
        // functions
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
        // partial applications
    case PAP:
        // indirection
        // IND_STATIC used to be an error, but at the moment it can happen
        // as isAlive doesn't look through IND_STATIC as it ignores static
        // closures. See trac #3956 for a program that hit this error.
    case IND_STATIC:
    case BLACKHOLE:
    case WHITEHOLE:
        // static objects
    case FUN_STATIC:
        // misc
    case PRIM:
    case BCO:
    case ARR_WORDS:
    case COMPACT_NFDATA:
        // STM
    case TREC_CHUNK:
        // immutable arrays
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:
    case SMALL_MUT_ARR_PTRS_FROZEN:
    case SMALL_MUT_ARR_PTRS_FROZEN0:
        return false;

        //
        // Error case
        //
        // Stack objects are invalid because they are never treated as
        // legal objects during retainer profiling.
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case CATCH_RETRY_FRAME:
    case CATCH_STM_FRAME:
    case UNDERFLOW_FRAME:
    case ATOMICALLY_FRAME:
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
    case RET_FUN:
        // other cases
    case IND:
    case INVALID_OBJECT:
    default:
        barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
        return false;
    }
}
1128
1129 /* -----------------------------------------------------------------------------
1130 * Returns the retainer function value for the closure *c, i.e., R(*c).
1131 * This function does NOT return the retainer(s) of *c.
1132 * Invariants:
1133 * *c must be a retainer.
1134 * Note:
1135 * Depending on the definition of this function, the maintenance of retainer
1136 * sets can be made easier. If most retainer sets are likely to be created
1137 * again across garbage collections, refreshAllRetainerSet() in
1138 * RetainerSet.c can simply do nothing.
1139 * If this is not the case, we can free all the retainer sets and
1140 * re-initialize the hash table.
1141 * See refreshAllRetainerSet() in RetainerSet.c.
1142 * -------------------------------------------------------------------------- */
static INLINE retainer
getRetainerFrom( StgClosure *c )
{
    ASSERT(isRetainer(c));

    // NOTE(review): exactly one of the three scheme macros must be defined
    // at build time; with none defined this function would fall off the
    // end without returning a value -- confirm the build enforces this.
#if defined(RETAINER_SCHEME_INFO)
    // Retainer scheme 1: retainer = info table
    return get_itbl(c);
#elif defined(RETAINER_SCHEME_CCS)
    // Retainer scheme 2: retainer = cost centre stack
    return c->header.prof.ccs;
#elif defined(RETAINER_SCHEME_CC)
    // Retainer scheme 3: retainer = cost centre
    return c->header.prof.ccs->cc;
#endif
}
1159
1160 /* -----------------------------------------------------------------------------
1161 * Associates the retainer set *s with the closure *c, that is, *s becomes
1162 * the retainer set of *c.
1163 * Invariants:
1164 * c != NULL
1165 * s != NULL
1166 * -------------------------------------------------------------------------- */
1167 static INLINE void
1168 associate( StgClosure *c, RetainerSet *s )
1169 {
1170 // StgWord has the same size as pointers, so the following type
1171 // casting is okay.
1172 RSET(c) = (RetainerSet *)((StgWord)s | flip);
1173 }
1174
1175 /* -----------------------------------------------------------------------------
1176 Call retainClosure for each of the closures covered by a large bitmap.
1177 -------------------------------------------------------------------------- */
1178
1179 static void
1180 retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, uint32_t size,
1181 StgClosure *c, retainer c_child_r)
1182 {
1183 uint32_t i, b;
1184 StgWord bitmap;
1185
1186 b = 0;
1187 bitmap = large_bitmap->bitmap[b];
1188 for (i = 0; i < size; ) {
1189 if ((bitmap & 1) == 0) {
1190 retainClosure((StgClosure *)*p, c, c_child_r);
1191 }
1192 i++;
1193 p++;
1194 if (i % BITS_IN(W_) == 0) {
1195 b++;
1196 bitmap = large_bitmap->bitmap[b];
1197 } else {
1198 bitmap = bitmap >> 1;
1199 }
1200 }
1201 }
1202
1203 static INLINE StgPtr
1204 retain_small_bitmap (StgPtr p, uint32_t size, StgWord bitmap,
1205 StgClosure *c, retainer c_child_r)
1206 {
1207 while (size > 0) {
1208 if ((bitmap & 1) == 0) {
1209 retainClosure((StgClosure *)*p, c, c_child_r);
1210 }
1211 p++;
1212 bitmap = bitmap >> 1;
1213 size--;
1214 }
1215 return p;
1216 }
1217
1218 /* -----------------------------------------------------------------------------
1219 * Call retainClosure for each of the closures in an SRT.
1220 * ------------------------------------------------------------------------- */
1221
1222 static void
1223 retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
1224 {
1225 uint32_t i, b, size;
1226 StgWord bitmap;
1227 StgClosure **p;
1228
1229 b = 0;
1230 p = (StgClosure **)srt->srt;
1231 size = srt->l.size;
1232 bitmap = srt->l.bitmap[b];
1233 for (i = 0; i < size; ) {
1234 if ((bitmap & 1) != 0) {
1235 retainClosure((StgClosure *)*p, c, c_child_r);
1236 }
1237 i++;
1238 p++;
1239 if (i % BITS_IN(W_) == 0) {
1240 b++;
1241 bitmap = srt->l.bitmap[b];
1242 } else {
1243 bitmap = bitmap >> 1;
1244 }
1245 }
1246 }
1247
1248 static INLINE void
1249 retainSRT (StgClosure **srt, uint32_t srt_bitmap, StgClosure *c,
1250 retainer c_child_r)
1251 {
1252 uint32_t bitmap;
1253 StgClosure **p;
1254
1255 bitmap = srt_bitmap;
1256 p = srt;
1257
1258 if (bitmap == (StgHalfWord)(-1)) {
1259 retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1260 return;
1261 }
1262
1263 while (bitmap != 0) {
1264 if ((bitmap & 1) != 0) {
1265 #if defined(COMPILING_WINDOWS_DLL)
1266 if ( (unsigned long)(*srt) & 0x1 ) {
1267 retainClosure(* (StgClosure**) ((unsigned long) (*srt) & ~0x1),
1268 c, c_child_r);
1269 } else {
1270 retainClosure(*srt,c,c_child_r);
1271 }
1272 #else
1273 retainClosure(*srt,c,c_child_r);
1274 #endif
1275 }
1276 p++;
1277 bitmap = bitmap >> 1;
1278 }
1279 }
1280
1281 /* -----------------------------------------------------------------------------
1282 * Process all the objects in the stack chunk from stackStart to stackEnd
1283 * with *c and *c_child_r being their parent and their most recent retainer,
1284 * respectively. Treat stackOptionalFun as another child of *c if it is
1285 * not NULL.
1286 * Invariants:
1287 * *c is one of the following: TSO, AP_STACK.
1288 * If *c is TSO, c == c_child_r.
1289 * stackStart < stackEnd.
1290 * RSET(c) and RSET(c_child_r) are valid, i.e., their
1291 * interpretation conforms to the current value of flip (even when they
1292 * are interpreted to be NULL).
1293 * If *c is TSO, its state is not ThreadComplete,or ThreadKilled,
1294 * which means that its stack is ready to process.
1295 * Note:
 *   This code was almost plagiarized from GC.c! For each pointer,
1297 * retainClosure() is invoked instead of evacuate().
1298 * -------------------------------------------------------------------------- */
static void
retainStack( StgClosure *c, retainer c_child_r,
             StgPtr stackStart, StgPtr stackEnd )
{
    stackElement *oldStackBoundary;
    StgPtr p;
    const StgRetInfoTable *info;
    StgWord bitmap;
    uint32_t size;

#if defined(DEBUG_RETAINER)
    cStackSize++;
    if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
#endif

    /*
      Each invocation of retainStack() creates a new virtual
      stack. Since all such stacks share a single common stack, we
      record the current currentStackBoundary, which will be restored
      at the exit.
    */
    oldStackBoundary = currentStackBoundary;
    currentStackBoundary = stackTop;

#if defined(DEBUG_RETAINER)
    // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
#endif

    // NOTE(review): the AP_STACK case in retainClosure() also calls
    // retainStack() with *c being an AP_STACK, which would trip this
    // assertion in a DEBUG build -- confirm the intended invariant
    // (the header comment above still says *c may be TSO or AP_STACK).
    ASSERT(get_itbl(c)->type == STACK);

    // Walk the frames from stackStart up to stackEnd, retaining every
    // pointer slot described by each frame's bitmap and SRT.
    p = stackStart;
    while (p < stackEnd) {
        info = get_ret_itbl((StgClosure *)p);

        switch(info->i.type) {

        case UPDATE_FRAME:
            retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
            p += sizeofW(StgUpdateFrame);
            continue;

        case UNDERFLOW_FRAME:
        case STOP_FRAME:
        case CATCH_FRAME:
        case CATCH_STM_FRAME:
        case CATCH_RETRY_FRAME:
        case ATOMICALLY_FRAME:
        case RET_SMALL:
            // small-bitmap frames: one word of bitmap describes the payload
            bitmap = BITMAP_BITS(info->i.layout.bitmap);
            size = BITMAP_SIZE(info->i.layout.bitmap);
            p++;
            p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

        follow_srt:
            retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
            continue;

        case RET_BCO: {
            StgBCO *bco;

            p++;
            // the word after the header is the BCO itself; retain it,
            // then walk the frame payload under the BCO's own bitmap
            retainClosure((StgClosure *)*p, c, c_child_r);
            bco = (StgBCO *)*p;
            p++;
            size = BCO_BITMAP_SIZE(bco);
            retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
            p += size;
            continue;
        }

        // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
        case RET_BIG:
            size = GET_LARGE_BITMAP(&info->i)->size;
            p++;
            retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
                                size, c, c_child_r);
            p += size;
            // and don't forget to follow the SRT
            goto follow_srt;

        case RET_FUN: {
            StgRetFun *ret_fun = (StgRetFun *)p;
            const StgFunInfoTable *fun_info;

            retainClosure(ret_fun->fun, c, c_child_r);
            fun_info = get_fun_itbl(UNTAG_CONST_CLOSURE(ret_fun->fun));

            // the payload layout is dictated by the function's own
            // argument bitmap, not by the frame's info table
            p = (P_)&ret_fun->payload;
            switch (fun_info->f.fun_type) {
            case ARG_GEN:
                bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
                size = BITMAP_SIZE(fun_info->f.b.bitmap);
                p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
                break;
            case ARG_GEN_BIG:
                size = GET_FUN_LARGE_BITMAP(fun_info)->size;
                retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
                                    size, c, c_child_r);
                p += size;
                break;
            default:
                // standard argument pattern: look the bitmap up in the
                // static table
                bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
                size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
                p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
                break;
            }
            goto follow_srt;
        }

        default:
            barf("Invalid object found in retainStack(): %d",
                 (int)(info->i.type));
        }
    }

    // restore currentStackBoundary
    currentStackBoundary = oldStackBoundary;
#if defined(DEBUG_RETAINER)
    // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
#endif

#if defined(DEBUG_RETAINER)
    cStackSize--;
#endif
}
1424
1425 /* ----------------------------------------------------------------------------
1426 * Call retainClosure for each of the children of a PAP/AP
1427 * ------------------------------------------------------------------------- */
1428
/*
 * Retain the function and the n_args payload words of a PAP or AP,
 * choosing the bitmap that describes the payload from the function's
 * info table.  Returns the pointer just past the words consumed.
 */
static INLINE StgPtr
retain_PAP_payload (StgClosure *pap, /* NOT tagged */
                    retainer c_child_r, /* NOT tagged */
                    StgClosure *fun, /* tagged */
                    StgClosure** payload, StgWord n_args)
{
    StgPtr p;
    StgWord bitmap;
    const StgFunInfoTable *fun_info;

    retainClosure(fun, pap, c_child_r);
    fun = UNTAG_CLOSURE(fun);
    fun_info = get_fun_itbl(fun);
    ASSERT(fun_info->i.type != PAP);

    p = (StgPtr)payload;

    switch (fun_info->f.fun_type) {
    case ARG_GEN:
        // small bitmap stored directly in the function's info table
        bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
        p = retain_small_bitmap(p, n_args, bitmap,
                                pap, c_child_r);
        break;
    case ARG_GEN_BIG:
        retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
                            n_args, pap, c_child_r);
        p += n_args;
        break;
    case ARG_BCO:
        // byte-code function: the BCO carries its own bitmap
        retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun),
                            n_args, pap, c_child_r);
        p += n_args;
        break;
    default:
        // standard argument pattern: bitmap from the static table
        bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
        p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
        break;
    }
    return p;
}
1469
1470 /* -----------------------------------------------------------------------------
 * Compute the retainer set of *c0 and all its descendants by traversing.
1472 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
1473 * Invariants:
1474 * c0 = cp0 = r0 holds only for root objects.
1475 * RSET(cp0) and RSET(r0) are valid, i.e., their
1476 * interpretation conforms to the current value of flip (even when they
1477 * are interpreted to be NULL).
1478 * However, RSET(c0) may be corrupt, i.e., it may not conform to
1479 * the current value of flip. If it does not, during the execution
1480 * of this function, RSET(c0) must be initialized as well as all
1481 * its descendants.
1482 * Note:
1483 * stackTop must be the same at the beginning and the exit of this function.
1484 * *c0 can be TSO (as well as AP_STACK).
1485 * -------------------------------------------------------------------------- */
static void
retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
{
    // c = Current closure (possibly tagged)
    // cp = Current closure's Parent (NOT tagged)
    // r = current closures' most recent Retainer (NOT tagged)
    // c_child_r = current closure's children's most recent retainer
    // first_child = first child of c
    StgClosure *c, *cp, *first_child;
    RetainerSet *s, *retainerSetOfc;
    retainer r, c_child_r;
    StgWord typeOfc;

#if defined(DEBUG_RETAINER)
    // StgPtr oldStackTop;
#endif

#if defined(DEBUG_RETAINER)
    // oldStackTop = stackTop;
    // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
#endif

    // (c, cp, r) = (c0, cp0, r0)
    c = c0;
    cp = cp0;
    r = r0;
    goto inner_loop;

loop:
    //debugBelch("loop");
    // pop to (c, cp, r);
    pop(&c, &cp, &r);

    if (c == NULL) {
        // traverse stack exhausted: the whole subgraph of c0 is done
#if defined(DEBUG_RETAINER)
        // debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
#endif
        return;
    }

    //debugBelch("inner_loop");

inner_loop:
    c = UNTAG_CLOSURE(c);

    // c = current closure under consideration,
    // cp = current closure's parent,
    // r = current closure's most recent retainer
    //
    // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
    //   RSET(cp) and RSET(r) are valid.
    //   RSET(c) is valid only if c has been visited before.
    //
    // Loop invariants (on the relation between c, cp, and r)
    //   if cp is not a retainer, r belongs to RSET(cp).
    //   if cp is a retainer, r == cp.

    typeOfc = get_itbl(c)->type;

#if defined(DEBUG_RETAINER)
    switch (typeOfc) {
    case IND_STATIC:
    case CONSTR_NOCAF:
    case THUNK_STATIC:
    case FUN_STATIC:
        break;
    default:
        if (retainerSetOf(c) == NULL) { // first visit?
            costArray[typeOfc] += cost(c);
            sumOfNewCost += cost(c);
        }
        break;
    }
#endif

    // special cases
    switch (typeOfc) {
    case TSO:
        // Finished/killed threads no longer retain anything; skip them.
        if (((StgTSO *)c)->what_next == ThreadComplete ||
            ((StgTSO *)c)->what_next == ThreadKilled) {
#if defined(DEBUG_RETAINER)
            debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
#endif
            goto loop;
        }
        break;

    case IND_STATIC:
        // We just skip IND_STATIC, so its retainer set is never computed.
        c = ((StgIndStatic *)c)->indirectee;
        goto inner_loop;
        // static objects with no pointers out, so goto loop.
    case CONSTR_NOCAF:
        // It is not just enough not to compute the retainer set for *c; it is
        // mandatory because CONSTR_NOCAF are not reachable from
        // scavenged_static_objects, the list from which is assumed to traverse
        // all static objects after major garbage collections.
        goto loop;
    case THUNK_STATIC:
    case FUN_STATIC:
        if (get_itbl(c)->srt_bitmap == 0) {
            // No need to compute the retainer set; no dynamic objects
            // are reachable from *c.
            //
            // Static objects: if we traverse all the live closures,
            // including static closures, during each heap census then
            // we will observe that some static closures appear and
            // disappear.  eg. a closure may contain a pointer to a
            // static function 'f' which is not otherwise reachable
            // (it doesn't indirectly point to any CAFs, so it doesn't
            // appear in any SRTs), so we would find 'f' during
            // traversal.  However on the next sweep there may be no
            // closures pointing to 'f'.
            //
            // We must therefore ignore static closures whose SRT is
            // empty, because these are exactly the closures that may
            // "appear".  A closure with a non-empty SRT, and which is
            // still required, will always be reachable.
            //
            // But what about CONSTR?  Surely these may be able
            // to appear, and they don't have SRTs, so we can't
            // check.  So for now, we're calling
            // resetStaticObjectForRetainerProfiling() from the
            // garbage collector to reset the retainer sets in all the
            // reachable static objects.
            goto loop;
        }
        /* fallthrough: static thunk/function with a non-empty SRT is
           processed like any other closure */
    default:
        break;
    }

    // The above objects are ignored in computing the average number of times
    // an object is visited.
    timesAnyObjectVisited++;

    // If this is the first visit to c, initialize its retainer set.
    maybeInitRetainerSet(c);
    retainerSetOfc = retainerSetOf(c);

    // Now compute s:
    //    isRetainer(cp) == true  => s == NULL
    //    isRetainer(cp) == false => s == cp.retainer
    if (isRetainer(cp))
        s = NULL;
    else
        s = retainerSetOf(cp);

    // (c, cp, r, s) is available.

    // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
    if (retainerSetOfc == NULL) {
        // This is the first visit to *c.
        numObjectVisited++;

        if (s == NULL)
            associate(c, singleton(r));
        else
            // s is actually the retainer set of *c!
            associate(c, s);

        // compute c_child_r
        c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
    } else {
        // This is not the first visit to *c.
        if (isMember(r, retainerSetOfc))
            goto loop;          // no need to process child

        if (s == NULL)
            associate(c, addElement(r, retainerSetOfc));
        else {
            // s is not NULL and cp is not a retainer. This means that
            // each time *cp is visited, so is *c. Thus, if s has
            // exactly one more element in its retainer set than c, s
            // is also the new retainer set for *c.
            if (s->num == retainerSetOfc->num + 1) {
                associate(c, s);
            }
            // Otherwise, just add R_r to the current retainer set of *c.
            else {
                associate(c, addElement(r, retainerSetOfc));
            }
        }

        if (isRetainer(c))
            goto loop;          // no need to process child

        // compute c_child_r
        c_child_r = r;
    }

    // now, RSET() of all of *c, *cp, and *r is valid.
    // (c, c_child_r) are available.

    // process child

    // Special case closures: we process these all in one go rather
    // than attempting to save the current position, because doing so
    // would be hard.
    switch (typeOfc) {
    case STACK:
        // walk the whole stack chunk between sp and the stack end
        retainStack(c, c_child_r,
                    ((StgStack *)c)->sp,
                    ((StgStack *)c)->stack + ((StgStack *)c)->stack_size);
        goto loop;

    case TSO:
    {
        StgTSO *tso = (StgTSO *)c;

        retainClosure((StgClosure*) tso->stackobj, c, c_child_r);
        retainClosure((StgClosure*) tso->blocked_exceptions, c, c_child_r);
        retainClosure((StgClosure*) tso->bq, c, c_child_r);
        retainClosure((StgClosure*) tso->trec, c, c_child_r);
        // block_info.closure is only a closure pointer for these
        // blocking reasons; for others it holds non-pointer data
        if (   tso->why_blocked == BlockedOnMVar
            || tso->why_blocked == BlockedOnMVarRead
            || tso->why_blocked == BlockedOnBlackHole
            || tso->why_blocked == BlockedOnMsgThrowTo
            ) {
            retainClosure(tso->block_info.closure, c, c_child_r);
        }
        goto loop;
    }

    case PAP:
    {
        StgPAP *pap = (StgPAP *)c;
        retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
        goto loop;
    }

    case AP:
    {
        StgAP *ap = (StgAP *)c;
        retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
        goto loop;
    }

    case AP_STACK:
        retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
        retainStack(c, c_child_r,
                    (StgPtr)((StgAP_STACK *)c)->payload,
                    (StgPtr)((StgAP_STACK *)c)->payload +
                             ((StgAP_STACK *)c)->size);
        goto loop;
    }

    push(c, c_child_r, &first_child);

    // If first_child is null, c has no child.
    // If first_child is not null, the top stack element points to the next
    // object. push() may or may not push a stackElement on the stack.
    if (first_child == NULL)
        goto loop;

    // (c, cp, r) = (first_child, c, c_child_r)
    r = c_child_r;
    cp = c;
    c = first_child;
    goto inner_loop;
}
1746
1747 /* -----------------------------------------------------------------------------
1748 * Compute the retainer set for every object reachable from *tl.
1749 * -------------------------------------------------------------------------- */
1750 static void
1751 retainRoot(void *user STG_UNUSED, StgClosure **tl)
1752 {
1753 StgClosure *c;
1754
1755 // We no longer assume that only TSOs and WEAKs are roots; any closure can
1756 // be a root.
1757
1758 ASSERT(isEmptyRetainerStack());
1759 currentStackBoundary = stackTop;
1760
1761 c = UNTAG_CLOSURE(*tl);
1762 maybeInitRetainerSet(c);
1763 if (c != &stg_END_TSO_QUEUE_closure && isRetainer(c)) {
1764 retainClosure(c, c, getRetainerFrom(c));
1765 } else {
1766 retainClosure(c, c, CCS_SYSTEM);
1767 }
1768
1769 // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
1770 // *tl might be a TSO which is ThreadComplete, in which
1771 // case we ignore it for the purposes of retainer profiling.
1772 }
1773
1774 /* -----------------------------------------------------------------------------
1775 * Compute the retainer set for each of the objects in the heap.
1776 * -------------------------------------------------------------------------- */
1777 static void
1778 computeRetainerSet( void )
1779 {
1780 StgWeak *weak;
1781 uint32_t g, n;
1782 StgPtr ml;
1783 bdescr *bd;
1784 #if defined(DEBUG_RETAINER)
1785 RetainerSet *rtl;
1786 RetainerSet tmpRetainerSet;
1787 #endif
1788
1789 markCapabilities(retainRoot, NULL); // for scheduler roots
1790
1791 // This function is called after a major GC, when key, value, and finalizer
1792 // all are guaranteed to be valid, or reachable.
1793 //
1794 // The following code assumes that WEAK objects are considered to be roots
1795 // for retainer profilng.
1796 for (n = 0; n < n_capabilities; n++) {
1797 // NB: after a GC, all nursery weak_ptr_lists have been migrated
1798 // to the global lists living in the generations
1799 ASSERT(capabilities[n]->weak_ptr_list_hd == NULL);
1800 ASSERT(capabilities[n]->weak_ptr_list_tl == NULL);
1801 }
1802 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1803 for (weak = generations[g].weak_ptr_list; weak != NULL; weak = weak->link) {
1804 // retainRoot((StgClosure *)weak);
1805 retainRoot(NULL, (StgClosure **)&weak);
1806 }
1807 }
1808
1809 // Consider roots from the stable ptr table.
1810 markStableTables(retainRoot, NULL);
1811
1812 // The following code resets the rs field of each unvisited mutable
1813 // object (computing sumOfNewCostExtra and updating costArray[] when
1814 // debugging retainer profiler).
1815 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1816 // NOT true: even G0 has a block on its mutable list
1817 // ASSERT(g != 0 || (generations[g].mut_list == NULL));
1818
1819 // Traversing through mut_list is necessary
1820 // because we can find MUT_VAR objects which have not been
1821 // visited during retainer profiling.
1822 for (n = 0; n < n_capabilities; n++) {
1823 for (bd = capabilities[n]->mut_lists[g]; bd != NULL; bd = bd->link) {
1824 for (ml = bd->start; ml < bd->free; ml++) {
1825
1826 maybeInitRetainerSet((StgClosure *)*ml);
1827
1828 #if defined(DEBUG_RETAINER)
1829 rtl = retainerSetOf((StgClosure *)*ml);
1830 if (rtl == NULL) {
1831 // first visit to *ml
1832 // This is a violation of the interface rule!
1833 RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
1834
1835 switch (get_itbl((StgClosure *)ml)->type) {
1836 case IND_STATIC:
1837 // no cost involved
1838 break;
1839 case CONSTR_NOCAF:
1840 case THUNK_STATIC:
1841 case FUN_STATIC:
1842 barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
1843 break;
1844 default:
1845 // dynamic objects
1846 costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
1847 sumOfNewCostExtra += cost((StgClosure *)ml);
1848 break;
1849 }
1850 }
1851 #endif
1852 }
1853 }
1854 }
1855 }
1856 }
1857
1858 /* -----------------------------------------------------------------------------
1859 * Traverse all static objects for which we compute retainer sets,
1860 * and reset their rs fields to NULL, which is accomplished by
1861 * invoking maybeInitRetainerSet(). This function must be called
1862 * before zeroing all objects reachable from scavenged_static_objects
1863 * in the case of major garbage collections. See GarbageCollect() in
1864 * GC.c.
1865 * Note:
1866 * The mut_once_list of the oldest generation must also be traversed?
1867 * Why? Because if the evacuation of an object pointed to by a static
1868 * indirection object fails, it is put back to the mut_once_list of
1869 * the oldest generation.
1870 * However, this is not necessary because any static indirection objects
1871 * are just traversed through to reach dynamic objects. In other words,
1872 * they are not taken into consideration in computing retainer sets.
1873 *
1874 * SDM (20/7/2011): I don't think this is doing anything sensible,
1875 * because it happens before retainerProfile() and at the beginning of
 * retainerProfile() we change the sense of 'flip'. So all of the
1877 * calls to maybeInitRetainerSet() here are initialising retainer sets
1878 * with the wrong flip. Also, I don't see why this is necessary. I
1879 * added a maybeInitRetainerSet() call to retainRoot(), and that seems
1880 * to have fixed the assertion failure in retainerSetOf() I was
1881 * encountering.
1882 * -------------------------------------------------------------------------- */
1883 void
1884 resetStaticObjectForRetainerProfiling( StgClosure *static_objects )
1885 {
1886 #if defined(DEBUG_RETAINER)
1887 uint32_t count;
1888 #endif
1889 StgClosure *p;
1890
1891 #if defined(DEBUG_RETAINER)
1892 count = 0;
1893 #endif
1894 p = static_objects;
1895 while (p != END_OF_STATIC_OBJECT_LIST) {
1896 p = UNTAG_STATIC_LIST_PTR(p);
1897 #if defined(DEBUG_RETAINER)
1898 count++;
1899 #endif
1900 switch (get_itbl(p)->type) {
1901 case IND_STATIC:
1902 // Since we do not compute the retainer set of any
1903 // IND_STATIC object, we don't have to reset its retainer
1904 // field.
1905 p = (StgClosure*)*IND_STATIC_LINK(p);
1906 break;
1907 case THUNK_STATIC:
1908 maybeInitRetainerSet(p);
1909 p = (StgClosure*)*THUNK_STATIC_LINK(p);
1910 break;
1911 case FUN_STATIC:
1912 maybeInitRetainerSet(p);
1913 p = (StgClosure*)*FUN_STATIC_LINK(p);
1914 break;
1915 case CONSTR:
1916 case CONSTR_1_0:
1917 case CONSTR_2_0:
1918 case CONSTR_1_1:
1919 case CONSTR_NOCAF:
1920 maybeInitRetainerSet(p);
1921 p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
1922 break;
1923 default:
1924 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1925 p, get_itbl(p)->type);
1926 break;
1927 }
1928 }
1929 #if defined(DEBUG_RETAINER)
1930 // debugBelch("count in scavenged_static_objects = %d\n", count);
1931 #endif
1932 }
1933
1934 /* -----------------------------------------------------------------------------
1935 * Perform retainer profiling.
 * N is the oldest generation being profiled, where the generations are
1937 * numbered starting at 0.
1938 * Invariants:
1939 * Note:
1940 * This function should be called only immediately after major garbage
1941 * collection.
1942 * ------------------------------------------------------------------------- */
void
retainerProfile(void)
{
#if defined(DEBUG_RETAINER)
    uint32_t i;
    uint32_t totalHeapSize; // total raw heap size (computed by linear scanning)
#endif

#if defined(DEBUG_RETAINER)
    debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
#endif

    stat_startRP();

    // We haven't flipped the bit yet.
#if defined(DEBUG_RETAINER)
    debugBelch("Before traversing:\n");
    sumOfCostLinear = 0;
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        costArrayLinear[i] = 0;
    totalHeapSize = checkHeapSanityForRetainerProfiling();

    debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
    /*
    debugBelch("costArrayLinear[] = ");
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        debugBelch("[%u:%u] ", i, costArrayLinear[i]);
    debugBelch("\n");
    */

    ASSERT(sumOfCostLinear == totalHeapSize);

/*
#define pcostArrayLinear(index) \
    if (costArrayLinear[index] > 0) \
        debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
    pcostArrayLinear(THUNK_STATIC);
    pcostArrayLinear(FUN_STATIC);
    pcostArrayLinear(CONSTR_NOCAF);
*/
#endif

    // Now we flip the 'flip' bit: every retainer-set field tagged with
    // the old value instantly becomes invalid, and will be re-initialized
    // lazily by maybeInitRetainerSet() on first visit.
    flip = flip ^ 1;

#if defined(DEBUG_RETAINER)
    stackSize = 0;
    maxStackSize = 0;
    cStackSize = 0;
    maxCStackSize = 0;
#endif
    numObjectVisited = 0;
    timesAnyObjectVisited = 0;

#if defined(DEBUG_RETAINER)
    debugBelch("During traversing:\n");
    sumOfNewCost = 0;
    sumOfNewCostExtra = 0;
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        costArray[i] = 0;
#endif

    /*
      We initialize the traverse stack each time the retainer profiling is
      performed (because the traverse stack size varies on each retainer profiling
      and this operation is not costly anyhow). However, we just refresh the
      retainer sets.
    */
    initializeTraverseStack();
#if defined(DEBUG_RETAINER)
    initializeAllRetainerSet();
#else
    refreshAllRetainerSet();
#endif
    computeRetainerSet();

#if defined(DEBUG_RETAINER)
    debugBelch("After traversing:\n");
    sumOfCostLinear = 0;
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        costArrayLinear[i] = 0;
    totalHeapSize = checkHeapSanityForRetainerProfiling();

    debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
    ASSERT(sumOfCostLinear == totalHeapSize);

    // now, compare the two results
    /*
      Note:
        costArray[] must be exactly the same as costArrayLinear[].
        Known exceptions:
          1) Dead weak pointers, whose type is CONSTR. These objects are not
             reachable from any roots.
    */
    debugBelch("Comparison:\n");
    debugBelch("\tcostArrayLinear[] (must be empty) = ");
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        if (costArray[i] != costArrayLinear[i])
            // nothing should be printed except MUT_VAR after major GCs
            debugBelch("[%u:%u] ", i, costArrayLinear[i]);
    debugBelch("\n");

    debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
    debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
    debugBelch("\tcostArray[] (must be empty) = ");
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        if (costArray[i] != costArrayLinear[i])
            // nothing should be printed except MUT_VAR after major GCs
            debugBelch("[%u:%u] ", i, costArray[i]);
    debugBelch("\n");

    // only for major garbage collection
    ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
#endif

    // post-processing
    closeTraverseStack();
#if defined(DEBUG_RETAINER)
    closeAllRetainerSet();
#else
    // Note that there is no post-processing for the retainer sets.
#endif
    retainerGeneration++;

    stat_endRP(
        retainerGeneration - 1,   // retainerGeneration has just been incremented!
#if defined(DEBUG_RETAINER)
        maxCStackSize, maxStackSize,
#endif
        (double)timesAnyObjectVisited / numObjectVisited);
}
2074
2075 /* -----------------------------------------------------------------------------
2076 * DEBUGGING CODE
2077 * -------------------------------------------------------------------------- */
2078
2079 #if defined(DEBUG_RETAINER)
2080
2081 #define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
2082 ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
2083 ((StgWord)(*(StgPtr)r)!=(StgWord)0xaaaaaaaaaaaaaaaaULL))
2084
2085 static uint32_t
2086 sanityCheckHeapClosure( StgClosure *c )
2087 {
2088 ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
2089 ASSERT(LOOKS_LIKE_PTR(c));
2090
2091 if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
2092 if (get_itbl(c)->type == CONSTR &&
2093 !strcmp(GET_PROF_TYPE(get_itbl(c)), "DEAD_WEAK") &&
2094 !strcmp(GET_PROF_DESC(get_itbl(c)), "DEAD_WEAK")) {
2095 debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
2096 costArray[get_itbl(c)->type] += cost(c);
2097 sumOfNewCost += cost(c);
2098 } else
2099 debugBelch(
2100 "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
2101 flip, c, get_itbl(c)->type,
2102 get_itbl(c)->prof.closure_type, GET_PROF_DESC(get_itbl(c)),
2103 RSET(c));
2104 } else {
2105 // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
2106 }
2107
2108 return closure_sizeW(c);
2109 }
2110
2111 static uint32_t
2112 heapCheck( bdescr *bd )
2113 {
2114 StgPtr p;
2115 static uint32_t costSum, size;
2116
2117 costSum = 0;
2118 while (bd != NULL) {
2119 p = bd->start;
2120 while (p < bd->free) {
2121 size = sanityCheckHeapClosure((StgClosure *)p);
2122 sumOfCostLinear += size;
2123 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2124 p += size;
2125 // no need for slop check; I think slops are not used currently.
2126 }
2127 ASSERT(p == bd->free);
2128 costSum += bd->free - bd->start;
2129 bd = bd->link;
2130 }
2131
2132 return costSum;
2133 }
2134
2135 static uint32_t
2136 smallObjectPoolCheck(void)
2137 {
2138 bdescr *bd;
2139 StgPtr p;
2140 static uint32_t costSum, size;
2141
2142 bd = g0s0->blocks;
2143 costSum = 0;
2144
2145 // first block
2146 if (bd == NULL)
2147 return costSum;
2148
2149 p = bd->start;
2150 while (p < alloc_Hp) {
2151 size = sanityCheckHeapClosure((StgClosure *)p);
2152 sumOfCostLinear += size;
2153 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2154 p += size;
2155 }
2156 ASSERT(p == alloc_Hp);
2157 costSum += alloc_Hp - bd->start;
2158
2159 bd = bd->link;
2160 while (bd != NULL) {
2161 p = bd->start;
2162 while (p < bd->free) {
2163 size = sanityCheckHeapClosure((StgClosure *)p);
2164 sumOfCostLinear += size;
2165 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2166 p += size;
2167 }
2168 ASSERT(p == bd->free);
2169 costSum += bd->free - bd->start;
2170 bd = bd->link;
2171 }
2172
2173 return costSum;
2174 }
2175
2176 static uint32_t
2177 chainCheck(bdescr *bd)
2178 {
2179 uint32_t costSum, size;
2180
2181 costSum = 0;
2182 while (bd != NULL) {
2183 // bd->free - bd->start is not an accurate measurement of the
2184 // object size. Actually it is always zero, so we compute its
2185 // size explicitly.
2186 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2187 sumOfCostLinear += size;
2188 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
2189 costSum += size;
2190 bd = bd->link;
2191 }
2192
2193 return costSum;
2194 }
2195
2196 static uint32_t
2197 checkHeapSanityForRetainerProfiling( void )
2198 {
2199 uint32_t costSum, g, s;
2200
2201 costSum = 0;
2202 debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2203 if (RtsFlags.GcFlags.generations == 1) {
2204 costSum += heapCheck(g0s0->to_blocks);
2205 debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2206 costSum += chainCheck(g0s0->large_objects);
2207 debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2208 } else {
2209 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
2210 for (s = 0; s < generations[g].n_steps; s++) {
2211 /*
2212 After all live objects have been scavenged, the garbage
2213 collector may create some objects in
2214 scheduleFinalizers(). These objects are created through
2215 allocate(), so the small object pool or the large object
2216 pool of the g0s0 may not be empty.
2217 */
2218 if (g == 0 && s == 0) {
2219 costSum += smallObjectPoolCheck();
2220 debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2221 costSum += chainCheck(generations[g].steps[s].large_objects);
2222 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2223 } else {
2224 costSum += heapCheck(generations[g].steps[s].blocks);
2225 debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2226 costSum += chainCheck(generations[g].steps[s].large_objects);
2227 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2228 }
2229 }
2230 }
2231
2232 return costSum;
2233 }
2234
2235 void
2236 findPointer(StgPtr p)
2237 {
2238 StgPtr q, r, e;
2239 bdescr *bd;
2240 uint32_t g, s;
2241
2242 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2243 for (s = 0; s < generations[g].n_steps; s++) {
2244 // if (g == 0 && s == 0) continue;
2245 bd = generations[g].steps[s].blocks;
2246 for (; bd; bd = bd->link) {
2247 for (q = bd->start; q < bd->free; q++) {
2248 if (*q == (StgWord)p) {
2249 r = q;
2250 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2251 debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2252 // return;
2253 }
2254 }
2255 }
2256 bd = generations[g].steps[s].large_objects;
2257 for (; bd; bd = bd->link) {
2258 e = bd->start + cost((StgClosure *)bd->start);
2259 for (q = bd->start; q < e; q++) {
2260 if (*q == (StgWord)p) {
2261 r = q;
2262 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2263 debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
2264 // return;
2265 }
2266 }
2267 }
2268 }
2269 }
2270 }
2271
2272 static void
2273 belongToHeap(StgPtr p)
2274 {
2275 bdescr *bd;
2276 uint32_t g, s;
2277
2278 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2279 for (s = 0; s < generations[g].n_steps; s++) {
2280 // if (g == 0 && s == 0) continue;
2281 bd = generations[g].steps[s].blocks;
2282 for (; bd; bd = bd->link) {
2283 if (bd->start <= p && p < bd->free) {
2284 debugBelch("Belongs to gen[%d], step[%d]", g, s);
2285 return;
2286 }
2287 }
2288 bd = generations[g].steps[s].large_objects;
2289 for (; bd; bd = bd->link) {
2290 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2291 debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
2292 return;
2293 }
2294 }
2295 }
2296 }
2297 }
2298 #endif /* DEBUG_RETAINER */
2299
2300 #endif /* PROFILING */