rts: Replace `nat` with `uint32_t`
[ghc.git] / rts / RetainerProfile.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2001
4 * Author: Sungwoo Park
5 *
6 * Retainer profiling.
7 *
8 * ---------------------------------------------------------------------------*/
9
10 #ifdef PROFILING
11
12 // Turn off inlining when debugging - it obfuscates things
13 #ifdef DEBUG
14 #define INLINE
15 #else
16 #define INLINE inline
17 #endif
18
19 #include "PosixSource.h"
20 #include "Rts.h"
21
22 #include "RtsUtils.h"
23 #include "RetainerProfile.h"
24 #include "RetainerSet.h"
25 #include "Schedule.h"
26 #include "Printer.h"
27 #include "Weak.h"
28 #include "sm/Sanity.h"
29 #include "Profiling.h"
30 #include "Stats.h"
31 #include "ProfHeap.h"
32 #include "Apply.h"
33 #include "Stable.h" /* markStableTables */
34 #include "sm/Storage.h" // for END_OF_STATIC_LIST
35
36 /*
37 Note: what to change in order to plug-in a new retainer profiling scheme?
38 (1) type retainer in ../includes/StgRetainerProf.h
39 (2) retainer function R(), i.e., getRetainerFrom()
40 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
41 in RetainerSet.h, if needed.
42 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
43 */
44
45 /* -----------------------------------------------------------------------------
46 * Declarations...
47 * -------------------------------------------------------------------------- */
48
static uint32_t retainerGeneration;  // generation

static uint32_t numObjectVisited;    // total number of objects visited
static uint32_t timesAnyObjectVisited;  // number of times any objects are
                                        // visited

/*
  The rs field in the profile header of any object points to its retainer
  set in an indirect way: if flip is 0, it points to the retainer set;
  if flip is 1, it points to the next byte after the retainer set (even
  for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
  pointer. See retainerSetOf().
 */

StgWord flip = 0;     // flip bit
                      // must be 0 if DEBUG_RETAINER is on (for static closures)

// Store the NULL retainer set, encoded for the current flip parity,
// into closure c's profiling header.
#define setRetainerSetToNull(c)   \
  (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)

static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
static void retainClosure(StgClosure *, StgClosure *, retainer);
#ifdef DEBUG_RETAINER
static void belongToHeap(StgPtr p);
#endif

#ifdef DEBUG_RETAINER
/*
  cStackSize records how many times retainStack() has been invoked recursively,
  that is, the number of activation records for retainStack() on the C stack.
  maxCStackSize records its max value.
  Invariants:
    cStackSize <= maxCStackSize
 */
static uint32_t cStackSize, maxCStackSize;

static uint32_t sumOfNewCost;        // sum of the cost of each object, computed
                                     // when the object is first visited
static uint32_t sumOfNewCostExtra;   // for those objects not visited during
                                     // retainer profiling, e.g., MUT_VAR
static uint32_t costArray[N_CLOSURE_TYPES];

uint32_t sumOfCostLinear;            // sum of the costs of all object, computed
                                     // when linearly traversing the heap after
                                     // retainer profiling
uint32_t costArrayLinear[N_CLOSURE_TYPES];
#endif
96
97 /* -----------------------------------------------------------------------------
98 * Retainer stack - header
99 * Note:
100  *   Although the retainer stack implementation could be separated
101  *   from the retainer profiling engine, there does not seem to be
102  *   any advantage in doing that; the retainer stack is an integral part
103  *   of the retainer profiling engine and cannot be used elsewhere at
104  *   all.
105 * -------------------------------------------------------------------------- */
106
// Which traversal scheme the stack element uses to enumerate the
// remaining children of its closure.
typedef enum {
    posTypeStep,       // fixed layout: 'step' counts which child is next
    posTypePtrs,       // pointers-first payload: index into payload[]
    posTypeSRT,        // small SRT guided by an inline bitmap
    posTypeLargeSRT,   // large SRT with an out-of-line bitmap
} nextPosType;

// Per-scheme cursor state; interpreted according to the nextPosType tag
// stored alongside it in stackPos.
typedef union {
    // fixed layout or layout specified by a field in the closure
    StgWord step;

    // layout.payload
    struct {
        // See StgClosureInfo in InfoTables.h
#if SIZEOF_VOID_P == 8
        StgWord32 pos;
        StgWord32 ptrs;
#else
        StgWord16 pos;
        StgWord16 ptrs;
#endif
        StgPtr payload;     // start of the pointer section of the payload
    } ptrs;

    // SRT
    struct {
        StgClosure **srt;     // next SRT slot to inspect
        StgWord srt_bitmap;   // remaining bitmap bits (shifted as we go)
    } srt;

    // Large SRT
    struct {
        StgLargeSRT *srt;
        StgWord offset;       // index of the next SRT entry to inspect
    } large_srt;

} nextPos;

// Tagged cursor: where we are within one closure's children.
typedef struct {
    nextPosType type;
    nextPos next;
} stackPos;

// One entry of the traverse stack: a closure whose children are still
// being visited, plus the retainer its children inherit.
typedef struct {
    StgClosure *c;
    retainer c_child_r;   // retainer of c's children
    stackPos info;        // position of the next child to visit
} stackElement;
155
/*
  Invariants:
    firstStack points to the first block group.
    currentStack points to the block group currently being used.
      currentStack->free == stackLimit.
    stackTop points to the topmost byte in the stack of currentStack.
    Unless the whole stack is empty, stackTop must point to the topmost
    object (or byte) in the whole stack. Thus, it is only when the whole stack
    is empty that stackTop == stackLimit (not during the execution of push()
    and pop()).
    stackBottom == currentStack->start.
    stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
  Note:
    When a current stack becomes empty, stackTop is set to point to
    the topmost element on the previous block group so as to satisfy
    the invariants described above.
 */
static bdescr *firstStack = NULL;   // head of the chain of stack block groups
static bdescr *currentStack;        // block group currently in use
static stackElement *stackBottom, *stackTop, *stackLimit;

/*
  currentStackBoundary is used to mark the current stack chunk.
  If stackTop == currentStackBoundary, it means that the current stack chunk
  is empty. It is the responsibility of the user to keep currentStackBoundary
  valid all the time if it is to be employed.
 */
static stackElement *currentStackBoundary;

/*
  stackSize records the current size of the stack.
  maxStackSize records its high water mark.
  Invariants:
    stackSize <= maxStackSize
  Note:
    stackSize is just an estimate measure of the depth of the graph. The reason
    is that some heap objects have only a single child and may not result
    in a new element being pushed onto the stack. Therefore, at the end of
    retainer profiling, maxStackSize + maxCStackSize is some value no greater
    than the actual depth of the graph.
 */
#ifdef DEBUG_RETAINER
static int stackSize, maxStackSize;
#endif

// number of blocks allocated for one stack
#define BLOCKS_IN_STACK 1
203
204 /* -----------------------------------------------------------------------------
205 * Add a new block group to the stack.
206 * Invariants:
207 * currentStack->link == s.
208 * -------------------------------------------------------------------------- */
209 static INLINE void
210 newStackBlock( bdescr *bd )
211 {
212 currentStack = bd;
213 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
214 stackBottom = (stackElement *)bd->start;
215 stackLimit = (stackElement *)stackTop;
216 bd->free = (StgPtr)stackLimit;
217 }
218
219 /* -----------------------------------------------------------------------------
220 * Return to the previous block group.
221 * Invariants:
222 * s->link == currentStack.
223 * -------------------------------------------------------------------------- */
224 static INLINE void
225 returnToOldStack( bdescr *bd )
226 {
227 currentStack = bd;
228 stackTop = (stackElement *)bd->free;
229 stackBottom = (stackElement *)bd->start;
230 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
231 bd->free = (StgPtr)stackLimit;
232 }
233
234 /* -----------------------------------------------------------------------------
235 * Initializes the traverse stack.
236 * -------------------------------------------------------------------------- */
237 static void
238 initializeTraverseStack( void )
239 {
240 if (firstStack != NULL) {
241 freeChain(firstStack);
242 }
243
244 firstStack = allocGroup(BLOCKS_IN_STACK);
245 firstStack->link = NULL;
246 firstStack->u.back = NULL;
247
248 newStackBlock(firstStack);
249 }
250
251 /* -----------------------------------------------------------------------------
252 * Frees all the block groups in the traverse stack.
253 * Invariants:
254 * firstStack != NULL
255 * -------------------------------------------------------------------------- */
256 static void
257 closeTraverseStack( void )
258 {
259 freeChain(firstStack);
260 firstStack = NULL;
261 }
262
263 /* -----------------------------------------------------------------------------
264 * Returns rtsTrue if the whole stack is empty.
265 * -------------------------------------------------------------------------- */
266 static INLINE rtsBool
267 isEmptyRetainerStack( void )
268 {
269 return (firstStack == currentStack) && stackTop == stackLimit;
270 }
271
272 /* -----------------------------------------------------------------------------
273 * Returns size of stack
274 * -------------------------------------------------------------------------- */
#ifdef DEBUG
/*
 * Total number of blocks currently allocated to the traverse stack
 * (sum over the whole chain of block groups).
 */
W_
retainerStackBlocks( void )
{
    W_ nblocks = 0;
    bdescr *bd = firstStack;

    while (bd != NULL) {
        nblocks += bd->blocks;
        bd = bd->link;
    }

    return nblocks;
}
#endif
288
289 /* -----------------------------------------------------------------------------
290 * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
291 * i.e., if the current stack chunk is empty.
292 * -------------------------------------------------------------------------- */
293 static INLINE rtsBool
294 isOnBoundary( void )
295 {
296 return stackTop == currentStackBoundary;
297 }
298
299 /* -----------------------------------------------------------------------------
300 * Initializes *info from ptrs and payload.
301 * Invariants:
302 * payload[] begins with ptrs pointers followed by non-pointers.
303 * -------------------------------------------------------------------------- */
304 static INLINE void
305 init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload )
306 {
307 info->type = posTypePtrs;
308 info->next.ptrs.pos = 0;
309 info->next.ptrs.ptrs = ptrs;
310 info->next.ptrs.payload = payload;
311 }
312
313 /* -----------------------------------------------------------------------------
314 * Find the next object from *info.
315 * -------------------------------------------------------------------------- */
316 static INLINE StgClosure *
317 find_ptrs( stackPos *info )
318 {
319 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
320 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
321 } else {
322 return NULL;
323 }
324 }
325
326 /* -----------------------------------------------------------------------------
327 * Initializes *info from SRT information stored in *infoTable.
328 * -------------------------------------------------------------------------- */
329 static INLINE void
330 init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
331 {
332 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
333 info->type = posTypeLargeSRT;
334 info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
335 info->next.large_srt.offset = 0;
336 } else {
337 info->type = posTypeSRT;
338 info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
339 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
340 }
341 }
342
343 static INLINE void
344 init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
345 {
346 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
347 info->type = posTypeLargeSRT;
348 info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
349 info->next.large_srt.offset = 0;
350 } else {
351 info->type = posTypeSRT;
352 info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
353 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
354 }
355 }
356
357 /* -----------------------------------------------------------------------------
358 * Find the next object from *info.
359 * -------------------------------------------------------------------------- */
/*
 * Return the next closure referenced from the SRT described by *info,
 * advancing the cursor stored in *info; NULL once the SRT is exhausted.
 */
static INLINE StgClosure *
find_srt( stackPos *info )
{
    StgClosure *c;
    StgWord bitmap;

    if (info->type == posTypeSRT) {
        // Small SRT bitmap
        bitmap = info->next.srt.srt_bitmap;
        while (bitmap != 0) {
            if ((bitmap & 1) != 0) {
#if defined(COMPILING_WINDOWS_DLL)
                // On Windows DLLs the low bit tags an entry that must be
                // followed through an extra indirection.
                if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
                    c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
                else
                    c = *(info->next.srt.srt);
#else
                c = *(info->next.srt.srt);
#endif
                // Save the cursor just past this entry before returning,
                // so the next call resumes here.
                bitmap = bitmap >> 1;
                info->next.srt.srt++;
                info->next.srt.srt_bitmap = bitmap;
                return c;
            }
            bitmap = bitmap >> 1;
            info->next.srt.srt++;
        }
        // bitmap is now zero...
        return NULL;
    }
    else {
        // Large SRT bitmap
        uint32_t i = info->next.large_srt.offset;
        StgWord bitmap;

        // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
        bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
        bitmap = bitmap >> (i % BITS_IN(StgWord));
        while (i < info->next.large_srt.srt->l.size) {
            if ((bitmap & 1) != 0) {
                c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
                i++;
                info->next.large_srt.offset = i;
                return c;
            }
            i++;
            if (i % BITS_IN(W_) == 0) {
                // Crossed a word boundary: load the next bitmap word.
                // NOTE(review): if i == size here, this reads one word
                // past the end of the bitmap before the loop condition is
                // re-checked — verify against scavenge_large_srt_bitmap().
                bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
            } else {
                bitmap = bitmap >> 1;
            }
        }
        // reached the end of this bitmap.
        info->next.large_srt.offset = i;
        return NULL;
    }
}
417
418 /* -----------------------------------------------------------------------------
419 * push() pushes a stackElement representing the next child of *c
420 * onto the traverse stack. If *c has no child, *first_child is set
421 * to NULL and nothing is pushed onto the stack. If *c has only one
422  *  child, *c_child is set to that child and nothing is pushed onto
423 * the stack. If *c has more than two children, *first_child is set
424 * to the first child and a stackElement representing the second
425 * child is pushed onto the stack.
426
427 * Invariants:
428 * *c_child_r is the most recent retainer of *c's children.
429 * *c is not any of TSO, AP, PAP, AP_STACK, which means that
430 * there cannot be any stack objects.
431 * Note: SRTs are considered to be children as well.
432 * -------------------------------------------------------------------------- */
static INLINE void
push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
{
    stackElement se;
    bdescr *nbd;      // Next Block Descriptor

#ifdef DEBUG_RETAINER
    // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    // Stack-carrying objects are handled by retainStack(), never here.
    ASSERT(get_itbl(c)->type != TSO);
    ASSERT(get_itbl(c)->type != AP_STACK);

    //
    // fill in se
    //

    se.c = c;
    se.c_child_r = c_child_r;

    // fill in se.info
    switch (get_itbl(c)->type) {
        // no child, no SRT
    case CONSTR_0_1:
    case CONSTR_0_2:
    case ARR_WORDS:
        *first_child = NULL;
        return;

        // one child (fixed), no SRT
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
        *first_child = ((StgMutVar *)c)->var;
        return;
    case THUNK_SELECTOR:
        *first_child = ((StgSelector *)c)->selectee;
        return;
    case BLACKHOLE:
        *first_child = ((StgInd *)c)->indirectee;
        return;
    case CONSTR_1_0:
    case CONSTR_1_1:
        *first_child = c->payload[0];
        return;

        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;

        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
    case CONSTR_2_0:
        *first_child = c->payload[0];         // return the first pointer
        // se.info.type = posTypeStep;
        // se.info.next.step = 2;             // 2 = second
        break;

        // three children (fixed), no SRT
        // need to push a stackElement
    case MVAR_CLEAN:
    case MVAR_DIRTY:
        // head must be TSO and the head of a linked list of TSOs.
        // Should it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;                // 2 = second
        break;

        // three children (fixed), no SRT
    case WEAK:
        *first_child = ((StgWeak *)c)->key;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;
        break;

        // layout.payload.ptrs, no SRT
    case TVAR:
    case CONSTR:
    case PRIM:
    case MUT_PRIM:
    case BCO:
    case CONSTR_STATIC:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;   // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // StgMutArrPtr.ptrs, no SRT
    case SMALL_MUT_ARR_PTRS_CLEAN:
    case SMALL_MUT_ARR_PTRS_DIRTY:
    case SMALL_MUT_ARR_PTRS_FROZEN:
    case SMALL_MUT_ARR_PTRS_FROZEN0:
        init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // layout.payload.ptrs, SRT
    case FUN:           // *c is a heap object.
    case FUN_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;

    case THUNK:
    case THUNK_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
    case FUN_1_0:
    case FUN_1_1:
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;

    case THUNK_1_0:
    case THUNK_1_1:
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;

    case FUN_STATIC:      // *c is a heap object.
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        // fallthrough: a FUN_STATIC with a non-empty SRT is treated
        // exactly like the SRT-only FUN_0_* cases below.
    case FUN_0_1:
    case FUN_0_2:
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

        // SRT only
    case THUNK_STATIC:
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        // fallthrough
    case THUNK_0_1:
    case THUNK_0_2:
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

    case TREC_CHUNK:
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
        se.info.next.step = 0;  // entry no.
        break;

        // cannot appear
    case PAP:
    case AP:
    case AP_STACK:
    case TSO:
    case STACK:
    case IND_STATIC:
    case CONSTR_NOCAF_STATIC:
        // stack objects
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case UNDERFLOW_FRAME:
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
        // invalid objects
    case IND:
    case INVALID_OBJECT:
    default:
        barf("Invalid object *c in push()");
        return;
    }

    // Grow onto the next block group if the current chunk is full.
    if (stackTop - 1 < stackBottom) {
#ifdef DEBUG_RETAINER
        // debugBelch("push() to the next stack.\n");
#endif
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
        currentStack->free = (StgPtr)stackTop;

        if (currentStack->link == NULL) {
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
            nbd->u.back = currentStack;
            currentStack->link = nbd;
        } else
            nbd = currentStack->link;

        newStackBlock(nbd);
    }

    // adjust stackTop (actual push)
    stackTop--;
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.

    // ToDo: The line below leads to the warning:
    //    warning: 'se.info.type' may be used uninitialized in this function
    // This is caused by the fact that there are execution paths through the
    // large switch statement above where some cases do not initialize this
    // field. Is this really harmless? Can we avoid the warning?
    *stackTop = se;

#ifdef DEBUG_RETAINER
    stackSize++;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    // ASSERT(stackSize >= 0);
    // debugBelch("stackSize = %d\n", stackSize);
#endif
}
673
674 /* -----------------------------------------------------------------------------
675 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
676 * Invariants:
677 * stackTop cannot be equal to stackLimit unless the whole stack is
678 * empty, in which case popOff() is not allowed.
679 * Note:
680 * You can think of popOffReal() as a part of popOff() which is
681  *  executed at the end of popOff() if necessary. Since popOff() is
682 * likely to be executed quite often while popOffReal() is not, we
683 * separate popOffReal() from popOff(), which is declared as an
684 * INLINE function (for the sake of execution speed). popOffReal()
685 * is called only within popOff() and nowhere else.
686 * -------------------------------------------------------------------------- */
/*
 * Slow path of popOff(): the current chunk's last element is being
 * popped, so either declare the whole stack empty or switch back to
 * the previous block group.
 */
static void
popOffReal(void)
{
    bdescr *pbd;    // Previous Block Descriptor

#ifdef DEBUG_RETAINER
    // debugBelch("pop() to the previous stack.\n");
#endif

    // Caller (popOff) guarantees we are removing the last element of
    // the current chunk.
    ASSERT(stackTop + 1 == stackLimit);
    ASSERT(stackBottom == (stackElement *)currentStack->start);

    if (firstStack == currentStack) {
        // The stack is completely empty.
        stackTop++;
        ASSERT(stackTop == stackLimit);
#ifdef DEBUG_RETAINER
        stackSize--;
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
          ASSERT(stackSize >= 0);
          debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
    currentStack->free = (StgPtr)stackLimit;

    // find the previous block descriptor
    pbd = currentStack->u.back;
    ASSERT(pbd != NULL);

    returnToOldStack(pbd);

#ifdef DEBUG_RETAINER
    stackSize--;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    /*
      ASSERT(stackSize >= 0);
      debugBelch("stackSize = %d\n", stackSize);
    */
#endif
}
733
/*
 * Pop one stackElement off the traverse stack. Fast path: just bump
 * stackTop within the current chunk; delegate chunk switching to
 * popOffReal().
 */
static INLINE void
popOff(void) {
#ifdef DEBUG_RETAINER
    // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    ASSERT(stackTop != stackLimit);
    ASSERT(!isEmptyRetainerStack());

    // <= (instead of <) is wrong!
    if (stackTop + 1 < stackLimit) {
        stackTop++;
#ifdef DEBUG_RETAINER
        stackSize--;
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
          ASSERT(stackSize >= 0);
          debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    popOffReal();
}
759
760 /* -----------------------------------------------------------------------------
761 * Finds the next object to be considered for retainer profiling and store
762 * its pointer to *c.
763 * Test if the topmost stack element indicates that more objects are left,
764 * and if so, retrieve the first object and store its pointer to *c. Also,
765 * set *cp and *r appropriately, both of which are stored in the stack element.
766 * The topmost stack element then is overwritten so as for it to now denote
767 * the next object.
768 * If the topmost stack element indicates no more objects are left, pop
769 * off the stack element until either an object can be retrieved or
770 * the current stack chunk becomes empty, indicated by rtsTrue returned by
771 * isOnBoundary(), in which case *c is set to NULL.
772 * Note:
773 * It is okay to call this function even when the current stack chunk
774 * is empty.
775 * -------------------------------------------------------------------------- */
static INLINE void
pop( StgClosure **c, StgClosure **cp, retainer *r )
{
    stackElement *se;

#ifdef DEBUG_RETAINER
    // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    // Loop until we either produce a child (*c set, then return) or the
    // current chunk is depleted (*c = NULL).
    do {
        if (isOnBoundary()) {     // if the current stack chunk is depleted
            *c = NULL;
            return;
        }

        se = stackTop;

        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

            // three children (fixed), no SRT
            // need to push a stackElement
        case MVAR_CLEAN:
        case MVAR_DIRTY:
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;   // move to the next step
                // no popOff
            } else {
                *c = ((StgMVar *)se->c)->value;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
                // no popOff
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow.  So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            TRecEntry *entry;
            uint32_t entry_no = se->info.next.step >> 2;
            uint32_t field_no = se->info.next.step & 3;
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                *c = NULL;
                popOff();
                return;
            }
            entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            *cp = se->c;
            *r = se->c_child_r;
            se->info.next.step++;
            return;
        }

        case TVAR:
        case CONSTR:
        case PRIM:
        case MUT_PRIM:
        case BCO:
        case CONSTR_STATIC:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
        case MUT_ARR_PTRS_FROZEN:
        case MUT_ARR_PTRS_FROZEN0:
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
                popOff();
                break;    // this closure is done; loop to the next element
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // layout.payload.ptrs, SRT
        case FUN:         // always a heap object
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                // pointers exhausted: switch this element over to its SRT
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
            if (*c != NULL) {
                *cp = se->c;
                *r = se->c_child_r;
                return;
            }
            popOff();
            break;

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
        case STACK:
        case IND_STATIC:
        case CONSTR_NOCAF_STATIC:
            // stack objects
        case UPDATE_FRAME:
        case CATCH_FRAME:
        case UNDERFLOW_FRAME:
        case STOP_FRAME:
        case RET_BCO:
        case RET_SMALL:
        case RET_BIG:
            // invalid objects
        case IND:
        case INVALID_OBJECT:
        default:
            barf("Invalid object *c in pop()");
            return;
        }
    } while (rtsTrue);
}
963
964 /* -----------------------------------------------------------------------------
965 * RETAINER PROFILING ENGINE
966 * -------------------------------------------------------------------------- */
967
968 void
969 initRetainerProfiling( void )
970 {
971 initializeAllRetainerSet();
972 retainerGeneration = 0;
973 }
974
975 /* -----------------------------------------------------------------------------
976 * This function must be called before f-closing prof_file.
977 * -------------------------------------------------------------------------- */
/*
 * Finish retainer profiling. Must be called before prof_file is
 * f-closed; under SECOND_APPROACH it dumps every retainer set there.
 */
void
endRetainerProfiling( void )
{
#ifdef SECOND_APPROACH
    outputAllRetainerSet(prof_file);
#endif
}
985
986 /* -----------------------------------------------------------------------------
987 * Returns the actual pointer to the retainer set of the closure *c.
988 * It may adjust RSET(c) subject to flip.
989 * Side effects:
990 * RSET(c) is initialized to NULL if its current value does not
991 * conform to flip.
992 * Note:
993 * Even though this function has side effects, they CAN be ignored because
994 * subsequent calls to retainerSetOf() always result in the same return value
995 * and retainerSetOf() is the only way to retrieve retainerSet of a given
996 * closure.
997 * We have to perform an XOR (^) operation each time a closure is examined.
998 * The reason is that we do not know when a closure is visited last.
999 * -------------------------------------------------------------------------- */
1000 static INLINE void
1001 maybeInitRetainerSet( StgClosure *c )
1002 {
1003 if (!isRetainerSetFieldValid(c)) {
1004 setRetainerSetToNull(c);
1005 }
1006 }
1007
1008 /* -----------------------------------------------------------------------------
1009 * Returns rtsTrue if *c is a retainer.
1010 * -------------------------------------------------------------------------- */
/*
 * rtsTrue iff *c is a retainer, i.e. an object that appears in the
 * retainer sets of the closures it (transitively) keeps alive.
 * Closure types are partitioned into three groups: retainers,
 * non-retainers, and types that can never reach this function.
 */
static INLINE rtsBool
isRetainer( StgClosure *c )
{
    switch (get_itbl(c)->type) {
        //
        //  True case
        //
        // TSOs MUST be retainers: they constitute the set of roots.
    case TSO:
    case STACK:

        // mutable objects
    case MUT_PRIM:
    case MVAR_CLEAN:
    case MVAR_DIRTY:
    case TVAR:
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:

        // thunks are retainers.
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
    case AP:
    case AP_STACK:

        // Static thunks, or CAFS, are obviously retainers.
    case THUNK_STATIC:

        // WEAK objects are roots; there is separate code in which traversing
        // begins from WEAK objects.
    case WEAK:
        return rtsTrue;

        //
        // False case
        //

        // constructors
    case CONSTR:
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
        // functions
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
        // partial applications
    case PAP:
        // indirection
        // IND_STATIC used to be an error, but at the moment it can happen
        // as isAlive doesn't look through IND_STATIC as it ignores static
        // closures. See trac #3956 for a program that hit this error.
    case IND_STATIC:
    case BLACKHOLE:
        // static objects
    case CONSTR_STATIC:
    case FUN_STATIC:
        // misc
    case PRIM:
    case BCO:
    case ARR_WORDS:
        // STM
    case TREC_CHUNK:
        // immutable arrays
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:
        return rtsFalse;

        //
        // Error case
        //
        // CONSTR_NOCAF_STATIC
        // cannot be *c, *cp, *r in the retainer profiling loop.
    case CONSTR_NOCAF_STATIC:
        // Stack objects are invalid because they are never treated as
        // legal objects during retainer profiling.
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case UNDERFLOW_FRAME:
    case STOP_FRAME:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
        // other cases
    case IND:
    case INVALID_OBJECT:
    default:
        barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
        return rtsFalse;
    }
}
1114
1115 /* -----------------------------------------------------------------------------
1116 * Returns the retainer function value for the closure *c, i.e., R(*c).
1117 * This function does NOT return the retainer(s) of *c.
1118 * Invariants:
1119 * *c must be a retainer.
1120 * Note:
1121 * Depending on the definition of this function, the maintenance of retainer
1122 * sets can be made easier. If most retainer sets are likely to be created
1123 * again across garbage collections, refreshAllRetainerSet() in
1124 * RetainerSet.c can simply do nothing.
1125 * If this is not the case, we can free all the retainer sets and
1126 * re-initialize the hash table.
1127 * See refreshAllRetainerSet() in RetainerSet.c.
1128 * -------------------------------------------------------------------------- */
// Compute R(*c): the retainer function value for a closure which IS
// a retainer.  The scheme is selected at compile time; exactly one of
// the three RETAINER_SCHEME_* macros is expected to be defined.
static INLINE retainer
getRetainerFrom( StgClosure *c )
{
    ASSERT(isRetainer(c));

#if defined(RETAINER_SCHEME_INFO)
    // Retainer scheme 1: retainer = info table
    return get_itbl(c);
#elif defined(RETAINER_SCHEME_CCS)
    // Retainer scheme 2: retainer = cost centre stack
    return c->header.prof.ccs;
#elif defined(RETAINER_SCHEME_CC)
    // Retainer scheme 3: retainer = cost centre
    return c->header.prof.ccs->cc;
#endif
}
1145
1146 /* -----------------------------------------------------------------------------
1147 * Associates the retainer set *s with the closure *c, that is, *s becomes
1148 * the retainer set of *c.
1149 * Invariants:
1150 * c != NULL
1151 * s != NULL
1152 * -------------------------------------------------------------------------- */
static INLINE void
associate( StgClosure *c, RetainerSet *s )
{
    // StgWord has the same size as pointers, so the following type
    // casting is okay.
    // The low bit of the stored pointer encodes the current value of
    // flip, marking the field as valid for this profiling pass (see the
    // flip protocol described near the top of this file).
    RSET(c) = (RetainerSet *)((StgWord)s | flip);
}
1160
1161 /* -----------------------------------------------------------------------------
1162 Call retainClosure for each of the closures covered by a large bitmap.
1163 -------------------------------------------------------------------------- */
1164
1165 static void
1166 retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, uint32_t size,
1167 StgClosure *c, retainer c_child_r)
1168 {
1169 uint32_t i, b;
1170 StgWord bitmap;
1171
1172 b = 0;
1173 bitmap = large_bitmap->bitmap[b];
1174 for (i = 0; i < size; ) {
1175 if ((bitmap & 1) == 0) {
1176 retainClosure((StgClosure *)*p, c, c_child_r);
1177 }
1178 i++;
1179 p++;
1180 if (i % BITS_IN(W_) == 0) {
1181 b++;
1182 bitmap = large_bitmap->bitmap[b];
1183 } else {
1184 bitmap = bitmap >> 1;
1185 }
1186 }
1187 }
1188
1189 static INLINE StgPtr
1190 retain_small_bitmap (StgPtr p, uint32_t size, StgWord bitmap,
1191 StgClosure *c, retainer c_child_r)
1192 {
1193 while (size > 0) {
1194 if ((bitmap & 1) == 0) {
1195 retainClosure((StgClosure *)*p, c, c_child_r);
1196 }
1197 p++;
1198 bitmap = bitmap >> 1;
1199 size--;
1200 }
1201 return p;
1202 }
1203
1204 /* -----------------------------------------------------------------------------
1205 * Call retainClosure for each of the closures in an SRT.
1206 * ------------------------------------------------------------------------- */
1207
// Retain every closure marked in a large SRT bitmap.  Unlike stack-frame
// bitmaps, a SET bit here means the corresponding SRT entry is live and
// must be followed.
static void
retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
{
    uint32_t i, b, size;
    StgWord bitmap;
    StgClosure **p;

    b = 0;
    p = (StgClosure **)srt->srt;
    size = srt->l.size;
    bitmap = srt->l.bitmap[b];
    for (i = 0; i < size; ) {
        if ((bitmap & 1) != 0) {
            retainClosure((StgClosure *)*p, c, c_child_r);
        }
        i++;
        p++;
        // Move to the next bitmap word at a word boundary, otherwise
        // shift the current word to expose the next bit.
        if (i % BITS_IN(W_) == 0) {
            b++;
            bitmap = srt->l.bitmap[b];
        } else {
            bitmap = bitmap >> 1;
        }
    }
}
1233
1234 static INLINE void
1235 retainSRT (StgClosure **srt, uint32_t srt_bitmap, StgClosure *c,
1236 retainer c_child_r)
1237 {
1238 uint32_t bitmap;
1239 StgClosure **p;
1240
1241 bitmap = srt_bitmap;
1242 p = srt;
1243
1244 if (bitmap == (StgHalfWord)(-1)) {
1245 retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1246 return;
1247 }
1248
1249 while (bitmap != 0) {
1250 if ((bitmap & 1) != 0) {
1251 #if defined(COMPILING_WINDOWS_DLL)
1252 if ( (unsigned long)(*srt) & 0x1 ) {
1253 retainClosure(* (StgClosure**) ((unsigned long) (*srt) & ~0x1),
1254 c, c_child_r);
1255 } else {
1256 retainClosure(*srt,c,c_child_r);
1257 }
1258 #else
1259 retainClosure(*srt,c,c_child_r);
1260 #endif
1261 }
1262 p++;
1263 bitmap = bitmap >> 1;
1264 }
1265 }
1266
1267 /* -----------------------------------------------------------------------------
1268 * Process all the objects in the stack chunk from stackStart to stackEnd
1269 * with *c and *c_child_r being their parent and their most recent retainer,
1270 * respectively. Treat stackOptionalFun as another child of *c if it is
1271 * not NULL.
1272 * Invariants:
1273 * *c is one of the following: TSO, AP_STACK.
1274 * If *c is TSO, c == c_child_r.
1275 * stackStart < stackEnd.
1276 * RSET(c) and RSET(c_child_r) are valid, i.e., their
1277 * interpretation conforms to the current value of flip (even when they
1278 * are interpreted to be NULL).
1279 * If *c is TSO, its state is not ThreadComplete,or ThreadKilled,
1280 * which means that its stack is ready to process.
1281 * Note:
 *  This code was almost plagiarized from GC.c! For each pointer,
1283 * retainClosure() is invoked instead of evacuate().
1284 * -------------------------------------------------------------------------- */
static void
retainStack( StgClosure *c, retainer c_child_r,
             StgPtr stackStart, StgPtr stackEnd )
{
    stackElement *oldStackBoundary;
    StgPtr p;
    StgRetInfoTable *info;
    StgWord bitmap;
    uint32_t size;

#ifdef DEBUG_RETAINER
    cStackSize++;
    if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
#endif

    /*
      Each invocation of retainStack() creates a new virtual
      stack. Since all such stacks share a single common stack, we
      record the current currentStackBoundary, which will be restored
      at the exit.
    */
    oldStackBoundary = currentStackBoundary;
    currentStackBoundary = stackTop;

#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
#endif

    // NOTE(review): the header comment says *c may be TSO or AP_STACK,
    // but this assertion only admits STACK — presumably both callers
    // pass the underlying STACK object; confirm against call sites.
    ASSERT(get_itbl(c)->type == STACK);

    // Walk each stack frame between stackStart and stackEnd, retaining
    // every pointer word it contains and following each frame's SRT.
    p = stackStart;
    while (p < stackEnd) {
        info = get_ret_itbl((StgClosure *)p);

        switch(info->i.type) {

        case UPDATE_FRAME:
            retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
            p += sizeofW(StgUpdateFrame);
            continue;

        case UNDERFLOW_FRAME:
        case STOP_FRAME:
        case CATCH_FRAME:
        case CATCH_STM_FRAME:
        case CATCH_RETRY_FRAME:
        case ATOMICALLY_FRAME:
        case RET_SMALL:
            // Small-bitmap frame: the bitmap describes which of the
            // following `size` words are pointers (clear bit = pointer).
            bitmap = BITMAP_BITS(info->i.layout.bitmap);
            size = BITMAP_SIZE(info->i.layout.bitmap);
            p++;
            p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

        follow_srt:
            retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
            continue;

        case RET_BCO: {
            StgBCO *bco;

            // First word after the header is the BCO itself; the BCO
            // carries its own bitmap for the remaining words.
            p++;
            retainClosure((StgClosure *)*p, c, c_child_r);
            bco = (StgBCO *)*p;
            p++;
            size = BCO_BITMAP_SIZE(bco);
            retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
            p += size;
            continue;
        }

            // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
        case RET_BIG:
            size = GET_LARGE_BITMAP(&info->i)->size;
            p++;
            retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
                                size, c, c_child_r);
            p += size;
            // and don't forget to follow the SRT
            goto follow_srt;

        case RET_FUN: {
            StgRetFun *ret_fun = (StgRetFun *)p;
            StgFunInfoTable *fun_info;

            retainClosure(ret_fun->fun, c, c_child_r);
            fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun));

            // The payload's layout depends on the function's calling
            // convention, recorded in its info table.
            p = (P_)&ret_fun->payload;
            switch (fun_info->f.fun_type) {
            case ARG_GEN:
                bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
                size = BITMAP_SIZE(fun_info->f.b.bitmap);
                p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
                break;
            case ARG_GEN_BIG:
                size = GET_FUN_LARGE_BITMAP(fun_info)->size;
                retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
                                    size, c, c_child_r);
                p += size;
                break;
            default:
                // Standard argument pattern: look the bitmap up in the
                // static table indexed by fun_type.
                bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
                size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
                p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
                break;
            }
            goto follow_srt;
        }

        default:
            barf("Invalid object found in retainStack(): %d",
                 (int)(info->i.type));
        }
    }

    // restore currentStackBoundary
    currentStackBoundary = oldStackBoundary;
#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
#endif

#ifdef DEBUG_RETAINER
    cStackSize--;
#endif
}
1410
1411 /* ----------------------------------------------------------------------------
1412 * Call retainClosure for each of the children of a PAP/AP
1413 * ------------------------------------------------------------------------- */
1414
static INLINE StgPtr
retain_PAP_payload (StgClosure *pap,    /* NOT tagged */
                    retainer c_child_r, /* NOT tagged */
                    StgClosure *fun,    /* tagged */
                    StgClosure** payload, StgWord n_args)
{
    StgPtr p;
    StgWord bitmap;
    StgFunInfoTable *fun_info;

    // The function closure itself is a child of the PAP/AP.
    retainClosure(fun, pap, c_child_r);
    fun = UNTAG_CLOSURE(fun);
    fun_info = get_fun_itbl(fun);
    ASSERT(fun_info->i.type != PAP);

    p = (StgPtr)payload;

    // Select the bitmap that describes which of the n_args payload words
    // are pointers; layout depends on the function's calling convention.
    switch (fun_info->f.fun_type) {
    case ARG_GEN:
        bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
        p = retain_small_bitmap(p, n_args, bitmap,
                                pap, c_child_r);
        break;
    case ARG_GEN_BIG:
        retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
                            n_args, pap, c_child_r);
        p += n_args;
        break;
    case ARG_BCO:
        // Byte-code objects carry their own argument bitmap.
        retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun),
                            n_args, pap, c_child_r);
        p += n_args;
        break;
    default:
        // Standard argument pattern: bitmap from the static table.
        bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
        p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
        break;
    }
    return p;
}
1455
1456 /* -----------------------------------------------------------------------------
 *  Compute the retainer set of *c0 and all its descendants by traversing.
1458 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
1459 * Invariants:
1460 * c0 = cp0 = r0 holds only for root objects.
1461 * RSET(cp0) and RSET(r0) are valid, i.e., their
1462 * interpretation conforms to the current value of flip (even when they
1463 * are interpreted to be NULL).
1464 * However, RSET(c0) may be corrupt, i.e., it may not conform to
1465 * the current value of flip. If it does not, during the execution
1466 * of this function, RSET(c0) must be initialized as well as all
1467 * its descendants.
1468 * Note:
1469 * stackTop must be the same at the beginning and the exit of this function.
1470 * *c0 can be TSO (as well as AP_STACK).
1471 * -------------------------------------------------------------------------- */
static void
retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
{
    // c = Current closure  (possibly tagged)
    // cp = Current closure's Parent  (NOT tagged)
    // r = current closures' most recent Retainer  (NOT tagged)
    // c_child_r = current closure's children's most recent retainer
    // first_child = first child of c
    StgClosure *c, *cp, *first_child;
    RetainerSet *s, *retainerSetOfc;
    retainer r, c_child_r;
    StgWord typeOfc;

#ifdef DEBUG_RETAINER
    // StgPtr oldStackTop;
#endif

#ifdef DEBUG_RETAINER
    // oldStackTop = stackTop;
    // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
#endif

    // (c, cp, r) = (c0, cp0, r0)
    c = c0;
    cp = cp0;
    r = r0;
    goto inner_loop;

    // The traversal is iterative: children still to be processed are kept
    // on the explicit traverse stack (push/pop), not the C call stack.
loop:
    //debugBelch("loop");
    // pop to (c, cp, r);
    pop(&c, &cp, &r);

    if (c == NULL) {
        // Traverse stack exhausted below currentStackBoundary: done.
#ifdef DEBUG_RETAINER
        // debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
#endif
        return;
    }

    //debugBelch("inner_loop");

inner_loop:
    c = UNTAG_CLOSURE(c);

    // c  = current closure under consideration,
    // cp = current closure's parent,
    // r  = current closure's most recent retainer
    //
    // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
    //   RSET(cp) and RSET(r) are valid.
    //   RSET(c) is valid only if c has been visited before.
    //
    // Loop invariants (on the relation between c, cp, and r)
    //   if cp is not a retainer, r belongs to RSET(cp).
    //   if cp is a retainer, r == cp.

    typeOfc = get_itbl(c)->type;

#ifdef DEBUG_RETAINER
    switch (typeOfc) {
    case IND_STATIC:
    case CONSTR_NOCAF_STATIC:
    case CONSTR_STATIC:
    case THUNK_STATIC:
    case FUN_STATIC:
        break;
    default:
        if (retainerSetOf(c) == NULL) {   // first visit?
            costArray[typeOfc] += cost(c);
            sumOfNewCost += cost(c);
        }
        break;
    }
#endif

    // special cases
    switch (typeOfc) {
    case TSO:
        // Finished/killed TSOs are skipped entirely; their stacks are
        // not ready to be traversed.
        if (((StgTSO *)c)->what_next == ThreadComplete ||
            ((StgTSO *)c)->what_next == ThreadKilled) {
#ifdef DEBUG_RETAINER
            debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
#endif
            goto loop;
        }
        break;

    case IND_STATIC:
        // We just skip IND_STATIC, so its retainer set is never computed.
        c = ((StgIndStatic *)c)->indirectee;
        goto inner_loop;
        // static objects with no pointers out, so goto loop.
    case CONSTR_NOCAF_STATIC:
        // It is not just enough not to compute the retainer set for *c; it is
        // mandatory because CONSTR_NOCAF_STATIC are not reachable from
        // scavenged_static_objects, the list from which is assumed to traverse
        // all static objects after major garbage collections.
        goto loop;
    case THUNK_STATIC:
    case FUN_STATIC:
        if (get_itbl(c)->srt_bitmap == 0) {
            // No need to compute the retainer set; no dynamic objects
            // are reachable from *c.
            //
            // Static objects: if we traverse all the live closures,
            // including static closures, during each heap census then
            // we will observe that some static closures appear and
            // disappear.  eg. a closure may contain a pointer to a
            // static function 'f' which is not otherwise reachable
            // (it doesn't indirectly point to any CAFs, so it doesn't
            // appear in any SRTs), so we would find 'f' during
            // traversal.  However on the next sweep there may be no
            // closures pointing to 'f'.
            //
            // We must therefore ignore static closures whose SRT is
            // empty, because these are exactly the closures that may
            // "appear".  A closure with a non-empty SRT, and which is
            // still required, will always be reachable.
            //
            // But what about CONSTR_STATIC?  Surely these may be able
            // to appear, and they don't have SRTs, so we can't
            // check.  So for now, we're calling
            // resetStaticObjectForRetainerProfiling() from the
            // garbage collector to reset the retainer sets in all the
            // reachable static objects.
            goto loop;
        }
    default:
        break;
    }

    // The above objects are ignored in computing the average number of times
    // an object is visited.
    timesAnyObjectVisited++;

    // If this is the first visit to c, initialize its retainer set.
    maybeInitRetainerSet(c);
    retainerSetOfc = retainerSetOf(c);

    // Now compute s:
    //    isRetainer(cp) == rtsTrue => s == NULL
    //    isRetainer(cp) == rtsFalse => s == cp.retainer
    if (isRetainer(cp))
        s = NULL;
    else
        s = retainerSetOf(cp);

    // (c, cp, r, s) is available.

    // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
    if (retainerSetOfc == NULL) {
        // This is the first visit to *c.
        numObjectVisited++;

        if (s == NULL)
            associate(c, singleton(r));
        else
            // s is actually the retainer set of *c!
            associate(c, s);

        // compute c_child_r
        c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
    } else {
        // This is not the first visit to *c.
        if (isMember(r, retainerSetOfc))
            goto loop;          // no need to process child

        if (s == NULL)
            associate(c, addElement(r, retainerSetOfc));
        else {
            // s is not NULL and cp is not a retainer. This means that
            // each time *cp is visited, so is *c. Thus, if s has
            // exactly one more element in its retainer set than c, s
            // is also the new retainer set for *c.
            if (s->num == retainerSetOfc->num + 1) {
                associate(c, s);
            }
            // Otherwise, just add R_r to the current retainer set of *c.
            else {
                associate(c, addElement(r, retainerSetOfc));
            }
        }

        if (isRetainer(c))
            goto loop;          // no need to process child

        // compute c_child_r
        c_child_r = r;
    }

    // now, RSET() of all of *c, *cp, and *r is valid.
    // (c, c_child_r) are available.

    // process child

    // Special case closures: we process these all in one go rather
    // than attempting to save the current position, because doing so
    // would be hard.
    switch (typeOfc) {
    case STACK:
        retainStack(c, c_child_r,
                    ((StgStack *)c)->sp,
                    ((StgStack *)c)->stack + ((StgStack *)c)->stack_size);
        goto loop;

    case TSO:
    {
        StgTSO *tso = (StgTSO *)c;

        retainClosure(tso->stackobj,           c, c_child_r);
        retainClosure(tso->blocked_exceptions, c, c_child_r);
        retainClosure(tso->bq,                 c, c_child_r);
        retainClosure(tso->trec,               c, c_child_r);
        // block_info.closure is only a closure when blocked on one of
        // these; otherwise it overlays non-pointer data.
        if (   tso->why_blocked == BlockedOnMVar
            || tso->why_blocked == BlockedOnMVarRead
            || tso->why_blocked == BlockedOnBlackHole
            || tso->why_blocked == BlockedOnMsgThrowTo
            ) {
            retainClosure(tso->block_info.closure, c, c_child_r);
        }
        goto loop;
    }

    case PAP:
    {
        StgPAP *pap = (StgPAP *)c;
        retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
        goto loop;
    }

    case AP:
    {
        StgAP *ap = (StgAP *)c;
        retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
        goto loop;
    }

    case AP_STACK:
        retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
        retainStack(c, c_child_r,
                    (StgPtr)((StgAP_STACK *)c)->payload,
                    (StgPtr)((StgAP_STACK *)c)->payload +
                             ((StgAP_STACK *)c)->size);
        goto loop;
    }

    // Generic case: push c's remaining children onto the traverse stack
    // and descend into the first one.
    push(c, c_child_r, &first_child);

    // If first_child is null, c has no child.
    // If first_child is not null, the top stack element points to the next
    // object. push() may or may not push a stackElement on the stack.
    if (first_child == NULL)
        goto loop;

    // (c, cp, r) = (first_child, c, c_child_r)
    r = c_child_r;
    cp = c;
    c = first_child;
    goto inner_loop;
}
1733
1734 /* -----------------------------------------------------------------------------
1735 * Compute the retainer set for every object reachable from *tl.
1736 * -------------------------------------------------------------------------- */
1737 static void
1738 retainRoot(void *user STG_UNUSED, StgClosure **tl)
1739 {
1740 StgClosure *c;
1741
1742 // We no longer assume that only TSOs and WEAKs are roots; any closure can
1743 // be a root.
1744
1745 ASSERT(isEmptyRetainerStack());
1746 currentStackBoundary = stackTop;
1747
1748 c = UNTAG_CLOSURE(*tl);
1749 maybeInitRetainerSet(c);
1750 if (c != &stg_END_TSO_QUEUE_closure && isRetainer(c)) {
1751 retainClosure(c, c, getRetainerFrom(c));
1752 } else {
1753 retainClosure(c, c, CCS_SYSTEM);
1754 }
1755
1756 // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
1757 // *tl might be a TSO which is ThreadComplete, in which
1758 // case we ignore it for the purposes of retainer profiling.
1759 }
1760
1761 /* -----------------------------------------------------------------------------
1762 * Compute the retainer set for each of the objects in the heap.
1763 * -------------------------------------------------------------------------- */
static void
computeRetainerSet( void )
{
    StgWeak *weak;
    RetainerSet *rtl;
    uint32_t g, n;
    StgPtr ml;
    bdescr *bd;
#ifdef DEBUG_RETAINER
    RetainerSet tmpRetainerSet;
#endif

    markCapabilities(retainRoot, NULL); // for scheduler roots

    // This function is called after a major GC, when key, value, and finalizer
    // all are guaranteed to be valid, or reachable.
    //
    // The following code assumes that WEAK objects are considered to be roots
    // for retainer profiling.
    for (n = 0; n < n_capabilities; n++) {
        // NB: after a GC, all nursery weak_ptr_lists have been migrated
        // to the global lists living in the generations
        ASSERT(capabilities[n]->weak_ptr_list_hd == NULL);
        ASSERT(capabilities[n]->weak_ptr_list_tl == NULL);
    }
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        for (weak = generations[g].weak_ptr_list; weak != NULL; weak = weak->link) {
            // retainRoot((StgClosure *)weak);
            retainRoot(NULL, (StgClosure **)&weak);
        }
    }

    // Consider roots from the stable ptr table.
    markStableTables(retainRoot, NULL);

    // The following code resets the rs field of each unvisited mutable
    // object (computing sumOfNewCostExtra and updating costArray[] when
    // debugging retainer profiler).
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        // NOT TRUE: even G0 has a block on its mutable list
        // ASSERT(g != 0 || (generations[g].mut_list == NULL));

        // Traversing through mut_list is necessary
        // because we can find MUT_VAR objects which have not been
        // visited during retainer profiling.
        for (n = 0; n < n_capabilities; n++) {
            for (bd = capabilities[n]->mut_lists[g]; bd != NULL; bd = bd->link) {
                // Each word on a mut_list block is a pointer to a mutable
                // object, not an object itself.
                for (ml = bd->start; ml < bd->free; ml++) {

                    maybeInitRetainerSet((StgClosure *)*ml);
                    rtl = retainerSetOf((StgClosure *)*ml);

#ifdef DEBUG_RETAINER
                    if (rtl == NULL) {
                        // first visit to *ml
                        // This is a violation of the interface rule!
                        RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);

                        switch (get_itbl((StgClosure *)ml)->type) {
                        case IND_STATIC:
                            // no cost involved
                            break;
                        case CONSTR_NOCAF_STATIC:
                        case CONSTR_STATIC:
                        case THUNK_STATIC:
                        case FUN_STATIC:
                            barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
                            break;
                        default:
                            // dynamic objects
                            costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
                            sumOfNewCostExtra += cost((StgClosure *)ml);
                            break;
                        }
                    }
#endif
                }
            }
        }
    }
}
1845
1846 /* -----------------------------------------------------------------------------
1847 * Traverse all static objects for which we compute retainer sets,
1848 * and reset their rs fields to NULL, which is accomplished by
1849 * invoking maybeInitRetainerSet(). This function must be called
1850 * before zeroing all objects reachable from scavenged_static_objects
 *  in the case of major garbage collections.  See GarbageCollect() in
1852 * GC.c.
1853 * Note:
1854 * The mut_once_list of the oldest generation must also be traversed?
1855 * Why? Because if the evacuation of an object pointed to by a static
1856 * indirection object fails, it is put back to the mut_once_list of
1857 * the oldest generation.
1858 * However, this is not necessary because any static indirection objects
1859 * are just traversed through to reach dynamic objects. In other words,
1860 * they are not taken into consideration in computing retainer sets.
1861 *
1862 * SDM (20/7/2011): I don't think this is doing anything sensible,
1863 * because it happens before retainerProfile() and at the beginning of
 *  retainerProfile() we change the sense of 'flip'.  So all of the
1865 * calls to maybeInitRetainerSet() here are initialising retainer sets
1866 * with the wrong flip. Also, I don't see why this is necessary. I
1867 * added a maybeInitRetainerSet() call to retainRoot(), and that seems
1868 * to have fixed the assertion failure in retainerSetOf() I was
1869 * encountering.
1870 * -------------------------------------------------------------------------- */
1871 void
1872 resetStaticObjectForRetainerProfiling( StgClosure *static_objects )
1873 {
1874 #ifdef DEBUG_RETAINER
1875 uint32_t count;
1876 #endif
1877 StgClosure *p;
1878
1879 #ifdef DEBUG_RETAINER
1880 count = 0;
1881 #endif
1882 p = static_objects;
1883 while (p != END_OF_STATIC_OBJECT_LIST) {
1884 p = UNTAG_STATIC_LIST_PTR(p);
1885 #ifdef DEBUG_RETAINER
1886 count++;
1887 #endif
1888 switch (get_itbl(p)->type) {
1889 case IND_STATIC:
1890 // Since we do not compute the retainer set of any
1891 // IND_STATIC object, we don't have to reset its retainer
1892 // field.
1893 p = (StgClosure*)*IND_STATIC_LINK(p);
1894 break;
1895 case THUNK_STATIC:
1896 maybeInitRetainerSet(p);
1897 p = (StgClosure*)*THUNK_STATIC_LINK(p);
1898 break;
1899 case FUN_STATIC:
1900 maybeInitRetainerSet(p);
1901 p = (StgClosure*)*FUN_STATIC_LINK(p);
1902 break;
1903 case CONSTR_STATIC:
1904 maybeInitRetainerSet(p);
1905 p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
1906 break;
1907 default:
1908 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1909 p, get_itbl(p)->type);
1910 break;
1911 }
1912 }
1913 #ifdef DEBUG_RETAINER
1914 // debugBelch("count in scavenged_static_objects = %d\n", count);
1915 #endif
1916 }
1917
1918 /* -----------------------------------------------------------------------------
1919 * Perform retainer profiling.
 *  N is the oldest generation being profiled, where the generations are
1921 * numbered starting at 0.
1922 * Invariants:
1923 * Note:
1924 * This function should be called only immediately after major garbage
1925 * collection.
1926 * ------------------------------------------------------------------------- */
void
retainerProfile(void)
{
#ifdef DEBUG_RETAINER
  uint32_t i;
  uint32_t totalHeapSize;        // total raw heap size (computed by linear scanning)
#endif

#ifdef DEBUG_RETAINER
  debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
#endif

  stat_startRP();

  // We haven't flipped the bit yet.
#ifdef DEBUG_RETAINER
  debugBelch("Before traversing:\n");
  sumOfCostLinear = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArrayLinear[i] = 0;
  totalHeapSize = checkHeapSanityForRetainerProfiling();

  debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
  /*
  debugBelch("costArrayLinear[] = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    debugBelch("[%u:%u] ", i, costArrayLinear[i]);
  debugBelch("\n");
  */

  ASSERT(sumOfCostLinear == totalHeapSize);

/*
#define pcostArrayLinear(index) \
  if (costArrayLinear[index] > 0) \
    debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
  pcostArrayLinear(THUNK_STATIC);
  pcostArrayLinear(FUN_STATIC);
  pcostArrayLinear(CONSTR_STATIC);
  pcostArrayLinear(CONSTR_NOCAF_STATIC);
*/
#endif

  // Now we flip the flip bit: every retainer-set field whose low bit does
  // not match the new value of flip is thereby invalidated, so all
  // closures become "unvisited" for this pass.
  flip = flip ^ 1;

#ifdef DEBUG_RETAINER
  stackSize = 0;
  maxStackSize = 0;
  cStackSize = 0;
  maxCStackSize = 0;
#endif
  numObjectVisited = 0;
  timesAnyObjectVisited = 0;

#ifdef DEBUG_RETAINER
  debugBelch("During traversing:\n");
  sumOfNewCost = 0;
  sumOfNewCostExtra = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArray[i] = 0;
#endif

  /*
    We initialize the traverse stack each time the retainer profiling is
    performed (because the traverse stack size varies on each retainer profiling
    and this operation is not costly anyhow). However, we just refresh the
    retainer sets.
  */
  initializeTraverseStack();
#ifdef DEBUG_RETAINER
  initializeAllRetainerSet();
#else
  refreshAllRetainerSet();
#endif
  computeRetainerSet();

#ifdef DEBUG_RETAINER
  debugBelch("After traversing:\n");
  sumOfCostLinear = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArrayLinear[i] = 0;
  totalHeapSize = checkHeapSanityForRetainerProfiling();

  debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
  ASSERT(sumOfCostLinear == totalHeapSize);

  // now, compare the two results
  /*
    Note:
      costArray[] must be exactly the same as costArrayLinear[].
      Known exceptions:
        1) Dead weak pointers, whose type is CONSTR. These objects are not
           reachable from any roots.
  */
  debugBelch("Comparison:\n");
  debugBelch("\tcostArrayLinear[] (must be empty) = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    if (costArray[i] != costArrayLinear[i])
      // nothing should be printed except MUT_VAR after major GCs
      debugBelch("[%u:%u] ", i, costArrayLinear[i]);
  debugBelch("\n");

  debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
  debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
  debugBelch("\tcostArray[] (must be empty) = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    if (costArray[i] != costArrayLinear[i])
      // nothing should be printed except MUT_VAR after major GCs
      debugBelch("[%u:%u] ", i, costArray[i]);
  debugBelch("\n");

  // only for major garbage collection
  ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
#endif

  // post-processing
  closeTraverseStack();
#ifdef DEBUG_RETAINER
  closeAllRetainerSet();
#else
  // Note that there is no post-processing for the retainer sets.
#endif
  retainerGeneration++;

  stat_endRP(
    retainerGeneration - 1,   // retainerGeneration has just been incremented!
#ifdef DEBUG_RETAINER
    maxCStackSize, maxStackSize,
#endif
    (double)timesAnyObjectVisited / numObjectVisited);
}
2059
2060 /* -----------------------------------------------------------------------------
2061 * DEBUGGING CODE
2062 * -------------------------------------------------------------------------- */
2063
2064 #ifdef DEBUG_RETAINER
2065
// A pointer r "looks like" a valid closure pointer when it is either a
// static closure, or heap-allocated in a block whose BF_FREE flag is clear
// (i.e. the block has not been returned to the free list), AND the word it
// points at is not 0xaa..aa — presumably the DEBUG-mode scrub pattern
// written over dead memory; TODO confirm against the block allocator.
#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
        ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
        ((StgWord)(*(StgPtr)r)!=(StgWord)0xaaaaaaaaaaaaaaaaULL))
2069
// Sanity-check a single heap closure and return its size in words
// (closure_sizeW).
//
// Preconditions checked: the closure carries a plausible info pointer, is
// not a static closure, and passes LOOKS_LIKE_PTR.
//
// Per the commentary at the top of this file, the low bit of the rs field
// relative to the global 'flip' encodes whether the closure was visited by
// the retainer traversal: when the bit disagrees with 'flip', the closure
// was never visited.  The only unvisited objects expected here are dead
// weak pointers (CONSTR with "DEAD_WEAK" type/desc); their cost is folded
// into costArray/sumOfNewCost.  Any other unvisited object is reported.
static uint32_t
sanityCheckHeapClosure( StgClosure *c )
{
    ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
    ASSERT(!closure_STATIC(c));
    ASSERT(LOOKS_LIKE_PTR(c));

    // rs low bit != flip  =>  not visited during the traversal
    if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
        if (get_itbl(c)->type == CONSTR &&
            !strcmp(GET_PROF_TYPE(get_itbl(c)), "DEAD_WEAK") &&
            !strcmp(GET_PROF_DESC(get_itbl(c)), "DEAD_WEAK")) {
            debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
            // Account for dead weak pointers separately; they are not
            // reachable from the roots, so the traversal never sees them.
            costArray[get_itbl(c)->type] += cost(c);
            sumOfNewCost += cost(c);
        } else
            debugBelch(
                "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
                flip, c, get_itbl(c)->type,
                get_itbl(c)->prof.closure_type, GET_PROF_DESC(get_itbl(c)),
                RSET(c));
    } else {
        // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
    }

    return closure_sizeW(c);
}
2096
2097 static uint32_t
2098 heapCheck( bdescr *bd )
2099 {
2100 StgPtr p;
2101 static uint32_t costSum, size;
2102
2103 costSum = 0;
2104 while (bd != NULL) {
2105 p = bd->start;
2106 while (p < bd->free) {
2107 size = sanityCheckHeapClosure((StgClosure *)p);
2108 sumOfCostLinear += size;
2109 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2110 p += size;
2111 // no need for slop check; I think slops are not used currently.
2112 }
2113 ASSERT(p == bd->free);
2114 costSum += bd->free - bd->start;
2115 bd = bd->link;
2116 }
2117
2118 return costSum;
2119 }
2120
2121 static uint32_t
2122 smallObjectPoolCheck(void)
2123 {
2124 bdescr *bd;
2125 StgPtr p;
2126 static uint32_t costSum, size;
2127
2128 bd = g0s0->blocks;
2129 costSum = 0;
2130
2131 // first block
2132 if (bd == NULL)
2133 return costSum;
2134
2135 p = bd->start;
2136 while (p < alloc_Hp) {
2137 size = sanityCheckHeapClosure((StgClosure *)p);
2138 sumOfCostLinear += size;
2139 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2140 p += size;
2141 }
2142 ASSERT(p == alloc_Hp);
2143 costSum += alloc_Hp - bd->start;
2144
2145 bd = bd->link;
2146 while (bd != NULL) {
2147 p = bd->start;
2148 while (p < bd->free) {
2149 size = sanityCheckHeapClosure((StgClosure *)p);
2150 sumOfCostLinear += size;
2151 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2152 p += size;
2153 }
2154 ASSERT(p == bd->free);
2155 costSum += bd->free - bd->start;
2156 bd = bd->link;
2157 }
2158
2159 return costSum;
2160 }
2161
2162 static uint32_t
2163 chainCheck(bdescr *bd)
2164 {
2165 uint32_t costSum, size;
2166
2167 costSum = 0;
2168 while (bd != NULL) {
2169 // bd->free - bd->start is not an accurate measurement of the
2170 // object size. Actually it is always zero, so we compute its
2171 // size explicitly.
2172 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2173 sumOfCostLinear += size;
2174 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
2175 costSum += size;
2176 bd = bd->link;
2177 }
2178
2179 return costSum;
2180 }
2181
2182 static uint32_t
2183 checkHeapSanityForRetainerProfiling( void )
2184 {
2185 uint32_t costSum, g, s;
2186
2187 costSum = 0;
2188 debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2189 if (RtsFlags.GcFlags.generations == 1) {
2190 costSum += heapCheck(g0s0->to_blocks);
2191 debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2192 costSum += chainCheck(g0s0->large_objects);
2193 debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2194 } else {
2195 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
2196 for (s = 0; s < generations[g].n_steps; s++) {
2197 /*
2198 After all live objects have been scavenged, the garbage
2199 collector may create some objects in
2200 scheduleFinalizers(). These objects are created through
2201 allocate(), so the small object pool or the large object
2202 pool of the g0s0 may not be empty.
2203 */
2204 if (g == 0 && s == 0) {
2205 costSum += smallObjectPoolCheck();
2206 debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2207 costSum += chainCheck(generations[g].steps[s].large_objects);
2208 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2209 } else {
2210 costSum += heapCheck(generations[g].steps[s].blocks);
2211 debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2212 costSum += chainCheck(generations[g].steps[s].large_objects);
2213 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2214 }
2215 }
2216 }
2217
2218 return costSum;
2219 }
2220
2221 void
2222 findPointer(StgPtr p)
2223 {
2224 StgPtr q, r, e;
2225 bdescr *bd;
2226 uint32_t g, s;
2227
2228 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2229 for (s = 0; s < generations[g].n_steps; s++) {
2230 // if (g == 0 && s == 0) continue;
2231 bd = generations[g].steps[s].blocks;
2232 for (; bd; bd = bd->link) {
2233 for (q = bd->start; q < bd->free; q++) {
2234 if (*q == (StgWord)p) {
2235 r = q;
2236 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2237 debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2238 // return;
2239 }
2240 }
2241 }
2242 bd = generations[g].steps[s].large_objects;
2243 for (; bd; bd = bd->link) {
2244 e = bd->start + cost((StgClosure *)bd->start);
2245 for (q = bd->start; q < e; q++) {
2246 if (*q == (StgWord)p) {
2247 r = q;
2248 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2249 debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
2250 // return;
2251 }
2252 }
2253 }
2254 }
2255 }
2256 }
2257
2258 static void
2259 belongToHeap(StgPtr p)
2260 {
2261 bdescr *bd;
2262 uint32_t g, s;
2263
2264 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2265 for (s = 0; s < generations[g].n_steps; s++) {
2266 // if (g == 0 && s == 0) continue;
2267 bd = generations[g].steps[s].blocks;
2268 for (; bd; bd = bd->link) {
2269 if (bd->start <= p && p < bd->free) {
2270 debugBelch("Belongs to gen[%d], step[%d]", g, s);
2271 return;
2272 }
2273 }
2274 bd = generations[g].steps[s].large_objects;
2275 for (; bd; bd = bd->link) {
2276 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2277 debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
2278 return;
2279 }
2280 }
2281 }
2282 }
2283 }
2284 #endif /* DEBUG_RETAINER */
2285
2286 #endif /* PROFILING */