Build RTS as dynamic library
[ghc.git] / rts / RetainerProfile.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2001
4 * Author: Sungwoo Park
5 *
6 * Retainer profiling.
7 *
8 * ---------------------------------------------------------------------------*/
9
10 #ifdef PROFILING
11
12 // Turn off inlining when debugging - it obfuscates things
13 #ifdef DEBUG
14 #define INLINE
15 #else
16 #define INLINE inline
17 #endif
18
19 #include "Rts.h"
20 #include "RtsUtils.h"
21 #include "RetainerProfile.h"
22 #include "RetainerSet.h"
23 #include "Schedule.h"
24 #include "Printer.h"
25 #include "RtsFlags.h"
26 #include "Weak.h"
27 #include "Sanity.h"
28 #include "Profiling.h"
29 #include "Stats.h"
30 #include "ProfHeap.h"
31 #include "Apply.h"
32
33 /*
34 Note: what to change in order to plug-in a new retainer profiling scheme?
35 (1) type retainer in ../includes/StgRetainerProf.h
36 (2) retainer function R(), i.e., getRetainerFrom()
37 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
38 in RetainerSet.h, if needed.
39 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
40 */
41
42 /* -----------------------------------------------------------------------------
43 * Declarations...
44 * -------------------------------------------------------------------------- */
45
46 static nat retainerGeneration; // generation
47
48 static nat numObjectVisited; // total number of objects visited
49 static nat timesAnyObjectVisited; // number of times any objects are visited
50
51 /*
52 The rs field in the profile header of any object points to its retainer
53 set in an indirect way: if flip is 0, it points to the retainer set;
54 if flip is 1, it points to the next byte after the retainer set (even
55 for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
56 pointer. See retainerSetOf().
57 */
58
59 StgWord flip = 0; // flip bit
60 // must be 0 if DEBUG_RETAINER is on (for static closures)
61
62 #define setRetainerSetToNull(c) \
63 (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)
64
65 static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
66 static void retainClosure(StgClosure *, StgClosure *, retainer);
67 #ifdef DEBUG_RETAINER
68 static void belongToHeap(StgPtr p);
69 #endif
70
71 #ifdef DEBUG_RETAINER
72 /*
73 cStackSize records how many times retainStack() has been invoked recursively,
74 that is, the number of activation records for retainStack() on the C stack.
75 maxCStackSize records its max value.
76 Invariants:
77 cStackSize <= maxCStackSize
78 */
79 static nat cStackSize, maxCStackSize;
80
81 static nat sumOfNewCost; // sum of the cost of each object, computed
82 // when the object is first visited
83 static nat sumOfNewCostExtra; // for those objects not visited during
84 // retainer profiling, e.g., MUT_VAR
85 static nat costArray[N_CLOSURE_TYPES];
86
87 nat sumOfCostLinear; // sum of the costs of all object, computed
88 // when linearly traversing the heap after
89 // retainer profiling
90 nat costArrayLinear[N_CLOSURE_TYPES];
91 #endif
92
/* -----------------------------------------------------------------------------
 * Retainer stack - header
 * Note:
 *   Although the retainer stack implementation could be separated
 *   from the retainer profiling engine, there does not seem to be
 *   any advantage in doing that; the retainer stack is an integral part
 *   of the retainer profiling engine and cannot be used elsewhere at
 *   all.
 * -------------------------------------------------------------------------- */
102
// Which scheme is used to enumerate the remaining children of the
// closure currently being traversed.
typedef enum {
    posTypeStep,       // fixed layout: 'step' numbers the next child
    posTypePtrs,       // layout.payload: pointers-first payload walk
    posTypeSRT,        // small SRT, guided by an inline bitmap
    posTypeLargeSRT,   // large SRT, with an out-of-line bitmap
} nextPosType;

// Cursor state for each of the enumeration schemes above; exactly one
// member is meaningful, selected by the accompanying nextPosType.
typedef union {
    // fixed layout or layout specified by a field in the closure
    StgWord step;

    // layout.payload
    struct {
        // See StgClosureInfo in InfoTables.h
#if SIZEOF_VOID_P == 8
        StgWord32 pos;    // index of the next pointer to visit
        StgWord32 ptrs;   // total number of pointers in the payload
#else
        StgWord16 pos;
        StgWord16 ptrs;
#endif
        StgPtr payload;   // start of the pointers-first payload
    } ptrs;

    // SRT
    struct {
        StgClosure **srt;     // next SRT entry to examine
        StgWord srt_bitmap;   // remaining bitmap, shifted as we go
    } srt;

    // Large SRT
    struct {
        StgLargeSRT *srt;
        StgWord offset;       // index of the next bit in the large bitmap
    } large_srt;

} nextPos;

// A position within a closure: traversal scheme plus its cursor.
typedef struct {
    nextPosType type;
    nextPos next;
} stackPos;

// One entry of the traverse stack: a closure, the retainer of its
// children, and the cursor into its remaining children.
typedef struct {
    StgClosure *c;
    retainer c_child_r;
    stackPos info;
} stackElement;
151
152 /*
153 Invariants:
154 firstStack points to the first block group.
155 currentStack points to the block group currently being used.
156 currentStack->free == stackLimit.
157 stackTop points to the topmost byte in the stack of currentStack.
158 Unless the whole stack is empty, stackTop must point to the topmost
159 object (or byte) in the whole stack. Thus, it is only when the whole stack
160 is empty that stackTop == stackLimit (not during the execution of push()
161 and pop()).
162 stackBottom == currentStack->start.
163 stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
164 Note:
165 When a current stack becomes empty, stackTop is set to point to
166 the topmost element on the previous block group so as to satisfy
167 the invariants described above.
168 */
169 static bdescr *firstStack = NULL;
170 static bdescr *currentStack;
171 static stackElement *stackBottom, *stackTop, *stackLimit;
172
173 /*
174 currentStackBoundary is used to mark the current stack chunk.
175 If stackTop == currentStackBoundary, it means that the current stack chunk
176 is empty. It is the responsibility of the user to keep currentStackBoundary
177 valid all the time if it is to be employed.
178 */
179 static stackElement *currentStackBoundary;
180
181 /*
182 stackSize records the current size of the stack.
183 maxStackSize records its high water mark.
184 Invariants:
185 stackSize <= maxStackSize
186 Note:
187 stackSize is just an estimate measure of the depth of the graph. The reason
188 is that some heap objects have only a single child and may not result
189 in a new element being pushed onto the stack. Therefore, at the end of
190 retainer profiling, maxStackSize + maxCStackSize is some value no greater
191 than the actual depth of the graph.
192 */
193 #ifdef DEBUG_RETAINER
194 static int stackSize, maxStackSize;
195 #endif
196
197 // number of blocks allocated for one stack
198 #define BLOCKS_IN_STACK 1
199
200 /* -----------------------------------------------------------------------------
201 * Add a new block group to the stack.
202 * Invariants:
203 * currentStack->link == s.
204 * -------------------------------------------------------------------------- */
205 static INLINE void
206 newStackBlock( bdescr *bd )
207 {
208 currentStack = bd;
209 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
210 stackBottom = (stackElement *)bd->start;
211 stackLimit = (stackElement *)stackTop;
212 bd->free = (StgPtr)stackLimit;
213 }
214
215 /* -----------------------------------------------------------------------------
216 * Return to the previous block group.
217 * Invariants:
218 * s->link == currentStack.
219 * -------------------------------------------------------------------------- */
220 static INLINE void
221 returnToOldStack( bdescr *bd )
222 {
223 currentStack = bd;
224 stackTop = (stackElement *)bd->free;
225 stackBottom = (stackElement *)bd->start;
226 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
227 bd->free = (StgPtr)stackLimit;
228 }
229
230 /* -----------------------------------------------------------------------------
231 * Initializes the traverse stack.
232 * -------------------------------------------------------------------------- */
233 static void
234 initializeTraverseStack( void )
235 {
236 if (firstStack != NULL) {
237 freeChain(firstStack);
238 }
239
240 firstStack = allocGroup(BLOCKS_IN_STACK);
241 firstStack->link = NULL;
242 firstStack->u.back = NULL;
243
244 newStackBlock(firstStack);
245 }
246
/* -----------------------------------------------------------------------------
 * Frees all the block groups in the traverse stack and marks the stack
 * as uninitialized (a later initializeTraverseStack() starts afresh).
 * Invariants:
 *   firstStack != NULL
 * -------------------------------------------------------------------------- */
static void
closeTraverseStack( void )
{
    freeChain(firstStack);
    firstStack = NULL;
}
258
259 /* -----------------------------------------------------------------------------
260 * Returns rtsTrue if the whole stack is empty.
261 * -------------------------------------------------------------------------- */
262 static INLINE rtsBool
263 isEmptyRetainerStack( void )
264 {
265 return (firstStack == currentStack) && stackTop == stackLimit;
266 }
267
/* -----------------------------------------------------------------------------
 * Returns the total number of blocks allocated to the traverse stack,
 * summed over every block group in the chain.
 * -------------------------------------------------------------------------- */
#ifdef DEBUG
lnat
retainerStackBlocks( void )
{
    lnat total = 0;
    bdescr *bd = firstStack;

    while (bd != NULL) {
        total += bd->blocks;
        bd = bd->link;
    }

    return total;
}
#endif
284
285 /* -----------------------------------------------------------------------------
286 * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
287 * i.e., if the current stack chunk is empty.
288 * -------------------------------------------------------------------------- */
289 static INLINE rtsBool
290 isOnBoundary( void )
291 {
292 return stackTop == currentStackBoundary;
293 }
294
295 /* -----------------------------------------------------------------------------
296 * Initializes *info from ptrs and payload.
297 * Invariants:
298 * payload[] begins with ptrs pointers followed by non-pointers.
299 * -------------------------------------------------------------------------- */
300 static INLINE void
301 init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
302 {
303 info->type = posTypePtrs;
304 info->next.ptrs.pos = 0;
305 info->next.ptrs.ptrs = ptrs;
306 info->next.ptrs.payload = payload;
307 }
308
309 /* -----------------------------------------------------------------------------
310 * Find the next object from *info.
311 * -------------------------------------------------------------------------- */
312 static INLINE StgClosure *
313 find_ptrs( stackPos *info )
314 {
315 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
316 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
317 } else {
318 return NULL;
319 }
320 }
321
322 /* -----------------------------------------------------------------------------
323 * Initializes *info from SRT information stored in *infoTable.
324 * -------------------------------------------------------------------------- */
325 static INLINE void
326 init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
327 {
328 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
329 info->type = posTypeLargeSRT;
330 info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
331 info->next.large_srt.offset = 0;
332 } else {
333 info->type = posTypeSRT;
334 info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
335 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
336 }
337 }
338
339 static INLINE void
340 init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
341 {
342 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
343 info->type = posTypeLargeSRT;
344 info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
345 info->next.large_srt.offset = 0;
346 } else {
347 info->type = posTypeSRT;
348 info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
349 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
350 }
351 }
352
353 /* -----------------------------------------------------------------------------
354 * Find the next object from *info.
355 * -------------------------------------------------------------------------- */
356 static INLINE StgClosure *
357 find_srt( stackPos *info )
358 {
359 StgClosure *c;
360 StgWord bitmap;
361
362 if (info->type == posTypeSRT) {
363 // Small SRT bitmap
364 bitmap = info->next.srt.srt_bitmap;
365 while (bitmap != 0) {
366 if ((bitmap & 1) != 0) {
367 #if defined(__PIC__) && defined(mingw32_TARGET_OS)
368 if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
369 c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
370 else
371 c = *(info->next.srt.srt);
372 #else
373 c = *(info->next.srt.srt);
374 #endif
375 bitmap = bitmap >> 1;
376 info->next.srt.srt++;
377 info->next.srt.srt_bitmap = bitmap;
378 return c;
379 }
380 bitmap = bitmap >> 1;
381 info->next.srt.srt++;
382 }
383 // bitmap is now zero...
384 return NULL;
385 }
386 else {
387 // Large SRT bitmap
388 nat i = info->next.large_srt.offset;
389 StgWord bitmap;
390
391 // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
392 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
393 bitmap = bitmap >> (i % BITS_IN(StgWord));
394 while (i < info->next.large_srt.srt->l.size) {
395 if ((bitmap & 1) != 0) {
396 c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
397 i++;
398 info->next.large_srt.offset = i;
399 return c;
400 }
401 i++;
402 if (i % BITS_IN(W_) == 0) {
403 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
404 } else {
405 bitmap = bitmap >> 1;
406 }
407 }
408 // reached the end of this bitmap.
409 info->next.large_srt.offset = i;
410 return NULL;
411 }
412 }
413
/* -----------------------------------------------------------------------------
 *  push() pushes a stackElement representing the next child of *c
 *  onto the traverse stack. If *c has no child, *first_child is set
 *  to NULL and nothing is pushed onto the stack. If *c has only one
 *  child, *c_child is set to that child and nothing is pushed onto
 *  the stack. If *c has more than two children, *first_child is set
 *  to the first child and a stackElement representing the second
 *  child is pushed onto the stack.

 *  Invariants:
 *    *c_child_r is the most recent retainer of *c's children.
 *    *c is not any of TSO, AP, PAP, AP_STACK, which means that
 *    there cannot be any stack objects.
 *  Note: SRTs are considered to be children as well.
 * -------------------------------------------------------------------------- */
static INLINE void
push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
{
    stackElement se;
    bdescr *nbd;      // Next Block Descriptor

#ifdef DEBUG_RETAINER
    // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    ASSERT(get_itbl(c)->type != TSO);
    ASSERT(get_itbl(c)->type != AP_STACK);

    //
    // fill in se
    //

    se.c = c;
    se.c_child_r = c_child_r;

    // fill in se.info
    switch (get_itbl(c)->type) {
        // no child, no SRT
    case CONSTR_0_1:
    case CONSTR_0_2:
    case CAF_BLACKHOLE:
    case BLACKHOLE:
    case SE_BLACKHOLE:
    case SE_CAF_BLACKHOLE:
    case ARR_WORDS:
        *first_child = NULL;
        return;

        // one child (fixed), no SRT
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
        *first_child = ((StgMutVar *)c)->var;
        return;
    case THUNK_SELECTOR:
        *first_child = ((StgSelector *)c)->selectee;
        return;
    case IND_PERM:
    case IND_OLDGEN_PERM:
    case IND_OLDGEN:
        *first_child = ((StgInd *)c)->indirectee;
        return;
    case CONSTR_1_0:
    case CONSTR_1_1:
        *first_child = c->payload[0];
        return;

        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;

        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
    case CONSTR_2_0:
        *first_child = c->payload[0];         // return the first pointer
        // se.info.type = posTypeStep;
        // se.info.next.step = 2;             // 2 = second
        break;

        // three children (fixed), no SRT
        // need to push a stackElement
    case MVAR:
        // head must be TSO and the head of a linked list of TSOs.
        // Should it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;                // 2 = second
        break;

        // three children (fixed), no SRT
    case WEAK:
        *first_child = ((StgWeak *)c)->key;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;
        break;

        // layout.payload.ptrs, no SRT
    case CONSTR:
    case STABLE_NAME:
    case BCO:
    case CONSTR_STATIC:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;   // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // layout.payload.ptrs, SRT
    case FUN:           // *c is a heap object.
    case FUN_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;

    case THUNK:
    case THUNK_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
    case FUN_1_0:
    case FUN_1_1:
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;

    case THUNK_1_0:
    case THUNK_1_1:
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;

    case FUN_STATIC:    // *c is a heap object.
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        // fall through to the SRT-only cases
    case FUN_0_1:
    case FUN_0_2:
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

        // SRT only
    case THUNK_STATIC:
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        // fall through to the SRT-only cases
    case THUNK_0_1:
    case THUNK_0_2:
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

    case TVAR_WATCH_QUEUE:
        *first_child = (StgClosure *)((StgTVarWatchQueue *)c)->closure;
        se.info.next.step = 2;                // 2 = second
        break;
    case TVAR:
        *first_child = (StgClosure *)((StgTVar *)c)->current_value;
        break;
    case TREC_HEADER:
        *first_child = (StgClosure *)((StgTRecHeader *)c)->enclosing_trec;
        break;
    case TREC_CHUNK:
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
        se.info.next.step = 0;                // entry no.
        break;

        // cannot appear
    case PAP:
    case AP:
    case AP_STACK:
    case TSO:
    case IND_STATIC:
    case CONSTR_NOCAF_STATIC:
        // stack objects
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
        // invalid objects
    case IND:
    case BLOCKED_FETCH:
    case FETCH_ME:
    case FETCH_ME_BQ:
    case RBH:
    case REMOTE_REF:
    case EVACUATED:
    case INVALID_OBJECT:
    default:
        barf("Invalid object *c in push()");
        return;
    }

    // The stack grows downwards; if there is no room for one more
    // stackElement in the current block group, move to (or allocate)
    // the next one.
    if (stackTop - 1 < stackBottom) {
#ifdef DEBUG_RETAINER
        // debugBelch("push() to the next stack.\n");
#endif
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
        currentStack->free = (StgPtr)stackTop;

        if (currentStack->link == NULL) {
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
            nbd->u.back = currentStack;
            currentStack->link = nbd;
        } else
            nbd = currentStack->link;

        newStackBlock(nbd);
    }

    // adjust stackTop (actual push)
    stackTop--;
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.

    // ToDo: The line below leads to the warning:
    //    warning: 'se.info.type' may be used uninitialized in this function
    // This is caused by the fact that there are execution paths through the
    // large switch statement above where some cases do not initialize this
    // field. Is this really harmless? Can we avoid the warning?
    *stackTop = se;

#ifdef DEBUG_RETAINER
    stackSize++;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    // ASSERT(stackSize >= 0);
    // debugBelch("stackSize = %d\n", stackSize);
#endif
}
675
/* -----------------------------------------------------------------------------
 *  popOff() and popOffReal(): Pop a stackElement off the traverse stack.
 *  Invariants:
 *    stackTop cannot be equal to stackLimit unless the whole stack is
 *    empty, in which case popOff() is not allowed.
 *  Note:
 *    You can think of popOffReal() as a part of popOff() which is
 *    executed at the end of popOff() if necessary. Since popOff() is
 *    likely to be executed quite often while popOffReal() is not, we
 *    separate popOffReal() from popOff(), which is declared as an
 *    INLINE function (for the sake of execution speed). popOffReal()
 *    is called only within popOff() and nowhere else.
 * -------------------------------------------------------------------------- */
static void
popOffReal(void)
{
    bdescr *pbd;    // Previous Block Descriptor

#ifdef DEBUG_RETAINER
    // debugBelch("pop() to the previous stack.\n");
#endif

    // the slow path is only taken when the last element of the current
    // chunk is being popped
    ASSERT(stackTop + 1 == stackLimit);
    ASSERT(stackBottom == (stackElement *)currentStack->start);

    if (firstStack == currentStack) {
        // The stack is completely empty.
        stackTop++;
        ASSERT(stackTop == stackLimit);
#ifdef DEBUG_RETAINER
        stackSize--;
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
        ASSERT(stackSize >= 0);
        debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
    currentStack->free = (StgPtr)stackLimit;

    // find the previous block descriptor
    pbd = currentStack->u.back;
    ASSERT(pbd != NULL);

    returnToOldStack(pbd);

#ifdef DEBUG_RETAINER
    stackSize--;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    /*
    ASSERT(stackSize >= 0);
    debugBelch("stackSize = %d\n", stackSize);
    */
#endif
}
735
// Fast-path pop: bump stackTop within the current chunk; fall back to
// popOffReal() only when this pop would step past the chunk's limit.
static INLINE void
popOff(void) {
#ifdef DEBUG_RETAINER
    // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    ASSERT(stackTop != stackLimit);
    ASSERT(!isEmptyRetainerStack());

    // <= (instead of <) is wrong!
    if (stackTop + 1 < stackLimit) {
        stackTop++;
#ifdef DEBUG_RETAINER
        stackSize--;
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
        ASSERT(stackSize >= 0);
        debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    popOffReal();
}
761
/* -----------------------------------------------------------------------------
 *  Finds the next object to be considered for retainer profiling and stores
 *  its pointer to *c.
 *  Test if the topmost stack element indicates that more objects are left,
 *  and if so, retrieve the first object and store its pointer to *c. Also,
 *  set *cp and *r appropriately, both of which are stored in the stack
 *  element.  The topmost stack element then is overwritten so as for it to
 *  now denote the next object.
 *  If the topmost stack element indicates no more objects are left, pop
 *  off the stack element until either an object can be retrieved or
 *  the current stack chunk becomes empty, indicated by rtsTrue returned by
 *  isOnBoundary(), in which case *c is set to NULL.
 *  Note:
 *    It is okay to call this function even when the current stack chunk
 *    is empty.
 * -------------------------------------------------------------------------- */
static INLINE void
pop( StgClosure **c, StgClosure **cp, retainer *r )
{
    stackElement *se;

#ifdef DEBUG_RETAINER
    // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    do {
        if (isOnBoundary()) {     // if the current stack chunk is depleted
            *c = NULL;
            return;
        }

        // peek at the topmost element; it is only popped off once its
        // last child has been handed out
        se = stackTop;

        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

            // three children (fixed), no SRT
            // need to push a stackElement
        case MVAR:
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;             // move to the next step
                // no popOff
            } else {
                *c = ((StgMVar *)se->c)->value;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
                // no popOff
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR_WATCH_QUEUE:
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgTVarWatchQueue *)se->c)->next_queue_entry;
                se->info.next.step++;             // move to the next step
                // no popOff
            } else {
                *c = (StgClosure *)((StgTVarWatchQueue *)se->c)->prev_queue_entry;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR:
            *c = (StgClosure *)((StgTVar *)se->c)->first_watch_queue_entry;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_HEADER:
            *c = (StgClosure *)((StgTRecHeader *)se->c)->current_chunk;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow.  So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            nat entry_no = se->info.next.step >> 2;
            nat field_no = se->info.next.step & 3;
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                // all entries exhausted
                *c = NULL;
                popOff();
                return;
            }
            TRecEntry *entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            *cp = se->c;
            *r = se->c_child_r;
            se->info.next.step++;
            return;
        }

        case CONSTR:
        case STABLE_NAME:
        case BCO:
        case CONSTR_STATIC:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
        case MUT_ARR_PTRS_FROZEN:
        case MUT_ARR_PTRS_FROZEN0:
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
                // this closure is done; pop and retry with the next one
                popOff();
                break;
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // layout.payload.ptrs, SRT
        case FUN:         // always a heap object
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                // payload pointers exhausted; switch over to the SRT
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                // payload pointers exhausted; switch over to the SRT
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
            if (*c != NULL) {
                *cp = se->c;
                *r = se->c_child_r;
                return;
            }
            // SRT exhausted; pop and retry with the next element
            popOff();
            break;

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case CAF_BLACKHOLE:
        case BLACKHOLE:
        case SE_BLACKHOLE:
        case SE_CAF_BLACKHOLE:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case IND_PERM:
        case IND_OLDGEN_PERM:
        case IND_OLDGEN:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
        case IND_STATIC:
        case CONSTR_NOCAF_STATIC:
            // stack objects
        case RET_DYN:
        case UPDATE_FRAME:
        case CATCH_FRAME:
        case STOP_FRAME:
        case RET_BCO:
        case RET_SMALL:
        case RET_BIG:
            // invalid objects
        case IND:
        case BLOCKED_FETCH:
        case FETCH_ME:
        case FETCH_ME_BQ:
        case RBH:
        case REMOTE_REF:
        case EVACUATED:
        case INVALID_OBJECT:
        default:
            barf("Invalid object *c in pop()");
            return;
        }
    } while (rtsTrue);
}
1000
1001 /* -----------------------------------------------------------------------------
1002 * RETAINER PROFILING ENGINE
1003 * -------------------------------------------------------------------------- */
1004
// Resets the global retainer-set store and restarts the pass counter;
// called once before the first retainer-profiling pass.
void
initRetainerProfiling( void )
{
    initializeAllRetainerSet();
    retainerGeneration = 0;
}
1011
/* -----------------------------------------------------------------------------
 * This function must be called before f-closing prof_file.
 * -------------------------------------------------------------------------- */
void
endRetainerProfiling( void )
{
#ifdef SECOND_APPROACH
    // dump every retainer set accumulated over the whole run
    outputAllRetainerSet(prof_file);
#endif
}
1022
1023 /* -----------------------------------------------------------------------------
1024 * Returns the actual pointer to the retainer set of the closure *c.
1025 * It may adjust RSET(c) subject to flip.
1026 * Side effects:
1027 * RSET(c) is initialized to NULL if its current value does not
1028 * conform to flip.
1029 * Note:
1030 * Even though this function has side effects, they CAN be ignored because
1031 * subsequent calls to retainerSetOf() always result in the same return value
1032 * and retainerSetOf() is the only way to retrieve retainerSet of a given
1033 * closure.
1034 * We have to perform an XOR (^) operation each time a closure is examined.
1035 * The reason is that we do not know when a closure is visited last.
1036 * -------------------------------------------------------------------------- */
1037 static INLINE void
1038 maybeInitRetainerSet( StgClosure *c )
1039 {
1040 if (!isRetainerSetFieldValid(c)) {
1041 setRetainerSetToNull(c);
1042 }
1043 }
1044
1045 /* -----------------------------------------------------------------------------
1046 * Returns rtsTrue if *c is a retainer.
1047 * -------------------------------------------------------------------------- */
// Predicate: does *c itself act as a retainer, i.e. can it appear as an
// element of a retainer set?  Objects that are merely traversed through
// (constructors, functions, indirections, ...) answer rtsFalse; closure
// types that must never be seen by the traversal barf.
static INLINE rtsBool
isRetainer( StgClosure *c )
{
    switch (get_itbl(c)->type) {
	//
	//  True case
	//
	// TSOs MUST be retainers: they constitute the set of roots.
    case TSO:

	// mutable objects
    case MVAR:
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:

	// thunks are retainers.
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
    case AP:
    case AP_STACK:

	// Static thunks, or CAFS, are obviously retainers.
    case THUNK_STATIC:

	// WEAK objects are roots; there is separate code in which traversing
	// begins from WEAK objects.
    case WEAK:

	// Since the other mutvar-type things are retainers, seems
	// like the right thing to do:
    case TVAR:
	return rtsTrue;

	//
	// False case
	//

	// constructors
    case CONSTR:
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
	// functions
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
	// partial applications
    case PAP:
	// blackholes
    case CAF_BLACKHOLE:
    case BLACKHOLE:
    case SE_BLACKHOLE:
    case SE_CAF_BLACKHOLE:
	// indirection
    case IND_PERM:
    case IND_OLDGEN_PERM:
    case IND_OLDGEN:
	// static objects
    case CONSTR_STATIC:
    case FUN_STATIC:
	// misc
    case STABLE_NAME:
    case BCO:
    case ARR_WORDS:
	// STM
    case TVAR_WATCH_QUEUE:
    case TREC_HEADER:
    case TREC_CHUNK:
	return rtsFalse;

	//
	// Error case
	//
	// IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
    case IND_STATIC:
	// CONSTR_NOCAF_STATIC
	// cannot be *c, *cp, *r in the retainer profiling loop.
    case CONSTR_NOCAF_STATIC:
	// Stack objects are invalid because they are never treated as
	// legal objects during retainer profiling.
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
	// other cases
    case IND:
    case BLOCKED_FETCH:
    case FETCH_ME:
    case FETCH_ME_BQ:
    case RBH:
    case REMOTE_REF:
    case EVACUATED:
    case INVALID_OBJECT:
    default:
	barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
	return rtsFalse;
    }
}
1163
1164 /* -----------------------------------------------------------------------------
1165 * Returns the retainer function value for the closure *c, i.e., R(*c).
1166 * This function does NOT return the retainer(s) of *c.
1167 * Invariants:
1168 * *c must be a retainer.
1169 * Note:
1170 * Depending on the definition of this function, the maintenance of retainer
1171 * sets can be made easier. If most retainer sets are likely to be created
1172 * again across garbage collections, refreshAllRetainerSet() in
1173 * RetainerSet.c can simply do nothing.
1174 * If this is not the case, we can free all the retainer sets and
1175 * re-initialize the hash table.
1176 * See refreshAllRetainerSet() in RetainerSet.c.
1177 * -------------------------------------------------------------------------- */
static INLINE retainer
getRetainerFrom( StgClosure *c )
{
    ASSERT(isRetainer(c));

    // NOTE(review): if none of the RETAINER_SCHEME_* macros is defined,
    // this non-void function falls off the end without returning —
    // presumably exactly one scheme is always selected in
    // RetainerProfile.h / the build system; confirm.
#if defined(RETAINER_SCHEME_INFO)
    // Retainer scheme 1: retainer = info table
    return get_itbl(c);
#elif defined(RETAINER_SCHEME_CCS)
    // Retainer scheme 2: retainer = cost centre stack
    return c->header.prof.ccs;
#elif defined(RETAINER_SCHEME_CC)
    // Retainer scheme 3: retainer = cost centre
    return c->header.prof.ccs->cc;
#endif
}
1194
1195 /* -----------------------------------------------------------------------------
1196 * Associates the retainer set *s with the closure *c, that is, *s becomes
1197 * the retainer set of *c.
1198 * Invariants:
1199 * c != NULL
1200 * s != NULL
1201 * -------------------------------------------------------------------------- */
1202 static INLINE void
1203 associate( StgClosure *c, RetainerSet *s )
1204 {
1205 // StgWord has the same size as pointers, so the following type
1206 // casting is okay.
1207 RSET(c) = (RetainerSet *)((StgWord)s | flip);
1208 }
1209
1210 /* -----------------------------------------------------------------------------
1211 Call retainClosure for each of the closures covered by a large bitmap.
1212 -------------------------------------------------------------------------- */
1213
// Walk 'size' stack words starting at p under a large (multi-word)
// bitmap.  A CLEAR bit marks a pointer word (stack-frame bitmap
// convention), which is traversed via retainClosure() with *c as parent
// and c_child_r as the most recent retainer.
static void
retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size,
		     StgClosure *c, retainer c_child_r)
{
    nat i, b;
    StgWord bitmap;

    b = 0;
    bitmap = large_bitmap->bitmap[b];
    for (i = 0; i < size; ) {
	if ((bitmap & 1) == 0) {
	    retainClosure((StgClosure *)*p, c, c_child_r);
	}
	i++;
	p++;
	// Every BITS_IN(W_) entries we exhaust the current bitmap word
	// and move on to the next one.
	if (i % BITS_IN(W_) == 0) {
	    b++;
	    bitmap = large_bitmap->bitmap[b];
	} else {
	    bitmap = bitmap >> 1;
	}
    }
}
1237
1238 static INLINE StgPtr
1239 retain_small_bitmap (StgPtr p, nat size, StgWord bitmap,
1240 StgClosure *c, retainer c_child_r)
1241 {
1242 while (size > 0) {
1243 if ((bitmap & 1) == 0) {
1244 retainClosure((StgClosure *)*p, c, c_child_r);
1245 }
1246 p++;
1247 bitmap = bitmap >> 1;
1248 size--;
1249 }
1250 return p;
1251 }
1252
1253 /* -----------------------------------------------------------------------------
1254 * Call retainClosure for each of the closures in an SRT.
1255 * ------------------------------------------------------------------------- */
1256
// Traverse every live entry of a large SRT.  NB: the SRT bitmap
// convention is the OPPOSITE of the stack-frame convention above: a SET
// bit marks a live SRT entry to be followed.
static void
retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
{
    nat i, b, size;
    StgWord bitmap;
    StgClosure **p;

    b = 0;
    p = (StgClosure **)srt->srt;
    size = srt->l.size;
    bitmap = srt->l.bitmap[b];
    for (i = 0; i < size; ) {
	if ((bitmap & 1) != 0) {
	    retainClosure((StgClosure *)*p, c, c_child_r);
	}
	i++;
	p++;
	// Move to the next bitmap word every BITS_IN(W_) entries.
	if (i % BITS_IN(W_) == 0) {
	    b++;
	    bitmap = srt->l.bitmap[b];
	} else {
	    bitmap = bitmap >> 1;
	}
    }
}
1282
1283 static INLINE void
1284 retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
1285 {
1286 nat bitmap;
1287 StgClosure **p;
1288
1289 bitmap = srt_bitmap;
1290 p = srt;
1291
1292 if (bitmap == (StgHalfWord)(-1)) {
1293 retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1294 return;
1295 }
1296
1297 while (bitmap != 0) {
1298 if ((bitmap & 1) != 0) {
1299 #ifdef ENABLE_WIN32_DLL_SUPPORT
1300 if ( (unsigned long)(*srt) & 0x1 ) {
1301 retainClosure(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1)),
1302 c, c_child_r);
1303 } else {
1304 retainClosure(*srt,c,c_child_r);
1305 }
1306 #else
1307 retainClosure(*srt,c,c_child_r);
1308 #endif
1309 }
1310 p++;
1311 bitmap = bitmap >> 1;
1312 }
1313 }
1314
1315 /* -----------------------------------------------------------------------------
1316 * Process all the objects in the stack chunk from stackStart to stackEnd
1317 * with *c and *c_child_r being their parent and their most recent retainer,
1318 * respectively. Treat stackOptionalFun as another child of *c if it is
1319 * not NULL.
1320 * Invariants:
1321 * *c is one of the following: TSO, AP_STACK.
1322 * If *c is TSO, c == c_child_r.
1323 * stackStart < stackEnd.
1324 * RSET(c) and RSET(c_child_r) are valid, i.e., their
1325 * interpretation conforms to the current value of flip (even when they
1326 * are interpreted to be NULL).
1327 * If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
1328 * or ThreadKilled, which means that its stack is ready to process.
1329 * Note:
 * This code was almost plagiarized from GC.c! For each pointer,
1331 * retainClosure() is invoked instead of evacuate().
1332 * -------------------------------------------------------------------------- */
static void
retainStack( StgClosure *c, retainer c_child_r,
	     StgPtr stackStart, StgPtr stackEnd )
{
    stackElement *oldStackBoundary;
    StgPtr p;
    StgRetInfoTable *info;
    StgWord32 bitmap;
    nat size;

#ifdef DEBUG_RETAINER
    cStackSize++;
    if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
#endif

    /*
      Each invocation of retainStack() creates a new virtual
      stack. Since all such stacks share a single common stack, we
      record the current currentStackBoundary, which will be restored
      at the exit.
    */
    oldStackBoundary = currentStackBoundary;
    currentStackBoundary = stackTop;

#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
#endif

    // A TSO whose stack we walk must actually be runnable (see the
    // invariants in the block comment above).
    ASSERT(get_itbl(c)->type != TSO ||
	   (((StgTSO *)c)->what_next != ThreadRelocated &&
	    ((StgTSO *)c)->what_next != ThreadComplete &&
	    ((StgTSO *)c)->what_next != ThreadKilled));

    // Walk the stack frame by frame, dispatching on the return info
    // table of each frame.
    p = stackStart;
    while (p < stackEnd) {
	info = get_ret_itbl((StgClosure *)p);

	switch(info->i.type) {

	case UPDATE_FRAME:
	    retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
	    p += sizeofW(StgUpdateFrame);
	    continue;

	case STOP_FRAME:
	case CATCH_FRAME:
	case CATCH_STM_FRAME:
	case CATCH_RETRY_FRAME:
	case ATOMICALLY_FRAME:
	case RET_SMALL:
	    // Small-bitmap frame: one header word, then 'size' payload
	    // words described by an inline bitmap.
	    bitmap = BITMAP_BITS(info->i.layout.bitmap);
	    size = BITMAP_SIZE(info->i.layout.bitmap);
	    p++;
	    p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

	follow_srt:
	    retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
	    continue;

	case RET_BCO: {
	    StgBCO *bco;

	    // Frame layout: header word, BCO pointer, then the payload
	    // described by the BCO's own bitmap.
	    p++;
	    retainClosure((StgClosure *)*p, c, c_child_r);
	    bco = (StgBCO *)*p;
	    p++;
	    size = BCO_BITMAP_SIZE(bco);
	    retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
	    p += size;
	    continue;
	}

	    // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
	case RET_BIG:
	    size = GET_LARGE_BITMAP(&info->i)->size;
	    p++;
	    retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
				size, c, c_child_r);
	    p += size;
	    // and don't forget to follow the SRT
	    goto follow_srt;

	    // Dynamic bitmap: the mask is stored on the stack
	case RET_DYN: {
	    StgWord dyn;
	    dyn = ((StgRetDyn *)p)->liveness;

	    // traverse the bitmap first
	    bitmap = RET_DYN_LIVENESS(dyn);
	    p = (P_)&((StgRetDyn *)p)->payload[0];
	    size = RET_DYN_BITMAP_SIZE;
	    p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

	    // skip over the non-ptr words
	    p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;

	    // follow the ptr words
	    for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
		retainClosure((StgClosure *)*p, c, c_child_r);
		p++;
	    }
	    continue;
	}

	case RET_FUN: {
	    StgRetFun *ret_fun = (StgRetFun *)p;
	    StgFunInfoTable *fun_info;

	    retainClosure(ret_fun->fun, c, c_child_r);
	    fun_info = get_fun_itbl(ret_fun->fun);

	    // The payload layout is determined by the function's
	    // argument pattern, not by the frame itself.
	    p = (P_)&ret_fun->payload;
	    switch (fun_info->f.fun_type) {
	    case ARG_GEN:
		bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
		size = BITMAP_SIZE(fun_info->f.b.bitmap);
		p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
		break;
	    case ARG_GEN_BIG:
		size = GET_FUN_LARGE_BITMAP(fun_info)->size;
		retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
				    size, c, c_child_r);
		p += size;
		break;
	    default:
		// Standard argument pattern: use the precomputed
		// bitmap from stg_arg_bitmaps.
		bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
		size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
		p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
		break;
	    }
	    goto follow_srt;
	}

	default:
	    barf("Invalid object found in retainStack(): %d",
		 (int)(info->i.type));
	}
    }

    // restore currentStackBoundary
    currentStackBoundary = oldStackBoundary;
#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
#endif

#ifdef DEBUG_RETAINER
    cStackSize--;
#endif
}
1482
1483 /* ----------------------------------------------------------------------------
1484 * Call retainClosure for each of the children of a PAP/AP
1485 * ------------------------------------------------------------------------- */
1486
static INLINE StgPtr
retain_PAP_payload (StgClosure *pap,    /* NOT tagged */
                    retainer c_child_r, /* NOT tagged */
                    StgClosure *fun,    /* tagged */
		    StgClosure** payload, StgWord n_args)
{
    StgPtr p;
    StgWord bitmap;
    StgFunInfoTable *fun_info;

    // The function closure itself is a child of the PAP/AP.
    retainClosure(fun, pap, c_child_r);
    fun = UNTAG_CLOSURE(fun);
    fun_info = get_fun_itbl(fun);
    ASSERT(fun_info->i.type != PAP);

    p = (StgPtr)payload;

    // The payload layout is dictated by the function's argument
    // pattern; only the first n_args words are present.
    switch (fun_info->f.fun_type) {
    case ARG_GEN:
	bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
	p = retain_small_bitmap(p, n_args, bitmap,
				pap, c_child_r);
	break;
    case ARG_GEN_BIG:
	retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
			    n_args, pap, c_child_r);
	p += n_args;
	break;
    case ARG_BCO:
	retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun),
			    n_args, pap, c_child_r);
	p += n_args;
	break;
    default:
	// Standard argument pattern: use the precomputed bitmap.
	bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
	p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
	break;
    }
    return p;
}
1527
1528 /* -----------------------------------------------------------------------------
 * Compute the retainer set of *c0 and all its descendants by traversing.
1530 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
1531 * Invariants:
1532 * c0 = cp0 = r0 holds only for root objects.
1533 * RSET(cp0) and RSET(r0) are valid, i.e., their
1534 * interpretation conforms to the current value of flip (even when they
1535 * are interpreted to be NULL).
1536 * However, RSET(c0) may be corrupt, i.e., it may not conform to
1537 * the current value of flip. If it does not, during the execution
1538 * of this function, RSET(c0) must be initialized as well as all
1539 * its descendants.
1540 * Note:
1541 * stackTop must be the same at the beginning and the exit of this function.
1542 * *c0 can be TSO (as well as AP_STACK).
1543 * -------------------------------------------------------------------------- */
// Core traversal: computes the retainer set of *c0 and everything
// reachable from it.  Uses an explicit traverse stack (push()/pop())
// rather than recursion; only TSO stacks, PAP/AP payloads and AP_STACKs
// recurse via the C stack.
static void
retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
{
    // c = Current closure  (possibly tagged)
    // cp = Current closure's Parent  (NOT tagged)
    // r = current closures' most recent Retainer  (NOT tagged)
    // c_child_r = current closure's children's most recent retainer
    // first_child = first child of c
    StgClosure *c, *cp, *first_child;
    RetainerSet *s, *retainerSetOfc;
    retainer r, c_child_r;
    StgWord typeOfc;

#ifdef DEBUG_RETAINER
    // StgPtr oldStackTop;
#endif

#ifdef DEBUG_RETAINER
    // oldStackTop = stackTop;
    // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
#endif

    // (c, cp, r) = (c0, cp0, r0)
    c = c0;
    cp = cp0;
    r = r0;
    goto inner_loop;

loop:
    //debugBelch("loop");
    // pop to (c, cp, r);
    pop(&c, &cp, &r);

    // A NULL pop means the traverse stack is exhausted: done.
    if (c == NULL) {
#ifdef DEBUG_RETAINER
	// debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
#endif
	return;
    }

    //debugBelch("inner_loop");

inner_loop:
    c = UNTAG_CLOSURE(c);

    // c = current closure under consideration,
    // cp = current closure's parent,
    // r = current closure's most recent retainer
    //
    // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
    // RSET(cp) and RSET(r) are valid.
    // RSET(c) is valid only if c has been visited before.
    //
    // Loop invariants (on the relation between c, cp, and r)
    // if cp is not a retainer, r belongs to RSET(cp).
    // if cp is a retainer, r == cp.

    typeOfc = get_itbl(c)->type;

#ifdef DEBUG_RETAINER
    switch (typeOfc) {
    case IND_STATIC:
    case CONSTR_NOCAF_STATIC:
    case CONSTR_STATIC:
    case THUNK_STATIC:
    case FUN_STATIC:
	break;
    default:
	if (retainerSetOf(c) == NULL) {   // first visit?
	    costArray[typeOfc] += cost(c);
	    sumOfNewCost += cost(c);
	}
	break;
    }
#endif

    // special cases
    switch (typeOfc) {
    case TSO:
	// Dead or relocated threads are skipped / followed through.
	if (((StgTSO *)c)->what_next == ThreadComplete ||
	    ((StgTSO *)c)->what_next == ThreadKilled) {
#ifdef DEBUG_RETAINER
	    debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
#endif
	    goto loop;
	}
	if (((StgTSO *)c)->what_next == ThreadRelocated) {
#ifdef DEBUG_RETAINER
	    debugBelch("ThreadRelocated encountered in retainClosure()\n");
#endif
	    c = (StgClosure *)((StgTSO *)c)->link;
	    goto inner_loop;
	}
	break;

    case IND_STATIC:
	// We just skip IND_STATIC, so its retainer set is never computed.
	c = ((StgIndStatic *)c)->indirectee;
	goto inner_loop;
	// static objects with no pointers out, so goto loop.
    case CONSTR_NOCAF_STATIC:
	// It is not just enough not to compute the retainer set for *c; it is
	// mandatory because CONSTR_NOCAF_STATIC are not reachable from
	// scavenged_static_objects, the list from which is assumed to traverse
	// all static objects after major garbage collections.
	goto loop;
    case THUNK_STATIC:
    case FUN_STATIC:
	if (get_itbl(c)->srt_bitmap == 0) {
	    // No need to compute the retainer set; no dynamic objects
	    // are reachable from *c.
	    //
	    // Static objects: if we traverse all the live closures,
	    // including static closures, during each heap census then
	    // we will observe that some static closures appear and
	    // disappear.  eg. a closure may contain a pointer to a
	    // static function 'f' which is not otherwise reachable
	    // (it doesn't indirectly point to any CAFs, so it doesn't
	    // appear in any SRTs), so we would find 'f' during
	    // traversal.  However on the next sweep there may be no
	    // closures pointing to 'f'.
	    //
	    // We must therefore ignore static closures whose SRT is
	    // empty, because these are exactly the closures that may
	    // "appear".  A closure with a non-empty SRT, and which is
	    // still required, will always be reachable.
	    //
	    // But what about CONSTR_STATIC?  Surely these may be able
	    // to appear, and they don't have SRTs, so we can't
	    // check.  So for now, we're calling
	    // resetStaticObjectForRetainerProfiling() from the
	    // garbage collector to reset the retainer sets in all the
	    // reachable static objects.
	    goto loop;
	}
    default:
	break;
    }

    // The above objects are ignored in computing the average number of times
    // an object is visited.
    timesAnyObjectVisited++;

    // If this is the first visit to c, initialize its retainer set.
    maybeInitRetainerSet(c);
    retainerSetOfc = retainerSetOf(c);

    // Now compute s:
    //    isRetainer(cp) == rtsTrue  => s == NULL
    //    isRetainer(cp) == rtsFalse => s == cp.retainer
    if (isRetainer(cp))
	s = NULL;
    else
	s = retainerSetOf(cp);

    // (c, cp, r, s) is available.

    // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
    if (retainerSetOfc == NULL) {
	// This is the first visit to *c.
	numObjectVisited++;

	if (s == NULL)
	    associate(c, singleton(r));
	else
	    // s is actually the retainer set of *c!
	    associate(c, s);

	// compute c_child_r
	c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
    } else {
	// This is not the first visit to *c.
	if (isMember(r, retainerSetOfc))
	    goto loop;          // no need to process child

	if (s == NULL)
	    associate(c, addElement(r, retainerSetOfc));
	else {
	    // s is not NULL and cp is not a retainer. This means that
	    // each time *cp is visited, so is *c. Thus, if s has
	    // exactly one more element in its retainer set than c, s
	    // is also the new retainer set for *c.
	    if (s->num == retainerSetOfc->num + 1) {
		associate(c, s);
	    }
	    // Otherwise, just add R_r to the current retainer set of *c.
	    else {
		associate(c, addElement(r, retainerSetOfc));
	    }
	}

	if (isRetainer(c))
	    goto loop;          // no need to process child

	// compute c_child_r
	c_child_r = r;
    }

    // now, RSET() of all of *c, *cp, and *r is valid.
    // (c, c_child_r) are available.

    // process child

    // Special case closures: we process these all in one go rather
    // than attempting to save the current position, because doing so
    // would be hard.
    switch (typeOfc) {
    case TSO:
	retainStack(c, c_child_r,
		    ((StgTSO *)c)->sp,
		    ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
	goto loop;

    case PAP:
    {
	StgPAP *pap = (StgPAP *)c;
	retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
	goto loop;
    }

    case AP:
    {
	StgAP *ap = (StgAP *)c;
	retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
	goto loop;
    }

    case AP_STACK:
	retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
	retainStack(c, c_child_r,
		    (StgPtr)((StgAP_STACK *)c)->payload,
		    (StgPtr)((StgAP_STACK *)c)->payload +
			     ((StgAP_STACK *)c)->size);
	goto loop;
    }

    push(c, c_child_r, &first_child);

    // If first_child is null, c has no child.
    // If first_child is not null, the top stack element points to the next
    // object. push() may or may not push a stackElement on the stack.
    if (first_child == NULL)
	goto loop;

    // (c, cp, r) = (first_child, c, c_child_r)
    r = c_child_r;
    cp = c;
    c = first_child;
    goto inner_loop;
}
1794
1795 /* -----------------------------------------------------------------------------
1796 * Compute the retainer set for every object reachable from *tl.
1797 * -------------------------------------------------------------------------- */
1798 static void
1799 retainRoot( StgClosure **tl )
1800 {
1801 StgClosure *c;
1802
1803 // We no longer assume that only TSOs and WEAKs are roots; any closure can
1804 // be a root.
1805
1806 ASSERT(isEmptyRetainerStack());
1807 currentStackBoundary = stackTop;
1808
1809 c = UNTAG_CLOSURE(*tl);
1810 if (c != &stg_END_TSO_QUEUE_closure && isRetainer(c)) {
1811 retainClosure(c, c, getRetainerFrom(c));
1812 } else {
1813 retainClosure(c, c, CCS_SYSTEM);
1814 }
1815
1816 // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
1817 // *tl might be a TSO which is ThreadComplete, in which
1818 // case we ignore it for the purposes of retainer profiling.
1819 }
1820
1821 /* -----------------------------------------------------------------------------
1822 * Compute the retainer set for each of the objects in the heap.
1823 * -------------------------------------------------------------------------- */
static void
computeRetainerSet( void )
{
    StgWeak *weak;
    RetainerSet *rtl;
    nat g;
    StgPtr ml;
    bdescr *bd;
#ifdef DEBUG_RETAINER
    RetainerSet tmpRetainerSet;
#endif

    GetRoots(retainRoot);	// for scheduler roots

    // This function is called after a major GC, when key, value, and finalizer
    // all are guaranteed to be valid, or reachable.
    //
    // The following code assumes that WEAK objects are considered to be roots
    // for retainer profiling.
    for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
	// retainRoot((StgClosure *)weak);
	// NB: retainRoot() takes the ADDRESS of a root pointer; the
	// address of the local 'weak' is fine because retainRoot()
	// dereferences it only to fetch the closure pointer.
	retainRoot((StgClosure **)&weak);

    // Consider roots from the stable ptr table.
    markStablePtrTable(retainRoot);

    // The following code resets the rs field of each unvisited mutable
    // object (computing sumOfNewCostExtra and updating costArray[] when
    // debugging retainer profiler).
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
	// NOT TRUE: even G0 has a block on its mutable list
	// ASSERT(g != 0 || (generations[g].mut_list == NULL));

	// Traversing through mut_list is necessary
	// because we can find MUT_VAR objects which have not been
	// visited during retainer profiling.
	for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
	    for (ml = bd->start; ml < bd->free; ml++) {

		maybeInitRetainerSet((StgClosure *)*ml);
		rtl = retainerSetOf((StgClosure *)*ml);

#ifdef DEBUG_RETAINER
		if (rtl == NULL) {
		    // first visit to *ml
		    // This is a violation of the interface rule!
		    RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);

		    switch (get_itbl((StgClosure *)ml)->type) {
		    case IND_STATIC:
			// no cost involved
			break;
		    case CONSTR_NOCAF_STATIC:
		    case CONSTR_STATIC:
		    case THUNK_STATIC:
		    case FUN_STATIC:
			barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
			break;
		    default:
			// dynamic objects
			costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
			sumOfNewCostExtra += cost((StgClosure *)ml);
			break;
		    }
		}
#endif
	    }
	}
    }
}
1894
1895 /* -----------------------------------------------------------------------------
1896 * Traverse all static objects for which we compute retainer sets,
1897 * and reset their rs fields to NULL, which is accomplished by
1898 * invoking maybeInitRetainerSet(). This function must be called
1899 * before zeroing all objects reachable from scavenged_static_objects
 * in the case of major garbage collections. See GarbageCollect() in
1901 * GC.c.
1902 * Note:
1903 * The mut_once_list of the oldest generation must also be traversed?
1904 * Why? Because if the evacuation of an object pointed to by a static
1905 * indirection object fails, it is put back to the mut_once_list of
1906 * the oldest generation.
1907 * However, this is not necessary because any static indirection objects
1908 * are just traversed through to reach dynamic objects. In other words,
1909 * they are not taken into consideration in computing retainer sets.
1910 * -------------------------------------------------------------------------- */
1911 void
1912 resetStaticObjectForRetainerProfiling( void )
1913 {
1914 #ifdef DEBUG_RETAINER
1915 nat count;
1916 #endif
1917 StgClosure *p;
1918
1919 #ifdef DEBUG_RETAINER
1920 count = 0;
1921 #endif
1922 p = scavenged_static_objects;
1923 while (p != END_OF_STATIC_LIST) {
1924 #ifdef DEBUG_RETAINER
1925 count++;
1926 #endif
1927 switch (get_itbl(p)->type) {
1928 case IND_STATIC:
1929 // Since we do not compute the retainer set of any
1930 // IND_STATIC object, we don't have to reset its retainer
1931 // field.
1932 p = (StgClosure*)*IND_STATIC_LINK(p);
1933 break;
1934 case THUNK_STATIC:
1935 maybeInitRetainerSet(p);
1936 p = (StgClosure*)*THUNK_STATIC_LINK(p);
1937 break;
1938 case FUN_STATIC:
1939 maybeInitRetainerSet(p);
1940 p = (StgClosure*)*FUN_STATIC_LINK(p);
1941 break;
1942 case CONSTR_STATIC:
1943 maybeInitRetainerSet(p);
1944 p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
1945 break;
1946 default:
1947 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1948 p, get_itbl(p)->type);
1949 break;
1950 }
1951 }
1952 #ifdef DEBUG_RETAINER
1953 // debugBelch("count in scavenged_static_objects = %d\n", count);
1954 #endif
1955 }
1956
1957 /* -----------------------------------------------------------------------------
1958 * Perform retainer profiling.
 * N is the oldest generation being profiled, where the generations are
1960 * numbered starting at 0.
1961 * Invariants:
1962 * Note:
1963 * This function should be called only immediately after major garbage
1964 * collection.
1965 * ------------------------------------------------------------------------- */
1966 void
1967 retainerProfile(void)
1968 {
1969 #ifdef DEBUG_RETAINER
1970 nat i;
1971 nat totalHeapSize; // total raw heap size (computed by linear scanning)
1972 #endif
1973
1974 #ifdef DEBUG_RETAINER
1975 debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
1976 #endif
1977
1978 stat_startRP();
1979
1980 // We haven't flipped the bit yet.
1981 #ifdef DEBUG_RETAINER
1982 debugBelch("Before traversing:\n");
1983 sumOfCostLinear = 0;
1984 for (i = 0;i < N_CLOSURE_TYPES; i++)
1985 costArrayLinear[i] = 0;
1986 totalHeapSize = checkHeapSanityForRetainerProfiling();
1987
1988 debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
1989 /*
1990 debugBelch("costArrayLinear[] = ");
1991 for (i = 0;i < N_CLOSURE_TYPES; i++)
1992 debugBelch("[%u:%u] ", i, costArrayLinear[i]);
1993 debugBelch("\n");
1994 */
1995
1996 ASSERT(sumOfCostLinear == totalHeapSize);
1997
1998 /*
1999 #define pcostArrayLinear(index) \
2000 if (costArrayLinear[index] > 0) \
2001 debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
2002 pcostArrayLinear(THUNK_STATIC);
2003 pcostArrayLinear(FUN_STATIC);
2004 pcostArrayLinear(CONSTR_STATIC);
2005 pcostArrayLinear(CONSTR_NOCAF_STATIC);
2006 */
2007 #endif
2008
2009 // Now we flips flip.
2010 flip = flip ^ 1;
2011
2012 #ifdef DEBUG_RETAINER
2013 stackSize = 0;
2014 maxStackSize = 0;
2015 cStackSize = 0;
2016 maxCStackSize = 0;
2017 #endif
2018 numObjectVisited = 0;
2019 timesAnyObjectVisited = 0;
2020
2021 #ifdef DEBUG_RETAINER
2022 debugBelch("During traversing:\n");
2023 sumOfNewCost = 0;
2024 sumOfNewCostExtra = 0;
2025 for (i = 0;i < N_CLOSURE_TYPES; i++)
2026 costArray[i] = 0;
2027 #endif
2028
2029 /*
2030 We initialize the traverse stack each time the retainer profiling is
2031 performed (because the traverse stack size varies on each retainer profiling
2032 and this operation is not costly anyhow). However, we just refresh the
2033 retainer sets.
2034 */
2035 initializeTraverseStack();
2036 #ifdef DEBUG_RETAINER
2037 initializeAllRetainerSet();
2038 #else
2039 refreshAllRetainerSet();
2040 #endif
2041 computeRetainerSet();
2042
2043 #ifdef DEBUG_RETAINER
2044 debugBelch("After traversing:\n");
2045 sumOfCostLinear = 0;
2046 for (i = 0;i < N_CLOSURE_TYPES; i++)
2047 costArrayLinear[i] = 0;
2048 totalHeapSize = checkHeapSanityForRetainerProfiling();
2049
2050 debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
2051 ASSERT(sumOfCostLinear == totalHeapSize);
2052
2053 // now, compare the two results
2054 /*
2055 Note:
2056 costArray[] must be exactly the same as costArrayLinear[].
2057 Known exceptions:
2058 1) Dead weak pointers, whose type is CONSTR. These objects are not
2059 reachable from any roots.
2060 */
2061 debugBelch("Comparison:\n");
2062 debugBelch("\tcostArrayLinear[] (must be empty) = ");
2063 for (i = 0;i < N_CLOSURE_TYPES; i++)
2064 if (costArray[i] != costArrayLinear[i])
2065 // nothing should be printed except MUT_VAR after major GCs
2066 debugBelch("[%u:%u] ", i, costArrayLinear[i]);
2067 debugBelch("\n");
2068
2069 debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
2070 debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
2071 debugBelch("\tcostArray[] (must be empty) = ");
2072 for (i = 0;i < N_CLOSURE_TYPES; i++)
2073 if (costArray[i] != costArrayLinear[i])
2074 // nothing should be printed except MUT_VAR after major GCs
2075 debugBelch("[%u:%u] ", i, costArray[i]);
2076 debugBelch("\n");
2077
2078 // only for major garbage collection
2079 ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
2080 #endif
2081
2082 // post-processing
2083 closeTraverseStack();
2084 #ifdef DEBUG_RETAINER
2085 closeAllRetainerSet();
2086 #else
2087 // Note that there is no post-processing for the retainer sets.
2088 #endif
2089 retainerGeneration++;
2090
2091 stat_endRP(
2092 retainerGeneration - 1, // retainerGeneration has just been incremented!
2093 #ifdef DEBUG_RETAINER
2094 maxCStackSize, maxStackSize,
2095 #endif
2096 (double)timesAnyObjectVisited / numObjectVisited);
2097 }
2098
2099 /* -----------------------------------------------------------------------------
2100 * DEBUGGING CODE
2101 * -------------------------------------------------------------------------- */
2102
2103 #ifdef DEBUG_RETAINER
2104
// Crude validity check for a closure pointer, used only by the
// DEBUG_RETAINER sanity code below: r must be either a static closure or
// a heap address inside a block that has not been freed, and the word it
// points at must not be the 0xaaaaaaaa fill pattern that DEBUG builds
// write over freed memory.
#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
        ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
        ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
2108
2109 static nat
2110 sanityCheckHeapClosure( StgClosure *c )
2111 {
2112 StgInfoTable *info;
2113
2114 ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
2115 ASSERT(!closure_STATIC(c));
2116 ASSERT(LOOKS_LIKE_PTR(c));
2117
2118 if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
2119 if (get_itbl(c)->type == CONSTR &&
2120 !strcmp(GET_PROF_TYPE(get_itbl(c)), "DEAD_WEAK") &&
2121 !strcmp(GET_PROF_DESC(get_itbl(c)), "DEAD_WEAK")) {
2122 debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
2123 costArray[get_itbl(c)->type] += cost(c);
2124 sumOfNewCost += cost(c);
2125 } else
2126 debugBelch(
2127 "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
2128 flip, c, get_itbl(c)->type,
2129 get_itbl(c)->prof.closure_type, GET_PROF_DESC(get_itbl(c)),
2130 RSET(c));
2131 } else {
2132 // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
2133 }
2134
2135 return closure_sizeW(c);
2136 }
2137
2138 static nat
2139 heapCheck( bdescr *bd )
2140 {
2141 StgPtr p;
2142 static nat costSum, size;
2143
2144 costSum = 0;
2145 while (bd != NULL) {
2146 p = bd->start;
2147 while (p < bd->free) {
2148 size = sanityCheckHeapClosure((StgClosure *)p);
2149 sumOfCostLinear += size;
2150 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2151 p += size;
2152 // no need for slop check; I think slops are not used currently.
2153 }
2154 ASSERT(p == bd->free);
2155 costSum += bd->free - bd->start;
2156 bd = bd->link;
2157 }
2158
2159 return costSum;
2160 }
2161
2162 static nat
2163 smallObjectPoolCheck(void)
2164 {
2165 bdescr *bd;
2166 StgPtr p;
2167 static nat costSum, size;
2168
2169 bd = small_alloc_list;
2170 costSum = 0;
2171
2172 // first block
2173 if (bd == NULL)
2174 return costSum;
2175
2176 p = bd->start;
2177 while (p < alloc_Hp) {
2178 size = sanityCheckHeapClosure((StgClosure *)p);
2179 sumOfCostLinear += size;
2180 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2181 p += size;
2182 }
2183 ASSERT(p == alloc_Hp);
2184 costSum += alloc_Hp - bd->start;
2185
2186 bd = bd->link;
2187 while (bd != NULL) {
2188 p = bd->start;
2189 while (p < bd->free) {
2190 size = sanityCheckHeapClosure((StgClosure *)p);
2191 sumOfCostLinear += size;
2192 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2193 p += size;
2194 }
2195 ASSERT(p == bd->free);
2196 costSum += bd->free - bd->start;
2197 bd = bd->link;
2198 }
2199
2200 return costSum;
2201 }
2202
2203 static nat
2204 chainCheck(bdescr *bd)
2205 {
2206 nat costSum, size;
2207
2208 costSum = 0;
2209 while (bd != NULL) {
2210 // bd->free - bd->start is not an accurate measurement of the
2211 // object size. Actually it is always zero, so we compute its
2212 // size explicitly.
2213 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2214 sumOfCostLinear += size;
2215 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
2216 costSum += size;
2217 bd = bd->link;
2218 }
2219
2220 return costSum;
2221 }
2222
/*
  Linearly scan the entire heap — every generation and step, including
  the small object pool and the large-object chains — sanity-checking
  each closure via heapCheck()/smallObjectPoolCheck()/chainCheck().

  Returns the total heap size in words; the caller compares this against
  sumOfCostLinear, which the helpers accumulate as a side effect.
 */
static nat
checkHeapSanityForRetainerProfiling( void )
{
    nat costSum, g, s;

    costSum = 0;
    debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
    if (RtsFlags.GcFlags.generations == 1) {
        // Two-space collector: live data is in the to-space of g0s0.
        costSum += heapCheck(g0s0->to_blocks);
        debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
        costSum += chainCheck(g0s0->large_objects);
        debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
    } else {
        for (g = 0; g < RtsFlags.GcFlags.generations; g++)
            for (s = 0; s < generations[g].n_steps; s++) {
                /*
                  After all live objects have been scavenged, the garbage
                  collector may create some objects in
                  scheduleFinalizers(). These objects are created through
                  allocate(), so the small object pool or the large object
                  pool of the g0s0 may not be empty.
                */
                if (g == 0 && s == 0) {
                    costSum += smallObjectPoolCheck();
                    debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
                    costSum += chainCheck(generations[g].steps[s].large_objects);
                    debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
                } else {
                    costSum += heapCheck(generations[g].steps[s].blocks);
                    debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
                    costSum += chainCheck(generations[g].steps[s].large_objects);
                    debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
                }
            }
    }

    return costSum;
}
2261
2262 void
2263 findPointer(StgPtr p)
2264 {
2265 StgPtr q, r, e;
2266 bdescr *bd;
2267 nat g, s;
2268
2269 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2270 for (s = 0; s < generations[g].n_steps; s++) {
2271 // if (g == 0 && s == 0) continue;
2272 bd = generations[g].steps[s].blocks;
2273 for (; bd; bd = bd->link) {
2274 for (q = bd->start; q < bd->free; q++) {
2275 if (*q == (StgWord)p) {
2276 r = q;
2277 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2278 debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2279 // return;
2280 }
2281 }
2282 }
2283 bd = generations[g].steps[s].large_objects;
2284 for (; bd; bd = bd->link) {
2285 e = bd->start + cost((StgClosure *)bd->start);
2286 for (q = bd->start; q < e; q++) {
2287 if (*q == (StgWord)p) {
2288 r = q;
2289 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2290 debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
2291 // return;
2292 }
2293 }
2294 }
2295 }
2296 }
2297 }
2298
2299 static void
2300 belongToHeap(StgPtr p)
2301 {
2302 bdescr *bd;
2303 nat g, s;
2304
2305 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2306 for (s = 0; s < generations[g].n_steps; s++) {
2307 // if (g == 0 && s == 0) continue;
2308 bd = generations[g].steps[s].blocks;
2309 for (; bd; bd = bd->link) {
2310 if (bd->start <= p && p < bd->free) {
2311 debugBelch("Belongs to gen[%d], step[%d]", g, s);
2312 return;
2313 }
2314 }
2315 bd = generations[g].steps[s].large_objects;
2316 for (; bd; bd = bd->link) {
2317 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2318 debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
2319 return;
2320 }
2321 }
2322 }
2323 }
2324 }
2325 #endif /* DEBUG_RETAINER */
2326
2327 #endif /* PROFILING */