Add allocateInGen() for allocating in a specific generation, and cleanups
[ghc.git] / rts / RetainerProfile.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2001
4 * Author: Sungwoo Park
5 *
6 * Retainer profiling.
7 *
8 * ---------------------------------------------------------------------------*/
9
10 #ifdef PROFILING
11
12 // Turn off inlining when debugging - it obfuscates things
13 #ifdef DEBUG
14 #define INLINE
15 #else
16 #define INLINE inline
17 #endif
18
19 #include "Rts.h"
20 #include "RtsUtils.h"
21 #include "RetainerProfile.h"
22 #include "RetainerSet.h"
23 #include "Schedule.h"
24 #include "Printer.h"
25 #include "RtsFlags.h"
26 #include "Weak.h"
27 #include "Sanity.h"
28 #include "Profiling.h"
29 #include "Stats.h"
30 #include "ProfHeap.h"
31 #include "Apply.h"
32
33 /*
34 Note: what to change in order to plug-in a new retainer profiling scheme?
35 (1) type retainer in ../includes/StgRetainerProf.h
36 (2) retainer function R(), i.e., getRetainerFrom()
37 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
38 in RetainerSet.h, if needed.
39 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
40 */
41
42 /* -----------------------------------------------------------------------------
43 * Declarations...
44 * -------------------------------------------------------------------------- */
45
46 static nat retainerGeneration; // generation
47
48 static nat numObjectVisited; // total number of objects visited
49 static nat timesAnyObjectVisited; // number of times any objects are visited
50
51 /*
52 The rs field in the profile header of any object points to its retainer
53 set in an indirect way: if flip is 0, it points to the retainer set;
54 if flip is 1, it points to the next byte after the retainer set (even
55 for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
56 pointer. See retainerSetOf().
57 */
58
59 StgWord flip = 0; // flip bit
60 // must be 0 if DEBUG_RETAINER is on (for static closures)
61
62 #define setRetainerSetToNull(c) \
63 (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)
64
65 static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
66 static void retainClosure(StgClosure *, StgClosure *, retainer);
67 #ifdef DEBUG_RETAINER
68 static void belongToHeap(StgPtr p);
69 #endif
70
71 #ifdef DEBUG_RETAINER
72 /*
73 cStackSize records how many times retainStack() has been invoked recursively,
74 that is, the number of activation records for retainStack() on the C stack.
75 maxCStackSize records its max value.
76 Invariants:
77 cStackSize <= maxCStackSize
78 */
79 static nat cStackSize, maxCStackSize;
80
81 static nat sumOfNewCost; // sum of the cost of each object, computed
82 // when the object is first visited
83 static nat sumOfNewCostExtra; // for those objects not visited during
84 // retainer profiling, e.g., MUT_VAR
85 static nat costArray[N_CLOSURE_TYPES];
86
87 nat sumOfCostLinear; // sum of the costs of all object, computed
88 // when linearly traversing the heap after
89 // retainer profiling
90 nat costArrayLinear[N_CLOSURE_TYPES];
91 #endif
92
/* -----------------------------------------------------------------------------
 * Retainer stack - header
 * Note:
 *   Although the retainer stack implementation could be separated
 *   from the retainer profiling engine, there does not seem to be
 *   any advantage in doing that; the retainer stack is an integral part
 *   of the retainer profiling engine and cannot be used elsewhere at
 *   all.
 * -------------------------------------------------------------------------- */
102
// Discriminates how the "next child" position inside a stackElement is
// encoded (see the nextPos union below).
typedef enum {
    posTypeStep,       // a simple step counter (fixed-layout closures)
    posTypePtrs,       // an index into a payload pointer array
    posTypeSRT,        // a small SRT with an inline bitmap
    posTypeLargeSRT,   // a large SRT with an out-of-line bitmap
} nextPosType;

// Position of the next child to visit within a closure.  Exactly one
// member is active, selected by the accompanying nextPosType.
typedef union {
    // fixed layout or layout specified by a field in the closure
    StgWord step;

    // layout.payload
    struct {
        // See StgClosureInfo in InfoTables.h
#if SIZEOF_VOID_P == 8
        StgWord32 pos;      // index of the next pointer field to return
        StgWord32 ptrs;     // total number of pointer fields
#else
        StgWord16 pos;
        StgWord16 ptrs;
#endif
        StgPtr payload;     // start of the closure's payload
    } ptrs;

    // SRT (static reference table) with a small, inline bitmap
    struct {
        StgClosure **srt;       // next SRT entry to examine
        StgWord srt_bitmap;     // remaining bitmap bits (shifted as we go)
    } srt;

    // Large SRT: bitmap lives in the StgLargeSRT structure
    struct {
        StgLargeSRT *srt;
        StgWord offset;         // index of the next bitmap bit to examine
    } large_srt;

} nextPos;

// A (type, position) pair: where we are inside one closure's children.
typedef struct {
    nextPosType type;
    nextPos next;
} stackPos;

// One entry on the traverse stack: a closure whose remaining children
// still need to be visited, plus the retainer to attribute to them.
typedef struct {
    StgClosure *c;          // the closure being traversed
    retainer c_child_r;     // retainer of c's children
    stackPos info;          // which child comes next
} stackElement;
151
152 /*
153 Invariants:
154 firstStack points to the first block group.
155 currentStack points to the block group currently being used.
156 currentStack->free == stackLimit.
157 stackTop points to the topmost byte in the stack of currentStack.
158 Unless the whole stack is empty, stackTop must point to the topmost
159 object (or byte) in the whole stack. Thus, it is only when the whole stack
160 is empty that stackTop == stackLimit (not during the execution of push()
161 and pop()).
162 stackBottom == currentStack->start.
163 stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
164 Note:
165 When a current stack becomes empty, stackTop is set to point to
166 the topmost element on the previous block group so as to satisfy
167 the invariants described above.
168 */
169 static bdescr *firstStack = NULL;
170 static bdescr *currentStack;
171 static stackElement *stackBottom, *stackTop, *stackLimit;
172
173 /*
174 currentStackBoundary is used to mark the current stack chunk.
175 If stackTop == currentStackBoundary, it means that the current stack chunk
176 is empty. It is the responsibility of the user to keep currentStackBoundary
177 valid all the time if it is to be employed.
178 */
179 static stackElement *currentStackBoundary;
180
181 /*
182 stackSize records the current size of the stack.
183 maxStackSize records its high water mark.
184 Invariants:
185 stackSize <= maxStackSize
186 Note:
187 stackSize is just an estimate measure of the depth of the graph. The reason
188 is that some heap objects have only a single child and may not result
189 in a new element being pushed onto the stack. Therefore, at the end of
190 retainer profiling, maxStackSize + maxCStackSize is some value no greater
191 than the actual depth of the graph.
192 */
193 #ifdef DEBUG_RETAINER
194 static int stackSize, maxStackSize;
195 #endif
196
197 // number of blocks allocated for one stack
198 #define BLOCKS_IN_STACK 1
199
200 /* -----------------------------------------------------------------------------
201 * Add a new block group to the stack.
202 * Invariants:
203 * currentStack->link == s.
204 * -------------------------------------------------------------------------- */
205 static INLINE void
206 newStackBlock( bdescr *bd )
207 {
208 currentStack = bd;
209 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
210 stackBottom = (stackElement *)bd->start;
211 stackLimit = (stackElement *)stackTop;
212 bd->free = (StgPtr)stackLimit;
213 }
214
215 /* -----------------------------------------------------------------------------
216 * Return to the previous block group.
217 * Invariants:
218 * s->link == currentStack.
219 * -------------------------------------------------------------------------- */
220 static INLINE void
221 returnToOldStack( bdescr *bd )
222 {
223 currentStack = bd;
224 stackTop = (stackElement *)bd->free;
225 stackBottom = (stackElement *)bd->start;
226 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
227 bd->free = (StgPtr)stackLimit;
228 }
229
230 /* -----------------------------------------------------------------------------
231 * Initializes the traverse stack.
232 * -------------------------------------------------------------------------- */
233 static void
234 initializeTraverseStack( void )
235 {
236 if (firstStack != NULL) {
237 freeChain(firstStack);
238 }
239
240 firstStack = allocGroup(BLOCKS_IN_STACK);
241 firstStack->link = NULL;
242 firstStack->u.back = NULL;
243
244 newStackBlock(firstStack);
245 }
246
247 /* -----------------------------------------------------------------------------
248 * Frees all the block groups in the traverse stack.
249 * Invariants:
250 * firstStack != NULL
251 * -------------------------------------------------------------------------- */
252 static void
253 closeTraverseStack( void )
254 {
255 freeChain(firstStack);
256 firstStack = NULL;
257 }
258
259 /* -----------------------------------------------------------------------------
260 * Returns rtsTrue if the whole stack is empty.
261 * -------------------------------------------------------------------------- */
262 static INLINE rtsBool
263 isEmptyRetainerStack( void )
264 {
265 return (firstStack == currentStack) && stackTop == stackLimit;
266 }
267
/* -----------------------------------------------------------------------------
 * Returns the total number of blocks allocated for the traverse stack.
 * -------------------------------------------------------------------------- */
#ifdef DEBUG
lnat
retainerStackBlocks( void )
{
    lnat nBlocks = 0;
    bdescr *chunk;

    for (chunk = firstStack; chunk != NULL; chunk = chunk->link) {
        nBlocks += chunk->blocks;
    }

    return nBlocks;
}
#endif
284
285 /* -----------------------------------------------------------------------------
286 * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
287 * i.e., if the current stack chunk is empty.
288 * -------------------------------------------------------------------------- */
289 static INLINE rtsBool
290 isOnBoundary( void )
291 {
292 return stackTop == currentStackBoundary;
293 }
294
295 /* -----------------------------------------------------------------------------
296 * Initializes *info from ptrs and payload.
297 * Invariants:
298 * payload[] begins with ptrs pointers followed by non-pointers.
299 * -------------------------------------------------------------------------- */
300 static INLINE void
301 init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
302 {
303 info->type = posTypePtrs;
304 info->next.ptrs.pos = 0;
305 info->next.ptrs.ptrs = ptrs;
306 info->next.ptrs.payload = payload;
307 }
308
309 /* -----------------------------------------------------------------------------
310 * Find the next object from *info.
311 * -------------------------------------------------------------------------- */
312 static INLINE StgClosure *
313 find_ptrs( stackPos *info )
314 {
315 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
316 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
317 } else {
318 return NULL;
319 }
320 }
321
322 /* -----------------------------------------------------------------------------
323 * Initializes *info from SRT information stored in *infoTable.
324 * -------------------------------------------------------------------------- */
325 static INLINE void
326 init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
327 {
328 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
329 info->type = posTypeLargeSRT;
330 info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
331 info->next.large_srt.offset = 0;
332 } else {
333 info->type = posTypeSRT;
334 info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
335 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
336 }
337 }
338
339 static INLINE void
340 init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
341 {
342 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
343 info->type = posTypeLargeSRT;
344 info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
345 info->next.large_srt.offset = 0;
346 } else {
347 info->type = posTypeSRT;
348 info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
349 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
350 }
351 }
352
/* -----------------------------------------------------------------------------
 * Find the next object from *info (an SRT position).
 * Returns the next live SRT entry and advances the position, or NULL when
 * the SRT has been exhausted.
 * -------------------------------------------------------------------------- */
static INLINE StgClosure *
find_srt( stackPos *info )
{
    StgClosure *c;
    StgWord bitmap;

    if (info->type == posTypeSRT) {
        // Small SRT bitmap: each set bit denotes a live SRT entry.
        bitmap = info->next.srt.srt_bitmap;
        while (bitmap != 0) {
            if ((bitmap & 1) != 0) {
#if defined(__PIC__) && defined(mingw32_TARGET_OS)
                // NOTE(review): Windows PIC tags DLL-imported SRT entries
                // with a low bit; untag before dereferencing.  Transcribed
                // as-is — the parenthesization here looks suspect; compare
                // with the analogous code in the GC before touching it.
                if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
                    c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
                else
                    c = *(info->next.srt.srt);
#else
                c = *(info->next.srt.srt);
#endif
                // Save the advanced position before returning this entry.
                bitmap = bitmap >> 1;
                info->next.srt.srt++;
                info->next.srt.srt_bitmap = bitmap;
                return c;
            }
            bitmap = bitmap >> 1;
            info->next.srt.srt++;
        }
        // bitmap is now zero...
        return NULL;
    }
    else {
        // Large SRT bitmap: the bitmap lives in an array of words.
        nat i = info->next.large_srt.offset;
        StgWord bitmap;   // NB: shadows the outer `bitmap` (intentional here)

        // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
        bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
        bitmap = bitmap >> (i % BITS_IN(StgWord));
        while (i < info->next.large_srt.srt->l.size) {
            if ((bitmap & 1) != 0) {
                c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
                i++;
                info->next.large_srt.offset = i;
                return c;
            }
            i++;
            // Reload the bitmap word on a word boundary, otherwise shift.
            if (i % BITS_IN(W_) == 0) {
                bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
            } else {
                bitmap = bitmap >> 1;
            }
        }
        // reached the end of this bitmap.
        info->next.large_srt.offset = i;
        return NULL;
    }
}
413
/* -----------------------------------------------------------------------------
 * push() pushes a stackElement representing the next child of *c
 * onto the traverse stack. If *c has no child, *first_child is set
 * to NULL and nothing is pushed onto the stack. If *c has only one
 * child, *first_child is set to that child and nothing is pushed onto
 * the stack. If *c has two or more children, *first_child is set
 * to the first child and a stackElement representing the second
 * child is pushed onto the stack.

 * Invariants:
 *   *c_child_r is the most recent retainer of *c's children.
 *   *c is not any of TSO, AP, PAP, AP_STACK, which means that
 *   there cannot be any stack objects.
 * Note: SRTs are considered to be children as well.
 * -------------------------------------------------------------------------- */
static INLINE void
push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
{
    stackElement se;
    bdescr *nbd;      // Next Block Descriptor

#ifdef DEBUG_RETAINER
    // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    ASSERT(get_itbl(c)->type != TSO);
    ASSERT(get_itbl(c)->type != AP_STACK);

    //
    // fill in se
    //

    se.c = c;
    se.c_child_r = c_child_r;

    // fill in se.info
    switch (get_itbl(c)->type) {
        // no child, no SRT
    case CONSTR_0_1:
    case CONSTR_0_2:
    case CAF_BLACKHOLE:
    case BLACKHOLE:
    case SE_BLACKHOLE:
    case SE_CAF_BLACKHOLE:
    case ARR_WORDS:
        *first_child = NULL;
        return;

        // one child (fixed), no SRT
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
        *first_child = ((StgMutVar *)c)->var;
        return;
    case THUNK_SELECTOR:
        *first_child = ((StgSelector *)c)->selectee;
        return;
    case IND_PERM:
    case IND_OLDGEN_PERM:
    case IND_OLDGEN:
        *first_child = ((StgInd *)c)->indirectee;
        return;
    case CONSTR_1_0:
    case CONSTR_1_1:
        *first_child = c->payload[0];
        return;

        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;

        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
    case CONSTR_2_0:
        *first_child = c->payload[0];         // return the first pointer
        // se.info.type = posTypeStep;
        // se.info.next.step = 2;             // 2 = second
        break;

        // three children (fixed), no SRT
        // need to push a stackElement
    case MVAR_CLEAN:
    case MVAR_DIRTY:
        // head must be TSO and the head of a linked list of TSOs.
        // Should it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;                // 2 = second
        break;

        // three children (fixed), no SRT
    case WEAK:
        *first_child = ((StgWeak *)c)->key;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;
        break;

        // layout.payload.ptrs, no SRT
    case CONSTR:
    case STABLE_NAME:
    case BCO:
    case CONSTR_STATIC:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // layout.payload.ptrs, SRT
    case FUN:           // *c is a heap object.
    case FUN_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;

    case THUNK:
    case THUNK_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
    case FUN_1_0:
    case FUN_1_1:
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;

    case THUNK_1_0:
    case THUNK_1_1:
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;

    case FUN_STATIC:    // *c is a heap object.
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        /* fallthrough */
    case FUN_0_1:
    case FUN_0_2:
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

        // SRT only
    case THUNK_STATIC:
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        /* fallthrough */
    case THUNK_0_1:
    case THUNK_0_2:
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

    case TVAR_WATCH_QUEUE:
        *first_child = (StgClosure *)((StgTVarWatchQueue *)c)->closure;
        se.info.next.step = 2;                // 2 = second
        break;
    case TVAR:
        *first_child = (StgClosure *)((StgTVar *)c)->current_value;
        break;
    case TREC_HEADER:
        *first_child = (StgClosure *)((StgTRecHeader *)c)->enclosing_trec;
        break;
    case TREC_CHUNK:
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
        se.info.next.step = 0;                // entry no.
        break;

        // cannot appear
    case PAP:
    case AP:
    case AP_STACK:
    case TSO:
    case IND_STATIC:
    case CONSTR_NOCAF_STATIC:
        // stack objects
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
        // invalid objects
    case IND:
    case BLOCKED_FETCH:
    case FETCH_ME:
    case FETCH_ME_BQ:
    case RBH:
    case REMOTE_REF:
    case EVACUATED:
    case INVALID_OBJECT:
    default:
        barf("Invalid object *c in push()");
        return;
    }

    // The stack grows downwards; spill to the next block group when the
    // current chunk is full.
    if (stackTop - 1 < stackBottom) {
#ifdef DEBUG_RETAINER
        // debugBelch("push() to the next stack.\n");
#endif
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
        currentStack->free = (StgPtr)stackTop;

        if (currentStack->link == NULL) {
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
            nbd->u.back = currentStack;
            currentStack->link = nbd;
        } else
            nbd = currentStack->link;

        newStackBlock(nbd);
    }

    // adjust stackTop (actual push)
    stackTop--;
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.

    // ToDo: The line below leads to the warning:
    //    warning: 'se.info.type' may be used uninitialized in this function
    // This is caused by the fact that there are execution paths through the
    // large switch statement above where some cases do not initialize this
    // field. Is this really harmless? Can we avoid the warning?
    *stackTop = se;

#ifdef DEBUG_RETAINER
    stackSize++;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    // ASSERT(stackSize >= 0);
    // debugBelch("stackSize = %d\n", stackSize);
#endif
}
676
/* -----------------------------------------------------------------------------
 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
 * Invariants:
 *   stackTop cannot be equal to stackLimit unless the whole stack is
 *   empty, in which case popOff() is not allowed.
 * Note:
 *   You can think of popOffReal() as a part of popOff() which is
 *   executed at the end of popOff() if necessary. Since popOff() is
 *   likely to be executed quite often while popOffReal() is not, we
 *   separate popOffReal() from popOff(), which is declared as an
 *   INLINE function (for the sake of execution speed). popOffReal()
 *   is called only within popOff() and nowhere else.
 * -------------------------------------------------------------------------- */
static void
popOffReal(void)
{
    bdescr *pbd;    // Previous Block Descriptor

#ifdef DEBUG_RETAINER
    // debugBelch("pop() to the previous stack.\n");
#endif

    // Only called when popping the last element of the current chunk.
    ASSERT(stackTop + 1 == stackLimit);
    ASSERT(stackBottom == (stackElement *)currentStack->start);

    if (firstStack == currentStack) {
        // The stack is completely empty.
        stackTop++;
        ASSERT(stackTop == stackLimit);
#ifdef DEBUG_RETAINER
        stackSize--;
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
          ASSERT(stackSize >= 0);
          debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
    currentStack->free = (StgPtr)stackLimit;

    // find the previous block descriptor
    pbd = currentStack->u.back;
    ASSERT(pbd != NULL);

    returnToOldStack(pbd);

#ifdef DEBUG_RETAINER
    stackSize--;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    /*
      ASSERT(stackSize >= 0);
      debugBelch("stackSize = %d\n", stackSize);
    */
#endif
}
736
// Fast path of the pop: bump stackTop within the current chunk, deferring
// to popOffReal() only when the chunk becomes empty.
static INLINE void
popOff(void) {
#ifdef DEBUG_RETAINER
    // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    ASSERT(stackTop != stackLimit);
    ASSERT(!isEmptyRetainerStack());

    // <= (instead of <) is wrong!
    if (stackTop + 1 < stackLimit) {
        stackTop++;
#ifdef DEBUG_RETAINER
        stackSize--;
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
          ASSERT(stackSize >= 0);
          debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    // Popping the last element of this chunk: may switch block groups.
    popOffReal();
}
762
/* -----------------------------------------------------------------------------
 * Finds the next object to be considered for retainer profiling and store
 * its pointer to *c.
 * Test if the topmost stack element indicates that more objects are left,
 * and if so, retrieve the first object and store its pointer to *c. Also,
 * set *cp and *r appropriately, both of which are stored in the stack element.
 * The topmost stack element then is overwritten so as for it to now denote
 * the next object.
 * If the topmost stack element indicates no more objects are left, pop
 * off the stack element until either an object can be retrieved or
 * the current stack chunk becomes empty, indicated by rtsTrue returned by
 * isOnBoundary(), in which case *c is set to NULL.
 * Note:
 *   It is okay to call this function even when the current stack chunk
 *   is empty.
 * -------------------------------------------------------------------------- */
static INLINE void
pop( StgClosure **c, StgClosure **cp, retainer *r )
{
    stackElement *se;

#ifdef DEBUG_RETAINER
    // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    do {
        if (isOnBoundary()) {     // if the current stack chunk is depleted
            *c = NULL;
            return;
        }

        se = stackTop;

        // Each case either yields a child (sets *c/*cp/*r and returns) or
        // exhausts the element (popOff() and loop again via `break`).
        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

            // three children (fixed), no SRT
            // need to push a stackElement
        case MVAR_CLEAN:
        case MVAR_DIRTY:
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;             // move to the next step
                // no popOff
            } else {
                *c = ((StgMVar *)se->c)->value;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
                // no popOff
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR_WATCH_QUEUE:
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgTVarWatchQueue *)se->c)->next_queue_entry;
                se->info.next.step++;             // move to the next step
                // no popOff
            } else {
                *c = (StgClosure *)((StgTVarWatchQueue *)se->c)->prev_queue_entry;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR:
            *c = (StgClosure *)((StgTVar *)se->c)->first_watch_queue_entry;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_HEADER:
            *c = (StgClosure *)((StgTRecHeader *)se->c)->current_chunk;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow.  So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            TRecEntry *entry;
            nat entry_no = se->info.next.step >> 2;
            nat field_no = se->info.next.step & 3;
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                *c = NULL;
                popOff();
                return;
            }
            entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            *cp = se->c;
            *r = se->c_child_r;
            se->info.next.step++;
            return;
        }

        case CONSTR:
        case STABLE_NAME:
        case BCO:
        case CONSTR_STATIC:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
        case MUT_ARR_PTRS_FROZEN:
        case MUT_ARR_PTRS_FROZEN0:
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
                popOff();
                break;    // element exhausted; continue the do-while loop
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // layout.payload.ptrs, SRT
        case FUN:         // always a heap object
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                // pointer fields exhausted: switch this element to its SRT
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
            if (*c != NULL) {
                *cp = se->c;
                *r = se->c_child_r;
                return;
            }
            popOff();
            break;

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case CAF_BLACKHOLE:
        case BLACKHOLE:
        case SE_BLACKHOLE:
        case SE_CAF_BLACKHOLE:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case IND_PERM:
        case IND_OLDGEN_PERM:
        case IND_OLDGEN:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
        case IND_STATIC:
        case CONSTR_NOCAF_STATIC:
            // stack objects
        case RET_DYN:
        case UPDATE_FRAME:
        case CATCH_FRAME:
        case STOP_FRAME:
        case RET_BCO:
        case RET_SMALL:
        case RET_BIG:
            // invalid objects
        case IND:
        case BLOCKED_FETCH:
        case FETCH_ME:
        case FETCH_ME_BQ:
        case RBH:
        case REMOTE_REF:
        case EVACUATED:
        case INVALID_OBJECT:
        default:
            barf("Invalid object *c in pop()");
            return;
        }
    } while (rtsTrue);
}
1003
1004 /* -----------------------------------------------------------------------------
1005 * RETAINER PROFILING ENGINE
1006 * -------------------------------------------------------------------------- */
1007
1008 void
1009 initRetainerProfiling( void )
1010 {
1011 initializeAllRetainerSet();
1012 retainerGeneration = 0;
1013 }
1014
/* -----------------------------------------------------------------------------
 * This function must be called before f-closing prof_file.
 * With SECOND_APPROACH, dumps every accumulated retainer set to the
 * profiling output file; otherwise it is a no-op.
 * -------------------------------------------------------------------------- */
void
endRetainerProfiling( void )
{
#ifdef SECOND_APPROACH
    outputAllRetainerSet(prof_file);
#endif
}
1025
1026 /* -----------------------------------------------------------------------------
1027 * Returns the actual pointer to the retainer set of the closure *c.
1028 * It may adjust RSET(c) subject to flip.
1029 * Side effects:
1030 * RSET(c) is initialized to NULL if its current value does not
1031 * conform to flip.
1032 * Note:
1033 * Even though this function has side effects, they CAN be ignored because
1034 * subsequent calls to retainerSetOf() always result in the same return value
1035 * and retainerSetOf() is the only way to retrieve retainerSet of a given
1036 * closure.
1037 * We have to perform an XOR (^) operation each time a closure is examined.
1038 * The reason is that we do not know when a closure is visited last.
1039 * -------------------------------------------------------------------------- */
1040 static INLINE void
1041 maybeInitRetainerSet( StgClosure *c )
1042 {
1043 if (!isRetainerSetFieldValid(c)) {
1044 setRetainerSetToNull(c);
1045 }
1046 }
1047
1048 /* -----------------------------------------------------------------------------
1049 * Returns rtsTrue if *c is a retainer.
1050 * -------------------------------------------------------------------------- */
1051 static INLINE rtsBool
1052 isRetainer( StgClosure *c )
1053 {
1054 switch (get_itbl(c)->type) {
1055 //
1056 // True case
1057 //
1058 // TSOs MUST be retainers: they constitute the set of roots.
1059 case TSO:
1060
1061 // mutable objects
1062 case MVAR_CLEAN:
1063 case MVAR_DIRTY:
1064 case MUT_VAR_CLEAN:
1065 case MUT_VAR_DIRTY:
1066 case MUT_ARR_PTRS_CLEAN:
1067 case MUT_ARR_PTRS_DIRTY:
1068 case MUT_ARR_PTRS_FROZEN:
1069 case MUT_ARR_PTRS_FROZEN0:
1070
1071 // thunks are retainers.
1072 case THUNK:
1073 case THUNK_1_0:
1074 case THUNK_0_1:
1075 case THUNK_2_0:
1076 case THUNK_1_1:
1077 case THUNK_0_2:
1078 case THUNK_SELECTOR:
1079 case AP:
1080 case AP_STACK:
1081
1082 // Static thunks, or CAFS, are obviously retainers.
1083 case THUNK_STATIC:
1084
1085 // WEAK objects are roots; there is separate code in which traversing
1086 // begins from WEAK objects.
1087 case WEAK:
1088
1089 // Since the other mutvar-type things are retainers, seems
1090 // like the right thing to do:
1091 case TVAR:
1092 return rtsTrue;
1093
1094 //
1095 // False case
1096 //
1097
1098 // constructors
1099 case CONSTR:
1100 case CONSTR_1_0:
1101 case CONSTR_0_1:
1102 case CONSTR_2_0:
1103 case CONSTR_1_1:
1104 case CONSTR_0_2:
1105 // functions
1106 case FUN:
1107 case FUN_1_0:
1108 case FUN_0_1:
1109 case FUN_2_0:
1110 case FUN_1_1:
1111 case FUN_0_2:
1112 // partial applications
1113 case PAP:
1114 // blackholes
1115 case CAF_BLACKHOLE:
1116 case BLACKHOLE:
1117 case SE_BLACKHOLE:
1118 case SE_CAF_BLACKHOLE:
1119 // indirection
1120 case IND_PERM:
1121 case IND_OLDGEN_PERM:
1122 case IND_OLDGEN:
1123 // static objects
1124 case CONSTR_STATIC:
1125 case FUN_STATIC:
1126 // misc
1127 case STABLE_NAME:
1128 case BCO:
1129 case ARR_WORDS:
1130 // STM
1131 case TVAR_WATCH_QUEUE:
1132 case TREC_HEADER:
1133 case TREC_CHUNK:
1134 return rtsFalse;
1135
1136 //
1137 // Error case
1138 //
1139 // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
1140 case IND_STATIC:
1141 // CONSTR_NOCAF_STATIC
1142 // cannot be *c, *cp, *r in the retainer profiling loop.
1143 case CONSTR_NOCAF_STATIC:
1144 // Stack objects are invalid because they are never treated as
1145 // legal objects during retainer profiling.
1146 case UPDATE_FRAME:
1147 case CATCH_FRAME:
1148 case STOP_FRAME:
1149 case RET_DYN:
1150 case RET_BCO:
1151 case RET_SMALL:
1152 case RET_BIG:
1153 // other cases
1154 case IND:
1155 case BLOCKED_FETCH:
1156 case FETCH_ME:
1157 case FETCH_ME_BQ:
1158 case RBH:
1159 case REMOTE_REF:
1160 case EVACUATED:
1161 case INVALID_OBJECT:
1162 default:
1163 barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
1164 return rtsFalse;
1165 }
1166 }
1167
1168 /* -----------------------------------------------------------------------------
1169 * Returns the retainer function value for the closure *c, i.e., R(*c).
1170 * This function does NOT return the retainer(s) of *c.
1171 * Invariants:
1172 * *c must be a retainer.
1173 * Note:
1174 * Depending on the definition of this function, the maintenance of retainer
1175 * sets can be made easier. If most retainer sets are likely to be created
1176 * again across garbage collections, refreshAllRetainerSet() in
1177 * RetainerSet.c can simply do nothing.
1178 * If this is not the case, we can free all the retainer sets and
1179 * re-initialize the hash table.
1180 * See refreshAllRetainerSet() in RetainerSet.c.
1181 * -------------------------------------------------------------------------- */
1182 static INLINE retainer
1183 getRetainerFrom( StgClosure *c )
1184 {
1185 ASSERT(isRetainer(c));
1186
1187 #if defined(RETAINER_SCHEME_INFO)
1188 // Retainer scheme 1: retainer = info table
1189 return get_itbl(c);
1190 #elif defined(RETAINER_SCHEME_CCS)
1191 // Retainer scheme 2: retainer = cost centre stack
1192 return c->header.prof.ccs;
1193 #elif defined(RETAINER_SCHEME_CC)
1194 // Retainer scheme 3: retainer = cost centre
1195 return c->header.prof.ccs->cc;
1196 #endif
1197 }
1198
1199 /* -----------------------------------------------------------------------------
1200 * Associates the retainer set *s with the closure *c, that is, *s becomes
1201 * the retainer set of *c.
1202 * Invariants:
1203 * c != NULL
1204 * s != NULL
1205 * -------------------------------------------------------------------------- */
1206 static INLINE void
1207 associate( StgClosure *c, RetainerSet *s )
1208 {
1209 // StgWord has the same size as pointers, so the following type
1210 // casting is okay.
1211 RSET(c) = (RetainerSet *)((StgWord)s | flip);
1212 }
1213
1214 /* -----------------------------------------------------------------------------
1215 Call retainClosure for each of the closures covered by a large bitmap.
1216 -------------------------------------------------------------------------- */
1217
1218 static void
1219 retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size,
1220 StgClosure *c, retainer c_child_r)
1221 {
1222 nat i, b;
1223 StgWord bitmap;
1224
1225 b = 0;
1226 bitmap = large_bitmap->bitmap[b];
1227 for (i = 0; i < size; ) {
1228 if ((bitmap & 1) == 0) {
1229 retainClosure((StgClosure *)*p, c, c_child_r);
1230 }
1231 i++;
1232 p++;
1233 if (i % BITS_IN(W_) == 0) {
1234 b++;
1235 bitmap = large_bitmap->bitmap[b];
1236 } else {
1237 bitmap = bitmap >> 1;
1238 }
1239 }
1240 }
1241
1242 static INLINE StgPtr
1243 retain_small_bitmap (StgPtr p, nat size, StgWord bitmap,
1244 StgClosure *c, retainer c_child_r)
1245 {
1246 while (size > 0) {
1247 if ((bitmap & 1) == 0) {
1248 retainClosure((StgClosure *)*p, c, c_child_r);
1249 }
1250 p++;
1251 bitmap = bitmap >> 1;
1252 size--;
1253 }
1254 return p;
1255 }
1256
1257 /* -----------------------------------------------------------------------------
1258 * Call retainClosure for each of the closures in an SRT.
1259 * ------------------------------------------------------------------------- */
1260
1261 static void
1262 retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
1263 {
1264 nat i, b, size;
1265 StgWord bitmap;
1266 StgClosure **p;
1267
1268 b = 0;
1269 p = (StgClosure **)srt->srt;
1270 size = srt->l.size;
1271 bitmap = srt->l.bitmap[b];
1272 for (i = 0; i < size; ) {
1273 if ((bitmap & 1) != 0) {
1274 retainClosure((StgClosure *)*p, c, c_child_r);
1275 }
1276 i++;
1277 p++;
1278 if (i % BITS_IN(W_) == 0) {
1279 b++;
1280 bitmap = srt->l.bitmap[b];
1281 } else {
1282 bitmap = bitmap >> 1;
1283 }
1284 }
1285 }
1286
1287 static INLINE void
1288 retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
1289 {
1290 nat bitmap;
1291 StgClosure **p;
1292
1293 bitmap = srt_bitmap;
1294 p = srt;
1295
1296 if (bitmap == (StgHalfWord)(-1)) {
1297 retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1298 return;
1299 }
1300
1301 while (bitmap != 0) {
1302 if ((bitmap & 1) != 0) {
1303 #ifdef ENABLE_WIN32_DLL_SUPPORT
1304 if ( (unsigned long)(*srt) & 0x1 ) {
1305 retainClosure(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1)),
1306 c, c_child_r);
1307 } else {
1308 retainClosure(*srt,c,c_child_r);
1309 }
1310 #else
1311 retainClosure(*srt,c,c_child_r);
1312 #endif
1313 }
1314 p++;
1315 bitmap = bitmap >> 1;
1316 }
1317 }
1318
1319 /* -----------------------------------------------------------------------------
1320 * Process all the objects in the stack chunk from stackStart to stackEnd
1321 * with *c and *c_child_r being their parent and their most recent retainer,
1322 * respectively. Treat stackOptionalFun as another child of *c if it is
1323 * not NULL.
1324 * Invariants:
1325 * *c is one of the following: TSO, AP_STACK.
1326 * If *c is TSO, c == c_child_r.
1327 * stackStart < stackEnd.
1328 * RSET(c) and RSET(c_child_r) are valid, i.e., their
1329 * interpretation conforms to the current value of flip (even when they
1330 * are interpreted to be NULL).
1331 * If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
1332 * or ThreadKilled, which means that its stack is ready to process.
1333 * Note:
1334 * This code was almost plagiarzied from GC.c! For each pointer,
1335 * retainClosure() is invoked instead of evacuate().
1336 * -------------------------------------------------------------------------- */
1337 static void
1338 retainStack( StgClosure *c, retainer c_child_r,
1339 StgPtr stackStart, StgPtr stackEnd )
1340 {
1341 stackElement *oldStackBoundary;
1342 StgPtr p;
1343 StgRetInfoTable *info;
1344 StgWord32 bitmap;
1345 nat size;
1346
1347 #ifdef DEBUG_RETAINER
1348 cStackSize++;
1349 if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
1350 #endif
1351
1352 /*
1353 Each invocation of retainStack() creates a new virtual
1354 stack. Since all such stacks share a single common stack, we
1355 record the current currentStackBoundary, which will be restored
1356 at the exit.
1357 */
1358 oldStackBoundary = currentStackBoundary;
1359 currentStackBoundary = stackTop;
1360
1361 #ifdef DEBUG_RETAINER
1362 // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
1363 #endif
1364
1365 ASSERT(get_itbl(c)->type != TSO ||
1366 (((StgTSO *)c)->what_next != ThreadRelocated &&
1367 ((StgTSO *)c)->what_next != ThreadComplete &&
1368 ((StgTSO *)c)->what_next != ThreadKilled));
1369
1370 p = stackStart;
1371 while (p < stackEnd) {
1372 info = get_ret_itbl((StgClosure *)p);
1373
1374 switch(info->i.type) {
1375
1376 case UPDATE_FRAME:
1377 retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
1378 p += sizeofW(StgUpdateFrame);
1379 continue;
1380
1381 case STOP_FRAME:
1382 case CATCH_FRAME:
1383 case CATCH_STM_FRAME:
1384 case CATCH_RETRY_FRAME:
1385 case ATOMICALLY_FRAME:
1386 case RET_SMALL:
1387 bitmap = BITMAP_BITS(info->i.layout.bitmap);
1388 size = BITMAP_SIZE(info->i.layout.bitmap);
1389 p++;
1390 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1391
1392 follow_srt:
1393 retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
1394 continue;
1395
1396 case RET_BCO: {
1397 StgBCO *bco;
1398
1399 p++;
1400 retainClosure((StgClosure *)*p, c, c_child_r);
1401 bco = (StgBCO *)*p;
1402 p++;
1403 size = BCO_BITMAP_SIZE(bco);
1404 retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
1405 p += size;
1406 continue;
1407 }
1408
1409 // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
1410 case RET_BIG:
1411 size = GET_LARGE_BITMAP(&info->i)->size;
1412 p++;
1413 retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
1414 size, c, c_child_r);
1415 p += size;
1416 // and don't forget to follow the SRT
1417 goto follow_srt;
1418
1419 // Dynamic bitmap: the mask is stored on the stack
1420 case RET_DYN: {
1421 StgWord dyn;
1422 dyn = ((StgRetDyn *)p)->liveness;
1423
1424 // traverse the bitmap first
1425 bitmap = RET_DYN_LIVENESS(dyn);
1426 p = (P_)&((StgRetDyn *)p)->payload[0];
1427 size = RET_DYN_BITMAP_SIZE;
1428 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1429
1430 // skip over the non-ptr words
1431 p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;
1432
1433 // follow the ptr words
1434 for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
1435 retainClosure((StgClosure *)*p, c, c_child_r);
1436 p++;
1437 }
1438 continue;
1439 }
1440
1441 case RET_FUN: {
1442 StgRetFun *ret_fun = (StgRetFun *)p;
1443 StgFunInfoTable *fun_info;
1444
1445 retainClosure(ret_fun->fun, c, c_child_r);
1446 fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun));
1447
1448 p = (P_)&ret_fun->payload;
1449 switch (fun_info->f.fun_type) {
1450 case ARG_GEN:
1451 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1452 size = BITMAP_SIZE(fun_info->f.b.bitmap);
1453 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1454 break;
1455 case ARG_GEN_BIG:
1456 size = GET_FUN_LARGE_BITMAP(fun_info)->size;
1457 retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
1458 size, c, c_child_r);
1459 p += size;
1460 break;
1461 default:
1462 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1463 size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
1464 p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
1465 break;
1466 }
1467 goto follow_srt;
1468 }
1469
1470 default:
1471 barf("Invalid object found in retainStack(): %d",
1472 (int)(info->i.type));
1473 }
1474 }
1475
1476 // restore currentStackBoundary
1477 currentStackBoundary = oldStackBoundary;
1478 #ifdef DEBUG_RETAINER
1479 // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
1480 #endif
1481
1482 #ifdef DEBUG_RETAINER
1483 cStackSize--;
1484 #endif
1485 }
1486
1487 /* ----------------------------------------------------------------------------
1488 * Call retainClosure for each of the children of a PAP/AP
1489 * ------------------------------------------------------------------------- */
1490
1491 static INLINE StgPtr
1492 retain_PAP_payload (StgClosure *pap, /* NOT tagged */
1493 retainer c_child_r, /* NOT tagged */
1494 StgClosure *fun, /* tagged */
1495 StgClosure** payload, StgWord n_args)
1496 {
1497 StgPtr p;
1498 StgWord bitmap;
1499 StgFunInfoTable *fun_info;
1500
1501 retainClosure(fun, pap, c_child_r);
1502 fun = UNTAG_CLOSURE(fun);
1503 fun_info = get_fun_itbl(fun);
1504 ASSERT(fun_info->i.type != PAP);
1505
1506 p = (StgPtr)payload;
1507
1508 switch (fun_info->f.fun_type) {
1509 case ARG_GEN:
1510 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1511 p = retain_small_bitmap(p, n_args, bitmap,
1512 pap, c_child_r);
1513 break;
1514 case ARG_GEN_BIG:
1515 retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
1516 n_args, pap, c_child_r);
1517 p += n_args;
1518 break;
1519 case ARG_BCO:
1520 retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun),
1521 n_args, pap, c_child_r);
1522 p += n_args;
1523 break;
1524 default:
1525 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1526 p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
1527 break;
1528 }
1529 return p;
1530 }
1531
1532 /* -----------------------------------------------------------------------------
1533 * Compute the retainer set of *c0 and all its desecents by traversing.
1534 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
1535 * Invariants:
1536 * c0 = cp0 = r0 holds only for root objects.
1537 * RSET(cp0) and RSET(r0) are valid, i.e., their
1538 * interpretation conforms to the current value of flip (even when they
1539 * are interpreted to be NULL).
1540 * However, RSET(c0) may be corrupt, i.e., it may not conform to
1541 * the current value of flip. If it does not, during the execution
1542 * of this function, RSET(c0) must be initialized as well as all
1543 * its descendants.
1544 * Note:
1545 * stackTop must be the same at the beginning and the exit of this function.
1546 * *c0 can be TSO (as well as AP_STACK).
1547 * -------------------------------------------------------------------------- */
1548 static void
1549 retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
1550 {
1551 // c = Current closure (possibly tagged)
1552 // cp = Current closure's Parent (NOT tagged)
1553 // r = current closures' most recent Retainer (NOT tagged)
1554 // c_child_r = current closure's children's most recent retainer
1555 // first_child = first child of c
1556 StgClosure *c, *cp, *first_child;
1557 RetainerSet *s, *retainerSetOfc;
1558 retainer r, c_child_r;
1559 StgWord typeOfc;
1560
1561 #ifdef DEBUG_RETAINER
1562 // StgPtr oldStackTop;
1563 #endif
1564
1565 #ifdef DEBUG_RETAINER
1566 // oldStackTop = stackTop;
1567 // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
1568 #endif
1569
1570 // (c, cp, r) = (c0, cp0, r0)
1571 c = c0;
1572 cp = cp0;
1573 r = r0;
1574 goto inner_loop;
1575
1576 loop:
1577 //debugBelch("loop");
1578 // pop to (c, cp, r);
1579 pop(&c, &cp, &r);
1580
1581 if (c == NULL) {
1582 #ifdef DEBUG_RETAINER
1583 // debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
1584 #endif
1585 return;
1586 }
1587
1588 //debugBelch("inner_loop");
1589
1590 inner_loop:
1591 c = UNTAG_CLOSURE(c);
1592
1593 // c = current closure under consideration,
1594 // cp = current closure's parent,
1595 // r = current closure's most recent retainer
1596 //
1597 // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
1598 // RSET(cp) and RSET(r) are valid.
1599 // RSET(c) is valid only if c has been visited before.
1600 //
1601 // Loop invariants (on the relation between c, cp, and r)
1602 // if cp is not a retainer, r belongs to RSET(cp).
1603 // if cp is a retainer, r == cp.
1604
1605 typeOfc = get_itbl(c)->type;
1606
1607 #ifdef DEBUG_RETAINER
1608 switch (typeOfc) {
1609 case IND_STATIC:
1610 case CONSTR_NOCAF_STATIC:
1611 case CONSTR_STATIC:
1612 case THUNK_STATIC:
1613 case FUN_STATIC:
1614 break;
1615 default:
1616 if (retainerSetOf(c) == NULL) { // first visit?
1617 costArray[typeOfc] += cost(c);
1618 sumOfNewCost += cost(c);
1619 }
1620 break;
1621 }
1622 #endif
1623
1624 // special cases
1625 switch (typeOfc) {
1626 case TSO:
1627 if (((StgTSO *)c)->what_next == ThreadComplete ||
1628 ((StgTSO *)c)->what_next == ThreadKilled) {
1629 #ifdef DEBUG_RETAINER
1630 debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
1631 #endif
1632 goto loop;
1633 }
1634 if (((StgTSO *)c)->what_next == ThreadRelocated) {
1635 #ifdef DEBUG_RETAINER
1636 debugBelch("ThreadRelocated encountered in retainClosure()\n");
1637 #endif
1638 c = (StgClosure *)((StgTSO *)c)->link;
1639 goto inner_loop;
1640 }
1641 break;
1642
1643 case IND_STATIC:
1644 // We just skip IND_STATIC, so its retainer set is never computed.
1645 c = ((StgIndStatic *)c)->indirectee;
1646 goto inner_loop;
1647 // static objects with no pointers out, so goto loop.
1648 case CONSTR_NOCAF_STATIC:
1649 // It is not just enough not to compute the retainer set for *c; it is
1650 // mandatory because CONSTR_NOCAF_STATIC are not reachable from
1651 // scavenged_static_objects, the list from which is assumed to traverse
1652 // all static objects after major garbage collections.
1653 goto loop;
1654 case THUNK_STATIC:
1655 case FUN_STATIC:
1656 if (get_itbl(c)->srt_bitmap == 0) {
1657 // No need to compute the retainer set; no dynamic objects
1658 // are reachable from *c.
1659 //
1660 // Static objects: if we traverse all the live closures,
1661 // including static closures, during each heap census then
1662 // we will observe that some static closures appear and
1663 // disappear. eg. a closure may contain a pointer to a
1664 // static function 'f' which is not otherwise reachable
1665 // (it doesn't indirectly point to any CAFs, so it doesn't
1666 // appear in any SRTs), so we would find 'f' during
1667 // traversal. However on the next sweep there may be no
1668 // closures pointing to 'f'.
1669 //
1670 // We must therefore ignore static closures whose SRT is
1671 // empty, because these are exactly the closures that may
1672 // "appear". A closure with a non-empty SRT, and which is
1673 // still required, will always be reachable.
1674 //
1675 // But what about CONSTR_STATIC? Surely these may be able
1676 // to appear, and they don't have SRTs, so we can't
1677 // check. So for now, we're calling
1678 // resetStaticObjectForRetainerProfiling() from the
1679 // garbage collector to reset the retainer sets in all the
1680 // reachable static objects.
1681 goto loop;
1682 }
1683 default:
1684 break;
1685 }
1686
1687 // The above objects are ignored in computing the average number of times
1688 // an object is visited.
1689 timesAnyObjectVisited++;
1690
1691 // If this is the first visit to c, initialize its retainer set.
1692 maybeInitRetainerSet(c);
1693 retainerSetOfc = retainerSetOf(c);
1694
1695 // Now compute s:
1696 // isRetainer(cp) == rtsTrue => s == NULL
1697 // isRetainer(cp) == rtsFalse => s == cp.retainer
1698 if (isRetainer(cp))
1699 s = NULL;
1700 else
1701 s = retainerSetOf(cp);
1702
1703 // (c, cp, r, s) is available.
1704
1705 // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
1706 if (retainerSetOfc == NULL) {
1707 // This is the first visit to *c.
1708 numObjectVisited++;
1709
1710 if (s == NULL)
1711 associate(c, singleton(r));
1712 else
1713 // s is actually the retainer set of *c!
1714 associate(c, s);
1715
1716 // compute c_child_r
1717 c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
1718 } else {
1719 // This is not the first visit to *c.
1720 if (isMember(r, retainerSetOfc))
1721 goto loop; // no need to process child
1722
1723 if (s == NULL)
1724 associate(c, addElement(r, retainerSetOfc));
1725 else {
1726 // s is not NULL and cp is not a retainer. This means that
1727 // each time *cp is visited, so is *c. Thus, if s has
1728 // exactly one more element in its retainer set than c, s
1729 // is also the new retainer set for *c.
1730 if (s->num == retainerSetOfc->num + 1) {
1731 associate(c, s);
1732 }
1733 // Otherwise, just add R_r to the current retainer set of *c.
1734 else {
1735 associate(c, addElement(r, retainerSetOfc));
1736 }
1737 }
1738
1739 if (isRetainer(c))
1740 goto loop; // no need to process child
1741
1742 // compute c_child_r
1743 c_child_r = r;
1744 }
1745
1746 // now, RSET() of all of *c, *cp, and *r is valid.
1747 // (c, c_child_r) are available.
1748
1749 // process child
1750
1751 // Special case closures: we process these all in one go rather
1752 // than attempting to save the current position, because doing so
1753 // would be hard.
1754 switch (typeOfc) {
1755 case TSO:
1756 retainStack(c, c_child_r,
1757 ((StgTSO *)c)->sp,
1758 ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
1759 goto loop;
1760
1761 case PAP:
1762 {
1763 StgPAP *pap = (StgPAP *)c;
1764 retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
1765 goto loop;
1766 }
1767
1768 case AP:
1769 {
1770 StgAP *ap = (StgAP *)c;
1771 retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
1772 goto loop;
1773 }
1774
1775 case AP_STACK:
1776 retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
1777 retainStack(c, c_child_r,
1778 (StgPtr)((StgAP_STACK *)c)->payload,
1779 (StgPtr)((StgAP_STACK *)c)->payload +
1780 ((StgAP_STACK *)c)->size);
1781 goto loop;
1782 }
1783
1784 push(c, c_child_r, &first_child);
1785
1786 // If first_child is null, c has no child.
1787 // If first_child is not null, the top stack element points to the next
1788 // object. push() may or may not push a stackElement on the stack.
1789 if (first_child == NULL)
1790 goto loop;
1791
1792 // (c, cp, r) = (first_child, c, c_child_r)
1793 r = c_child_r;
1794 cp = c;
1795 c = first_child;
1796 goto inner_loop;
1797 }
1798
1799 /* -----------------------------------------------------------------------------
1800 * Compute the retainer set for every object reachable from *tl.
1801 * -------------------------------------------------------------------------- */
1802 static void
1803 retainRoot( StgClosure **tl )
1804 {
1805 StgClosure *c;
1806
1807 // We no longer assume that only TSOs and WEAKs are roots; any closure can
1808 // be a root.
1809
1810 ASSERT(isEmptyRetainerStack());
1811 currentStackBoundary = stackTop;
1812
1813 c = UNTAG_CLOSURE(*tl);
1814 if (c != &stg_END_TSO_QUEUE_closure && isRetainer(c)) {
1815 retainClosure(c, c, getRetainerFrom(c));
1816 } else {
1817 retainClosure(c, c, CCS_SYSTEM);
1818 }
1819
1820 // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
1821 // *tl might be a TSO which is ThreadComplete, in which
1822 // case we ignore it for the purposes of retainer profiling.
1823 }
1824
1825 /* -----------------------------------------------------------------------------
1826 * Compute the retainer set for each of the objects in the heap.
1827 * -------------------------------------------------------------------------- */
1828 static void
1829 computeRetainerSet( void )
1830 {
1831 StgWeak *weak;
1832 RetainerSet *rtl;
1833 nat g;
1834 StgPtr ml;
1835 bdescr *bd;
1836 #ifdef DEBUG_RETAINER
1837 RetainerSet tmpRetainerSet;
1838 #endif
1839
1840 GetRoots(retainRoot); // for scheduler roots
1841
1842 // This function is called after a major GC, when key, value, and finalizer
1843 // all are guaranteed to be valid, or reachable.
1844 //
1845 // The following code assumes that WEAK objects are considered to be roots
1846 // for retainer profilng.
1847 for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
1848 // retainRoot((StgClosure *)weak);
1849 retainRoot((StgClosure **)&weak);
1850
1851 // Consider roots from the stable ptr table.
1852 markStablePtrTable(retainRoot);
1853
1854 // The following code resets the rs field of each unvisited mutable
1855 // object (computing sumOfNewCostExtra and updating costArray[] when
1856 // debugging retainer profiler).
1857 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1858 // NOT TRUE: even G0 has a block on its mutable list
1859 // ASSERT(g != 0 || (generations[g].mut_list == NULL));
1860
1861 // Traversing through mut_list is necessary
1862 // because we can find MUT_VAR objects which have not been
1863 // visited during retainer profiling.
1864 for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
1865 for (ml = bd->start; ml < bd->free; ml++) {
1866
1867 maybeInitRetainerSet((StgClosure *)*ml);
1868 rtl = retainerSetOf((StgClosure *)*ml);
1869
1870 #ifdef DEBUG_RETAINER
1871 if (rtl == NULL) {
1872 // first visit to *ml
1873 // This is a violation of the interface rule!
1874 RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
1875
1876 switch (get_itbl((StgClosure *)ml)->type) {
1877 case IND_STATIC:
1878 // no cost involved
1879 break;
1880 case CONSTR_NOCAF_STATIC:
1881 case CONSTR_STATIC:
1882 case THUNK_STATIC:
1883 case FUN_STATIC:
1884 barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
1885 break;
1886 default:
1887 // dynamic objects
1888 costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
1889 sumOfNewCostExtra += cost((StgClosure *)ml);
1890 break;
1891 }
1892 }
1893 #endif
1894 }
1895 }
1896 }
1897 }
1898
1899 /* -----------------------------------------------------------------------------
1900 * Traverse all static objects for which we compute retainer sets,
1901 * and reset their rs fields to NULL, which is accomplished by
1902 * invoking maybeInitRetainerSet(). This function must be called
1903 * before zeroing all objects reachable from scavenged_static_objects
1904 * in the case of major gabage collections. See GarbageCollect() in
1905 * GC.c.
1906 * Note:
1907 * The mut_once_list of the oldest generation must also be traversed?
1908 * Why? Because if the evacuation of an object pointed to by a static
1909 * indirection object fails, it is put back to the mut_once_list of
1910 * the oldest generation.
1911 * However, this is not necessary because any static indirection objects
1912 * are just traversed through to reach dynamic objects. In other words,
1913 * they are not taken into consideration in computing retainer sets.
1914 * -------------------------------------------------------------------------- */
1915 void
1916 resetStaticObjectForRetainerProfiling( void )
1917 {
1918 #ifdef DEBUG_RETAINER
1919 nat count;
1920 #endif
1921 StgClosure *p;
1922
1923 #ifdef DEBUG_RETAINER
1924 count = 0;
1925 #endif
1926 p = scavenged_static_objects;
1927 while (p != END_OF_STATIC_LIST) {
1928 #ifdef DEBUG_RETAINER
1929 count++;
1930 #endif
1931 switch (get_itbl(p)->type) {
1932 case IND_STATIC:
1933 // Since we do not compute the retainer set of any
1934 // IND_STATIC object, we don't have to reset its retainer
1935 // field.
1936 p = (StgClosure*)*IND_STATIC_LINK(p);
1937 break;
1938 case THUNK_STATIC:
1939 maybeInitRetainerSet(p);
1940 p = (StgClosure*)*THUNK_STATIC_LINK(p);
1941 break;
1942 case FUN_STATIC:
1943 maybeInitRetainerSet(p);
1944 p = (StgClosure*)*FUN_STATIC_LINK(p);
1945 break;
1946 case CONSTR_STATIC:
1947 maybeInitRetainerSet(p);
1948 p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
1949 break;
1950 default:
1951 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1952 p, get_itbl(p)->type);
1953 break;
1954 }
1955 }
1956 #ifdef DEBUG_RETAINER
1957 // debugBelch("count in scavenged_static_objects = %d\n", count);
1958 #endif
1959 }
1960
1961 /* -----------------------------------------------------------------------------
1962 * Perform retainer profiling.
1963 * N is the oldest generation being profilied, where the generations are
1964 * numbered starting at 0.
1965 * Invariants:
1966 * Note:
1967 * This function should be called only immediately after major garbage
1968 * collection.
1969 * ------------------------------------------------------------------------- */
1970 void
1971 retainerProfile(void)
1972 {
1973 #ifdef DEBUG_RETAINER
1974 nat i;
1975 nat totalHeapSize; // total raw heap size (computed by linear scanning)
1976 #endif
1977
1978 #ifdef DEBUG_RETAINER
1979 debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
1980 #endif
1981
1982 stat_startRP();
1983
1984 // We haven't flipped the bit yet.
1985 #ifdef DEBUG_RETAINER
1986 debugBelch("Before traversing:\n");
1987 sumOfCostLinear = 0;
1988 for (i = 0;i < N_CLOSURE_TYPES; i++)
1989 costArrayLinear[i] = 0;
1990 totalHeapSize = checkHeapSanityForRetainerProfiling();
1991
1992 debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
1993 /*
1994 debugBelch("costArrayLinear[] = ");
1995 for (i = 0;i < N_CLOSURE_TYPES; i++)
1996 debugBelch("[%u:%u] ", i, costArrayLinear[i]);
1997 debugBelch("\n");
1998 */
1999
2000 ASSERT(sumOfCostLinear == totalHeapSize);
2001
2002 /*
2003 #define pcostArrayLinear(index) \
2004 if (costArrayLinear[index] > 0) \
2005 debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
2006 pcostArrayLinear(THUNK_STATIC);
2007 pcostArrayLinear(FUN_STATIC);
2008 pcostArrayLinear(CONSTR_STATIC);
2009 pcostArrayLinear(CONSTR_NOCAF_STATIC);
2010 */
2011 #endif
2012
2013 // Now we flips flip.
2014 flip = flip ^ 1;
2015
2016 #ifdef DEBUG_RETAINER
2017 stackSize = 0;
2018 maxStackSize = 0;
2019 cStackSize = 0;
2020 maxCStackSize = 0;
2021 #endif
2022 numObjectVisited = 0;
2023 timesAnyObjectVisited = 0;
2024
2025 #ifdef DEBUG_RETAINER
2026 debugBelch("During traversing:\n");
2027 sumOfNewCost = 0;
2028 sumOfNewCostExtra = 0;
2029 for (i = 0;i < N_CLOSURE_TYPES; i++)
2030 costArray[i] = 0;
2031 #endif
2032
2033 /*
2034 We initialize the traverse stack each time the retainer profiling is
2035 performed (because the traverse stack size varies on each retainer profiling
2036 and this operation is not costly anyhow). However, we just refresh the
2037 retainer sets.
2038 */
2039 initializeTraverseStack();
2040 #ifdef DEBUG_RETAINER
2041 initializeAllRetainerSet();
2042 #else
2043 refreshAllRetainerSet();
2044 #endif
2045 computeRetainerSet();
2046
2047 #ifdef DEBUG_RETAINER
2048 debugBelch("After traversing:\n");
2049 sumOfCostLinear = 0;
2050 for (i = 0;i < N_CLOSURE_TYPES; i++)
2051 costArrayLinear[i] = 0;
2052 totalHeapSize = checkHeapSanityForRetainerProfiling();
2053
2054 debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
2055 ASSERT(sumOfCostLinear == totalHeapSize);
2056
2057 // now, compare the two results
2058 /*
2059 Note:
2060 costArray[] must be exactly the same as costArrayLinear[].
2061 Known exceptions:
2062 1) Dead weak pointers, whose type is CONSTR. These objects are not
2063 reachable from any roots.
2064 */
2065 debugBelch("Comparison:\n");
2066 debugBelch("\tcostArrayLinear[] (must be empty) = ");
2067 for (i = 0;i < N_CLOSURE_TYPES; i++)
2068 if (costArray[i] != costArrayLinear[i])
2069 // nothing should be printed except MUT_VAR after major GCs
2070 debugBelch("[%u:%u] ", i, costArrayLinear[i]);
2071 debugBelch("\n");
2072
2073 debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
2074 debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
2075 debugBelch("\tcostArray[] (must be empty) = ");
2076 for (i = 0;i < N_CLOSURE_TYPES; i++)
2077 if (costArray[i] != costArrayLinear[i])
2078 // nothing should be printed except MUT_VAR after major GCs
2079 debugBelch("[%u:%u] ", i, costArray[i]);
2080 debugBelch("\n");
2081
2082 // only for major garbage collection
2083 ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
2084 #endif
2085
2086 // post-processing
2087 closeTraverseStack();
2088 #ifdef DEBUG_RETAINER
2089 closeAllRetainerSet();
2090 #else
2091 // Note that there is no post-processing for the retainer sets.
2092 #endif
2093 retainerGeneration++;
2094
2095 stat_endRP(
2096 retainerGeneration - 1, // retainerGeneration has just been incremented!
2097 #ifdef DEBUG_RETAINER
2098 maxCStackSize, maxStackSize,
2099 #endif
2100 (double)timesAnyObjectVisited / numObjectVisited);
2101 }
2102
2103 /* -----------------------------------------------------------------------------
2104 * DEBUGGING CODE
2105 * -------------------------------------------------------------------------- */
2106
#ifdef DEBUG_RETAINER

// Sanity predicate on a closure pointer r: it must be either a static
// closure, or heap-allocated in a block that is not marked BF_FREE; and the
// word it points at must not be 0xaaaaaaaa (presumably the debug-RTS
// scribble pattern for freed memory -- NOTE(review): confirm against
// the block allocator's free-block scribbling).
#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
                           ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
                           ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
2112
2113 static nat
2114 sanityCheckHeapClosure( StgClosure *c )
2115 {
2116 StgInfoTable *info;
2117
2118 ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
2119 ASSERT(!closure_STATIC(c));
2120 ASSERT(LOOKS_LIKE_PTR(c));
2121
2122 if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
2123 if (get_itbl(c)->type == CONSTR &&
2124 !strcmp(GET_PROF_TYPE(get_itbl(c)), "DEAD_WEAK") &&
2125 !strcmp(GET_PROF_DESC(get_itbl(c)), "DEAD_WEAK")) {
2126 debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
2127 costArray[get_itbl(c)->type] += cost(c);
2128 sumOfNewCost += cost(c);
2129 } else
2130 debugBelch(
2131 "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
2132 flip, c, get_itbl(c)->type,
2133 get_itbl(c)->prof.closure_type, GET_PROF_DESC(get_itbl(c)),
2134 RSET(c));
2135 } else {
2136 // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
2137 }
2138
2139 return closure_sizeW(c);
2140 }
2141
2142 static nat
2143 heapCheck( bdescr *bd )
2144 {
2145 StgPtr p;
2146 static nat costSum, size;
2147
2148 costSum = 0;
2149 while (bd != NULL) {
2150 p = bd->start;
2151 while (p < bd->free) {
2152 size = sanityCheckHeapClosure((StgClosure *)p);
2153 sumOfCostLinear += size;
2154 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2155 p += size;
2156 // no need for slop check; I think slops are not used currently.
2157 }
2158 ASSERT(p == bd->free);
2159 costSum += bd->free - bd->start;
2160 bd = bd->link;
2161 }
2162
2163 return costSum;
2164 }
2165
2166 static nat
2167 smallObjectPoolCheck(void)
2168 {
2169 bdescr *bd;
2170 StgPtr p;
2171 static nat costSum, size;
2172
2173 bd = g0s0->blocks;
2174 costSum = 0;
2175
2176 // first block
2177 if (bd == NULL)
2178 return costSum;
2179
2180 p = bd->start;
2181 while (p < alloc_Hp) {
2182 size = sanityCheckHeapClosure((StgClosure *)p);
2183 sumOfCostLinear += size;
2184 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2185 p += size;
2186 }
2187 ASSERT(p == alloc_Hp);
2188 costSum += alloc_Hp - bd->start;
2189
2190 bd = bd->link;
2191 while (bd != NULL) {
2192 p = bd->start;
2193 while (p < bd->free) {
2194 size = sanityCheckHeapClosure((StgClosure *)p);
2195 sumOfCostLinear += size;
2196 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2197 p += size;
2198 }
2199 ASSERT(p == bd->free);
2200 costSum += bd->free - bd->start;
2201 bd = bd->link;
2202 }
2203
2204 return costSum;
2205 }
2206
2207 static nat
2208 chainCheck(bdescr *bd)
2209 {
2210 nat costSum, size;
2211
2212 costSum = 0;
2213 while (bd != NULL) {
2214 // bd->free - bd->start is not an accurate measurement of the
2215 // object size. Actually it is always zero, so we compute its
2216 // size explicitly.
2217 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2218 sumOfCostLinear += size;
2219 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
2220 costSum += size;
2221 bd = bd->link;
2222 }
2223
2224 return costSum;
2225 }
2226
/*
  Sanity-check the entire heap after a retainer traversal and return the
  total number of live words found.

  Single-generation (two-space) heaps are checked via g0s0's to-space and
  large-object chain.  Generational heaps are checked per step; g0s0 is
  special-cased because its ordinary blocks form the small object pool
  (scanned up to alloc_Hp rather than bd->free).

  Side effect: every check call accumulates into sumOfCostLinear and
  costArrayLinear[], which the caller compares against the traversal's
  own cost accounting.  Progress is logged via debugBelch after each
  partial check.
 */
static nat
checkHeapSanityForRetainerProfiling( void )
{
    nat costSum, g, s;

    costSum = 0;
    debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
    if (RtsFlags.GcFlags.generations == 1) {
        costSum += heapCheck(g0s0->to_blocks);
        debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
        costSum += chainCheck(g0s0->large_objects);
        debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
    } else {
        for (g = 0; g < RtsFlags.GcFlags.generations; g++)
        for (s = 0; s < generations[g].n_steps; s++) {
            /*
              After all live objects have been scavenged, the garbage
              collector may create some objects in
              scheduleFinalizers().  These objects are created through
              allocate(), so the small object pool or the large object
              pool of g0s0 may not be empty.
            */
            if (g == 0 && s == 0) {
                costSum += smallObjectPoolCheck();
                debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
                costSum += chainCheck(generations[g].steps[s].large_objects);
                debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
            } else {
                costSum += heapCheck(generations[g].steps[s].blocks);
                debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
                costSum += chainCheck(generations[g].steps[s].large_objects);
                debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
            }
        }
    }

    return costSum;
}
2265
/*
  Debugging aid: scan the whole heap for any word equal to the address p,
  i.e. find every closure that (appears to) point at p.

  For each hit, walks backwards word by word from the hit until a word
  that looks like an info pointer is found, and reports that address as
  the (presumed) start of the enclosing closure.  NOTE(review): the
  backward walk has no lower bound, so a false LOOKS_LIKE_GHC_INFO match
  or a hit in a pathological block could walk off bd->start; acceptable
  for interactive debugging only.

  The 'return' statements are deliberately commented out so that every
  occurrence of p is reported, not just the first.
 */
void
findPointer(StgPtr p)
{
    StgPtr q, r, e;
    bdescr *bd;
    nat g, s;

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        for (s = 0; s < generations[g].n_steps; s++) {
            // if (g == 0 && s == 0) continue;
            bd = generations[g].steps[s].blocks;
            for (; bd; bd = bd->link) {
                for (q = bd->start; q < bd->free; q++) {
                    if (*q == (StgWord)p) {
                        r = q;
                        // back up to what looks like the closure's info pointer
                        while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
                        debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
                        // return;
                    }
                }
            }
            bd = generations[g].steps[s].large_objects;
            for (; bd; bd = bd->link) {
                // large blocks hold a single object; scan only its extent
                e = bd->start + cost((StgClosure *)bd->start);
                for (q = bd->start; q < e; q++) {
                    if (*q == (StgWord)p) {
                        r = q;
                        // skip padding zeros, then back up to the info pointer
                        while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
                        debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
                        // return;
                    }
                }
            }
        }
    }
}
2302
2303 static void
2304 belongToHeap(StgPtr p)
2305 {
2306 bdescr *bd;
2307 nat g, s;
2308
2309 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2310 for (s = 0; s < generations[g].n_steps; s++) {
2311 // if (g == 0 && s == 0) continue;
2312 bd = generations[g].steps[s].blocks;
2313 for (; bd; bd = bd->link) {
2314 if (bd->start <= p && p < bd->free) {
2315 debugBelch("Belongs to gen[%d], step[%d]", g, s);
2316 return;
2317 }
2318 }
2319 bd = generations[g].steps[s].large_objects;
2320 for (; bd; bd = bd->link) {
2321 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2322 debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
2323 return;
2324 }
2325 }
2326 }
2327 }
2328 }
2329 #endif /* DEBUG_RETAINER */
2330
2331 #endif /* PROFILING */