Reorganisation of the source tree
[ghc.git] / rts / RetainerProfile.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2001
4 * Author: Sungwoo Park
5 *
6 * Retainer profiling.
7 *
8 * ---------------------------------------------------------------------------*/
9
10 #ifdef PROFILING
11
12 // Turn off inlining when debugging - it obfuscates things
13 #ifdef DEBUG
14 #define INLINE
15 #else
16 #define INLINE inline
17 #endif
18
19 #include "Rts.h"
20 #include "RtsUtils.h"
21 #include "RetainerProfile.h"
22 #include "RetainerSet.h"
23 #include "Schedule.h"
24 #include "Printer.h"
25 #include "Storage.h"
26 #include "RtsFlags.h"
27 #include "Weak.h"
28 #include "Sanity.h"
29 #include "Profiling.h"
30 #include "Stats.h"
31 #include "BlockAlloc.h"
32 #include "ProfHeap.h"
33 #include "Apply.h"
34
35 /*
36 Note: what to change in order to plug-in a new retainer profiling scheme?
37 (1) type retainer in ../includes/StgRetainerProf.h
38 (2) retainer function R(), i.e., getRetainerFrom()
39 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
40 in RetainerSet.h, if needed.
41 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
42 */
43
44 /* -----------------------------------------------------------------------------
45 * Declarations...
46 * -------------------------------------------------------------------------- */
47
static nat retainerGeneration;  // retainer-profiling generation counter

static nat numObjectVisited;    // total number of objects visited
static nat timesAnyObjectVisited; // number of times any objects are visited
                                  // (the same object may be counted repeatedly)

/*
  The rs field in the profile header of any object points to its retainer
  set in an indirect way: if flip is 0, it points to the retainer set;
  if flip is 1, it points to the next byte after the retainer set (even
  for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
  pointer. See retainerSetOf().
 */

StgWord flip = 0;     // flip bit
                      // must be 0 if DEBUG_RETAINER is on (for static closures)

// Store the encoded NULL retainer set into closure c, respecting the
// current value of the flip bit (see the comment on flip above).
#define setRetainerSetToNull(c)   \
  (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)

static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
static void retainClosure(StgClosure *, StgClosure *, retainer);
#ifdef DEBUG_RETAINER
static void belongToHeap(StgPtr p);
#endif

#ifdef DEBUG_RETAINER
/*
  cStackSize records how many times retainStack() has been invoked recursively,
  that is, the number of activation records for retainStack() on the C stack.
  maxCStackSize records its max value.
  Invariants:
    cStackSize <= maxCStackSize
 */
static nat cStackSize, maxCStackSize;

static nat sumOfNewCost;        // sum of the cost of each object, computed
                                // when the object is first visited
static nat sumOfNewCostExtra;   // for those objects not visited during
                                // retainer profiling, e.g., MUT_VAR
static nat costArray[N_CLOSURE_TYPES];

nat sumOfCostLinear;            // sum of the costs of all object, computed
                                // when linearly traversing the heap after
                                // retainer profiling
nat costArrayLinear[N_CLOSURE_TYPES];
#endif
94
95 /* -----------------------------------------------------------------------------
96 * Retainer stack - header
97 * Note:
 *    Although the retainer stack implementation could be separated
 *    from the retainer profiling engine, there does not seem to be
 *    any advantage in doing that; the retainer stack is an integral part
 *    of the retainer profiling engine and cannot be used elsewhere at
 *    all.
103 * -------------------------------------------------------------------------- */
104
// How the "position of the next child" is encoded in a stackPos.
typedef enum {
    posTypeStep,      // fixed layout: next.step is the index of the next child
    posTypePtrs,      // pointers-first payload: next.ptrs is the cursor
    posTypeSRT,       // small SRT: next.srt walks a bitmap
    posTypeLargeSRT,  // large SRT: next.large_srt walks a large bitmap
} nextPosType;

typedef union {
    // fixed layout or layout specified by a field in the closure
    StgWord step;

    // layout.payload
    struct {
        // See StgClosureInfo in InfoTables.h
#if SIZEOF_VOID_P == 8
        StgWord32 pos;
        StgWord32 ptrs;
#else
        StgWord16 pos;
        StgWord16 ptrs;
#endif
        StgPtr payload;
    } ptrs;

    // SRT
    struct {
        StgClosure **srt;
        StgWord srt_bitmap;
    } srt;

    // Large SRT
    struct {
        StgLargeSRT *srt;
        StgWord offset;
    } large_srt;

} nextPos;

// Discriminated cursor: which kind of child enumeration, plus its state.
typedef struct {
    nextPosType type;
    nextPos next;
} stackPos;

// One entry on the traverse stack: a closure whose children are still being
// visited, the retainer of those children, and the enumeration cursor.
typedef struct {
    StgClosure *c;
    retainer c_child_r;
    stackPos info;
} stackElement;

/*
  Invariants:
    firstStack points to the first block group.
    currentStack points to the block group currently being used.
    currentStack->free == stackLimit.
    stackTop points to the topmost byte in the stack of currentStack.
    Unless the whole stack is empty, stackTop must point to the topmost
    object (or byte) in the whole stack. Thus, it is only when the whole stack
    is empty that stackTop == stackLimit (not during the execution of push()
    and pop()).
    stackBottom == currentStack->start.
    stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
  Note:
    When a current stack becomes empty, stackTop is set to point to
    the topmost element on the previous block group so as to satisfy
    the invariants described above.
 */
static bdescr *firstStack = NULL;
static bdescr *currentStack;
static stackElement *stackBottom, *stackTop, *stackLimit;

/*
  currentStackBoundary is used to mark the current stack chunk.
  If stackTop == currentStackBoundary, it means that the current stack chunk
  is empty. It is the responsibility of the user to keep currentStackBoundary
  valid all the time if it is to be employed.
 */
static stackElement *currentStackBoundary;

/*
  stackSize records the current size of the stack.
  maxStackSize records its high water mark.
  Invariants:
    stackSize <= maxStackSize
  Note:
    stackSize is just an estimate measure of the depth of the graph. The reason
    is that some heap objects have only a single child and may not result
    in a new element being pushed onto the stack. Therefore, at the end of
    retainer profiling, maxStackSize + maxCStackSize is some value no greater
    than the actual depth of the graph.
 */
#ifdef DEBUG_RETAINER
static int stackSize, maxStackSize;
#endif

// number of blocks allocated for one stack
#define BLOCKS_IN_STACK 1
201
202 /* -----------------------------------------------------------------------------
203 * Add a new block group to the stack.
204 * Invariants:
205 * currentStack->link == s.
206 * -------------------------------------------------------------------------- */
207 static INLINE void
208 newStackBlock( bdescr *bd )
209 {
210 currentStack = bd;
211 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
212 stackBottom = (stackElement *)bd->start;
213 stackLimit = (stackElement *)stackTop;
214 bd->free = (StgPtr)stackLimit;
215 }
216
217 /* -----------------------------------------------------------------------------
218 * Return to the previous block group.
219 * Invariants:
220 * s->link == currentStack.
221 * -------------------------------------------------------------------------- */
222 static INLINE void
223 returnToOldStack( bdescr *bd )
224 {
225 currentStack = bd;
226 stackTop = (stackElement *)bd->free;
227 stackBottom = (stackElement *)bd->start;
228 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
229 bd->free = (StgPtr)stackLimit;
230 }
231
232 /* -----------------------------------------------------------------------------
233 * Initializes the traverse stack.
234 * -------------------------------------------------------------------------- */
235 static void
236 initializeTraverseStack( void )
237 {
238 if (firstStack != NULL) {
239 freeChain(firstStack);
240 }
241
242 firstStack = allocGroup(BLOCKS_IN_STACK);
243 firstStack->link = NULL;
244 firstStack->u.back = NULL;
245
246 newStackBlock(firstStack);
247 }
248
249 /* -----------------------------------------------------------------------------
250 * Frees all the block groups in the traverse stack.
251 * Invariants:
252 * firstStack != NULL
253 * -------------------------------------------------------------------------- */
254 static void
255 closeTraverseStack( void )
256 {
257 freeChain(firstStack);
258 firstStack = NULL;
259 }
260
261 /* -----------------------------------------------------------------------------
262 * Returns rtsTrue if the whole stack is empty.
263 * -------------------------------------------------------------------------- */
264 static INLINE rtsBool
265 isEmptyRetainerStack( void )
266 {
267 return (firstStack == currentStack) && stackTop == stackLimit;
268 }
269
270 /* -----------------------------------------------------------------------------
271 * Returns size of stack
272 * -------------------------------------------------------------------------- */
#ifdef DEBUG
lnat
retainerStackBlocks( void )
{
    // Total number of blocks across every group in the traverse-stack chain.
    lnat total = 0;
    bdescr *bd = firstStack;

    while (bd != NULL) {
        total += bd->blocks;
        bd = bd->link;
    }

    return total;
}
#endif
286
287 /* -----------------------------------------------------------------------------
288 * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
289 * i.e., if the current stack chunk is empty.
290 * -------------------------------------------------------------------------- */
291 static INLINE rtsBool
292 isOnBoundary( void )
293 {
294 return stackTop == currentStackBoundary;
295 }
296
297 /* -----------------------------------------------------------------------------
298 * Initializes *info from ptrs and payload.
299 * Invariants:
300 * payload[] begins with ptrs pointers followed by non-pointers.
301 * -------------------------------------------------------------------------- */
302 static INLINE void
303 init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
304 {
305 info->type = posTypePtrs;
306 info->next.ptrs.pos = 0;
307 info->next.ptrs.ptrs = ptrs;
308 info->next.ptrs.payload = payload;
309 }
310
311 /* -----------------------------------------------------------------------------
312 * Find the next object from *info.
313 * -------------------------------------------------------------------------- */
314 static INLINE StgClosure *
315 find_ptrs( stackPos *info )
316 {
317 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
318 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
319 } else {
320 return NULL;
321 }
322 }
323
324 /* -----------------------------------------------------------------------------
325 * Initializes *info from SRT information stored in *infoTable.
326 * -------------------------------------------------------------------------- */
327 static INLINE void
328 init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
329 {
330 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
331 info->type = posTypeLargeSRT;
332 info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
333 info->next.large_srt.offset = 0;
334 } else {
335 info->type = posTypeSRT;
336 info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
337 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
338 }
339 }
340
341 static INLINE void
342 init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
343 {
344 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
345 info->type = posTypeLargeSRT;
346 info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
347 info->next.large_srt.offset = 0;
348 } else {
349 info->type = posTypeSRT;
350 info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
351 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
352 }
353 }
354
355 /* -----------------------------------------------------------------------------
356 * Find the next object from *info.
357 * -------------------------------------------------------------------------- */
358 static INLINE StgClosure *
359 find_srt( stackPos *info )
360 {
361 StgClosure *c;
362 StgWord bitmap;
363
364 if (info->type == posTypeSRT) {
365 // Small SRT bitmap
366 bitmap = info->next.srt.srt_bitmap;
367 while (bitmap != 0) {
368 if ((bitmap & 1) != 0) {
369 #ifdef ENABLE_WIN32_DLL_SUPPORT
370
371 if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
372 c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
373 else
374 c = *(info->next.srt.srt);
375 #else
376 c = *(info->next.srt.srt);
377 #endif
378 bitmap = bitmap >> 1;
379 info->next.srt.srt++;
380 info->next.srt.srt_bitmap = bitmap;
381 return c;
382 }
383 bitmap = bitmap >> 1;
384 info->next.srt.srt++;
385 }
386 // bitmap is now zero...
387 return NULL;
388 }
389 else {
390 // Large SRT bitmap
391 nat i = info->next.large_srt.offset;
392 StgWord bitmap;
393
394 // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
395 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
396 bitmap = bitmap >> (i % BITS_IN(StgWord));
397 while (i < info->next.large_srt.srt->l.size) {
398 if ((bitmap & 1) != 0) {
399 c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
400 i++;
401 info->next.large_srt.offset = i;
402 return c;
403 }
404 i++;
405 if (i % BITS_IN(W_) == 0) {
406 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
407 } else {
408 bitmap = bitmap >> 1;
409 }
410 }
411 // reached the end of this bitmap.
412 info->next.large_srt.offset = i;
413 return NULL;
414 }
415 }
416
417 /* -----------------------------------------------------------------------------
 * push() pushes a stackElement representing the next child of *c
 * onto the traverse stack. If *c has no child, *first_child is set
 * to NULL and nothing is pushed onto the stack. If *c has only one
 * child, *first_child is set to that child and nothing is pushed onto
 * the stack. If *c has two or more children, *first_child is set
 * to the first child and a stackElement representing the second
 * child is pushed onto the stack.

 * Invariants:
 *  *c_child_r is the most recent retainer of *c's children.
 *  *c is not any of TSO, AP, PAP, AP_STACK, which means that
 *  there cannot be any stack objects.
 * Note: SRTs are considered to be children as well.
431 * -------------------------------------------------------------------------- */
static INLINE void
push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
{
    stackElement se;
    bdescr *nbd;      // Next Block Descriptor

#ifdef DEBUG_RETAINER
    // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    // Stack-carrying closures are traversed via retainStack(), never pushed
    // here (see the invariants in the header comment).
    ASSERT(get_itbl(c)->type != TSO);
    ASSERT(get_itbl(c)->type != AP_STACK);

    //
    // fill in se
    //

    se.c = c;
    se.c_child_r = c_child_r;

    // fill in se.info; the switch below either returns early (0 or 1 child)
    // or falls through to the actual push at the bottom (2+ children).
    switch (get_itbl(c)->type) {
        // no child, no SRT
    case CONSTR_0_1:
    case CONSTR_0_2:
    case CAF_BLACKHOLE:
    case BLACKHOLE:
    case SE_BLACKHOLE:
    case SE_CAF_BLACKHOLE:
    case ARR_WORDS:
        *first_child = NULL;
        return;

        // one child (fixed), no SRT
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
        *first_child = ((StgMutVar *)c)->var;
        return;
    case THUNK_SELECTOR:
        *first_child = ((StgSelector *)c)->selectee;
        return;
    case IND_PERM:
    case IND_OLDGEN_PERM:
    case IND_OLDGEN:
        *first_child = ((StgInd *)c)->indirectee;
        return;
    case CONSTR_1_0:
    case CONSTR_1_1:
        *first_child = c->payload[0];
        return;

        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;

        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
    case CONSTR_2_0:
        *first_child = c->payload[0];         // return the first pointer
        // se.info.type = posTypeStep;
        // se.info.next.step = 2;             // 2 = second
        break;

        // three children (fixed), no SRT
        // need to push a stackElement
    case MVAR:
        // head must be TSO and the head of a linked list of TSOs.
        // Should it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;                // 2 = second
        break;

        // three children (fixed), no SRT
    case WEAK:
        *first_child = ((StgWeak *)c)->key;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;
        break;

        // layout.payload.ptrs, no SRT
    case CONSTR:
    case STABLE_NAME:
    case BCO:
    case CONSTR_STATIC:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;   // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // layout.payload.ptrs, SRT
    case FUN:           // *c is a heap object.
    case FUN_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;

    case THUNK:
    case THUNK_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
    case FUN_1_0:
    case FUN_1_1:
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;

    case THUNK_1_0:
    case THUNK_1_1:
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;

    case FUN_STATIC:    // *c is a heap object.
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        /* fallthrough */
    case FUN_0_1:
    case FUN_0_2:
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

        // SRT only
    case THUNK_STATIC:
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        /* fallthrough */
    case THUNK_0_1:
    case THUNK_0_2:
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

    case TVAR_WAIT_QUEUE:
        *first_child = (StgClosure *)((StgTVarWaitQueue *)c)->waiting_tso;
        se.info.next.step = 2;                // 2 = second
        break;
    case TVAR:
        *first_child = (StgClosure *)((StgTVar *)c)->current_value;
        break;
    case TREC_HEADER:
        *first_child = (StgClosure *)((StgTRecHeader *)c)->enclosing_trec;
        break;
    case TREC_CHUNK:
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
        se.info.next.step = 0;  // entry no.
        break;

        // cannot appear
    case PAP:
    case AP:
    case AP_STACK:
    case TSO:
    case IND_STATIC:
    case CONSTR_INTLIKE:
    case CONSTR_CHARLIKE:
    case CONSTR_NOCAF_STATIC:
        // stack objects
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_VEC_SMALL:
    case RET_BIG:
    case RET_VEC_BIG:
        // invalid objects
    case IND:
    case BLOCKED_FETCH:
    case FETCH_ME:
    case FETCH_ME_BQ:
    case RBH:
    case REMOTE_REF:
    case EVACUATED:
    case INVALID_OBJECT:
    default:
        barf("Invalid object *c in push()");
        return;
    }

    // We fell through the switch, so an element must be pushed. If the
    // current block group is full, switch to (or allocate) the next one.
    if (stackTop - 1 < stackBottom) {
#ifdef DEBUG_RETAINER
        // debugBelch("push() to the next stack.\n");
#endif
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
        currentStack->free = (StgPtr)stackTop;

        if (currentStack->link == NULL) {
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
            nbd->u.back = currentStack;
            currentStack->link = nbd;
        } else
            nbd = currentStack->link;

        newStackBlock(nbd);
    }

    // adjust stackTop (actual push)
    stackTop--;
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.
    *stackTop = se;

#ifdef DEBUG_RETAINER
    stackSize++;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    // ASSERT(stackSize >= 0);
    // debugBelch("stackSize = %d\n", stackSize);
#endif
}
676
677 /* -----------------------------------------------------------------------------
678 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
679 * Invariants:
680 * stackTop cannot be equal to stackLimit unless the whole stack is
681 * empty, in which case popOff() is not allowed.
682 * Note:
 *  You can think of popOffReal() as a part of popOff() which is
 *  executed at the end of popOff() if necessary. Since popOff() is
 *  likely to be executed quite often while popOffReal() is not, we
 *  separate popOffReal() from popOff(), which is declared as an
 *  INLINE function (for the sake of execution speed). popOffReal()
 *  is called only within popOff() and nowhere else.
689 * -------------------------------------------------------------------------- */
static void
popOffReal(void)
{
    bdescr *pbd;    // Previous Block Descriptor

#ifdef DEBUG_RETAINER
    // debugBelch("pop() to the previous stack.\n");
#endif

    // Called from popOff() only when the element being popped is the last
    // one in the current block group.
    ASSERT(stackTop + 1 == stackLimit);
    ASSERT(stackBottom == (stackElement *)currentStack->start);

    if (firstStack == currentStack) {
        // The stack is completely empty.
        stackTop++;
        ASSERT(stackTop == stackLimit);
#ifdef DEBUG_RETAINER
        stackSize--;
        // NOTE(review): updating maxStackSize right after a decrement can
        // never raise the maximum; looks copied from push() -- confirm.
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
        ASSERT(stackSize >= 0);
        debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
    currentStack->free = (StgPtr)stackLimit;

    // find the previous block descriptor
    pbd = currentStack->u.back;
    ASSERT(pbd != NULL);

    returnToOldStack(pbd);

#ifdef DEBUG_RETAINER
    stackSize--;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    /*
    ASSERT(stackSize >= 0);
    debugBelch("stackSize = %d\n", stackSize);
    */
#endif
}
736
static INLINE void
popOff(void) {
#ifdef DEBUG_RETAINER
    // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    ASSERT(stackTop != stackLimit);
    ASSERT(!isEmptyRetainerStack());

    // <= (instead of <) is wrong!
    // Fast path: the next element down still lives in this block group.
    if (stackTop + 1 < stackLimit) {
        stackTop++;
#ifdef DEBUG_RETAINER
        stackSize--;
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
        ASSERT(stackSize >= 0);
        debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    // Slow path: that was the last element of this block group; switch
    // back to the previous one.
    popOffReal();
}
762
763 /* -----------------------------------------------------------------------------
764 * Finds the next object to be considered for retainer profiling and store
765 * its pointer to *c.
766 * Test if the topmost stack element indicates that more objects are left,
767 * and if so, retrieve the first object and store its pointer to *c. Also,
768 * set *cp and *r appropriately, both of which are stored in the stack element.
769 * The topmost stack element then is overwritten so as for it to now denote
770 * the next object.
771 * If the topmost stack element indicates no more objects are left, pop
772 * off the stack element until either an object can be retrieved or
773 * the current stack chunk becomes empty, indicated by rtsTrue returned by
774 * isOnBoundary(), in which case *c is set to NULL.
775 * Note:
776 * It is okay to call this function even when the current stack chunk
777 * is empty.
778 * -------------------------------------------------------------------------- */
static INLINE void
pop( StgClosure **c, StgClosure **cp, retainer *r )
{
    stackElement *se;

#ifdef DEBUG_RETAINER
    // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    do {
        // if the current stack chunk is depleted
        if (isOnBoundary()) {
            *c = NULL;
            return;
        }

        // Peek at the topmost element; depending on how many children
        // remain we either update it in place or pop it off entirely.
        se = stackTop;

        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

            // three children (fixed), no SRT
            // need to push a stackElement
        case MVAR:
            // se.info.next.step: 2 = tail; afterwards the value (last child).
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;   // move to the next step
                // no popOff
            } else {
                *c = ((StgMVar *)se->c)->value;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // three children (fixed), no SRT
        case WEAK:
            // se.info.next.step: 2 = value; afterwards the finalizer.
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
                // no popOff
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR_WAIT_QUEUE:
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgTVarWaitQueue *)se->c)->next_queue_entry;
                se->info.next.step++;   // move to the next step
                // no popOff
            } else {
                *c = (StgClosure *)((StgTVarWaitQueue *)se->c)->prev_queue_entry;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR:
            *c = (StgClosure *)((StgTVar *)se->c)->first_wait_queue_entry;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_HEADER:
            *c = (StgClosure *)((StgTRecHeader *)se->c)->current_chunk;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow. So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            nat entry_no = se->info.next.step >> 2;
            nat field_no = se->info.next.step & 3;
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                *c = NULL;
                popOff();
                return;
            }
            TRecEntry *entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            *cp = se->c;
            *r = se->c_child_r;
            se->info.next.step++;
            return;
        }

        case CONSTR:
        case STABLE_NAME:
        case BCO:
        case CONSTR_STATIC:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
        case MUT_ARR_PTRS_FROZEN:
        case MUT_ARR_PTRS_FROZEN0:
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
                // no children left; pop and continue with the element below
                popOff();
                break;
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // layout.payload.ptrs, SRT
        case FUN:       // always a heap object
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                // pointer fields exhausted; switch this element to its SRT
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                // pointer fields exhausted; switch this element to its SRT
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
            if (*c != NULL) {
                *cp = se->c;
                *r = se->c_child_r;
                return;
            }
            // SRT exhausted; pop and continue with the element below
            popOff();
            break;

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case CAF_BLACKHOLE:
        case BLACKHOLE:
        case SE_BLACKHOLE:
        case SE_CAF_BLACKHOLE:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case IND_PERM:
        case IND_OLDGEN_PERM:
        case IND_OLDGEN:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
        case IND_STATIC:
        case CONSTR_INTLIKE:
        case CONSTR_CHARLIKE:
        case CONSTR_NOCAF_STATIC:
            // stack objects
        case RET_DYN:
        case UPDATE_FRAME:
        case CATCH_FRAME:
        case STOP_FRAME:
        case RET_BCO:
        case RET_SMALL:
        case RET_VEC_SMALL:
        case RET_BIG:
        case RET_VEC_BIG:
            // invalid objects
        case IND:
        case BLOCKED_FETCH:
        case FETCH_ME:
        case FETCH_ME_BQ:
        case RBH:
        case REMOTE_REF:
        case EVACUATED:
        case INVALID_OBJECT:
        default:
            barf("Invalid object *c in pop()");
            return;
        }
    } while (rtsTrue);
}
1005
1006 /* -----------------------------------------------------------------------------
1007 * RETAINER PROFILING ENGINE
1008 * -------------------------------------------------------------------------- */
1009
void
initRetainerProfiling( void )
{
    // Set up the retainer-set machinery and restart generation numbering
    // from zero before the first profiling pass.
    initializeAllRetainerSet();
    retainerGeneration = 0;
}
1016
1017 /* -----------------------------------------------------------------------------
1018 * This function must be called before f-closing prof_file.
1019 * -------------------------------------------------------------------------- */
void
endRetainerProfiling( void )
{
#ifdef SECOND_APPROACH
    // Dump all retainer sets accumulated during profiling to prof_file.
    outputAllRetainerSet(prof_file);
#endif
}
1027
1028 /* -----------------------------------------------------------------------------
1029 * Returns the actual pointer to the retainer set of the closure *c.
1030 * It may adjust RSET(c) subject to flip.
1031 * Side effects:
1032 * RSET(c) is initialized to NULL if its current value does not
1033 * conform to flip.
1034 * Note:
1035 * Even though this function has side effects, they CAN be ignored because
1036 * subsequent calls to retainerSetOf() always result in the same return value
1037 * and retainerSetOf() is the only way to retrieve retainerSet of a given
1038 * closure.
1039 * We have to perform an XOR (^) operation each time a closure is examined.
1040 * The reason is that we do not know when a closure is visited last.
1041 * -------------------------------------------------------------------------- */
1042 static INLINE void
1043 maybeInitRetainerSet( StgClosure *c )
1044 {
1045 if (!isRetainerSetFieldValid(c)) {
1046 setRetainerSetToNull(c);
1047 }
1048 }
1049
/* -----------------------------------------------------------------------------
 * Returns rtsTrue if *c is a retainer.
 * The classification is purely by closure type: roots (TSOs, WEAKs),
 * mutable objects, and thunk-like closures are retainers; plain data,
 * functions, indirections and profiling-invisible objects are not.
 * Any closure type that can never legally appear as *c during the
 * retainer traversal falls through to barf().
 * -------------------------------------------------------------------------- */
static INLINE rtsBool
isRetainer( StgClosure *c )
{
    switch (get_itbl(c)->type) {
	//
	//  True case
	//
	// TSOs MUST be retainers: they constitute the set of roots.
    case TSO:

	// mutable objects
    case MVAR:
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:

	// thunks are retainers.
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
    case AP:
    case AP_STACK:

	// Static thunks, or CAFS, are obviously retainers.
    case THUNK_STATIC:

	// WEAK objects are roots; there is separate code in which traversing
	// begins from WEAK objects.
    case WEAK:

	// Since the other mutvar-type things are retainers, seems
	// like the right thing to do:
    case TVAR:
	return rtsTrue;

	//
	// False case
	//

	// constructors
    case CONSTR:
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
	// functions
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
	// partial applications
    case PAP:
	// blackholes
    case CAF_BLACKHOLE:
    case BLACKHOLE:
    case SE_BLACKHOLE:
    case SE_CAF_BLACKHOLE:
	// indirection
    case IND_PERM:
    case IND_OLDGEN_PERM:
    case IND_OLDGEN:
	// static objects
    case CONSTR_STATIC:
    case FUN_STATIC:
	// misc
    case STABLE_NAME:
    case BCO:
    case ARR_WORDS:
	// STM
    case TVAR_WAIT_QUEUE:
    case TREC_HEADER:
    case TREC_CHUNK:
	return rtsFalse;

	//
	// Error case
	//
	// IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
    case IND_STATIC:
	// CONSTR_INTLIKE, CONSTR_CHARLIKE, and CONSTR_NOCAF_STATIC
	// cannot be *c, *cp, *r in the retainer profiling loop.
    case CONSTR_INTLIKE:
    case CONSTR_CHARLIKE:
    case CONSTR_NOCAF_STATIC:
	// Stack objects are invalid because they are never treated as
	// legal objects during retainer profiling.
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_VEC_SMALL:
    case RET_BIG:
    case RET_VEC_BIG:
	// other cases
    case IND:
    case BLOCKED_FETCH:
    case FETCH_ME:
    case FETCH_ME_BQ:
    case RBH:
    case REMOTE_REF:
    case EVACUATED:
    case INVALID_OBJECT:
    default:
	barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
	return rtsFalse;
    }
}
1172
/* -----------------------------------------------------------------------------
 * Returns the retainer function value for the closure *c, i.e., R(*c).
 * This function does NOT return the retainer(s) of *c.
 * Invariants:
 *   *c must be a retainer.
 * Note:
 *   Depending on the definition of this function, the maintenance of retainer
 *   sets can be made easier. If most retainer sets are likely to be created
 *   again across garbage collections, refreshAllRetainerSet() in
 *   RetainerSet.c can simply do nothing.
 *   If this is not the case, we can free all the retainer sets and
 *   re-initialize the hash table.
 *   See refreshAllRetainerSet() in RetainerSet.c.
 *   NOTE(review): if none of the RETAINER_SCHEME_* macros is defined, this
 *   function falls off the end without returning a value — presumably the
 *   build system guarantees exactly one scheme is selected; verify.
 * -------------------------------------------------------------------------- */
static INLINE retainer
getRetainerFrom( StgClosure *c )
{
    ASSERT(isRetainer(c));

#if defined(RETAINER_SCHEME_INFO)
    // Retainer scheme 1: retainer = info table
    return get_itbl(c);
#elif defined(RETAINER_SCHEME_CCS)
    // Retainer scheme 2: retainer = cost centre stack
    return c->header.prof.ccs;
#elif defined(RETAINER_SCHEME_CC)
    // Retainer scheme 3: retainer = cost centre
    return c->header.prof.ccs->cc;
#endif
}
1203
1204 /* -----------------------------------------------------------------------------
1205 * Associates the retainer set *s with the closure *c, that is, *s becomes
1206 * the retainer set of *c.
1207 * Invariants:
1208 * c != NULL
1209 * s != NULL
1210 * -------------------------------------------------------------------------- */
1211 static INLINE void
1212 associate( StgClosure *c, RetainerSet *s )
1213 {
1214 // StgWord has the same size as pointers, so the following type
1215 // casting is okay.
1216 RSET(c) = (RetainerSet *)((StgWord)s | flip);
1217 }
1218
1219 /* -----------------------------------------------------------------------------
1220 Call retainClosure for each of the closures covered by a large bitmap.
1221 -------------------------------------------------------------------------- */
1222
1223 static void
1224 retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size,
1225 StgClosure *c, retainer c_child_r)
1226 {
1227 nat i, b;
1228 StgWord bitmap;
1229
1230 b = 0;
1231 bitmap = large_bitmap->bitmap[b];
1232 for (i = 0; i < size; ) {
1233 if ((bitmap & 1) == 0) {
1234 retainClosure((StgClosure *)*p, c, c_child_r);
1235 }
1236 i++;
1237 p++;
1238 if (i % BITS_IN(W_) == 0) {
1239 b++;
1240 bitmap = large_bitmap->bitmap[b];
1241 } else {
1242 bitmap = bitmap >> 1;
1243 }
1244 }
1245 }
1246
1247 static INLINE StgPtr
1248 retain_small_bitmap (StgPtr p, nat size, StgWord bitmap,
1249 StgClosure *c, retainer c_child_r)
1250 {
1251 while (size > 0) {
1252 if ((bitmap & 1) == 0) {
1253 retainClosure((StgClosure *)*p, c, c_child_r);
1254 }
1255 p++;
1256 bitmap = bitmap >> 1;
1257 size--;
1258 }
1259 return p;
1260 }
1261
1262 /* -----------------------------------------------------------------------------
1263 * Call retainClosure for each of the closures in an SRT.
1264 * ------------------------------------------------------------------------- */
1265
1266 static void
1267 retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
1268 {
1269 nat i, b, size;
1270 StgWord bitmap;
1271 StgClosure **p;
1272
1273 b = 0;
1274 p = (StgClosure **)srt->srt;
1275 size = srt->l.size;
1276 bitmap = srt->l.bitmap[b];
1277 for (i = 0; i < size; ) {
1278 if ((bitmap & 1) != 0) {
1279 retainClosure((StgClosure *)*p, c, c_child_r);
1280 }
1281 i++;
1282 p++;
1283 if (i % BITS_IN(W_) == 0) {
1284 b++;
1285 bitmap = srt->l.bitmap[b];
1286 } else {
1287 bitmap = bitmap >> 1;
1288 }
1289 }
1290 }
1291
1292 static INLINE void
1293 retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
1294 {
1295 nat bitmap;
1296 StgClosure **p;
1297
1298 bitmap = srt_bitmap;
1299 p = srt;
1300
1301 if (bitmap == (StgHalfWord)(-1)) {
1302 retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1303 return;
1304 }
1305
1306 while (bitmap != 0) {
1307 if ((bitmap & 1) != 0) {
1308 #ifdef ENABLE_WIN32_DLL_SUPPORT
1309 if ( (unsigned long)(*srt) & 0x1 ) {
1310 retainClosure(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1)),
1311 c, c_child_r);
1312 } else {
1313 retainClosure(*srt,c,c_child_r);
1314 }
1315 #else
1316 retainClosure(*srt,c,c_child_r);
1317 #endif
1318 }
1319 p++;
1320 bitmap = bitmap >> 1;
1321 }
1322 }
1323
/* -----------------------------------------------------------------------------
 * Process all the objects in the stack chunk from stackStart to stackEnd
 * with *c and *c_child_r being their parent and their most recent retainer,
 * respectively. Treat stackOptionalFun as another child of *c if it is
 * not NULL.
 * Invariants:
 *   *c is one of the following: TSO, AP_STACK.
 *   If *c is TSO, c == c_child_r.
 *   stackStart < stackEnd.
 *   RSET(c) and RSET(c_child_r) are valid, i.e., their
 *   interpretation conforms to the current value of flip (even when they
 *   are interpreted to be NULL).
 *   If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
 *   or ThreadKilled, which means that its stack is ready to process.
 * Note:
 *   This code was almost plagiarized from GC.c! For each pointer,
 *   retainClosure() is invoked instead of evacuate().
 * -------------------------------------------------------------------------- */
static void
retainStack( StgClosure *c, retainer c_child_r,
	     StgPtr stackStart, StgPtr stackEnd )
{
    stackElement *oldStackBoundary;
    StgPtr p;
    StgRetInfoTable *info;
    StgWord32 bitmap;
    nat size;

#ifdef DEBUG_RETAINER
    // track the maximum C-stack depth reached via nested retainStack() calls
    cStackSize++;
    if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
#endif

    /*
      Each invocation of retainStack() creates a new virtual
      stack. Since all such stacks share a single common stack, we
      record the current currentStackBoundary, which will be restored
      at the exit.
    */
    oldStackBoundary = currentStackBoundary;
    currentStackBoundary = stackTop;

#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
#endif

    // A TSO must be runnable (see invariants above) for its stack to be valid.
    ASSERT(get_itbl(c)->type != TSO ||
	   (((StgTSO *)c)->what_next != ThreadRelocated &&
	    ((StgTSO *)c)->what_next != ThreadComplete &&
	    ((StgTSO *)c)->what_next != ThreadKilled));

    // Walk the stack frame by frame, dispatching on the return info table.
    p = stackStart;
    while (p < stackEnd) {
	info = get_ret_itbl((StgClosure *)p);

	switch(info->i.type) {

	case UPDATE_FRAME:
	    // the updatee is the frame's only pointer field
	    retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
	    p += sizeofW(StgUpdateFrame);
	    continue;

	case STOP_FRAME:
	case CATCH_FRAME:
	case CATCH_STM_FRAME:
	case CATCH_RETRY_FRAME:
	case ATOMICALLY_FRAME:
	case RET_SMALL:
	case RET_VEC_SMALL:
	    // small-bitmap frames: bitmap and size packed into the info table
	    bitmap = BITMAP_BITS(info->i.layout.bitmap);
	    size = BITMAP_SIZE(info->i.layout.bitmap);
	    p++;
	    p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

	follow_srt:
	    // shared tail: follow the frame's static reference table
	    retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
	    continue;

	case RET_BCO: {
	    StgBCO *bco;

	    p++;
	    // the word after the header is the BCO itself; retain it, then
	    // use its bitmap to walk the frame's payload
	    retainClosure((StgClosure *)*p, c, c_child_r);
	    bco = (StgBCO *)*p;
	    p++;
	    size = BCO_BITMAP_SIZE(bco);
	    retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
	    p += size;
	    continue;
	}

	    // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
	case RET_BIG:
	case RET_VEC_BIG:
	    size = GET_LARGE_BITMAP(&info->i)->size;
	    p++;
	    retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
				size, c, c_child_r);
	    p += size;
	    // and don't forget to follow the SRT
	    goto follow_srt;

	    // Dynamic bitmap: the mask is stored on the stack
	case RET_DYN: {
	    StgWord dyn;
	    dyn = ((StgRetDyn *)p)->liveness;

	    // traverse the bitmap first
	    bitmap = RET_DYN_LIVENESS(dyn);
	    p = (P_)&((StgRetDyn *)p)->payload[0];
	    size = RET_DYN_BITMAP_SIZE;
	    p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

	    // skip over the non-ptr words
	    p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;

	    // follow the ptr words
	    for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
		retainClosure((StgClosure *)*p, c, c_child_r);
		p++;
	    }
	    continue;
	}

	case RET_FUN: {
	    StgRetFun *ret_fun = (StgRetFun *)p;
	    StgFunInfoTable *fun_info;

	    // the function closure determines the layout of the arguments
	    retainClosure(ret_fun->fun, c, c_child_r);
	    fun_info = get_fun_itbl(ret_fun->fun);

	    p = (P_)&ret_fun->payload;
	    switch (fun_info->f.fun_type) {
	    case ARG_GEN:
		bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
		size = BITMAP_SIZE(fun_info->f.b.bitmap);
		p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
		break;
	    case ARG_GEN_BIG:
		size = GET_FUN_LARGE_BITMAP(fun_info)->size;
		retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
				    size, c, c_child_r);
		p += size;
		break;
	    default:
		// standard argument pattern: look the bitmap up in the
		// global table of canonical argument bitmaps
		bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
		size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
		p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
		break;
	    }
	    goto follow_srt;
	}

	default:
	    barf("Invalid object found in retainStack(): %d",
		 (int)(info->i.type));
	}
    }

    // restore currentStackBoundary
    currentStackBoundary = oldStackBoundary;
#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
#endif

#ifdef DEBUG_RETAINER
    cStackSize--;
#endif
}
1493
/* ----------------------------------------------------------------------------
 * Call retainClosure for each of the children of a PAP/AP.
 * First retains the function closure itself, then walks the n_args payload
 * words using the argument bitmap dictated by the function's info table.
 * Returns the address just past the payload words processed.
 * ------------------------------------------------------------------------- */
static INLINE StgPtr
retain_PAP_payload (StgClosure *pap, retainer c_child_r, StgClosure *fun,
		    StgClosure** payload, StgWord n_args)
{
    StgPtr p;
    StgWord bitmap;
    StgFunInfoTable *fun_info;

    // the function is a child of the PAP/AP itself
    retainClosure(fun, pap, c_child_r);
    fun_info = get_fun_itbl(fun);
    ASSERT(fun_info->i.type != PAP);

    p = (StgPtr)payload;

    switch (fun_info->f.fun_type) {
    case ARG_GEN:
	// general case: small bitmap stored in the function's info table
	bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
	p = retain_small_bitmap(p, n_args, bitmap,
				pap, c_child_r);
	break;
    case ARG_GEN_BIG:
	// out-of-line bitmap for functions with many arguments
	retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
			    n_args, pap, c_child_r);
	p += n_args;
	break;
    case ARG_BCO:
	// byte-code object: the bitmap lives in the BCO itself
	retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun),
			    n_args, pap, c_child_r);
	p += n_args;
	break;
    default:
	// standard argument pattern: canonical bitmap from the global table
	bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
	p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
	break;
    }
    return p;
}
1535
/* -----------------------------------------------------------------------------
 * Compute the retainer set of *c0 and all its descendants by traversing.
 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
 * Invariants:
 *   c0 = cp0 = r0 holds only for root objects.
 *   RSET(cp0) and RSET(r0) are valid, i.e., their
 *   interpretation conforms to the current value of flip (even when they
 *   are interpreted to be NULL).
 *   However, RSET(c0) may be corrupt, i.e., it may not conform to
 *   the current value of flip. If it does not, during the execution
 *   of this function, RSET(c0) must be initialized as well as all
 *   its descendants.
 * Note:
 *   stackTop must be the same at the beginning and the exit of this function.
 *   *c0 can be TSO (as well as AP_STACK).
 *   The traversal is iterative: children still to be visited are kept on
 *   the profiler's own traverse stack (push()/pop()), not the C stack.
 * -------------------------------------------------------------------------- */
static void
retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
{
    // c = Current closure
    // cp = Current closure's Parent
    // r = current closures' most recent Retainer
    // c_child_r = current closure's children's most recent retainer
    // first_child = first child of c
    StgClosure *c, *cp, *first_child;
    RetainerSet *s, *retainerSetOfc;
    retainer r, c_child_r;
    StgWord typeOfc;

#ifdef DEBUG_RETAINER
    // StgPtr oldStackTop;
#endif

#ifdef DEBUG_RETAINER
    // oldStackTop = stackTop;
    // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
#endif

    // (c, cp, r) = (c0, cp0, r0)
    c = c0;
    cp = cp0;
    r = r0;
    goto inner_loop;

loop:
    //debugBelch("loop");
    // pop to (c, cp, r);
    pop(&c, &cp, &r);

    // pop() signals an empty traverse stack with c == NULL: we are done.
    if (c == NULL) {
#ifdef DEBUG_RETAINER
	// debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
#endif
	return;
    }

    //debugBelch("inner_loop");

inner_loop:
    // c = current closure under consideration,
    // cp = current closure's parent,
    // r = current closure's most recent retainer
    //
    // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
    //   RSET(cp) and RSET(r) are valid.
    //   RSET(c) is valid only if c has been visited before.
    //
    // Loop invariants (on the relation between c, cp, and r)
    //   if cp is not a retainer, r belongs to RSET(cp).
    //   if cp is a retainer, r == cp.

    typeOfc = get_itbl(c)->type;

#ifdef DEBUG_RETAINER
    switch (typeOfc) {
    case IND_STATIC:
    case CONSTR_INTLIKE:
    case CONSTR_CHARLIKE:
    case CONSTR_NOCAF_STATIC:
    case CONSTR_STATIC:
    case THUNK_STATIC:
    case FUN_STATIC:
	break;
    default:
	if (retainerSetOf(c) == NULL) {   // first visit?
	    costArray[typeOfc] += cost(c);
	    sumOfNewCost += cost(c);
	}
	break;
    }
#endif

    // special cases
    switch (typeOfc) {
    case TSO:
	// dead or relocated TSOs are skipped or forwarded, respectively
	if (((StgTSO *)c)->what_next == ThreadComplete ||
	    ((StgTSO *)c)->what_next == ThreadKilled) {
#ifdef DEBUG_RETAINER
	    debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
#endif
	    goto loop;
	}
	if (((StgTSO *)c)->what_next == ThreadRelocated) {
#ifdef DEBUG_RETAINER
	    debugBelch("ThreadRelocated encountered in retainClosure()\n");
#endif
	    c = (StgClosure *)((StgTSO *)c)->link;
	    goto inner_loop;
	}
	break;

    case IND_STATIC:
	// We just skip IND_STATIC, so its retainer set is never computed.
	c = ((StgIndStatic *)c)->indirectee;
	goto inner_loop;
    case CONSTR_INTLIKE:
    case CONSTR_CHARLIKE:
	// static objects with no pointers out, so goto loop.
    case CONSTR_NOCAF_STATIC:
	// It is not just enough not to compute the retainer set for *c; it is
	// mandatory because CONSTR_NOCAF_STATIC are not reachable from
	// scavenged_static_objects, the list from which is assumed to traverse
	// all static objects after major garbage collections.
	goto loop;
    case THUNK_STATIC:
    case FUN_STATIC:
	if (get_itbl(c)->srt_bitmap == 0) {
	    // No need to compute the retainer set; no dynamic objects
	    // are reachable from *c.
	    //
	    // Static objects: if we traverse all the live closures,
	    // including static closures, during each heap census then
	    // we will observe that some static closures appear and
	    // disappear.  eg. a closure may contain a pointer to a
	    // static function 'f' which is not otherwise reachable
	    // (it doesn't indirectly point to any CAFs, so it doesn't
	    // appear in any SRTs), so we would find 'f' during
	    // traversal.  However on the next sweep there may be no
	    // closures pointing to 'f'.
	    //
	    // We must therefore ignore static closures whose SRT is
	    // empty, because these are exactly the closures that may
	    // "appear".  A closure with a non-empty SRT, and which is
	    // still required, will always be reachable.
	    //
	    // But what about CONSTR_STATIC?  Surely these may be able
	    // to appear, and they don't have SRTs, so we can't
	    // check.  So for now, we're calling
	    // resetStaticObjectForRetainerProfiling() from the
	    // garbage collector to reset the retainer sets in all the
	    // reachable static objects.
	    goto loop;
	}
    default:
	break;
    }

    // The above objects are ignored in computing the average number of times
    // an object is visited.
    timesAnyObjectVisited++;

    // If this is the first visit to c, initialize its retainer set.
    maybeInitRetainerSet(c);
    retainerSetOfc = retainerSetOf(c);

    // Now compute s:
    //    isRetainer(cp) == rtsTrue  => s == NULL
    //    isRetainer(cp) == rtsFalse => s == cp.retainer
    if (isRetainer(cp))
	s = NULL;
    else
	s = retainerSetOf(cp);

    // (c, cp, r, s) is available.

    // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
    if (retainerSetOfc == NULL) {
	// This is the first visit to *c.
	numObjectVisited++;

	if (s == NULL)
	    associate(c, singleton(r));
	else
	    // s is actually the retainer set of *c!
	    associate(c, s);

	// compute c_child_r
	c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
    } else {
	// This is not the first visit to *c.
	if (isMember(r, retainerSetOfc))
	    goto loop;          // no need to process child

	if (s == NULL)
	    associate(c, addElement(r, retainerSetOfc));
	else {
	    // s is not NULL and cp is not a retainer. This means that
	    // each time *cp is visited, so is *c. Thus, if s has
	    // exactly one more element in its retainer set than c, s
	    // is also the new retainer set for *c.
	    if (s->num == retainerSetOfc->num + 1) {
		associate(c, s);
	    }
	    // Otherwise, just add R_r to the current retainer set of *c.
	    else {
		associate(c, addElement(r, retainerSetOfc));
	    }
	}

	if (isRetainer(c))
	    goto loop;          // no need to process child

	// compute c_child_r
	c_child_r = r;
    }

    // now, RSET() of all of *c, *cp, and *r is valid.
    // (c, c_child_r) are available.

    // process child

    // Special case closures: we process these all in one go rather
    // than attempting to save the current position, because doing so
    // would be hard.
    switch (typeOfc) {
    case TSO:
	retainStack(c, c_child_r,
		    ((StgTSO *)c)->sp,
		    ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
	goto loop;

    case PAP:
    {
	StgPAP *pap = (StgPAP *)c;
	retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
	goto loop;
    }

    case AP:
    {
	StgAP *ap = (StgAP *)c;
	retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
	goto loop;
    }

    case AP_STACK:
	retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
	retainStack(c, c_child_r,
		    (StgPtr)((StgAP_STACK *)c)->payload,
		    (StgPtr)((StgAP_STACK *)c)->payload +
			     ((StgAP_STACK *)c)->size);
	goto loop;
    }

    push(c, c_child_r, &first_child);

    // If first_child is null, c has no child.
    // If first_child is not null, the top stack element points to the next
    // object. push() may or may not push a stackElement on the stack.
    if (first_child == NULL)
	goto loop;

    // (c, cp, r) = (first_child, c, c_child_r)
    r = c_child_r;
    cp = c;
    c = first_child;
    goto inner_loop;
}
1804
1805 /* -----------------------------------------------------------------------------
1806 * Compute the retainer set for every object reachable from *tl.
1807 * -------------------------------------------------------------------------- */
1808 static void
1809 retainRoot( StgClosure **tl )
1810 {
1811 // We no longer assume that only TSOs and WEAKs are roots; any closure can
1812 // be a root.
1813
1814 ASSERT(isEmptyRetainerStack());
1815 currentStackBoundary = stackTop;
1816
1817 if (*tl != &stg_END_TSO_QUEUE_closure && isRetainer(*tl)) {
1818 retainClosure(*tl, *tl, getRetainerFrom(*tl));
1819 } else {
1820 retainClosure(*tl, *tl, CCS_SYSTEM);
1821 }
1822
1823 // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
1824 // *tl might be a TSO which is ThreadComplete, in which
1825 // case we ignore it for the purposes of retainer profiling.
1826 }
1827
/* -----------------------------------------------------------------------------
 * Compute the retainer set for each of the objects in the heap:
 * scheduler roots, weak pointers, and stable pointers seed the traversal,
 * after which unvisited mutable-list entries have their rs fields reset.
 * -------------------------------------------------------------------------- */
static void
computeRetainerSet( void )
{
    StgWeak *weak;
    RetainerSet *rtl;
    nat g;
    StgPtr ml;
    bdescr *bd;
#ifdef DEBUG_RETAINER
    RetainerSet tmpRetainerSet;
#endif

    GetRoots(retainRoot);	// for scheduler roots

    // This function is called after a major GC, when key, value, and finalizer
    // all are guaranteed to be valid, or reachable.
    //
    // The following code assumes that WEAK objects are considered to be roots
    // for retainer profilng.
    for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
	// retainRoot((StgClosure *)weak);
	// NOTE(review): this passes the address of the local loop variable,
	// so retainRoot sees the current weak closure via *tl — confirm
	// retainRoot never writes back through tl.
	retainRoot((StgClosure **)&weak);

    // Consider roots from the stable ptr table.
    markStablePtrTable(retainRoot);

    // The following code resets the rs field of each unvisited mutable
    // object (computing sumOfNewCostExtra and updating costArray[] when
    // debugging retainer profiler).
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
	// NOT TRUE: even G0 has a block on its mutable list
	// ASSERT(g != 0 || (generations[g].mut_list == NULL));

	// Traversing through mut_list is necessary
	// because we can find MUT_VAR objects which have not been
	// visited during retainer profiling.
	for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
	    for (ml = bd->start; ml < bd->free; ml++) {

		maybeInitRetainerSet((StgClosure *)*ml);
		rtl = retainerSetOf((StgClosure *)*ml);

#ifdef DEBUG_RETAINER
		if (rtl == NULL) {
		    // first visit to *ml
		    // This is a violation of the interface rule!
		    RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);

		    switch (get_itbl((StgClosure *)ml)->type) {
		    case IND_STATIC:
			// no cost involved
			break;
		    case CONSTR_INTLIKE:
		    case CONSTR_CHARLIKE:
		    case CONSTR_NOCAF_STATIC:
		    case CONSTR_STATIC:
		    case THUNK_STATIC:
		    case FUN_STATIC:
			barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
			break;
		    default:
			// dynamic objects
			costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
			sumOfNewCostExtra += cost((StgClosure *)ml);
			break;
		    }
		}
#endif
	    }
	}
    }
}
1903
1904 /* -----------------------------------------------------------------------------
1905 * Traverse all static objects for which we compute retainer sets,
1906 * and reset their rs fields to NULL, which is accomplished by
1907 * invoking maybeInitRetainerSet(). This function must be called
1908 * before zeroing all objects reachable from scavenged_static_objects
1909 * in the case of major gabage collections. See GarbageCollect() in
1910 * GC.c.
1911 * Note:
1912 * The mut_once_list of the oldest generation must also be traversed?
1913 * Why? Because if the evacuation of an object pointed to by a static
1914 * indirection object fails, it is put back to the mut_once_list of
1915 * the oldest generation.
1916 * However, this is not necessary because any static indirection objects
1917 * are just traversed through to reach dynamic objects. In other words,
1918 * they are not taken into consideration in computing retainer sets.
1919 * -------------------------------------------------------------------------- */
1920 void
1921 resetStaticObjectForRetainerProfiling( void )
1922 {
1923 #ifdef DEBUG_RETAINER
1924 nat count;
1925 #endif
1926 StgClosure *p;
1927
1928 #ifdef DEBUG_RETAINER
1929 count = 0;
1930 #endif
1931 p = scavenged_static_objects;
1932 while (p != END_OF_STATIC_LIST) {
1933 #ifdef DEBUG_RETAINER
1934 count++;
1935 #endif
1936 switch (get_itbl(p)->type) {
1937 case IND_STATIC:
1938 // Since we do not compute the retainer set of any
1939 // IND_STATIC object, we don't have to reset its retainer
1940 // field.
1941 p = (StgClosure*)*IND_STATIC_LINK(p);
1942 break;
1943 case THUNK_STATIC:
1944 maybeInitRetainerSet(p);
1945 p = (StgClosure*)*THUNK_STATIC_LINK(p);
1946 break;
1947 case FUN_STATIC:
1948 maybeInitRetainerSet(p);
1949 p = (StgClosure*)*FUN_STATIC_LINK(p);
1950 break;
1951 case CONSTR_STATIC:
1952 maybeInitRetainerSet(p);
1953 p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
1954 break;
1955 default:
1956 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1957 p, get_itbl(p)->type);
1958 break;
1959 }
1960 }
1961 #ifdef DEBUG_RETAINER
1962 // debugBelch("count in scavenged_static_objects = %d\n", count);
1963 #endif
1964 }
1965
/* -----------------------------------------------------------------------------
 * Perform retainer profiling.
 * N is the oldest generation being profilied, where the generations are
 * numbered starting at 0.
 * Invariants:
 * Note:
 *   This function should be called only immediately after major garbage
 *   collection.
 *   Overall flow: flip the rs-field interpretation bit, rebuild the traverse
 *   stack, traverse the heap via computeRetainerSet(), then report timing
 *   and visit statistics through stat_endRP().
 * ------------------------------------------------------------------------- */
void
retainerProfile(void)
{
#ifdef DEBUG_RETAINER
  nat i;
  nat totalHeapSize;        // total raw heap size (computed by linear scanning)
#endif

#ifdef DEBUG_RETAINER
  debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
#endif

  stat_startRP();

  // We haven't flipped the bit yet.
#ifdef DEBUG_RETAINER
  debugBelch("Before traversing:\n");
  sumOfCostLinear = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArrayLinear[i] = 0;
  totalHeapSize = checkHeapSanityForRetainerProfiling();

  debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
  /*
  debugBelch("costArrayLinear[] = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    debugBelch("[%u:%u] ", i, costArrayLinear[i]);
  debugBelch("\n");
  */

  ASSERT(sumOfCostLinear == totalHeapSize);

/*
#define pcostArrayLinear(index) \
  if (costArrayLinear[index] > 0) \
    debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
  pcostArrayLinear(THUNK_STATIC);
  pcostArrayLinear(FUN_STATIC);
  pcostArrayLinear(CONSTR_STATIC);
  pcostArrayLinear(CONSTR_NOCAF_STATIC);
  pcostArrayLinear(CONSTR_INTLIKE);
  pcostArrayLinear(CONSTR_CHARLIKE);
*/
#endif

  // Now we flips flip.
  // After this point every previously stored rs field is "invalid" and will
  // be lazily re-initialized by maybeInitRetainerSet().
  flip = flip ^ 1;

#ifdef DEBUG_RETAINER
  stackSize = 0;
  maxStackSize = 0;
  cStackSize = 0;
  maxCStackSize = 0;
#endif
  numObjectVisited = 0;
  timesAnyObjectVisited = 0;

#ifdef DEBUG_RETAINER
  debugBelch("During traversing:\n");
  sumOfNewCost = 0;
  sumOfNewCostExtra = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArray[i] = 0;
#endif

  /*
    We initialize the traverse stack each time the retainer profiling is
    performed (because the traverse stack size varies on each retainer profiling
    and this operation is not costly anyhow). However, we just refresh the
    retainer sets.
  */
  initializeTraverseStack();
#ifdef DEBUG_RETAINER
  initializeAllRetainerSet();
#else
  refreshAllRetainerSet();
#endif
  computeRetainerSet();

#ifdef DEBUG_RETAINER
  debugBelch("After traversing:\n");
  sumOfCostLinear = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArrayLinear[i] = 0;
  totalHeapSize = checkHeapSanityForRetainerProfiling();

  debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
  ASSERT(sumOfCostLinear == totalHeapSize);

  // now, compare the two results
  /*
    Note:
      costArray[] must be exactly the same as costArrayLinear[].
      Known exceptions:
        1) Dead weak pointers, whose type is CONSTR. These objects are not
           reachable from any roots.
  */
  debugBelch("Comparison:\n");
  debugBelch("\tcostArrayLinear[] (must be empty) = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    if (costArray[i] != costArrayLinear[i])
      // nothing should be printed except MUT_VAR after major GCs
      debugBelch("[%u:%u] ", i, costArrayLinear[i]);
  debugBelch("\n");

  debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
  debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
  debugBelch("\tcostArray[] (must be empty) = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    if (costArray[i] != costArrayLinear[i])
      // nothing should be printed except MUT_VAR after major GCs
      debugBelch("[%u:%u] ", i, costArray[i]);
  debugBelch("\n");

  // only for major garbage collection
  ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
#endif

  // post-processing
  closeTraverseStack();
#ifdef DEBUG_RETAINER
  closeAllRetainerSet();
#else
  // Note that there is no post-processing for the retainer sets.
#endif
  retainerGeneration++;

  stat_endRP(
    retainerGeneration - 1,   // retainerGeneration has just been incremented!
#ifdef DEBUG_RETAINER
    maxCStackSize, maxStackSize,
#endif
    (double)timesAnyObjectVisited / numObjectVisited);
}
2109
2110 /* -----------------------------------------------------------------------------
2111 * DEBUGGING CODE
2112 * -------------------------------------------------------------------------- */
2113
2114 #ifdef DEBUG_RETAINER
2115
// Heuristic pointer-validity check used by the sanity-checking code below:
// r must be either a static closure or heap-allocated in a block that is
// not on the free list (BF_FREE clear), and the first word it points at
// must not be 0xaaaaaaaa — presumably the scribble pattern written over
// freed memory in DEBUG builds; TODO confirm.
#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
                            ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
                           ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
2119
2120 static nat
2121 sanityCheckHeapClosure( StgClosure *c )
2122 {
2123 StgInfoTable *info;
2124
2125 ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
2126 ASSERT(!closure_STATIC(c));
2127 ASSERT(LOOKS_LIKE_PTR(c));
2128
2129 if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
2130 if (get_itbl(c)->type == CONSTR &&
2131 !strcmp(get_itbl(c)->prof.closure_type, "DEAD_WEAK") &&
2132 !strcmp(get_itbl(c)->prof.closure_desc, "DEAD_WEAK")) {
2133 debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
2134 costArray[get_itbl(c)->type] += cost(c);
2135 sumOfNewCost += cost(c);
2136 } else
2137 debugBelch(
2138 "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
2139 flip, c, get_itbl(c)->type,
2140 get_itbl(c)->prof.closure_type, get_itbl(c)->prof.closure_desc,
2141 RSET(c));
2142 } else {
2143 // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
2144 }
2145
2146 return closure_sizeW(c);
2147 }
2148
2149 static nat
2150 heapCheck( bdescr *bd )
2151 {
2152 StgPtr p;
2153 static nat costSum, size;
2154
2155 costSum = 0;
2156 while (bd != NULL) {
2157 p = bd->start;
2158 while (p < bd->free) {
2159 size = sanityCheckHeapClosure((StgClosure *)p);
2160 sumOfCostLinear += size;
2161 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2162 p += size;
2163 // no need for slop check; I think slops are not used currently.
2164 }
2165 ASSERT(p == bd->free);
2166 costSum += bd->free - bd->start;
2167 bd = bd->link;
2168 }
2169
2170 return costSum;
2171 }
2172
2173 static nat
2174 smallObjectPoolCheck(void)
2175 {
2176 bdescr *bd;
2177 StgPtr p;
2178 static nat costSum, size;
2179
2180 bd = small_alloc_list;
2181 costSum = 0;
2182
2183 // first block
2184 if (bd == NULL)
2185 return costSum;
2186
2187 p = bd->start;
2188 while (p < alloc_Hp) {
2189 size = sanityCheckHeapClosure((StgClosure *)p);
2190 sumOfCostLinear += size;
2191 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2192 p += size;
2193 }
2194 ASSERT(p == alloc_Hp);
2195 costSum += alloc_Hp - bd->start;
2196
2197 bd = bd->link;
2198 while (bd != NULL) {
2199 p = bd->start;
2200 while (p < bd->free) {
2201 size = sanityCheckHeapClosure((StgClosure *)p);
2202 sumOfCostLinear += size;
2203 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2204 p += size;
2205 }
2206 ASSERT(p == bd->free);
2207 costSum += bd->free - bd->start;
2208 bd = bd->link;
2209 }
2210
2211 return costSum;
2212 }
2213
2214 static nat
2215 chainCheck(bdescr *bd)
2216 {
2217 nat costSum, size;
2218
2219 costSum = 0;
2220 while (bd != NULL) {
2221 // bd->free - bd->start is not an accurate measurement of the
2222 // object size. Actually it is always zero, so we compute its
2223 // size explicitly.
2224 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2225 sumOfCostLinear += size;
2226 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
2227 costSum += size;
2228 bd = bd->link;
2229 }
2230
2231 return costSum;
2232 }
2233
2234 static nat
2235 checkHeapSanityForRetainerProfiling( void )
2236 {
2237 nat costSum, g, s;
2238
2239 costSum = 0;
2240 debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2241 if (RtsFlags.GcFlags.generations == 1) {
2242 costSum += heapCheck(g0s0->to_blocks);
2243 debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2244 costSum += chainCheck(g0s0->large_objects);
2245 debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2246 } else {
2247 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
2248 for (s = 0; s < generations[g].n_steps; s++) {
2249 /*
2250 After all live objects have been scavenged, the garbage
2251 collector may create some objects in
2252 scheduleFinalizers(). These objects are created throught
2253 allocate(), so the small object pool or the large object
2254 pool of the g0s0 may not be empty.
2255 */
2256 if (g == 0 && s == 0) {
2257 costSum += smallObjectPoolCheck();
2258 debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2259 costSum += chainCheck(generations[g].steps[s].large_objects);
2260 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2261 } else {
2262 costSum += heapCheck(generations[g].steps[s].blocks);
2263 debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2264 costSum += chainCheck(generations[g].steps[s].large_objects);
2265 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2266 }
2267 }
2268 }
2269
2270 return costSum;
2271 }
2272
2273 void
2274 findPointer(StgPtr p)
2275 {
2276 StgPtr q, r, e;
2277 bdescr *bd;
2278 nat g, s;
2279
2280 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2281 for (s = 0; s < generations[g].n_steps; s++) {
2282 // if (g == 0 && s == 0) continue;
2283 bd = generations[g].steps[s].blocks;
2284 for (; bd; bd = bd->link) {
2285 for (q = bd->start; q < bd->free; q++) {
2286 if (*q == (StgWord)p) {
2287 r = q;
2288 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2289 debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2290 // return;
2291 }
2292 }
2293 }
2294 bd = generations[g].steps[s].large_objects;
2295 for (; bd; bd = bd->link) {
2296 e = bd->start + cost((StgClosure *)bd->start);
2297 for (q = bd->start; q < e; q++) {
2298 if (*q == (StgWord)p) {
2299 r = q;
2300 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2301 debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
2302 // return;
2303 }
2304 }
2305 }
2306 }
2307 }
2308 }
2309
2310 static void
2311 belongToHeap(StgPtr p)
2312 {
2313 bdescr *bd;
2314 nat g, s;
2315
2316 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2317 for (s = 0; s < generations[g].n_steps; s++) {
2318 // if (g == 0 && s == 0) continue;
2319 bd = generations[g].steps[s].blocks;
2320 for (; bd; bd = bd->link) {
2321 if (bd->start <= p && p < bd->free) {
2322 debugBelch("Belongs to gen[%d], step[%d]", g, s);
2323 return;
2324 }
2325 }
2326 bd = generations[g].steps[s].large_objects;
2327 for (; bd; bd = bd->link) {
2328 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2329 debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
2330 return;
2331 }
2332 }
2333 }
2334 }
2335 }
2336 #endif /* DEBUG_RETAINER */
2337
2338 #endif /* PROFILING */