Use __PIC__ and mingw32_TARGET_OS to turn on windows DLL support, not ENABLE_WIN32_DLL...
[ghc.git] / rts / RetainerProfile.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2001
4 * Author: Sungwoo Park
5 *
6 * Retainer profiling.
7 *
8 * ---------------------------------------------------------------------------*/
9
10 #ifdef PROFILING
11
12 // Turn off inlining when debugging - it obfuscates things
13 #ifdef DEBUG
14 #define INLINE
15 #else
16 #define INLINE inline
17 #endif
18
19 #include "PosixSource.h"
20 #include "Rts.h"
21
22 #include "RtsUtils.h"
23 #include "RetainerProfile.h"
24 #include "RetainerSet.h"
25 #include "Schedule.h"
26 #include "Printer.h"
27 #include "Weak.h"
28 #include "Sanity.h"
29 #include "Profiling.h"
30 #include "Stats.h"
31 #include "ProfHeap.h"
32 #include "Apply.h"
33 #include "sm/Storage.h" // for END_OF_STATIC_LIST
34
35 /*
36 Note: what to change in order to plug-in a new retainer profiling scheme?
37 (1) type retainer in ../includes/StgRetainerProf.h
38 (2) retainer function R(), i.e., getRetainerFrom()
39 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
40 in RetainerSet.h, if needed.
41 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
42 */
43
44 /* -----------------------------------------------------------------------------
45 * Declarations...
46 * -------------------------------------------------------------------------- */
47
48 static nat retainerGeneration; // generation
49
50 static nat numObjectVisited; // total number of objects visited
51 static nat timesAnyObjectVisited; // number of times any objects are visited
52
53 /*
54 The rs field in the profile header of any object points to its retainer
55 set in an indirect way: if flip is 0, it points to the retainer set;
56 if flip is 1, it points to the next byte after the retainer set (even
57 for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
58 pointer. See retainerSetOf().
59 */
60
61 StgWord flip = 0; // flip bit
62 // must be 0 if DEBUG_RETAINER is on (for static closures)
63
64 #define setRetainerSetToNull(c) \
65 (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)
66
67 static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
68 static void retainClosure(StgClosure *, StgClosure *, retainer);
69 #ifdef DEBUG_RETAINER
70 static void belongToHeap(StgPtr p);
71 #endif
72
73 #ifdef DEBUG_RETAINER
74 /*
75 cStackSize records how many times retainStack() has been invoked recursively,
76 that is, the number of activation records for retainStack() on the C stack.
77 maxCStackSize records its max value.
78 Invariants:
79 cStackSize <= maxCStackSize
80 */
81 static nat cStackSize, maxCStackSize;
82
83 static nat sumOfNewCost; // sum of the cost of each object, computed
84 // when the object is first visited
85 static nat sumOfNewCostExtra; // for those objects not visited during
86 // retainer profiling, e.g., MUT_VAR
87 static nat costArray[N_CLOSURE_TYPES];
88
89 nat sumOfCostLinear; // sum of the costs of all object, computed
90 // when linearly traversing the heap after
91 // retainer profiling
92 nat costArrayLinear[N_CLOSURE_TYPES];
93 #endif
94
95 /* -----------------------------------------------------------------------------
96 * Retainer stack - header
97 * Note:
98 * Although the retainer stack implementation could be separated *
99 * from the retainer profiling engine, there does not seem to be
100 * any advantage in doing that; retainer stack is an integral part
101 * of retainer profiling engine and cannot be use elsewhere at
102 * all.
103 * -------------------------------------------------------------------------- */
104
105 typedef enum {
106 posTypeStep,
107 posTypePtrs,
108 posTypeSRT,
109 posTypeLargeSRT,
110 } nextPosType;
111
112 typedef union {
113 // fixed layout or layout specified by a field in the closure
114 StgWord step;
115
116 // layout.payload
117 struct {
118 // See StgClosureInfo in InfoTables.h
119 #if SIZEOF_VOID_P == 8
120 StgWord32 pos;
121 StgWord32 ptrs;
122 #else
123 StgWord16 pos;
124 StgWord16 ptrs;
125 #endif
126 StgPtr payload;
127 } ptrs;
128
129 // SRT
130 struct {
131 StgClosure **srt;
132 StgWord srt_bitmap;
133 } srt;
134
135 // Large SRT
136 struct {
137 StgLargeSRT *srt;
138 StgWord offset;
139 } large_srt;
140
141 } nextPos;
142
143 typedef struct {
144 nextPosType type;
145 nextPos next;
146 } stackPos;
147
148 typedef struct {
149 StgClosure *c;
150 retainer c_child_r;
151 stackPos info;
152 } stackElement;
153
154 /*
155 Invariants:
156 firstStack points to the first block group.
157 currentStack points to the block group currently being used.
158 currentStack->free == stackLimit.
159 stackTop points to the topmost byte in the stack of currentStack.
160 Unless the whole stack is empty, stackTop must point to the topmost
161 object (or byte) in the whole stack. Thus, it is only when the whole stack
162 is empty that stackTop == stackLimit (not during the execution of push()
163 and pop()).
164 stackBottom == currentStack->start.
165 stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
166 Note:
167 When a current stack becomes empty, stackTop is set to point to
168 the topmost element on the previous block group so as to satisfy
169 the invariants described above.
170 */
171 static bdescr *firstStack = NULL;
172 static bdescr *currentStack;
173 static stackElement *stackBottom, *stackTop, *stackLimit;
174
175 /*
176 currentStackBoundary is used to mark the current stack chunk.
177 If stackTop == currentStackBoundary, it means that the current stack chunk
178 is empty. It is the responsibility of the user to keep currentStackBoundary
179 valid all the time if it is to be employed.
180 */
181 static stackElement *currentStackBoundary;
182
183 /*
184 stackSize records the current size of the stack.
185 maxStackSize records its high water mark.
186 Invariants:
187 stackSize <= maxStackSize
188 Note:
189 stackSize is just an estimate measure of the depth of the graph. The reason
190 is that some heap objects have only a single child and may not result
191 in a new element being pushed onto the stack. Therefore, at the end of
192 retainer profiling, maxStackSize + maxCStackSize is some value no greater
193 than the actual depth of the graph.
194 */
195 #ifdef DEBUG_RETAINER
196 static int stackSize, maxStackSize;
197 #endif
198
199 // number of blocks allocated for one stack
200 #define BLOCKS_IN_STACK 1
201
202 /* -----------------------------------------------------------------------------
203 * Add a new block group to the stack.
204 * Invariants:
205 * currentStack->link == s.
206 * -------------------------------------------------------------------------- */
207 static INLINE void
208 newStackBlock( bdescr *bd )
209 {
210 currentStack = bd;
211 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
212 stackBottom = (stackElement *)bd->start;
213 stackLimit = (stackElement *)stackTop;
214 bd->free = (StgPtr)stackLimit;
215 }
216
217 /* -----------------------------------------------------------------------------
218 * Return to the previous block group.
219 * Invariants:
220 * s->link == currentStack.
221 * -------------------------------------------------------------------------- */
222 static INLINE void
223 returnToOldStack( bdescr *bd )
224 {
225 currentStack = bd;
226 stackTop = (stackElement *)bd->free;
227 stackBottom = (stackElement *)bd->start;
228 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
229 bd->free = (StgPtr)stackLimit;
230 }
231
232 /* -----------------------------------------------------------------------------
233 * Initializes the traverse stack.
234 * -------------------------------------------------------------------------- */
235 static void
236 initializeTraverseStack( void )
237 {
238 if (firstStack != NULL) {
239 freeChain(firstStack);
240 }
241
242 firstStack = allocGroup(BLOCKS_IN_STACK);
243 firstStack->link = NULL;
244 firstStack->u.back = NULL;
245
246 newStackBlock(firstStack);
247 }
248
249 /* -----------------------------------------------------------------------------
250 * Frees all the block groups in the traverse stack.
251 * Invariants:
252 * firstStack != NULL
253 * -------------------------------------------------------------------------- */
254 static void
255 closeTraverseStack( void )
256 {
257 freeChain(firstStack);
258 firstStack = NULL;
259 }
260
261 /* -----------------------------------------------------------------------------
262 * Returns rtsTrue if the whole stack is empty.
263 * -------------------------------------------------------------------------- */
264 static INLINE rtsBool
265 isEmptyRetainerStack( void )
266 {
267 return (firstStack == currentStack) && stackTop == stackLimit;
268 }
269
270 /* -----------------------------------------------------------------------------
271 * Returns size of stack
272 * -------------------------------------------------------------------------- */
#ifdef DEBUG
// Total number of blocks currently allocated to the traverse stack.
lnat
retainerStackBlocks( void )
{
    lnat nBlocks = 0;
    bdescr *bd;

    for (bd = firstStack; bd != NULL; bd = bd->link) {
        nBlocks += bd->blocks;
    }

    return nBlocks;
}
#endif
286
287 /* -----------------------------------------------------------------------------
288 * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
289 * i.e., if the current stack chunk is empty.
290 * -------------------------------------------------------------------------- */
291 static INLINE rtsBool
292 isOnBoundary( void )
293 {
294 return stackTop == currentStackBoundary;
295 }
296
297 /* -----------------------------------------------------------------------------
298 * Initializes *info from ptrs and payload.
299 * Invariants:
300 * payload[] begins with ptrs pointers followed by non-pointers.
301 * -------------------------------------------------------------------------- */
302 static INLINE void
303 init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
304 {
305 info->type = posTypePtrs;
306 info->next.ptrs.pos = 0;
307 info->next.ptrs.ptrs = ptrs;
308 info->next.ptrs.payload = payload;
309 }
310
311 /* -----------------------------------------------------------------------------
312 * Find the next object from *info.
313 * -------------------------------------------------------------------------- */
314 static INLINE StgClosure *
315 find_ptrs( stackPos *info )
316 {
317 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
318 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
319 } else {
320 return NULL;
321 }
322 }
323
324 /* -----------------------------------------------------------------------------
325 * Initializes *info from SRT information stored in *infoTable.
326 * -------------------------------------------------------------------------- */
327 static INLINE void
328 init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
329 {
330 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
331 info->type = posTypeLargeSRT;
332 info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
333 info->next.large_srt.offset = 0;
334 } else {
335 info->type = posTypeSRT;
336 info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
337 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
338 }
339 }
340
341 static INLINE void
342 init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
343 {
344 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
345 info->type = posTypeLargeSRT;
346 info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
347 info->next.large_srt.offset = 0;
348 } else {
349 info->type = posTypeSRT;
350 info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
351 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
352 }
353 }
354
355 /* -----------------------------------------------------------------------------
356 * Find the next object from *info.
357 * -------------------------------------------------------------------------- */
358 static INLINE StgClosure *
359 find_srt( stackPos *info )
360 {
361 StgClosure *c;
362 StgWord bitmap;
363
364 if (info->type == posTypeSRT) {
365 // Small SRT bitmap
366 bitmap = info->next.srt.srt_bitmap;
367 while (bitmap != 0) {
368 if ((bitmap & 1) != 0) {
369 #if defined(__PIC__) && defined(mingw32_TARGET_OS)
370 if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
371 c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
372 else
373 c = *(info->next.srt.srt);
374 #else
375 c = *(info->next.srt.srt);
376 #endif
377 bitmap = bitmap >> 1;
378 info->next.srt.srt++;
379 info->next.srt.srt_bitmap = bitmap;
380 return c;
381 }
382 bitmap = bitmap >> 1;
383 info->next.srt.srt++;
384 }
385 // bitmap is now zero...
386 return NULL;
387 }
388 else {
389 // Large SRT bitmap
390 nat i = info->next.large_srt.offset;
391 StgWord bitmap;
392
393 // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
394 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
395 bitmap = bitmap >> (i % BITS_IN(StgWord));
396 while (i < info->next.large_srt.srt->l.size) {
397 if ((bitmap & 1) != 0) {
398 c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
399 i++;
400 info->next.large_srt.offset = i;
401 return c;
402 }
403 i++;
404 if (i % BITS_IN(W_) == 0) {
405 bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
406 } else {
407 bitmap = bitmap >> 1;
408 }
409 }
410 // reached the end of this bitmap.
411 info->next.large_srt.offset = i;
412 return NULL;
413 }
414 }
415
416 /* -----------------------------------------------------------------------------
417 * push() pushes a stackElement representing the next child of *c
418 * onto the traverse stack. If *c has no child, *first_child is set
419 * to NULL and nothing is pushed onto the stack. If *c has only one
420 * child, *c_chlid is set to that child and nothing is pushed onto
421 * the stack. If *c has more than two children, *first_child is set
422 * to the first child and a stackElement representing the second
423 * child is pushed onto the stack.
424
425 * Invariants:
426 * *c_child_r is the most recent retainer of *c's children.
427 * *c is not any of TSO, AP, PAP, AP_STACK, which means that
428 * there cannot be any stack objects.
429 * Note: SRTs are considered to be children as well.
430 * -------------------------------------------------------------------------- */
431 static INLINE void
432 push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
433 {
434 stackElement se;
435 bdescr *nbd; // Next Block Descriptor
436
437 #ifdef DEBUG_RETAINER
438 // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
439 #endif
440
441 ASSERT(get_itbl(c)->type != TSO);
442 ASSERT(get_itbl(c)->type != AP_STACK);
443
444 //
445 // fill in se
446 //
447
448 se.c = c;
449 se.c_child_r = c_child_r;
450
451 // fill in se.info
452 switch (get_itbl(c)->type) {
453 // no child, no SRT
454 case CONSTR_0_1:
455 case CONSTR_0_2:
456 case CAF_BLACKHOLE:
457 case BLACKHOLE:
458 case ARR_WORDS:
459 *first_child = NULL;
460 return;
461
462 // one child (fixed), no SRT
463 case MUT_VAR_CLEAN:
464 case MUT_VAR_DIRTY:
465 *first_child = ((StgMutVar *)c)->var;
466 return;
467 case THUNK_SELECTOR:
468 *first_child = ((StgSelector *)c)->selectee;
469 return;
470 case IND_PERM:
471 case IND_OLDGEN_PERM:
472 case IND_OLDGEN:
473 *first_child = ((StgInd *)c)->indirectee;
474 return;
475 case CONSTR_1_0:
476 case CONSTR_1_1:
477 *first_child = c->payload[0];
478 return;
479
480 // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
481 // of the next child. We do not write a separate initialization code.
482 // Also we do not have to initialize info.type;
483
484 // two children (fixed), no SRT
485 // need to push a stackElement, but nothing to store in se.info
486 case CONSTR_2_0:
487 *first_child = c->payload[0]; // return the first pointer
488 // se.info.type = posTypeStep;
489 // se.info.next.step = 2; // 2 = second
490 break;
491
492 // three children (fixed), no SRT
493 // need to push a stackElement
494 case MVAR_CLEAN:
495 case MVAR_DIRTY:
496 // head must be TSO and the head of a linked list of TSOs.
497 // Shoule it be a child? Seems to be yes.
498 *first_child = (StgClosure *)((StgMVar *)c)->head;
499 // se.info.type = posTypeStep;
500 se.info.next.step = 2; // 2 = second
501 break;
502
503 // three children (fixed), no SRT
504 case WEAK:
505 *first_child = ((StgWeak *)c)->key;
506 // se.info.type = posTypeStep;
507 se.info.next.step = 2;
508 break;
509
510 // layout.payload.ptrs, no SRT
511 case CONSTR:
512 case STABLE_NAME:
513 case BCO:
514 case CONSTR_STATIC:
515 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
516 (StgPtr)c->payload);
517 *first_child = find_ptrs(&se.info);
518 if (*first_child == NULL)
519 return; // no child
520 break;
521
522 // StgMutArrPtr.ptrs, no SRT
523 case MUT_ARR_PTRS_CLEAN:
524 case MUT_ARR_PTRS_DIRTY:
525 case MUT_ARR_PTRS_FROZEN:
526 case MUT_ARR_PTRS_FROZEN0:
527 init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
528 (StgPtr)(((StgMutArrPtrs *)c)->payload));
529 *first_child = find_ptrs(&se.info);
530 if (*first_child == NULL)
531 return;
532 break;
533
534 // layout.payload.ptrs, SRT
535 case FUN: // *c is a heap object.
536 case FUN_2_0:
537 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
538 *first_child = find_ptrs(&se.info);
539 if (*first_child == NULL)
540 // no child from ptrs, so check SRT
541 goto fun_srt_only;
542 break;
543
544 case THUNK:
545 case THUNK_2_0:
546 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
547 (StgPtr)((StgThunk *)c)->payload);
548 *first_child = find_ptrs(&se.info);
549 if (*first_child == NULL)
550 // no child from ptrs, so check SRT
551 goto thunk_srt_only;
552 break;
553
554 // 1 fixed child, SRT
555 case FUN_1_0:
556 case FUN_1_1:
557 *first_child = c->payload[0];
558 ASSERT(*first_child != NULL);
559 init_srt_fun(&se.info, get_fun_itbl(c));
560 break;
561
562 case THUNK_1_0:
563 case THUNK_1_1:
564 *first_child = ((StgThunk *)c)->payload[0];
565 ASSERT(*first_child != NULL);
566 init_srt_thunk(&se.info, get_thunk_itbl(c));
567 break;
568
569 case FUN_STATIC: // *c is a heap object.
570 ASSERT(get_itbl(c)->srt_bitmap != 0);
571 case FUN_0_1:
572 case FUN_0_2:
573 fun_srt_only:
574 init_srt_fun(&se.info, get_fun_itbl(c));
575 *first_child = find_srt(&se.info);
576 if (*first_child == NULL)
577 return; // no child
578 break;
579
580 // SRT only
581 case THUNK_STATIC:
582 ASSERT(get_itbl(c)->srt_bitmap != 0);
583 case THUNK_0_1:
584 case THUNK_0_2:
585 thunk_srt_only:
586 init_srt_thunk(&se.info, get_thunk_itbl(c));
587 *first_child = find_srt(&se.info);
588 if (*first_child == NULL)
589 return; // no child
590 break;
591
592 case TVAR_WATCH_QUEUE:
593 *first_child = (StgClosure *)((StgTVarWatchQueue *)c)->closure;
594 se.info.next.step = 2; // 2 = second
595 break;
596 case TVAR:
597 *first_child = (StgClosure *)((StgTVar *)c)->current_value;
598 break;
599 case TREC_HEADER:
600 *first_child = (StgClosure *)((StgTRecHeader *)c)->enclosing_trec;
601 break;
602 case TREC_CHUNK:
603 *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
604 se.info.next.step = 0; // entry no.
605 break;
606
607 // cannot appear
608 case PAP:
609 case AP:
610 case AP_STACK:
611 case TSO:
612 case IND_STATIC:
613 case CONSTR_NOCAF_STATIC:
614 // stack objects
615 case UPDATE_FRAME:
616 case CATCH_FRAME:
617 case STOP_FRAME:
618 case RET_DYN:
619 case RET_BCO:
620 case RET_SMALL:
621 case RET_BIG:
622 // invalid objects
623 case IND:
624 case INVALID_OBJECT:
625 default:
626 barf("Invalid object *c in push()");
627 return;
628 }
629
630 if (stackTop - 1 < stackBottom) {
631 #ifdef DEBUG_RETAINER
632 // debugBelch("push() to the next stack.\n");
633 #endif
634 // currentStack->free is updated when the active stack is switched
635 // to the next stack.
636 currentStack->free = (StgPtr)stackTop;
637
638 if (currentStack->link == NULL) {
639 nbd = allocGroup(BLOCKS_IN_STACK);
640 nbd->link = NULL;
641 nbd->u.back = currentStack;
642 currentStack->link = nbd;
643 } else
644 nbd = currentStack->link;
645
646 newStackBlock(nbd);
647 }
648
649 // adjust stackTop (acutal push)
650 stackTop--;
651 // If the size of stackElement was huge, we would better replace the
652 // following statement by either a memcpy() call or a switch statement
653 // on the type of the element. Currently, the size of stackElement is
654 // small enough (5 words) that this direct assignment seems to be enough.
655
656 // ToDo: The line below leads to the warning:
657 // warning: 'se.info.type' may be used uninitialized in this function
658 // This is caused by the fact that there are execution paths through the
659 // large switch statement above where some cases do not initialize this
660 // field. Is this really harmless? Can we avoid the warning?
661 *stackTop = se;
662
663 #ifdef DEBUG_RETAINER
664 stackSize++;
665 if (stackSize > maxStackSize) maxStackSize = stackSize;
666 // ASSERT(stackSize >= 0);
667 // debugBelch("stackSize = %d\n", stackSize);
668 #endif
669 }
670
671 /* -----------------------------------------------------------------------------
672 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
673 * Invariants:
674 * stackTop cannot be equal to stackLimit unless the whole stack is
675 * empty, in which case popOff() is not allowed.
676 * Note:
677 * You can think of popOffReal() as a part of popOff() which is
678 * executed at the end of popOff() in necessary. Since popOff() is
679 * likely to be executed quite often while popOffReal() is not, we
680 * separate popOffReal() from popOff(), which is declared as an
681 * INLINE function (for the sake of execution speed). popOffReal()
682 * is called only within popOff() and nowhere else.
683 * -------------------------------------------------------------------------- */
684 static void
685 popOffReal(void)
686 {
687 bdescr *pbd; // Previous Block Descriptor
688
689 #ifdef DEBUG_RETAINER
690 // debugBelch("pop() to the previous stack.\n");
691 #endif
692
693 ASSERT(stackTop + 1 == stackLimit);
694 ASSERT(stackBottom == (stackElement *)currentStack->start);
695
696 if (firstStack == currentStack) {
697 // The stack is completely empty.
698 stackTop++;
699 ASSERT(stackTop == stackLimit);
700 #ifdef DEBUG_RETAINER
701 stackSize--;
702 if (stackSize > maxStackSize) maxStackSize = stackSize;
703 /*
704 ASSERT(stackSize >= 0);
705 debugBelch("stackSize = %d\n", stackSize);
706 */
707 #endif
708 return;
709 }
710
711 // currentStack->free is updated when the active stack is switched back
712 // to the previous stack.
713 currentStack->free = (StgPtr)stackLimit;
714
715 // find the previous block descriptor
716 pbd = currentStack->u.back;
717 ASSERT(pbd != NULL);
718
719 returnToOldStack(pbd);
720
721 #ifdef DEBUG_RETAINER
722 stackSize--;
723 if (stackSize > maxStackSize) maxStackSize = stackSize;
724 /*
725 ASSERT(stackSize >= 0);
726 debugBelch("stackSize = %d\n", stackSize);
727 */
728 #endif
729 }
730
731 static INLINE void
732 popOff(void) {
733 #ifdef DEBUG_RETAINER
734 // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
735 #endif
736
737 ASSERT(stackTop != stackLimit);
738 ASSERT(!isEmptyRetainerStack());
739
740 // <= (instead of <) is wrong!
741 if (stackTop + 1 < stackLimit) {
742 stackTop++;
743 #ifdef DEBUG_RETAINER
744 stackSize--;
745 if (stackSize > maxStackSize) maxStackSize = stackSize;
746 /*
747 ASSERT(stackSize >= 0);
748 debugBelch("stackSize = %d\n", stackSize);
749 */
750 #endif
751 return;
752 }
753
754 popOffReal();
755 }
756
757 /* -----------------------------------------------------------------------------
758 * Finds the next object to be considered for retainer profiling and store
759 * its pointer to *c.
760 * Test if the topmost stack element indicates that more objects are left,
761 * and if so, retrieve the first object and store its pointer to *c. Also,
762 * set *cp and *r appropriately, both of which are stored in the stack element.
763 * The topmost stack element then is overwritten so as for it to now denote
764 * the next object.
765 * If the topmost stack element indicates no more objects are left, pop
766 * off the stack element until either an object can be retrieved or
767 * the current stack chunk becomes empty, indicated by rtsTrue returned by
768 * isOnBoundary(), in which case *c is set to NULL.
769 * Note:
770 * It is okay to call this function even when the current stack chunk
771 * is empty.
772 * -------------------------------------------------------------------------- */
// Retrieve the next pending child from the topmost stack element, storing
// it in *c (with its parent in *cp and retainer in *r); pops elements that
// are exhausted and sets *c to NULL when the current chunk is depleted.
static INLINE void
pop( StgClosure **c, StgClosure **cp, retainer *r )
{
    stackElement *se;

#ifdef DEBUG_RETAINER
    // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    do {
        if (isOnBoundary()) {     // if the current stack chunk is depleted
            *c = NULL;
            return;
        }

        se = stackTop;

        // Each case either returns with a child in *c, or (when the top
        // element is exhausted) pops it and loops to try the next one.
        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

            // three children (fixed), no SRT
            // need to push a stackElement
        case MVAR_CLEAN:
        case MVAR_DIRTY:
            // se.info.next.step counts which of the remaining two
            // children (tail, then value) to visit next.
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;             // move to the next step
                // no popOff
            } else {
                *c = ((StgMVar *)se->c)->value;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
                // no popOff
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR_WATCH_QUEUE:
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgTVarWatchQueue *)se->c)->next_queue_entry;
                se->info.next.step++;             // move to the next step
                // no popOff
            } else {
                *c = (StgClosure *)((StgTVarWatchQueue *)se->c)->prev_queue_entry;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR:
            *c = (StgClosure *)((StgTVar *)se->c)->first_watch_queue_entry;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_HEADER:
            *c = (StgClosure *)((StgTRecHeader *)se->c)->current_chunk;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow.  So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            TRecEntry *entry;
            nat entry_no = se->info.next.step >> 2;
            nat field_no = se->info.next.step & 3;
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                *c = NULL;
                popOff();
                return;
            }
            entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            *cp = se->c;
            *r = se->c_child_r;
            se->info.next.step++;
            return;
        }

        case CONSTR:
        case STABLE_NAME:
        case BCO:
        case CONSTR_STATIC:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
        case MUT_ARR_PTRS_FROZEN:
        case MUT_ARR_PTRS_FROZEN0:
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
                popOff();
                break;    // exhausted: loop and inspect the new top element
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // layout.payload.ptrs, SRT
        case FUN:         // always a heap object
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                // pointer fields done: switch this element over to SRT mode
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
            if (*c != NULL) {
                *cp = se->c;
                *r = se->c_child_r;
                return;
            }
            popOff();
            break;

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case CAF_BLACKHOLE:
        case BLACKHOLE:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case IND_PERM:
        case IND_OLDGEN_PERM:
        case IND_OLDGEN:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
        case IND_STATIC:
        case CONSTR_NOCAF_STATIC:
            // stack objects
        case RET_DYN:
        case UPDATE_FRAME:
        case CATCH_FRAME:
        case STOP_FRAME:
        case RET_BCO:
        case RET_SMALL:
        case RET_BIG:
            // invalid objects
        case IND:
        case INVALID_OBJECT:
        default:
            barf("Invalid object *c in pop()");
            return;
        }
    } while (rtsTrue);
}
989
990 /* -----------------------------------------------------------------------------
991 * RETAINER PROFILING ENGINE
992 * -------------------------------------------------------------------------- */
993
// Set up retainer profiling: initialize the retainer-set machinery and
// reset the generation counter.  (Kept in this order; whether
// initializeAllRetainerSet() depends on retainerGeneration is not visible
// here — TODO confirm before reordering.)
void
initRetainerProfiling( void )
{
    initializeAllRetainerSet();
    retainerGeneration = 0;
}
1000
1001 /* -----------------------------------------------------------------------------
1002 * This function must be called before f-closing prof_file.
1003 * -------------------------------------------------------------------------- */
1004 void
1005 endRetainerProfiling( void )
1006 {
1007 #ifdef SECOND_APPROACH
1008 outputAllRetainerSet(prof_file);
1009 #endif
1010 }
1011
1012 /* -----------------------------------------------------------------------------
1013 * Returns the actual pointer to the retainer set of the closure *c.
1014 * It may adjust RSET(c) subject to flip.
1015 * Side effects:
1016 * RSET(c) is initialized to NULL if its current value does not
1017 * conform to flip.
1018 * Note:
1019 * Even though this function has side effects, they CAN be ignored because
1020 * subsequent calls to retainerSetOf() always result in the same return value
1021 * and retainerSetOf() is the only way to retrieve retainerSet of a given
1022 * closure.
1023 * We have to perform an XOR (^) operation each time a closure is examined.
1024 * The reason is that we do not know when a closure is visited last.
1025 * -------------------------------------------------------------------------- */
1026 static INLINE void
1027 maybeInitRetainerSet( StgClosure *c )
1028 {
1029 if (!isRetainerSetFieldValid(c)) {
1030 setRetainerSetToNull(c);
1031 }
1032 }
1033
1034 /* -----------------------------------------------------------------------------
1035 * Returns rtsTrue if *c is a retainer.
1036 * -------------------------------------------------------------------------- */
static INLINE rtsBool
isRetainer( StgClosure *c )
{
    // Classify the closure purely by its info-table type: retainers are
    // the objects we attribute heap residency to (roots, mutable cells,
    // and thunks); everything else is merely traversed through.
    switch (get_itbl(c)->type) {
        //
        //  True case
        //
        // TSOs MUST be retainers: they constitute the set of roots.
    case TSO:

        // mutable objects
    case MVAR_CLEAN:
    case MVAR_DIRTY:
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:

        // thunks are retainers.
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
    case AP:
    case AP_STACK:

        // Static thunks, or CAFS, are obviously retainers.
    case THUNK_STATIC:

        // WEAK objects are roots; there is separate code in which traversing
        // begins from WEAK objects.
    case WEAK:

        // Since the other mutvar-type things are retainers, seems
        // like the right thing to do:
    case TVAR:
        return rtsTrue;

        //
        // False case
        //

        // constructors
    case CONSTR:
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
        // functions
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
        // partial applications
    case PAP:
        // blackholes
    case CAF_BLACKHOLE:
    case BLACKHOLE:
        // indirection
    case IND_PERM:
    case IND_OLDGEN_PERM:
    case IND_OLDGEN:
        // static objects
    case CONSTR_STATIC:
    case FUN_STATIC:
        // misc
    case STABLE_NAME:
    case BCO:
    case ARR_WORDS:
        // STM
    case TVAR_WATCH_QUEUE:
    case TREC_HEADER:
    case TREC_CHUNK:
        return rtsFalse;

        //
        // Error case
        //
        // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
    case IND_STATIC:
        // CONSTR_NOCAF_STATIC
        // cannot be *c, *cp, *r in the retainer profiling loop.
    case CONSTR_NOCAF_STATIC:
        // Stack objects are invalid because they are never treated as
        // legal objects during retainer profiling.
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
        // other cases
    case IND:
    case INVALID_OBJECT:
    default:
        barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
        // not reached: barf() does not return, the return only
        // silences compiler warnings.
        return rtsFalse;
    }
}
1145
1146 /* -----------------------------------------------------------------------------
1147 * Returns the retainer function value for the closure *c, i.e., R(*c).
1148 * This function does NOT return the retainer(s) of *c.
1149 * Invariants:
1150 * *c must be a retainer.
1151 * Note:
1152 * Depending on the definition of this function, the maintenance of retainer
1153 * sets can be made easier. If most retainer sets are likely to be created
1154 * again across garbage collections, refreshAllRetainerSet() in
1155 * RetainerSet.c can simply do nothing.
1156 * If this is not the case, we can free all the retainer sets and
1157 * re-initialize the hash table.
1158 * See refreshAllRetainerSet() in RetainerSet.c.
1159 * -------------------------------------------------------------------------- */
static INLINE retainer
getRetainerFrom( StgClosure *c )
{
    // Only meaningful for retainers; see isRetainer().
    ASSERT(isRetainer(c));

#if defined(RETAINER_SCHEME_INFO)
    // Retainer scheme 1: retainer = info table
    return get_itbl(c);
#elif defined(RETAINER_SCHEME_CCS)
    // Retainer scheme 2: retainer = cost centre stack
    return c->header.prof.ccs;
#elif defined(RETAINER_SCHEME_CC)
    // Retainer scheme 3: retainer = cost centre
    return c->header.prof.ccs->cc;
#endif
    // NOTE(review): if none of the RETAINER_SCHEME_* macros is defined,
    // control falls off the end of this non-void function. The build
    // presumably guarantees exactly one scheme is selected -- confirm.
}
1176
1177 /* -----------------------------------------------------------------------------
1178 * Associates the retainer set *s with the closure *c, that is, *s becomes
1179 * the retainer set of *c.
1180 * Invariants:
1181 * c != NULL
1182 * s != NULL
1183 * -------------------------------------------------------------------------- */
static INLINE void
associate( StgClosure *c, RetainerSet *s )
{
    // StgWord has the same size as pointers, so the following type
    // casting is okay.
    // OR-ing in the current flip bit marks the rs field as valid for
    // this profiling pass; see retainerSetOf() / the flip convention
    // described at the top of the file.
    RSET(c) = (RetainerSet *)((StgWord)s | flip);
}
1191
1192 /* -----------------------------------------------------------------------------
1193 Call retainClosure for each of the closures covered by a large bitmap.
1194 -------------------------------------------------------------------------- */
1195
static void
retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size,
                     StgClosure *c, retainer c_child_r)
{
    nat i, b;        // i = word index into the region, b = bitmap word index
    StgWord bitmap;

    b = 0;
    bitmap = large_bitmap->bitmap[b];
    for (i = 0; i < size; ) {
        // A clear bit marks a pointer word: traverse the closure it
        // points to, with *c as parent and c_child_r as retainer.
        if ((bitmap & 1) == 0) {
            retainClosure((StgClosure *)*p, c, c_child_r);
        }
        i++;
        p++;
        // Refill from the next bitmap word every BITS_IN(W_) steps,
        // otherwise shift the current word to expose the next bit.
        if (i % BITS_IN(W_) == 0) {
            b++;
            bitmap = large_bitmap->bitmap[b];
        } else {
            bitmap = bitmap >> 1;
        }
    }
}
1219
1220 static INLINE StgPtr
1221 retain_small_bitmap (StgPtr p, nat size, StgWord bitmap,
1222 StgClosure *c, retainer c_child_r)
1223 {
1224 while (size > 0) {
1225 if ((bitmap & 1) == 0) {
1226 retainClosure((StgClosure *)*p, c, c_child_r);
1227 }
1228 p++;
1229 bitmap = bitmap >> 1;
1230 size--;
1231 }
1232 return p;
1233 }
1234
1235 /* -----------------------------------------------------------------------------
1236 * Call retainClosure for each of the closures in an SRT.
1237 * ------------------------------------------------------------------------- */
1238
static void
retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
{
    nat i, b, size;   // i = entry index, b = bitmap word index
    StgWord bitmap;
    StgClosure **p;

    b = 0;
    p = (StgClosure **)srt->srt;
    size = srt->l.size;
    bitmap = srt->l.bitmap[b];
    for (i = 0; i < size; ) {
        // NB: opposite polarity to the stack bitmaps -- in an SRT a SET
        // bit marks a live entry to follow.
        if ((bitmap & 1) != 0) {
            retainClosure((StgClosure *)*p, c, c_child_r);
        }
        i++;
        p++;
        // Advance to the next bitmap word every BITS_IN(W_) entries.
        if (i % BITS_IN(W_) == 0) {
            b++;
            bitmap = srt->l.bitmap[b];
        } else {
            bitmap = bitmap >> 1;
        }
    }
}
1264
1265 static INLINE void
1266 retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
1267 {
1268 nat bitmap;
1269 StgClosure **p;
1270
1271 bitmap = srt_bitmap;
1272 p = srt;
1273
1274 if (bitmap == (StgHalfWord)(-1)) {
1275 retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1276 return;
1277 }
1278
1279 while (bitmap != 0) {
1280 if ((bitmap & 1) != 0) {
1281 #if defined(__PIC__) && defined(mingw32_TARGET_OS)
1282 if ( (unsigned long)(*srt) & 0x1 ) {
1283 retainClosure(* (StgClosure**) ((unsigned long) (*srt) & ~0x1),
1284 c, c_child_r);
1285 } else {
1286 retainClosure(*srt,c,c_child_r);
1287 }
1288 #else
1289 retainClosure(*srt,c,c_child_r);
1290 #endif
1291 }
1292 p++;
1293 bitmap = bitmap >> 1;
1294 }
1295 }
1296
1297 /* -----------------------------------------------------------------------------
1298 * Process all the objects in the stack chunk from stackStart to stackEnd
1299 * with *c and *c_child_r being their parent and their most recent retainer,
1300 * respectively. Treat stackOptionalFun as another child of *c if it is
1301 * not NULL.
1302 * Invariants:
1303 * *c is one of the following: TSO, AP_STACK.
1304 * If *c is TSO, c == c_child_r.
1305 * stackStart < stackEnd.
1306 * RSET(c) and RSET(c_child_r) are valid, i.e., their
1307 * interpretation conforms to the current value of flip (even when they
1308 * are interpreted to be NULL).
1309 * If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
1310 * or ThreadKilled, which means that its stack is ready to process.
1311 * Note:
 * This code was almost plagiarized from GC.c! For each pointer,
1313 * retainClosure() is invoked instead of evacuate().
1314 * -------------------------------------------------------------------------- */
static void
retainStack( StgClosure *c, retainer c_child_r,
             StgPtr stackStart, StgPtr stackEnd )
{
    stackElement *oldStackBoundary;
    StgPtr p;                  // cursor over the stack chunk
    StgRetInfoTable *info;     // info table of the frame at p
    StgWord32 bitmap;
    nat size;

#ifdef DEBUG_RETAINER
    cStackSize++;
    if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
#endif

    /*
      Each invocation of retainStack() creates a new virtual
      stack. Since all such stacks share a single common stack, we
      record the current currentStackBoundary, which will be restored
      at the exit.
    */
    oldStackBoundary = currentStackBoundary;
    currentStackBoundary = stackTop;

#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
#endif

    // Precondition (see header comment): a TSO handed to us must have a
    // stack that is ready to process.
    ASSERT(get_itbl(c)->type != TSO ||
           (((StgTSO *)c)->what_next != ThreadRelocated &&
            ((StgTSO *)c)->what_next != ThreadComplete &&
            ((StgTSO *)c)->what_next != ThreadKilled));

    // Walk the chunk frame by frame; each case advances p past the frame.
    p = stackStart;
    while (p < stackEnd) {
        info = get_ret_itbl((StgClosure *)p);

        switch(info->i.type) {

        case UPDATE_FRAME:
            retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
            p += sizeofW(StgUpdateFrame);
            continue;

        // Small-bitmap frames: one bitmap word describes the payload.
        case STOP_FRAME:
        case CATCH_FRAME:
        case CATCH_STM_FRAME:
        case CATCH_RETRY_FRAME:
        case ATOMICALLY_FRAME:
        case RET_SMALL:
            bitmap = BITMAP_BITS(info->i.layout.bitmap);
            size = BITMAP_SIZE(info->i.layout.bitmap);
            p++;
            p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

        follow_srt:
            // Also retain anything reachable from the frame's SRT.
            retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
            continue;

        case RET_BCO: {
            StgBCO *bco;

            p++;
            // The word after the header is the BCO itself; retain it,
            // then use its bitmap for the rest of the frame.
            retainClosure((StgClosure *)*p, c, c_child_r);
            bco = (StgBCO *)*p;
            p++;
            size = BCO_BITMAP_SIZE(bco);
            retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
            p += size;
            continue;
        }

        // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
        case RET_BIG:
            size = GET_LARGE_BITMAP(&info->i)->size;
            p++;
            retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
                                size, c, c_child_r);
            p += size;
            // and don't forget to follow the SRT
            goto follow_srt;

        // Dynamic bitmap: the mask is stored on the stack
        case RET_DYN: {
            StgWord dyn;
            dyn = ((StgRetDyn *)p)->liveness;

            // traverse the bitmap first
            bitmap = RET_DYN_LIVENESS(dyn);
            p = (P_)&((StgRetDyn *)p)->payload[0];
            size = RET_DYN_BITMAP_SIZE;
            p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

            // skip over the non-ptr words
            p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;

            // follow the ptr words
            for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
                retainClosure((StgClosure *)*p, c, c_child_r);
                p++;
            }
            continue;
        }

        case RET_FUN: {
            StgRetFun *ret_fun = (StgRetFun *)p;
            StgFunInfoTable *fun_info;

            retainClosure(ret_fun->fun, c, c_child_r);
            fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun));

            // The payload layout depends on the function's argument
            // pattern (small / large / standard bitmap).
            p = (P_)&ret_fun->payload;
            switch (fun_info->f.fun_type) {
            case ARG_GEN:
                bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
                size = BITMAP_SIZE(fun_info->f.b.bitmap);
                p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
                break;
            case ARG_GEN_BIG:
                size = GET_FUN_LARGE_BITMAP(fun_info)->size;
                retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
                                    size, c, c_child_r);
                p += size;
                break;
            default:
                // standard argument pattern: use the precomputed bitmaps
                bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
                size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
                p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
                break;
            }
            goto follow_srt;
        }

        default:
            barf("Invalid object found in retainStack(): %d",
                 (int)(info->i.type));
        }
    }

    // restore currentStackBoundary
    currentStackBoundary = oldStackBoundary;
#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
#endif

#ifdef DEBUG_RETAINER
    cStackSize--;
#endif
}
1464
1465 /* ----------------------------------------------------------------------------
1466 * Call retainClosure for each of the children of a PAP/AP
1467 * ------------------------------------------------------------------------- */
1468
static INLINE StgPtr
retain_PAP_payload (StgClosure *pap,    /* NOT tagged */
                    retainer c_child_r, /* NOT tagged */
                    StgClosure *fun,    /* tagged */
                    StgClosure** payload, StgWord n_args)
{
    StgPtr p;
    StgWord bitmap;
    StgFunInfoTable *fun_info;

    // The function closure itself is a child of the PAP/AP.
    retainClosure(fun, pap, c_child_r);
    fun = UNTAG_CLOSURE(fun);
    fun_info = get_fun_itbl(fun);
    ASSERT(fun_info->i.type != PAP);

    p = (StgPtr)payload;

    // Traverse the n_args payload words; which bitmap describes them
    // depends on the function's argument pattern.
    switch (fun_info->f.fun_type) {
    case ARG_GEN:
        bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
        p = retain_small_bitmap(p, n_args, bitmap,
                                pap, c_child_r);
        break;
    case ARG_GEN_BIG:
        retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
                            n_args, pap, c_child_r);
        p += n_args;
        break;
    case ARG_BCO:
        // byte-code object: the BCO carries its own bitmap
        retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun),
                            n_args, pap, c_child_r);
        p += n_args;
        break;
    default:
        // standard argument pattern: use the precomputed bitmaps
        bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
        p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
        break;
    }
    // First word past the payload.
    return p;
}
1509
1510 /* -----------------------------------------------------------------------------
 * Compute the retainer set of *c0 and all its descendants by traversing.
1512 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
1513 * Invariants:
1514 * c0 = cp0 = r0 holds only for root objects.
1515 * RSET(cp0) and RSET(r0) are valid, i.e., their
1516 * interpretation conforms to the current value of flip (even when they
1517 * are interpreted to be NULL).
1518 * However, RSET(c0) may be corrupt, i.e., it may not conform to
1519 * the current value of flip. If it does not, during the execution
1520 * of this function, RSET(c0) must be initialized as well as all
1521 * its descendants.
1522 * Note:
1523 * stackTop must be the same at the beginning and the exit of this function.
1524 * *c0 can be TSO (as well as AP_STACK).
1525 * -------------------------------------------------------------------------- */
static void
retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
{
    // c = Current closure (possibly tagged)
    // cp = Current closure's Parent (NOT tagged)
    // r = current closures' most recent Retainer (NOT tagged)
    // c_child_r = current closure's children's most recent retainer
    // first_child = first child of c
    StgClosure *c, *cp, *first_child;
    RetainerSet *s, *retainerSetOfc;
    retainer r, c_child_r;
    StgWord typeOfc;

#ifdef DEBUG_RETAINER
    // StgPtr oldStackTop;
#endif

#ifdef DEBUG_RETAINER
    // oldStackTop = stackTop;
    // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
#endif

    // (c, cp, r) = (c0, cp0, r0)
    c = c0;
    cp = cp0;
    r = r0;
    goto inner_loop;

    // Main traversal loop: pop the next (closure, parent, retainer)
    // triple off the explicit traverse stack; NULL means we're done.
loop:
    //debugBelch("loop");
    // pop to (c, cp, r);
    pop(&c, &cp, &r);

    if (c == NULL) {
#ifdef DEBUG_RETAINER
        // debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
#endif
        return;
    }

    //debugBelch("inner_loop");

inner_loop:
    c = UNTAG_CLOSURE(c);

    // c = current closure under consideration,
    // cp = current closure's parent,
    // r = current closure's most recent retainer
    //
    // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
    //   RSET(cp) and RSET(r) are valid.
    //   RSET(c) is valid only if c has been visited before.
    //
    // Loop invariants (on the relation between c, cp, and r)
    //   if cp is not a retainer, r belongs to RSET(cp).
    //   if cp is a retainer, r == cp.

    typeOfc = get_itbl(c)->type;

#ifdef DEBUG_RETAINER
    // Debug cost accounting: count the cost of each dynamic object the
    // first time it is seen (static objects are excluded).
    switch (typeOfc) {
    case IND_STATIC:
    case CONSTR_NOCAF_STATIC:
    case CONSTR_STATIC:
    case THUNK_STATIC:
    case FUN_STATIC:
        break;
    default:
        if (retainerSetOf(c) == NULL) { // first visit?
            costArray[typeOfc] += cost(c);
            sumOfNewCost += cost(c);
        }
        break;
    }
#endif

    // special cases
    switch (typeOfc) {
    case TSO:
        // Dead or relocated threads are skipped (or followed through
        // their _link) rather than retained.
        if (((StgTSO *)c)->what_next == ThreadComplete ||
            ((StgTSO *)c)->what_next == ThreadKilled) {
#ifdef DEBUG_RETAINER
            debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
#endif
            goto loop;
        }
        if (((StgTSO *)c)->what_next == ThreadRelocated) {
#ifdef DEBUG_RETAINER
            debugBelch("ThreadRelocated encountered in retainClosure()\n");
#endif
            c = (StgClosure *)((StgTSO *)c)->_link;
            goto inner_loop;
        }
        break;

    case IND_STATIC:
        // We just skip IND_STATIC, so its retainer set is never computed.
        c = ((StgIndStatic *)c)->indirectee;
        goto inner_loop;
        // static objects with no pointers out, so goto loop.
    case CONSTR_NOCAF_STATIC:
        // It is not just enough not to compute the retainer set for *c; it is
        // mandatory because CONSTR_NOCAF_STATIC are not reachable from
        // scavenged_static_objects, the list from which is assumed to traverse
        // all static objects after major garbage collections.
        goto loop;
    case THUNK_STATIC:
    case FUN_STATIC:
        if (get_itbl(c)->srt_bitmap == 0) {
            // No need to compute the retainer set; no dynamic objects
            // are reachable from *c.
            //
            // Static objects: if we traverse all the live closures,
            // including static closures, during each heap census then
            // we will observe that some static closures appear and
            // disappear.  eg. a closure may contain a pointer to a
            // static function 'f' which is not otherwise reachable
            // (it doesn't indirectly point to any CAFs, so it doesn't
            // appear in any SRTs), so we would find 'f' during
            // traversal.  However on the next sweep there may be no
            // closures pointing to 'f'.
            //
            // We must therefore ignore static closures whose SRT is
            // empty, because these are exactly the closures that may
            // "appear".  A closure with a non-empty SRT, and which is
            // still required, will always be reachable.
            //
            // But what about CONSTR_STATIC?  Surely these may be able
            // to appear, and they don't have SRTs, so we can't
            // check.  So for now, we're calling
            // resetStaticObjectForRetainerProfiling() from the
            // garbage collector to reset the retainer sets in all the
            // reachable static objects.
            goto loop;
        }
        // fallthrough: non-empty SRT, treat like a normal closure
    default:
        break;
    }

    // The above objects are ignored in computing the average number of times
    // an object is visited.
    timesAnyObjectVisited++;

    // If this is the first visit to c, initialize its retainer set.
    maybeInitRetainerSet(c);
    retainerSetOfc = retainerSetOf(c);

    // Now compute s:
    //    isRetainer(cp) == rtsTrue => s == NULL
    //    isRetainer(cp) == rtsFalse => s == cp.retainer
    if (isRetainer(cp))
        s = NULL;
    else
        s = retainerSetOf(cp);

    // (c, cp, r, s) is available.

    // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
    if (retainerSetOfc == NULL) {
        // This is the first visit to *c.
        numObjectVisited++;

        if (s == NULL)
            associate(c, singleton(r));
        else
            // s is actually the retainer set of *c!
            associate(c, s);

        // compute c_child_r
        c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
    } else {
        // This is not the first visit to *c.
        if (isMember(r, retainerSetOfc))
            goto loop;          // no need to process child

        if (s == NULL)
            associate(c, addElement(r, retainerSetOfc));
        else {
            // s is not NULL and cp is not a retainer. This means that
            // each time *cp is visited, so is *c. Thus, if s has
            // exactly one more element in its retainer set than c, s
            // is also the new retainer set for *c.
            if (s->num == retainerSetOfc->num + 1) {
                associate(c, s);
            }
            // Otherwise, just add R_r to the current retainer set of *c.
            else {
                associate(c, addElement(r, retainerSetOfc));
            }
        }

        if (isRetainer(c))
            goto loop;          // no need to process child

        // compute c_child_r
        c_child_r = r;
    }

    // now, RSET() of all of *c, *cp, and *r is valid.
    // (c, c_child_r) are available.

    // process child

    // Special case closures: we process these all in one go rather
    // than attempting to save the current position, because doing so
    // would be hard.
    switch (typeOfc) {
    case TSO:
        retainStack(c, c_child_r,
                    ((StgTSO *)c)->sp,
                    ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
        goto loop;

    case PAP:
    {
        StgPAP *pap = (StgPAP *)c;
        retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
        goto loop;
    }

    case AP:
    {
        StgAP *ap = (StgAP *)c;
        retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
        goto loop;
    }

    case AP_STACK:
        retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
        retainStack(c, c_child_r,
                    (StgPtr)((StgAP_STACK *)c)->payload,
                    (StgPtr)((StgAP_STACK *)c)->payload +
                             ((StgAP_STACK *)c)->size);
        goto loop;
    }

    push(c, c_child_r, &first_child);

    // If first_child is null, c has no child.
    // If first_child is not null, the top stack element points to the next
    // object. push() may or may not push a stackElement on the stack.
    if (first_child == NULL)
        goto loop;

    // (c, cp, r) = (first_child, c, c_child_r)
    r = c_child_r;
    cp = c;
    c = first_child;
    goto inner_loop;
}
1776
1777 /* -----------------------------------------------------------------------------
1778 * Compute the retainer set for every object reachable from *tl.
1779 * -------------------------------------------------------------------------- */
static void
retainRoot(void *user STG_UNUSED, StgClosure **tl)
{
    StgClosure *c;

    // We no longer assume that only TSOs and WEAKs are roots; any closure can
    // be a root.

    // Each root starts a fresh traversal on an empty traverse stack.
    ASSERT(isEmptyRetainerStack());
    currentStackBoundary = stackTop;

    c = UNTAG_CLOSURE(*tl);
    if (c != &stg_END_TSO_QUEUE_closure && isRetainer(c)) {
        // A retainer root retains itself: c is its own parent and retainer.
        retainClosure(c, c, getRetainerFrom(c));
    } else {
        // Non-retainer roots are attributed to CCS_SYSTEM.
        // NOTE(review): passing CCS_SYSTEM as the retainer looks specific
        // to the cost-centre-stack retainer scheme -- confirm that other
        // RETAINER_SCHEME_* configurations handle this sensibly.
        retainClosure(c, c, CCS_SYSTEM);
    }

    // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
    // *tl might be a TSO which is ThreadComplete, in which
    // case we ignore it for the purposes of retainer profiling.
}
1802
1803 /* -----------------------------------------------------------------------------
1804 * Compute the retainer set for each of the objects in the heap.
1805 * -------------------------------------------------------------------------- */
static void
computeRetainerSet( void )
{
    StgWeak *weak;
    RetainerSet *rtl;
    nat g;
    StgPtr ml;
    bdescr *bd;
#ifdef DEBUG_RETAINER
    RetainerSet tmpRetainerSet;
#endif

    markCapabilities(retainRoot, NULL); // for scheduler roots

    // This function is called after a major GC, when key, value, and finalizer
    // all are guaranteed to be valid, or reachable.
    //
    // The following code assumes that WEAK objects are considered to be roots
    // for retainer profiling.
    for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
        // retainRoot((StgClosure *)weak);
        // NOTE(review): this passes the address of the LOCAL variable
        // `weak` as the root slot, relying on retainRoot only reading
        // through it -- confirm that is intentional.
        retainRoot(NULL, (StgClosure **)&weak);

    // Consider roots from the stable ptr table.
    markStablePtrTable(retainRoot, NULL);

    // The following code resets the rs field of each unvisited mutable
    // object (computing sumOfNewCostExtra and updating costArray[] when
    // debugging retainer profiler).
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        // NOT TRUE: even G0 has a block on its mutable list
        // ASSERT(g != 0 || (generations[g].mut_list == NULL));

        // Traversing through mut_list is necessary
        // because we can find MUT_VAR objects which have not been
        // visited during retainer profiling.
        for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
            for (ml = bd->start; ml < bd->free; ml++) {

                maybeInitRetainerSet((StgClosure *)*ml);
                rtl = retainerSetOf((StgClosure *)*ml);

#ifdef DEBUG_RETAINER
                if (rtl == NULL) {
                    // first visit to *ml
                    // This is a violation of the interface rule!
                    RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);

                    switch (get_itbl((StgClosure *)ml)->type) {
                    case IND_STATIC:
                        // no cost involved
                        break;
                    case CONSTR_NOCAF_STATIC:
                    case CONSTR_STATIC:
                    case THUNK_STATIC:
                    case FUN_STATIC:
                        barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
                        break;
                    default:
                        // dynamic objects
                        costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
                        sumOfNewCostExtra += cost((StgClosure *)ml);
                        break;
                    }
                }
#endif
            }
        }
    }
}
1876
1877 /* -----------------------------------------------------------------------------
1878 * Traverse all static objects for which we compute retainer sets,
1879 * and reset their rs fields to NULL, which is accomplished by
1880 * invoking maybeInitRetainerSet(). This function must be called
1881 * before zeroing all objects reachable from scavenged_static_objects
 * in the case of major garbage collections. See GarbageCollect() in
1883 * GC.c.
1884 * Note:
1885 * The mut_once_list of the oldest generation must also be traversed?
1886 * Why? Because if the evacuation of an object pointed to by a static
1887 * indirection object fails, it is put back to the mut_once_list of
1888 * the oldest generation.
1889 * However, this is not necessary because any static indirection objects
1890 * are just traversed through to reach dynamic objects. In other words,
1891 * they are not taken into consideration in computing retainer sets.
1892 * -------------------------------------------------------------------------- */
1893 void
1894 resetStaticObjectForRetainerProfiling( StgClosure *static_objects )
1895 {
1896 #ifdef DEBUG_RETAINER
1897 nat count;
1898 #endif
1899 StgClosure *p;
1900
1901 #ifdef DEBUG_RETAINER
1902 count = 0;
1903 #endif
1904 p = static_objects;
1905 while (p != END_OF_STATIC_LIST) {
1906 #ifdef DEBUG_RETAINER
1907 count++;
1908 #endif
1909 switch (get_itbl(p)->type) {
1910 case IND_STATIC:
1911 // Since we do not compute the retainer set of any
1912 // IND_STATIC object, we don't have to reset its retainer
1913 // field.
1914 p = (StgClosure*)*IND_STATIC_LINK(p);
1915 break;
1916 case THUNK_STATIC:
1917 maybeInitRetainerSet(p);
1918 p = (StgClosure*)*THUNK_STATIC_LINK(p);
1919 break;
1920 case FUN_STATIC:
1921 maybeInitRetainerSet(p);
1922 p = (StgClosure*)*FUN_STATIC_LINK(p);
1923 break;
1924 case CONSTR_STATIC:
1925 maybeInitRetainerSet(p);
1926 p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
1927 break;
1928 default:
1929 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1930 p, get_itbl(p)->type);
1931 break;
1932 }
1933 }
1934 #ifdef DEBUG_RETAINER
1935 // debugBelch("count in scavenged_static_objects = %d\n", count);
1936 #endif
1937 }
1938
1939 /* -----------------------------------------------------------------------------
1940 * Perform retainer profiling.
 * N is the oldest generation being profiled, where the generations are
1942 * numbered starting at 0.
1943 * Invariants:
1944 * Note:
1945 * This function should be called only immediately after major garbage
1946 * collection.
1947 * ------------------------------------------------------------------------- */
void
retainerProfile(void)
{
#ifdef DEBUG_RETAINER
    nat i;
    nat totalHeapSize; // total raw heap size (computed by linear scanning)
#endif

#ifdef DEBUG_RETAINER
    debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
#endif

    stat_startRP();

    // We haven't flipped the bit yet.
#ifdef DEBUG_RETAINER
    // Sanity pass 1: linear scan of the heap before traversal, recording
    // per-closure-type costs for later comparison.
    debugBelch("Before traversing:\n");
    sumOfCostLinear = 0;
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        costArrayLinear[i] = 0;
    totalHeapSize = checkHeapSanityForRetainerProfiling();

    debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
    /*
    debugBelch("costArrayLinear[] = ");
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        debugBelch("[%u:%u] ", i, costArrayLinear[i]);
    debugBelch("\n");
    */

    ASSERT(sumOfCostLinear == totalHeapSize);

/*
#define pcostArrayLinear(index) \
    if (costArrayLinear[index] > 0) \
        debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
    pcostArrayLinear(THUNK_STATIC);
    pcostArrayLinear(FUN_STATIC);
    pcostArrayLinear(CONSTR_STATIC);
    pcostArrayLinear(CONSTR_NOCAF_STATIC);
*/
#endif

    // Now we flip the flip bit: every retainer-set field written during
    // the previous pass instantly becomes invalid (see the flip
    // convention at the top of the file).
    flip = flip ^ 1;

#ifdef DEBUG_RETAINER
    stackSize = 0;
    maxStackSize = 0;
    cStackSize = 0;
    maxCStackSize = 0;
#endif
    numObjectVisited = 0;
    timesAnyObjectVisited = 0;

#ifdef DEBUG_RETAINER
    debugBelch("During traversing:\n");
    sumOfNewCost = 0;
    sumOfNewCostExtra = 0;
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        costArray[i] = 0;
#endif

    /*
      We initialize the traverse stack each time the retainer profiling is
      performed (because the traverse stack size varies on each retainer profiling
      and this operation is not costly anyhow). However, we just refresh the
      retainer sets.
    */
    initializeTraverseStack();
#ifdef DEBUG_RETAINER
    initializeAllRetainerSet();
#else
    refreshAllRetainerSet();
#endif
    computeRetainerSet();

#ifdef DEBUG_RETAINER
    // Sanity pass 2: re-scan the heap after traversal and compare the
    // linear-scan costs with the costs observed during traversal.
    debugBelch("After traversing:\n");
    sumOfCostLinear = 0;
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        costArrayLinear[i] = 0;
    totalHeapSize = checkHeapSanityForRetainerProfiling();

    debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
    ASSERT(sumOfCostLinear == totalHeapSize);

    // now, compare the two results
    /*
      Note:
        costArray[] must be exactly the same as costArrayLinear[].
        Known exceptions:
          1) Dead weak pointers, whose type is CONSTR. These objects are not
             reachable from any roots.
    */
    debugBelch("Comparison:\n");
    debugBelch("\tcostArrayLinear[] (must be empty) = ");
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        if (costArray[i] != costArrayLinear[i])
            // nothing should be printed except MUT_VAR after major GCs
            debugBelch("[%u:%u] ", i, costArrayLinear[i]);
    debugBelch("\n");

    debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
    debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
    debugBelch("\tcostArray[] (must be empty) = ");
    for (i = 0;i < N_CLOSURE_TYPES; i++)
        if (costArray[i] != costArrayLinear[i])
            // nothing should be printed except MUT_VAR after major GCs
            debugBelch("[%u:%u] ", i, costArray[i]);
    debugBelch("\n");

    // only for major garbage collection
    ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
#endif

    // post-processing
    closeTraverseStack();
#ifdef DEBUG_RETAINER
    closeAllRetainerSet();
#else
    // Note that there is no post-processing for the retainer sets.
#endif
    retainerGeneration++;

    stat_endRP(
      retainerGeneration - 1,   // retainerGeneration has just been incremented!
#ifdef DEBUG_RETAINER
      maxCStackSize, maxStackSize,
#endif
      // average number of visits per visited object
      // NOTE(review): numObjectVisited can be 0 if nothing was visited,
      // yielding inf/NaN here -- confirm stat_endRP tolerates that.
      (double)timesAnyObjectVisited / numObjectVisited);
}
2080
2081 /* -----------------------------------------------------------------------------
2082 * DEBUGGING CODE
2083 * -------------------------------------------------------------------------- */
2084
2085 #ifdef DEBUG_RETAINER
2086
// Conservative validity test for a pointer found in a closure during the
// retainer-profiling sanity walk: the address must look like either a static
// closure or a heap address inside a block that has not been freed
// (BF_FREE clear), and the first word it points at must not be 0xaaaaaaaa.
// NOTE(review): 0xaaaaaaaa is presumably the scrub pattern written over
// freed memory by the debugging allocator — confirm; the check also assumes
// that pattern fits the word comparison on 64-bit targets.
#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
        ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
        ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
2090
2091 static nat
2092 sanityCheckHeapClosure( StgClosure *c )
2093 {
2094 StgInfoTable *info;
2095
2096 ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
2097 ASSERT(!closure_STATIC(c));
2098 ASSERT(LOOKS_LIKE_PTR(c));
2099
2100 if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
2101 if (get_itbl(c)->type == CONSTR &&
2102 !strcmp(GET_PROF_TYPE(get_itbl(c)), "DEAD_WEAK") &&
2103 !strcmp(GET_PROF_DESC(get_itbl(c)), "DEAD_WEAK")) {
2104 debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
2105 costArray[get_itbl(c)->type] += cost(c);
2106 sumOfNewCost += cost(c);
2107 } else
2108 debugBelch(
2109 "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
2110 flip, c, get_itbl(c)->type,
2111 get_itbl(c)->prof.closure_type, GET_PROF_DESC(get_itbl(c)),
2112 RSET(c));
2113 } else {
2114 // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
2115 }
2116
2117 return closure_sizeW(c);
2118 }
2119
2120 static nat
2121 heapCheck( bdescr *bd )
2122 {
2123 StgPtr p;
2124 static nat costSum, size;
2125
2126 costSum = 0;
2127 while (bd != NULL) {
2128 p = bd->start;
2129 while (p < bd->free) {
2130 size = sanityCheckHeapClosure((StgClosure *)p);
2131 sumOfCostLinear += size;
2132 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2133 p += size;
2134 // no need for slop check; I think slops are not used currently.
2135 }
2136 ASSERT(p == bd->free);
2137 costSum += bd->free - bd->start;
2138 bd = bd->link;
2139 }
2140
2141 return costSum;
2142 }
2143
2144 static nat
2145 smallObjectPoolCheck(void)
2146 {
2147 bdescr *bd;
2148 StgPtr p;
2149 static nat costSum, size;
2150
2151 bd = g0s0->blocks;
2152 costSum = 0;
2153
2154 // first block
2155 if (bd == NULL)
2156 return costSum;
2157
2158 p = bd->start;
2159 while (p < alloc_Hp) {
2160 size = sanityCheckHeapClosure((StgClosure *)p);
2161 sumOfCostLinear += size;
2162 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2163 p += size;
2164 }
2165 ASSERT(p == alloc_Hp);
2166 costSum += alloc_Hp - bd->start;
2167
2168 bd = bd->link;
2169 while (bd != NULL) {
2170 p = bd->start;
2171 while (p < bd->free) {
2172 size = sanityCheckHeapClosure((StgClosure *)p);
2173 sumOfCostLinear += size;
2174 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2175 p += size;
2176 }
2177 ASSERT(p == bd->free);
2178 costSum += bd->free - bd->start;
2179 bd = bd->link;
2180 }
2181
2182 return costSum;
2183 }
2184
2185 static nat
2186 chainCheck(bdescr *bd)
2187 {
2188 nat costSum, size;
2189
2190 costSum = 0;
2191 while (bd != NULL) {
2192 // bd->free - bd->start is not an accurate measurement of the
2193 // object size. Actually it is always zero, so we compute its
2194 // size explicitly.
2195 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2196 sumOfCostLinear += size;
2197 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
2198 costSum += size;
2199 bd = bd->link;
2200 }
2201
2202 return costSum;
2203 }
2204
2205 static nat
2206 checkHeapSanityForRetainerProfiling( void )
2207 {
2208 nat costSum, g, s;
2209
2210 costSum = 0;
2211 debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2212 if (RtsFlags.GcFlags.generations == 1) {
2213 costSum += heapCheck(g0s0->to_blocks);
2214 debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2215 costSum += chainCheck(g0s0->large_objects);
2216 debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2217 } else {
2218 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
2219 for (s = 0; s < generations[g].n_steps; s++) {
2220 /*
2221 After all live objects have been scavenged, the garbage
2222 collector may create some objects in
2223 scheduleFinalizers(). These objects are created throught
2224 allocate(), so the small object pool or the large object
2225 pool of the g0s0 may not be empty.
2226 */
2227 if (g == 0 && s == 0) {
2228 costSum += smallObjectPoolCheck();
2229 debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2230 costSum += chainCheck(generations[g].steps[s].large_objects);
2231 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2232 } else {
2233 costSum += heapCheck(generations[g].steps[s].blocks);
2234 debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2235 costSum += chainCheck(generations[g].steps[s].large_objects);
2236 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2237 }
2238 }
2239 }
2240
2241 return costSum;
2242 }
2243
2244 void
2245 findPointer(StgPtr p)
2246 {
2247 StgPtr q, r, e;
2248 bdescr *bd;
2249 nat g, s;
2250
2251 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2252 for (s = 0; s < generations[g].n_steps; s++) {
2253 // if (g == 0 && s == 0) continue;
2254 bd = generations[g].steps[s].blocks;
2255 for (; bd; bd = bd->link) {
2256 for (q = bd->start; q < bd->free; q++) {
2257 if (*q == (StgWord)p) {
2258 r = q;
2259 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2260 debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2261 // return;
2262 }
2263 }
2264 }
2265 bd = generations[g].steps[s].large_objects;
2266 for (; bd; bd = bd->link) {
2267 e = bd->start + cost((StgClosure *)bd->start);
2268 for (q = bd->start; q < e; q++) {
2269 if (*q == (StgWord)p) {
2270 r = q;
2271 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2272 debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
2273 // return;
2274 }
2275 }
2276 }
2277 }
2278 }
2279 }
2280
2281 static void
2282 belongToHeap(StgPtr p)
2283 {
2284 bdescr *bd;
2285 nat g, s;
2286
2287 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2288 for (s = 0; s < generations[g].n_steps; s++) {
2289 // if (g == 0 && s == 0) continue;
2290 bd = generations[g].steps[s].blocks;
2291 for (; bd; bd = bd->link) {
2292 if (bd->start <= p && p < bd->free) {
2293 debugBelch("Belongs to gen[%d], step[%d]", g, s);
2294 return;
2295 }
2296 }
2297 bd = generations[g].steps[s].large_objects;
2298 for (; bd; bd = bd->link) {
2299 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2300 debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
2301 return;
2302 }
2303 }
2304 }
2305 }
2306 }
2307 #endif /* DEBUG_RETAINER */
2308
2309 #endif /* PROFILING */