RTS tidyup sweep, first phase
[ghc.git] / rts / RetainerProfile.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2001
4 * Author: Sungwoo Park
5 *
6 * Retainer profiling.
7 *
8 * ---------------------------------------------------------------------------*/
9
10 #ifdef PROFILING
11
12 // Turn off inlining when debugging - it obfuscates things
13 #ifdef DEBUG
14 #define INLINE
15 #else
16 #define INLINE inline
17 #endif
18
19 #include "PosixSource.h"
20 #include "Rts.h"
21
22 #include "RtsUtils.h"
23 #include "RetainerProfile.h"
24 #include "RetainerSet.h"
25 #include "Schedule.h"
26 #include "Printer.h"
27 #include "Weak.h"
28 #include "Sanity.h"
29 #include "Profiling.h"
30 #include "Stats.h"
31 #include "ProfHeap.h"
32 #include "Apply.h"
33
34 /*
35 Note: what to change in order to plug-in a new retainer profiling scheme?
36 (1) type retainer in ../includes/StgRetainerProf.h
37 (2) retainer function R(), i.e., getRetainerFrom()
38 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
39 in RetainerSet.h, if needed.
40 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
41 */
42
43 /* -----------------------------------------------------------------------------
44 * Declarations...
45 * -------------------------------------------------------------------------- */
46
47 static nat retainerGeneration; // generation
48
49 static nat numObjectVisited; // total number of objects visited
50 static nat timesAnyObjectVisited; // number of times any objects are visited
51
52 /*
53 The rs field in the profile header of any object points to its retainer
54 set in an indirect way: if flip is 0, it points to the retainer set;
55 if flip is 1, it points to the next byte after the retainer set (even
56 for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
57 pointer. See retainerSetOf().
58 */
59
60 StgWord flip = 0; // flip bit
61 // must be 0 if DEBUG_RETAINER is on (for static closures)
62
63 #define setRetainerSetToNull(c) \
64 (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)
65
66 static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
67 static void retainClosure(StgClosure *, StgClosure *, retainer);
68 #ifdef DEBUG_RETAINER
69 static void belongToHeap(StgPtr p);
70 #endif
71
72 #ifdef DEBUG_RETAINER
73 /*
74 cStackSize records how many times retainStack() has been invoked recursively,
75 that is, the number of activation records for retainStack() on the C stack.
76 maxCStackSize records its max value.
77 Invariants:
78 cStackSize <= maxCStackSize
79 */
80 static nat cStackSize, maxCStackSize;
81
82 static nat sumOfNewCost; // sum of the cost of each object, computed
83 // when the object is first visited
84 static nat sumOfNewCostExtra; // for those objects not visited during
85 // retainer profiling, e.g., MUT_VAR
86 static nat costArray[N_CLOSURE_TYPES];
87
88 nat sumOfCostLinear; // sum of the costs of all object, computed
89 // when linearly traversing the heap after
90 // retainer profiling
91 nat costArrayLinear[N_CLOSURE_TYPES];
92 #endif
93
94 /* -----------------------------------------------------------------------------
95 * Retainer stack - header
 * Note:
 *   Although the retainer stack implementation could be separated
 *   from the retainer profiling engine, there does not seem to be
 *   any advantage in doing that; the retainer stack is an integral part
 *   of the retainer profiling engine and cannot be used elsewhere at
 *   all.
102 * -------------------------------------------------------------------------- */
103
104 typedef enum {
105 posTypeStep,
106 posTypePtrs,
107 posTypeSRT,
108 posTypeLargeSRT,
109 } nextPosType;
110
111 typedef union {
112 // fixed layout or layout specified by a field in the closure
113 StgWord step;
114
115 // layout.payload
116 struct {
117 // See StgClosureInfo in InfoTables.h
118 #if SIZEOF_VOID_P == 8
119 StgWord32 pos;
120 StgWord32 ptrs;
121 #else
122 StgWord16 pos;
123 StgWord16 ptrs;
124 #endif
125 StgPtr payload;
126 } ptrs;
127
128 // SRT
129 struct {
130 StgClosure **srt;
131 StgWord srt_bitmap;
132 } srt;
133
134 // Large SRT
135 struct {
136 StgLargeSRT *srt;
137 StgWord offset;
138 } large_srt;
139
140 } nextPos;
141
142 typedef struct {
143 nextPosType type;
144 nextPos next;
145 } stackPos;
146
147 typedef struct {
148 StgClosure *c;
149 retainer c_child_r;
150 stackPos info;
151 } stackElement;
152
153 /*
154 Invariants:
155 firstStack points to the first block group.
156 currentStack points to the block group currently being used.
157 currentStack->free == stackLimit.
158 stackTop points to the topmost byte in the stack of currentStack.
159 Unless the whole stack is empty, stackTop must point to the topmost
160 object (or byte) in the whole stack. Thus, it is only when the whole stack
161 is empty that stackTop == stackLimit (not during the execution of push()
162 and pop()).
163 stackBottom == currentStack->start.
164 stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
165 Note:
166 When a current stack becomes empty, stackTop is set to point to
167 the topmost element on the previous block group so as to satisfy
168 the invariants described above.
169 */
170 static bdescr *firstStack = NULL;
171 static bdescr *currentStack;
172 static stackElement *stackBottom, *stackTop, *stackLimit;
173
174 /*
175 currentStackBoundary is used to mark the current stack chunk.
176 If stackTop == currentStackBoundary, it means that the current stack chunk
177 is empty. It is the responsibility of the user to keep currentStackBoundary
178 valid all the time if it is to be employed.
179 */
180 static stackElement *currentStackBoundary;
181
182 /*
183 stackSize records the current size of the stack.
184 maxStackSize records its high water mark.
185 Invariants:
186 stackSize <= maxStackSize
187 Note:
188 stackSize is just an estimate measure of the depth of the graph. The reason
189 is that some heap objects have only a single child and may not result
190 in a new element being pushed onto the stack. Therefore, at the end of
191 retainer profiling, maxStackSize + maxCStackSize is some value no greater
192 than the actual depth of the graph.
193 */
194 #ifdef DEBUG_RETAINER
195 static int stackSize, maxStackSize;
196 #endif
197
198 // number of blocks allocated for one stack
199 #define BLOCKS_IN_STACK 1
200
201 /* -----------------------------------------------------------------------------
202 * Add a new block group to the stack.
203 * Invariants:
204 * currentStack->link == s.
205 * -------------------------------------------------------------------------- */
206 static INLINE void
207 newStackBlock( bdescr *bd )
208 {
209 currentStack = bd;
210 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
211 stackBottom = (stackElement *)bd->start;
212 stackLimit = (stackElement *)stackTop;
213 bd->free = (StgPtr)stackLimit;
214 }
215
216 /* -----------------------------------------------------------------------------
217 * Return to the previous block group.
218 * Invariants:
219 * s->link == currentStack.
220 * -------------------------------------------------------------------------- */
221 static INLINE void
222 returnToOldStack( bdescr *bd )
223 {
224 currentStack = bd;
225 stackTop = (stackElement *)bd->free;
226 stackBottom = (stackElement *)bd->start;
227 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
228 bd->free = (StgPtr)stackLimit;
229 }
230
231 /* -----------------------------------------------------------------------------
232 * Initializes the traverse stack.
233 * -------------------------------------------------------------------------- */
234 static void
235 initializeTraverseStack( void )
236 {
237 if (firstStack != NULL) {
238 freeChain(firstStack);
239 }
240
241 firstStack = allocGroup(BLOCKS_IN_STACK);
242 firstStack->link = NULL;
243 firstStack->u.back = NULL;
244
245 newStackBlock(firstStack);
246 }
247
248 /* -----------------------------------------------------------------------------
249 * Frees all the block groups in the traverse stack.
250 * Invariants:
251 * firstStack != NULL
252 * -------------------------------------------------------------------------- */
253 static void
254 closeTraverseStack( void )
255 {
256 freeChain(firstStack);
257 firstStack = NULL;
258 }
259
260 /* -----------------------------------------------------------------------------
261 * Returns rtsTrue if the whole stack is empty.
262 * -------------------------------------------------------------------------- */
263 static INLINE rtsBool
264 isEmptyRetainerStack( void )
265 {
266 return (firstStack == currentStack) && stackTop == stackLimit;
267 }
268
269 /* -----------------------------------------------------------------------------
270 * Returns size of stack
271 * -------------------------------------------------------------------------- */
272 #ifdef DEBUG
273 lnat
274 retainerStackBlocks( void )
275 {
276 bdescr* bd;
277 lnat res = 0;
278
279 for (bd = firstStack; bd != NULL; bd = bd->link)
280 res += bd->blocks;
281
282 return res;
283 }
284 #endif
285
286 /* -----------------------------------------------------------------------------
287 * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
288 * i.e., if the current stack chunk is empty.
289 * -------------------------------------------------------------------------- */
290 static INLINE rtsBool
291 isOnBoundary( void )
292 {
293 return stackTop == currentStackBoundary;
294 }
295
296 /* -----------------------------------------------------------------------------
297 * Initializes *info from ptrs and payload.
298 * Invariants:
299 * payload[] begins with ptrs pointers followed by non-pointers.
300 * -------------------------------------------------------------------------- */
301 static INLINE void
302 init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
303 {
304 info->type = posTypePtrs;
305 info->next.ptrs.pos = 0;
306 info->next.ptrs.ptrs = ptrs;
307 info->next.ptrs.payload = payload;
308 }
309
310 /* -----------------------------------------------------------------------------
311 * Find the next object from *info.
312 * -------------------------------------------------------------------------- */
313 static INLINE StgClosure *
314 find_ptrs( stackPos *info )
315 {
316 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
317 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
318 } else {
319 return NULL;
320 }
321 }
322
323 /* -----------------------------------------------------------------------------
324 * Initializes *info from SRT information stored in *infoTable.
325 * -------------------------------------------------------------------------- */
326 static INLINE void
327 init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
328 {
329 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
330 info->type = posTypeLargeSRT;
331 info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
332 info->next.large_srt.offset = 0;
333 } else {
334 info->type = posTypeSRT;
335 info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
336 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
337 }
338 }
339
340 static INLINE void
341 init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
342 {
343 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
344 info->type = posTypeLargeSRT;
345 info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
346 info->next.large_srt.offset = 0;
347 } else {
348 info->type = posTypeSRT;
349 info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
350 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
351 }
352 }
353
354 /* -----------------------------------------------------------------------------
355 * Find the next object from *info.
356 * -------------------------------------------------------------------------- */
// Returns the next live closure referenced by the SRT described by *info,
// advancing *info past it, or NULL when the SRT is exhausted.
// The small-bitmap and large-bitmap cases are distinguished by info->type,
// as set up by init_srt_fun()/init_srt_thunk().
static INLINE StgClosure *
find_srt( stackPos *info )
{
    StgClosure *c;
    StgWord bitmap;

    if (info->type == posTypeSRT) {
        // Small SRT bitmap: walk srt[] and the bitmap in lockstep, stopping
        // at the first set bit.  The cursor state (srt pointer and shifted
        // bitmap) is written back into *info before returning.
        bitmap = info->next.srt.srt_bitmap;
        while (bitmap != 0) {
            if ((bitmap & 1) != 0) {
#if defined(__PIC__) && defined(mingw32_TARGET_OS)
                // NOTE(review): on PIC Windows a tagged (low-bit-set) SRT
                // entry appears to be an extra indirection (presumably a
                // DLL import slot) that must be followed and untagged —
                // confirm against the corresponding code in GC.c.
                if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
                    c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
                else
                    c = *(info->next.srt.srt);
#else
                c = *(info->next.srt.srt);
#endif
                // Save the cursor past this entry so the next call resumes
                // at the following bit.
                bitmap = bitmap >> 1;
                info->next.srt.srt++;
                info->next.srt.srt_bitmap = bitmap;
                return c;
            }
            bitmap = bitmap >> 1;
            info->next.srt.srt++;
        }
        // bitmap is now zero...
        return NULL;
    }
    else {
        // Large SRT bitmap: the bitmap is an array of StgWords; 'i' indexes
        // bits across the whole array.
        nat i = info->next.large_srt.offset;
        StgWord bitmap;

        // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
        bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
        bitmap = bitmap >> (i % BITS_IN(StgWord));
        while (i < info->next.large_srt.srt->l.size) {
            if ((bitmap & 1) != 0) {
                c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
                i++;
                info->next.large_srt.offset = i;
                return c;
            }
            i++;
            // On a word boundary, load the next bitmap word; otherwise just
            // shift to expose the next bit.
            if (i % BITS_IN(W_) == 0) {
                bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
            } else {
                bitmap = bitmap >> 1;
            }
        }
        // reached the end of this bitmap.
        info->next.large_srt.offset = i;
        return NULL;
    }
}
414
415 /* -----------------------------------------------------------------------------
416 * push() pushes a stackElement representing the next child of *c
417 * onto the traverse stack. If *c has no child, *first_child is set
418 * to NULL and nothing is pushed onto the stack. If *c has only one
419 * child, *c_chlid is set to that child and nothing is pushed onto
420 * the stack. If *c has more than two children, *first_child is set
421 * to the first child and a stackElement representing the second
422 * child is pushed onto the stack.
423
424 * Invariants:
425 * *c_child_r is the most recent retainer of *c's children.
426 * *c is not any of TSO, AP, PAP, AP_STACK, which means that
427 * there cannot be any stack objects.
428 * Note: SRTs are considered to be children as well.
429 * -------------------------------------------------------------------------- */
static INLINE void
push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
{
    stackElement se;
    bdescr *nbd;      // Next Block Descriptor

#ifdef DEBUG_RETAINER
    // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    // Stack-carrying objects (TSO, AP_STACK, ...) are handled by
    // retainStack(), never pushed here (see the invariants above).
    ASSERT(get_itbl(c)->type != TSO);
    ASSERT(get_itbl(c)->type != AP_STACK);

    //
    // fill in se
    //

    se.c = c;
    se.c_child_r = c_child_r;

    // fill in se.info
    switch (get_itbl(c)->type) {
        // no child, no SRT
    case CONSTR_0_1:
    case CONSTR_0_2:
    case CAF_BLACKHOLE:
    case BLACKHOLE:
    case ARR_WORDS:
        *first_child = NULL;
        return;

        // one child (fixed), no SRT: report the sole child and push nothing.
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
        *first_child = ((StgMutVar *)c)->var;
        return;
    case THUNK_SELECTOR:
        *first_child = ((StgSelector *)c)->selectee;
        return;
    case IND_PERM:
    case IND_OLDGEN_PERM:
    case IND_OLDGEN:
        *first_child = ((StgInd *)c)->indirectee;
        return;
    case CONSTR_1_0:
    case CONSTR_1_1:
        *first_child = c->payload[0];
        return;

        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;

        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
    case CONSTR_2_0:
        *first_child = c->payload[0];         // return the first pointer
        // se.info.type = posTypeStep;
        // se.info.next.step = 2;             // 2 = second
        break;

        // three children (fixed), no SRT
        // need to push a stackElement
    case MVAR_CLEAN:
    case MVAR_DIRTY:
        // head must be TSO and the head of a linked list of TSOs.
        // Should it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;                // 2 = second
        break;

        // three children (fixed), no SRT
    case WEAK:
        *first_child = ((StgWeak *)c)->key;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;
        break;

        // layout.payload.ptrs, no SRT
    case CONSTR:
    case STABLE_NAME:
    case BCO:
    case CONSTR_STATIC:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;   // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // layout.payload.ptrs, SRT
    case FUN:           // *c is a heap object.
    case FUN_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;

    case THUNK:
    case THUNK_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
    case FUN_1_0:
    case FUN_1_1:
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;

    case THUNK_1_0:
    case THUNK_1_1:
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;

        // SRT only (FUN_STATIC deliberately falls through to the 0-ptr cases)
    case FUN_STATIC:    // *c is a heap object.
        ASSERT(get_itbl(c)->srt_bitmap != 0);
    case FUN_0_1:
    case FUN_0_2:
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

        // SRT only
    case THUNK_STATIC:
        ASSERT(get_itbl(c)->srt_bitmap != 0);
    case THUNK_0_1:
    case THUNK_0_2:
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

        // STM objects: fixed-layout children, stepped via se.info.next.step
        // like MVAR/WEAK above (info.type is likewise left uninitialized).
    case TVAR_WATCH_QUEUE:
        *first_child = (StgClosure *)((StgTVarWatchQueue *)c)->closure;
        se.info.next.step = 2;                // 2 = second
        break;
    case TVAR:
        *first_child = (StgClosure *)((StgTVar *)c)->current_value;
        break;
    case TREC_HEADER:
        *first_child = (StgClosure *)((StgTRecHeader *)c)->enclosing_trec;
        break;
    case TREC_CHUNK:
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
        se.info.next.step = 0;  // entry no.
        break;

        // cannot appear
    case PAP:
    case AP:
    case AP_STACK:
    case TSO:
    case IND_STATIC:
    case CONSTR_NOCAF_STATIC:
        // stack objects
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
        // invalid objects
    case IND:
    case BLOCKED_FETCH:
    case FETCH_ME:
    case FETCH_ME_BQ:
    case RBH:
    case REMOTE_REF:
    case INVALID_OBJECT:
    default:
        barf("Invalid object *c in push()");
        return;
    }

    // The stack grows downwards; if there is no room for one more element
    // in the current chunk, switch to (or allocate) the next block group.
    if (stackTop - 1 < stackBottom) {
#ifdef DEBUG_RETAINER
        // debugBelch("push() to the next stack.\n");
#endif
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
        currentStack->free = (StgPtr)stackTop;

        if (currentStack->link == NULL) {
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
            nbd->u.back = currentStack;
            currentStack->link = nbd;
        } else
            nbd = currentStack->link;

        newStackBlock(nbd);
    }

    // adjust stackTop (actual push)
    stackTop--;
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.

    // ToDo: The line below leads to the warning:
    //    warning: 'se.info.type' may be used uninitialized in this function
    // This is caused by the fact that there are execution paths through the
    // large switch statement above where some cases do not initialize this
    // field. Is this really harmless? Can we avoid the warning?
    *stackTop = se;

#ifdef DEBUG_RETAINER
    stackSize++;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    // ASSERT(stackSize >= 0);
    // debugBelch("stackSize = %d\n", stackSize);
#endif
}
674
675 /* -----------------------------------------------------------------------------
676 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
677 * Invariants:
678 * stackTop cannot be equal to stackLimit unless the whole stack is
679 * empty, in which case popOff() is not allowed.
680 * Note:
681 * You can think of popOffReal() as a part of popOff() which is
682 * executed at the end of popOff() in necessary. Since popOff() is
683 * likely to be executed quite often while popOffReal() is not, we
684 * separate popOffReal() from popOff(), which is declared as an
685 * INLINE function (for the sake of execution speed). popOffReal()
686 * is called only within popOff() and nowhere else.
687 * -------------------------------------------------------------------------- */
static void
popOffReal(void)
{
    bdescr *pbd;    // Previous Block Descriptor

#ifdef DEBUG_RETAINER
    // debugBelch("pop() to the previous stack.\n");
#endif

    // Only called by popOff() when exactly one element remains in the
    // current chunk (the slow path).
    ASSERT(stackTop + 1 == stackLimit);
    ASSERT(stackBottom == (stackElement *)currentStack->start);

    if (firstStack == currentStack) {
        // The stack is completely empty.
        stackTop++;
        ASSERT(stackTop == stackLimit);
#ifdef DEBUG_RETAINER
        stackSize--;
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
        ASSERT(stackSize >= 0);
        debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
    currentStack->free = (StgPtr)stackLimit;

    // find the previous block descriptor
    pbd = currentStack->u.back;
    ASSERT(pbd != NULL);

    // Reactivate the previous chunk; its saved top becomes stackTop,
    // restoring the invariant that stackTop points at the topmost element
    // of the whole stack.
    returnToOldStack(pbd);

#ifdef DEBUG_RETAINER
    stackSize--;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    /*
    ASSERT(stackSize >= 0);
    debugBelch("stackSize = %d\n", stackSize);
    */
#endif
}
734
735 static INLINE void
736 popOff(void) {
737 #ifdef DEBUG_RETAINER
738 // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
739 #endif
740
741 ASSERT(stackTop != stackLimit);
742 ASSERT(!isEmptyRetainerStack());
743
744 // <= (instead of <) is wrong!
745 if (stackTop + 1 < stackLimit) {
746 stackTop++;
747 #ifdef DEBUG_RETAINER
748 stackSize--;
749 if (stackSize > maxStackSize) maxStackSize = stackSize;
750 /*
751 ASSERT(stackSize >= 0);
752 debugBelch("stackSize = %d\n", stackSize);
753 */
754 #endif
755 return;
756 }
757
758 popOffReal();
759 }
760
761 /* -----------------------------------------------------------------------------
 * Finds the next object to be considered for retainer profiling and stores
 * its pointer to *c.
764 * Test if the topmost stack element indicates that more objects are left,
765 * and if so, retrieve the first object and store its pointer to *c. Also,
766 * set *cp and *r appropriately, both of which are stored in the stack element.
767 * The topmost stack element then is overwritten so as for it to now denote
768 * the next object.
769 * If the topmost stack element indicates no more objects are left, pop
770 * off the stack element until either an object can be retrieved or
771 * the current stack chunk becomes empty, indicated by rtsTrue returned by
772 * isOnBoundary(), in which case *c is set to NULL.
773 * Note:
774 * It is okay to call this function even when the current stack chunk
775 * is empty.
776 * -------------------------------------------------------------------------- */
static INLINE void
pop( StgClosure **c, StgClosure **cp, retainer *r )
{
    stackElement *se;

#ifdef DEBUG_RETAINER
    // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    // Loop until a child can be produced (*c, with parent *cp and retainer
    // *r) or the current chunk is exhausted (*c set to NULL).  The 'break's
    // inside the switch continue this loop after popping a depleted element.
    do {
        if (isOnBoundary()) {     // if the current stack chunk is depleted
            *c = NULL;
            return;
        }

        se = stackTop;

        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

            // three children (fixed), no SRT
            // need to push a stackElement
        case MVAR_CLEAN:
        case MVAR_DIRTY:
            // step 2 yields the tail; step 3 yields the value and retires
            // the element (push() already returned the head as first child).
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;             // move to the next step
                // no popOff
            } else {
                *c = ((StgMVar *)se->c)->value;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
                // no popOff
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR_WATCH_QUEUE:
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgTVarWatchQueue *)se->c)->next_queue_entry;
                se->info.next.step++;             // move to the next step
                // no popOff
            } else {
                *c = (StgClosure *)((StgTVarWatchQueue *)se->c)->prev_queue_entry;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR:
            *c = (StgClosure *)((StgTVar *)se->c)->first_watch_queue_entry;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_HEADER:
            *c = (StgClosure *)((StgTRecHeader *)se->c)->current_chunk;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow.  So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            TRecEntry *entry;
            nat entry_no = se->info.next.step >> 2;
            nat field_no = se->info.next.step & 3;
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                *c = NULL;
                popOff();
                return;
            }
            entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            *cp = se->c;
            *r = se->c_child_r;
            se->info.next.step++;
            return;
        }

            // layout.payload.ptrs, no SRT
        case CONSTR:
        case STABLE_NAME:
        case BCO:
        case CONSTR_STATIC:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
        case MUT_ARR_PTRS_FROZEN:
        case MUT_ARR_PTRS_FROZEN0:
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
                // element exhausted: pop it and try the one below
                popOff();
                break;
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // layout.payload.ptrs, SRT: first drain the pointers, then
            // switch the element over to SRT iteration and fall into do_srt.
        case FUN:         // always a heap object
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
            if (*c != NULL) {
                *cp = se->c;
                *r = se->c_child_r;
                return;
            }
            // SRT exhausted: pop this element and continue with the next.
            popOff();
            break;

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case CAF_BLACKHOLE:
        case BLACKHOLE:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case IND_PERM:
        case IND_OLDGEN_PERM:
        case IND_OLDGEN:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
        case IND_STATIC:
        case CONSTR_NOCAF_STATIC:
            // stack objects
        case RET_DYN:
        case UPDATE_FRAME:
        case CATCH_FRAME:
        case STOP_FRAME:
        case RET_BCO:
        case RET_SMALL:
        case RET_BIG:
            // invalid objects
        case IND:
        case BLOCKED_FETCH:
        case FETCH_ME:
        case FETCH_ME_BQ:
        case RBH:
        case REMOTE_REF:
        case INVALID_OBJECT:
        default:
            // These types are never pushed (push() returns their children
            // directly or they cannot occur), so finding one here is a bug.
            barf("Invalid object *c in pop()");
            return;
        }
    } while (rtsTrue);
}
998
999 /* -----------------------------------------------------------------------------
1000 * RETAINER PROFILING ENGINE
1001 * -------------------------------------------------------------------------- */
1002
void
initRetainerProfiling( void )
{
    // Set up the global table of retainer sets and start counting
    // profiling passes from generation 0.
    initializeAllRetainerSet();
    retainerGeneration = 0;
}
1009
1010 /* -----------------------------------------------------------------------------
1011 * This function must be called before f-closing prof_file.
1012 * -------------------------------------------------------------------------- */
void
endRetainerProfiling( void )
{
#ifdef SECOND_APPROACH
    // Under the second approach the accumulated retainer sets are dumped
    // once at the end of the run, so they must be written out before
    // prof_file is closed.
    outputAllRetainerSet(prof_file);
#endif
}
1020
1021 /* -----------------------------------------------------------------------------
1022 * Returns the actual pointer to the retainer set of the closure *c.
1023 * It may adjust RSET(c) subject to flip.
1024 * Side effects:
1025 * RSET(c) is initialized to NULL if its current value does not
1026 * conform to flip.
1027 * Note:
1028 * Even though this function has side effects, they CAN be ignored because
1029 * subsequent calls to retainerSetOf() always result in the same return value
1030 * and retainerSetOf() is the only way to retrieve retainerSet of a given
1031 * closure.
1032 * We have to perform an XOR (^) operation each time a closure is examined.
1033 * The reason is that we do not know when a closure is visited last.
1034 * -------------------------------------------------------------------------- */
1035 static INLINE void
1036 maybeInitRetainerSet( StgClosure *c )
1037 {
1038 if (!isRetainerSetFieldValid(c)) {
1039 setRetainerSetToNull(c);
1040 }
1041 }
1042
1043 /* -----------------------------------------------------------------------------
1044 * Returns rtsTrue if *c is a retainer.
1045 * -------------------------------------------------------------------------- */
// Classify a closure by its info-table type: rtsTrue for retainers,
// rtsFalse for closures merely traversed through, barf() for types that
// must never appear in the retainer-profiling loop.
static INLINE rtsBool
isRetainer( StgClosure *c )
{
    switch (get_itbl(c)->type) {
	//
	//  True case
	//
	// TSOs MUST be retainers: they constitute the set of roots.
    case TSO:

	// mutable objects
    case MVAR_CLEAN:
    case MVAR_DIRTY:
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:

	// thunks are retainers.
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
    case AP:
    case AP_STACK:

	// Static thunks, or CAFS, are obviously retainers.
    case THUNK_STATIC:

	// WEAK objects are roots; there is separate code in which traversing
	// begins from WEAK objects.
    case WEAK:

	// Since the other mutvar-type things are retainers, seems
	// like the right thing to do:
    case TVAR:
	return rtsTrue;

	//
	// False case
	//

	// constructors
    case CONSTR:
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
	// functions
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
	// partial applications
    case PAP:
	// blackholes
    case CAF_BLACKHOLE:
    case BLACKHOLE:
	// indirection
    case IND_PERM:
    case IND_OLDGEN_PERM:
    case IND_OLDGEN:
	// static objects
    case CONSTR_STATIC:
    case FUN_STATIC:
	// misc
    case STABLE_NAME:
    case BCO:
    case ARR_WORDS:
	// STM
    case TVAR_WATCH_QUEUE:
    case TREC_HEADER:
    case TREC_CHUNK:
	return rtsFalse;

	//
	// Error case
	//
	// IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
    case IND_STATIC:
	// CONSTR_NOCAF_STATIC
	// cannot be *c, *cp, *r in the retainer profiling loop.
    case CONSTR_NOCAF_STATIC:
	// Stack objects are invalid because they are never treated as
	// legal objects during retainer profiling.
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_BIG:
	// other cases
    case IND:
    case BLOCKED_FETCH:
    case FETCH_ME:
    case FETCH_ME_BQ:
    case RBH:
    case REMOTE_REF:
    case INVALID_OBJECT:
    default:
	barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
	// not reached; keeps the compiler's missing-return warning quiet
	return rtsFalse;
    }
}
1159
1160 /* -----------------------------------------------------------------------------
1161 * Returns the retainer function value for the closure *c, i.e., R(*c).
1162 * This function does NOT return the retainer(s) of *c.
1163 * Invariants:
1164 * *c must be a retainer.
1165 * Note:
1166 * Depending on the definition of this function, the maintenance of retainer
1167 * sets can be made easier. If most retainer sets are likely to be created
1168 * again across garbage collections, refreshAllRetainerSet() in
1169 * RetainerSet.c can simply do nothing.
1170 * If this is not the case, we can free all the retainer sets and
1171 * re-initialize the hash table.
1172 * See refreshAllRetainerSet() in RetainerSet.c.
1173 * -------------------------------------------------------------------------- */
// Returns R(*c), the retainer value of *c, under the compile-time-selected
// retainer scheme.  *c must itself be a retainer (see isRetainer()).
// NOTE(review): there is no #else branch, so if none of the three
// RETAINER_SCHEME_* macros is defined this function falls off the end —
// presumably the build always defines exactly one; confirm in the makefiles.
static INLINE retainer
getRetainerFrom( StgClosure *c )
{
    ASSERT(isRetainer(c));

#if defined(RETAINER_SCHEME_INFO)
    // Retainer scheme 1: retainer = info table
    return get_itbl(c);
#elif defined(RETAINER_SCHEME_CCS)
    // Retainer scheme 2: retainer = cost centre stack
    return c->header.prof.ccs;
#elif defined(RETAINER_SCHEME_CC)
    // Retainer scheme 3: retainer = cost centre
    return c->header.prof.ccs->cc;
#endif
}
1190
1191 /* -----------------------------------------------------------------------------
1192 * Associates the retainer set *s with the closure *c, that is, *s becomes
1193 * the retainer set of *c.
1194 * Invariants:
1195 * c != NULL
1196 * s != NULL
1197 * -------------------------------------------------------------------------- */
// Make *s the retainer set of *c by storing s in c's profiling header,
// encoded with the current flip bit (see the comment on flip at the top
// of this file).
static INLINE void
associate( StgClosure *c, RetainerSet *s )
{
    // StgWord has the same size as pointers, so the following type
    // casting is okay.
    RSET(c) = (RetainerSet *)((StgWord)s | flip);
}
1205
1206 /* -----------------------------------------------------------------------------
1207 Call retainClosure for each of the closures covered by a large bitmap.
1208 -------------------------------------------------------------------------- */
1209
1210 static void
1211 retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size,
1212 StgClosure *c, retainer c_child_r)
1213 {
1214 nat i, b;
1215 StgWord bitmap;
1216
1217 b = 0;
1218 bitmap = large_bitmap->bitmap[b];
1219 for (i = 0; i < size; ) {
1220 if ((bitmap & 1) == 0) {
1221 retainClosure((StgClosure *)*p, c, c_child_r);
1222 }
1223 i++;
1224 p++;
1225 if (i % BITS_IN(W_) == 0) {
1226 b++;
1227 bitmap = large_bitmap->bitmap[b];
1228 } else {
1229 bitmap = bitmap >> 1;
1230 }
1231 }
1232 }
1233
1234 static INLINE StgPtr
1235 retain_small_bitmap (StgPtr p, nat size, StgWord bitmap,
1236 StgClosure *c, retainer c_child_r)
1237 {
1238 while (size > 0) {
1239 if ((bitmap & 1) == 0) {
1240 retainClosure((StgClosure *)*p, c, c_child_r);
1241 }
1242 p++;
1243 bitmap = bitmap >> 1;
1244 size--;
1245 }
1246 return p;
1247 }
1248
1249 /* -----------------------------------------------------------------------------
1250 * Call retainClosure for each of the closures in an SRT.
1251 * ------------------------------------------------------------------------- */
1252
1253 static void
1254 retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
1255 {
1256 nat i, b, size;
1257 StgWord bitmap;
1258 StgClosure **p;
1259
1260 b = 0;
1261 p = (StgClosure **)srt->srt;
1262 size = srt->l.size;
1263 bitmap = srt->l.bitmap[b];
1264 for (i = 0; i < size; ) {
1265 if ((bitmap & 1) != 0) {
1266 retainClosure((StgClosure *)*p, c, c_child_r);
1267 }
1268 i++;
1269 p++;
1270 if (i % BITS_IN(W_) == 0) {
1271 b++;
1272 bitmap = srt->l.bitmap[b];
1273 } else {
1274 bitmap = bitmap >> 1;
1275 }
1276 }
1277 }
1278
1279 static INLINE void
1280 retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
1281 {
1282 nat bitmap;
1283 StgClosure **p;
1284
1285 bitmap = srt_bitmap;
1286 p = srt;
1287
1288 if (bitmap == (StgHalfWord)(-1)) {
1289 retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1290 return;
1291 }
1292
1293 while (bitmap != 0) {
1294 if ((bitmap & 1) != 0) {
1295 #ifdef ENABLE_WIN32_DLL_SUPPORT
1296 if ( (unsigned long)(*srt) & 0x1 ) {
1297 retainClosure(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1)),
1298 c, c_child_r);
1299 } else {
1300 retainClosure(*srt,c,c_child_r);
1301 }
1302 #else
1303 retainClosure(*srt,c,c_child_r);
1304 #endif
1305 }
1306 p++;
1307 bitmap = bitmap >> 1;
1308 }
1309 }
1310
1311 /* -----------------------------------------------------------------------------
1312 * Process all the objects in the stack chunk from stackStart to stackEnd
1313 * with *c and *c_child_r being their parent and their most recent retainer,
1314 * respectively. Treat stackOptionalFun as another child of *c if it is
1315 * not NULL.
1316 * Invariants:
1317 * *c is one of the following: TSO, AP_STACK.
1318 * If *c is TSO, c == c_child_r.
1319 * stackStart < stackEnd.
1320 * RSET(c) and RSET(c_child_r) are valid, i.e., their
1321 * interpretation conforms to the current value of flip (even when they
1322 * are interpreted to be NULL).
1323 * If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
1324 * or ThreadKilled, which means that its stack is ready to process.
1325 * Note:
1326 * This code was almost plagiarzied from GC.c! For each pointer,
1327 * retainClosure() is invoked instead of evacuate().
1328 * -------------------------------------------------------------------------- */
// Walk the stack chunk [stackStart, stackEnd), retaining every pointer in
// every frame, with *c as parent and c_child_r as the children's retainer.
// See the header comment above for the full invariants.
static void
retainStack( StgClosure *c, retainer c_child_r,
	     StgPtr stackStart, StgPtr stackEnd )
{
    stackElement *oldStackBoundary;   // saved boundary, restored on exit
    StgPtr p;                         // scan pointer through the stack chunk
    StgRetInfoTable *info;            // return-address info table of frame at p
    StgWord32 bitmap;                 // small-bitmap liveness bits
    nat size;                         // frame payload size in words

#ifdef DEBUG_RETAINER
    // Track the depth of nested retainStack() invocations (C stack usage).
    cStackSize++;
    if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
#endif

    /*
      Each invocation of retainStack() creates a new virtual
      stack. Since all such stacks share a single common stack, we
      record the current currentStackBoundary, which will be restored
      at the exit.
    */
    oldStackBoundary = currentStackBoundary;
    currentStackBoundary = stackTop;

#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
#endif

    // A TSO's stack is only walkable if the thread is still live and in place.
    ASSERT(get_itbl(c)->type != TSO ||
	   (((StgTSO *)c)->what_next != ThreadRelocated &&
	    ((StgTSO *)c)->what_next != ThreadComplete &&
	    ((StgTSO *)c)->what_next != ThreadKilled));

    p = stackStart;
    while (p < stackEnd) {
	info = get_ret_itbl((StgClosure *)p);

	switch(info->i.type) {

	case UPDATE_FRAME:
	    // Single pointer field: the closure to be updated.
	    retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
	    p += sizeofW(StgUpdateFrame);
	    continue;

	// All of these frames carry a small bitmap describing their payload.
	case STOP_FRAME:
	case CATCH_FRAME:
	case CATCH_STM_FRAME:
	case CATCH_RETRY_FRAME:
	case ATOMICALLY_FRAME:
	case RET_SMALL:
	    bitmap = BITMAP_BITS(info->i.layout.bitmap);
	    size = BITMAP_SIZE(info->i.layout.bitmap);
	    p++;   // skip the frame header word
	    p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

	follow_srt:
	    // Static references of the frame's code.
	    retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
	    continue;

	case RET_BCO: {
	    StgBCO *bco;

	    p++;   // skip the frame header word
	    // The first payload word is the BCO itself; retain it, then use
	    // its bitmap to walk the remaining payload.
	    retainClosure((StgClosure *)*p, c, c_child_r);
	    bco = (StgBCO *)*p;
	    p++;
	    size = BCO_BITMAP_SIZE(bco);
	    retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
	    p += size;
	    continue;
	}

	// large bitmap (> 32 entries, or > 64 on a 64-bit machine)
	case RET_BIG:
	    size = GET_LARGE_BITMAP(&info->i)->size;
	    p++;   // skip the frame header word
	    retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
				size, c, c_child_r);
	    p += size;
	    // and don't forget to follow the SRT
	    goto follow_srt;

	// Dynamic bitmap: the mask is stored on the stack
	case RET_DYN: {
	    StgWord dyn;
	    dyn = ((StgRetDyn *)p)->liveness;

	    // traverse the bitmap first
	    bitmap = RET_DYN_LIVENESS(dyn);
	    p = (P_)&((StgRetDyn *)p)->payload[0];
	    size = RET_DYN_BITMAP_SIZE;
	    p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

	    // skip over the non-ptr words
	    p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;

	    // follow the ptr words
	    for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
		retainClosure((StgClosure *)*p, c, c_child_r);
		p++;
	    }
	    continue;
	}

	case RET_FUN: {
	    StgRetFun *ret_fun = (StgRetFun *)p;
	    StgFunInfoTable *fun_info;

	    retainClosure(ret_fun->fun, c, c_child_r);
	    // ret_fun->fun may be tagged; untag before fetching its info table.
	    fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun));

	    // The payload layout is dictated by the FUNCTION's bitmap.
	    p = (P_)&ret_fun->payload;
	    switch (fun_info->f.fun_type) {
	    case ARG_GEN:
		bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
		size = BITMAP_SIZE(fun_info->f.b.bitmap);
		p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
		break;
	    case ARG_GEN_BIG:
		size = GET_FUN_LARGE_BITMAP(fun_info)->size;
		retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
				    size, c, c_child_r);
		p += size;
		break;
	    default:
		// Standard argument pattern: bitmap comes from the fixed table.
		bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
		size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
		p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
		break;
	    }
	    goto follow_srt;
	}

	default:
	    barf("Invalid object found in retainStack(): %d",
		 (int)(info->i.type));
	}
    }

    // restore currentStackBoundary
    currentStackBoundary = oldStackBoundary;
#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
#endif

#ifdef DEBUG_RETAINER
    cStackSize--;
#endif
}
1478
1479 /* ----------------------------------------------------------------------------
1480 * Call retainClosure for each of the children of a PAP/AP
1481 * ------------------------------------------------------------------------- */
1482
// Retain the function and all pointer arguments of a PAP or AP closure.
// The payload layout is dictated by the argument bitmap of `fun`.
// Returns the first word past the scanned payload.
static INLINE StgPtr
retain_PAP_payload (StgClosure *pap,    /* NOT tagged */
                    retainer c_child_r, /* NOT tagged */
                    StgClosure *fun,    /* tagged */
		    StgClosure** payload, StgWord n_args)
{
    StgPtr p;
    StgWord bitmap;
    StgFunInfoTable *fun_info;

    // The function closure itself is a child of the PAP/AP.
    retainClosure(fun, pap, c_child_r);
    fun = UNTAG_CLOSURE(fun);
    fun_info = get_fun_itbl(fun);
    // A PAP's fun can never itself be a PAP.
    ASSERT(fun_info->i.type != PAP);

    p = (StgPtr)payload;

    switch (fun_info->f.fun_type) {
    case ARG_GEN:
	// Small bitmap stored in the function's info table; note that only
	// n_args entries are present (the PAP may be partially applied).
	bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
	p = retain_small_bitmap(p, n_args, bitmap, 
				pap, c_child_r);
	break;
    case ARG_GEN_BIG:
	retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
			    n_args, pap, c_child_r);
	p += n_args;
	break;
    case ARG_BCO:
	// Byte-code function: the liveness bitmap lives in the BCO itself.
	retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun), 
			    n_args, pap, c_child_r);
	p += n_args;
	break;
    default:
	// Standard argument pattern: bitmap comes from the fixed table.
	bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
	p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
	break;
    }
    return p;
}
1523
1524 /* -----------------------------------------------------------------------------
1525 * Compute the retainer set of *c0 and all its desecents by traversing.
1526 * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
1527 * Invariants:
1528 * c0 = cp0 = r0 holds only for root objects.
1529 * RSET(cp0) and RSET(r0) are valid, i.e., their
1530 * interpretation conforms to the current value of flip (even when they
1531 * are interpreted to be NULL).
1532 * However, RSET(c0) may be corrupt, i.e., it may not conform to
1533 * the current value of flip. If it does not, during the execution
1534 * of this function, RSET(c0) must be initialized as well as all
1535 * its descendants.
1536 * Note:
1537 * stackTop must be the same at the beginning and the exit of this function.
1538 * *c0 can be TSO (as well as AP_STACK).
1539 * -------------------------------------------------------------------------- */
static void
retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
{
    // c = Current closure                          (possibly tagged)
    // cp = Current closure's Parent                (NOT tagged)
    // r = current closures' most recent Retainer   (NOT tagged)
    // c_child_r = current closure's children's most recent retainer
    // first_child = first child of c
    StgClosure *c, *cp, *first_child;
    RetainerSet *s, *retainerSetOfc;
    retainer r, c_child_r;
    StgWord typeOfc;

#ifdef DEBUG_RETAINER
    // StgPtr oldStackTop;
#endif

#ifdef DEBUG_RETAINER
    // oldStackTop = stackTop;
    // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
#endif

    // (c, cp, r) = (c0, cp0, r0)
    c = c0;
    cp = cp0;
    r = r0;
    goto inner_loop;

loop:
    //debugBelch("loop");
    // pop to (c, cp, r);
    pop(&c, &cp, &r);

    if (c == NULL) {
	// Traverse stack exhausted: the whole subgraph of c0 is done.
#ifdef DEBUG_RETAINER
	// debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
#endif
	return;
    }

    //debugBelch("inner_loop");

inner_loop:
    // Pointers on the traverse stack / in parents may be tagged.
    c = UNTAG_CLOSURE(c);

    // c  = current closure under consideration,
    // cp = current closure's parent,
    // r  = current closure's most recent retainer
    //
    // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
    //   RSET(cp) and RSET(r) are valid.
    //   RSET(c) is valid only if c has been visited before.
    //
    // Loop invariants (on the relation between c, cp, and r)
    //   if cp is not a retainer, r belongs to RSET(cp).
    //   if cp is a retainer, r == cp.

    typeOfc = get_itbl(c)->type;

#ifdef DEBUG_RETAINER
    switch (typeOfc) {
    case IND_STATIC:
    case CONSTR_NOCAF_STATIC:
    case CONSTR_STATIC:
    case THUNK_STATIC:
    case FUN_STATIC:
	break;
    default:
	if (retainerSetOf(c) == NULL) {   // first visit?
	    costArray[typeOfc] += cost(c);
	    sumOfNewCost += cost(c);
	}
	break;
    }
#endif

    // special cases
    switch (typeOfc) {
    case TSO:
	// Dead threads are ignored entirely.
	if (((StgTSO *)c)->what_next == ThreadComplete ||
	    ((StgTSO *)c)->what_next == ThreadKilled) {
#ifdef DEBUG_RETAINER
	    debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
#endif
	    goto loop;
	}
	// Relocated threads are traversed through to their new location.
	if (((StgTSO *)c)->what_next == ThreadRelocated) {
#ifdef DEBUG_RETAINER
	    debugBelch("ThreadRelocated encountered in retainClosure()\n");
#endif
	    c = (StgClosure *)((StgTSO *)c)->_link;
	    goto inner_loop;
	}
	break;

    case IND_STATIC:
	// We just skip IND_STATIC, so its retainer set is never computed.
	c = ((StgIndStatic *)c)->indirectee;
	goto inner_loop;
	// static objects with no pointers out, so goto loop.
    case CONSTR_NOCAF_STATIC:
	// It is not just enough not to compute the retainer set for *c; it is
	// mandatory because CONSTR_NOCAF_STATIC are not reachable from
	// scavenged_static_objects, the list from which is assumed to traverse
	// all static objects after major garbage collections.
	goto loop;
    case THUNK_STATIC:
    case FUN_STATIC:
	if (get_itbl(c)->srt_bitmap == 0) {
	    // No need to compute the retainer set; no dynamic objects
	    // are reachable from *c.
	    //
	    // Static objects: if we traverse all the live closures,
	    // including static closures, during each heap census then
	    // we will observe that some static closures appear and
	    // disappear.  eg. a closure may contain a pointer to a
	    // static function 'f' which is not otherwise reachable
	    // (it doesn't indirectly point to any CAFs, so it doesn't
	    // appear in any SRTs), so we would find 'f' during
	    // traversal.  However on the next sweep there may be no
	    // closures pointing to 'f'.
	    //
	    // We must therefore ignore static closures whose SRT is
	    // empty, because these are exactly the closures that may
	    // "appear".  A closure with a non-empty SRT, and which is
	    // still required, will always be reachable.
	    //
	    // But what about CONSTR_STATIC?  Surely these may be able
	    // to appear, and they don't have SRTs, so we can't
	    // check.  So for now, we're calling
	    // resetStaticObjectForRetainerProfiling() from the
	    // garbage collector to reset the retainer sets in all the
	    // reachable static objects.
	    goto loop;
	}
    default:
	break;
    }

    // The above objects are ignored in computing the average number of times
    // an object is visited.
    timesAnyObjectVisited++;

    // If this is the first visit to c, initialize its retainer set.
    maybeInitRetainerSet(c);
    retainerSetOfc = retainerSetOf(c);

    // Now compute s:
    //    isRetainer(cp) == rtsTrue => s == NULL
    //    isRetainer(cp) == rtsFalse => s == cp.retainer
    if (isRetainer(cp))
	s = NULL;
    else
	s = retainerSetOf(cp);

    // (c, cp, r, s) is available.

    // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
    if (retainerSetOfc == NULL) {
	// This is the first visit to *c.
	numObjectVisited++;

	if (s == NULL)
	    associate(c, singleton(r));
	else
	    // s is actually the retainer set of *c!
	    associate(c, s);

	// compute c_child_r
	c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
    } else {
	// This is not the first visit to *c.
	if (isMember(r, retainerSetOfc))
	    goto loop;          // no need to process child

	if (s == NULL)
	    associate(c, addElement(r, retainerSetOfc));
	else {
	    // s is not NULL and cp is not a retainer. This means that
	    // each time *cp is visited, so is *c. Thus, if s has
	    // exactly one more element in its retainer set than c, s
	    // is also the new retainer set for *c.
	    if (s->num == retainerSetOfc->num + 1) {
		associate(c, s);
	    }
	    // Otherwise, just add R_r to the current retainer set of *c.
	    else {
		associate(c, addElement(r, retainerSetOfc));
	    }
	}

	if (isRetainer(c))
	    goto loop;          // no need to process child

	// compute c_child_r
	c_child_r = r;
    }

    // now, RSET() of all of *c, *cp, and *r is valid.
    // (c, c_child_r) are available.

    // process child

    // Special case closures: we process these all in one go rather
    // than attempting to save the current position, because doing so
    // would be hard.
    switch (typeOfc) {
    case TSO:
	retainStack(c, c_child_r,
		    ((StgTSO *)c)->sp,
		    ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
	goto loop;

    case PAP:
    {
	StgPAP *pap = (StgPAP *)c;
	retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
	goto loop;
    }

    case AP:
    {
	StgAP *ap = (StgAP *)c;
	retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
	goto loop;
    }

    case AP_STACK:
	retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
	retainStack(c, c_child_r,
		    (StgPtr)((StgAP_STACK *)c)->payload,
		    (StgPtr)((StgAP_STACK *)c)->payload +
			     ((StgAP_STACK *)c)->size);
	goto loop;
    }

    // All other closure types: push onto the traverse stack and descend
    // into the first child, if any.
    push(c, c_child_r, &first_child);

    // If first_child is null, c has no child.
    // If first_child is not null, the top stack element points to the next
    // object. push() may or may not push a stackElement on the stack.
    if (first_child == NULL)
	goto loop;

    // (c, cp, r) = (first_child, c, c_child_r)
    r = c_child_r;
    cp = c;
    c = first_child;
    goto inner_loop;
}
1790
1791 /* -----------------------------------------------------------------------------
1792 * Compute the retainer set for every object reachable from *tl.
1793 * -------------------------------------------------------------------------- */
static void
retainRoot(void *user STG_UNUSED, StgClosure **tl)
{
    StgClosure *c;

    // We no longer assume that only TSOs and WEAKs are roots; any closure can
    // be a root.

    // Each root is traversed with a fresh (empty) virtual traverse stack.
    ASSERT(isEmptyRetainerStack());
    currentStackBoundary = stackTop;

    c = UNTAG_CLOSURE(*tl);
    if (c != &stg_END_TSO_QUEUE_closure && isRetainer(c)) {
	// A retainer root retains itself.
	retainClosure(c, c, getRetainerFrom(c));
    } else {
	// Non-retainer roots (and the end-of-queue sentinel) are attributed
	// to the system cost-centre stack.
	retainClosure(c, c, CCS_SYSTEM);
    }

    // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
    // *tl might be a TSO which is ThreadComplete, in which
    // case we ignore it for the purposes of retainer profiling.
}
1816
1817 /* -----------------------------------------------------------------------------
1818 * Compute the retainer set for each of the objects in the heap.
1819 * -------------------------------------------------------------------------- */
static void
computeRetainerSet( void )
{
    StgWeak *weak;
    RetainerSet *rtl;
    nat g;
    StgPtr ml;
    bdescr *bd;
#ifdef DEBUG_RETAINER
    RetainerSet tmpRetainerSet;
#endif

    markCapabilities(retainRoot, NULL); // for scheduler roots

    // This function is called after a major GC, when key, value, and finalizer
    // all are guaranteed to be valid, or reachable.
    //
    // The following code assumes that WEAK objects are considered to be roots
    // for retainer profiling.
    // NOTE(review): &weak is the address of the loop variable, so retainRoot
    // sees the current weak pointer; any root update written back through tl
    // would be lost — presumably retainRoot never writes, but confirm.
    for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
	// retainRoot((StgClosure *)weak);
	retainRoot(NULL, (StgClosure **)&weak);

    // Consider roots from the stable ptr table.
    markStablePtrTable(retainRoot, NULL);

    // The following code resets the rs field of each unvisited mutable
    // object (computing sumOfNewCostExtra and updating costArray[] when
    // debugging retainer profiler).
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
	// NOT TRUE: even G0 has a block on its mutable list
	// ASSERT(g != 0 || (generations[g].mut_list == NULL));

	// Traversing through mut_list is necessary
	// because we can find MUT_VAR objects which have not been
	// visited during retainer profiling.
	for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
	    for (ml = bd->start; ml < bd->free; ml++) {

		// Each word on the mutable list is a pointer to a closure.
		maybeInitRetainerSet((StgClosure *)*ml);
		rtl = retainerSetOf((StgClosure *)*ml);

#ifdef DEBUG_RETAINER
		if (rtl == NULL) {
		    // first visit to *ml
		    // This is a violation of the interface rule!
		    RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);

		    switch (get_itbl((StgClosure *)ml)->type) {
		    case IND_STATIC:
			// no cost involved
			break;
		    case CONSTR_NOCAF_STATIC:
		    case CONSTR_STATIC:
		    case THUNK_STATIC:
		    case FUN_STATIC:
			barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
			break;
		    default:
			// dynamic objects
			costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
			sumOfNewCostExtra += cost((StgClosure *)ml);
			break;
		    }
		}
#endif
	    }
	}
    }
}
1890
1891 /* -----------------------------------------------------------------------------
1892 * Traverse all static objects for which we compute retainer sets,
1893 * and reset their rs fields to NULL, which is accomplished by
1894 * invoking maybeInitRetainerSet(). This function must be called
1895 * before zeroing all objects reachable from scavenged_static_objects
1896 * in the case of major gabage collections. See GarbageCollect() in
1897 * GC.c.
1898 * Note:
1899 * The mut_once_list of the oldest generation must also be traversed?
1900 * Why? Because if the evacuation of an object pointed to by a static
1901 * indirection object fails, it is put back to the mut_once_list of
1902 * the oldest generation.
1903 * However, this is not necessary because any static indirection objects
1904 * are just traversed through to reach dynamic objects. In other words,
1905 * they are not taken into consideration in computing retainer sets.
1906 * -------------------------------------------------------------------------- */
1907 void
1908 resetStaticObjectForRetainerProfiling( StgClosure *static_objects )
1909 {
1910 #ifdef DEBUG_RETAINER
1911 nat count;
1912 #endif
1913 StgClosure *p;
1914
1915 #ifdef DEBUG_RETAINER
1916 count = 0;
1917 #endif
1918 p = static_objects;
1919 while (p != END_OF_STATIC_LIST) {
1920 #ifdef DEBUG_RETAINER
1921 count++;
1922 #endif
1923 switch (get_itbl(p)->type) {
1924 case IND_STATIC:
1925 // Since we do not compute the retainer set of any
1926 // IND_STATIC object, we don't have to reset its retainer
1927 // field.
1928 p = (StgClosure*)*IND_STATIC_LINK(p);
1929 break;
1930 case THUNK_STATIC:
1931 maybeInitRetainerSet(p);
1932 p = (StgClosure*)*THUNK_STATIC_LINK(p);
1933 break;
1934 case FUN_STATIC:
1935 maybeInitRetainerSet(p);
1936 p = (StgClosure*)*FUN_STATIC_LINK(p);
1937 break;
1938 case CONSTR_STATIC:
1939 maybeInitRetainerSet(p);
1940 p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
1941 break;
1942 default:
1943 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1944 p, get_itbl(p)->type);
1945 break;
1946 }
1947 }
1948 #ifdef DEBUG_RETAINER
1949 // debugBelch("count in scavenged_static_objects = %d\n", count);
1950 #endif
1951 }
1952
1953 /* -----------------------------------------------------------------------------
1954 * Perform retainer profiling.
1955 * N is the oldest generation being profilied, where the generations are
1956 * numbered starting at 0.
1957 * Invariants:
1958 * Note:
1959 * This function should be called only immediately after major garbage
1960 * collection.
1961 * ------------------------------------------------------------------------- */
void
retainerProfile(void)
{
#ifdef DEBUG_RETAINER
    nat i;
    nat totalHeapSize;        // total raw heap size (computed by linear scanning)
#endif

#ifdef DEBUG_RETAINER
    debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
#endif

    stat_startRP();

    // We haven't flipped the bit yet.
#ifdef DEBUG_RETAINER
    debugBelch("Before traversing:\n");
    sumOfCostLinear = 0;
    for (i = 0;i < N_CLOSURE_TYPES; i++)
	costArrayLinear[i] = 0;
    totalHeapSize = checkHeapSanityForRetainerProfiling();

    debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
    /*
    debugBelch("costArrayLinear[] = ");
    for (i = 0;i < N_CLOSURE_TYPES; i++)
	debugBelch("[%u:%u] ", i, costArrayLinear[i]);
    debugBelch("\n");
    */

    ASSERT(sumOfCostLinear == totalHeapSize);

/*
#define pcostArrayLinear(index) \
    if (costArrayLinear[index] > 0) \
	debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
    pcostArrayLinear(THUNK_STATIC);
    pcostArrayLinear(FUN_STATIC);
    pcostArrayLinear(CONSTR_STATIC);
    pcostArrayLinear(CONSTR_NOCAF_STATIC);
*/
#endif

    // Now we flips flip.
    // This invalidates every rs field from the previous pass in one step;
    // see the comment on flip at the top of this file.
    flip = flip ^ 1;

#ifdef DEBUG_RETAINER
    stackSize = 0;
    maxStackSize = 0;
    cStackSize = 0;
    maxCStackSize = 0;
#endif
    numObjectVisited = 0;
    timesAnyObjectVisited = 0;

#ifdef DEBUG_RETAINER
    debugBelch("During traversing:\n");
    sumOfNewCost = 0;
    sumOfNewCostExtra = 0;
    for (i = 0;i < N_CLOSURE_TYPES; i++)
	costArray[i] = 0;
#endif

    /*
      We initialize the traverse stack each time the retainer profiling is
      performed (because the traverse stack size varies on each retainer profiling
      and this operation is not costly anyhow). However, we just refresh the
      retainer sets.
     */
    initializeTraverseStack();
#ifdef DEBUG_RETAINER
    initializeAllRetainerSet();
#else
    refreshAllRetainerSet();
#endif
    computeRetainerSet();

#ifdef DEBUG_RETAINER
    debugBelch("After traversing:\n");
    sumOfCostLinear = 0;
    for (i = 0;i < N_CLOSURE_TYPES; i++)
	costArrayLinear[i] = 0;
    totalHeapSize = checkHeapSanityForRetainerProfiling();

    debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
    ASSERT(sumOfCostLinear == totalHeapSize);

    // now, compare the two results
    /*
      Note:
        costArray[] must be exactly the same as costArrayLinear[].
        Known exceptions:
          1) Dead weak pointers, whose type is CONSTR. These objects are not
             reachable from any roots.
    */
    debugBelch("Comparison:\n");
    debugBelch("\tcostArrayLinear[] (must be empty) = ");
    for (i = 0;i < N_CLOSURE_TYPES; i++)
	if (costArray[i] != costArrayLinear[i])
	    // nothing should be printed except MUT_VAR after major GCs
	    debugBelch("[%u:%u] ", i, costArrayLinear[i]);
    debugBelch("\n");

    debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
    debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
    debugBelch("\tcostArray[] (must be empty) = ");
    for (i = 0;i < N_CLOSURE_TYPES; i++)
	if (costArray[i] != costArrayLinear[i])
	    // nothing should be printed except MUT_VAR after major GCs
	    debugBelch("[%u:%u] ", i, costArray[i]);
    debugBelch("\n");

    // only for major garbage collection
    ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
#endif

    // post-processing
    closeTraverseStack();
#ifdef DEBUG_RETAINER
    closeAllRetainerSet();
#else
    // Note that there is no post-processing for the retainer sets.
#endif
    retainerGeneration++;

    // NOTE(review): if no object was visited this pass, numObjectVisited is 0
    // and the average below divides by zero — presumably unreachable after a
    // major GC with live roots, but worth confirming.
    stat_endRP(
      retainerGeneration - 1,   // retainerGeneration has just been incremented!
#ifdef DEBUG_RETAINER
      maxCStackSize, maxStackSize,
#endif
      (double)timesAnyObjectVisited / numObjectVisited);
}
2094
2095 /* -----------------------------------------------------------------------------
2096 * DEBUGGING CODE
2097 * -------------------------------------------------------------------------- */
2098
2099 #ifdef DEBUG_RETAINER
2100
// Heuristic validity check on a closure pointer, used only by the
// DEBUG_RETAINER sanity checks below: 'r' must either look like a static
// closure or lie inside an allocated (non-free) heap block, and its first
// word must not be 0xaaaaaaaa — presumably the DEBUG RTS's scrubbed-memory
// fill pattern (NOTE(review): confirm against the storage-manager code).
#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
        ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
        ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
2104
2105 static nat
2106 sanityCheckHeapClosure( StgClosure *c )
2107 {
2108 StgInfoTable *info;
2109
2110 ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
2111 ASSERT(!closure_STATIC(c));
2112 ASSERT(LOOKS_LIKE_PTR(c));
2113
2114 if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
2115 if (get_itbl(c)->type == CONSTR &&
2116 !strcmp(GET_PROF_TYPE(get_itbl(c)), "DEAD_WEAK") &&
2117 !strcmp(GET_PROF_DESC(get_itbl(c)), "DEAD_WEAK")) {
2118 debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
2119 costArray[get_itbl(c)->type] += cost(c);
2120 sumOfNewCost += cost(c);
2121 } else
2122 debugBelch(
2123 "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
2124 flip, c, get_itbl(c)->type,
2125 get_itbl(c)->prof.closure_type, GET_PROF_DESC(get_itbl(c)),
2126 RSET(c));
2127 } else {
2128 // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
2129 }
2130
2131 return closure_sizeW(c);
2132 }
2133
2134 static nat
2135 heapCheck( bdescr *bd )
2136 {
2137 StgPtr p;
2138 static nat costSum, size;
2139
2140 costSum = 0;
2141 while (bd != NULL) {
2142 p = bd->start;
2143 while (p < bd->free) {
2144 size = sanityCheckHeapClosure((StgClosure *)p);
2145 sumOfCostLinear += size;
2146 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2147 p += size;
2148 // no need for slop check; I think slops are not used currently.
2149 }
2150 ASSERT(p == bd->free);
2151 costSum += bd->free - bd->start;
2152 bd = bd->link;
2153 }
2154
2155 return costSum;
2156 }
2157
2158 static nat
2159 smallObjectPoolCheck(void)
2160 {
2161 bdescr *bd;
2162 StgPtr p;
2163 static nat costSum, size;
2164
2165 bd = g0s0->blocks;
2166 costSum = 0;
2167
2168 // first block
2169 if (bd == NULL)
2170 return costSum;
2171
2172 p = bd->start;
2173 while (p < alloc_Hp) {
2174 size = sanityCheckHeapClosure((StgClosure *)p);
2175 sumOfCostLinear += size;
2176 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2177 p += size;
2178 }
2179 ASSERT(p == alloc_Hp);
2180 costSum += alloc_Hp - bd->start;
2181
2182 bd = bd->link;
2183 while (bd != NULL) {
2184 p = bd->start;
2185 while (p < bd->free) {
2186 size = sanityCheckHeapClosure((StgClosure *)p);
2187 sumOfCostLinear += size;
2188 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2189 p += size;
2190 }
2191 ASSERT(p == bd->free);
2192 costSum += bd->free - bd->start;
2193 bd = bd->link;
2194 }
2195
2196 return costSum;
2197 }
2198
2199 static nat
2200 chainCheck(bdescr *bd)
2201 {
2202 nat costSum, size;
2203
2204 costSum = 0;
2205 while (bd != NULL) {
2206 // bd->free - bd->start is not an accurate measurement of the
2207 // object size. Actually it is always zero, so we compute its
2208 // size explicitly.
2209 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2210 sumOfCostLinear += size;
2211 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
2212 costSum += size;
2213 bd = bd->link;
2214 }
2215
2216 return costSum;
2217 }
2218
/*
 * Linear scan of the entire heap, used to cross-check the retainer
 * traversal: sanity-checks every closure and accumulates the total heap
 * size (in words) into sumOfCostLinear / costArrayLinear[] as a side
 * effect of the helpers it calls.
 *
 * Returns the total size in words of all closures scanned; the caller
 * asserts this equals sumOfCostLinear.
 */
static nat
checkHeapSanityForRetainerProfiling( void )
{
    nat costSum, g, s;

    costSum = 0;
    debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
    if (RtsFlags.GcFlags.generations == 1) {
        // single-generation (two-space) collector: live data sits in the
        // to-space and large-object list of g0s0
        costSum += heapCheck(g0s0->to_blocks);
        debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
        costSum += chainCheck(g0s0->large_objects);
        debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
    } else {
        for (g = 0; g < RtsFlags.GcFlags.generations; g++)
        for (s = 0; s < generations[g].n_steps; s++) {
            /*
              After all live objects have been scavenged, the garbage
              collector may create some objects in
              scheduleFinalizers(). These objects are created through
              allocate(), so the small object pool or the large object
              pool of the g0s0 may not be empty.
            */
            if (g == 0 && s == 0) {
                // g0s0 may hold freshly-allocated objects (see note
                // above), so use the allocation-pointer-aware check
                costSum += smallObjectPoolCheck();
                debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
                costSum += chainCheck(generations[g].steps[s].large_objects);
                debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
            } else {
                costSum += heapCheck(generations[g].steps[s].blocks);
                debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
                costSum += chainCheck(generations[g].steps[s].large_objects);
                debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
            }
        }
    }

    return costSum;
}
2257
2258 void
2259 findPointer(StgPtr p)
2260 {
2261 StgPtr q, r, e;
2262 bdescr *bd;
2263 nat g, s;
2264
2265 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2266 for (s = 0; s < generations[g].n_steps; s++) {
2267 // if (g == 0 && s == 0) continue;
2268 bd = generations[g].steps[s].blocks;
2269 for (; bd; bd = bd->link) {
2270 for (q = bd->start; q < bd->free; q++) {
2271 if (*q == (StgWord)p) {
2272 r = q;
2273 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2274 debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2275 // return;
2276 }
2277 }
2278 }
2279 bd = generations[g].steps[s].large_objects;
2280 for (; bd; bd = bd->link) {
2281 e = bd->start + cost((StgClosure *)bd->start);
2282 for (q = bd->start; q < e; q++) {
2283 if (*q == (StgWord)p) {
2284 r = q;
2285 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2286 debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
2287 // return;
2288 }
2289 }
2290 }
2291 }
2292 }
2293 }
2294
2295 static void
2296 belongToHeap(StgPtr p)
2297 {
2298 bdescr *bd;
2299 nat g, s;
2300
2301 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2302 for (s = 0; s < generations[g].n_steps; s++) {
2303 // if (g == 0 && s == 0) continue;
2304 bd = generations[g].steps[s].blocks;
2305 for (; bd; bd = bd->link) {
2306 if (bd->start <= p && p < bd->free) {
2307 debugBelch("Belongs to gen[%d], step[%d]", g, s);
2308 return;
2309 }
2310 }
2311 bd = generations[g].steps[s].large_objects;
2312 for (; bd; bd = bd->link) {
2313 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2314 debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
2315 return;
2316 }
2317 }
2318 }
2319 }
2320 }
2321 #endif /* DEBUG_RETAINER */
2322
2323 #endif /* PROFILING */