Remove CONSTR_CHARLIKE and CONSTR_INTLIKE closure types
[ghc.git] / rts / RetainerProfile.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2001
4 * Author: Sungwoo Park
5 *
6 * Retainer profiling.
7 *
8 * ---------------------------------------------------------------------------*/
9
#ifdef PROFILING

// Turn off inlining when debugging - it obfuscates things (inlined helpers
// disappear from stack traces and make single-stepping confusing).
#ifdef DEBUG
#define INLINE
#else
#define INLINE inline
#endif
18
19 #include "Rts.h"
20 #include "RtsUtils.h"
21 #include "RetainerProfile.h"
22 #include "RetainerSet.h"
23 #include "Schedule.h"
24 #include "Printer.h"
25 #include "Storage.h"
26 #include "RtsFlags.h"
27 #include "Weak.h"
28 #include "Sanity.h"
29 #include "Profiling.h"
30 #include "Stats.h"
31 #include "BlockAlloc.h"
32 #include "ProfHeap.h"
33 #include "Apply.h"
34
35 /*
36 Note: what to change in order to plug-in a new retainer profiling scheme?
37 (1) type retainer in ../includes/StgRetainerProf.h
38 (2) retainer function R(), i.e., getRetainerFrom()
39 (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
40 in RetainerSet.h, if needed.
41 (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
42 */
43
44 /* -----------------------------------------------------------------------------
45 * Declarations...
46 * -------------------------------------------------------------------------- */
47
static nat retainerGeneration;  // current retainer-profiling generation (census number)

static nat numObjectVisited;      // total number of distinct objects visited
static nat timesAnyObjectVisited; // number of times any objects are visited (revisits included)
52
/*
  The rs field in the profile header of any object points to its retainer
  set in an indirect way: if flip is 0, it points to the retainer set;
  if flip is 1, it points to the next byte after the retainer set (even
  for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
  pointer. See retainerSetOf().
 */

StgWord flip = 0;     // flip bit
                      // must be 0 if DEBUG_RETAINER is on (for static closures)

// Mark c's retainer set as "not yet computed" for the current pass, encoded
// with the current flip bit so stale values from the previous pass are
// distinguishable.
#define setRetainerSetToNull(c)   \
  (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)

static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
static void retainClosure(StgClosure *, StgClosure *, retainer);
#ifdef DEBUG_RETAINER
static void belongToHeap(StgPtr p);
#endif
72
#ifdef DEBUG_RETAINER
/*
  cStackSize records how many times retainStack() has been invoked recursively,
  that is, the number of activation records for retainStack() on the C stack.
  maxCStackSize records its max value.
  Invariants:
    cStackSize <= maxCStackSize
 */
static nat cStackSize, maxCStackSize;

static nat sumOfNewCost;        // sum of the cost of each object, computed
                                // when the object is first visited
static nat sumOfNewCostExtra;   // for those objects not visited during
                                // retainer profiling, e.g., MUT_VAR
static nat costArray[N_CLOSURE_TYPES];  // per-closure-type breakdown of sumOfNewCost

nat sumOfCostLinear;            // sum of the costs of all object, computed
                                // when linearly traversing the heap after
                                // retainer profiling
nat costArrayLinear[N_CLOSURE_TYPES];   // per-closure-type breakdown of sumOfCostLinear
#endif
94
/* -----------------------------------------------------------------------------
 * Retainer stack - header
 * Note:
 *   Although the retainer stack implementation could be separated
 *   from the retainer profiling engine, there does not seem to be
 *   any advantage in doing that; the retainer stack is an integral part
 *   of the retainer profiling engine and cannot be used elsewhere at
 *   all.
 * -------------------------------------------------------------------------- */
104
// Discriminant for the nextPos union below: how the parent closure's
// remaining children are enumerated.
typedef enum {
    posTypeStep,      // fixed layout: `step` numbers the next child
    posTypePtrs,      // pointers-first payload: walk payload[pos..ptrs)
    posTypeSRT,       // small SRT, guarded by an inline bitmap
    posTypeLargeSRT,  // large SRT, guarded by an out-of-line bitmap
} nextPosType;

typedef union {
    // fixed layout or layout specified by a field in the closure
    StgWord step;

    // layout.payload
    struct {
        // See StgClosureInfo in InfoTables.h
#if SIZEOF_VOID_P == 8
        StgWord32 pos;
        StgWord32 ptrs;
#else
        StgWord16 pos;
        StgWord16 ptrs;
#endif
        StgPtr payload;
    } ptrs;

    // SRT
    struct {
        StgClosure **srt;     // next SRT slot to inspect
        StgWord srt_bitmap;   // remaining bitmap bits, shifted as we go
    } srt;

    // Large SRT
    struct {
        StgLargeSRT *srt;
        StgWord offset;       // index of the next SRT entry to inspect
    } large_srt;

} nextPos;

// Traversal position within one closure: the tag plus the matching union arm.
typedef struct {
    nextPosType type;
    nextPos next;
} stackPos;

// One entry on the traverse stack: a closure whose children are still being
// visited, the retainer of those children, and the resume position.
typedef struct {
    StgClosure *c;
    retainer c_child_r;
    stackPos info;
} stackElement;
153
/*
  Invariants:
    firstStack points to the first block group.
    currentStack points to the block group currently being used.
    currentStack->free == stackLimit.
    stackTop points to the topmost byte in the stack of currentStack.
    Unless the whole stack is empty, stackTop must point to the topmost
    object (or byte) in the whole stack. Thus, it is only when the whole stack
    is empty that stackTop == stackLimit (not during the execution of push()
    and pop()).
    stackBottom == currentStack->start.
    stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
  Note:
    When a current stack becomes empty, stackTop is set to point to
    the topmost element on the previous block group so as to satisfy
    the invariants described above.
 */
static bdescr *firstStack = NULL;   // first block group of the traverse stack
static bdescr *currentStack;        // block group currently being pushed/popped
static stackElement *stackBottom, *stackTop, *stackLimit;  // note: the stack grows DOWNWARD

/*
  currentStackBoundary is used to mark the current stack chunk.
  If stackTop == currentStackBoundary, it means that the current stack chunk
  is empty. It is the responsibility of the user to keep currentStackBoundary
  valid all the time if it is to be employed.
 */
static stackElement *currentStackBoundary;
182
/*
  stackSize records the current size of the stack.
  maxStackSize records its high water mark.
  Invariants:
    stackSize <= maxStackSize
  Note:
    stackSize is just an estimate measure of the depth of the graph. The reason
    is that some heap objects have only a single child and may not result
    in a new element being pushed onto the stack. Therefore, at the end of
    retainer profiling, maxStackSize + maxCStackSize is some value no greater
    than the actual depth of the graph.
 */
#ifdef DEBUG_RETAINER
static int stackSize, maxStackSize;
#endif

// number of blocks allocated for one stack chunk
#define BLOCKS_IN_STACK 1
201
202 /* -----------------------------------------------------------------------------
203 * Add a new block group to the stack.
204 * Invariants:
205 * currentStack->link == s.
206 * -------------------------------------------------------------------------- */
207 static INLINE void
208 newStackBlock( bdescr *bd )
209 {
210 currentStack = bd;
211 stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
212 stackBottom = (stackElement *)bd->start;
213 stackLimit = (stackElement *)stackTop;
214 bd->free = (StgPtr)stackLimit;
215 }
216
217 /* -----------------------------------------------------------------------------
218 * Return to the previous block group.
219 * Invariants:
220 * s->link == currentStack.
221 * -------------------------------------------------------------------------- */
222 static INLINE void
223 returnToOldStack( bdescr *bd )
224 {
225 currentStack = bd;
226 stackTop = (stackElement *)bd->free;
227 stackBottom = (stackElement *)bd->start;
228 stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
229 bd->free = (StgPtr)stackLimit;
230 }
231
232 /* -----------------------------------------------------------------------------
233 * Initializes the traverse stack.
234 * -------------------------------------------------------------------------- */
235 static void
236 initializeTraverseStack( void )
237 {
238 if (firstStack != NULL) {
239 freeChain(firstStack);
240 }
241
242 firstStack = allocGroup(BLOCKS_IN_STACK);
243 firstStack->link = NULL;
244 firstStack->u.back = NULL;
245
246 newStackBlock(firstStack);
247 }
248
249 /* -----------------------------------------------------------------------------
250 * Frees all the block groups in the traverse stack.
251 * Invariants:
252 * firstStack != NULL
253 * -------------------------------------------------------------------------- */
254 static void
255 closeTraverseStack( void )
256 {
257 freeChain(firstStack);
258 firstStack = NULL;
259 }
260
261 /* -----------------------------------------------------------------------------
262 * Returns rtsTrue if the whole stack is empty.
263 * -------------------------------------------------------------------------- */
264 static INLINE rtsBool
265 isEmptyRetainerStack( void )
266 {
267 return (firstStack == currentStack) && stackTop == stackLimit;
268 }
269
/* -----------------------------------------------------------------------------
 * Returns the size of the traverse stack, in blocks, summed over every
 * block group in the chain.
 * -------------------------------------------------------------------------- */
#ifdef DEBUG
lnat
retainerStackBlocks( void )
{
    lnat total = 0;
    bdescr *group;

    for (group = firstStack; group != NULL; group = group->link) {
        total += group->blocks;
    }

    return total;
}
#endif
286
287 /* -----------------------------------------------------------------------------
288 * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
289 * i.e., if the current stack chunk is empty.
290 * -------------------------------------------------------------------------- */
291 static INLINE rtsBool
292 isOnBoundary( void )
293 {
294 return stackTop == currentStackBoundary;
295 }
296
297 /* -----------------------------------------------------------------------------
298 * Initializes *info from ptrs and payload.
299 * Invariants:
300 * payload[] begins with ptrs pointers followed by non-pointers.
301 * -------------------------------------------------------------------------- */
302 static INLINE void
303 init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
304 {
305 info->type = posTypePtrs;
306 info->next.ptrs.pos = 0;
307 info->next.ptrs.ptrs = ptrs;
308 info->next.ptrs.payload = payload;
309 }
310
311 /* -----------------------------------------------------------------------------
312 * Find the next object from *info.
313 * -------------------------------------------------------------------------- */
314 static INLINE StgClosure *
315 find_ptrs( stackPos *info )
316 {
317 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
318 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
319 } else {
320 return NULL;
321 }
322 }
323
324 /* -----------------------------------------------------------------------------
325 * Initializes *info from SRT information stored in *infoTable.
326 * -------------------------------------------------------------------------- */
327 static INLINE void
328 init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
329 {
330 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
331 info->type = posTypeLargeSRT;
332 info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
333 info->next.large_srt.offset = 0;
334 } else {
335 info->type = posTypeSRT;
336 info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
337 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
338 }
339 }
340
341 static INLINE void
342 init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
343 {
344 if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
345 info->type = posTypeLargeSRT;
346 info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
347 info->next.large_srt.offset = 0;
348 } else {
349 info->type = posTypeSRT;
350 info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
351 info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
352 }
353 }
354
/* -----------------------------------------------------------------------------
 * Returns the next live SRT entry recorded in *info, or NULL when the SRT
 * is exhausted. The cursor state (remaining bitmap / offset) is saved back
 * into *info before each return, so successive calls resume where the
 * previous one stopped.
 * -------------------------------------------------------------------------- */
static INLINE StgClosure *
find_srt( stackPos *info )
{
    StgClosure *c;
    StgWord bitmap;

    if (info->type == posTypeSRT) {
        // Small SRT bitmap: one bit per SRT slot, bit set => slot is live.
        bitmap = info->next.srt.srt_bitmap;
        while (bitmap != 0) {
            if ((bitmap & 1) != 0) {
#ifdef ENABLE_WIN32_DLL_SUPPORT
                // NOTE(review): the low tag bit appears to mark an indirect
                // (DLL-imported) SRT entry that needs one more dereference —
                // confirm against the Win32 DLL linking scheme.
                if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
                    c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
                else
                    c = *(info->next.srt.srt);
#else
                c = *(info->next.srt.srt);
#endif
                // Save the cursor past this slot before yielding it.
                bitmap = bitmap >> 1;
                info->next.srt.srt++;
                info->next.srt.srt_bitmap = bitmap;
                return c;
            }
            bitmap = bitmap >> 1;
            info->next.srt.srt++;
        }
        // bitmap is now zero...
        return NULL;
    }
    else {
        // Large SRT bitmap: the bitmap is an out-of-line array of words;
        // i indexes SRT entries, i / BITS_IN(W_) selects the bitmap word.
        nat i = info->next.large_srt.offset;
        StgWord bitmap;

        // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
        bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
        bitmap = bitmap >> (i % BITS_IN(StgWord));
        while (i < info->next.large_srt.srt->l.size) {
            if ((bitmap & 1) != 0) {
                c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
                i++;
                info->next.large_srt.offset = i;  // resume after this entry
                return c;
            }
            i++;
            if (i % BITS_IN(W_) == 0) {
                // crossed into the next bitmap word: reload it
                bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
            } else {
                bitmap = bitmap >> 1;
            }
        }
        // reached the end of this bitmap.
        info->next.large_srt.offset = i;
        return NULL;
    }
}
416
/* -----------------------------------------------------------------------------
 * push() pushes a stackElement representing the next child of *c
 * onto the traverse stack. If *c has no child, *first_child is set
 * to NULL and nothing is pushed onto the stack. If *c has only one
 * child, *first_child is set to that child and nothing is pushed onto
 * the stack. If *c has two or more children, *first_child is set
 * to the first child and a stackElement representing the second
 * child is pushed onto the stack.
 *
 * Invariants:
 *   *c_child_r is the most recent retainer of *c's children.
 *   *c is not any of TSO, AP, PAP, AP_STACK, which means that
 *   there cannot be any stack objects.
 * Note: SRTs are considered to be children as well.
 * -------------------------------------------------------------------------- */
static INLINE void
push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
{
    stackElement se;
    bdescr *nbd;      // Next Block Descriptor

#ifdef DEBUG_RETAINER
    // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    // Stack-carrying closures are handled by retainStack(), never here.
    ASSERT(get_itbl(c)->type != TSO);
    ASSERT(get_itbl(c)->type != AP_STACK);

    //
    // fill in se
    //

    se.c = c;
    se.c_child_r = c_child_r;

    // fill in se.info; cases that `return` push nothing, cases that `break`
    // fall through to the actual push at the bottom.
    switch (get_itbl(c)->type) {
        // no child, no SRT
    case CONSTR_0_1:
    case CONSTR_0_2:
    case CAF_BLACKHOLE:
    case BLACKHOLE:
    case SE_BLACKHOLE:
    case SE_CAF_BLACKHOLE:
    case ARR_WORDS:
        *first_child = NULL;
        return;

        // one child (fixed), no SRT
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
        *first_child = ((StgMutVar *)c)->var;
        return;
    case THUNK_SELECTOR:
        *first_child = ((StgSelector *)c)->selectee;
        return;
    case IND_PERM:
    case IND_OLDGEN_PERM:
    case IND_OLDGEN:
        *first_child = ((StgInd *)c)->indirectee;
        return;
    case CONSTR_1_0:
    case CONSTR_1_1:
        *first_child = c->payload[0];
        return;

        // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
        // of the next child. We do not write a separate initialization code.
        // Also we do not have to initialize info.type;

        // two children (fixed), no SRT
        // need to push a stackElement, but nothing to store in se.info
    case CONSTR_2_0:
        *first_child = c->payload[0];     // return the first pointer
        // se.info.type = posTypeStep;
        // se.info.next.step = 2;         // 2 = second
        break;

        // three children (fixed), no SRT
        // need to push a stackElement
    case MVAR:
        // head must be TSO and the head of a linked list of TSOs.
        // Should it be a child? Seems to be yes.
        *first_child = (StgClosure *)((StgMVar *)c)->head;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;            // 2 = second
        break;

        // three children (fixed), no SRT
    case WEAK:
        *first_child = ((StgWeak *)c)->key;
        // se.info.type = posTypeStep;
        se.info.next.step = 2;
        break;

        // layout.payload.ptrs, no SRT
    case CONSTR:
    case STABLE_NAME:
    case BCO:
    case CONSTR_STATIC:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

        // StgMutArrPtr.ptrs, no SRT
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            return;
        break;

        // layout.payload.ptrs, SRT
    case FUN:           // *c is a heap object.
    case FUN_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto fun_srt_only;
        break;

    case THUNK:
    case THUNK_2_0:
        init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
                  (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
            goto thunk_srt_only;
        break;

        // 1 fixed child, SRT
    case FUN_1_0:
    case FUN_1_1:
        *first_child = c->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_fun(&se.info, get_fun_itbl(c));
        break;

    case THUNK_1_0:
    case THUNK_1_1:
        *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;

    case FUN_STATIC:    // *c is a heap object.
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        // fall through: FUN_STATIC with a non-empty SRT is SRT-only
    case FUN_0_1:
    case FUN_0_2:
    fun_srt_only:
        init_srt_fun(&se.info, get_fun_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

        // SRT only
    case THUNK_STATIC:
        ASSERT(get_itbl(c)->srt_bitmap != 0);
        // fall through: THUNK_STATIC with a non-empty SRT is SRT-only
    case THUNK_0_1:
    case THUNK_0_2:
    thunk_srt_only:
        init_srt_thunk(&se.info, get_thunk_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
        break;

    case TVAR_WAIT_QUEUE:
        *first_child = (StgClosure *)((StgTVarWaitQueue *)c)->waiting_tso;
        se.info.next.step = 2;            // 2 = second
        break;
    case TVAR:
        *first_child = (StgClosure *)((StgTVar *)c)->current_value;
        break;
    case TREC_HEADER:
        *first_child = (StgClosure *)((StgTRecHeader *)c)->enclosing_trec;
        break;
    case TREC_CHUNK:
        *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
        se.info.next.step = 0;            // entry no.
        break;

        // cannot appear
    case PAP:
    case AP:
    case AP_STACK:
    case TSO:
    case IND_STATIC:
    case CONSTR_NOCAF_STATIC:
        // stack objects
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_VEC_SMALL:
    case RET_BIG:
    case RET_VEC_BIG:
        // invalid objects
    case IND:
    case BLOCKED_FETCH:
    case FETCH_ME:
    case FETCH_ME_BQ:
    case RBH:
    case REMOTE_REF:
    case EVACUATED:
    case INVALID_OBJECT:
    default:
        barf("Invalid object *c in push()");
        return;
    }

    // The stack grows downward; if no room is left in the current chunk,
    // move on to (or allocate) the next block group.
    if (stackTop - 1 < stackBottom) {
#ifdef DEBUG_RETAINER
        // debugBelch("push() to the next stack.\n");
#endif
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
        currentStack->free = (StgPtr)stackTop;

        if (currentStack->link == NULL) {
            nbd = allocGroup(BLOCKS_IN_STACK);
            nbd->link = NULL;
            nbd->u.back = currentStack;
            currentStack->link = nbd;
        } else
            nbd = currentStack->link;

        newStackBlock(nbd);
    }

    // adjust stackTop (actual push)
    stackTop--;
    // If the size of stackElement was huge, we would better replace the
    // following statement by either a memcpy() call or a switch statement
    // on the type of the element. Currently, the size of stackElement is
    // small enough (5 words) that this direct assignment seems to be enough.
    *stackTop = se;

#ifdef DEBUG_RETAINER
    stackSize++;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    // ASSERT(stackSize >= 0);
    // debugBelch("stackSize = %d\n", stackSize);
#endif
}
674
/* -----------------------------------------------------------------------------
 * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
 * Invariants:
 *   stackTop cannot be equal to stackLimit unless the whole stack is
 *   empty, in which case popOff() is not allowed.
 * Note:
 *   You can think of popOffReal() as a part of popOff() which is
 *   executed at the end of popOff() if necessary. Since popOff() is
 *   likely to be executed quite often while popOffReal() is not, we
 *   separate popOffReal() from popOff(), which is declared as an
 *   INLINE function (for the sake of execution speed). popOffReal()
 *   is called only within popOff() and nowhere else.
 * -------------------------------------------------------------------------- */
static void
popOffReal(void)
{
    bdescr *pbd;      // Previous Block Descriptor

#ifdef DEBUG_RETAINER
    // debugBelch("pop() to the previous stack.\n");
#endif

    // Only reached when the topmost chunk holds exactly one element.
    ASSERT(stackTop + 1 == stackLimit);
    ASSERT(stackBottom == (stackElement *)currentStack->start);

    if (firstStack == currentStack) {
        // The stack is completely empty.
        stackTop++;
        ASSERT(stackTop == stackLimit);
#ifdef DEBUG_RETAINER
        stackSize--;
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
        ASSERT(stackSize >= 0);
        debugBelch("stackSize = %d\n", stackSize);
        */
#endif
        return;
    }

    // currentStack->free is updated when the active stack is switched back
    // to the previous stack.
    currentStack->free = (StgPtr)stackLimit;

    // find the previous block descriptor
    pbd = currentStack->u.back;
    ASSERT(pbd != NULL);

    returnToOldStack(pbd);

#ifdef DEBUG_RETAINER
    stackSize--;
    if (stackSize > maxStackSize) maxStackSize = stackSize;
    /*
    ASSERT(stackSize >= 0);
    debugBelch("stackSize = %d\n", stackSize);
    */
#endif
}
734
735 static INLINE void
736 popOff(void) {
737 #ifdef DEBUG_RETAINER
738 // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
739 #endif
740
741 ASSERT(stackTop != stackLimit);
742 ASSERT(!isEmptyRetainerStack());
743
744 // <= (instead of <) is wrong!
745 if (stackTop + 1 < stackLimit) {
746 stackTop++;
747 #ifdef DEBUG_RETAINER
748 stackSize--;
749 if (stackSize > maxStackSize) maxStackSize = stackSize;
750 /*
751 ASSERT(stackSize >= 0);
752 debugBelch("stackSize = %d\n", stackSize);
753 */
754 #endif
755 return;
756 }
757
758 popOffReal();
759 }
760
/* -----------------------------------------------------------------------------
 * Finds the next object to be considered for retainer profiling and store
 * its pointer to *c.
 * Test if the topmost stack element indicates that more objects are left,
 * and if so, retrieve the first object and store its pointer to *c. Also,
 * set *cp and *r appropriately, both of which are stored in the stack element.
 * The topmost stack element then is overwritten so as for it to now denote
 * the next object.
 * If the topmost stack element indicates no more objects are left, pop
 * off the stack element until either an object can be retrieved or
 * the current stack chunk becomes empty, indicated by rtsTrue returned by
 * isOnBoundary(), in which case *c is set to NULL.
 * Note:
 *   It is okay to call this function even when the current stack chunk
 *   is empty.
 * -------------------------------------------------------------------------- */
static INLINE void
pop( StgClosure **c, StgClosure **cp, retainer *r )
{
    stackElement *se;

#ifdef DEBUG_RETAINER
    // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
#endif

    // Loop until a child is produced (*c set, return) or the chunk is
    // depleted (*c = NULL, return); `break` inside the switch means
    // "this element is exhausted, try the next one".
    do {
        if (isOnBoundary()) {     // if the current stack chunk is depleted
            *c = NULL;
            return;
        }

        se = stackTop;

        switch (get_itbl(se->c)->type) {
            // two children (fixed), no SRT
            // nothing in se.info
        case CONSTR_2_0:
            *c = se->c->payload[1];
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

            // three children (fixed), no SRT
            // need to push a stackElement
        case MVAR:
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgMVar *)se->c)->tail;
                se->info.next.step++;             // move to the next step
                // no popOff
            } else {
                *c = ((StgMVar *)se->c)->value;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // three children (fixed), no SRT
        case WEAK:
            if (se->info.next.step == 2) {
                *c = ((StgWeak *)se->c)->value;
                se->info.next.step++;
                // no popOff
            } else {
                *c = ((StgWeak *)se->c)->finalizer;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR_WAIT_QUEUE:
            if (se->info.next.step == 2) {
                *c = (StgClosure *)((StgTVarWaitQueue *)se->c)->next_queue_entry;
                se->info.next.step++;             // move to the next step
                // no popOff
            } else {
                *c = (StgClosure *)((StgTVarWaitQueue *)se->c)->prev_queue_entry;
                popOff();
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

        case TVAR:
            *c = (StgClosure *)((StgTVar *)se->c)->first_wait_queue_entry;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_HEADER:
            *c = (StgClosure *)((StgTRecHeader *)se->c)->current_chunk;
            *cp = se->c;
            *r = se->c_child_r;
            popOff();
            return;

        case TREC_CHUNK: {
            // These are pretty complicated: we have N entries, each
            // of which contains 3 fields that we want to follow. So
            // we divide the step counter: the 2 low bits indicate
            // which field, and the rest of the bits indicate the
            // entry number (starting from zero).
            nat entry_no = se->info.next.step >> 2;
            nat field_no = se->info.next.step & 3;
            if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
                *c = NULL;
                popOff();
                return;
            }
            TRecEntry *entry = &((StgTRecChunk *)se->c)->entries[entry_no];
            if (field_no == 0) {
                *c = (StgClosure *)entry->tvar;
            } else if (field_no == 1) {
                *c = entry->expected_value;
            } else {
                *c = entry->new_value;
            }
            *cp = se->c;
            *r = se->c_child_r;
            se->info.next.step++;
            return;
        }

            // layout.payload.ptrs, no SRT
        case CONSTR:
        case STABLE_NAME:
        case BCO:
        case CONSTR_STATIC:
            // StgMutArrPtr.ptrs, no SRT
        case MUT_ARR_PTRS_CLEAN:
        case MUT_ARR_PTRS_DIRTY:
        case MUT_ARR_PTRS_FROZEN:
        case MUT_ARR_PTRS_FROZEN0:
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
                popOff();
                break;      // element exhausted: continue with the next one
            }
            *cp = se->c;
            *r = se->c_child_r;
            return;

            // layout.payload.ptrs, SRT
        case FUN:           // always a heap object
        case FUN_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                // pointer fields done; switch this element over to its SRT
                init_srt_fun(&se->info, get_fun_itbl(se->c));
            }
            goto do_srt;

        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
                *c = find_ptrs(&se->info);
                if (*c != NULL) {
                    *cp = se->c;
                    *r = se->c_child_r;
                    return;
                }
                // pointer fields done; switch this element over to its SRT
                init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
            goto do_srt;

            // SRT
        do_srt:
        case THUNK_STATIC:
        case FUN_STATIC:
        case FUN_0_1:
        case FUN_0_2:
        case THUNK_0_1:
        case THUNK_0_2:
        case FUN_1_0:
        case FUN_1_1:
        case THUNK_1_0:
        case THUNK_1_1:
            *c = find_srt(&se->info);
            if (*c != NULL) {
                *cp = se->c;
                *r = se->c_child_r;
                return;
            }
            popOff();
            break;          // element exhausted: continue with the next one

            // no child (fixed), no SRT
        case CONSTR_0_1:
        case CONSTR_0_2:
        case CAF_BLACKHOLE:
        case BLACKHOLE:
        case SE_BLACKHOLE:
        case SE_CAF_BLACKHOLE:
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR_CLEAN:
        case MUT_VAR_DIRTY:
        case THUNK_SELECTOR:
        case IND_PERM:
        case IND_OLDGEN_PERM:
        case IND_OLDGEN:
        case CONSTR_1_1:
            // cannot appear
        case PAP:
        case AP:
        case AP_STACK:
        case TSO:
        case IND_STATIC:
        case CONSTR_NOCAF_STATIC:
            // stack objects
        case RET_DYN:
        case UPDATE_FRAME:
        case CATCH_FRAME:
        case STOP_FRAME:
        case RET_BCO:
        case RET_SMALL:
        case RET_VEC_SMALL:
        case RET_BIG:
        case RET_VEC_BIG:
            // invalid objects
        case IND:
        case BLOCKED_FETCH:
        case FETCH_ME:
        case FETCH_ME_BQ:
        case RBH:
        case REMOTE_REF:
        case EVACUATED:
        case INVALID_OBJECT:
        default:
            // These types never get a stackElement (push() returns the single
            // child directly or refuses them), so seeing one here is a bug.
            barf("Invalid object *c in pop()");
            return;
        }
    } while (rtsTrue);
}
1001
/* -----------------------------------------------------------------------------
 * RETAINER PROFILING ENGINE
 * -------------------------------------------------------------------------- */

// Set up retainer profiling: allocate the global retainer-set table and
// reset the census generation counter.
void
initRetainerProfiling( void )
{
    initializeAllRetainerSet();
    retainerGeneration = 0;
}
1012
/* -----------------------------------------------------------------------------
 * This function must be called before f-closing prof_file.
 * -------------------------------------------------------------------------- */
void
endRetainerProfiling( void )
{
#ifdef SECOND_APPROACH
    // Under the second approach all accumulated retainer sets are dumped
    // in one go at the end of the run.
    outputAllRetainerSet(prof_file);
#endif
}
1023
1024 /* -----------------------------------------------------------------------------
1025 * Returns the actual pointer to the retainer set of the closure *c.
1026 * It may adjust RSET(c) subject to flip.
1027 * Side effects:
1028 * RSET(c) is initialized to NULL if its current value does not
1029 * conform to flip.
1030 * Note:
1031 * Even though this function has side effects, they CAN be ignored because
1032 * subsequent calls to retainerSetOf() always result in the same return value
1033 * and retainerSetOf() is the only way to retrieve retainerSet of a given
1034 * closure.
1035 * We have to perform an XOR (^) operation each time a closure is examined.
1036 * The reason is that we do not know when a closure is visited last.
1037 * -------------------------------------------------------------------------- */
1038 static INLINE void
1039 maybeInitRetainerSet( StgClosure *c )
1040 {
1041 if (!isRetainerSetFieldValid(c)) {
1042 setRetainerSetToNull(c);
1043 }
1044 }
1045
/* -----------------------------------------------------------------------------
 * Returns rtsTrue if *c is a retainer.
 * Note:
 *   The set of closure types handled here must stay in sync with
 *   getRetainerFrom(): every type for which this returns rtsTrue must be
 *   safe to pass to getRetainerFrom().
 * -------------------------------------------------------------------------- */
static INLINE rtsBool
isRetainer( StgClosure *c )
{
    switch (get_itbl(c)->type) {
        //
        //  True case
        //
        // TSOs MUST be retainers: they constitute the set of roots.
    case TSO:

        // mutable objects
    case MVAR:
    case MUT_VAR_CLEAN:
    case MUT_VAR_DIRTY:
    case MUT_ARR_PTRS_CLEAN:
    case MUT_ARR_PTRS_DIRTY:
    case MUT_ARR_PTRS_FROZEN:
    case MUT_ARR_PTRS_FROZEN0:

        // thunks are retainers.
    case THUNK:
    case THUNK_1_0:
    case THUNK_0_1:
    case THUNK_2_0:
    case THUNK_1_1:
    case THUNK_0_2:
    case THUNK_SELECTOR:
    case AP:
    case AP_STACK:

        // Static thunks, or CAFS, are obviously retainers.
    case THUNK_STATIC:

        // WEAK objects are roots; there is separate code in which traversing
        // begins from WEAK objects.
    case WEAK:

        // Since the other mutvar-type things are retainers, seems
        // like the right thing to do:
    case TVAR:
        return rtsTrue;

        //
        // False case
        //

        // constructors
    case CONSTR:
    case CONSTR_1_0:
    case CONSTR_0_1:
    case CONSTR_2_0:
    case CONSTR_1_1:
    case CONSTR_0_2:
        // functions
    case FUN:
    case FUN_1_0:
    case FUN_0_1:
    case FUN_2_0:
    case FUN_1_1:
    case FUN_0_2:
        // partial applications
    case PAP:
        // blackholes
    case CAF_BLACKHOLE:
    case BLACKHOLE:
    case SE_BLACKHOLE:
    case SE_CAF_BLACKHOLE:
        // indirection
    case IND_PERM:
    case IND_OLDGEN_PERM:
    case IND_OLDGEN:
        // static objects
    case CONSTR_STATIC:
    case FUN_STATIC:
        // misc
    case STABLE_NAME:
    case BCO:
    case ARR_WORDS:
        // STM
    case TVAR_WAIT_QUEUE:
    case TREC_HEADER:
    case TREC_CHUNK:
        return rtsFalse;

        //
        // Error case
        //
        // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
    case IND_STATIC:
        // CONSTR_NOCAF_STATIC
        // cannot be *c, *cp, *r in the retainer profiling loop.
    case CONSTR_NOCAF_STATIC:
        // Stack objects are invalid because they are never treated as
        // legal objects during retainer profiling.
    case UPDATE_FRAME:
    case CATCH_FRAME:
    case STOP_FRAME:
    case RET_DYN:
    case RET_BCO:
    case RET_SMALL:
    case RET_VEC_SMALL:
    case RET_BIG:
    case RET_VEC_BIG:
        // other cases
    case IND:
    case BLOCKED_FETCH:
    case FETCH_ME:
    case FETCH_ME_BQ:
    case RBH:
    case REMOTE_REF:
    case EVACUATED:
    case INVALID_OBJECT:
    default:
        // These types are filtered out by retainClosure()/retainStack()
        // before isRetainer() is ever consulted, so seeing one is a bug.
        barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
        return rtsFalse;
    }
}
1166
1167 /* -----------------------------------------------------------------------------
1168 * Returns the retainer function value for the closure *c, i.e., R(*c).
1169 * This function does NOT return the retainer(s) of *c.
1170 * Invariants:
1171 * *c must be a retainer.
1172 * Note:
1173 * Depending on the definition of this function, the maintenance of retainer
1174 * sets can be made easier. If most retainer sets are likely to be created
1175 * again across garbage collections, refreshAllRetainerSet() in
1176 * RetainerSet.c can simply do nothing.
1177 * If this is not the case, we can free all the retainer sets and
1178 * re-initialize the hash table.
1179 * See refreshAllRetainerSet() in RetainerSet.c.
1180 * -------------------------------------------------------------------------- */
1181 static INLINE retainer
1182 getRetainerFrom( StgClosure *c )
1183 {
1184 ASSERT(isRetainer(c));
1185
1186 #if defined(RETAINER_SCHEME_INFO)
1187 // Retainer scheme 1: retainer = info table
1188 return get_itbl(c);
1189 #elif defined(RETAINER_SCHEME_CCS)
1190 // Retainer scheme 2: retainer = cost centre stack
1191 return c->header.prof.ccs;
1192 #elif defined(RETAINER_SCHEME_CC)
1193 // Retainer scheme 3: retainer = cost centre
1194 return c->header.prof.ccs->cc;
1195 #endif
1196 }
1197
/* -----------------------------------------------------------------------------
 * Associates the retainer set *s with the closure *c, that is, *s becomes
 * the retainer set of *c.
 * Invariants:
 *   c != NULL
 *   s != NULL
 * -------------------------------------------------------------------------- */
static INLINE void
associate( StgClosure *c, RetainerSet *s )
{
    // StgWord has the same size as pointers, so the following type
    // casting is okay.
    // The current flip bit is OR'ed into the stored pointer so that the
    // field can later be recognized as valid for this pass (see the
    // explanation of flip at the top of this file).
    RSET(c) = (RetainerSet *)((StgWord)s | flip);
}
1212
/* -----------------------------------------------------------------------------
   Call retainClosure for each of the closures covered by a large bitmap.
   A 0 bit marks a pointer word; 1 marks a non-pointer word (stack-frame
   bitmap convention).
   -------------------------------------------------------------------------- */

static void
retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size,
                     StgClosure *c, retainer c_child_r)
{
    nat i, b;
    StgWord bitmap;

    b = 0;
    bitmap = large_bitmap->bitmap[b];
    for (i = 0; i < size; ) {
        if ((bitmap & 1) == 0) {
            // pointer word: follow it
            retainClosure((StgClosure *)*p, c, c_child_r);
        }
        i++;
        p++;
        // Reload the next word of the bitmap once the current one
        // is exhausted; otherwise just shift to the next bit.
        if (i % BITS_IN(W_) == 0) {
            b++;
            bitmap = large_bitmap->bitmap[b];
        } else {
            bitmap = bitmap >> 1;
        }
    }
}
1240
1241 static INLINE StgPtr
1242 retain_small_bitmap (StgPtr p, nat size, StgWord bitmap,
1243 StgClosure *c, retainer c_child_r)
1244 {
1245 while (size > 0) {
1246 if ((bitmap & 1) == 0) {
1247 retainClosure((StgClosure *)*p, c, c_child_r);
1248 }
1249 p++;
1250 bitmap = bitmap >> 1;
1251 size--;
1252 }
1253 return p;
1254 }
1255
/* -----------------------------------------------------------------------------
 * Call retainClosure for each of the closures in an SRT.
 * Note: unlike the stack-frame bitmaps above, a 1 bit in an SRT bitmap
 * marks a live entry to follow.
 * ------------------------------------------------------------------------- */

static void
retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
{
    nat i, b, size;
    StgWord bitmap;
    StgClosure **p;

    b = 0;
    p = (StgClosure **)srt->srt;
    size = srt->l.size;
    bitmap = srt->l.bitmap[b];
    for (i = 0; i < size; ) {
        if ((bitmap & 1) != 0) {
            retainClosure((StgClosure *)*p, c, c_child_r);
        }
        i++;
        p++;
        // Move to the next bitmap word when the current one is used up.
        if (i % BITS_IN(W_) == 0) {
            b++;
            bitmap = srt->l.bitmap[b];
        } else {
            bitmap = bitmap >> 1;
        }
    }
}
1285
1286 static INLINE void
1287 retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
1288 {
1289 nat bitmap;
1290 StgClosure **p;
1291
1292 bitmap = srt_bitmap;
1293 p = srt;
1294
1295 if (bitmap == (StgHalfWord)(-1)) {
1296 retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
1297 return;
1298 }
1299
1300 while (bitmap != 0) {
1301 if ((bitmap & 1) != 0) {
1302 #ifdef ENABLE_WIN32_DLL_SUPPORT
1303 if ( (unsigned long)(*srt) & 0x1 ) {
1304 retainClosure(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1)),
1305 c, c_child_r);
1306 } else {
1307 retainClosure(*srt,c,c_child_r);
1308 }
1309 #else
1310 retainClosure(*srt,c,c_child_r);
1311 #endif
1312 }
1313 p++;
1314 bitmap = bitmap >> 1;
1315 }
1316 }
1317
/* -----------------------------------------------------------------------------
 *  Process all the objects in the stack chunk from stackStart to stackEnd
 *  with *c and *c_child_r being their parent and their most recent retainer,
 *  respectively. Treat stackOptionalFun as another child of *c if it is
 *  not NULL.
 *  Invariants:
 *    *c is one of the following: TSO, AP_STACK.
 *    If *c is TSO, c == c_child_r.
 *    stackStart < stackEnd.
 *    RSET(c) and RSET(c_child_r) are valid, i.e., their
 *    interpretation conforms to the current value of flip (even when they
 *    are interpreted to be NULL).
 *    If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
 *    or ThreadKilled, which means that its stack is ready to process.
 *  Note:
 *    This code was almost plagiarzied from GC.c! For each pointer,
 *    retainClosure() is invoked instead of evacuate().
 * -------------------------------------------------------------------------- */
static void
retainStack( StgClosure *c, retainer c_child_r,
             StgPtr stackStart, StgPtr stackEnd )
{
    stackElement *oldStackBoundary;
    StgPtr p;
    StgRetInfoTable *info;
    StgWord32 bitmap;
    nat size;

#ifdef DEBUG_RETAINER
    // track the depth of nested retainStack()/retainClosure() calls
    cStackSize++;
    if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
#endif

    /*
      Each invocation of retainStack() creates a new virtual
      stack. Since all such stacks share a single common stack, we
      record the current currentStackBoundary, which will be restored
      at the exit.
    */
    oldStackBoundary = currentStackBoundary;
    currentStackBoundary = stackTop;

#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
#endif

    ASSERT(get_itbl(c)->type != TSO ||
           (((StgTSO *)c)->what_next != ThreadRelocated &&
            ((StgTSO *)c)->what_next != ThreadComplete &&
            ((StgTSO *)c)->what_next != ThreadKilled));

    // Walk the stack frame by frame; each case advances p past the
    // frame it handles.
    p = stackStart;
    while (p < stackEnd) {
        info = get_ret_itbl((StgClosure *)p);

        switch(info->i.type) {

        case UPDATE_FRAME:
            retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
            p += sizeofW(StgUpdateFrame);
            continue;

        case STOP_FRAME:
        case CATCH_FRAME:
        case CATCH_STM_FRAME:
        case CATCH_RETRY_FRAME:
        case ATOMICALLY_FRAME:
        case RET_SMALL:
        case RET_VEC_SMALL:
            // small-bitmap frame: pointerhood of the payload words is
            // encoded directly in the info table
            bitmap = BITMAP_BITS(info->i.layout.bitmap);
            size = BITMAP_SIZE(info->i.layout.bitmap);
            p++;
            p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

        follow_srt:
            retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
            continue;

        case RET_BCO: {
            StgBCO *bco;

            p++;
            // the first payload word is the BCO itself, which is also
            // a closure to retain
            retainClosure((StgClosure *)*p, c, c_child_r);
            bco = (StgBCO *)*p;
            p++;
            size = BCO_BITMAP_SIZE(bco);
            retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
            p += size;
            continue;
        }

            // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
        case RET_BIG:
        case RET_VEC_BIG:
            size = GET_LARGE_BITMAP(&info->i)->size;
            p++;
            retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
                                size, c, c_child_r);
            p += size;
            // and don't forget to follow the SRT
            goto follow_srt;

            // Dynamic bitmap: the mask is stored on the stack
        case RET_DYN: {
            StgWord dyn;
            dyn = ((StgRetDyn *)p)->liveness;

            // traverse the bitmap first
            bitmap = RET_DYN_LIVENESS(dyn);
            p = (P_)&((StgRetDyn *)p)->payload[0];
            size = RET_DYN_BITMAP_SIZE;
            p = retain_small_bitmap(p, size, bitmap, c, c_child_r);

            // skip over the non-ptr words
            p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;

            // follow the ptr words
            for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
                retainClosure((StgClosure *)*p, c, c_child_r);
                p++;
            }
            continue;
        }

        case RET_FUN: {
            StgRetFun *ret_fun = (StgRetFun *)p;
            StgFunInfoTable *fun_info;

            retainClosure(ret_fun->fun, c, c_child_r);
            fun_info = get_fun_itbl(ret_fun->fun);

            // the layout of the arguments depends on the function's
            // calling convention, recorded in its info table
            p = (P_)&ret_fun->payload;
            switch (fun_info->f.fun_type) {
            case ARG_GEN:
                bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
                size = BITMAP_SIZE(fun_info->f.b.bitmap);
                p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
                break;
            case ARG_GEN_BIG:
                size = GET_FUN_LARGE_BITMAP(fun_info)->size;
                retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
                                    size, c, c_child_r);
                p += size;
                break;
            default:
                // one of the standard argument patterns
                bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
                size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
                p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
                break;
            }
            goto follow_srt;
        }

        default:
            barf("Invalid object found in retainStack(): %d",
                 (int)(info->i.type));
        }
    }

    // restore currentStackBoundary
    currentStackBoundary = oldStackBoundary;
#ifdef DEBUG_RETAINER
    // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
#endif

#ifdef DEBUG_RETAINER
    cStackSize--;
#endif
}
1487
/* ----------------------------------------------------------------------------
 * Call retainClosure for each of the children of a PAP/AP.
 * The function closure itself is retained first, then the argument words,
 * whose pointerhood is determined by the function's calling convention.
 * Returns a pointer just past the payload.
 * ------------------------------------------------------------------------- */

static INLINE StgPtr
retain_PAP_payload (StgClosure *pap, retainer c_child_r, StgClosure *fun,
                    StgClosure** payload, StgWord n_args)
{
    StgPtr p;
    StgWord bitmap;
    StgFunInfoTable *fun_info;

    retainClosure(fun, pap, c_child_r);
    fun_info = get_fun_itbl(fun);
    ASSERT(fun_info->i.type != PAP);

    p = (StgPtr)payload;

    switch (fun_info->f.fun_type) {
    case ARG_GEN:
        // small bitmap stored in the function's info table
        bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
        p = retain_small_bitmap(p, n_args, bitmap,
                                pap, c_child_r);
        break;
    case ARG_GEN_BIG:
        retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
                            n_args, pap, c_child_r);
        p += n_args;
        break;
    case ARG_BCO:
        // byte-code object: bitmap lives in the BCO itself
        retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun),
                            n_args, pap, c_child_r);
        p += n_args;
        break;
    default:
        // one of the standard argument patterns
        bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
        p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
        break;
    }
    return p;
}
1529
/* -----------------------------------------------------------------------------
 *  Compute the retainer set of *c0 and all its desecents by traversing.
 *  *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
 *  Invariants:
 *    c0 = cp0 = r0 holds only for root objects.
 *    RSET(cp0) and RSET(r0) are valid, i.e., their
 *    interpretation conforms to the current value of flip (even when they
 *    are interpreted to be NULL).
 *    However, RSET(c0) may be corrupt, i.e., it may not conform to
 *    the current value of flip. If it does not, during the execution
 *    of this function, RSET(c0) must be initialized as well as all
 *    its descendants.
 *  Note:
 *    stackTop must be the same at the beginning and the exit of this function.
 *    *c0 can be TSO (as well as AP_STACK).
 *
 *  This is the core of the profiler: an explicit-stack depth-first
 *  traversal (via push()/pop()) rather than recursion, to avoid
 *  overflowing the C stack on deep heaps.
 * -------------------------------------------------------------------------- */
static void
retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
{
    // c = Current closure
    // cp = Current closure's Parent
    // r = current closures' most recent Retainer
    // c_child_r = current closure's children's most recent retainer
    // first_child = first child of c
    StgClosure *c, *cp, *first_child;
    RetainerSet *s, *retainerSetOfc;
    retainer r, c_child_r;
    StgWord typeOfc;

#ifdef DEBUG_RETAINER
    // StgPtr oldStackTop;
#endif

#ifdef DEBUG_RETAINER
    // oldStackTop = stackTop;
    // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
#endif

    // (c, cp, r) = (c0, cp0, r0)
    c = c0;
    cp = cp0;
    r = r0;
    goto inner_loop;

loop:
    //debugBelch("loop");
    // pop to (c, cp, r);
    pop(&c, &cp, &r);

    if (c == NULL) {
        // traverse stack exhausted: the whole subgraph has been visited
#ifdef DEBUG_RETAINER
        // debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
#endif
        return;
    }

    //debugBelch("inner_loop");

inner_loop:
    // c = current closure under consideration,
    // cp = current closure's parent,
    // r = current closure's most recent retainer
    //
    // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
    //   RSET(cp) and RSET(r) are valid.
    //   RSET(c) is valid only if c has been visited before.
    //
    // Loop invariants (on the relation between c, cp, and r)
    //   if cp is not a retainer, r belongs to RSET(cp).
    //   if cp is a retainer, r == cp.

    typeOfc = get_itbl(c)->type;

#ifdef DEBUG_RETAINER
    switch (typeOfc) {
    case IND_STATIC:
    case CONSTR_NOCAF_STATIC:
    case CONSTR_STATIC:
    case THUNK_STATIC:
    case FUN_STATIC:
        break;
    default:
        if (retainerSetOf(c) == NULL) {   // first visit?
            costArray[typeOfc] += cost(c);
            sumOfNewCost += cost(c);
        }
        break;
    }
#endif

    // special cases
    switch (typeOfc) {
    case TSO:
        // Dead or relocated TSOs are not traversed as-is.
        if (((StgTSO *)c)->what_next == ThreadComplete ||
            ((StgTSO *)c)->what_next == ThreadKilled) {
#ifdef DEBUG_RETAINER
            debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
#endif
            goto loop;
        }
        if (((StgTSO *)c)->what_next == ThreadRelocated) {
#ifdef DEBUG_RETAINER
            debugBelch("ThreadRelocated encountered in retainClosure()\n");
#endif
            // follow the forwarding link to the relocated TSO
            c = (StgClosure *)((StgTSO *)c)->link;
            goto inner_loop;
        }
        break;

    case IND_STATIC:
        // We just skip IND_STATIC, so its retainer set is never computed.
        c = ((StgIndStatic *)c)->indirectee;
        goto inner_loop;
        // static objects with no pointers out, so goto loop.
    case CONSTR_NOCAF_STATIC:
        // It is not just enough not to compute the retainer set for *c; it is
        // mandatory because CONSTR_NOCAF_STATIC are not reachable from
        // scavenged_static_objects, the list from which is assumed to traverse
        // all static objects after major garbage collections.
        goto loop;
    case THUNK_STATIC:
    case FUN_STATIC:
        if (get_itbl(c)->srt_bitmap == 0) {
            // No need to compute the retainer set; no dynamic objects
            // are reachable from *c.
            //
            // Static objects: if we traverse all the live closures,
            // including static closures, during each heap census then
            // we will observe that some static closures appear and
            // disappear.  eg. a closure may contain a pointer to a
            // static function 'f' which is not otherwise reachable
            // (it doesn't indirectly point to any CAFs, so it doesn't
            // appear in any SRTs), so we would find 'f' during
            // traversal.  However on the next sweep there may be no
            // closures pointing to 'f'.
            //
            // We must therefore ignore static closures whose SRT is
            // empty, because these are exactly the closures that may
            // "appear".  A closure with a non-empty SRT, and which is
            // still required, will always be reachable.
            //
            // But what about CONSTR_STATIC?  Surely these may be able
            // to appear, and they don't have SRTs, so we can't
            // check.  So for now, we're calling
            // resetStaticObjectForRetainerProfiling() from the
            // garbage collector to reset the retainer sets in all the
            // reachable static objects.
            goto loop;
        }
    default:
        break;
    }

    // The above objects are ignored in computing the average number of times
    // an object is visited.
    timesAnyObjectVisited++;

    // If this is the first visit to c, initialize its retainer set.
    maybeInitRetainerSet(c);
    retainerSetOfc = retainerSetOf(c);

    // Now compute s:
    //    isRetainer(cp) == rtsTrue => s == NULL
    //    isRetainer(cp) == rtsFalse => s == cp.retainer
    if (isRetainer(cp))
        s = NULL;
    else
        s = retainerSetOf(cp);

    // (c, cp, r, s) is available.

    // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
    if (retainerSetOfc == NULL) {
        // This is the first visit to *c.
        numObjectVisited++;

        if (s == NULL)
            associate(c, singleton(r));
        else
            // s is actually the retainer set of *c!
            associate(c, s);

        // compute c_child_r
        c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
    } else {
        // This is not the first visit to *c.
        if (isMember(r, retainerSetOfc))
            goto loop;          // no need to process child

        if (s == NULL)
            associate(c, addElement(r, retainerSetOfc));
        else {
            // s is not NULL and cp is not a retainer. This means that
            // each time *cp is visited, so is *c. Thus, if s has
            // exactly one more element in its retainer set than c, s
            // is also the new retainer set for *c.
            if (s->num == retainerSetOfc->num + 1) {
                associate(c, s);
            }
            // Otherwise, just add R_r to the current retainer set of *c.
            else {
                associate(c, addElement(r, retainerSetOfc));
            }
        }

        if (isRetainer(c))
            goto loop;          // no need to process child

        // compute c_child_r
        c_child_r = r;
    }

    // now, RSET() of all of *c, *cp, and *r is valid.
    // (c, c_child_r) are available.

    // process child

    // Special case closures: we process these all in one go rather
    // than attempting to save the current position, because doing so
    // would be hard.
    switch (typeOfc) {
    case TSO:
        retainStack(c, c_child_r,
                    ((StgTSO *)c)->sp,
                    ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
        goto loop;

    case PAP:
    {
        StgPAP *pap = (StgPAP *)c;
        retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
        goto loop;
    }

    case AP:
    {
        StgAP *ap = (StgAP *)c;
        retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
        goto loop;
    }

    case AP_STACK:
        retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
        retainStack(c, c_child_r,
                    (StgPtr)((StgAP_STACK *)c)->payload,
                    (StgPtr)((StgAP_STACK *)c)->payload +
                             ((StgAP_STACK *)c)->size);
        goto loop;
    }

    push(c, c_child_r, &first_child);

    // If first_child is null, c has no child.
    // If first_child is not null, the top stack element points to the next
    // object. push() may or may not push a stackElement on the stack.
    if (first_child == NULL)
        goto loop;

    // (c, cp, r) = (first_child, c, c_child_r)
    r = c_child_r;
    cp = c;
    c = first_child;
    goto inner_loop;
}
1794
1795 /* -----------------------------------------------------------------------------
1796 * Compute the retainer set for every object reachable from *tl.
1797 * -------------------------------------------------------------------------- */
1798 static void
1799 retainRoot( StgClosure **tl )
1800 {
1801 // We no longer assume that only TSOs and WEAKs are roots; any closure can
1802 // be a root.
1803
1804 ASSERT(isEmptyRetainerStack());
1805 currentStackBoundary = stackTop;
1806
1807 if (*tl != &stg_END_TSO_QUEUE_closure && isRetainer(*tl)) {
1808 retainClosure(*tl, *tl, getRetainerFrom(*tl));
1809 } else {
1810 retainClosure(*tl, *tl, CCS_SYSTEM);
1811 }
1812
1813 // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
1814 // *tl might be a TSO which is ThreadComplete, in which
1815 // case we ignore it for the purposes of retainer profiling.
1816 }
1817
/* -----------------------------------------------------------------------------
 * Compute the retainer set for each of the objects in the heap,
 * starting from scheduler roots, weak pointers, and stable pointers.
 * -------------------------------------------------------------------------- */
static void
computeRetainerSet( void )
{
    StgWeak *weak;
    RetainerSet *rtl;
    nat g;
    StgPtr ml;
    bdescr *bd;
#ifdef DEBUG_RETAINER
    RetainerSet tmpRetainerSet;
#endif

    GetRoots(retainRoot);       // for scheduler roots

    // This function is called after a major GC, when key, value, and finalizer
    // all are guaranteed to be valid, or reachable.
    //
    // The following code assumes that WEAK objects are considered to be roots
    // for retainer profilng.
    for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
        // retainRoot((StgClosure *)weak);
        // retainRoot() dereferences its argument, so passing the address
        // of the local 'weak' hands it the weak closure itself.
        retainRoot((StgClosure **)&weak);

    // Consider roots from the stable ptr table.
    markStablePtrTable(retainRoot);

    // The following code resets the rs field of each unvisited mutable
    // object (computing sumOfNewCostExtra and updating costArray[] when
    // debugging retainer profiler).
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        // NOT TRUE: even G0 has a block on its mutable list
        // ASSERT(g != 0 || (generations[g].mut_list == NULL));

        // Traversing through mut_list is necessary
        // because we can find MUT_VAR objects which have not been
        // visited during retainer profiling.
        for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
            for (ml = bd->start; ml < bd->free; ml++) {

                maybeInitRetainerSet((StgClosure *)*ml);
                rtl = retainerSetOf((StgClosure *)*ml);

#ifdef DEBUG_RETAINER
                if (rtl == NULL) {
                    // first visit to *ml
                    // This is a violation of the interface rule!
                    RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);

                    switch (get_itbl((StgClosure *)ml)->type) {
                    case IND_STATIC:
                        // no cost involved
                        break;
                    case CONSTR_NOCAF_STATIC:
                    case CONSTR_STATIC:
                    case THUNK_STATIC:
                    case FUN_STATIC:
                        barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
                        break;
                    default:
                        // dynamic objects
                        costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
                        sumOfNewCostExtra += cost((StgClosure *)ml);
                        break;
                    }
                }
#endif
            }
        }
    }
}
1891
1892 /* -----------------------------------------------------------------------------
1893 * Traverse all static objects for which we compute retainer sets,
1894 * and reset their rs fields to NULL, which is accomplished by
1895 * invoking maybeInitRetainerSet(). This function must be called
1896 * before zeroing all objects reachable from scavenged_static_objects
1897 * in the case of major gabage collections. See GarbageCollect() in
1898 * GC.c.
1899 * Note:
1900 * The mut_once_list of the oldest generation must also be traversed?
1901 * Why? Because if the evacuation of an object pointed to by a static
1902 * indirection object fails, it is put back to the mut_once_list of
1903 * the oldest generation.
1904 * However, this is not necessary because any static indirection objects
1905 * are just traversed through to reach dynamic objects. In other words,
1906 * they are not taken into consideration in computing retainer sets.
1907 * -------------------------------------------------------------------------- */
1908 void
1909 resetStaticObjectForRetainerProfiling( void )
1910 {
1911 #ifdef DEBUG_RETAINER
1912 nat count;
1913 #endif
1914 StgClosure *p;
1915
1916 #ifdef DEBUG_RETAINER
1917 count = 0;
1918 #endif
1919 p = scavenged_static_objects;
1920 while (p != END_OF_STATIC_LIST) {
1921 #ifdef DEBUG_RETAINER
1922 count++;
1923 #endif
1924 switch (get_itbl(p)->type) {
1925 case IND_STATIC:
1926 // Since we do not compute the retainer set of any
1927 // IND_STATIC object, we don't have to reset its retainer
1928 // field.
1929 p = (StgClosure*)*IND_STATIC_LINK(p);
1930 break;
1931 case THUNK_STATIC:
1932 maybeInitRetainerSet(p);
1933 p = (StgClosure*)*THUNK_STATIC_LINK(p);
1934 break;
1935 case FUN_STATIC:
1936 maybeInitRetainerSet(p);
1937 p = (StgClosure*)*FUN_STATIC_LINK(p);
1938 break;
1939 case CONSTR_STATIC:
1940 maybeInitRetainerSet(p);
1941 p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
1942 break;
1943 default:
1944 barf("resetStaticObjectForRetainerProfiling: %p (%s)",
1945 p, get_itbl(p)->type);
1946 break;
1947 }
1948 }
1949 #ifdef DEBUG_RETAINER
1950 // debugBelch("count in scavenged_static_objects = %d\n", count);
1951 #endif
1952 }
1953
/* -----------------------------------------------------------------------------
 * Perform retainer profiling.
 * N is the oldest generation being profilied, where the generations are
 * numbered starting at 0.
 * Invariants:
 * Note:
 *   This function should be called only immediately after major garbage
 *   collection.
 * ------------------------------------------------------------------------- */
void
retainerProfile(void)
{
#ifdef DEBUG_RETAINER
  nat i;
  nat totalHeapSize;        // total raw heap size (computed by linear scanning)
#endif

#ifdef DEBUG_RETAINER
  debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
#endif

  stat_startRP();

  // We haven't flipped the bit yet.
#ifdef DEBUG_RETAINER
  debugBelch("Before traversing:\n");
  sumOfCostLinear = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArrayLinear[i] = 0;
  totalHeapSize = checkHeapSanityForRetainerProfiling();

  debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
  /*
  debugBelch("costArrayLinear[] = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    debugBelch("[%u:%u] ", i, costArrayLinear[i]);
  debugBelch("\n");
  */

  ASSERT(sumOfCostLinear == totalHeapSize);

/*
#define pcostArrayLinear(index) \
  if (costArrayLinear[index] > 0) \
    debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
  pcostArrayLinear(THUNK_STATIC);
  pcostArrayLinear(FUN_STATIC);
  pcostArrayLinear(CONSTR_STATIC);
  pcostArrayLinear(CONSTR_NOCAF_STATIC);
*/
#endif

  // Now we flips flip.
  // Flipping invalidates every retainer-set field written during the
  // previous pass (see the explanation of flip at the top of this file).
  flip = flip ^ 1;

#ifdef DEBUG_RETAINER
  stackSize = 0;
  maxStackSize = 0;
  cStackSize = 0;
  maxCStackSize = 0;
#endif
  numObjectVisited = 0;
  timesAnyObjectVisited = 0;

#ifdef DEBUG_RETAINER
  debugBelch("During traversing:\n");
  sumOfNewCost = 0;
  sumOfNewCostExtra = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArray[i] = 0;
#endif

  /*
    We initialize the traverse stack each time the retainer profiling is
    performed (because the traverse stack size varies on each retainer profiling
    and this operation is not costly anyhow). However, we just refresh the
    retainer sets.
   */
  initializeTraverseStack();
#ifdef DEBUG_RETAINER
  initializeAllRetainerSet();
#else
  refreshAllRetainerSet();
#endif
  computeRetainerSet();

#ifdef DEBUG_RETAINER
  debugBelch("After traversing:\n");
  sumOfCostLinear = 0;
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    costArrayLinear[i] = 0;
  totalHeapSize = checkHeapSanityForRetainerProfiling();

  debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
  ASSERT(sumOfCostLinear == totalHeapSize);

  // now, compare the two results
  /*
    Note:
      costArray[] must be exactly the same as costArrayLinear[].
      Known exceptions:
        1) Dead weak pointers, whose type is CONSTR. These objects are not
           reachable from any roots.
  */
  debugBelch("Comparison:\n");
  debugBelch("\tcostArrayLinear[] (must be empty) = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    if (costArray[i] != costArrayLinear[i])
      // nothing should be printed except MUT_VAR after major GCs
      debugBelch("[%u:%u] ", i, costArrayLinear[i]);
  debugBelch("\n");

  debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
  debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
  debugBelch("\tcostArray[] (must be empty) = ");
  for (i = 0;i < N_CLOSURE_TYPES; i++)
    if (costArray[i] != costArrayLinear[i])
      // nothing should be printed except MUT_VAR after major GCs
      debugBelch("[%u:%u] ", i, costArray[i]);
  debugBelch("\n");

  // only for major garbage collection
  ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
#endif

  // post-processing
  closeTraverseStack();
#ifdef DEBUG_RETAINER
  closeAllRetainerSet();
#else
  // Note that there is no post-processing for the retainer sets.
#endif
  retainerGeneration++;

  stat_endRP(
    retainerGeneration - 1,   // retainerGeneration has just been incremented!
#ifdef DEBUG_RETAINER
    maxCStackSize, maxStackSize,
#endif
    (double)timesAnyObjectVisited / numObjectVisited);
}
2095
2096 /* -----------------------------------------------------------------------------
2097 * DEBUGGING CODE
2098 * -------------------------------------------------------------------------- */
2099
2100 #ifdef DEBUG_RETAINER
2101
// Heuristic check that [r] plausibly points at a live closure: it must be
// either a static closure or inside a heap block that has not been freed
// (BF_FREE clear), and the first word it points at must not be the
// 0xaaaaaaaa pattern that DEBUG builds scribble over freed memory.
#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
        ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
        ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
2105
2106 static nat
2107 sanityCheckHeapClosure( StgClosure *c )
2108 {
2109 StgInfoTable *info;
2110
2111 ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
2112 ASSERT(!closure_STATIC(c));
2113 ASSERT(LOOKS_LIKE_PTR(c));
2114
2115 if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
2116 if (get_itbl(c)->type == CONSTR &&
2117 !strcmp(get_itbl(c)->prof.closure_type, "DEAD_WEAK") &&
2118 !strcmp(get_itbl(c)->prof.closure_desc, "DEAD_WEAK")) {
2119 debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
2120 costArray[get_itbl(c)->type] += cost(c);
2121 sumOfNewCost += cost(c);
2122 } else
2123 debugBelch(
2124 "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
2125 flip, c, get_itbl(c)->type,
2126 get_itbl(c)->prof.closure_type, get_itbl(c)->prof.closure_desc,
2127 RSET(c));
2128 } else {
2129 // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
2130 }
2131
2132 return closure_sizeW(c);
2133 }
2134
2135 static nat
2136 heapCheck( bdescr *bd )
2137 {
2138 StgPtr p;
2139 static nat costSum, size;
2140
2141 costSum = 0;
2142 while (bd != NULL) {
2143 p = bd->start;
2144 while (p < bd->free) {
2145 size = sanityCheckHeapClosure((StgClosure *)p);
2146 sumOfCostLinear += size;
2147 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2148 p += size;
2149 // no need for slop check; I think slops are not used currently.
2150 }
2151 ASSERT(p == bd->free);
2152 costSum += bd->free - bd->start;
2153 bd = bd->link;
2154 }
2155
2156 return costSum;
2157 }
2158
2159 static nat
2160 smallObjectPoolCheck(void)
2161 {
2162 bdescr *bd;
2163 StgPtr p;
2164 static nat costSum, size;
2165
2166 bd = small_alloc_list;
2167 costSum = 0;
2168
2169 // first block
2170 if (bd == NULL)
2171 return costSum;
2172
2173 p = bd->start;
2174 while (p < alloc_Hp) {
2175 size = sanityCheckHeapClosure((StgClosure *)p);
2176 sumOfCostLinear += size;
2177 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2178 p += size;
2179 }
2180 ASSERT(p == alloc_Hp);
2181 costSum += alloc_Hp - bd->start;
2182
2183 bd = bd->link;
2184 while (bd != NULL) {
2185 p = bd->start;
2186 while (p < bd->free) {
2187 size = sanityCheckHeapClosure((StgClosure *)p);
2188 sumOfCostLinear += size;
2189 costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
2190 p += size;
2191 }
2192 ASSERT(p == bd->free);
2193 costSum += bd->free - bd->start;
2194 bd = bd->link;
2195 }
2196
2197 return costSum;
2198 }
2199
2200 static nat
2201 chainCheck(bdescr *bd)
2202 {
2203 nat costSum, size;
2204
2205 costSum = 0;
2206 while (bd != NULL) {
2207 // bd->free - bd->start is not an accurate measurement of the
2208 // object size. Actually it is always zero, so we compute its
2209 // size explicitly.
2210 size = sanityCheckHeapClosure((StgClosure *)bd->start);
2211 sumOfCostLinear += size;
2212 costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
2213 costSum += size;
2214 bd = bd->link;
2215 }
2216
2217 return costSum;
2218 }
2219
2220 static nat
2221 checkHeapSanityForRetainerProfiling( void )
2222 {
2223 nat costSum, g, s;
2224
2225 costSum = 0;
2226 debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2227 if (RtsFlags.GcFlags.generations == 1) {
2228 costSum += heapCheck(g0s0->to_blocks);
2229 debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2230 costSum += chainCheck(g0s0->large_objects);
2231 debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2232 } else {
2233 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
2234 for (s = 0; s < generations[g].n_steps; s++) {
2235 /*
2236 After all live objects have been scavenged, the garbage
2237 collector may create some objects in
2238 scheduleFinalizers(). These objects are created throught
2239 allocate(), so the small object pool or the large object
2240 pool of the g0s0 may not be empty.
2241 */
2242 if (g == 0 && s == 0) {
2243 costSum += smallObjectPoolCheck();
2244 debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2245 costSum += chainCheck(generations[g].steps[s].large_objects);
2246 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2247 } else {
2248 costSum += heapCheck(generations[g].steps[s].blocks);
2249 debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2250 costSum += chainCheck(generations[g].steps[s].large_objects);
2251 debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
2252 }
2253 }
2254 }
2255
2256 return costSum;
2257 }
2258
2259 void
2260 findPointer(StgPtr p)
2261 {
2262 StgPtr q, r, e;
2263 bdescr *bd;
2264 nat g, s;
2265
2266 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2267 for (s = 0; s < generations[g].n_steps; s++) {
2268 // if (g == 0 && s == 0) continue;
2269 bd = generations[g].steps[s].blocks;
2270 for (; bd; bd = bd->link) {
2271 for (q = bd->start; q < bd->free; q++) {
2272 if (*q == (StgWord)p) {
2273 r = q;
2274 while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
2275 debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
2276 // return;
2277 }
2278 }
2279 }
2280 bd = generations[g].steps[s].large_objects;
2281 for (; bd; bd = bd->link) {
2282 e = bd->start + cost((StgClosure *)bd->start);
2283 for (q = bd->start; q < e; q++) {
2284 if (*q == (StgWord)p) {
2285 r = q;
2286 while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
2287 debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
2288 // return;
2289 }
2290 }
2291 }
2292 }
2293 }
2294 }
2295
2296 static void
2297 belongToHeap(StgPtr p)
2298 {
2299 bdescr *bd;
2300 nat g, s;
2301
2302 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
2303 for (s = 0; s < generations[g].n_steps; s++) {
2304 // if (g == 0 && s == 0) continue;
2305 bd = generations[g].steps[s].blocks;
2306 for (; bd; bd = bd->link) {
2307 if (bd->start <= p && p < bd->free) {
2308 debugBelch("Belongs to gen[%d], step[%d]", g, s);
2309 return;
2310 }
2311 }
2312 bd = generations[g].steps[s].large_objects;
2313 for (; bd; bd = bd->link) {
2314 if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
2315 debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
2316 return;
2317 }
2318 }
2319 }
2320 }
2321 }
2322 #endif /* DEBUG_RETAINER */
2323
2324 #endif /* PROFILING */