rts/ProfHeap.c
1 /* ----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2003
4 *
5 * Support for heap profiling
6 *
7 * --------------------------------------------------------------------------*/
8
9 #include "PosixSource.h"
10 #include "Rts.h"
11
12 #include "Capability.h"
13 #include "RtsFlags.h"
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "ProfHeap.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "RetainerProfile.h"
20 #include "LdvProfile.h"
21 #include "Arena.h"
22 #include "Printer.h"
23 #include "Trace.h"
24 #include "sm/GCThread.h"
25
26 #include <string.h>
27
28 /* -----------------------------------------------------------------------------
29 * era stores the current time period. It is the same as the
30 * number of censuses that have been performed.
31 *
32 * RESTRICTION:
33  *    era must fit in LDV_SHIFT (15 or 30) bits.
34 * Invariants:
35 * era is initialized to 1 in initHeapProfiling().
36 *
37 * max_era is initialized to 2^LDV_SHIFT in initHeapProfiling().
38 * When era reaches max_era, the profiling stops because a closure can
39 * store only up to (max_era - 1) as its creation or last use time.
40 * -------------------------------------------------------------------------- */
41 unsigned int era;
42 static uint32_t max_era;
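
/* A worked example of the restriction above (illustrative only): with
 * LDV_SHIFT == 15, max_era == 1 << 15 == 32768, so at most 32767
 * censuses can be recorded before profiling has to stop; with
 * LDV_SHIFT == 30 the corresponding limit is 2^30 - 1 censuses.
 */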
43
44 /* -----------------------------------------------------------------------------
45 * Counters
46 *
47 * For most heap profiles each closure identity gets a simple count
48 * of live words in the heap at each census. However, if we're
49 * selecting by biography, then we have to keep the various
50 * lag/drag/void counters for each identity.
51 * -------------------------------------------------------------------------- */
52 typedef struct _counter {
53 const void *identity;
54 union {
55 ssize_t resid;
56 struct {
57 // Total sizes of:
58 ssize_t prim; // 'inherently used' closures
59 ssize_t not_used; // 'never used' closures
60 ssize_t used; // 'used at least once' closures
61 ssize_t void_total; // 'destroyed without being used' closures
62 ssize_t drag_total; // 'used at least once and waiting to die'
63 } ldv;
64 } c;
65 struct _counter *next;
66 } counter;
67
68 STATIC_INLINE void
69 initLDVCtr( counter *ctr )
70 {
71 ctr->c.ldv.prim = 0;
72 ctr->c.ldv.not_used = 0;
73 ctr->c.ldv.used = 0;
74 ctr->c.ldv.void_total = 0;
75 ctr->c.ldv.drag_total = 0;
76 }
77
78 typedef struct {
79 double time; // the time in MUT time when the census is made
80 HashTable * hash;
81 counter * ctrs;
82 Arena * arena;
83
84 // for LDV profiling, when just displaying by LDV
85 ssize_t prim;
86 ssize_t not_used;
87 ssize_t used;
88 ssize_t void_total;
89 ssize_t drag_total;
90 } Census;
91
92 static Census *censuses = NULL;
93 static uint32_t n_censuses = 0;
94
95 #ifdef PROFILING
96 static void aggregateCensusInfo( void );
97 #endif
98
99 static void dumpCensus( Census *census );
100
101 static rtsBool closureSatisfiesConstraints( const StgClosure* p );
102
103 /* ----------------------------------------------------------------------------
104 * Find the "closure identity", which is a unique pointer representing
105 * the band to which this closure's heap space is attributed in the
106 * heap profile.
107 * ------------------------------------------------------------------------- */
108 static const void *
109 closureIdentity( const StgClosure *p )
110 {
111 switch (RtsFlags.ProfFlags.doHeapProfile) {
112
113 #ifdef PROFILING
114 case HEAP_BY_CCS:
115 return p->header.prof.ccs;
116 case HEAP_BY_MOD:
117 return p->header.prof.ccs->cc->module;
118 case HEAP_BY_DESCR:
119 return GET_PROF_DESC(get_itbl(p));
120 case HEAP_BY_TYPE:
121 return GET_PROF_TYPE(get_itbl(p));
122 case HEAP_BY_RETAINER:
123 // AFAIK, the only closures in the heap which might not have a
124 // valid retainer set are DEAD_WEAK closures.
125 if (isRetainerSetFieldValid(p))
126 return retainerSetOf(p);
127 else
128 return NULL;
129
130 #else
131 case HEAP_BY_CLOSURE_TYPE:
132 {
133 const StgInfoTable *info;
134 info = get_itbl(p);
135 switch (info->type) {
136 case CONSTR:
137 case CONSTR_1_0:
138 case CONSTR_0_1:
139 case CONSTR_2_0:
140 case CONSTR_1_1:
141 case CONSTR_0_2:
142 case CONSTR_STATIC:
143 case CONSTR_NOCAF_STATIC:
144 return GET_CON_DESC(itbl_to_con_itbl(info));
145 default:
146 return closure_type_names[info->type];
147 }
148 }
149
150 #endif
151 default:
152 barf("closureIdentity");
153 }
154 }
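
/* A sketch of the identities chosen above, assuming the usual RTS flag
 * spellings (illustrative, not exhaustive):
 *
 *   +RTS -hc  (HEAP_BY_CCS)       -> the closure's CostCentreStack
 *   +RTS -hm  (HEAP_BY_MOD)       -> the cost centre's module name string
 *   +RTS -hd  (HEAP_BY_DESCR)     -> the info table's closure description
 *   +RTS -hy  (HEAP_BY_TYPE)      -> the info table's type description
 *   +RTS -hr  (HEAP_BY_RETAINER)  -> the closure's retainer set (or NULL)
 *   +RTS -hT  (HEAP_BY_CLOSURE_TYPE, non-profiled RTS)
 *                                 -> constructor name or closure type name
 */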
155
156 /* --------------------------------------------------------------------------
157 * Profiling type predicates
158 * ----------------------------------------------------------------------- */
159 #ifdef PROFILING
160 STATIC_INLINE rtsBool
161 doingLDVProfiling( void )
162 {
163 return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
164 || RtsFlags.ProfFlags.bioSelector != NULL);
165 }
166
167 rtsBool
168 doingRetainerProfiling( void )
169 {
170 return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER
171 || RtsFlags.ProfFlags.retainerSelector != NULL);
172 }
173 #endif /* PROFILING */
174
175 // Processes a closure 'c' being destroyed whose size is 'size'.
176 // Make sure that LDV_recordDead() is not invoked on 'inherently used' closures
177 // such as TSO; they should not be involved in computing dragNew or voidNew.
178 //
179 // Even though era is checked in both LdvCensusForDead() and
180 // LdvCensusKillAll(), we still need to make sure that era is > 0 because
181 // LDV_recordDead() may be called from elsewhere in the runtime system. E.g.,
182 // when a thunk is replaced by an indirection object.
183
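/* A worked example of the accounting done below (the era numbers and
 * size S are hypothetical): a closure of S words (excluding the
 * profiling header) created in era 3 that dies without ever being used
 * when era == 7 causes LDV_recordDead() to add S to
 * censuses[3].void_total and subtract S from censuses[7].void_total.
 * After the forward propagation in aggregateCensusInfo() it therefore
 * shows up as VOID in censuses 3..6 and contributes nothing from
 * census 7 onwards.
 */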
184 #ifdef PROFILING
185 void
186 LDV_recordDead( const StgClosure *c, uint32_t size )
187 {
188 const void *id;
189 uint32_t t;
190 counter *ctr;
191
192 if (era > 0 && closureSatisfiesConstraints(c)) {
193 size -= sizeofW(StgProfHeader);
194 ASSERT(LDVW(c) != 0);
195 if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) {
196 t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT;
197 if (t < era) {
198 if (RtsFlags.ProfFlags.bioSelector == NULL) {
199 censuses[t].void_total += size;
200 censuses[era].void_total -= size;
201 ASSERT(censuses[t].void_total < censuses[t].not_used);
202 } else {
203 id = closureIdentity(c);
204 ctr = lookupHashTable(censuses[t].hash, (StgWord)id);
205 ASSERT( ctr != NULL );
206 ctr->c.ldv.void_total += size;
207 ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
208 if (ctr == NULL) {
209 ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
210 initLDVCtr(ctr);
211 insertHashTable(censuses[era].hash, (StgWord)id, ctr);
212 ctr->identity = id;
213 ctr->next = censuses[era].ctrs;
214 censuses[era].ctrs = ctr;
215 }
216 ctr->c.ldv.void_total -= size;
217 }
218 }
219 } else {
220 t = LDVW((c)) & LDV_LAST_MASK;
221 if (t + 1 < era) {
222 if (RtsFlags.ProfFlags.bioSelector == NULL) {
223 censuses[t+1].drag_total += size;
224 censuses[era].drag_total -= size;
225 } else {
226 const void *id;
227 id = closureIdentity(c);
228 ctr = lookupHashTable(censuses[t+1].hash, (StgWord)id);
229 ASSERT( ctr != NULL );
230 ctr->c.ldv.drag_total += size;
231 ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
232 if (ctr == NULL) {
233 ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
234 initLDVCtr(ctr);
235 insertHashTable(censuses[era].hash, (StgWord)id, ctr);
236 ctr->identity = id;
237 ctr->next = censuses[era].ctrs;
238 censuses[era].ctrs = ctr;
239 }
240 ctr->c.ldv.drag_total -= size;
241 }
242 }
243 }
244 }
245 }
246 #endif
247
248 /* --------------------------------------------------------------------------
249  * Initialize censuses[era].
250 * ----------------------------------------------------------------------- */
251
252 STATIC_INLINE void
253 initEra(Census *census)
254 {
255 census->hash = allocHashTable();
256 census->ctrs = NULL;
257 census->arena = newArena();
258
259 census->not_used = 0;
260 census->used = 0;
261 census->prim = 0;
262 census->void_total = 0;
263 census->drag_total = 0;
264 }
265
266 STATIC_INLINE void
267 freeEra(Census *census)
268 {
269 arenaFree(census->arena);
270 freeHashTable(census->hash, NULL);
271 }
272
273 /* --------------------------------------------------------------------------
274  * Increases era by 1 and initializes censuses[era].
275  * Reallocates censuses[] and increases its size if needed.
276 * ----------------------------------------------------------------------- */
277
278 static void
279 nextEra( void )
280 {
281 #ifdef PROFILING
282 if (doingLDVProfiling()) {
283 era++;
284
285 if (era == max_era) {
286 errorBelch("Maximum number of censuses reached.");
287 if (rtsConfig.rts_opts_suggestions == rtsTrue) {
288 if (rtsConfig.rts_opts_enabled == RtsOptsAll) {
289 errorBelch("Use `+RTS -i' to reduce censuses.");
290 } else {
291 errorBelch("Relink with -rtsopts and "
292 "use `+RTS -i' to reduce censuses.");
293 }
294 }
295 stg_exit(EXIT_FAILURE);
296 }
297
298 if (era == n_censuses) {
299 n_censuses *= 2;
300 censuses = stgReallocBytes(censuses, sizeof(Census) * n_censuses,
301 "nextEra");
302 }
303 }
304 #endif /* PROFILING */
305
306 initEra( &censuses[era] );
307 }
308
309 /* ----------------------------------------------------------------------------
310 * Heap profiling by info table
311 * ------------------------------------------------------------------------- */
312
313 #if !defined(PROFILING)
314 FILE *hp_file;
315 static char *hp_filename;
316
317 void freeProfiling (void)
318 {
319 }
320
321 void initProfiling (void)
322 {
323 char *prog;
324
325 prog = stgMallocBytes(strlen(prog_name) + 1, "initProfiling2");
326 strcpy(prog, prog_name);
327 #ifdef mingw32_HOST_OS
328 // on Windows, drop the .exe suffix if there is one
329 {
330 char *suff;
331 suff = strrchr(prog,'.');
332 if (suff != NULL && !strcmp(suff,".exe")) {
333 *suff = '\0';
334 }
335 }
336 #endif
337
338 if (RtsFlags.ProfFlags.doHeapProfile) {
339 /* Initialise the log file name */
340 hp_filename = stgMallocBytes(strlen(prog) + 6, "hpFileName");
341 sprintf(hp_filename, "%s.hp", prog);
342
343 /* open the log file */
344 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
345 debugBelch("Can't open profiling report file %s\n",
346 hp_filename);
347 RtsFlags.ProfFlags.doHeapProfile = 0;
348 stgFree(prog);
349 return;
350 }
351 }
352
353 stgFree(prog);
354
355 initHeapProfiling();
356 }
357
358 void endProfiling( void )
359 {
360 endHeapProfiling();
361 }
362 #endif /* !PROFILING */
363
364 static void
365 printSample(rtsBool beginSample, StgDouble sampleValue)
366 {
367 fprintf(hp_file, "%s %f\n",
368 (beginSample ? "BEGIN_SAMPLE" : "END_SAMPLE"),
369 sampleValue);
370 if (!beginSample) {
371 fflush(hp_file);
372 }
373 }
374
375 static void
376 dumpCostCentresToEventLog(void)
377 {
378 #ifdef PROFILING
379 CostCentre *cc, *next;
380 for (cc = CC_LIST; cc != NULL; cc = next) {
381 next = cc->link;
382 traceHeapProfCostCentre(cc->ccID, cc->label, cc->module,
383 cc->srcloc, cc->is_caf);
384 }
385 #endif
386 }
387
388 /* --------------------------------------------------------------------------
389  * Initialize the heap profiler
390 * ----------------------------------------------------------------------- */
391 uint32_t
392 initHeapProfiling(void)
393 {
394 if (! RtsFlags.ProfFlags.doHeapProfile) {
395 return 0;
396 }
397
398 #ifdef PROFILING
399 if (doingLDVProfiling() && doingRetainerProfiling()) {
400 errorBelch("cannot mix -hb and -hr");
401 stg_exit(EXIT_FAILURE);
402 }
403 #endif
404
405 // we only count eras if we're doing LDV profiling. Otherwise era
406 // is fixed at zero.
407 #ifdef PROFILING
408 if (doingLDVProfiling()) {
409 era = 1;
410 } else
411 #endif
412 {
413 era = 0;
414 }
415
416 // max_era = 2^LDV_SHIFT
417 max_era = 1 << LDV_SHIFT;
418
419 n_censuses = 32;
420 censuses = stgMallocBytes(sizeof(Census) * n_censuses, "initHeapProfiling");
421
422 initEra( &censuses[era] );
423
424 /* initProfilingLogFile(); */
425 fprintf(hp_file, "JOB \"%s", prog_name);
426
427 #ifdef PROFILING
428 {
429 int count;
430 for(count = 1; count < prog_argc; count++)
431 fprintf(hp_file, " %s", prog_argv[count]);
432 fprintf(hp_file, " +RTS");
433 for(count = 0; count < rts_argc; count++)
434 fprintf(hp_file, " %s", rts_argv[count]);
435 }
436 #endif /* PROFILING */
437
438 fprintf(hp_file, "\"\n" );
439
440 fprintf(hp_file, "DATE \"%s\"\n", time_str());
441
442 fprintf(hp_file, "SAMPLE_UNIT \"seconds\"\n");
443 fprintf(hp_file, "VALUE_UNIT \"bytes\"\n");
444
445 printSample(rtsTrue, 0);
446 printSample(rtsFalse, 0);
447
448 #ifdef PROFILING
449 if (doingRetainerProfiling()) {
450 initRetainerProfiling();
451 }
452 #endif
453
454 traceHeapProfBegin(0);
455 dumpCostCentresToEventLog();
456
457 return 0;
458 }
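
/* The code above emits the standard .hp file header.  For a
 * hypothetical invocation it looks roughly like this (the JOB string
 * and date are invented; printSample() prints times with %f):
 *
 *   JOB "hello +RTS -hc"
 *   DATE "..."
 *   SAMPLE_UNIT "seconds"
 *   VALUE_UNIT "bytes"
 *   BEGIN_SAMPLE 0.000000
 *   END_SAMPLE 0.000000
 */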
459
460 void
461 endHeapProfiling(void)
462 {
463 StgDouble seconds;
464
465 if (! RtsFlags.ProfFlags.doHeapProfile) {
466 return;
467 }
468
469 #ifdef PROFILING
470 if (doingRetainerProfiling()) {
471 endRetainerProfiling();
472 }
473 #endif
474
475 #ifdef PROFILING
476 if (doingLDVProfiling()) {
477 uint32_t t;
478 LdvCensusKillAll();
479 aggregateCensusInfo();
480 for (t = 1; t < era; t++) {
481 dumpCensus( &censuses[t] );
482 }
483 }
484 #endif
485
486 #ifdef PROFILING
487 if (doingLDVProfiling()) {
488 uint32_t t;
489 if (RtsFlags.ProfFlags.bioSelector != NULL) {
490 for (t = 1; t <= era; t++) {
491 freeEra( &censuses[t] );
492 }
493 } else {
494 freeEra( &censuses[era] );
495 }
496 } else {
497 freeEra( &censuses[0] );
498 }
499 #else
500 freeEra( &censuses[0] );
501 #endif
502
503 stgFree(censuses);
504
505 seconds = mut_user_time();
506 printSample(rtsTrue, seconds);
507 printSample(rtsFalse, seconds);
508 fclose(hp_file);
509 }
510
511
512
513 #ifdef PROFILING
514 static size_t
515 buf_append(char *p, const char *q, char *end)
516 {
517 int m;
518
519 for (m = 0; p < end; p++, q++, m++) {
520 *p = *q;
521 if (*q == '\0') { break; }
522 }
523 return m;
524 }
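
/* buf_append() copies q into the buffer at p, stopping after the
 * terminating NUL or at 'end', and returns the number of characters
 * copied excluding that NUL.  For example (illustrative), with room to
 * spare buf_append(p, "CAF", end) writes "CAF\0" and returns 3, so the
 * caller's p += ... leaves p pointing at the NUL, which the next append
 * overwrites.  If the buffer fills up first, no NUL is written and the
 * caller detects this via p >= buf_end.
 */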
525
526 static void
527 fprint_ccs(FILE *fp, CostCentreStack *ccs, uint32_t max_length)
528 {
529 char buf[max_length+1], *p, *buf_end;
530
531 // MAIN on its own gets printed as "MAIN", otherwise we ignore MAIN.
532 if (ccs == CCS_MAIN) {
533 fprintf(fp, "MAIN");
534 return;
535 }
536
537 fprintf(fp, "(%" FMT_Int ")", ccs->ccsID);
538
539 p = buf;
540 buf_end = buf + max_length + 1;
541
542 // keep printing components of the stack until we run out of space
543 // in the buffer. If we run out of space, end with "...".
544 for (; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack) {
545
546 // CAF cost centres print as M.CAF, but we leave the module
547 // name out of all the others to save space.
548 if (!strcmp(ccs->cc->label,"CAF")) {
549 p += buf_append(p, ccs->cc->module, buf_end);
550 p += buf_append(p, ".CAF", buf_end);
551 } else {
552 p += buf_append(p, ccs->cc->label, buf_end);
553 if (ccs->prevStack != NULL && ccs->prevStack != CCS_MAIN) {
554 p += buf_append(p, "/", buf_end);
555 }
556 }
557
558 if (p >= buf_end) {
559 sprintf(buf+max_length-4, "...");
560 break;
561 }
562 }
563 fprintf(fp, "%s", buf);
564 }
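
/* A sketch of the output produced above (cost-centre names and the id
 * are hypothetical): for a stack MAIN -> CAF:Main -> foo -> bar with
 * ccsID 42, fprint_ccs() prints "(42)bar/foo/Main.CAF"; if the rendered
 * stack does not fit into max_length characters it is cut short and
 * ends with "...".
 */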
565
566 rtsBool
567 strMatchesSelector( const char* str, const char* sel )
568 {
569 const char* p;
570 // debugBelch("str_matches_selector %s %s\n", str, sel);
571 while (1) {
572 // Compare str against wherever we've got to in sel.
573 p = str;
574 while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
575 p++; sel++;
576 }
577         // Match if all of str was used and we have reached the end of a sel fragment.
578 if (*p == '\0' && (*sel == ',' || *sel == '\0'))
579 return rtsTrue;
580
581 // No match. Advance sel to the start of the next elem.
582 while (*sel != ',' && *sel != '\0') sel++;
583 if (*sel == ',') sel++;
584
585         /* Have we run out of sel? */
586 if (*sel == '\0') return rtsFalse;
587 }
588 }
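
/* Example behaviour (the selector strings are hypothetical):
 *
 *   strMatchesSelector("lag",  "lag,drag")  == rtsTrue
 *   strMatchesSelector("drag", "lag,drag")  == rtsTrue
 *   strMatchesSelector("use",  "lag,drag")  == rtsFalse
 *   strMatchesSelector("lag",  "lagging")   == rtsFalse  // whole elements only
 */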
589
590 #endif /* PROFILING */
591
592 /* -----------------------------------------------------------------------------
593 * Figure out whether a closure should be counted in this census, by
594 * testing against all the specified constraints.
595 * -------------------------------------------------------------------------- */
596 static rtsBool
597 closureSatisfiesConstraints( const StgClosure* p )
598 {
599 #if !defined(PROFILING)
600 (void)p; /* keep gcc -Wall happy */
601 return rtsTrue;
602 #else
603 rtsBool b;
604
605 // The CCS has a selected field to indicate whether this closure is
606 // deselected by not being mentioned in the module, CC, or CCS
607 // selectors.
608 if (!p->header.prof.ccs->selected) {
609 return rtsFalse;
610 }
611
612 if (RtsFlags.ProfFlags.descrSelector) {
613 b = strMatchesSelector( (GET_PROF_DESC(get_itbl((StgClosure *)p))),
614 RtsFlags.ProfFlags.descrSelector );
615 if (!b) return rtsFalse;
616 }
617 if (RtsFlags.ProfFlags.typeSelector) {
618 b = strMatchesSelector( (GET_PROF_TYPE(get_itbl((StgClosure *)p))),
619 RtsFlags.ProfFlags.typeSelector );
620 if (!b) return rtsFalse;
621 }
622 if (RtsFlags.ProfFlags.retainerSelector) {
623 RetainerSet *rs;
624 uint32_t i;
625 // We must check that the retainer set is valid here. One
626         // reason it might not be valid is if this closure is
627         // a newly deceased weak pointer (i.e. a DEAD_WEAK), since
628 // these aren't reached by the retainer profiler's traversal.
629 if (isRetainerSetFieldValid((StgClosure *)p)) {
630 rs = retainerSetOf((StgClosure *)p);
631 if (rs != NULL) {
632 for (i = 0; i < rs->num; i++) {
633 b = strMatchesSelector( rs->element[i]->cc->label,
634 RtsFlags.ProfFlags.retainerSelector );
635 if (b) return rtsTrue;
636 }
637 }
638 }
639 return rtsFalse;
640 }
641 return rtsTrue;
642 #endif /* PROFILING */
643 }
644
645 /* -----------------------------------------------------------------------------
646 * Aggregate the heap census info for biographical profiling
647 * -------------------------------------------------------------------------- */
648 #ifdef PROFILING
649 static void
650 aggregateCensusInfo( void )
651 {
652 HashTable *acc;
653 uint32_t t;
654 counter *c, *d, *ctrs;
655 Arena *arena;
656
657 if (!doingLDVProfiling()) return;
658
659 // Aggregate the LDV counters when displaying by biography.
660 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
661 long void_total, drag_total;
662
663 // Now we compute void_total and drag_total for each census
664 // After the program has finished, the void_total field of
665 // each census contains the count of words that were *created*
666 // in this era and were eventually void. Conversely, if a
667 // void closure was destroyed in this era, it will be
668 // represented by a negative count of words in void_total.
669 //
670 // To get the count of live words that are void at each
671 // census, just propagate the void_total count forwards:
672
673 void_total = 0;
674 drag_total = 0;
675 for (t = 1; t < era; t++) { // note: start at 1, not 0
676 void_total += censuses[t].void_total;
677 drag_total += censuses[t].drag_total;
678 censuses[t].void_total = void_total;
679 censuses[t].drag_total = drag_total;
680
681 ASSERT( censuses[t].void_total <= censuses[t].not_used );
682 // should be true because: void_total is the count of
683 // live words that are void at this census, which *must*
684             // be no greater than the number of live words that have
685             // not been used yet.
686
687 ASSERT( censuses[t].drag_total <= censuses[t].used );
688 // similar reasoning as above.
689 }
690
691 return;
692 }
693
694 // otherwise... we're doing a heap profile that is restricted to
695 // some combination of lag, drag, void or use. We've kept all the
696 // census info for all censuses so far, but we still need to
697 // aggregate the counters forwards.
698
699 arena = newArena();
700 acc = allocHashTable();
701 ctrs = NULL;
702
703 for (t = 1; t < era; t++) {
704
705 // first look through all the counters we're aggregating
706 for (c = ctrs; c != NULL; c = c->next) {
707 // if one of the totals is non-zero, then this closure
708 // type must be present in the heap at this census time...
709 d = lookupHashTable(censuses[t].hash, (StgWord)c->identity);
710
711 if (d == NULL) {
712 // if this closure identity isn't present in the
713 // census for this time period, then our running
714 // totals *must* be zero.
715 ASSERT(c->c.ldv.void_total == 0 && c->c.ldv.drag_total == 0);
716
717 // debugCCS(c->identity);
718 // debugBelch(" census=%d void_total=%d drag_total=%d\n",
719 // t, c->c.ldv.void_total, c->c.ldv.drag_total);
720 } else {
721 d->c.ldv.void_total += c->c.ldv.void_total;
722 d->c.ldv.drag_total += c->c.ldv.drag_total;
723 c->c.ldv.void_total = d->c.ldv.void_total;
724 c->c.ldv.drag_total = d->c.ldv.drag_total;
725
726 ASSERT( c->c.ldv.void_total >= 0 );
727 ASSERT( c->c.ldv.drag_total >= 0 );
728 }
729 }
730
731 // now look through the counters in this census to find new ones
732 for (c = censuses[t].ctrs; c != NULL; c = c->next) {
733 d = lookupHashTable(acc, (StgWord)c->identity);
734 if (d == NULL) {
735 d = arenaAlloc( arena, sizeof(counter) );
736 initLDVCtr(d);
737 insertHashTable( acc, (StgWord)c->identity, d );
738 d->identity = c->identity;
739 d->next = ctrs;
740 ctrs = d;
741 d->c.ldv.void_total = c->c.ldv.void_total;
742 d->c.ldv.drag_total = c->c.ldv.drag_total;
743 }
744 ASSERT( c->c.ldv.void_total >= 0 );
745 ASSERT( c->c.ldv.drag_total >= 0 );
746 }
747 }
748
749 freeHashTable(acc, NULL);
750 arenaFree(arena);
751 }
752 #endif
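
/* Continuing the hypothetical example given above LDV_recordDead(): the
 * per-era void_total deltas {+S, 0, 0, 0, -S} recorded for censuses
 * 3..7 become running totals {S, S, S, S, 0} after the loop above, so
 * the closure is counted as void exactly while it was still live.
 */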
753
754 /* -----------------------------------------------------------------------------
755 * Print out the results of a heap census.
756 * -------------------------------------------------------------------------- */
757 static void
758 dumpCensus( Census *census )
759 {
760 counter *ctr;
761 ssize_t count;
762
763 printSample(rtsTrue, census->time);
764 traceHeapProfSampleBegin(era);
765
766 #ifdef PROFILING
767 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
768 fprintf(hp_file, "VOID\t%lu\n",
769 (unsigned long)(census->void_total) * sizeof(W_));
770 fprintf(hp_file, "LAG\t%lu\n",
771 (unsigned long)(census->not_used - census->void_total) * sizeof(W_));
772 fprintf(hp_file, "USE\t%lu\n",
773 (unsigned long)(census->used - census->drag_total) * sizeof(W_));
774 fprintf(hp_file, "INHERENT_USE\t%lu\n",
775 (unsigned long)(census->prim) * sizeof(W_));
776 fprintf(hp_file, "DRAG\t%lu\n",
777 (unsigned long)(census->drag_total) * sizeof(W_));
778 printSample(rtsFalse, census->time);
779 return;
780 }
781 #endif
782
783 for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {
784
785 #ifdef PROFILING
786 if (RtsFlags.ProfFlags.bioSelector != NULL) {
787 count = 0;
788 if (strMatchesSelector("lag", RtsFlags.ProfFlags.bioSelector))
789 count += ctr->c.ldv.not_used - ctr->c.ldv.void_total;
790 if (strMatchesSelector("drag", RtsFlags.ProfFlags.bioSelector))
791 count += ctr->c.ldv.drag_total;
792 if (strMatchesSelector("void", RtsFlags.ProfFlags.bioSelector))
793 count += ctr->c.ldv.void_total;
794 if (strMatchesSelector("use", RtsFlags.ProfFlags.bioSelector))
795 count += ctr->c.ldv.used - ctr->c.ldv.drag_total;
796 } else
797 #endif
798 {
799 count = ctr->c.resid;
800 }
801
802 ASSERT( count >= 0 );
803
804 if (count == 0) continue;
805
806 #if !defined(PROFILING)
807 switch (RtsFlags.ProfFlags.doHeapProfile) {
808 case HEAP_BY_CLOSURE_TYPE:
809 fprintf(hp_file, "%s", (char *)ctr->identity);
810 traceHeapProfSampleString(0, (char *)ctr->identity,
811 count * sizeof(W_));
812 break;
813 }
814 #endif
815
816 #ifdef PROFILING
817 switch (RtsFlags.ProfFlags.doHeapProfile) {
818 case HEAP_BY_CCS:
819 fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
820 RtsFlags.ProfFlags.ccsLength);
821 traceHeapProfSampleCostCentre(0, (CostCentreStack *)ctr->identity,
822 count * sizeof(W_));
823 break;
824 case HEAP_BY_MOD:
825 case HEAP_BY_DESCR:
826 case HEAP_BY_TYPE:
827 fprintf(hp_file, "%s", (char *)ctr->identity);
828 traceHeapProfSampleString(0, (char *)ctr->identity,
829 count * sizeof(W_));
830 break;
831 case HEAP_BY_RETAINER:
832 {
833 RetainerSet *rs = (RetainerSet *)ctr->identity;
834
835 // it might be the distinguished retainer set rs_MANY:
836 if (rs == &rs_MANY) {
837 fprintf(hp_file, "MANY");
838 break;
839 }
840
841 // Mark this retainer set by negating its id, because it
842 // has appeared in at least one census. We print the
843 // values of all such retainer sets into the log file at
844 // the end. A retainer set may exist but not feature in
845 // any censuses if it arose as the intermediate retainer
846 // set for some closure during retainer set calculation.
847 if (rs->id > 0)
848 rs->id = -(rs->id);
849
850 // report in the unit of bytes: * sizeof(StgWord)
851 printRetainerSetShort(hp_file, rs, RtsFlags.ProfFlags.ccsLength);
852 break;
853 }
854 default:
855 barf("dumpCensus; doHeapProfile");
856 }
857 #endif
858
859 fprintf(hp_file, "\t%" FMT_Word "\n", (W_)count * sizeof(W_));
860 }
861
862 printSample(rtsFalse, census->time);
863 }
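
/* A sketch of one census as it appears in the .hp file (the band name
 * and numbers are hypothetical; the separator before the byte count is
 * a tab):
 *
 *   BEGIN_SAMPLE 1.500000
 *   (42)bar/foo/Main.CAF    4096
 *   END_SAMPLE 1.500000
 */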
864
865
866 static void heapProfObject(Census *census, StgClosure *p, size_t size,
867 rtsBool prim
868 #ifndef PROFILING
869 STG_UNUSED
870 #endif
871 )
872 {
873 const void *identity;
874 size_t real_size;
875 counter *ctr;
876
877 identity = NULL;
878
879 #ifdef PROFILING
880 // subtract the profiling overhead
881 real_size = size - sizeofW(StgProfHeader);
882 #else
883 real_size = size;
884 #endif
885
886 if (closureSatisfiesConstraints((StgClosure*)p)) {
887 #ifdef PROFILING
888 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
889 if (prim)
890 census->prim += real_size;
891 else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
892 census->not_used += real_size;
893 else
894 census->used += real_size;
895 } else
896 #endif
897 {
898 identity = closureIdentity((StgClosure *)p);
899
900 if (identity != NULL) {
901 ctr = lookupHashTable(census->hash, (StgWord)identity);
902 if (ctr != NULL) {
903 #ifdef PROFILING
904 if (RtsFlags.ProfFlags.bioSelector != NULL) {
905 if (prim)
906 ctr->c.ldv.prim += real_size;
907 else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
908 ctr->c.ldv.not_used += real_size;
909 else
910 ctr->c.ldv.used += real_size;
911 } else
912 #endif
913 {
914 ctr->c.resid += real_size;
915 }
916 } else {
917 ctr = arenaAlloc( census->arena, sizeof(counter) );
918 initLDVCtr(ctr);
919 insertHashTable( census->hash, (StgWord)identity, ctr );
920 ctr->identity = identity;
921 ctr->next = census->ctrs;
922 census->ctrs = ctr;
923
924 #ifdef PROFILING
925 if (RtsFlags.ProfFlags.bioSelector != NULL) {
926 if (prim)
927 ctr->c.ldv.prim = real_size;
928 else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
929 ctr->c.ldv.not_used = real_size;
930 else
931 ctr->c.ldv.used = real_size;
932 } else
933 #endif
934 {
935 ctr->c.resid = real_size;
936 }
937 }
938 }
939 }
940 }
941 }
942
943 // Compact objects require special handling code because they
944 // are not stored consecutively in memory (rather, each object
945 // is a list of objects), and that would break the while loop
946 // below. But we know that each block holds at most one object
947 // so we don't need the loop.
948 //
949 // See Note [Compact Normal Forms] for details.
950 static void
951 heapCensusCompactList(Census *census, bdescr *bd)
952 {
953 for (; bd != NULL; bd = bd->link) {
954 StgCompactNFDataBlock *block = (StgCompactNFDataBlock*)bd->start;
955 StgCompactNFData *str = block->owner;
956 heapProfObject(census, (StgClosure*)str,
957 compact_nfdata_full_sizeW(str), rtsTrue);
958 }
959 }
960
961 /* -----------------------------------------------------------------------------
962 * Code to perform a heap census.
963 * -------------------------------------------------------------------------- */
964 static void
965 heapCensusChain( Census *census, bdescr *bd )
966 {
967 StgPtr p;
968 const StgInfoTable *info;
969 size_t size;
970 rtsBool prim;
971
972 for (; bd != NULL; bd = bd->link) {
973
974 // HACK: pretend a pinned block is just one big ARR_WORDS
975 // owned by CCS_PINNED. These blocks can be full of holes due
976 // to alignment constraints so we can't traverse the memory
977 // and do a proper census.
978 if (bd->flags & BF_PINNED) {
979 StgClosure arr;
980 SET_HDR(&arr, &stg_ARR_WORDS_info, CCS_PINNED);
981 heapProfObject(census, &arr, bd->blocks * BLOCK_SIZE_W, rtsTrue);
982 continue;
983 }
984
985 p = bd->start;
986
987 // When we shrink a large ARR_WORDS, we do not adjust the free pointer
988 // of the associated block descriptor, thus introducing slop at the end
989 // of the object. This slop remains after GC, violating the assumption
990 // of the loop below that all slop has been eliminated (#11627).
991 // Consequently, we handle large ARR_WORDS objects as a special case.
992 if (bd->flags & BF_LARGE
993 && get_itbl((StgClosure *)p)->type == ARR_WORDS) {
994 size = arr_words_sizeW((StgArrBytes *)p);
995 prim = rtsTrue;
996 heapProfObject(census, (StgClosure *)p, size, prim);
997 continue;
998 }
999
1000 while (p < bd->free) {
1001 info = get_itbl((const StgClosure *)p);
1002 prim = rtsFalse;
1003
1004 switch (info->type) {
1005
1006 case THUNK:
1007 size = thunk_sizeW_fromITBL(info);
1008 break;
1009
1010 case THUNK_1_1:
1011 case THUNK_0_2:
1012 case THUNK_2_0:
1013 size = sizeofW(StgThunkHeader) + 2;
1014 break;
1015
1016 case THUNK_1_0:
1017 case THUNK_0_1:
1018 case THUNK_SELECTOR:
1019 size = sizeofW(StgThunkHeader) + 1;
1020 break;
1021
1022 case CONSTR:
1023 case FUN:
1024 case BLACKHOLE:
1025 case BLOCKING_QUEUE:
1026 case FUN_1_0:
1027 case FUN_0_1:
1028 case FUN_1_1:
1029 case FUN_0_2:
1030 case FUN_2_0:
1031 case CONSTR_1_0:
1032 case CONSTR_0_1:
1033 case CONSTR_1_1:
1034 case CONSTR_0_2:
1035 case CONSTR_2_0:
1036 size = sizeW_fromITBL(info);
1037 break;
1038
1039 case IND:
1040 // Special case/Delicate Hack: INDs don't normally
1041 // appear, since we're doing this heap census right
1042 // after GC. However, GarbageCollect() also does
1043 // resurrectThreads(), which can update some
1044 // blackholes when it calls raiseAsync() on the
1045 // resurrected threads. So we know that any IND will
1046 // be the size of a BLACKHOLE.
1047 size = BLACKHOLE_sizeW();
1048 break;
1049
1050 case BCO:
1051 prim = rtsTrue;
1052 size = bco_sizeW((StgBCO *)p);
1053 break;
1054
1055 case MVAR_CLEAN:
1056 case MVAR_DIRTY:
1057 case TVAR:
1058 case WEAK:
1059 case PRIM:
1060 case MUT_PRIM:
1061 case MUT_VAR_CLEAN:
1062 case MUT_VAR_DIRTY:
1063 prim = rtsTrue;
1064 size = sizeW_fromITBL(info);
1065 break;
1066
1067 case AP:
1068 size = ap_sizeW((StgAP *)p);
1069 break;
1070
1071 case PAP:
1072 size = pap_sizeW((StgPAP *)p);
1073 break;
1074
1075 case AP_STACK:
1076 size = ap_stack_sizeW((StgAP_STACK *)p);
1077 break;
1078
1079 case ARR_WORDS:
1080 prim = rtsTrue;
1081 size = arr_words_sizeW((StgArrBytes*)p);
1082 break;
1083
1084 case MUT_ARR_PTRS_CLEAN:
1085 case MUT_ARR_PTRS_DIRTY:
1086 case MUT_ARR_PTRS_FROZEN:
1087 case MUT_ARR_PTRS_FROZEN0:
1088 prim = rtsTrue;
1089 size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
1090 break;
1091
1092 case SMALL_MUT_ARR_PTRS_CLEAN:
1093 case SMALL_MUT_ARR_PTRS_DIRTY:
1094 case SMALL_MUT_ARR_PTRS_FROZEN:
1095 case SMALL_MUT_ARR_PTRS_FROZEN0:
1096 prim = rtsTrue;
1097 size = small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs *)p);
1098 break;
1099
1100 case TSO:
1101 prim = rtsTrue;
1102 #ifdef PROFILING
1103 if (RtsFlags.ProfFlags.includeTSOs) {
1104 size = sizeofW(StgTSO);
1105 break;
1106 } else {
1107 // Skip this TSO and move on to the next object
1108 p += sizeofW(StgTSO);
1109 continue;
1110 }
1111 #else
1112 size = sizeofW(StgTSO);
1113 break;
1114 #endif
1115
1116 case STACK:
1117 prim = rtsTrue;
1118 #ifdef PROFILING
1119 if (RtsFlags.ProfFlags.includeTSOs) {
1120 size = stack_sizeW((StgStack*)p);
1121 break;
1122 } else {
1123                 // Skip this STACK and move on to the next object
1124 p += stack_sizeW((StgStack*)p);
1125 continue;
1126 }
1127 #else
1128 size = stack_sizeW((StgStack*)p);
1129 break;
1130 #endif
1131
1132 case TREC_CHUNK:
1133 prim = rtsTrue;
1134 size = sizeofW(StgTRecChunk);
1135 break;
1136
1137 case COMPACT_NFDATA:
1138 barf("heapCensus, found compact object in the wrong list");
1139 break;
1140
1141 default:
1142 barf("heapCensus, unknown object: %d", info->type);
1143 }
1144
1145 heapProfObject(census,(StgClosure*)p,size,prim);
1146
1147 p += size;
1148 }
1149 }
1150 }
1151
1152 void heapCensus (Time t)
1153 {
1154 uint32_t g, n;
1155 Census *census;
1156 gen_workspace *ws;
1157
1158 census = &censuses[era];
1159 census->time = mut_user_time_until(t);
1160
1161 // calculate retainer sets if necessary
1162 #ifdef PROFILING
1163 if (doingRetainerProfiling()) {
1164 retainerProfile();
1165 }
1166 #endif
1167
1168 #ifdef PROFILING
1169 stat_startHeapCensus();
1170 #endif
1171
1172 // Traverse the heap, collecting the census info
1173 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1174 heapCensusChain( census, generations[g].blocks );
1175       // Are we interested in large objects?  It might be
1176       // confusing to include the stack in a heap profile.
1177 heapCensusChain( census, generations[g].large_objects );
1178 heapCensusCompactList ( census, generations[g].compact_objects );
1179
1180 for (n = 0; n < n_capabilities; n++) {
1181 ws = &gc_threads[n]->gens[g];
1182 heapCensusChain(census, ws->todo_bd);
1183 heapCensusChain(census, ws->part_list);
1184 heapCensusChain(census, ws->scavd_list);
1185 }
1186 }
1187
1188 // dump out the census info
1189 #ifdef PROFILING
1190 // We can't generate any info for LDV profiling until
1191 // the end of the run...
1192 if (!doingLDVProfiling())
1193 dumpCensus( census );
1194 #else
1195 dumpCensus( census );
1196 #endif
1197
1198
1199 // free our storage, unless we're keeping all the census info for
1200 // future restriction by biography.
1201 #ifdef PROFILING
1202 if (RtsFlags.ProfFlags.bioSelector == NULL)
1203 {
1204 freeEra(census);
1205 census->hash = NULL;
1206 census->arena = NULL;
1207 }
1208 #endif
1209
1210 // we're into the next time period now
1211 nextEra();
1212
1213 #ifdef PROFILING
1214 stat_endHeapCensus();
1215 #endif
1216 }