/* ----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2003
 *
 * Support for heap profiling
 *
 * --------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"

#include "RtsFlags.h"
#include "RtsUtils.h"
#include "Profiling.h"
#include "ProfHeap.h"
#include "Stats.h"
#include "Hash.h"
#include "RetainerProfile.h"
#include "LdvProfile.h"
#include "Arena.h"
#include "Printer.h"
#include "sm/GCThread.h"

#include <string.h>
/* -----------------------------------------------------------------------------
 * era stores the current time period.  It is the same as the
 * number of censuses that have been performed.
 *
 * RESTRICTION:
 *   era must fit in LDV_SHIFT (15 or 30) bits.
 * Invariants:
 *   era is initialized to 1 in initHeapProfiling().
 *
 * max_era is initialized to 2^LDV_SHIFT in initHeapProfiling().
 * When era reaches max_era, the profiling stops because a closure can
 * store only up to (max_era - 1) as its creation or last use time.
 * -------------------------------------------------------------------------- */
unsigned int era;
static uint32_t max_era;

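/* Illustration (assumes the default 0.1s census interval, +RTS -i0.1):
 * with LDV_SHIFT == 15 (32-bit platforms), max_era == 32768, so an LDV
 * profile can take at most 32767 censuses -- roughly 55 minutes of MUT
 * time -- before the era counter saturates and profiling stops.  With
 * LDV_SHIFT == 30 (64-bit platforms) the limit is ~2^30 censuses.
 */
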
/* -----------------------------------------------------------------------------
 * Counters
 *
 * For most heap profiles each closure identity gets a simple count
 * of live words in the heap at each census.  However, if we're
 * selecting by biography, then we have to keep the various
 * lag/drag/void counters for each identity.
 * -------------------------------------------------------------------------- */
typedef struct _counter {
    void *identity;
    union {
        ssize_t resid;
        struct {
            // Total sizes of:
            ssize_t prim;       // 'inherently used' closures
            ssize_t not_used;   // 'never used' closures
            ssize_t used;       // 'used at least once' closures
            ssize_t void_total; // 'destroyed without being used' closures
            ssize_t drag_total; // 'used at least once and waiting to die'
        } ldv;
    } c;
    struct _counter *next;
} counter;

STATIC_INLINE void
initLDVCtr( counter *ctr )
{
    ctr->c.ldv.prim = 0;
    ctr->c.ldv.not_used = 0;
    ctr->c.ldv.used = 0;
    ctr->c.ldv.void_total = 0;
    ctr->c.ldv.drag_total = 0;
}

typedef struct {
    double      time;    // the time in MUT time when the census is made
    HashTable * hash;
    counter   * ctrs;
    Arena     * arena;

    // for LDV profiling, when just displaying by LDV
    ssize_t    prim;
    ssize_t    not_used;
    ssize_t    used;
    ssize_t    void_total;
    ssize_t    drag_total;
} Census;

static Census *censuses = NULL;
static uint32_t n_censuses = 0;

#ifdef PROFILING
static void aggregateCensusInfo( void );
#endif

static void dumpCensus( Census *census );

static rtsBool closureSatisfiesConstraints( const StgClosure* p );

/* ----------------------------------------------------------------------------
 * Find the "closure identity", which is a unique pointer representing
 * the band to which this closure's heap space is attributed in the
 * heap profile.
 * ------------------------------------------------------------------------- */
static void *
closureIdentity( const StgClosure *p )
{
    switch (RtsFlags.ProfFlags.doHeapProfile) {

#ifdef PROFILING
    case HEAP_BY_CCS:
        return p->header.prof.ccs;
    case HEAP_BY_MOD:
        return p->header.prof.ccs->cc->module;
    case HEAP_BY_DESCR:
        return GET_PROF_DESC(get_itbl(p));
    case HEAP_BY_TYPE:
        return GET_PROF_TYPE(get_itbl(p));
    case HEAP_BY_RETAINER:
        // AFAIK, the only closures in the heap which might not have a
        // valid retainer set are DEAD_WEAK closures.
        if (isRetainerSetFieldValid(p))
            return retainerSetOf(p);
        else
            return NULL;

#else
    case HEAP_BY_CLOSURE_TYPE:
    {
        StgInfoTable *info;
        info = get_itbl(p);
        switch (info->type) {
        case CONSTR:
        case CONSTR_1_0:
        case CONSTR_0_1:
        case CONSTR_2_0:
        case CONSTR_1_1:
        case CONSTR_0_2:
        case CONSTR_STATIC:
        case CONSTR_NOCAF_STATIC:
            return GET_CON_DESC(itbl_to_con_itbl(info));
        default:
            return closure_type_names[info->type];
        }
    }

#endif
    default:
        barf("closureIdentity");
    }
}
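
/* Note (illustrative): the pointer returned above is used only as a hash
 * key and a printable label, so two closures land in the same profile
 * band exactly when closureIdentity() returns the same pointer for both,
 * e.g. the same CostCentreStack under -hc, or the same interned type
 * string under -hy.
 */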

/* --------------------------------------------------------------------------
 * Profiling type predicates
 * ----------------------------------------------------------------------- */
#ifdef PROFILING
STATIC_INLINE rtsBool
doingLDVProfiling( void )
{
    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
            || RtsFlags.ProfFlags.bioSelector != NULL);
}

rtsBool
doingRetainerProfiling( void )
{
    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER
            || RtsFlags.ProfFlags.retainerSelector != NULL);
}
#endif /* PROFILING */

// Processes a closure 'c' being destroyed, whose size is 'size'.
// Make sure that LDV_recordDead() is not invoked on 'inherently used' closures
// such as TSO; they should not be involved in computing dragNew or voidNew.
//
// Even though era is checked in both LdvCensusForDead() and
// LdvCensusKillAll(), we still need to make sure that era is > 0 because
// LDV_recordDead() may be called from elsewhere in the runtime system.  E.g.,
// when a thunk is replaced by an indirection object.

#ifdef PROFILING
void
LDV_recordDead( const StgClosure *c, uint32_t size )
{
    void *id;
    uint32_t t;
    counter *ctr;

    if (era > 0 && closureSatisfiesConstraints(c)) {
        size -= sizeofW(StgProfHeader);
        ASSERT(LDVW(c) != 0);
        if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) {
            t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT;
            if (t < era) {
                if (RtsFlags.ProfFlags.bioSelector == NULL) {
                    censuses[t].void_total   += size;
                    censuses[era].void_total -= size;
                    ASSERT(censuses[t].void_total < censuses[t].not_used);
                } else {
                    id = closureIdentity(c);
                    ctr = lookupHashTable(censuses[t].hash, (StgWord)id);
                    ASSERT( ctr != NULL );
                    ctr->c.ldv.void_total += size;
                    ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
                    if (ctr == NULL) {
                        ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
                        initLDVCtr(ctr);
                        insertHashTable(censuses[era].hash, (StgWord)id, ctr);
                        ctr->identity = id;
                        ctr->next = censuses[era].ctrs;
                        censuses[era].ctrs = ctr;
                    }
                    ctr->c.ldv.void_total -= size;
                }
            }
        } else {
            t = LDVW((c)) & LDV_LAST_MASK;
            if (t + 1 < era) {
                if (RtsFlags.ProfFlags.bioSelector == NULL) {
                    censuses[t+1].drag_total += size;
                    censuses[era].drag_total -= size;
                } else {
                    void *id;
                    id = closureIdentity(c);
                    ctr = lookupHashTable(censuses[t+1].hash, (StgWord)id);
                    ASSERT( ctr != NULL );
                    ctr->c.ldv.drag_total += size;
                    ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
                    if (ctr == NULL) {
                        ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
                        initLDVCtr(ctr);
                        insertHashTable(censuses[era].hash, (StgWord)id, ctr);
                        ctr->identity = id;
                        ctr->next = censuses[era].ctrs;
                        censuses[era].ctrs = ctr;
                    }
                    ctr->c.ldv.drag_total -= size;
                }
            }
        }
    }
}
#endif
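
/* Worked example (illustrative numbers): a closure created in era 3 that
 * dies unused while era == 7 causes LDV_recordDead() to do
 *
 *     censuses[3].void_total += size;   // created here, eventually void
 *     censuses[7].void_total -= size;   // destroyed here
 *
 * After aggregateCensusInfo() turns these deltas into running sums,
 * censuses[3..6].void_total each include 'size' -- the closure counts as
 * VOID precisely in the censuses where it was live.
 */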

/* --------------------------------------------------------------------------
 * Initialize censuses[era].
 * ----------------------------------------------------------------------- */

STATIC_INLINE void
initEra(Census *census)
{
    census->hash  = allocHashTable();
    census->ctrs  = NULL;
    census->arena = newArena();

    census->not_used   = 0;
    census->used       = 0;
    census->prim       = 0;
    census->void_total = 0;
    census->drag_total = 0;
}

STATIC_INLINE void
freeEra(Census *census)
{
    arenaFree(census->arena);
    freeHashTable(census->hash, NULL);
}

/* --------------------------------------------------------------------------
 * Increase era by 1 and initialize censuses[era].
 * Reallocate censuses[] and double its size if needed.
 * ----------------------------------------------------------------------- */

static void
nextEra( void )
{
#ifdef PROFILING
    if (doingLDVProfiling()) {
        era++;

        if (era == max_era) {
            errorBelch("Maximum number of censuses reached.");
            if (rtsConfig.rts_opts_suggestions == rtsTrue) {
                if (rtsConfig.rts_opts_enabled == RtsOptsAll)  {
                    errorBelch("Use `+RTS -i' to reduce censuses.");
                } else {
                    errorBelch("Relink with -rtsopts and "
                               "use `+RTS -i' to reduce censuses.");
                }
            }
            stg_exit(EXIT_FAILURE);
        }

        if (era == n_censuses) {
            n_censuses *= 2;
            censuses = stgReallocBytes(censuses, sizeof(Census) * n_censuses,
                                       "nextEra");
        }
    }
#endif /* PROFILING */

    initEra( &censuses[era] );
}

/* ----------------------------------------------------------------------------
 * Heap profiling by info table
 * ------------------------------------------------------------------------- */

#if !defined(PROFILING)
FILE *hp_file;
static char *hp_filename;

void freeProfiling (void)
{
}

void initProfiling (void)
{
    char *prog;

    prog = stgMallocBytes(strlen(prog_name) + 1, "initProfiling2");
    strcpy(prog, prog_name);
#ifdef mingw32_HOST_OS
    // on Windows, drop the .exe suffix if there is one
    {
        char *suff;
        suff = strrchr(prog,'.');
        if (suff != NULL && !strcmp(suff,".exe")) {
            *suff = '\0';
        }
    }
#endif

    if (RtsFlags.ProfFlags.doHeapProfile) {
        /* Initialise the log file name */
        hp_filename = stgMallocBytes(strlen(prog) + 6, "hpFileName");
        sprintf(hp_filename, "%s.hp", prog);

        /* open the log file */
        if ((hp_file = fopen(hp_filename, "w")) == NULL) {
            debugBelch("Can't open profiling report file %s\n",
                       hp_filename);
            RtsFlags.ProfFlags.doHeapProfile = 0;
            stgFree(prog);
            return;
        }
    }

    stgFree(prog);

    initHeapProfiling();
}

void endProfiling( void )
{
    endHeapProfiling();
}
#endif /* !PROFILING */

static void
printSample(rtsBool beginSample, StgDouble sampleValue)
{
    fprintf(hp_file, "%s %f\n",
            (beginSample ? "BEGIN_SAMPLE" : "END_SAMPLE"),
            sampleValue);
    if (!beginSample) {
        fflush(hp_file);
    }
}

/* --------------------------------------------------------------------------
 * Initialize the heap profiler
 * ----------------------------------------------------------------------- */
uint32_t
initHeapProfiling(void)
{
    if (! RtsFlags.ProfFlags.doHeapProfile) {
        return 0;
    }

#ifdef PROFILING
    if (doingLDVProfiling() && doingRetainerProfiling()) {
        errorBelch("cannot mix -hb and -hr");
        stg_exit(EXIT_FAILURE);
    }
#endif

    // We only count eras if we're doing LDV profiling.  Otherwise era
    // is fixed at zero.
#ifdef PROFILING
    if (doingLDVProfiling()) {
        era = 1;
    } else
#endif
    {
        era = 0;
    }

    // max_era = 2^LDV_SHIFT
    max_era = 1 << LDV_SHIFT;

    n_censuses = 32;
    censuses = stgMallocBytes(sizeof(Census) * n_censuses, "initHeapProfiling");

    initEra( &censuses[era] );

    /* initProfilingLogFile(); */
    fprintf(hp_file, "JOB \"%s", prog_name);

#ifdef PROFILING
    {
        int count;
        for (count = 1; count < prog_argc; count++)
            fprintf(hp_file, " %s", prog_argv[count]);
        fprintf(hp_file, " +RTS");
        for (count = 0; count < rts_argc; count++)
            fprintf(hp_file, " %s", rts_argv[count]);
    }
#endif /* PROFILING */

    fprintf(hp_file, "\"\n" );

    fprintf(hp_file, "DATE \"%s\"\n", time_str());

    fprintf(hp_file, "SAMPLE_UNIT \"seconds\"\n");
    fprintf(hp_file, "VALUE_UNIT \"bytes\"\n");

    printSample(rtsTrue, 0);
    printSample(rtsFalse, 0);

#ifdef PROFILING
    if (doingRetainerProfiling()) {
        initRetainerProfiling();
    }
#endif

    return 0;
}
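
/* For reference (illustrative values): the header written above yields a
 * .hp file beginning
 *
 *     JOB "myprog arg1 +RTS -hc"
 *     DATE "Tue Jun 21 14:02 2016"
 *     SAMPLE_UNIT "seconds"
 *     VALUE_UNIT "bytes"
 *     BEGIN_SAMPLE 0.000000
 *     END_SAMPLE 0.000000
 *
 * which is the format consumed by hp2ps and friends.
 */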

void
endHeapProfiling(void)
{
    StgDouble seconds;

    if (! RtsFlags.ProfFlags.doHeapProfile) {
        return;
    }

#ifdef PROFILING
    if (doingRetainerProfiling()) {
        endRetainerProfiling();
    }
#endif

#ifdef PROFILING
    if (doingLDVProfiling()) {
        uint32_t t;
        LdvCensusKillAll();
        aggregateCensusInfo();
        for (t = 1; t < era; t++) {
            dumpCensus( &censuses[t] );
        }
    }
#endif

#ifdef PROFILING
    if (doingLDVProfiling()) {
        uint32_t t;
        if (RtsFlags.ProfFlags.bioSelector != NULL) {
            for (t = 1; t <= era; t++) {
                freeEra( &censuses[t] );
            }
        } else {
            freeEra( &censuses[era] );
        }
    } else {
        freeEra( &censuses[0] );
    }
#else
    freeEra( &censuses[0] );
#endif

    stgFree(censuses);

    seconds = mut_user_time();
    printSample(rtsTrue, seconds);
    printSample(rtsFalse, seconds);
    fclose(hp_file);
}


#ifdef PROFILING
static size_t
buf_append(char *p, const char *q, char *end)
{
    int m;

    for (m = 0; p < end; p++, q++, m++) {
        *p = *q;
        if (*q == '\0') { break; }
    }
    return m;
}

static void
fprint_ccs(FILE *fp, CostCentreStack *ccs, uint32_t max_length)
{
    char buf[max_length+1], *p, *buf_end;

    // MAIN on its own gets printed as "MAIN", otherwise we ignore MAIN.
    if (ccs == CCS_MAIN) {
        fprintf(fp, "MAIN");
        return;
    }

    fprintf(fp, "(%" FMT_Int ")", ccs->ccsID);

    p = buf;
    buf_end = buf + max_length + 1;

    // keep printing components of the stack until we run out of space
    // in the buffer.  If we run out of space, end with "...".
    for (; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack) {

        // CAF cost centres print as M.CAF, but we leave the module
        // name out of all the others to save space.
        if (!strcmp(ccs->cc->label,"CAF")) {
            p += buf_append(p, ccs->cc->module, buf_end);
            p += buf_append(p, ".CAF", buf_end);
        } else {
            p += buf_append(p, ccs->cc->label, buf_end);
            if (ccs->prevStack != NULL && ccs->prevStack != CCS_MAIN) {
                p += buf_append(p, "/", buf_end);
            }
        }

        if (p >= buf_end) {
            sprintf(buf+max_length-4, "...");
            break;
        }
    }
    fprintf(fp, "%s", buf);
}
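
/* Example (hypothetical cost-centre stack): for MAIN -> f -> g with
 * ccsID 42, the loop above walks from the innermost frame outwards and
 * prints
 *
 *     (42)g/f
 *
 * A CAF cost centre in module Main would print as "Main.CAF".
 */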

rtsBool
strMatchesSelector( const char* str, const char* sel )
{
    const char* p;
    // debugBelch("str_matches_selector %s %s\n", str, sel);
    while (1) {
        // Compare str against wherever we've got to in sel.
        p = str;
        while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
            p++; sel++;
        }
        // Match if all of str was used and we have reached the end of a
        // sel fragment.
        if (*p == '\0' && (*sel == ',' || *sel == '\0'))
            return rtsTrue;

        // No match.  Advance sel to the start of the next element.
        while (*sel != ',' && *sel != '\0') sel++;
        if (*sel == ',') sel++;

        /* Run out of sel ?? */
        if (*sel == '\0') return rtsFalse;
    }
}
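
// Usage (illustrative): 'sel' is a comma-separated list and 'str' must
// equal one element exactly, so
//     strMatchesSelector("lag",  "lag,drag") == rtsTrue
//     strMatchesSelector("void", "lag,drag") == rtsFalse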

#endif /* PROFILING */

/* -----------------------------------------------------------------------------
 * Figure out whether a closure should be counted in this census, by
 * testing against all the specified constraints.
 * -------------------------------------------------------------------------- */
static rtsBool
closureSatisfiesConstraints( const StgClosure* p )
{
#if !defined(PROFILING)
    (void)p;   /* keep gcc -Wall happy */
    return rtsTrue;
#else
    rtsBool b;

    // The CCS has a selected field to indicate whether this closure is
    // deselected by not being mentioned in the module, CC, or CCS
    // selectors.
    if (!p->header.prof.ccs->selected) {
        return rtsFalse;
    }

    if (RtsFlags.ProfFlags.descrSelector) {
        b = strMatchesSelector( (GET_PROF_DESC(get_itbl((StgClosure *)p))),
                                RtsFlags.ProfFlags.descrSelector );
        if (!b) return rtsFalse;
    }
    if (RtsFlags.ProfFlags.typeSelector) {
        b = strMatchesSelector( (GET_PROF_TYPE(get_itbl((StgClosure *)p))),
                                RtsFlags.ProfFlags.typeSelector );
        if (!b) return rtsFalse;
    }
    if (RtsFlags.ProfFlags.retainerSelector) {
        RetainerSet *rs;
        uint32_t i;
        // We must check that the retainer set is valid here.  One
        // reason it might not be valid is if this closure is a
        // newly deceased weak pointer (i.e. a DEAD_WEAK), since
        // these aren't reached by the retainer profiler's traversal.
        if (isRetainerSetFieldValid((StgClosure *)p)) {
            rs = retainerSetOf((StgClosure *)p);
            if (rs != NULL) {
                for (i = 0; i < rs->num; i++) {
                    b = strMatchesSelector( rs->element[i]->cc->label,
                                            RtsFlags.ProfFlags.retainerSelector );
                    if (b) return rtsTrue;
                }
            }
        }
        return rtsFalse;
    }
    return rtsTrue;
#endif /* PROFILING */
}
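
// Example (illustrative): running with `+RTS -hc -hyMVar,Int' profiles by
// cost-centre stack but counts only closures whose type description
// matches "MVar" or "Int"; everything else fails the typeSelector test
// above and is left out of the census.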

/* -----------------------------------------------------------------------------
 * Aggregate the heap census info for biographical profiling
 * -------------------------------------------------------------------------- */
#ifdef PROFILING
static void
aggregateCensusInfo( void )
{
    HashTable *acc;
    uint32_t t;
    counter *c, *d, *ctrs;
    Arena *arena;

    if (!doingLDVProfiling()) return;

    // Aggregate the LDV counters when displaying by biography.
    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
        long void_total, drag_total;

        // Now we compute void_total and drag_total for each census.
        // After the program has finished, the void_total field of
        // each census contains the count of words that were *created*
        // in this era and were eventually void.  Conversely, if a
        // void closure was destroyed in this era, it will be
        // represented by a negative count of words in void_total.
        //
        // To get the count of live words that are void at each
        // census, just propagate the void_total count forwards:

        void_total = 0;
        drag_total = 0;
        for (t = 1; t < era; t++) { // note: start at 1, not 0
            void_total += censuses[t].void_total;
            drag_total += censuses[t].drag_total;
            censuses[t].void_total = void_total;
            censuses[t].drag_total = drag_total;

            ASSERT( censuses[t].void_total <= censuses[t].not_used );
            // should be true because: void_total is the count of
            // live words that are void at this census, which *must*
            // be no greater than the number of live words that have
            // not been used yet.

            ASSERT( censuses[t].drag_total <= censuses[t].used );
            // similar reasoning as above.
        }

        return;
    }

    // Otherwise... we're doing a heap profile that is restricted to
    // some combination of lag, drag, void or use.  We've kept all the
    // census info for all censuses so far, but we still need to
    // aggregate the counters forwards.

    arena = newArena();
    acc = allocHashTable();
    ctrs = NULL;

    for (t = 1; t < era; t++) {

        // first look through all the counters we're aggregating
        for (c = ctrs; c != NULL; c = c->next) {
            // if one of the totals is non-zero, then this closure
            // type must be present in the heap at this census time...
            d = lookupHashTable(censuses[t].hash, (StgWord)c->identity);

            if (d == NULL) {
                // if this closure identity isn't present in the
                // census for this time period, then our running
                // totals *must* be zero.
                ASSERT(c->c.ldv.void_total == 0 && c->c.ldv.drag_total == 0);

                // debugCCS(c->identity);
                // debugBelch(" census=%d void_total=%d drag_total=%d\n",
                //            t, c->c.ldv.void_total, c->c.ldv.drag_total);
            } else {
                d->c.ldv.void_total += c->c.ldv.void_total;
                d->c.ldv.drag_total += c->c.ldv.drag_total;
                c->c.ldv.void_total =  d->c.ldv.void_total;
                c->c.ldv.drag_total =  d->c.ldv.drag_total;

                ASSERT( c->c.ldv.void_total >= 0 );
                ASSERT( c->c.ldv.drag_total >= 0 );
            }
        }

        // now look through the counters in this census to find new ones
        for (c = censuses[t].ctrs; c != NULL; c = c->next) {
            d = lookupHashTable(acc, (StgWord)c->identity);
            if (d == NULL) {
                d = arenaAlloc( arena, sizeof(counter) );
                initLDVCtr(d);
                insertHashTable( acc, (StgWord)c->identity, d );
                d->identity = c->identity;
                d->next = ctrs;
                ctrs = d;
                d->c.ldv.void_total = c->c.ldv.void_total;
                d->c.ldv.drag_total = c->c.ldv.drag_total;
            }
            ASSERT( c->c.ldv.void_total >= 0 );
            ASSERT( c->c.ldv.drag_total >= 0 );
        }
    }

    freeHashTable(acc, NULL);
    arenaFree(arena);
}
#endif
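
/* Worked example of the HEAP_BY_LDV propagation above (illustrative
 * numbers): if the per-era void_total deltas recorded by LDV_recordDead()
 * are +10, 0, +5, -15 for eras 1..4, the running sum rewrites them to
 * 10, 10, 15, 0 -- the words that went void in eras 1 and 3 count as VOID
 * in every census up to their destruction during era 4.
 */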

/* -----------------------------------------------------------------------------
 * Print out the results of a heap census.
 * -------------------------------------------------------------------------- */
static void
dumpCensus( Census *census )
{
    counter *ctr;
    ssize_t count;

    printSample(rtsTrue, census->time);

#ifdef PROFILING
    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
        fprintf(hp_file, "VOID\t%lu\n",
                (unsigned long)(census->void_total) * sizeof(W_));
        fprintf(hp_file, "LAG\t%lu\n",
                (unsigned long)(census->not_used - census->void_total) * sizeof(W_));
        fprintf(hp_file, "USE\t%lu\n",
                (unsigned long)(census->used - census->drag_total) * sizeof(W_));
        fprintf(hp_file, "INHERENT_USE\t%lu\n",
                (unsigned long)(census->prim) * sizeof(W_));
        fprintf(hp_file, "DRAG\t%lu\n",
                (unsigned long)(census->drag_total) * sizeof(W_));
        printSample(rtsFalse, census->time);
        return;
    }
#endif

    for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {

#ifdef PROFILING
        if (RtsFlags.ProfFlags.bioSelector != NULL) {
            count = 0;
            if (strMatchesSelector("lag", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.not_used - ctr->c.ldv.void_total;
            if (strMatchesSelector("drag", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.drag_total;
            if (strMatchesSelector("void", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.void_total;
            if (strMatchesSelector("use", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.used - ctr->c.ldv.drag_total;
        } else
#endif
        {
            count = ctr->c.resid;
        }

        ASSERT( count >= 0 );

        if (count == 0) continue;

#if !defined(PROFILING)
        switch (RtsFlags.ProfFlags.doHeapProfile) {
        case HEAP_BY_CLOSURE_TYPE:
            fprintf(hp_file, "%s", (char *)ctr->identity);
            break;
        }
#endif

#ifdef PROFILING
        switch (RtsFlags.ProfFlags.doHeapProfile) {
        case HEAP_BY_CCS:
            fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
                       RtsFlags.ProfFlags.ccsLength);
            break;
        case HEAP_BY_MOD:
        case HEAP_BY_DESCR:
        case HEAP_BY_TYPE:
            fprintf(hp_file, "%s", (char *)ctr->identity);
            break;
        case HEAP_BY_RETAINER:
        {
            RetainerSet *rs = (RetainerSet *)ctr->identity;

            // it might be the distinguished retainer set rs_MANY:
            if (rs == &rs_MANY) {
                fprintf(hp_file, "MANY");
                break;
            }

            // Mark this retainer set by negating its id, because it
            // has appeared in at least one census.  We print the
            // values of all such retainer sets into the log file at
            // the end.  A retainer set may exist but not feature in
            // any censuses if it arose as the intermediate retainer
            // set for some closure during retainer set calculation.
            if (rs->id > 0)
                rs->id = -(rs->id);

            // report in the unit of bytes: * sizeof(StgWord)
            printRetainerSetShort(hp_file, rs, RtsFlags.ProfFlags.ccsLength);
            break;
        }
        default:
            barf("dumpCensus; doHeapProfile");
        }
#endif

        fprintf(hp_file, "\t%" FMT_Word "\n", (W_)count * sizeof(W_));
    }

    printSample(rtsFalse, census->time);
}
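
/* A census dumped by the code above appears in the .hp file as one
 * sample block, e.g. (illustrative values, fields are tab-separated):
 *
 *     BEGIN_SAMPLE 1.700000
 *     (42)g/f        8192
 *     MAIN           1024
 *     END_SAMPLE 1.700000
 */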

static void heapProfObject(Census *census, StgClosure *p, size_t size,
                           rtsBool prim
#ifndef PROFILING
                           STG_UNUSED
#endif
                           )
{
    void *identity;
    size_t real_size;
    counter *ctr;

    identity = NULL;

#ifdef PROFILING
    // subtract the profiling overhead
    real_size = size - sizeofW(StgProfHeader);
#else
    real_size = size;
#endif

    if (closureSatisfiesConstraints((StgClosure*)p)) {
#ifdef PROFILING
        if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
            if (prim)
                census->prim += real_size;
            else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
                census->not_used += real_size;
            else
                census->used += real_size;
        } else
#endif
        {
            identity = closureIdentity((StgClosure *)p);

            if (identity != NULL) {
                ctr = lookupHashTable( census->hash, (StgWord)identity );
                if (ctr != NULL) {
#ifdef PROFILING
                    if (RtsFlags.ProfFlags.bioSelector != NULL) {
                        if (prim)
                            ctr->c.ldv.prim += real_size;
                        else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
                            ctr->c.ldv.not_used += real_size;
                        else
                            ctr->c.ldv.used += real_size;
                    } else
#endif
                    {
                        ctr->c.resid += real_size;
                    }
                } else {
                    ctr = arenaAlloc( census->arena, sizeof(counter) );
                    initLDVCtr(ctr);
                    insertHashTable( census->hash, (StgWord)identity, ctr );
                    ctr->identity = identity;
                    ctr->next = census->ctrs;
                    census->ctrs = ctr;

#ifdef PROFILING
                    if (RtsFlags.ProfFlags.bioSelector != NULL) {
                        if (prim)
                            ctr->c.ldv.prim = real_size;
                        else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
                            ctr->c.ldv.not_used = real_size;
                        else
                            ctr->c.ldv.used = real_size;
                    } else
#endif
                    {
                        ctr->c.resid = real_size;
                    }
                }
            }
        }
    }
}
/* -----------------------------------------------------------------------------
 * Code to perform a heap census.
 * -------------------------------------------------------------------------- */
static void
heapCensusChain( Census *census, bdescr *bd )
{
    StgPtr p;
    StgInfoTable *info;
    size_t size;
    rtsBool prim;

    for (; bd != NULL; bd = bd->link) {

        // HACK: pretend a pinned block is just one big ARR_WORDS
        // owned by CCS_PINNED.  These blocks can be full of holes due
        // to alignment constraints so we can't traverse the memory
        // and do a proper census.
        if (bd->flags & BF_PINNED) {
            StgClosure arr;
            SET_HDR(&arr, &stg_ARR_WORDS_info, CCS_PINNED);
            heapProfObject(census, &arr, bd->blocks * BLOCK_SIZE_W, rtsTrue);
            continue;
        }

        p = bd->start;

        // When we shrink a large ARR_WORDS, we do not adjust the free pointer
        // of the associated block descriptor, thus introducing slop at the end
        // of the object.  This slop remains after GC, violating the assumption
        // of the loop below that all slop has been eliminated (#11627).
        // Consequently, we handle large ARR_WORDS objects as a special case.
        if (bd->flags & BF_LARGE
            && get_itbl((StgClosure *)p)->type == ARR_WORDS) {
            size = arr_words_sizeW((StgArrBytes *)p);
            prim = rtsTrue;
            heapProfObject(census, (StgClosure *)p, size, prim);
            continue;
        }

        while (p < bd->free) {
            info = get_itbl((StgClosure *)p);
            prim = rtsFalse;

            switch (info->type) {

            case THUNK:
                size = thunk_sizeW_fromITBL(info);
                break;

            case THUNK_1_1:
            case THUNK_0_2:
            case THUNK_2_0:
                size = sizeofW(StgThunkHeader) + 2;
                break;

            case THUNK_1_0:
            case THUNK_0_1:
            case THUNK_SELECTOR:
                size = sizeofW(StgThunkHeader) + 1;
                break;

            case CONSTR:
            case FUN:
            case BLACKHOLE:
            case BLOCKING_QUEUE:
            case FUN_1_0:
            case FUN_0_1:
            case FUN_1_1:
            case FUN_0_2:
            case FUN_2_0:
            case CONSTR_1_0:
            case CONSTR_0_1:
            case CONSTR_1_1:
            case CONSTR_0_2:
            case CONSTR_2_0:
                size = sizeW_fromITBL(info);
                break;

            case IND:
                // Special case/Delicate Hack: INDs don't normally
                // appear, since we're doing this heap census right
                // after GC.  However, GarbageCollect() also does
                // resurrectThreads(), which can update some
                // blackholes when it calls raiseAsync() on the
                // resurrected threads.  So we know that any IND will
                // be the size of a BLACKHOLE.
                size = BLACKHOLE_sizeW();
                break;

            case BCO:
                prim = rtsTrue;
                size = bco_sizeW((StgBCO *)p);
                break;

            case MVAR_CLEAN:
            case MVAR_DIRTY:
            case TVAR:
            case WEAK:
            case PRIM:
            case MUT_PRIM:
            case MUT_VAR_CLEAN:
            case MUT_VAR_DIRTY:
                prim = rtsTrue;
                size = sizeW_fromITBL(info);
                break;

            case AP:
                size = ap_sizeW((StgAP *)p);
                break;

            case PAP:
                size = pap_sizeW((StgPAP *)p);
                break;

            case AP_STACK:
                size = ap_stack_sizeW((StgAP_STACK *)p);
                break;

            case ARR_WORDS:
                prim = rtsTrue;
                size = arr_words_sizeW((StgArrBytes*)p);
                break;

            case MUT_ARR_PTRS_CLEAN:
            case MUT_ARR_PTRS_DIRTY:
            case MUT_ARR_PTRS_FROZEN:
            case MUT_ARR_PTRS_FROZEN0:
                prim = rtsTrue;
                size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
                break;

            case SMALL_MUT_ARR_PTRS_CLEAN:
            case SMALL_MUT_ARR_PTRS_DIRTY:
            case SMALL_MUT_ARR_PTRS_FROZEN:
            case SMALL_MUT_ARR_PTRS_FROZEN0:
                prim = rtsTrue;
                size = small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs *)p);
                break;

            case TSO:
                prim = rtsTrue;
#ifdef PROFILING
                if (RtsFlags.ProfFlags.includeTSOs) {
                    size = sizeofW(StgTSO);
                    break;
                } else {
                    // Skip this TSO and move on to the next object
                    p += sizeofW(StgTSO);
                    continue;
                }
#else
                size = sizeofW(StgTSO);
                break;
#endif

            case STACK:
                prim = rtsTrue;
#ifdef PROFILING
                if (RtsFlags.ProfFlags.includeTSOs) {
                    size = stack_sizeW((StgStack*)p);
                    break;
                } else {
                    // Skip this stack and move on to the next object
                    p += stack_sizeW((StgStack*)p);
                    continue;
                }
#else
                size = stack_sizeW((StgStack*)p);
                break;
#endif

            case TREC_CHUNK:
                prim = rtsTrue;
                size = sizeofW(StgTRecChunk);
                break;

            default:
                barf("heapCensus, unknown object: %d", info->type);
            }

            heapProfObject(census, (StgClosure *)p, size, prim);

            p += size;
        }
    }
}

void heapCensus (Time t)
{
    uint32_t g, n;
    Census *census;
    gen_workspace *ws;

    census = &censuses[era];
    census->time  = mut_user_time_until(t);

    // calculate retainer sets if necessary
#ifdef PROFILING
    if (doingRetainerProfiling()) {
        retainerProfile();
    }
#endif

#ifdef PROFILING
    stat_startHeapCensus();
#endif

    // Traverse the heap, collecting the census info
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        heapCensusChain( census, generations[g].blocks );
        // Are we interested in large objects?  It might be
        // confusing to include the stack in a heap profile.
        heapCensusChain( census, generations[g].large_objects );

        for (n = 0; n < n_capabilities; n++) {
            ws = &gc_threads[n]->gens[g];
            heapCensusChain(census, ws->todo_bd);
            heapCensusChain(census, ws->part_list);
            heapCensusChain(census, ws->scavd_list);
        }
    }

    // dump out the census info
#ifdef PROFILING
    // We can't generate any info for LDV profiling until
    // the end of the run...
    if (!doingLDVProfiling())
        dumpCensus( census );
#else
    dumpCensus( census );
#endif

    // Free our storage, unless we're keeping all the census info for
    // future restriction by biography.
#ifdef PROFILING
    if (RtsFlags.ProfFlags.bioSelector == NULL)
    {
        freeEra(census);
        census->hash = NULL;
        census->arena = NULL;
    }
#endif

    // we're into the next time period now
    nextEra();

#ifdef PROFILING
    stat_endHeapCensus();
#endif
}