/* ----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2003
 *
 * Support for heap profiling
 *
 * --------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"

#include "Capability.h"
#include "RtsFlags.h"
#include "RtsUtils.h"
#include "Profiling.h"
#include "ProfHeap.h"
#include "Stats.h"
#include "Hash.h"
#include "RetainerProfile.h"
#include "LdvProfile.h"
#include "Arena.h"
#include "Printer.h"
#include "Trace.h"
#include "sm/GCThread.h"

#include <fs_rts.h>
#include <string.h>

/* -----------------------------------------------------------------------------
 * era stores the current time period.  It is the same as the
 * number of censuses that have been performed.
 *
 * RESTRICTION:
 *   era must fit in LDV_SHIFT (15 or 30) bits.
 * Invariants:
 *   era is initialized to 1 in initHeapProfiling().
 *
 * max_era is initialized to 2^LDV_SHIFT in initHeapProfiling().
 * When era reaches max_era, the profiling stops because a closure can
 * store only up to (max_era - 1) as its creation or last use time.
 * -------------------------------------------------------------------------- */
unsigned int era;
static uint32_t max_era;
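
/* For example, with LDV_SHIFT = 15 we get max_era = 2^15 = 32768, so a
 * closure can record a creation or last-use time of at most 32767;
 * profiling aborts (in nextEra()) once era reaches 32768. */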

/* -----------------------------------------------------------------------------
 * Counters
 *
 * For most heap profiles each closure identity gets a simple count
 * of live words in the heap at each census.  However, if we're
 * selecting by biography, then we have to keep the various
 * lag/drag/void counters for each identity.
 * -------------------------------------------------------------------------- */
typedef struct _counter {
    const void *identity;
    union {
        ssize_t resid;
        struct {
            // Total sizes of:
            ssize_t prim;       // 'inherently used' closures
            ssize_t not_used;   // 'never used' closures
            ssize_t used;       // 'used at least once' closures
            ssize_t void_total; // 'destroyed without being used' closures
            ssize_t drag_total; // 'used at least once and waiting to die'
        } ldv;
    } c;
    struct _counter *next;
} counter;

STATIC_INLINE void
initLDVCtr( counter *ctr )
{
    ctr->c.ldv.prim = 0;
    ctr->c.ldv.not_used = 0;
    ctr->c.ldv.used = 0;
    ctr->c.ldv.void_total = 0;
    ctr->c.ldv.drag_total = 0;
}

typedef struct {
    double      time;  // the time in MUT time when the census is made
    HashTable * hash;
    counter   * ctrs;
    Arena     * arena;

    // for LDV profiling, when just displaying by LDV
    ssize_t    prim;
    ssize_t    not_used;
    ssize_t    used;
    ssize_t    void_total;
    ssize_t    drag_total;
} Census;

static Census *censuses = NULL;
static uint32_t n_censuses = 0;

#if defined(PROFILING)
static void aggregateCensusInfo( void );
#endif

static void dumpCensus( Census *census );

static bool closureSatisfiesConstraints( const StgClosure* p );

/* ----------------------------------------------------------------------------
 * Find the "closure identity", which is a unique pointer representing
 * the band to which this closure's heap space is attributed in the
 * heap profile.
 * ------------------------------------------------------------------------- */
static const void *
closureIdentity( const StgClosure *p )
{
    switch (RtsFlags.ProfFlags.doHeapProfile) {

#if defined(PROFILING)
    case HEAP_BY_CCS:
        return p->header.prof.ccs;
    case HEAP_BY_MOD:
        return p->header.prof.ccs->cc->module;
    case HEAP_BY_DESCR:
        return GET_PROF_DESC(get_itbl(p));
    case HEAP_BY_TYPE:
        return GET_PROF_TYPE(get_itbl(p));
    case HEAP_BY_RETAINER:
        // AFAIK, the only closures in the heap which might not have a
        // valid retainer set are DEAD_WEAK closures.
        if (isRetainerSetFieldValid(p))
            return retainerSetOf(p);
        else
            return NULL;
#endif

    case HEAP_BY_CLOSURE_TYPE:
    {
        const StgInfoTable *info;
        info = get_itbl(p);
        switch (info->type) {
        case CONSTR:
        case CONSTR_1_0:
        case CONSTR_0_1:
        case CONSTR_2_0:
        case CONSTR_1_1:
        case CONSTR_0_2:
        case CONSTR_NOCAF:
            return GET_CON_DESC(itbl_to_con_itbl(info));
        default:
            return closure_type_names[info->type];
        }
    }

    default:
        barf("closureIdentity");
    }
}
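
/* A sketch of the identities returned above: under -hc (HEAP_BY_CCS) the
 * identity is the closure's CostCentreStack pointer; under -hT
 * (HEAP_BY_CLOSURE_TYPE) it is a string, either the constructor
 * description for CONSTR* closures or an entry of closure_type_names[]
 * such as "ARR_WORDS" for everything else. */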

/* --------------------------------------------------------------------------
 * Profiling type predicates
 * ----------------------------------------------------------------------- */
#if defined(PROFILING)
STATIC_INLINE bool
doingLDVProfiling( void )
{
    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
            || RtsFlags.ProfFlags.bioSelector != NULL);
}

bool
doingRetainerProfiling( void )
{
    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER
            || RtsFlags.ProfFlags.retainerSelector != NULL);
}
#endif /* PROFILING */

// Processes a closure 'c' being destroyed whose size is 'size'.
// Make sure that LDV_recordDead() is not invoked on 'inherently used' closures
// such as TSO; they should not be involved in computing dragNew or voidNew.
//
// Even though era is checked in both LdvCensusForDead() and
// LdvCensusKillAll(), we still need to make sure that era is > 0 because
// LDV_recordDead() may be called from elsewhere in the runtime system.  E.g.,
// when a thunk is replaced by an indirection object.

#if defined(PROFILING)
void
LDV_recordDead( const StgClosure *c, uint32_t size )
{
    const void *id;
    uint32_t t;
    counter *ctr;

    if (era > 0 && closureSatisfiesConstraints(c)) {
        size -= sizeofW(StgProfHeader);
        ASSERT(LDVW(c) != 0);
        if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) {
            t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT;
            if (t < era) {
                if (RtsFlags.ProfFlags.bioSelector == NULL) {
                    censuses[t].void_total   += size;
                    censuses[era].void_total -= size;
                    ASSERT(censuses[t].void_total < censuses[t].not_used);
                } else {
                    id = closureIdentity(c);
                    ctr = lookupHashTable(censuses[t].hash, (StgWord)id);
                    if (ctr == NULL)
                        barf("LDV_recordDead: Failed to find counter for closure %p", c);

                    ctr->c.ldv.void_total += size;
                    ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
                    if (ctr == NULL) {
                        ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
                        initLDVCtr(ctr);
                        insertHashTable(censuses[era].hash, (StgWord)id, ctr);
                        ctr->identity = id;
                        ctr->next = censuses[era].ctrs;
                        censuses[era].ctrs = ctr;
                    }
                    ctr->c.ldv.void_total -= size;
                }
            }
        } else {
            t = LDVW((c)) & LDV_LAST_MASK;
            if (t + 1 < era) {
                if (RtsFlags.ProfFlags.bioSelector == NULL) {
                    censuses[t+1].drag_total += size;
                    censuses[era].drag_total -= size;
                } else {
                    const void *id;
                    id = closureIdentity(c);
                    ctr = lookupHashTable(censuses[t+1].hash, (StgWord)id);
                    ASSERT( ctr != NULL );
                    ctr->c.ldv.drag_total += size;
                    ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
                    if (ctr == NULL) {
                        ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
                        initLDVCtr(ctr);
                        insertHashTable(censuses[era].hash, (StgWord)id, ctr);
                        ctr->identity = id;
                        ctr->next = censuses[era].ctrs;
                        censuses[era].ctrs = ctr;
                    }
                    ctr->c.ldv.drag_total -= size;
                }
            }
        }
    }
}
#endif
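
/* A worked example of the accounting above (sizes hypothetical): suppose
 * a 100-word closure is created in era 3 and dies in era 7 without ever
 * being used.  LDV_recordDead adds +100 to censuses[3].void_total and
 * -100 to censuses[7].void_total; after aggregateCensusInfo() propagates
 * the running sum forwards, censuses 3..6 show 100 live void words and
 * censuses from era 7 onwards show none. */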

/* --------------------------------------------------------------------------
 * Initialize censuses[era]
 * ----------------------------------------------------------------------- */

STATIC_INLINE void
initEra(Census *census)
{
    census->hash  = allocHashTable();
    census->ctrs  = NULL;
    census->arena = newArena();

    census->not_used   = 0;
    census->used       = 0;
    census->prim       = 0;
    census->void_total = 0;
    census->drag_total = 0;
}

STATIC_INLINE void
freeEra(Census *census)
{
    arenaFree(census->arena);
    freeHashTable(census->hash, NULL);
}

/* --------------------------------------------------------------------------
 * Increment era by 1 and initialize censuses[era].
 * Reallocates censuses[] and doubles its size if needed.
 * ----------------------------------------------------------------------- */

static void
nextEra( void )
{
#if defined(PROFILING)
    if (doingLDVProfiling()) {
        era++;

        if (era == max_era) {
            errorBelch("Maximum number of censuses reached.");
            if (rtsConfig.rts_opts_suggestions == true) {
                if (rtsConfig.rts_opts_enabled == RtsOptsAll) {
                    errorBelch("Use `+RTS -i' to reduce censuses.");
                } else {
                    errorBelch("Relink with -rtsopts and "
                               "use `+RTS -i' to reduce censuses.");
                }
            }
            stg_exit(EXIT_FAILURE);
        }

        if (era == n_censuses) {
            n_censuses *= 2;
            censuses = stgReallocBytes(censuses, sizeof(Census) * n_censuses,
                                       "nextEra");
        }
    }
#endif /* PROFILING */

    initEra( &censuses[era] );
}

/* ----------------------------------------------------------------------------
 * Heap profiling by info table
 * ------------------------------------------------------------------------- */

#if !defined(PROFILING)
FILE *hp_file;
static char *hp_filename;

void freeProfiling (void)
{
}

void initProfiling (void)
{
    char *prog;

    prog = stgMallocBytes(strlen(prog_name) + 1, "initProfiling2");
    strcpy(prog, prog_name);
#if defined(mingw32_HOST_OS)
    // on Windows, drop the .exe suffix if there is one
    {
        char *suff;
        suff = strrchr(prog,'.');
        if (suff != NULL && !strcmp(suff,".exe")) {
            *suff = '\0';
        }
    }
#endif

    if (RtsFlags.ProfFlags.doHeapProfile) {
        /* Initialise the log file name */
        hp_filename = stgMallocBytes(strlen(prog) + 6, "hpFileName");
        sprintf(hp_filename, "%s.hp", prog);

        /* open the log file */
        if ((hp_file = __rts_fopen(hp_filename, "w")) == NULL) {
            debugBelch("Can't open profiling report file %s\n",
                       hp_filename);
            RtsFlags.ProfFlags.doHeapProfile = 0;
            stgFree(prog);
            return;
        }
    }

    stgFree(prog);

    initHeapProfiling();
}

void endProfiling( void )
{
    endHeapProfiling();
}
#endif /* !PROFILING */

static void
printEscapedString(const char* string)
{
    for (const char* p = string; *p != '\0'; ++p) {
        if (*p == '\"') {
            // Escape every " as ""
            fputc('"', hp_file);
        }
        fputc(*p, hp_file);
    }
}
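
/* E.g. printEscapedString("say \"hi\"") emits: say ""hi""
 * (the doubled-quote convention used by the .hp JOB header below). */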

static void
printSample(bool beginSample, StgDouble sampleValue)
{
    fprintf(hp_file, "%s %f\n",
            (beginSample ? "BEGIN_SAMPLE" : "END_SAMPLE"),
            sampleValue);
    if (!beginSample) {
        fflush(hp_file);
    }
}
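
/* E.g. printSample(true, 1.5) writes "BEGIN_SAMPLE 1.500000" to the .hp
 * file; the matching printSample(false, ...) writes END_SAMPLE and then
 * flushes, so each census reaches the file as a complete block. */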

static void
dumpCostCentresToEventLog(void)
{
#if defined(PROFILING)
    CostCentre *cc, *next;
    for (cc = CC_LIST; cc != NULL; cc = next) {
        next = cc->link;
        traceHeapProfCostCentre(cc->ccID, cc->label, cc->module,
                                cc->srcloc, cc->is_caf);
    }
#endif
}

/* --------------------------------------------------------------------------
 * Initialize the heap profiler
 * ----------------------------------------------------------------------- */
uint32_t
initHeapProfiling(void)
{
    if (! RtsFlags.ProfFlags.doHeapProfile) {
        return 0;
    }

#if defined(PROFILING)
    if (doingLDVProfiling() && doingRetainerProfiling()) {
        errorBelch("cannot mix -hb and -hr");
        stg_exit(EXIT_FAILURE);
    }
#if defined(THREADED_RTS)
    // See #12019.
    if (doingLDVProfiling() && RtsFlags.ParFlags.nCapabilities > 1) {
        errorBelch("-hb cannot be used with multiple capabilities");
        stg_exit(EXIT_FAILURE);
    }
#endif
#endif

    // we only count eras if we're doing LDV profiling.  Otherwise era
    // is fixed at zero.
#if defined(PROFILING)
    if (doingLDVProfiling()) {
        era = 1;
    } else
#endif
    {
        era = 0;
    }

    // max_era = 2^LDV_SHIFT
    max_era = 1 << LDV_SHIFT;

    n_censuses = 32;
    censuses = stgMallocBytes(sizeof(Census) * n_censuses, "initHeapProfiling");

    initEra( &censuses[era] );

    /* initProfilingLogFile(); */
    fprintf(hp_file, "JOB \"");
    printEscapedString(prog_name);

#if defined(PROFILING)
    for (int i = 1; i < prog_argc; ++i) {
        fputc(' ', hp_file);
        printEscapedString(prog_argv[i]);
    }
    fprintf(hp_file, " +RTS");
    for (int i = 0; i < rts_argc; ++i) {
        fputc(' ', hp_file);
        printEscapedString(rts_argv[i]);
    }
#endif /* PROFILING */

    fprintf(hp_file, "\"\n" );

    fprintf(hp_file, "DATE \"%s\"\n", time_str());

    fprintf(hp_file, "SAMPLE_UNIT \"seconds\"\n");
    fprintf(hp_file, "VALUE_UNIT \"bytes\"\n");

    printSample(true, 0);
    printSample(false, 0);

#if defined(PROFILING)
    if (doingRetainerProfiling()) {
        initRetainerProfiling();
    }
#endif

    traceHeapProfBegin(0);
    dumpCostCentresToEventLog();

    return 0;
}
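
/* The header this writes looks like (illustrative values):
 *
 *   JOB "myprog arg1 +RTS -hc"
 *   DATE "Tue Jun 18 15:00 2019"
 *   SAMPLE_UNIT "seconds"
 *   VALUE_UNIT "bytes"
 *   BEGIN_SAMPLE 0.000000
 *   END_SAMPLE 0.000000
 *
 * i.e. the standard .hp preamble consumed by hp2ps and friends. */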

void
endHeapProfiling(void)
{
    StgDouble seconds;

    if (! RtsFlags.ProfFlags.doHeapProfile) {
        return;
    }

#if defined(PROFILING)
    if (doingRetainerProfiling()) {
        endRetainerProfiling();
    }
#endif

#if defined(PROFILING)
    if (doingLDVProfiling()) {
        uint32_t t;
        LdvCensusKillAll();
        aggregateCensusInfo();
        for (t = 1; t < era; t++) {
            dumpCensus( &censuses[t] );
        }
    }
#endif

#if defined(PROFILING)
    if (doingLDVProfiling()) {
        uint32_t t;
        if (RtsFlags.ProfFlags.bioSelector != NULL) {
            for (t = 1; t <= era; t++) {
                freeEra( &censuses[t] );
            }
        } else {
            freeEra( &censuses[era] );
        }
    } else {
        freeEra( &censuses[0] );
    }
#else
    freeEra( &censuses[0] );
#endif

    stgFree(censuses);

    seconds = mut_user_time();
    printSample(true, seconds);
    printSample(false, seconds);
    fclose(hp_file);
}

#if defined(PROFILING)
static size_t
buf_append(char *p, const char *q, char *end)
{
    size_t m;

    for (m = 0; p < end; p++, q++, m++) {
        *p = *q;
        if (*q == '\0') { break; }
    }
    return m;
}
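
/* Note: the returned count excludes the terminating NUL even though the
 * NUL (when it fits) is copied into the buffer, so a caller advancing by
 * the return value resumes exactly on the NUL and overwrites it on the
 * next append.  If the buffer fills first, nothing NUL-terminates the
 * output; fprint_ccs below handles that by writing "..." and stopping. */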

static void
fprint_ccs(FILE *fp, CostCentreStack *ccs, uint32_t max_length)
{
    char buf[max_length+1], *p, *buf_end;

    // MAIN on its own gets printed as "MAIN", otherwise we ignore MAIN.
    if (ccs == CCS_MAIN) {
        fprintf(fp, "MAIN");
        return;
    }

    fprintf(fp, "(%" FMT_Int ")", ccs->ccsID);

    p = buf;
    buf_end = buf + max_length + 1;

    // keep printing components of the stack until we run out of space
    // in the buffer.  If we run out of space, end with "...".
    for (; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack) {

        // CAF cost centres print as M.CAF, but we leave the module
        // name out of all the others to save space.
        if (!strcmp(ccs->cc->label,"CAF")) {
            p += buf_append(p, ccs->cc->module, buf_end);
            p += buf_append(p, ".CAF", buf_end);
        } else {
            p += buf_append(p, ccs->cc->label, buf_end);
            if (ccs->prevStack != NULL && ccs->prevStack != CCS_MAIN) {
                p += buf_append(p, "/", buf_end);
            }
        }

        if (p >= buf_end) {
            sprintf(buf+max_length-4, "...");
            break;
        }
    }
    fprintf(fp, "%s", buf);
}
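
/* Example (hypothetical stack): a CCS with id 42 whose components are
 * f, g, and a CAF in module Main prints as
 *
 *   (42)f/g/Main.CAF
 *
 * or, when it exceeds max_length (set with +RTS -L<n>), ends in "...". */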

bool
strMatchesSelector( const char* str, const char* sel )
{
    const char* p;
    // debugBelch("str_matches_selector %s %s\n", str, sel);
    while (1) {
        // Compare str against wherever we've got to in sel.
        p = str;
        while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
            p++; sel++;
        }
        // Match if all of str was used and we have reached the end of
        // a sel fragment.
        if (*p == '\0' && (*sel == ',' || *sel == '\0'))
            return true;

        // No match.  Advance sel to the start of the next element.
        while (*sel != ',' && *sel != '\0') sel++;
        if (*sel == ',') sel++;

        // Run out of sel?
        if (*sel == '\0') return false;
    }
}
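
/* Selectors are comma-separated lists of exact strings, so e.g.
 *
 *   strMatchesSelector("drag", "lag,drag")  == true
 *   strMatchesSelector("dra",  "lag,drag")  == false   // whole-string match only
 *
 * ("lag,drag" is the form taken by -hb and the other selector flags). */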

#endif /* PROFILING */

/* -----------------------------------------------------------------------------
 * Figure out whether a closure should be counted in this census, by
 * testing against all the specified constraints.
 * -------------------------------------------------------------------------- */
static bool
closureSatisfiesConstraints( const StgClosure* p )
{
#if !defined(PROFILING)
    (void)p;   /* keep gcc -Wall happy */
    return true;
#else
    bool b;

    // The CCS has a selected field to indicate whether this closure is
    // deselected by not being mentioned in the module, CC, or CCS
    // selectors.
    if (!p->header.prof.ccs->selected) {
        return false;
    }

    if (RtsFlags.ProfFlags.descrSelector) {
        b = strMatchesSelector( (GET_PROF_DESC(get_itbl((StgClosure *)p))),
                                RtsFlags.ProfFlags.descrSelector );
        if (!b) return false;
    }
    if (RtsFlags.ProfFlags.typeSelector) {
        b = strMatchesSelector( (GET_PROF_TYPE(get_itbl((StgClosure *)p))),
                                RtsFlags.ProfFlags.typeSelector );
        if (!b) return false;
    }
    if (RtsFlags.ProfFlags.retainerSelector) {
        RetainerSet *rs;
        uint32_t i;
        // We must check that the retainer set is valid here.  One
        // reason it might not be valid is if this closure is a
        // newly deceased weak pointer (i.e. a DEAD_WEAK), since
        // these aren't reached by the retainer profiler's traversal.
        if (isRetainerSetFieldValid((StgClosure *)p)) {
            rs = retainerSetOf((StgClosure *)p);
            if (rs != NULL) {
                for (i = 0; i < rs->num; i++) {
                    b = strMatchesSelector( rs->element[i]->cc->label,
                                            RtsFlags.ProfFlags.retainerSelector );
                    if (b) return true;
                }
            }
        }
        return false;
    }
    return true;
#endif /* PROFILING */
}
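
/* For example (flags shown for illustration), a run with +RTS -hc -hyInt
 * would band the profile by cost-centre stack while the typeSelector
 * check above restricts the census to closures whose type string
 * matches "Int". */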

/* -----------------------------------------------------------------------------
 * Aggregate the heap census info for biographical profiling
 * -------------------------------------------------------------------------- */
#if defined(PROFILING)
static void
aggregateCensusInfo( void )
{
    HashTable *acc;
    uint32_t t;
    counter *c, *d, *ctrs;
    Arena *arena;

    if (!doingLDVProfiling()) return;

    // Aggregate the LDV counters when displaying by biography.
    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
        long void_total, drag_total;

        // Now we compute void_total and drag_total for each census.
        // After the program has finished, the void_total field of
        // each census contains the count of words that were *created*
        // in this era and were eventually void.  Conversely, if a
        // void closure was destroyed in this era, it will be
        // represented by a negative count of words in void_total.
        //
        // To get the count of live words that are void at each
        // census, just propagate the void_total count forwards:

        void_total = 0;
        drag_total = 0;
        for (t = 1; t < era; t++) { // note: start at 1, not 0
            void_total += censuses[t].void_total;
            drag_total += censuses[t].drag_total;
            censuses[t].void_total = void_total;
            censuses[t].drag_total = drag_total;

            ASSERT( censuses[t].void_total <= censuses[t].not_used );
            // should be true because: void_total is the count of
            // live words that are void at this census, which *must*
            // be less than the number of live words that have not
            // been used yet.

            ASSERT( censuses[t].drag_total <= censuses[t].used );
            // similar reasoning as above.
        }

        return;
    }

    // otherwise... we're doing a heap profile that is restricted to
    // some combination of lag, drag, void or use.  We've kept all the
    // census info for all censuses so far, but we still need to
    // aggregate the counters forwards.

    arena = newArena();
    acc = allocHashTable();
    ctrs = NULL;

    for (t = 1; t < era; t++) {

        // first look through all the counters we're aggregating
        for (c = ctrs; c != NULL; c = c->next) {
            // if one of the totals is non-zero, then this closure
            // type must be present in the heap at this census time...
            d = lookupHashTable(censuses[t].hash, (StgWord)c->identity);

            if (d == NULL) {
                // if this closure identity isn't present in the
                // census for this time period, then our running
                // totals *must* be zero.
                ASSERT(c->c.ldv.void_total == 0 && c->c.ldv.drag_total == 0);

                // debugCCS(c->identity);
                // debugBelch(" census=%d void_total=%d drag_total=%d\n",
                //            t, c->c.ldv.void_total, c->c.ldv.drag_total);
            } else {
                d->c.ldv.void_total += c->c.ldv.void_total;
                d->c.ldv.drag_total += c->c.ldv.drag_total;
                c->c.ldv.void_total = d->c.ldv.void_total;
                c->c.ldv.drag_total = d->c.ldv.drag_total;

                ASSERT( c->c.ldv.void_total >= 0 );
                ASSERT( c->c.ldv.drag_total >= 0 );
            }
        }

        // now look through the counters in this census to find new ones
        for (c = censuses[t].ctrs; c != NULL; c = c->next) {
            d = lookupHashTable(acc, (StgWord)c->identity);
            if (d == NULL) {
                d = arenaAlloc( arena, sizeof(counter) );
                initLDVCtr(d);
                insertHashTable( acc, (StgWord)c->identity, d );
                d->identity = c->identity;
                d->next = ctrs;
                ctrs = d;
                d->c.ldv.void_total = c->c.ldv.void_total;
                d->c.ldv.drag_total = c->c.ldv.drag_total;
            }
            ASSERT( c->c.ldv.void_total >= 0 );
            ASSERT( c->c.ldv.drag_total >= 0 );
        }
    }

    freeHashTable(acc, NULL);
    arenaFree(arena);
}
#endif
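
/* The per-identity pass mirrors the global one: each counter in 'ctrs'
 * carries a running void/drag sum for one closure identity, and each
 * census's counter is overwritten with that running sum, turning the
 * per-era deltas recorded by LDV_recordDead into live totals. */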

/* -----------------------------------------------------------------------------
 * Print out the results of a heap census.
 * -------------------------------------------------------------------------- */
static void
dumpCensus( Census *census )
{
    counter *ctr;
    ssize_t count;

    printSample(true, census->time);
    traceHeapProfSampleBegin(era);

#if defined(PROFILING)
    /* change typecast to uint64_t to remove
     * print formatting warning.  See #12636 */
    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
        fprintf(hp_file, "VOID\t%" FMT_Word64 "\n",
                (uint64_t)(census->void_total *
                           sizeof(W_)));
        fprintf(hp_file, "LAG\t%" FMT_Word64 "\n",
                (uint64_t)((census->not_used - census->void_total) *
                           sizeof(W_)));
        fprintf(hp_file, "USE\t%" FMT_Word64 "\n",
                (uint64_t)((census->used - census->drag_total) *
                           sizeof(W_)));
        fprintf(hp_file, "INHERENT_USE\t%" FMT_Word64 "\n",
                (uint64_t)(census->prim * sizeof(W_)));
        fprintf(hp_file, "DRAG\t%" FMT_Word64 "\n",
                (uint64_t)(census->drag_total * sizeof(W_)));
        printSample(false, census->time);
        return;
    }
#endif

    for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {

#if defined(PROFILING)
        if (RtsFlags.ProfFlags.bioSelector != NULL) {
            count = 0;
            if (strMatchesSelector("lag", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.not_used - ctr->c.ldv.void_total;
            if (strMatchesSelector("drag", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.drag_total;
            if (strMatchesSelector("void", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.void_total;
            if (strMatchesSelector("use", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.used - ctr->c.ldv.drag_total;
        } else
#endif
        {
            count = ctr->c.resid;
        }

        ASSERT( count >= 0 );

        if (count == 0) continue;

        switch (RtsFlags.ProfFlags.doHeapProfile) {
        case HEAP_BY_CLOSURE_TYPE:
            fprintf(hp_file, "%s", (char *)ctr->identity);
            traceHeapProfSampleString(0, (char *)ctr->identity,
                                      count * sizeof(W_));
            break;
        }

#if defined(PROFILING)
        switch (RtsFlags.ProfFlags.doHeapProfile) {
        case HEAP_BY_CCS:
            fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
                       RtsFlags.ProfFlags.ccsLength);
            traceHeapProfSampleCostCentre(0, (CostCentreStack *)ctr->identity,
                                          count * sizeof(W_));
            break;
        case HEAP_BY_MOD:
        case HEAP_BY_DESCR:
        case HEAP_BY_TYPE:
            fprintf(hp_file, "%s", (char *)ctr->identity);
            traceHeapProfSampleString(0, (char *)ctr->identity,
                                      count * sizeof(W_));
            break;
        case HEAP_BY_RETAINER:
        {
            RetainerSet *rs = (RetainerSet *)ctr->identity;

            // it might be the distinguished retainer set rs_MANY:
            if (rs == &rs_MANY) {
                fprintf(hp_file, "MANY");
                break;
            }

            // Mark this retainer set by negating its id, because it
            // has appeared in at least one census.  We print the
            // values of all such retainer sets into the log file at
            // the end.  A retainer set may exist but not feature in
            // any censuses if it arose as the intermediate retainer
            // set for some closure during retainer set calculation.
            if (rs->id > 0)
                rs->id = -(rs->id);

            // report in the unit of bytes: * sizeof(StgWord)
            printRetainerSetShort(hp_file, rs, RtsFlags.ProfFlags.ccsLength);
            break;
        }
        default:
            barf("dumpCensus; doHeapProfile");
        }
#endif

        fprintf(hp_file, "\t%" FMT_Word "\n", (W_)count * sizeof(W_));
    }

    printSample(false, census->time);
}
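
/* A census dumped under -hT looks roughly like (sizes hypothetical):
 *
 *   BEGIN_SAMPLE 0.500000
 *   ARR_WORDS	16360
 *   FUN_2_0	528
 *   END_SAMPLE 0.500000
 *
 * one "<band>\t<bytes>" line per non-zero counter, bracketed by the
 * BEGIN_SAMPLE/END_SAMPLE pair written by printSample(). */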

static void heapProfObject(Census *census, StgClosure *p, size_t size,
                           bool prim
#if !defined(PROFILING)
                           STG_UNUSED
#endif
                           )
{
    const void *identity;
    size_t real_size;
    counter *ctr;

    identity = NULL;

#if defined(PROFILING)
    // subtract the profiling overhead
    real_size = size - sizeofW(StgProfHeader);
#else
    real_size = size;
#endif

    if (closureSatisfiesConstraints((StgClosure*)p)) {
#if defined(PROFILING)
        if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
            if (prim)
                census->prim += real_size;
            else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
                census->not_used += real_size;
            else
                census->used += real_size;
        } else
#endif
        {
            identity = closureIdentity((StgClosure *)p);

            if (identity != NULL) {
                ctr = lookupHashTable(census->hash, (StgWord)identity);
                if (ctr != NULL) {
#if defined(PROFILING)
                    if (RtsFlags.ProfFlags.bioSelector != NULL) {
                        if (prim)
                            ctr->c.ldv.prim += real_size;
                        else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
                            ctr->c.ldv.not_used += real_size;
                        else
                            ctr->c.ldv.used += real_size;
                    } else
#endif
                    {
                        ctr->c.resid += real_size;
                    }
                } else {
                    ctr = arenaAlloc( census->arena, sizeof(counter) );
                    initLDVCtr(ctr);
                    insertHashTable( census->hash, (StgWord)identity, ctr );
                    ctr->identity = identity;
                    ctr->next = census->ctrs;
                    census->ctrs = ctr;

#if defined(PROFILING)
                    if (RtsFlags.ProfFlags.bioSelector != NULL) {
                        if (prim)
                            ctr->c.ldv.prim = real_size;
                        else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
                            ctr->c.ldv.not_used = real_size;
                        else
                            ctr->c.ldv.used = real_size;
                    } else
#endif
                    {
                        ctr->c.resid = real_size;
                    }
                }
            }
        }
    }
}

// Compact objects require special handling code because they
// are not stored consecutively in memory (rather, each object
// is a list of objects), and that would break the while loop
// below.  But we know that each block holds at most one object
// so we don't need the loop.
//
// See Note [Compact Normal Forms] for details.
static void
heapCensusCompactList(Census *census, bdescr *bd)
{
    for (; bd != NULL; bd = bd->link) {
        StgCompactNFDataBlock *block = (StgCompactNFDataBlock*)bd->start;
        StgCompactNFData *str = block->owner;
        heapProfObject(census, (StgClosure*)str,
                       compact_nfdata_full_sizeW(str), true);
    }
}

/* -----------------------------------------------------------------------------
 * Code to perform a heap census.
 * -------------------------------------------------------------------------- */
static void
heapCensusChain( Census *census, bdescr *bd )
{
    StgPtr p;
    const StgInfoTable *info;
    size_t size;
    bool prim;

    for (; bd != NULL; bd = bd->link) {

        // HACK: pretend a pinned block is just one big ARR_WORDS
        // owned by CCS_PINNED.  These blocks can be full of holes due
        // to alignment constraints so we can't traverse the memory
        // and do a proper census.
        if (bd->flags & BF_PINNED) {
            StgClosure arr;
            SET_HDR(&arr, &stg_ARR_WORDS_info, CCS_PINNED);
            heapProfObject(census, &arr, bd->blocks * BLOCK_SIZE_W, true);
            continue;
        }

        p = bd->start;

        // When we shrink a large ARR_WORDS, we do not adjust the free pointer
        // of the associated block descriptor, thus introducing slop at the end
        // of the object.  This slop remains after GC, violating the assumption
        // of the loop below that all slop has been eliminated (#11627).
        // Consequently, we handle large ARR_WORDS objects as a special case.
        if (bd->flags & BF_LARGE
            && get_itbl((StgClosure *)p)->type == ARR_WORDS) {
            size = arr_words_sizeW((StgArrBytes *)p);
            prim = true;
            heapProfObject(census, (StgClosure *)p, size, prim);
            continue;
        }

        while (p < bd->free) {
            info = get_itbl((const StgClosure *)p);
            prim = false;

            switch (info->type) {

            case THUNK:
                size = thunk_sizeW_fromITBL(info);
                break;

            case THUNK_1_1:
            case THUNK_0_2:
            case THUNK_2_0:
                size = sizeofW(StgThunkHeader) + 2;
                break;

            case THUNK_1_0:
            case THUNK_0_1:
            case THUNK_SELECTOR:
                size = sizeofW(StgThunkHeader) + 1;
                break;

            case FUN:
            case BLACKHOLE:
            case BLOCKING_QUEUE:
            case FUN_1_0:
            case FUN_0_1:
            case FUN_1_1:
            case FUN_0_2:
            case FUN_2_0:
            case CONSTR:
            case CONSTR_NOCAF:
            case CONSTR_1_0:
            case CONSTR_0_1:
            case CONSTR_1_1:
            case CONSTR_0_2:
            case CONSTR_2_0:
                size = sizeW_fromITBL(info);
                break;

            case IND:
                // Special case/Delicate Hack: INDs don't normally
                // appear, since we're doing this heap census right
                // after GC.  However, GarbageCollect() also does
                // resurrectThreads(), which can update some
                // blackholes when it calls raiseAsync() on the
                // resurrected threads.  So we know that any IND will
                // be the size of a BLACKHOLE.
                size = BLACKHOLE_sizeW();
                break;

            case BCO:
                prim = true;
                size = bco_sizeW((StgBCO *)p);
                break;

            case MVAR_CLEAN:
            case MVAR_DIRTY:
            case TVAR:
            case WEAK:
            case PRIM:
            case MUT_PRIM:
            case MUT_VAR_CLEAN:
            case MUT_VAR_DIRTY:
                prim = true;
                size = sizeW_fromITBL(info);
                break;

            case AP:
                size = ap_sizeW((StgAP *)p);
                break;

            case PAP:
                size = pap_sizeW((StgPAP *)p);
                break;

            case AP_STACK:
                size = ap_stack_sizeW((StgAP_STACK *)p);
                break;

            case ARR_WORDS:
                prim = true;
                size = arr_words_sizeW((StgArrBytes*)p);
                break;

            case MUT_ARR_PTRS_CLEAN:
            case MUT_ARR_PTRS_DIRTY:
            case MUT_ARR_PTRS_FROZEN_CLEAN:
            case MUT_ARR_PTRS_FROZEN_DIRTY:
                prim = true;
                size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
                break;

            case SMALL_MUT_ARR_PTRS_CLEAN:
            case SMALL_MUT_ARR_PTRS_DIRTY:
            case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
            case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
                prim = true;
                size = small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs *)p);
                break;

            case TSO:
                prim = true;
#if defined(PROFILING)
                if (RtsFlags.ProfFlags.includeTSOs) {
                    size = sizeofW(StgTSO);
                    break;
                } else {
                    // Skip this TSO and move on to the next object
                    p += sizeofW(StgTSO);
                    continue;
                }
#else
                size = sizeofW(StgTSO);
                break;
#endif

            case STACK:
                prim = true;
#if defined(PROFILING)
                if (RtsFlags.ProfFlags.includeTSOs) {
                    size = stack_sizeW((StgStack*)p);
                    break;
                } else {
                    // Skip this STACK and move on to the next object
                    p += stack_sizeW((StgStack*)p);
                    continue;
                }
#else
                size = stack_sizeW((StgStack*)p);
                break;
#endif

            case TREC_CHUNK:
                prim = true;
                size = sizeofW(StgTRecChunk);
                break;

            case COMPACT_NFDATA:
                barf("heapCensus, found compact object in the wrong list");
                break;

            default:
                barf("heapCensus, unknown object: %d", info->type);
            }

            heapProfObject(census,(StgClosure*)p,size,prim);

            p += size;
        }
    }
}

void heapCensus (Time t)
{
    uint32_t g, n;
    Census *census;
    gen_workspace *ws;

    census = &censuses[era];
    census->time = mut_user_time_until(t);

    // calculate retainer sets if necessary
#if defined(PROFILING)
    if (doingRetainerProfiling()) {
        retainerProfile();
    }
#endif

#if defined(PROFILING)
    stat_startHeapCensus();
#endif

    // Traverse the heap, collecting the census info
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        heapCensusChain( census, generations[g].blocks );
        // Are we interested in large objects?  It might be
        // confusing to include the stack in a heap profile.
        heapCensusChain( census, generations[g].large_objects );
        heapCensusCompactList ( census, generations[g].compact_objects );

        for (n = 0; n < n_capabilities; n++) {
            ws = &gc_threads[n]->gens[g];
            heapCensusChain(census, ws->todo_bd);
            heapCensusChain(census, ws->part_list);
            heapCensusChain(census, ws->scavd_list);
        }
    }

    // dump out the census info
#if defined(PROFILING)
    // We can't generate any info for LDV profiling until
    // the end of the run...
    if (!doingLDVProfiling())
        dumpCensus( census );
#else
    dumpCensus( census );
#endif

    // free our storage, unless we're keeping all the census info for
    // future restriction by biography.
#if defined(PROFILING)
    if (RtsFlags.ProfFlags.bioSelector == NULL)
    {
        freeEra(census);
        census->hash = NULL;
        census->arena = NULL;
    }
#endif

    // we're into the next time period now
    nextEra();

#if defined(PROFILING)
    stat_endHeapCensus();
#endif
}