/* ----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2003
 *
 * Support for heap profiling
 *
 * --------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"
#include "RtsUtils.h"
#include "RtsFlags.h"
#include "Profiling.h"
#include "ProfHeap.h"
#include "Stats.h"
#include "Hash.h"
#include "RetainerProfile.h"
#include "LdvProfile.h"
#include "Arena.h"
#include "Printer.h"

#include <string.h>
#include <stdlib.h>
#include <math.h>
/* -----------------------------------------------------------------------------
 * era stores the current time period.  It is the same as the
 * number of censuses that have been performed.
 *
 * RESTRICTION:
 *   era must fit in LDV_SHIFT (15 or 30) bits.
 * Invariants:
 *   era is initialized to 1 in initHeapProfiling().
 *
 * max_era is initialized to 2^LDV_SHIFT in initHeapProfiling().
 * When era reaches max_era, profiling stops, because a closure can
 * store only up to (max_era - 1) as its creation or last-use time.
 * -------------------------------------------------------------------------- */
unsigned int era;
static nat max_era;

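/* Illustrative sketch (an assumption about the layout, per LdvProfile.h):
 * each closure's profiling word LDVW(c) packs a state flag together with
 * LDV_SHIFT-bit creation and last-use era fields, which is why era itself
 * must stay below 2^LDV_SHIFT.  A hypothetical bounds check, not used
 * anywhere in this file:
 */
#if 0
STATIC_INLINE rtsBool
eraFitsInLdvWord( nat e )
{
    return e < ((nat)1 << LDV_SHIFT);   // i.e. e < max_era
}
#endif
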
/* -----------------------------------------------------------------------------
 * Counters
 *
 * For most heap profiles each closure identity gets a simple count
 * of live words in the heap at each census.  However, if we're
 * selecting by biography, then we have to keep the various
 * lag/drag/void counters for each identity.
 * -------------------------------------------------------------------------- */
typedef struct _counter {
    void *identity;
    union {
        nat resid;
        struct {
            int prim;       // total size of 'inherently used' closures
            int not_used;   // total size of 'never used' closures
            int used;       // total size of 'used at least once' closures
            int void_total; // current total size of 'destroyed without being used' closures
            int drag_total; // current total size of 'used at least once and waiting to die'
        } ldv;
    } c;
    struct _counter *next;
} counter;

STATIC_INLINE void
initLDVCtr( counter *ctr )
{
    ctr->c.ldv.prim = 0;
    ctr->c.ldv.not_used = 0;
    ctr->c.ldv.used = 0;
    ctr->c.ldv.void_total = 0;
    ctr->c.ldv.drag_total = 0;
}

typedef struct {
    double    time;    // the time in MUT time when the census is made
    HashTable *hash;
    counter   *ctrs;
    Arena     *arena;

    // for LDV profiling, when just displaying by LDV
    int prim;
    int not_used;
    int used;
    int void_total;
    int drag_total;
} Census;

static Census *censuses = NULL;
static nat n_censuses = 0;

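/* Counters live in each census's arena, are threaded onto census->ctrs, and
 * are indexed by census->hash.  The lookup-or-insert idiom used repeatedly
 * below can be summarised by this sketch (a hypothetical helper, not part
 * of the original code):
 */
#if 0
static counter *
findOrAddCtr( Census *census, void *id )
{
    counter *ctr = lookupHashTable(census->hash, (StgWord)id);
    if (ctr == NULL) {
        ctr = arenaAlloc(census->arena, sizeof(counter));
        initLDVCtr(ctr);
        insertHashTable(census->hash, (StgWord)id, ctr);
        ctr->identity = id;
        ctr->next = census->ctrs;
        census->ctrs = ctr;
    }
    return ctr;
}
#endif
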
#ifdef PROFILING
static void aggregateCensusInfo( void );
#endif

static void dumpCensus( Census *census );

/* ----------------------------------------------------------------------------
   Closure Type Profiling
   ------------------------------------------------------------------------- */

#ifndef PROFILING
static char *type_names[] = {
    "INVALID_OBJECT",
    "CONSTR",
    "CONSTR_1_0",
    "CONSTR_0_1",
    "CONSTR_2_0",
    "CONSTR_1_1",
    "CONSTR_0_2",
    "CONSTR_STATIC",
    "CONSTR_NOCAF_STATIC",
    "FUN",
    "FUN_1_0",
    "FUN_0_1",
    "FUN_2_0",
    "FUN_1_1",
    "FUN_0_2",
    "FUN_STATIC",
    "THUNK",
    "THUNK_1_0",
    "THUNK_0_1",
    "THUNK_2_0",
    "THUNK_1_1",
    "THUNK_0_2",
    "THUNK_STATIC",
    "THUNK_SELECTOR",
    "BCO",
    "AP",
    "PAP",
    "AP_STACK",
    "IND",
    "IND_OLDGEN",
    "IND_PERM",
    "IND_OLDGEN_PERM",
    "IND_STATIC",
    "RET_BCO",
    "RET_SMALL",
    "RET_BIG",
    "RET_DYN",
    "RET_FUN",
    "UPDATE_FRAME",
    "CATCH_FRAME",
    "STOP_FRAME",
    "CAF_BLACKHOLE",
    "BLACKHOLE",
    "SE_BLACKHOLE",
    "SE_CAF_BLACKHOLE",
    "MVAR_CLEAN",
    "MVAR_DIRTY",
    "ARR_WORDS",
    "MUT_ARR_PTRS_CLEAN",
    "MUT_ARR_PTRS_DIRTY",
    "MUT_ARR_PTRS_FROZEN0",
    "MUT_ARR_PTRS_FROZEN",
    "MUT_VAR_CLEAN",
    "MUT_VAR_DIRTY",
    "WEAK",
    "STABLE_NAME",
    "TSO",
    "BLOCKED_FETCH",
    "FETCH_ME",
    "FETCH_ME_BQ",
    "RBH",
    "EVACUATED",
    "REMOTE_REF",
    "TVAR_WATCH_QUEUE",
    "INVARIANT_CHECK_QUEUE",
    "ATOMIC_INVARIANT",
    "TVAR",
    "TREC_CHUNK",
    "TREC_HEADER",
    "ATOMICALLY_FRAME",
    "CATCH_RETRY_FRAME",
    "CATCH_STM_FRAME",
    "N_CLOSURE_TYPES"
};
#endif

/* ----------------------------------------------------------------------------
 * Find the "closure identity", which is a unique pointer representing
 * the band to which this closure's heap space is attributed in the
 * heap profile.
 * ------------------------------------------------------------------------- */
STATIC_INLINE void *
closureIdentity( StgClosure *p )
{
    switch (RtsFlags.ProfFlags.doHeapProfile) {

#ifdef PROFILING
    case HEAP_BY_CCS:
        return p->header.prof.ccs;
    case HEAP_BY_MOD:
        return p->header.prof.ccs->cc->module;
    case HEAP_BY_DESCR:
        return GET_PROF_DESC(get_itbl(p));
    case HEAP_BY_TYPE:
        return GET_PROF_TYPE(get_itbl(p));
    case HEAP_BY_RETAINER:
        // AFAIK, the only closures in the heap which might not have a
        // valid retainer set are DEAD_WEAK closures.
        if (isRetainerSetFieldValid(p))
            return retainerSetOf(p);
        else
            return NULL;

#else
    case HEAP_BY_CLOSURE_TYPE:
    {
        StgInfoTable *info;
        info = get_itbl(p);
        switch (info->type) {
        case CONSTR:
        case CONSTR_1_0:
        case CONSTR_0_1:
        case CONSTR_2_0:
        case CONSTR_1_1:
        case CONSTR_0_2:
        case CONSTR_STATIC:
        case CONSTR_NOCAF_STATIC:
            return GET_CON_DESC(itbl_to_con_itbl(info));
        default:
            return type_names[info->type];
        }
    }

#endif
    default:
        barf("closureIdentity");
    }
}

/* --------------------------------------------------------------------------
 * Profiling type predicates
 * ----------------------------------------------------------------------- */
#ifdef PROFILING
STATIC_INLINE rtsBool
doingLDVProfiling( void )
{
    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
            || RtsFlags.ProfFlags.bioSelector != NULL);
}

STATIC_INLINE rtsBool
doingRetainerProfiling( void )
{
    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER
            || RtsFlags.ProfFlags.retainerSelector != NULL);
}
#endif /* PROFILING */

// Processes a closure 'c' being destroyed whose size is 'size'.
// Make sure that LDV_recordDead() is not invoked on 'inherently used' closures
// such as TSOs; they should not be involved in computing dragNew or voidNew.
//
// Even though era is checked in both LdvCensusForDead() and
// LdvCensusKillAll(), we still need to make sure that era is > 0, because
// LDV_recordDead() may be called from elsewhere in the runtime system, e.g.
// when a thunk is replaced by an indirection object.

#ifdef PROFILING
void
LDV_recordDead( StgClosure *c, nat size )
{
    void *id;
    nat t;
    counter *ctr;

    if (era > 0 && closureSatisfiesConstraints(c)) {
        size -= sizeofW(StgProfHeader);
        ASSERT(LDVW(c) != 0);
        if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) {
            t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT;
            if (t < era) {
                if (RtsFlags.ProfFlags.bioSelector == NULL) {
                    censuses[t].void_total   += (int)size;
                    censuses[era].void_total -= (int)size;
                    ASSERT(censuses[t].void_total < censuses[t].not_used);
                } else {
                    id = closureIdentity(c);
                    ctr = lookupHashTable(censuses[t].hash, (StgWord)id);
                    ASSERT( ctr != NULL );
                    ctr->c.ldv.void_total += (int)size;
                    ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
                    if (ctr == NULL) {
                        ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
                        initLDVCtr(ctr);
                        insertHashTable(censuses[era].hash, (StgWord)id, ctr);
                        ctr->identity = id;
                        ctr->next = censuses[era].ctrs;
                        censuses[era].ctrs = ctr;
                    }
                    ctr->c.ldv.void_total -= (int)size;
                }
            }
        } else {
            t = LDVW((c)) & LDV_LAST_MASK;
            if (t + 1 < era) {
                if (RtsFlags.ProfFlags.bioSelector == NULL) {
                    censuses[t+1].drag_total += size;
                    censuses[era].drag_total -= size;
                } else {
                    void *id;
                    id = closureIdentity(c);
                    ctr = lookupHashTable(censuses[t+1].hash, (StgWord)id);
                    ASSERT( ctr != NULL );
                    ctr->c.ldv.drag_total += (int)size;
                    ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
                    if (ctr == NULL) {
                        ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
                        initLDVCtr(ctr);
                        insertHashTable(censuses[era].hash, (StgWord)id, ctr);
                        ctr->identity = id;
                        ctr->next = censuses[era].ctrs;
                        censuses[era].ctrs = ctr;
                    }
                    ctr->c.ldv.drag_total -= (int)size;
                }
            }
        }
    }
}
#endif
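
/* Worked example (illustrative): suppose a closure of net size 10 words is
 * created during era 3 and dies during era 7 without ever being used.  The
 * code above adds 10 to censuses[3].void_total and subtracts 10 from
 * censuses[7].void_total; once aggregateCensusInfo() turns void_total into
 * a running sum, censuses 3..6 each count those 10 words as live-but-void
 * and censuses from 7 onwards do not.  Drag is recorded the same way, but
 * starting at the era *after* the closure's last use.
 */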

/* --------------------------------------------------------------------------
 * Initialize censuses[era]
 * ----------------------------------------------------------------------- */

STATIC_INLINE void
initEra(Census *census)
{
    census->hash       = allocHashTable();
    census->ctrs       = NULL;
    census->arena      = newArena();

    census->not_used   = 0;
    census->used       = 0;
    census->prim       = 0;
    census->void_total = 0;
    census->drag_total = 0;
}

STATIC_INLINE void
freeEra(Census *census)
{
    if (RtsFlags.ProfFlags.bioSelector != NULL)
        // when bioSelector==NULL, these are freed in heapCensus()
    {
        arenaFree(census->arena);
        freeHashTable(census->hash, NULL);
    }
}

/* --------------------------------------------------------------------------
 * Increase era by 1 and initialize censuses[era].
 * Reallocate censuses[] and double its size if needed.
 * ----------------------------------------------------------------------- */

static void
nextEra( void )
{
#ifdef PROFILING
    if (doingLDVProfiling()) {
        era++;

        if (era == max_era) {
            errorBelch("maximum number of censuses reached; use +RTS -i to reduce");
            stg_exit(EXIT_FAILURE);
        }

        if (era == n_censuses) {
            n_censuses *= 2;
            censuses = stgReallocBytes(censuses, sizeof(Census) * n_censuses,
                                       "nextEra");
        }
    }
#endif /* PROFILING */

    initEra( &censuses[era] );
}

/* ----------------------------------------------------------------------------
 * Heap profiling by info table
 * ------------------------------------------------------------------------- */

#if !defined(PROFILING)
FILE *hp_file;
static char *hp_filename;

void initProfiling1 (void)
{
}

void freeProfiling1 (void)
{
}

void initProfiling2 (void)
{
    if (RtsFlags.ProfFlags.doHeapProfile) {
        /* Initialise the log file name */
        hp_filename = stgMallocBytes(strlen(prog_name) + 6, "hpFileName");
        sprintf(hp_filename, "%s.hp", prog_name);

        /* open the log file */
        if ((hp_file = fopen(hp_filename, "w")) == NULL) {
            debugBelch("Can't open profiling report file %s\n",
                       hp_filename);
            RtsFlags.ProfFlags.doHeapProfile = 0;
            return;
        }
    }

    initHeapProfiling();
}

void endProfiling( void )
{
    endHeapProfiling();
}
#endif /* !PROFILING */

static void
printSample(rtsBool beginSample, StgDouble sampleValue)
{
    StgDouble fractionalPart, integralPart;
    fractionalPart = modf(sampleValue, &integralPart);
    fprintf(hp_file, "%s %" FMT_Word64 ".%02" FMT_Word64 "\n",
            (beginSample ? "BEGIN_SAMPLE" : "END_SAMPLE"),
            (StgWord64)integralPart, (StgWord64)(fractionalPart * 100));
}
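
/* For example (illustrative), a sample taken at MUT time 1.5 seconds
 * brackets its band lines like this in the .hp file:
 *
 *     BEGIN_SAMPLE 1.50
 *     ...one line per band; see dumpCensus()...
 *     END_SAMPLE 1.50
 */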

/* --------------------------------------------------------------------------
 * Initialize the heap profiler
 * ----------------------------------------------------------------------- */
nat
initHeapProfiling(void)
{
    if (! RtsFlags.ProfFlags.doHeapProfile) {
        return 0;
    }

#ifdef PROFILING
    if (doingLDVProfiling() && doingRetainerProfiling()) {
        errorBelch("cannot mix -hb and -hr");
        stg_exit(EXIT_FAILURE);
    }
#endif

    // we only count eras if we're doing LDV profiling.  Otherwise era
    // is fixed at zero.
#ifdef PROFILING
    if (doingLDVProfiling()) {
        era = 1;
    } else
#endif
    {
        era = 0;
    }

    // max_era = 2^LDV_SHIFT
    max_era = 1 << LDV_SHIFT;

    n_censuses = 32;
    censuses = stgMallocBytes(sizeof(Census) * n_censuses, "initHeapProfiling");

    initEra( &censuses[era] );

    /* initProfilingLogFile(); */
    fprintf(hp_file, "JOB \"%s", prog_name);

#ifdef PROFILING
    {
        int count;
        for(count = 1; count < prog_argc; count++)
            fprintf(hp_file, " %s", prog_argv[count]);
        fprintf(hp_file, " +RTS");
        for(count = 0; count < rts_argc; count++)
            fprintf(hp_file, " %s", rts_argv[count]);
    }
#endif /* PROFILING */

    fprintf(hp_file, "\"\n" );

    fprintf(hp_file, "DATE \"%s\"\n", time_str());

    fprintf(hp_file, "SAMPLE_UNIT \"seconds\"\n");
    fprintf(hp_file, "VALUE_UNIT \"bytes\"\n");

    printSample(rtsTrue, 0);
    printSample(rtsFalse, 0);

#ifdef PROFILING
    if (doingRetainerProfiling()) {
        initRetainerProfiling();
    }
#endif

    return 0;
}
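
/* The header this writes looks like the following (illustrative, for a
 * hypothetical run of "myprog +RTS -hc"):
 *
 *     JOB "myprog +RTS -hc"
 *     DATE "Thu Oct 12 18:12 2006"
 *     SAMPLE_UNIT "seconds"
 *     VALUE_UNIT "bytes"
 *     BEGIN_SAMPLE 0.00
 *     END_SAMPLE 0.00
 *
 * The empty zeroth sample presumably gives graphing tools such as hp2ps a
 * baseline at time 0.
 */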

void
endHeapProfiling(void)
{
    StgDouble seconds;

    if (! RtsFlags.ProfFlags.doHeapProfile) {
        return;
    }

#ifdef PROFILING
    if (doingRetainerProfiling()) {
        endRetainerProfiling();
    }
#endif

#ifdef PROFILING
    if (doingLDVProfiling()) {
        nat t;
        LdvCensusKillAll();
        aggregateCensusInfo();
        for (t = 1; t < era; t++) {
            dumpCensus( &censuses[t] );
        }
    }
#endif

#ifdef PROFILING
    if (doingLDVProfiling()) {
        nat t;
        for (t = 1; t <= era; t++) {
            freeEra( &censuses[t] );
        }
    } else {
        freeEra( &censuses[0] );
    }
#else
    freeEra( &censuses[0] );
#endif

    stgFree(censuses);

    seconds = mut_user_time();
    printSample(rtsTrue, seconds);
    printSample(rtsFalse, seconds);
    fclose(hp_file);
}


#ifdef PROFILING
static size_t
buf_append(char *p, const char *q, char *end)
{
    int m;

    for (m = 0; p < end; p++, q++, m++) {
        *p = *q;
        if (*q == '\0') { break; }
    }
    return m;
}

static void
fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
{
    char buf[max_length+1], *p, *buf_end;

    // MAIN on its own gets printed as "MAIN", otherwise we ignore MAIN.
    if (ccs == CCS_MAIN) {
        fprintf(fp, "MAIN");
        return;
    }

    fprintf(fp, "(%ld)", ccs->ccsID);

    p = buf;
    buf_end = buf + max_length + 1;

    // keep printing components of the stack until we run out of space
    // in the buffer.  If we run out of space, end with "...".
    for (; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack) {

        // CAF cost centres print as M.CAF, but we leave the module
        // name out of all the others to save space.
        if (!strcmp(ccs->cc->label,"CAF")) {
            p += buf_append(p, ccs->cc->module, buf_end);
            p += buf_append(p, ".CAF", buf_end);
        } else {
            p += buf_append(p, ccs->cc->label, buf_end);
            if (ccs->prevStack != NULL && ccs->prevStack != CCS_MAIN) {
                p += buf_append(p, "/", buf_end);
            }
        }

        if (p >= buf_end) {
            sprintf(buf+max_length-4, "...");
            break;
        }
    }
    fprintf(fp, "%s", buf);
}
#endif /* PROFILING */
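
/* For example (illustrative): a stack Main.CAF -> foo -> bar with ccsID 23
 * prints as "(23)bar/foo/Main.CAF" (innermost cost centre first, since the
 * loop walks prevStack outwards), and a stack longer than max_length is
 * cut off with a trailing "...".
 */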

rtsBool
strMatchesSelector( char* str, char* sel )
{
    char* p;
    // debugBelch("str_matches_selector %s %s\n", str, sel);
    while (1) {
        // Compare str against wherever we've got to in sel.
        p = str;
        while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
            p++; sel++;
        }
        // Match if all of str used and have reached the end of a sel fragment.
        if (*p == '\0' && (*sel == ',' || *sel == '\0'))
            return rtsTrue;

        // No match.  Advance sel to the start of the next elem.
        while (*sel != ',' && *sel != '\0') sel++;
        if (*sel == ',') sel++;

        /* Run out of sel ?? */
        if (*sel == '\0') return rtsFalse;
    }
}
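
/* str must exactly match one whole comma-separated fragment of sel.
 * Hypothetical usage, not part of this file:
 */
#if 0
ASSERT( strMatchesSelector("THUNK", "CONSTR,THUNK,FUN") == rtsTrue );
ASSERT( strMatchesSelector("THUNK_1_0", "CONSTR,THUNK,FUN") == rtsFalse );
#endif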

/* -----------------------------------------------------------------------------
 * Figure out whether a closure should be counted in this census, by
 * testing against all the specified constraints.
 * -------------------------------------------------------------------------- */
rtsBool
closureSatisfiesConstraints( StgClosure* p )
{
#if !defined(PROFILING)
    (void)p;   /* keep gcc -Wall happy */
    return rtsTrue;
#else
    rtsBool b;

    // The CCS has a selected field to indicate whether this closure is
    // deselected by not being mentioned in the module, CC, or CCS
    // selectors.
    if (!p->header.prof.ccs->selected) {
        return rtsFalse;
    }

    if (RtsFlags.ProfFlags.descrSelector) {
        b = strMatchesSelector( (GET_PROF_DESC(get_itbl((StgClosure *)p))),
                                RtsFlags.ProfFlags.descrSelector );
        if (!b) return rtsFalse;
    }
    if (RtsFlags.ProfFlags.typeSelector) {
        b = strMatchesSelector( (GET_PROF_TYPE(get_itbl((StgClosure *)p))),
                                RtsFlags.ProfFlags.typeSelector );
        if (!b) return rtsFalse;
    }
    if (RtsFlags.ProfFlags.retainerSelector) {
        RetainerSet *rs;
        nat i;
        // We must check that the retainer set is valid here.  One
        // reason it might not be valid is if this closure is a
        // newly deceased weak pointer (i.e. a DEAD_WEAK), since
        // these aren't reached by the retainer profiler's traversal.
        if (isRetainerSetFieldValid((StgClosure *)p)) {
            rs = retainerSetOf((StgClosure *)p);
            if (rs != NULL) {
                for (i = 0; i < rs->num; i++) {
                    b = strMatchesSelector( rs->element[i]->cc->label,
                                            RtsFlags.ProfFlags.retainerSelector );
                    if (b) return rtsTrue;
                }
            }
        }
        return rtsFalse;
    }
    return rtsTrue;
#endif /* PROFILING */
}

/* -----------------------------------------------------------------------------
 * Aggregate the heap census info for biographical profiling
 * -------------------------------------------------------------------------- */
#ifdef PROFILING
static void
aggregateCensusInfo( void )
{
    HashTable *acc;
    nat t;
    counter *c, *d, *ctrs;
    Arena *arena;

    if (!doingLDVProfiling()) return;

    // Aggregate the LDV counters when displaying by biography.
    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
        int void_total, drag_total;

        // Now we compute void_total and drag_total for each census.
        // After the program has finished, the void_total field of
        // each census contains the count of words that were *created*
        // in this era and were eventually void.  Conversely, if a
        // void closure was destroyed in this era, it will be
        // represented by a negative count of words in void_total.
        //
        // To get the count of live words that are void at each
        // census, just propagate the void_total count forwards:

        void_total = 0;
        drag_total = 0;
        for (t = 1; t < era; t++) { // note: start at 1, not 0
            void_total += censuses[t].void_total;
            drag_total += censuses[t].drag_total;
            censuses[t].void_total = void_total;
            censuses[t].drag_total = drag_total;

            ASSERT( censuses[t].void_total <= censuses[t].not_used );
            // should be true because: void_total is the count of
            // live words that are void at this census, which *must*
            // be less than the number of live words that have not
            // been used yet.

            ASSERT( censuses[t].drag_total <= censuses[t].used );
            // similar reasoning as above.
        }

        return;
    }

    // otherwise... we're doing a heap profile that is restricted to
    // some combination of lag, drag, void or use.  We've kept all the
    // census info for all censuses so far, but we still need to
    // aggregate the counters forwards.

    arena = newArena();
    acc = allocHashTable();
    ctrs = NULL;

    for (t = 1; t < era; t++) {

        // first look through all the counters we're aggregating
        for (c = ctrs; c != NULL; c = c->next) {
            // if one of the totals is non-zero, then this closure
            // type must be present in the heap at this census time...
            d = lookupHashTable(censuses[t].hash, (StgWord)c->identity);

            if (d == NULL) {
                // if this closure identity isn't present in the
                // census for this time period, then our running
                // totals *must* be zero.
                ASSERT(c->c.ldv.void_total == 0 && c->c.ldv.drag_total == 0);

                // debugCCS(c->identity);
                // debugBelch(" census=%d void_total=%d drag_total=%d\n",
                //            t, c->c.ldv.void_total, c->c.ldv.drag_total);
            } else {
                d->c.ldv.void_total += c->c.ldv.void_total;
                d->c.ldv.drag_total += c->c.ldv.drag_total;
                c->c.ldv.void_total = d->c.ldv.void_total;
                c->c.ldv.drag_total = d->c.ldv.drag_total;

                ASSERT( c->c.ldv.void_total >= 0 );
                ASSERT( c->c.ldv.drag_total >= 0 );
            }
        }

        // now look through the counters in this census to find new ones
        for (c = censuses[t].ctrs; c != NULL; c = c->next) {
            d = lookupHashTable(acc, (StgWord)c->identity);
            if (d == NULL) {
                d = arenaAlloc( arena, sizeof(counter) );
                initLDVCtr(d);
                insertHashTable( acc, (StgWord)c->identity, d );
                d->identity = c->identity;
                d->next = ctrs;
                ctrs = d;
                d->c.ldv.void_total = c->c.ldv.void_total;
                d->c.ldv.drag_total = c->c.ldv.drag_total;
            }
            ASSERT( c->c.ldv.void_total >= 0 );
            ASSERT( c->c.ldv.drag_total >= 0 );
        }
    }

    freeHashTable(acc, NULL);
    arenaFree(arena);
}
#endif
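
/* In equation form (illustrative): after aggregation,
 *
 *     censuses[t].void_total  =  sum over s <= t of the original
 *                                per-era void_total deltas
 *
 * i.e. a prefix sum of the +size/-size pairs recorded by
 * LDV_recordDead(), and similarly for drag_total.
 */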

/* -----------------------------------------------------------------------------
 * Print out the results of a heap census.
 * -------------------------------------------------------------------------- */
static void
dumpCensus( Census *census )
{
    counter *ctr;
    int count;

    printSample(rtsTrue, census->time);

#ifdef PROFILING
    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
        fprintf(hp_file, "VOID\t%lu\n", (unsigned long)(census->void_total) * sizeof(W_));
        fprintf(hp_file, "LAG\t%lu\n",
                (unsigned long)(census->not_used - census->void_total) * sizeof(W_));
        fprintf(hp_file, "USE\t%lu\n",
                (unsigned long)(census->used - census->drag_total) * sizeof(W_));
        fprintf(hp_file, "INHERENT_USE\t%lu\n",
                (unsigned long)(census->prim) * sizeof(W_));
        fprintf(hp_file, "DRAG\t%lu\n",
                (unsigned long)(census->drag_total) * sizeof(W_));
        printSample(rtsFalse, census->time);
        return;
    }
#endif

    for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {

#ifdef PROFILING
        if (RtsFlags.ProfFlags.bioSelector != NULL) {
            count = 0;
            if (strMatchesSelector("lag", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.not_used - ctr->c.ldv.void_total;
            if (strMatchesSelector("drag", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.drag_total;
            if (strMatchesSelector("void", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.void_total;
            if (strMatchesSelector("use", RtsFlags.ProfFlags.bioSelector))
                count += ctr->c.ldv.used - ctr->c.ldv.drag_total;
        } else
#endif
        {
            count = ctr->c.resid;
        }

        ASSERT( count >= 0 );

        if (count == 0) continue;

#if !defined(PROFILING)
        switch (RtsFlags.ProfFlags.doHeapProfile) {
        case HEAP_BY_CLOSURE_TYPE:
            fprintf(hp_file, "%s", (char *)ctr->identity);
            break;
        }
#endif

#ifdef PROFILING
        switch (RtsFlags.ProfFlags.doHeapProfile) {
        case HEAP_BY_CCS:
            fprint_ccs(hp_file, (CostCentreStack *)ctr->identity, RtsFlags.ProfFlags.ccsLength);
            break;
        case HEAP_BY_MOD:
        case HEAP_BY_DESCR:
        case HEAP_BY_TYPE:
            fprintf(hp_file, "%s", (char *)ctr->identity);
            break;
        case HEAP_BY_RETAINER:
        {
            RetainerSet *rs = (RetainerSet *)ctr->identity;

            // it might be the distinguished retainer set rs_MANY:
            if (rs == &rs_MANY) {
                fprintf(hp_file, "MANY");
                break;
            }

            // Mark this retainer set by negating its id, because it
            // has appeared in at least one census.  We print the
            // values of all such retainer sets into the log file at
            // the end.  A retainer set may exist but not feature in
            // any censuses if it arose as the intermediate retainer
            // set for some closure during retainer set calculation.
            if (rs->id > 0)
                rs->id = -(rs->id);

            // report in the unit of bytes: * sizeof(StgWord)
            printRetainerSetShort(hp_file, rs);
            break;
        }
        default:
            barf("dumpCensus; doHeapProfile");
        }
#endif

        fprintf(hp_file, "\t%lu\n", (unsigned long)count * sizeof(W_));
    }

    printSample(rtsFalse, census->time);
}
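
/* For -hb (HEAP_BY_LDV) a census therefore appears as five fixed,
 * tab-separated bands, e.g. (illustrative figures, in bytes):
 *
 *     BEGIN_SAMPLE 1.50
 *     VOID         1024
 *     LAG          2048
 *     USE          8192
 *     INHERENT_USE 512
 *     DRAG         256
 *     END_SAMPLE 1.50
 */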

/* -----------------------------------------------------------------------------
 * Code to perform a heap census.
 * -------------------------------------------------------------------------- */
static void
heapCensusChain( Census *census, bdescr *bd )
{
    StgPtr p;
    StgInfoTable *info;
    void *identity;
    nat size;
    counter *ctr;
    nat real_size;
    rtsBool prim;

    for (; bd != NULL; bd = bd->link) {

        // HACK: ignore pinned blocks, because they contain gaps.
        // It's not clear exactly what we'd like to do here, since we
        // can't tell which objects in the block are actually alive.
        // Perhaps the whole block should be counted as SYSTEM memory.
        if (bd->flags & BF_PINNED) {
            continue;
        }

        p = bd->start;
        while (p < bd->free) {
            info = get_itbl((StgClosure *)p);
            prim = rtsFalse;

            switch (info->type) {

            case THUNK:
                size = thunk_sizeW_fromITBL(info);
                break;

            case THUNK_1_1:
            case THUNK_0_2:
            case THUNK_2_0:
                size = sizeofW(StgThunkHeader) + 2;
                break;

            case THUNK_1_0:
            case THUNK_0_1:
            case THUNK_SELECTOR:
                size = sizeofW(StgThunkHeader) + 1;
                break;

            case CONSTR:
            case FUN:
            case IND_PERM:
            case IND_OLDGEN:
            case IND_OLDGEN_PERM:
            case CAF_BLACKHOLE:
            case SE_CAF_BLACKHOLE:
            case SE_BLACKHOLE:
            case BLACKHOLE:
            case FUN_1_0:
            case FUN_0_1:
            case FUN_1_1:
            case FUN_0_2:
            case FUN_2_0:
            case CONSTR_1_0:
            case CONSTR_0_1:
            case CONSTR_1_1:
            case CONSTR_0_2:
            case CONSTR_2_0:
                size = sizeW_fromITBL(info);
                break;

            case IND:
                // Special case/Delicate Hack: INDs don't normally
                // appear, since we're doing this heap census right
                // after GC.  However, GarbageCollect() also does
                // resurrectThreads(), which can update some
                // blackholes when it calls raiseAsync() on the
                // resurrected threads.  So we know that any IND will
                // be the size of a BLACKHOLE.
                size = BLACKHOLE_sizeW();
                break;

            case BCO:
                prim = rtsTrue;
                size = bco_sizeW((StgBCO *)p);
                break;

            case MVAR_CLEAN:
            case MVAR_DIRTY:
            case WEAK:
            case STABLE_NAME:
            case MUT_VAR_CLEAN:
            case MUT_VAR_DIRTY:
                prim = rtsTrue;
                size = sizeW_fromITBL(info);
                break;

            case AP:
                size = ap_sizeW((StgAP *)p);
                break;

            case PAP:
                size = pap_sizeW((StgPAP *)p);
                break;

            case AP_STACK:
                size = ap_stack_sizeW((StgAP_STACK *)p);
                break;

            case ARR_WORDS:
                prim = rtsTrue;
                size = arr_words_sizeW(stgCast(StgArrWords*,p));
                break;

            case MUT_ARR_PTRS_CLEAN:
            case MUT_ARR_PTRS_DIRTY:
            case MUT_ARR_PTRS_FROZEN:
            case MUT_ARR_PTRS_FROZEN0:
                prim = rtsTrue;
                size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
                break;

            case TSO:
                prim = rtsTrue;
#ifdef PROFILING
                if (RtsFlags.ProfFlags.includeTSOs) {
                    size = tso_sizeW((StgTSO *)p);
                    break;
                } else {
                    // Skip this TSO and move on to the next object
                    p += tso_sizeW((StgTSO *)p);
                    continue;
                }
#else
                size = tso_sizeW((StgTSO *)p);
                break;
#endif

            case TREC_HEADER:
                prim = rtsTrue;
                size = sizeofW(StgTRecHeader);
                break;

            case TVAR_WATCH_QUEUE:
                prim = rtsTrue;
                size = sizeofW(StgTVarWatchQueue);
                break;

            case INVARIANT_CHECK_QUEUE:
                prim = rtsTrue;
                size = sizeofW(StgInvariantCheckQueue);
                break;

            case ATOMIC_INVARIANT:
                prim = rtsTrue;
                size = sizeofW(StgAtomicInvariant);
                break;

            case TVAR:
                prim = rtsTrue;
                size = sizeofW(StgTVar);
                break;

            case TREC_CHUNK:
                prim = rtsTrue;
                size = sizeofW(StgTRecChunk);
                break;

            default:
                barf("heapCensus, unknown object: %d", info->type);
            }

            identity = NULL;

#ifdef PROFILING
            // subtract the profiling overhead
            real_size = size - sizeofW(StgProfHeader);
#else
            real_size = size;
#endif

            if (closureSatisfiesConstraints((StgClosure*)p)) {
#ifdef PROFILING
                if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
                    if (prim)
                        census->prim += real_size;
                    else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
                        census->not_used += real_size;
                    else
                        census->used += real_size;
                } else
#endif
                {
                    identity = closureIdentity((StgClosure *)p);

                    if (identity != NULL) {
                        ctr = lookupHashTable( census->hash, (StgWord)identity );
                        if (ctr != NULL) {
#ifdef PROFILING
                            if (RtsFlags.ProfFlags.bioSelector != NULL) {
                                if (prim)
                                    ctr->c.ldv.prim += real_size;
                                else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
                                    ctr->c.ldv.not_used += real_size;
                                else
                                    ctr->c.ldv.used += real_size;
                            } else
#endif
                            {
                                ctr->c.resid += real_size;
                            }
                        } else {
                            ctr = arenaAlloc( census->arena, sizeof(counter) );
                            initLDVCtr(ctr);
                            insertHashTable( census->hash, (StgWord)identity, ctr );
                            ctr->identity = identity;
                            ctr->next = census->ctrs;
                            census->ctrs = ctr;

#ifdef PROFILING
                            if (RtsFlags.ProfFlags.bioSelector != NULL) {
                                if (prim)
                                    ctr->c.ldv.prim = real_size;
                                else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
                                    ctr->c.ldv.not_used = real_size;
                                else
                                    ctr->c.ldv.used = real_size;
                            } else
#endif
                            {
                                ctr->c.resid = real_size;
                            }
                        }
                    }
                }
            }

            p += size;
        }
    }
}
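
/* The scan relies on the census running immediately after GC, when each
 * block's objects are laid out contiguously from bd->start to bd->free,
 * so advancing p by each closure's size in words visits every object
 * exactly once.
 */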

void
heapCensus( void )
{
    nat g, s;
    Census *census;

    census = &censuses[era];
    census->time = mut_user_time();

    // calculate retainer sets if necessary
#ifdef PROFILING
    if (doingRetainerProfiling()) {
        retainerProfile();
    }
#endif

#ifdef PROFILING
    stat_startHeapCensus();
#endif

    // Traverse the heap, collecting the census info

    // First the small_alloc_list: we have to fix the free pointer at
    // the end by calling tidyAllocateLists() first.
    tidyAllocateLists();
    heapCensusChain( census, small_alloc_list );

    // Now traverse the heap in each generation/step.
    if (RtsFlags.GcFlags.generations == 1) {
        heapCensusChain( census, g0s0->blocks );
    } else {
        for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
            for (s = 0; s < generations[g].n_steps; s++) {
                heapCensusChain( census, generations[g].steps[s].blocks );
                // Are we interested in large objects?  might be
                // confusing to include the stack in a heap profile.
                heapCensusChain( census, generations[g].steps[s].large_objects );
            }
        }
    }

    // dump out the census info
#ifdef PROFILING
    // We can't generate any info for LDV profiling until
    // the end of the run...
    if (!doingLDVProfiling())
        dumpCensus( census );
#else
    dumpCensus( census );
#endif


    // free our storage, unless we're keeping all the census info for
    // future restriction by biography.
#ifdef PROFILING
    if (RtsFlags.ProfFlags.bioSelector == NULL)
    {
        freeHashTable( census->hash, NULL/* don't free the elements */ );
        arenaFree( census->arena );
        census->hash = NULL;
        census->arena = NULL;
    }
#endif

    // we're into the next time period now
    nextEra();

#ifdef PROFILING
    stat_endHeapCensus();
#endif
}