Fix numa001 failure with "too many NUMA nodes"
diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c
index 9cb47a1..a494a1b 100644
--- a/rts/ProfHeap.c
+++ b/rts/ProfHeap.c
@@ -8,8 +8,10 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
-#include "RtsUtils.h"
+
+#include "Capability.h"
 #include "RtsFlags.h"
+#include "RtsUtils.h"
 #include "Profiling.h"
 #include "ProfHeap.h"
 #include "Stats.h"
 #include "LdvProfile.h"
 #include "Arena.h"
 #include "Printer.h"
+#include "Trace.h"
+#include "sm/GCThread.h"
 
 #include <string.h>
-#include <stdlib.h>
-#include <math.h>
 
 /* -----------------------------------------------------------------------------
  * era stores the current time period.  It is the same as the
@@ -37,7 +39,7 @@
  * store only up to (max_era - 1) as its creation or last use time.
  * -------------------------------------------------------------------------- */
 unsigned int era;
-static nat max_era;
+static uint32_t max_era;
 
 /* -----------------------------------------------------------------------------
  * Counters
@@ -48,16 +50,17 @@ static nat max_era;
  * lag/drag/void counters for each identity.
  * -------------------------------------------------------------------------- */
 typedef struct _counter {
-    void *identity;
+    const void *identity;
     union {
-       nat resid;
-       struct {
-           int prim;     // total size of 'inherently used' closures
-           int not_used; // total size of 'never used' closures
-           int used;     // total size of 'used at least once' closures
-           int void_total;  // current total size of 'destroyed without being used' closures
-           int drag_total;  // current total size of 'used at least once and waiting to die'
-       } ldv;
+        ssize_t resid;
+        struct {
+            // Total sizes of:
+            ssize_t prim;     // 'inherently used' closures
+            ssize_t not_used; // 'never used' closures
+            ssize_t used;     // 'used at least once' closures
+            ssize_t void_total;  // 'destroyed without being used' closures
+            ssize_t drag_total;  // 'used at least once and waiting to die'
+        } ldv;
     } c;
     struct _counter *next;
 } counter;
@@ -79,15 +82,15 @@ typedef struct {
     Arena     * arena;
 
     // for LDV profiling, when just displaying by LDV
-    int       prim;
-    int       not_used;
-    int       used;
-    int       void_total;
-    int       drag_total;
+    ssize_t    prim;
+    ssize_t    not_used;
+    ssize_t    used;
+    ssize_t    void_total;
+    ssize_t    drag_total;
 } Census;
 
 static Census *censuses = NULL;
-static nat n_censuses = 0;
+static uint32_t n_censuses = 0;
 
 #ifdef PROFILING
 static void aggregateCensusInfo( void );
@@ -95,119 +98,39 @@ static void aggregateCensusInfo( void );
 
 static void dumpCensus( Census *census );
 
-/* ----------------------------------------------------------------------------
-   Closure Type Profiling;
-   ------------------------------------------------------------------------- */
-
-#ifndef PROFILING
-static char *type_names[] = {
-    "INVALID_OBJECT",
-    "CONSTR",
-    "CONSTR_1_0",
-    "CONSTR_0_1",
-    "CONSTR_2_0",
-    "CONSTR_1_1",
-    "CONSTR_0_2",
-    "CONSTR_STATIC",
-    "CONSTR_NOCAF_STATIC",
-    "FUN",
-    "FUN_1_0",
-    "FUN_0_1",
-    "FUN_2_0",
-    "FUN_1_1",
-    "FUN_0_2",
-    "FUN_STATIC",
-    "THUNK",
-    "THUNK_1_0",
-    "THUNK_0_1",
-    "THUNK_2_0",
-    "THUNK_1_1",
-    "THUNK_0_2",
-    "THUNK_STATIC",
-    "THUNK_SELECTOR",
-    "BCO",
-    "AP",
-    "PAP",
-    "AP_STACK",
-    "IND",
-    "IND_OLDGEN",
-    "IND_PERM",
-    "IND_OLDGEN_PERM",
-    "IND_STATIC",
-    "RET_BCO",
-    "RET_SMALL",
-    "RET_BIG",
-    "RET_DYN",
-    "RET_FUN",
-    "UPDATE_FRAME",
-    "CATCH_FRAME",
-    "STOP_FRAME",
-    "CAF_BLACKHOLE",
-    "BLACKHOLE",
-    "SE_BLACKHOLE",
-    "SE_CAF_BLACKHOLE",
-    "MVAR_CLEAN",
-    "MVAR_DIRTY",
-    "ARR_WORDS",
-    "MUT_ARR_PTRS_CLEAN",
-    "MUT_ARR_PTRS_DIRTY",
-    "MUT_ARR_PTRS_FROZEN0",
-    "MUT_ARR_PTRS_FROZEN",
-    "MUT_VAR_CLEAN",
-    "MUT_VAR_DIRTY",
-    "WEAK",
-    "STABLE_NAME",
-    "TSO",
-    "BLOCKED_FETCH",
-    "FETCH_ME",
-    "FETCH_ME_BQ",
-    "RBH",
-    "EVACUATED",
-    "REMOTE_REF",
-    "TVAR_WATCH_QUEUE",
-    "INVARIANT_CHECK_QUEUE",
-    "ATOMIC_INVARIANT",
-    "TVAR",
-    "TREC_CHUNK",
-    "TREC_HEADER",
-    "ATOMICALLY_FRAME",
-    "CATCH_RETRY_FRAME",
-    "CATCH_STM_FRAME",
-    "N_CLOSURE_TYPES"
-  };
-#endif
+static rtsBool closureSatisfiesConstraints( const StgClosure* p );
 
 /* ----------------------------------------------------------------------------
- * Find the "closure identity", which is a unique pointer reresenting
+ * Find the "closure identity", which is a unique pointer representing
  * the band to which this closure's heap space is attributed in the
  * heap profile.
  * ------------------------------------------------------------------------- */
-static void *
-closureIdentity( StgClosure *p )
+static const void *
+closureIdentity( const StgClosure *p )
 {
     switch (RtsFlags.ProfFlags.doHeapProfile) {
 
 #ifdef PROFILING
     case HEAP_BY_CCS:
-       return p->header.prof.ccs;
+        return p->header.prof.ccs;
     case HEAP_BY_MOD:
-       return p->header.prof.ccs->cc->module;
+        return p->header.prof.ccs->cc->module;
     case HEAP_BY_DESCR:
-       return GET_PROF_DESC(get_itbl(p));
+        return GET_PROF_DESC(get_itbl(p));
     case HEAP_BY_TYPE:
-       return GET_PROF_TYPE(get_itbl(p));
+        return GET_PROF_TYPE(get_itbl(p));
     case HEAP_BY_RETAINER:
-       // AFAIK, the only closures in the heap which might not have a
-       // valid retainer set are DEAD_WEAK closures.
-       if (isRetainerSetFieldValid(p))
-           return retainerSetOf(p);
-       else
-           return NULL;
+        // AFAIK, the only closures in the heap which might not have a
+        // valid retainer set are DEAD_WEAK closures.
+        if (isRetainerSetFieldValid(p))
+            return retainerSetOf(p);
+        else
+            return NULL;
 
 #else
     case HEAP_BY_CLOSURE_TYPE:
     {
-        StgInfoTable *info;
+        const StgInfoTable *info;
         info = get_itbl(p);
         switch (info->type) {
         case CONSTR:
@@ -216,17 +139,16 @@ closureIdentity( StgClosure *p )
         case CONSTR_2_0:
         case CONSTR_1_1:
         case CONSTR_0_2:
-        case CONSTR_STATIC:
-        case CONSTR_NOCAF_STATIC:
+        case CONSTR_NOCAF:
             return GET_CON_DESC(itbl_to_con_itbl(info));
         default:
-            return type_names[info->type];
+            return closure_type_names[info->type];
         }
     }
 
 #endif
     default:
-       barf("closureIdentity");
+        barf("closureIdentity");
     }
 }
 
@@ -237,87 +159,87 @@ closureIdentity( StgClosure *p )
 STATIC_INLINE rtsBool
 doingLDVProfiling( void )
 {
-    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV 
-           || RtsFlags.ProfFlags.bioSelector != NULL);
+    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
+            || RtsFlags.ProfFlags.bioSelector != NULL);
 }
 
-STATIC_INLINE rtsBool
+rtsBool
 doingRetainerProfiling( void )
 {
     return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER
-           || RtsFlags.ProfFlags.retainerSelector != NULL);
+            || RtsFlags.ProfFlags.retainerSelector != NULL);
 }
 #endif /* PROFILING */
 
 // Precesses a closure 'c' being destroyed whose size is 'size'.
 // Make sure that LDV_recordDead() is not invoked on 'inherently used' closures
 // such as TSO; they should not be involved in computing dragNew or voidNew.
-// 
-// Even though era is checked in both LdvCensusForDead() and 
-// LdvCensusKillAll(), we still need to make sure that era is > 0 because 
-// LDV_recordDead() may be called from elsewhere in the runtime system. E.g., 
+//
+// Even though era is checked in both LdvCensusForDead() and
+// LdvCensusKillAll(), we still need to make sure that era is > 0 because
+// LDV_recordDead() may be called from elsewhere in the runtime system. E.g.,
 // when a thunk is replaced by an indirection object.
 
 #ifdef PROFILING
 void
-LDV_recordDead( StgClosure *c, nat size )
+LDV_recordDead( const StgClosure *c, uint32_t size )
 {
-    void *id;
-    nat t;
+    const void *id;
+    uint32_t t;
     counter *ctr;
 
     if (era > 0 && closureSatisfiesConstraints(c)) {
-       size -= sizeofW(StgProfHeader);
-       ASSERT(LDVW(c) != 0);
-       if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) {
-           t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT;
-           if (t < era) {
-               if (RtsFlags.ProfFlags.bioSelector == NULL) {
-                   censuses[t].void_total   += (int)size;
-                   censuses[era].void_total -= (int)size;
-                   ASSERT(censuses[t].void_total < censuses[t].not_used);
-               } else {
-                   id = closureIdentity(c);
-                   ctr = lookupHashTable(censuses[t].hash, (StgWord)id);
-                   ASSERT( ctr != NULL );
-                   ctr->c.ldv.void_total += (int)size;
-                   ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
-                   if (ctr == NULL) {
-                       ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
-                       initLDVCtr(ctr);
-                       insertHashTable(censuses[era].hash, (StgWord)id, ctr);
-                       ctr->identity = id;
-                       ctr->next = censuses[era].ctrs;
-                       censuses[era].ctrs = ctr;
-                   }
-                   ctr->c.ldv.void_total -= (int)size;
-               }
-           }
-       } else {
-           t = LDVW((c)) & LDV_LAST_MASK;
-           if (t + 1 < era) {
-               if (RtsFlags.ProfFlags.bioSelector == NULL) {
-                   censuses[t+1].drag_total += size;
-                   censuses[era].drag_total -= size;
-               } else {
-                   void *id;
-                   id = closureIdentity(c);
-                   ctr = lookupHashTable(censuses[t+1].hash, (StgWord)id);
-                   ASSERT( ctr != NULL );
-                   ctr->c.ldv.drag_total += (int)size;
-                   ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
-                   if (ctr == NULL) {
-                       ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
-                       initLDVCtr(ctr);
-                       insertHashTable(censuses[era].hash, (StgWord)id, ctr);
-                       ctr->identity = id;
-                       ctr->next = censuses[era].ctrs;
-                       censuses[era].ctrs = ctr;
-                   }
-                   ctr->c.ldv.drag_total -= (int)size;
-               }
-           }
-       }
+        size -= sizeofW(StgProfHeader);
+        ASSERT(LDVW(c) != 0);
+        if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) {
+            t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT;
+            if (t < era) {
+                if (RtsFlags.ProfFlags.bioSelector == NULL) {
+                    censuses[t].void_total   += size;
+                    censuses[era].void_total -= size;
+                    ASSERT(censuses[t].void_total < censuses[t].not_used);
+                } else {
+                    id = closureIdentity(c);
+                    ctr = lookupHashTable(censuses[t].hash, (StgWord)id);
+                    ASSERT( ctr != NULL );
+                    ctr->c.ldv.void_total += size;
+                    ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
+                    if (ctr == NULL) {
+                        ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
+                        initLDVCtr(ctr);
+                        insertHashTable(censuses[era].hash, (StgWord)id, ctr);
+                        ctr->identity = id;
+                        ctr->next = censuses[era].ctrs;
+                        censuses[era].ctrs = ctr;
+                    }
+                    ctr->c.ldv.void_total -= size;
+                }
+            }
+        } else {
+            t = LDVW((c)) & LDV_LAST_MASK;
+            if (t + 1 < era) {
+                if (RtsFlags.ProfFlags.bioSelector == NULL) {
+                    censuses[t+1].drag_total += size;
+                    censuses[era].drag_total -= size;
+                } else {
+                    const void *id;
+                    id = closureIdentity(c);
+                    ctr = lookupHashTable(censuses[t+1].hash, (StgWord)id);
+                    ASSERT( ctr != NULL );
+                    ctr->c.ldv.drag_total += size;
+                    ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
+                    if (ctr == NULL) {
+                        ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
+                        initLDVCtr(ctr);
+                        insertHashTable(censuses[era].hash, (StgWord)id, ctr);
+                        ctr->identity = id;
+                        ctr->next = censuses[era].ctrs;
+                        censuses[era].ctrs = ctr;
+                    }
+                    ctr->c.ldv.drag_total -= size;
+                }
+            }
+        }
     }
 }
 #endif
@@ -343,12 +265,8 @@ initEra(Census *census)
 STATIC_INLINE void
 freeEra(Census *census)
 {
-    if (RtsFlags.ProfFlags.bioSelector != NULL)
-        // when bioSelector==NULL, these are freed in heapCensus()
-    {
-        arenaFree(census->arena);
-        freeHashTable(census->hash, NULL);
-    }
+    arenaFree(census->arena);
+    freeHashTable(census->hash, NULL);
 }
 
 /* --------------------------------------------------------------------------
@@ -360,19 +278,27 @@ static void
 nextEra( void )
 {
 #ifdef PROFILING
-    if (doingLDVProfiling()) { 
-       era++;
-
-       if (era == max_era) {
-           errorBelch("maximum number of censuses reached; use +RTS -i to reduce");
-           stg_exit(EXIT_FAILURE);
-       }
-       
-       if (era == n_censuses) {
-           n_censuses *= 2;
-           censuses = stgReallocBytes(censuses, sizeof(Census) * n_censuses,
-                                      "nextEra");
-       }
+    if (doingLDVProfiling()) {
+        era++;
+
+        if (era == max_era) {
+            errorBelch("Maximum number of censuses reached.");
+            if (rtsConfig.rts_opts_suggestions == rtsTrue) {
+                if (rtsConfig.rts_opts_enabled == RtsOptsAll)  {
+                    errorBelch("Use `+RTS -i' to reduce censuses.");
+                } else  {
+                    errorBelch("Relink with -rtsopts and "
+                               "use `+RTS -i' to reduce censuses.");
+                }
+            }
+            stg_exit(EXIT_FAILURE);
+        }
+
+        if (era == n_censuses) {
+            n_censuses *= 2;
+            censuses = stgReallocBytes(censuses, sizeof(Census) * n_censuses,
+                                       "nextEra");
+        }
     }
 #endif /* PROFILING */
 
@@ -387,15 +313,11 @@ nextEra( void )
 FILE *hp_file;
 static char *hp_filename;
 
-void initProfiling1 (void)
+void freeProfiling (void)
 {
 }
 
-void freeProfiling1 (void)
-{
-}
-
-void initProfiling2 (void)
+void initProfiling (void)
 {
     char *prog;
 
@@ -416,16 +338,17 @@ void initProfiling2 (void)
     /* Initialise the log file name */
     hp_filename = stgMallocBytes(strlen(prog) + 6, "hpFileName");
     sprintf(hp_filename, "%s.hp", prog);
-    
+
     /* open the log file */
     if ((hp_file = fopen(hp_filename, "w")) == NULL) {
-      debugBelch("Can't open profiling report file %s\n", 
-             hp_filename);
+      debugBelch("Can't open profiling report file %s\n",
+              hp_filename);
       RtsFlags.ProfFlags.doHeapProfile = 0;
+      stgFree(prog);
       return;
     }
   }
-  
+
   stgFree(prog);
 
   initHeapProfiling();
@@ -440,17 +363,31 @@ void endProfiling( void )
 static void
 printSample(rtsBool beginSample, StgDouble sampleValue)
 {
-    StgDouble fractionalPart, integralPart;
-    fractionalPart = modf(sampleValue, &integralPart);
-    fprintf(hp_file, "%s %" FMT_Word64 ".%02" FMT_Word64 "\n",
+    fprintf(hp_file, "%s %f\n",
             (beginSample ? "BEGIN_SAMPLE" : "END_SAMPLE"),
-            (StgWord64)integralPart, (StgWord64)(fractionalPart * 100));
+            sampleValue);
+    if (!beginSample) {
+        fflush(hp_file);
+    }
+}
+
+static void
+dumpCostCentresToEventLog(void)
+{
+#ifdef PROFILING
+    CostCentre *cc, *next;
+    for (cc = CC_LIST; cc != NULL; cc = next) {
+        next = cc->link;
+        traceHeapProfCostCentre(cc->ccID, cc->label, cc->module,
+                                cc->srcloc, cc->is_caf);
+    }
+#endif
 }
 
 /* --------------------------------------------------------------------------
  * Initialize the heap profilier
  * ----------------------------------------------------------------------- */
-nat
+uint32_t
 initHeapProfiling(void)
 {
     if (! RtsFlags.ProfFlags.doHeapProfile) {
@@ -459,24 +396,31 @@ initHeapProfiling(void)
 
 #ifdef PROFILING
     if (doingLDVProfiling() && doingRetainerProfiling()) {
-       errorBelch("cannot mix -hb and -hr");
-       stg_exit(EXIT_FAILURE);
+        errorBelch("cannot mix -hb and -hr");
+        stg_exit(EXIT_FAILURE);
+    }
+#ifdef THREADED_RTS
+    // See Trac #12019.
+    if (doingLDVProfiling() && RtsFlags.ParFlags.nCapabilities > 1) {
+        errorBelch("-hb cannot be used with multiple capabilities");
+        stg_exit(EXIT_FAILURE);
     }
 #endif
+#endif
 
     // we only count eras if we're doing LDV profiling.  Otherwise era
     // is fixed at zero.
 #ifdef PROFILING
     if (doingLDVProfiling()) {
-       era = 1;
+        era = 1;
     } else
 #endif
     {
-       era = 0;
+        era = 0;
     }
 
     // max_era = 2^LDV_SHIFT
-       max_era = 1 << LDV_SHIFT;
+    max_era = 1 << LDV_SHIFT;
 
     n_censuses = 32;
     censuses = stgMallocBytes(sizeof(Census) * n_censuses, "initHeapProfiling");
@@ -488,12 +432,12 @@ initHeapProfiling(void)
 
 #ifdef PROFILING
     {
-       int count;
-       for(count = 1; count < prog_argc; count++)
-           fprintf(hp_file, " %s", prog_argv[count]);
-       fprintf(hp_file, " +RTS");
-       for(count = 0; count < rts_argc; count++)
-           fprintf(hp_file, " %s", rts_argv[count]);
+        int count;
+        for(count = 1; count < prog_argc; count++)
+            fprintf(hp_file, " %s", prog_argv[count]);
+        fprintf(hp_file, " +RTS");
+        for(count = 0; count < rts_argc; count++)
+            fprintf(hp_file, " %s", rts_argv[count]);
     }
 #endif /* PROFILING */
 
@@ -509,10 +453,13 @@ initHeapProfiling(void)
 
 #ifdef PROFILING
     if (doingRetainerProfiling()) {
-       initRetainerProfiling();
+        initRetainerProfiling();
     }
 #endif
 
+    traceHeapProfBegin(0);
+    dumpCostCentresToEventLog();
+
     return 0;
 }
 
@@ -527,26 +474,30 @@ endHeapProfiling(void)
 
 #ifdef PROFILING
     if (doingRetainerProfiling()) {
-       endRetainerProfiling();
+        endRetainerProfiling();
     }
 #endif
 
 #ifdef PROFILING
     if (doingLDVProfiling()) {
-       nat t;
-       LdvCensusKillAll();
-       aggregateCensusInfo();
-       for (t = 1; t < era; t++) {
-           dumpCensus( &censuses[t] );
-       }
+        uint32_t t;
+        LdvCensusKillAll();
+        aggregateCensusInfo();
+        for (t = 1; t < era; t++) {
+            dumpCensus( &censuses[t] );
+        }
     }
 #endif
 
 #ifdef PROFILING
     if (doingLDVProfiling()) {
-        nat t;
-        for (t = 1; t <= era; t++) {
-            freeEra( &censuses[t] );
+        uint32_t t;
+        if (RtsFlags.ProfFlags.bioSelector != NULL) {
+            for (t = 1; t <= era; t++) {
+                freeEra( &censuses[t] );
+            }
+        } else {
+            freeEra( &censuses[era] );
         }
     } else {
         freeEra( &censuses[0] );
@@ -572,24 +523,24 @@ buf_append(char *p, const char *q, char *end)
     int m;
 
     for (m = 0; p < end; p++, q++, m++) {
-       *p = *q;
-       if (*q == '\0') { break; }
+        *p = *q;
+        if (*q == '\0') { break; }
     }
     return m;
 }
 
 static void
-fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
+fprint_ccs(FILE *fp, CostCentreStack *ccs, uint32_t max_length)
 {
     char buf[max_length+1], *p, *buf_end;
 
     // MAIN on its own gets printed as "MAIN", otherwise we ignore MAIN.
     if (ccs == CCS_MAIN) {
-       fprintf(fp, "MAIN");
-       return;
+        fprintf(fp, "MAIN");
+        return;
     }
 
-    fprintf(fp, "(%ld)", ccs->ccsID);
+    fprintf(fp, "(%" FMT_Int ")", ccs->ccsID);
 
     p = buf;
     buf_end = buf + max_length + 1;
@@ -598,57 +549,58 @@ fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
     // in the buffer.  If we run out of space, end with "...".
     for (; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack) {
 
-       // CAF cost centres print as M.CAF, but we leave the module
-       // name out of all the others to save space.
-       if (!strcmp(ccs->cc->label,"CAF")) {
-           p += buf_append(p, ccs->cc->module, buf_end);
-           p += buf_append(p, ".CAF", buf_end);
-       } else {
-           p += buf_append(p, ccs->cc->label, buf_end);
-           if (ccs->prevStack != NULL && ccs->prevStack != CCS_MAIN) {
-               p += buf_append(p, "/", buf_end);
-           }
-       }
-       
-       if (p >= buf_end) {
-           sprintf(buf+max_length-4, "...");
-           break;
-       }
+        // CAF cost centres print as M.CAF, but we leave the module
+        // name out of all the others to save space.
+        if (!strcmp(ccs->cc->label,"CAF")) {
+            p += buf_append(p, ccs->cc->module, buf_end);
+            p += buf_append(p, ".CAF", buf_end);
+        } else {
+            p += buf_append(p, ccs->cc->label, buf_end);
+            if (ccs->prevStack != NULL && ccs->prevStack != CCS_MAIN) {
+                p += buf_append(p, "/", buf_end);
+            }
+        }
+
+        if (p >= buf_end) {
+            sprintf(buf+max_length-4, "...");
+            break;
+        }
     }
     fprintf(fp, "%s", buf);
 }
-#endif /* PROFILING */
 
 rtsBool
-strMatchesSelector( char* str, char* sel )
+strMatchesSelector( const char* str, const char* sel )
 {
-   char* p;
+   const char* p;
    // debugBelch("str_matches_selector %s %s\n", str, sel);
    while (1) {
        // Compare str against wherever we've got to in sel.
        p = str;
        while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
-          p++; sel++;
+           p++; sel++;
        }
        // Match if all of str used and have reached the end of a sel fragment.
        if (*p == '\0' && (*sel == ',' || *sel == '\0'))
-          return rtsTrue;
-       
+           return rtsTrue;
+
        // No match.  Advance sel to the start of the next elem.
        while (*sel != ',' && *sel != '\0') sel++;
        if (*sel == ',') sel++;
-       
+
        /* Run out of sel ?? */
        if (*sel == '\0') return rtsFalse;
    }
 }
 
+#endif /* PROFILING */
+
 /* -----------------------------------------------------------------------------
  * Figure out whether a closure should be counted in this census, by
  * testing against all the specified constraints.
  * -------------------------------------------------------------------------- */
-rtsBool
-closureSatisfiesConstraints( StgClosure* p )
+static rtsBool
+closureSatisfiesConstraints( const StgClosure* p )
 {
 #if !defined(PROFILING)
     (void)p;   /* keep gcc -Wall happy */
@@ -665,7 +617,7 @@ closureSatisfiesConstraints( StgClosure* p )
 
    if (RtsFlags.ProfFlags.descrSelector) {
        b = strMatchesSelector( (GET_PROF_DESC(get_itbl((StgClosure *)p))),
-                                RtsFlags.ProfFlags.descrSelector );
+                                 RtsFlags.ProfFlags.descrSelector );
        if (!b) return rtsFalse;
    }
    if (RtsFlags.ProfFlags.typeSelector) {
@@ -675,20 +627,20 @@ closureSatisfiesConstraints( StgClosure* p )
    }
    if (RtsFlags.ProfFlags.retainerSelector) {
        RetainerSet *rs;
-       nat i;
+       uint32_t i;
        // We must check that the retainer set is valid here.  One
        // reason it might not be valid is if this closure is a
        // a newly deceased weak pointer (i.e. a DEAD_WEAK), since
        // these aren't reached by the retainer profiler's traversal.
        if (isRetainerSetFieldValid((StgClosure *)p)) {
-          rs = retainerSetOf((StgClosure *)p);
-          if (rs != NULL) {
-              for (i = 0; i < rs->num; i++) {
-                  b = strMatchesSelector( rs->element[i]->cc->label,
-                                          RtsFlags.ProfFlags.retainerSelector );
-                  if (b) return rtsTrue;
-              }
-          }
+           rs = retainerSetOf((StgClosure *)p);
+           if (rs != NULL) {
+               for (i = 0; i < rs->num; i++) {
+                   b = strMatchesSelector( rs->element[i]->cc->label,
+                                           RtsFlags.ProfFlags.retainerSelector );
+                   if (b) return rtsTrue;
+               }
+           }
        }
        return rtsFalse;
    }
@@ -704,7 +656,7 @@ static void
 aggregateCensusInfo( void )
 {
     HashTable *acc;
-    nat t;
+    uint32_t t;
     counter *c, *d, *ctrs;
     Arena *arena;
 
@@ -712,37 +664,37 @@ aggregateCensusInfo( void )
 
     // Aggregate the LDV counters when displaying by biography.
     if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
-       int void_total, drag_total;
-
-       // Now we compute void_total and drag_total for each census
-       // After the program has finished, the void_total field of
-       // each census contains the count of words that were *created*
-       // in this era and were eventually void.  Conversely, if a
-       // void closure was destroyed in this era, it will be
-       // represented by a negative count of words in void_total.
-       //
-       // To get the count of live words that are void at each
-       // census, just propagate the void_total count forwards:
-
-       void_total = 0;
-       drag_total = 0;
-       for (t = 1; t < era; t++) { // note: start at 1, not 0
-           void_total += censuses[t].void_total;
-           drag_total += censuses[t].drag_total;
-           censuses[t].void_total = void_total;
-           censuses[t].drag_total = drag_total;
-
-           ASSERT( censuses[t].void_total <= censuses[t].not_used );
-           // should be true because: void_total is the count of
-           // live words that are void at this census, which *must*
-           // be less than the number of live words that have not
-           // been used yet.
-
-           ASSERT( censuses[t].drag_total <= censuses[t].used );
-           // similar reasoning as above.
-       }
-       
-       return;
+        long void_total, drag_total;
+
+        // Now we compute void_total and drag_total for each census
+        // After the program has finished, the void_total field of
+        // each census contains the count of words that were *created*
+        // in this era and were eventually void.  Conversely, if a
+        // void closure was destroyed in this era, it will be
+        // represented by a negative count of words in void_total.
+        //
+        // To get the count of live words that are void at each
+        // census, just propagate the void_total count forwards:
+
+        void_total = 0;
+        drag_total = 0;
+        for (t = 1; t < era; t++) { // note: start at 1, not 0
+            void_total += censuses[t].void_total;
+            drag_total += censuses[t].drag_total;
+            censuses[t].void_total = void_total;
+            censuses[t].drag_total = drag_total;
+
+            ASSERT( censuses[t].void_total <= censuses[t].not_used );
+            // should be true because: void_total is the count of
+            // live words that are void at this census, which *must*
+            // be less than the number of live words that have not
+            // been used yet.
+
+            ASSERT( censuses[t].drag_total <= censuses[t].used );
+            // similar reasoning as above.
+        }
+
+        return;
     }
 
     // otherwise... we're doing a heap profile that is restricted to
@@ -756,48 +708,48 @@ aggregateCensusInfo( void )
 
     for (t = 1; t < era; t++) {
 
-       // first look through all the counters we're aggregating
-       for (c = ctrs; c != NULL; c = c->next) {
-           // if one of the totals is non-zero, then this closure
-           // type must be present in the heap at this census time...
-           d = lookupHashTable(censuses[t].hash, (StgWord)c->identity);
-
-           if (d == NULL) {
-               // if this closure identity isn't present in the
-               // census for this time period, then our running
-               // totals *must* be zero.
-               ASSERT(c->c.ldv.void_total == 0 && c->c.ldv.drag_total == 0);
-
-               // debugCCS(c->identity);
-               // debugBelch(" census=%d void_total=%d drag_total=%d\n",
-               //         t, c->c.ldv.void_total, c->c.ldv.drag_total);
-           } else {
-               d->c.ldv.void_total += c->c.ldv.void_total;
-               d->c.ldv.drag_total += c->c.ldv.drag_total;
-               c->c.ldv.void_total =  d->c.ldv.void_total;
-               c->c.ldv.drag_total =  d->c.ldv.drag_total;
-
-               ASSERT( c->c.ldv.void_total >= 0 );
-               ASSERT( c->c.ldv.drag_total >= 0 );
-           }
-       }
-
-       // now look through the counters in this census to find new ones
-       for (c = censuses[t].ctrs; c != NULL; c = c->next) {
-           d = lookupHashTable(acc, (StgWord)c->identity);
-           if (d == NULL) {
-               d = arenaAlloc( arena, sizeof(counter) );
-               initLDVCtr(d);
-               insertHashTable( acc, (StgWord)c->identity, d );
-               d->identity = c->identity;
-               d->next = ctrs;
-               ctrs = d;
-               d->c.ldv.void_total = c->c.ldv.void_total;
-               d->c.ldv.drag_total = c->c.ldv.drag_total;
-           }
-           ASSERT( c->c.ldv.void_total >= 0 );
-           ASSERT( c->c.ldv.drag_total >= 0 );
-       }
+        // first look through all the counters we're aggregating
+        for (c = ctrs; c != NULL; c = c->next) {
+            // if one of the totals is non-zero, then this closure
+            // type must be present in the heap at this census time...
+            d = lookupHashTable(censuses[t].hash, (StgWord)c->identity);
+
+            if (d == NULL) {
+                // if this closure identity isn't present in the
+                // census for this time period, then our running
+                // totals *must* be zero.
+                ASSERT(c->c.ldv.void_total == 0 && c->c.ldv.drag_total == 0);
+
+                // debugCCS(c->identity);
+                // debugBelch(" census=%d void_total=%d drag_total=%d\n",
+                //         t, c->c.ldv.void_total, c->c.ldv.drag_total);
+            } else {
+                d->c.ldv.void_total += c->c.ldv.void_total;
+                d->c.ldv.drag_total += c->c.ldv.drag_total;
+                c->c.ldv.void_total =  d->c.ldv.void_total;
+                c->c.ldv.drag_total =  d->c.ldv.drag_total;
+
+                ASSERT( c->c.ldv.void_total >= 0 );
+                ASSERT( c->c.ldv.drag_total >= 0 );
+            }
+        }
+
+        // now look through the counters in this census to find new ones
+        for (c = censuses[t].ctrs; c != NULL; c = c->next) {
+            d = lookupHashTable(acc, (StgWord)c->identity);
+            if (d == NULL) {
+                d = arenaAlloc( arena, sizeof(counter) );
+                initLDVCtr(d);
+                insertHashTable( acc, (StgWord)c->identity, d );
+                d->identity = c->identity;
+                d->next = ctrs;
+                ctrs = d;
+                d->c.ldv.void_total = c->c.ldv.void_total;
+                d->c.ldv.drag_total = c->c.ldv.drag_total;
+            }
+            ASSERT( c->c.ldv.void_total >= 0 );
+            ASSERT( c->c.ldv.drag_total >= 0 );
+        }
     }
 
     freeHashTable(acc, NULL);
@@ -812,101 +764,206 @@ static void
 dumpCensus( Census *census )
 {
     counter *ctr;
-    int count;
+    ssize_t count;
 
     printSample(rtsTrue, census->time);
+    traceHeapProfSampleBegin(era);
 
 #ifdef PROFILING
     if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
-      fprintf(hp_file, "VOID\t%lu\n", (unsigned long)(census->void_total) * sizeof(W_));
-       fprintf(hp_file, "LAG\t%lu\n", 
-               (unsigned long)(census->not_used - census->void_total) * sizeof(W_));
-       fprintf(hp_file, "USE\t%lu\n", 
-               (unsigned long)(census->used - census->drag_total) * sizeof(W_));
-       fprintf(hp_file, "INHERENT_USE\t%lu\n", 
-               (unsigned long)(census->prim) * sizeof(W_));
-       fprintf(hp_file, "DRAG\t%lu\n",
-               (unsigned long)(census->drag_total) * sizeof(W_));
-       printSample(rtsFalse, census->time);
-       return;
+        fprintf(hp_file, "VOID\t%lu\n",
+                (unsigned long)(census->void_total) * sizeof(W_));
+        fprintf(hp_file, "LAG\t%lu\n",
+                (unsigned long)(census->not_used - census->void_total) * sizeof(W_));
+        fprintf(hp_file, "USE\t%lu\n",
+                (unsigned long)(census->used - census->drag_total) * sizeof(W_));
+        fprintf(hp_file, "INHERENT_USE\t%lu\n",
+                (unsigned long)(census->prim) * sizeof(W_));
+        fprintf(hp_file, "DRAG\t%lu\n",
+                (unsigned long)(census->drag_total) * sizeof(W_));
+        printSample(rtsFalse, census->time);
+        return;
     }
 #endif
 
     for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {
 
 #ifdef PROFILING
-       if (RtsFlags.ProfFlags.bioSelector != NULL) {
-           count = 0;
-           if (strMatchesSelector("lag", RtsFlags.ProfFlags.bioSelector))
-               count += ctr->c.ldv.not_used - ctr->c.ldv.void_total;
-           if (strMatchesSelector("drag", RtsFlags.ProfFlags.bioSelector))
-               count += ctr->c.ldv.drag_total;
-           if (strMatchesSelector("void", RtsFlags.ProfFlags.bioSelector))
-               count += ctr->c.ldv.void_total;
-           if (strMatchesSelector("use", RtsFlags.ProfFlags.bioSelector))
-               count += ctr->c.ldv.used - ctr->c.ldv.drag_total;
-       } else
+        if (RtsFlags.ProfFlags.bioSelector != NULL) {
+            count = 0;
+            if (strMatchesSelector("lag", RtsFlags.ProfFlags.bioSelector))
+                count += ctr->c.ldv.not_used - ctr->c.ldv.void_total;
+            if (strMatchesSelector("drag", RtsFlags.ProfFlags.bioSelector))
+                count += ctr->c.ldv.drag_total;
+            if (strMatchesSelector("void", RtsFlags.ProfFlags.bioSelector))
+                count += ctr->c.ldv.void_total;
+            if (strMatchesSelector("use", RtsFlags.ProfFlags.bioSelector))
+                count += ctr->c.ldv.used - ctr->c.ldv.drag_total;
+        } else
 #endif
-       {
-           count = ctr->c.resid;
-       }
+        {
+            count = ctr->c.resid;
+        }
 
-       ASSERT( count >= 0 );
+        ASSERT( count >= 0 );
 
-       if (count == 0) continue;
+        if (count == 0) continue;
 
 #if !defined(PROFILING)
-       switch (RtsFlags.ProfFlags.doHeapProfile) {
-       case HEAP_BY_CLOSURE_TYPE:
-           fprintf(hp_file, "%s", (char *)ctr->identity);
-           break;
-       }
+        switch (RtsFlags.ProfFlags.doHeapProfile) {
+        case HEAP_BY_CLOSURE_TYPE:
+            fprintf(hp_file, "%s", (char *)ctr->identity);
+            traceHeapProfSampleString(0, (char *)ctr->identity,
+                                      count * sizeof(W_));
+            break;
+        }
 #endif
-       
+
 #ifdef PROFILING
-       switch (RtsFlags.ProfFlags.doHeapProfile) {
-       case HEAP_BY_CCS:
-           fprint_ccs(hp_file, (CostCentreStack *)ctr->identity, RtsFlags.ProfFlags.ccsLength);
-           break;
-       case HEAP_BY_MOD:
-       case HEAP_BY_DESCR:
-       case HEAP_BY_TYPE:
-           fprintf(hp_file, "%s", (char *)ctr->identity);
-           break;
-       case HEAP_BY_RETAINER:
-       {
-           RetainerSet *rs = (RetainerSet *)ctr->identity;
-
-           // it might be the distinguished retainer set rs_MANY:
-           if (rs == &rs_MANY) {
-               fprintf(hp_file, "MANY");
-               break;
-           }
-
-           // Mark this retainer set by negating its id, because it
-           // has appeared in at least one census.  We print the
-           // values of all such retainer sets into the log file at
-           // the end.  A retainer set may exist but not feature in
-           // any censuses if it arose as the intermediate retainer
-           // set for some closure during retainer set calculation.
-           if (rs->id > 0)
-               rs->id = -(rs->id);
-
-           // report in the unit of bytes: * sizeof(StgWord)
-           printRetainerSetShort(hp_file, rs);
-           break;
-       }
-       default:
-           barf("dumpCensus; doHeapProfile");
-       }
+        switch (RtsFlags.ProfFlags.doHeapProfile) {
+        case HEAP_BY_CCS:
+            fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
+                       RtsFlags.ProfFlags.ccsLength);
+            traceHeapProfSampleCostCentre(0, (CostCentreStack *)ctr->identity,
+                                          count * sizeof(W_));
+            break;
+        case HEAP_BY_MOD:
+        case HEAP_BY_DESCR:
+        case HEAP_BY_TYPE:
+            fprintf(hp_file, "%s", (char *)ctr->identity);
+            traceHeapProfSampleString(0, (char *)ctr->identity,
+                                      count * sizeof(W_));
+            break;
+        case HEAP_BY_RETAINER:
+        {
+            RetainerSet *rs = (RetainerSet *)ctr->identity;
+
+            // it might be the distinguished retainer set rs_MANY:
+            if (rs == &rs_MANY) {
+                fprintf(hp_file, "MANY");
+                break;
+            }
+
+            // Mark this retainer set by negating its id, because it
+            // has appeared in at least one census.  We print the
+            // values of all such retainer sets into the log file at
+            // the end.  A retainer set may exist but not feature in
+            // any censuses if it arose as the intermediate retainer
+            // set for some closure during retainer set calculation.
+            if (rs->id > 0)
+                rs->id = -(rs->id);
+
+            // report in the unit of bytes: * sizeof(StgWord)
+            printRetainerSetShort(hp_file, rs, RtsFlags.ProfFlags.ccsLength);
+            break;
+        }
+        default:
+            barf("dumpCensus; doHeapProfile");
+        }
 #endif
 
-       fprintf(hp_file, "\t%lu\n", (unsigned long)count * sizeof(W_));
+        fprintf(hp_file, "\t%" FMT_Word "\n", (W_)count * sizeof(W_));
     }
 
     printSample(rtsFalse, census->time);
 }
 
+
+static void heapProfObject(Census *census, StgClosure *p, size_t size,
+                           rtsBool prim
+#ifndef PROFILING
+                           STG_UNUSED
+#endif
+                           )
+{
+    const void *identity;
+    size_t real_size;
+    counter *ctr;
+
+            identity = NULL;
+
+#ifdef PROFILING
+            // subtract the profiling overhead
+            real_size = size - sizeofW(StgProfHeader);
+#else
+            real_size = size;
+#endif
+
+            if (closureSatisfiesConstraints((StgClosure*)p)) {
+#ifdef PROFILING
+                if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
+                    if (prim)
+                        census->prim += real_size;
+                    else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
+                        census->not_used += real_size;
+                    else
+                        census->used += real_size;
+                } else
+#endif
+                {
+                    identity = closureIdentity((StgClosure *)p);
+
+                    if (identity != NULL) {
+                        ctr = lookupHashTable(census->hash, (StgWord)identity);
+                        if (ctr != NULL) {
+#ifdef PROFILING
+                            if (RtsFlags.ProfFlags.bioSelector != NULL) {
+                                if (prim)
+                                    ctr->c.ldv.prim += real_size;
+                                else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
+                                    ctr->c.ldv.not_used += real_size;
+                                else
+                                    ctr->c.ldv.used += real_size;
+                            } else
+#endif
+                            {
+                                ctr->c.resid += real_size;
+                            }
+                        } else {
+                            ctr = arenaAlloc( census->arena, sizeof(counter) );
+                            initLDVCtr(ctr);
+                            insertHashTable( census->hash, (StgWord)identity, ctr );
+                            ctr->identity = identity;
+                            ctr->next = census->ctrs;
+                            census->ctrs = ctr;
+
+#ifdef PROFILING
+                            if (RtsFlags.ProfFlags.bioSelector != NULL) {
+                                if (prim)
+                                    ctr->c.ldv.prim = real_size;
+                                else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
+                                    ctr->c.ldv.not_used = real_size;
+                                else
+                                    ctr->c.ldv.used = real_size;
+                            } else
+#endif
+                            {
+                                ctr->c.resid = real_size;
+                            }
+                        }
+                    }
+                }
+            }
+}
+
+// Compact objects require special handling code because they
+// are not stored consecutively in memory (rather, each object
+// is a list of objects), and that would break the while loop
+// below. But we know that each block holds at most one object
+// so we don't need the loop.
+//
+// See Note [Compact Normal Forms] for details.
+static void
+heapCensusCompactList(Census *census, bdescr *bd)
+{
+    for (; bd != NULL; bd = bd->link) {
+        StgCompactNFDataBlock *block = (StgCompactNFDataBlock*)bd->start;
+        StgCompactNFData *str = block->owner;
+        heapProfObject(census, (StgClosure*)str,
+                       compact_nfdata_full_sizeW(str), rtsTrue);
+    }
+}
+
 /* -----------------------------------------------------------------------------
  * Code to perform a heap census.
  * -------------------------------------------------------------------------- */
@@ -914,248 +971,200 @@ static void
 heapCensusChain( Census *census, bdescr *bd )
 {
     StgPtr p;
-    StgInfoTable *info;
-    void *identity;
-    nat size;
-    counter *ctr;
-    nat real_size;
+    const StgInfoTable *info;
+    size_t size;
     rtsBool prim;
 
     for (; bd != NULL; bd = bd->link) {
 
-       // HACK: ignore pinned blocks, because they contain gaps.
-       // It's not clear exactly what we'd like to do here, since we
-       // can't tell which objects in the block are actually alive.
-       // Perhaps the whole block should be counted as SYSTEM memory.
-       if (bd->flags & BF_PINNED) {
-           continue;
-       }
-
-       p = bd->start;
-       while (p < bd->free) {
-           info = get_itbl((StgClosure *)p);
-           prim = rtsFalse;
-           
-           switch (info->type) {
-
-           case THUNK:
-               size = thunk_sizeW_fromITBL(info);
-               break;
-
-           case THUNK_1_1:
-           case THUNK_0_2:
-           case THUNK_2_0:
-               size = sizeofW(StgThunkHeader) + 2;
-               break;
-
-           case THUNK_1_0:
-           case THUNK_0_1:
-           case THUNK_SELECTOR:
-               size = sizeofW(StgThunkHeader) + 1;
-               break;
-
-           case CONSTR:
-           case FUN:
-           case IND_PERM:
-           case IND_OLDGEN:
-           case IND_OLDGEN_PERM:
-           case CAF_BLACKHOLE:
-           case SE_CAF_BLACKHOLE:
-           case SE_BLACKHOLE:
-           case BLACKHOLE:
-           case FUN_1_0:
-           case FUN_0_1:
-           case FUN_1_1:
-           case FUN_0_2:
-           case FUN_2_0:
-           case CONSTR_1_0:
-           case CONSTR_0_1:
-           case CONSTR_1_1:
-           case CONSTR_0_2:
-           case CONSTR_2_0:
-               size = sizeW_fromITBL(info);
-               break;
-
-           case IND:
-               // Special case/Delicate Hack: INDs don't normally
-               // appear, since we're doing this heap census right
-               // after GC.  However, GarbageCollect() also does
-               // resurrectThreads(), which can update some
-               // blackholes when it calls raiseAsync() on the
-               // resurrected threads.  So we know that any IND will
-               // be the size of a BLACKHOLE.
-               size = BLACKHOLE_sizeW();
-               break;
-
-           case BCO:
-               prim = rtsTrue;
-               size = bco_sizeW((StgBCO *)p);
-               break;
+        // HACK: pretend a pinned block is just one big ARR_WORDS
+        // owned by CCS_PINNED.  These blocks can be full of holes due
+        // to alignment constraints so we can't traverse the memory
+        // and do a proper census.
+        if (bd->flags & BF_PINNED) {
+            StgClosure arr;
+            SET_HDR(&arr, &stg_ARR_WORDS_info, CCS_PINNED);
+            heapProfObject(census, &arr, bd->blocks * BLOCK_SIZE_W, rtsTrue);
+            continue;
+        }
+
+        p = bd->start;
+
+        // When we shrink a large ARR_WORDS, we do not adjust the free pointer
+        // of the associated block descriptor, thus introducing slop at the end
+        // of the object.  This slop remains after GC, violating the assumption
+        // of the loop below that all slop has been eliminated (#11627).
+        // Consequently, we handle large ARR_WORDS objects as a special case.
+        if (bd->flags & BF_LARGE
+            && get_itbl((StgClosure *)p)->type == ARR_WORDS) {
+            size = arr_words_sizeW((StgArrBytes *)p);
+            prim = rtsTrue;
+            heapProfObject(census, (StgClosure *)p, size, prim);
+            continue;
+        }
+
+        while (p < bd->free) {
+            info = get_itbl((const StgClosure *)p);
+            prim = rtsFalse;
+
+            switch (info->type) {
+
+            case THUNK:
+                size = thunk_sizeW_fromITBL(info);
+                break;
+
+            case THUNK_1_1:
+            case THUNK_0_2:
+            case THUNK_2_0:
+                size = sizeofW(StgThunkHeader) + 2;
+                break;
+
+            case THUNK_1_0:
+            case THUNK_0_1:
+            case THUNK_SELECTOR:
+                size = sizeofW(StgThunkHeader) + 1;
+                break;
+
+            case FUN:
+            case BLACKHOLE:
+            case BLOCKING_QUEUE:
+            case FUN_1_0:
+            case FUN_0_1:
+            case FUN_1_1:
+            case FUN_0_2:
+            case FUN_2_0:
+            case CONSTR:
+            case CONSTR_NOCAF:
+            case CONSTR_1_0:
+            case CONSTR_0_1:
+            case CONSTR_1_1:
+            case CONSTR_0_2:
+            case CONSTR_2_0:
+                size = sizeW_fromITBL(info);
+                break;
+
+            case IND:
+                // Special case/Delicate Hack: INDs don't normally
+                // appear, since we're doing this heap census right
+                // after GC.  However, GarbageCollect() also does
+                // resurrectThreads(), which can update some
+                // blackholes when it calls raiseAsync() on the
+                // resurrected threads.  So we know that any IND will
+                // be the size of a BLACKHOLE.
+                size = BLACKHOLE_sizeW();
+                break;
+
+            case BCO:
+                prim = rtsTrue;
+                size = bco_sizeW((StgBCO *)p);
+                break;
 
             case MVAR_CLEAN:
             case MVAR_DIRTY:
-           case WEAK:
-           case STABLE_NAME:
-           case MUT_VAR_CLEAN:
-           case MUT_VAR_DIRTY:
-               prim = rtsTrue;
-               size = sizeW_fromITBL(info);
-               break;
-
-           case AP:
-               size = ap_sizeW((StgAP *)p);
-               break;
-
-           case PAP:
-               size = pap_sizeW((StgPAP *)p);
-               break;
-
-           case AP_STACK:
-               size = ap_stack_sizeW((StgAP_STACK *)p);
-               break;
-               
-           case ARR_WORDS:
-               prim = rtsTrue;
-               size = arr_words_sizeW(stgCast(StgArrWords*,p));
-               break;
-               
-           case MUT_ARR_PTRS_CLEAN:
-           case MUT_ARR_PTRS_DIRTY:
-           case MUT_ARR_PTRS_FROZEN:
-           case MUT_ARR_PTRS_FROZEN0:
-               prim = rtsTrue;
-               size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
-               break;
-               
-           case TSO:
-               prim = rtsTrue;
+            case TVAR:
+            case WEAK:
+            case PRIM:
+            case MUT_PRIM:
+            case MUT_VAR_CLEAN:
+            case MUT_VAR_DIRTY:
+                prim = rtsTrue;
+                size = sizeW_fromITBL(info);
+                break;
+
+            case AP:
+                size = ap_sizeW((StgAP *)p);
+                break;
+
+            case PAP:
+                size = pap_sizeW((StgPAP *)p);
+                break;
+
+            case AP_STACK:
+                size = ap_stack_sizeW((StgAP_STACK *)p);
+                break;
+
+            case ARR_WORDS:
+                prim = rtsTrue;
+                size = arr_words_sizeW((StgArrBytes*)p);
+                break;
+
+            case MUT_ARR_PTRS_CLEAN:
+            case MUT_ARR_PTRS_DIRTY:
+            case MUT_ARR_PTRS_FROZEN:
+            case MUT_ARR_PTRS_FROZEN0:
+                prim = rtsTrue;
+                size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
+                break;
+
+            case SMALL_MUT_ARR_PTRS_CLEAN:
+            case SMALL_MUT_ARR_PTRS_DIRTY:
+            case SMALL_MUT_ARR_PTRS_FROZEN:
+            case SMALL_MUT_ARR_PTRS_FROZEN0:
+                prim = rtsTrue;
+                size = small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs *)p);
+                break;
+
+            case TSO:
+                prim = rtsTrue;
 #ifdef PROFILING
-               if (RtsFlags.ProfFlags.includeTSOs) {
-                   size = tso_sizeW((StgTSO *)p);
-                   break;
-               } else {
-                   // Skip this TSO and move on to the next object
-                   p += tso_sizeW((StgTSO *)p);
-                   continue;
-               }
+                if (RtsFlags.ProfFlags.includeTSOs) {
+                    size = sizeofW(StgTSO);
+                    break;
+                } else {
+                    // Skip this TSO and move on to the next object
+                    p += sizeofW(StgTSO);
+                    continue;
+                }
 #else
-               size = tso_sizeW((StgTSO *)p);
-               break;
+                size = sizeofW(StgTSO);
+                break;
 #endif
 
-           case TREC_HEADER: 
-               prim = rtsTrue;
-               size = sizeofW(StgTRecHeader);
-               break;
-
-           case TVAR_WATCH_QUEUE:
-               prim = rtsTrue;
-               size = sizeofW(StgTVarWatchQueue);
-               break;
-               
-           case INVARIANT_CHECK_QUEUE:
-               prim = rtsTrue;
-               size = sizeofW(StgInvariantCheckQueue);
-               break;
-               
-           case ATOMIC_INVARIANT:
-               prim = rtsTrue;
-               size = sizeofW(StgAtomicInvariant);
-               break;
-               
-           case TVAR:
-               prim = rtsTrue;
-               size = sizeofW(StgTVar);
-               break;
-               
-           case TREC_CHUNK:
-               prim = rtsTrue;
-               size = sizeofW(StgTRecChunk);
-               break;
-
-           default:
-               barf("heapCensus, unknown object: %d", info->type);
-           }
-           
-           identity = NULL;
-
+            case STACK:
+                prim = rtsTrue;
 #ifdef PROFILING
-           // subtract the profiling overhead
-           real_size = size - sizeofW(StgProfHeader);
+                if (RtsFlags.ProfFlags.includeTSOs) {
+                    size = stack_sizeW((StgStack*)p);
+                    break;
+                } else {
+                    // Skip this TSO and move on to the next object
+                    p += stack_sizeW((StgStack*)p);
+                    continue;
+                }
 #else
-           real_size = size;
+                size = stack_sizeW((StgStack*)p);
+                break;
 #endif
 
-           if (closureSatisfiesConstraints((StgClosure*)p)) {
-#ifdef PROFILING
-               if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
-                   if (prim)
-                       census->prim += real_size;
-                   else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
-                       census->not_used += real_size;
-                   else
-                       census->used += real_size;
-               } else
-#endif
-               {
-                   identity = closureIdentity((StgClosure *)p);
+            case TREC_CHUNK:
+                prim = rtsTrue;
+                size = sizeofW(StgTRecChunk);
+                break;
 
-                   if (identity != NULL) {
-                       ctr = lookupHashTable( census->hash, (StgWord)identity );
-                       if (ctr != NULL) {
-#ifdef PROFILING
-                           if (RtsFlags.ProfFlags.bioSelector != NULL) {
-                               if (prim)
-                                   ctr->c.ldv.prim += real_size;
-                               else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
-                                   ctr->c.ldv.not_used += real_size;
-                               else
-                                   ctr->c.ldv.used += real_size;
-                           } else
-#endif
-                           {
-                               ctr->c.resid += real_size;
-                           }
-                       } else {
-                           ctr = arenaAlloc( census->arena, sizeof(counter) );
-                           initLDVCtr(ctr);
-                           insertHashTable( census->hash, (StgWord)identity, ctr );
-                           ctr->identity = identity;
-                           ctr->next = census->ctrs;
-                           census->ctrs = ctr;
+            case COMPACT_NFDATA:
+                barf("heapCensus, found compact object in the wrong list");
+                break;
 
-#ifdef PROFILING
-                           if (RtsFlags.ProfFlags.bioSelector != NULL) {
-                               if (prim)
-                                   ctr->c.ldv.prim = real_size;
-                               else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
-                                   ctr->c.ldv.not_used = real_size;
-                               else
-                                   ctr->c.ldv.used = real_size;
-                           } else
-#endif
-                           {
-                               ctr->c.resid = real_size;
-                           }
-                       }
-                   }
-               }
-           }
-
-           p += size;
-       }
+            default:
+                barf("heapCensus, unknown object: %d", info->type);
+            }
+
+            heapProfObject(census,(StgClosure*)p,size,prim);
+
+            p += size;
+        }
     }
 }
 
-void
-heapCensus( void )
+void heapCensus (Time t)
 {
-  nat g, s;
+  uint32_t g, n;
   Census *census;
+  gen_workspace *ws;
 
   census = &censuses[era];
-  census->time  = mut_user_time();
-    
+  census->time  = mut_user_time_until(t);
+
   // calculate retainer sets if necessary
 #ifdef PROFILING
   if (doingRetainerProfiling()) {
@@ -1168,16 +1177,18 @@ heapCensus( void )
 #endif
 
   // Traverse the heap, collecting the census info
-  if (RtsFlags.GcFlags.generations == 1) {
-      heapCensusChain( census, g0s0->blocks );
-  } else {
-      for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-         for (s = 0; s < generations[g].n_steps; s++) {
-             heapCensusChain( census, generations[g].steps[s].blocks );
-             // Are we interested in large objects?  might be
-             // confusing to include the stack in a heap profile.
-             heapCensusChain( census, generations[g].steps[s].large_objects );
-         }
+  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+      heapCensusChain( census, generations[g].blocks );
+      // Are we interested in large objects?  might be
+      // confusing to include the stack in a heap profile.
+      heapCensusChain( census, generations[g].large_objects );
+      heapCensusCompactList ( census, generations[g].compact_objects );
+
+      for (n = 0; n < n_capabilities; n++) {
+          ws = &gc_threads[n]->gens[g];
+          heapCensusChain(census, ws->todo_bd);
+          heapCensusChain(census, ws->part_list);
+          heapCensusChain(census, ws->scavd_list);
       }
   }
 
@@ -1186,7 +1197,7 @@ heapCensus( void )
     // We can't generate any info for LDV profiling until
     // the end of the run...
     if (!doingLDVProfiling())
-       dumpCensus( census );
+        dumpCensus( census );
 #else
     dumpCensus( census );
 #endif
@@ -1197,8 +1208,7 @@ heapCensus( void )
 #ifdef PROFILING
   if (RtsFlags.ProfFlags.bioSelector == NULL)
   {
-      freeHashTable( census->hash, NULL/* don't free the elements */ );
-      arenaFree( census->arena );
+      freeEra(census);
       census->hash = NULL;
       census->arena = NULL;
   }
@@ -1210,5 +1220,4 @@ heapCensus( void )
 #ifdef PROFILING
   stat_endHeapCensus();
 #endif
-}    
-
+}