Fix numa001 failure with "too many NUMA nodes"
[ghc.git] / rts / ProfHeap.c
index 58d66eb..a494a1b 100644 (file)
@@ -9,6 +9,7 @@
 #include "PosixSource.h"
 #include "Rts.h"
 
+#include "Capability.h"
 #include "RtsFlags.h"
 #include "RtsUtils.h"
 #include "Profiling.h"
@@ -19,6 +20,7 @@
 #include "LdvProfile.h"
 #include "Arena.h"
 #include "Printer.h"
+#include "Trace.h"
 #include "sm/GCThread.h"
 
 #include <string.h>
@@ -48,7 +50,7 @@ static uint32_t max_era;
  * lag/drag/void counters for each identity.
  * -------------------------------------------------------------------------- */
 typedef struct _counter {
-    void *identity;
+    const void *identity;
     union {
         ssize_t resid;
         struct {
@@ -96,15 +98,15 @@ static void aggregateCensusInfo( void );
 
 static void dumpCensus( Census *census );
 
-static rtsBool closureSatisfiesConstraints( StgClosure* p );
+static rtsBool closureSatisfiesConstraints( const StgClosure* p );
 
 /* ----------------------------------------------------------------------------
  * Find the "closure identity", which is a unique pointer representing
  * the band to which this closure's heap space is attributed in the
  * heap profile.
  * ------------------------------------------------------------------------- */
-static void *
-closureIdentity( StgClosure *p )
+static const void *
+closureIdentity( const StgClosure *p )
 {
     switch (RtsFlags.ProfFlags.doHeapProfile) {
 
@@ -128,7 +130,7 @@ closureIdentity( StgClosure *p )
 #else
     case HEAP_BY_CLOSURE_TYPE:
     {
-        StgInfoTable *info;
+        const StgInfoTable *info;
         info = get_itbl(p);
         switch (info->type) {
         case CONSTR:
@@ -137,8 +139,7 @@ closureIdentity( StgClosure *p )
         case CONSTR_2_0:
         case CONSTR_1_1:
         case CONSTR_0_2:
-        case CONSTR_STATIC:
-        case CONSTR_NOCAF_STATIC:
+        case CONSTR_NOCAF:
             return GET_CON_DESC(itbl_to_con_itbl(info));
         default:
             return closure_type_names[info->type];
@@ -181,9 +182,9 @@ doingRetainerProfiling( void )
 
 #ifdef PROFILING
 void
-LDV_recordDead( StgClosure *c, uint32_t size )
+LDV_recordDead( const StgClosure *c, uint32_t size )
 {
-    void *id;
+    const void *id;
     uint32_t t;
     counter *ctr;
 
@@ -221,7 +222,7 @@ LDV_recordDead( StgClosure *c, uint32_t size )
                     censuses[t+1].drag_total += size;
                     censuses[era].drag_total -= size;
                 } else {
-                    void *id;
+                    const void *id;
                     id = closureIdentity(c);
                     ctr = lookupHashTable(censuses[t+1].hash, (StgWord)id);
                     ASSERT( ctr != NULL );
@@ -370,6 +371,19 @@ printSample(rtsBool beginSample, StgDouble sampleValue)
     }
 }
 
+static void
+dumpCostCentresToEventLog(void)
+{   // Walk CC_LIST and emit one traceHeapProfCostCentre call per entry.
+#ifdef PROFILING   // without PROFILING the body is empty (a no-op)
+    CostCentre *cc, *next;
+    for (cc = CC_LIST; cc != NULL; cc = next) {
+        next = cc->link;   // successor is read before the trace call
+        traceHeapProfCostCentre(cc->ccID, cc->label, cc->module,
+                                cc->srcloc, cc->is_caf);
+    }
+#endif   // PROFILING
+}
+
 /* --------------------------------------------------------------------------
  * Initialize the heap profilier
  * ----------------------------------------------------------------------- */
@@ -385,6 +399,13 @@ initHeapProfiling(void)
         errorBelch("cannot mix -hb and -hr");
         stg_exit(EXIT_FAILURE);
     }
+#ifdef THREADED_RTS
+    // See Trac #12019.
+    if (doingLDVProfiling() && RtsFlags.ParFlags.nCapabilities > 1) {
+        errorBelch("-hb cannot be used with multiple capabilities");
+        stg_exit(EXIT_FAILURE);
+    }
+#endif
 #endif
 
     // we only count eras if we're doing LDV profiling.  Otherwise era
@@ -399,7 +420,7 @@ initHeapProfiling(void)
     }
 
     // max_era = 2^LDV_SHIFT
-        max_era = 1 << LDV_SHIFT;
+    max_era = 1 << LDV_SHIFT;
 
     n_censuses = 32;
     censuses = stgMallocBytes(sizeof(Census) * n_censuses, "initHeapProfiling");
@@ -436,6 +457,9 @@ initHeapProfiling(void)
     }
 #endif
 
+    traceHeapProfBegin(0);
+    dumpCostCentresToEventLog();
+
     return 0;
 }
 
@@ -576,7 +600,7 @@ strMatchesSelector( const char* str, const char* sel )
  * testing against all the specified constraints.
  * -------------------------------------------------------------------------- */
 static rtsBool
-closureSatisfiesConstraints( StgClosure* p )
+closureSatisfiesConstraints( const StgClosure* p )
 {
 #if !defined(PROFILING)
     (void)p;   /* keep gcc -Wall happy */
@@ -743,10 +767,12 @@ dumpCensus( Census *census )
     ssize_t count;
 
     printSample(rtsTrue, census->time);
+    traceHeapProfSampleBegin(era);
 
 #ifdef PROFILING
     if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
-      fprintf(hp_file, "VOID\t%lu\n", (unsigned long)(census->void_total) * sizeof(W_));
+        fprintf(hp_file, "VOID\t%lu\n",
+                (unsigned long)(census->void_total) * sizeof(W_));
         fprintf(hp_file, "LAG\t%lu\n",
                 (unsigned long)(census->not_used - census->void_total) * sizeof(W_));
         fprintf(hp_file, "USE\t%lu\n",
@@ -787,6 +813,8 @@ dumpCensus( Census *census )
         switch (RtsFlags.ProfFlags.doHeapProfile) {
         case HEAP_BY_CLOSURE_TYPE:
             fprintf(hp_file, "%s", (char *)ctr->identity);
+            traceHeapProfSampleString(0, (char *)ctr->identity,
+                                      count * sizeof(W_));
             break;
         }
 #endif
@@ -794,12 +822,17 @@ dumpCensus( Census *census )
 #ifdef PROFILING
         switch (RtsFlags.ProfFlags.doHeapProfile) {
         case HEAP_BY_CCS:
-            fprint_ccs(hp_file, (CostCentreStack *)ctr->identity, RtsFlags.ProfFlags.ccsLength);
+            fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
+                       RtsFlags.ProfFlags.ccsLength);
+            traceHeapProfSampleCostCentre(0, (CostCentreStack *)ctr->identity,
+                                          count * sizeof(W_));
             break;
         case HEAP_BY_MOD:
         case HEAP_BY_DESCR:
         case HEAP_BY_TYPE:
             fprintf(hp_file, "%s", (char *)ctr->identity);
+            traceHeapProfSampleString(0, (char *)ctr->identity,
+                                      count * sizeof(W_));
             break;
         case HEAP_BY_RETAINER:
         {
@@ -829,7 +862,7 @@ dumpCensus( Census *census )
         }
 #endif
 
-        fprintf(hp_file, "\t%" FMT_SizeT "\n", (W_)count * sizeof(W_));
+        fprintf(hp_file, "\t%" FMT_Word "\n", (W_)count * sizeof(W_));
     }
 
     printSample(rtsFalse, census->time);
@@ -843,7 +876,7 @@ static void heapProfObject(Census *census, StgClosure *p, size_t size,
 #endif
                            )
 {
-    void *identity;
+    const void *identity;
     size_t real_size;
     counter *ctr;
 
@@ -871,7 +904,7 @@ static void heapProfObject(Census *census, StgClosure *p, size_t size,
                     identity = closureIdentity((StgClosure *)p);
 
                     if (identity != NULL) {
-                        ctr = lookupHashTable( census->hash, (StgWord)identity );
+                        ctr = lookupHashTable(census->hash, (StgWord)identity);
                         if (ctr != NULL) {
 #ifdef PROFILING
                             if (RtsFlags.ProfFlags.bioSelector != NULL) {
@@ -913,6 +946,24 @@ static void heapProfObject(Census *census, StgClosure *p, size_t size,
             }
 }
 
+// Census the compact objects on the block list bd (walked via bd->link).
+// Compact objects need their own code because they are not stored
+// consecutively in memory (rather, each object is a list of objects),
+// which would break the while loop in heapCensusChain below. Each block
+// holds at most one object, so no inner loop is needed: the block's owner
+// goes to heapProfObject with compact_nfdata_full_sizeW(owner) as its size.
+// See Note [Compact Normal Forms] for details.
+static void
+heapCensusCompactList(Census *census, bdescr *bd)
+{
+    for (; bd != NULL; bd = bd->link) {
+        StgCompactNFDataBlock *block = (StgCompactNFDataBlock*)bd->start;
+        StgCompactNFData *str = block->owner;
+        heapProfObject(census, (StgClosure*)str,
+                       compact_nfdata_full_sizeW(str), rtsTrue);
+    }
+}
+
 /* -----------------------------------------------------------------------------
  * Code to perform a heap census.
  * -------------------------------------------------------------------------- */
@@ -920,7 +971,7 @@ static void
 heapCensusChain( Census *census, bdescr *bd )
 {
     StgPtr p;
-    StgInfoTable *info;
+    const StgInfoTable *info;
     size_t size;
     rtsBool prim;
 
@@ -953,7 +1004,7 @@ heapCensusChain( Census *census, bdescr *bd )
         }
 
         while (p < bd->free) {
-            info = get_itbl((StgClosure *)p);
+            info = get_itbl((const StgClosure *)p);
             prim = rtsFalse;
 
             switch (info->type) {
@@ -974,7 +1025,6 @@ heapCensusChain( Census *census, bdescr *bd )
                 size = sizeofW(StgThunkHeader) + 1;
                 break;
 
-            case CONSTR:
             case FUN:
             case BLACKHOLE:
             case BLOCKING_QUEUE:
@@ -983,6 +1033,8 @@ heapCensusChain( Census *census, bdescr *bd )
             case FUN_1_1:
             case FUN_0_2:
             case FUN_2_0:
+            case CONSTR:
+            case CONSTR_NOCAF:
             case CONSTR_1_0:
             case CONSTR_0_1:
             case CONSTR_1_1:
@@ -1089,6 +1141,10 @@ heapCensusChain( Census *census, bdescr *bd )
                 size = sizeofW(StgTRecChunk);
                 break;
 
+            case COMPACT_NFDATA:
+                barf("heapCensus, found compact object in the wrong list");
+                break;
+
             default:
                 barf("heapCensus, unknown object: %d", info->type);
             }
@@ -1126,6 +1182,7 @@ void heapCensus (Time t)
       // Are we interested in large objects?  might be
       // confusing to include the stack in a heap profile.
       heapCensusChain( census, generations[g].large_objects );
+      heapCensusCompactList ( census, generations[g].compact_objects );
 
       for (n = 0; n < n_capabilities; n++) {
           ws = &gc_threads[n]->gens[g];