Fix numa001 failure with "too many NUMA nodes"
[ghc.git] / rts / ProfHeap.c
index 9557648..a494a1b 100644 (file)
@@ -9,6 +9,7 @@
 #include "PosixSource.h"
 #include "Rts.h"
 
+#include "Capability.h"
 #include "RtsFlags.h"
 #include "RtsUtils.h"
 #include "Profiling.h"
@@ -19,6 +20,7 @@
 #include "LdvProfile.h"
 #include "Arena.h"
 #include "Printer.h"
+#include "Trace.h"
 #include "sm/GCThread.h"
 
 #include <string.h>
@@ -137,8 +139,7 @@ closureIdentity( const StgClosure *p )
         case CONSTR_2_0:
         case CONSTR_1_1:
         case CONSTR_0_2:
-        case CONSTR_STATIC:
-        case CONSTR_NOCAF_STATIC:
+        case CONSTR_NOCAF:
             return GET_CON_DESC(itbl_to_con_itbl(info));
         default:
             return closure_type_names[info->type];
@@ -370,6 +371,19 @@ printSample(rtsBool beginSample, StgDouble sampleValue)
     }
 }
 
+static void
+dumpCostCentresToEventLog(void)
+{
+#ifdef PROFILING
+    CostCentre *cc, *next;
+    for (cc = CC_LIST; cc != NULL; cc = next) {
+        next = cc->link;
+        traceHeapProfCostCentre(cc->ccID, cc->label, cc->module,
+                                cc->srcloc, cc->is_caf);
+    }
+#endif
+}
+
 /* --------------------------------------------------------------------------
  * Initialize the heap profilier
  * ----------------------------------------------------------------------- */
@@ -385,6 +399,13 @@ initHeapProfiling(void)
         errorBelch("cannot mix -hb and -hr");
         stg_exit(EXIT_FAILURE);
     }
+#ifdef THREADED_RTS
+    // See Trac #12019.
+    if (doingLDVProfiling() && RtsFlags.ParFlags.nCapabilities > 1) {
+        errorBelch("-hb cannot be used with multiple capabilities");
+        stg_exit(EXIT_FAILURE);
+    }
+#endif
 #endif
 
     // we only count eras if we're doing LDV profiling.  Otherwise era
@@ -399,7 +420,7 @@ initHeapProfiling(void)
     }
 
     // max_era = 2^LDV_SHIFT
-        max_era = 1 << LDV_SHIFT;
+    max_era = 1 << LDV_SHIFT;
 
     n_censuses = 32;
     censuses = stgMallocBytes(sizeof(Census) * n_censuses, "initHeapProfiling");
@@ -436,6 +457,9 @@ initHeapProfiling(void)
     }
 #endif
 
+    traceHeapProfBegin(0);
+    dumpCostCentresToEventLog();
+
     return 0;
 }
 
@@ -743,10 +767,12 @@ dumpCensus( Census *census )
     ssize_t count;
 
     printSample(rtsTrue, census->time);
+    traceHeapProfSampleBegin(era);
 
 #ifdef PROFILING
     if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
-      fprintf(hp_file, "VOID\t%lu\n", (unsigned long)(census->void_total) * sizeof(W_));
+        fprintf(hp_file, "VOID\t%lu\n",
+                (unsigned long)(census->void_total) * sizeof(W_));
         fprintf(hp_file, "LAG\t%lu\n",
                 (unsigned long)(census->not_used - census->void_total) * sizeof(W_));
         fprintf(hp_file, "USE\t%lu\n",
@@ -787,6 +813,8 @@ dumpCensus( Census *census )
         switch (RtsFlags.ProfFlags.doHeapProfile) {
         case HEAP_BY_CLOSURE_TYPE:
             fprintf(hp_file, "%s", (char *)ctr->identity);
+            traceHeapProfSampleString(0, (char *)ctr->identity,
+                                      count * sizeof(W_));
             break;
         }
 #endif
@@ -794,12 +822,17 @@ dumpCensus( Census *census )
 #ifdef PROFILING
         switch (RtsFlags.ProfFlags.doHeapProfile) {
         case HEAP_BY_CCS:
-            fprint_ccs(hp_file, (CostCentreStack *)ctr->identity, RtsFlags.ProfFlags.ccsLength);
+            fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
+                       RtsFlags.ProfFlags.ccsLength);
+            traceHeapProfSampleCostCentre(0, (CostCentreStack *)ctr->identity,
+                                          count * sizeof(W_));
             break;
         case HEAP_BY_MOD:
         case HEAP_BY_DESCR:
         case HEAP_BY_TYPE:
             fprintf(hp_file, "%s", (char *)ctr->identity);
+            traceHeapProfSampleString(0, (char *)ctr->identity,
+                                      count * sizeof(W_));
             break;
         case HEAP_BY_RETAINER:
         {
@@ -913,6 +946,24 @@ static void heapProfObject(Census *census, StgClosure *p, size_t size,
             }
 }
 
+// Compact objects require special handling code because they
+// are not stored consecutively in memory (rather, each object
+// is a list of objects), and that would break the while loop
+// below. But we know that each block holds at most one object
+// so we don't need the loop.
+//
+// See Note [Compact Normal Forms] for details.
+static void
+heapCensusCompactList(Census *census, bdescr *bd)
+{
+    for (; bd != NULL; bd = bd->link) {
+        StgCompactNFDataBlock *block = (StgCompactNFDataBlock*)bd->start;
+        StgCompactNFData *str = block->owner;
+        heapProfObject(census, (StgClosure*)str,
+                       compact_nfdata_full_sizeW(str), rtsTrue);
+    }
+}
+
 /* -----------------------------------------------------------------------------
  * Code to perform a heap census.
  * -------------------------------------------------------------------------- */
@@ -974,7 +1025,6 @@ heapCensusChain( Census *census, bdescr *bd )
                 size = sizeofW(StgThunkHeader) + 1;
                 break;
 
-            case CONSTR:
             case FUN:
             case BLACKHOLE:
             case BLOCKING_QUEUE:
@@ -983,6 +1033,8 @@ heapCensusChain( Census *census, bdescr *bd )
             case FUN_1_1:
             case FUN_0_2:
             case FUN_2_0:
+            case CONSTR:
+            case CONSTR_NOCAF:
             case CONSTR_1_0:
             case CONSTR_0_1:
             case CONSTR_1_1:
@@ -1089,6 +1141,10 @@ heapCensusChain( Census *census, bdescr *bd )
                 size = sizeofW(StgTRecChunk);
                 break;
 
+            case COMPACT_NFDATA:
+                barf("heapCensus, found compact object in the wrong list");
+                break;
+
             default:
                 barf("heapCensus, unknown object: %d", info->type);
             }
@@ -1126,6 +1182,7 @@ void heapCensus (Time t)
       // Are we interested in large objects?  might be
       // confusing to include the stack in a heap profile.
       heapCensusChain( census, generations[g].large_objects );
+      heapCensusCompactList ( census, generations[g].compact_objects );
 
       for (n = 0; n < n_capabilities; n++) {
           ws = &gc_threads[n]->gens[g];