Implement public interface for GC statistics.
author Edward Z. Yang <ezyang@mit.edu>
Sat, 30 Jul 2011 20:02:10 +0000 (16:02 -0400)
committer Edward Z. Yang <ezyang@mit.edu>
Sun, 31 Jul 2011 02:42:16 +0000 (22:42 -0400)
We add a new RTS flag -T for collecting statistics without producing any
new output.  There is one new struct in rts/storage/GC.h: GCStats.  We
add two new global counters current_residency and current_slop, which
are useful for in-program GC statistics.

See GHC.Stats in base for a Haskell interface to this functionality.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
includes/rts/storage/GC.h
rts/Linker.c
rts/RtsFlags.c
rts/Stats.c

index 3c6e6f6..e57ffd2 100644 (file)
@@ -181,6 +181,50 @@ void setKeepCAFs (void);
    Stats
    -------------------------------------------------------------------------- */
 
+typedef struct _GCStats {  // snapshot of RTS GC counters, filled in by getGCStats()
+  StgWord64 bytes_allocated;            // total allocation, in bytes
+  StgWord64 num_gcs;                    // collections, summed over all generations
+  StgWord64 num_byte_usage_samples;     // number of residency samples recorded
+  StgWord64 max_bytes_used;             // largest residency sample, in bytes
+  StgWord64 cumulative_bytes_used;      // sum of all residency samples, in bytes
+  StgWord64 bytes_copied;               // total copied by the GC, in bytes
+  StgWord64 current_bytes_used;         // most recently recorded residency, in bytes
+  StgWord64 current_bytes_slop;         // current slop counter, in bytes
+  StgWord64 max_bytes_slop;             // largest slop recorded, in bytes
+  StgWord64 peak_megabytes_allocated;   // peak megablock allocation, in units of 1024*1024 bytes
+  StgWord64 par_avg_bytes_copied;       // GC_par_avg_copied scaled to bytes
+  StgWord64 par_max_bytes_copied;       // GC_par_max_copied scaled to bytes
+  StgDouble mutator_cpu_seconds;        // CPU time since end of init, minus GC (and profiling) time
+  StgDouble mutator_wall_seconds;       // elapsed time since end of init, minus GC elapsed time
+  StgDouble gc_cpu_seconds;             // GC CPU time, summed over all generations
+  StgDouble gc_wall_seconds;            // GC elapsed time, summed over all generations
+} GCStats;
+void getGCStats (GCStats *s);  // writes every field of *s; s must be non-NULL
+
+// These don't change over execution, so do them elsewhere
+//  StgDouble init_cpu_seconds;
+//  StgDouble init_wall_seconds;
+
+typedef struct _ParGCStats {  // NOTE(review): presumably the parallel-GC copy counters on their own -- confirm
+  StgWord64 avg_copied;  // NOTE(review): units (words vs. bytes) not established by this change
+  StgWord64 max_copied;  // NOTE(review): units (words vs. bytes) not established by this change
+} ParGCStats;
+void getParGCStats (ParGCStats *s);  // NOTE(review): no definition or linker-table entry is visible in this change
+
+/*
+typedef struct _TaskStats {
+  StgWord64 mut_time;
+  StgWord64 mut_etime;
+  StgWord64 gc_time;
+  StgWord64 gc_etime;
+} TaskStats;
+// would need to allocate arbitrarily large amount of memory
+// because it's a linked list of results
+void getTaskStats (TaskStats **s);
+// Need to stuff SparkCounters in a public header file...
+void getSparkStats (SparkCounters *s);
+*/
+
 // Returns the total number of bytes allocated since the start of the program.
 HsInt64 getAllocations (void);
 
index 781f705..f5b90d4 100644 (file)
@@ -793,6 +793,7 @@ typedef struct _RtsSymbolVal {
       SymI_HasProto(getOrSetGHCConcWindowsProddingStore)                \
       SymI_HasProto(getOrSetSystemEventThreadEventManagerStore)         \
       SymI_HasProto(getOrSetSystemEventThreadIOManagerThreadStore)      \
+      SymI_HasProto(getGCStats)                         \
       SymI_HasProto(genSymZh)                           \
       SymI_HasProto(genericRaise)                       \
       SymI_HasProto(getProgArgv)                        \
index fcc1f49..eda327d 100644 (file)
@@ -236,6 +236,7 @@ usage_text[] = {
 "  -I<sec>  Perform full GC after <sec> idle time (default: 0.3, 0 == off)",
 #endif
 "",
+"  -T         Collect GC statistics (useful for in-program statistics access)",
 "  -t[<file>] One-line GC statistics (if <file> omitted, uses stderr)",
 "  -s[<file>] Summary  GC statistics (if <file> omitted, uses stderr)",
 "  -S[<file>] Detailed GC statistics (if <file> omitted, uses stderr)",
@@ -841,6 +842,10 @@ error = rtsTrue;
                }
                break;
 
+              case 'T':
+                  RtsFlags.GcFlags.giveStats = COLLECT_GC_STATS;
+                  break; /* Don't initialize statistics file. */
+
              case 'S':
                  RtsFlags.GcFlags.giveStats = VERBOSE_GC_STATS;
                  goto stats;
index c071ec0..ebe239f 100644 (file)
@@ -56,9 +56,12 @@ static Ticks HCe_start_time, HCe_tot_time = 0;   // heap census prof elap time
 #define PROF_VAL(x)   0
 #endif
 
-static lnat max_residency     = 0; // in words; for stats only
+// current = current as of last GC
+static lnat current_residency = 0; // in words; for stats only
+static lnat max_residency     = 0;
 static lnat cumulative_residency = 0;
 static lnat residency_samples = 0; // for stats only
+static lnat current_slop      = 0;
 static lnat max_slop          = 0;
 
 static lnat GC_end_faults = 0;
@@ -367,6 +370,7 @@ stat_endGC (gc_thread *gct,
            if (live > max_residency) {
                max_residency = live;
            }
+            current_residency = live;
            residency_samples++;
            cumulative_residency += live;
        }
@@ -510,6 +514,9 @@ StgInt TOTAL_CALLS=1;
   statsPrintf("  (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
              SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
 
+static inline Ticks get_init_cpu(void) { return end_init_cpu - start_init_cpu; } // CPU ticks between start_init_cpu and end_init_cpu
+static inline Ticks get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; } // elapsed ticks between start_init_elapsed and end_init_elapsed
+
 void
 stat_exit(int alloc)
 {
@@ -553,8 +560,8 @@ stat_exit(int alloc)
             gc_elapsed += GC_coll_elapsed[i];
         }
 
-        init_cpu     = end_init_cpu - start_init_cpu;
-        init_elapsed = end_init_elapsed - start_init_elapsed;
+        init_cpu     = get_init_cpu();
+        init_elapsed = get_init_elapsed();
 
         exit_cpu     = end_exit_cpu - start_exit_cpu;
         exit_elapsed = end_exit_elapsed - start_exit_elapsed;
@@ -844,6 +851,70 @@ statDescribeGens(void)
 extern HsInt64 getAllocations( void ) 
 { return (HsInt64)GC_tot_alloc * sizeof(W_); }
 
+/* Fill *s with a snapshot of the GC counters kept in this file, converting
+ * word counts to bytes and Ticks to seconds.  Each word count is widened to
+ * StgWord64 before scaling so the product cannot wrap in a narrower type.
+ * NOTE(review): no hunk visible here ever assigns current_slop -- confirm it
+ * is updated somewhere, or current_bytes_slop will always report 0. */
+extern void getGCStats( GCStats *s )
+{
+    nat total_collections = 0;
+    nat g;
+    Ticks gc_cpu = 0;
+    Ticks gc_elapsed = 0;
+    Ticks current_elapsed = 0;
+    Ticks current_cpu = 0;
+
+    getProcessTimes(&current_cpu, &current_elapsed);
+
+    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+        total_collections += generations[g].collections;
+        gc_cpu     += GC_coll_cpu[g];
+        gc_elapsed += GC_coll_elapsed[g];
+    }
+
+    s->bytes_allocated = GC_tot_alloc*(StgWord64)sizeof(W_);
+    s->num_gcs = total_collections;
+    s->num_byte_usage_samples = residency_samples;
+    s->max_bytes_used = max_residency*(StgWord64)sizeof(W_);
+    s->cumulative_bytes_used = cumulative_residency*(StgWord64)sizeof(W_);
+    s->peak_megabytes_allocated = (StgWord64)peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L);
+    s->bytes_copied = GC_tot_copied*(StgWord64)sizeof(W_);
+    s->max_bytes_slop = max_slop*(StgWord64)sizeof(W_);
+    s->current_bytes_used = current_residency*(StgWord64)sizeof(W_);
+    s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_);
+    /*
+    s->init_cpu_seconds = TICK_TO_DBL(get_init_cpu());
+    s->init_wall_seconds = TICK_TO_DBL(get_init_elapsed());
+    */
+    s->mutator_cpu_seconds = TICK_TO_DBL(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
+    s->mutator_wall_seconds = TICK_TO_DBL(current_elapsed- end_init_elapsed - gc_elapsed);
+    s->gc_cpu_seconds = TICK_TO_DBL(gc_cpu);
+    s->gc_wall_seconds = TICK_TO_DBL(gc_elapsed);
+    s->par_avg_bytes_copied = GC_par_avg_copied*(StgWord64)sizeof(W_);
+    s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_);
+}
+// extern void getTaskStats( TaskStats **s ) {}
+#if 0 // compiled out; the GC.h hunk notes SparkCounters is not yet in a public header
+extern void getSparkStats( SparkCounters *s ) { // totals the spark counters over all capabilities into *s
+    nat i;
+    s->created = 0;
+    s->dud = 0;
+    s->overflowed = 0;
+    s->converted = 0;
+    s->gcd = 0;
+    s->fizzled = 0;
+    for (i = 0; i < n_capabilities; i++) { // accumulate each capability's spark_stats
+        s->created   += capabilities[i].spark_stats.created;
+        s->dud       += capabilities[i].spark_stats.dud;
+        s->overflowed+= capabilities[i].spark_stats.overflowed;
+        s->converted += capabilities[i].spark_stats.converted;
+        s->gcd       += capabilities[i].spark_stats.gcd;
+        s->fizzled   += capabilities[i].spark_stats.fizzled;
+    }
+}
+#endif
+
 /* -----------------------------------------------------------------------------
    Dumping stuff in the stats file, or via the debug message interface
    -------------------------------------------------------------------------- */