/* huh? */
#define BIG_STRING_LEN 512
-#define TICK_TO_DBL(t) ((double)(t) / TICKS_PER_SECOND)
+#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)
-static Ticks
+static Time
start_init_cpu, start_init_elapsed,
end_init_cpu, end_init_elapsed,
start_exit_cpu, start_exit_elapsed,
end_exit_cpu, end_exit_elapsed;
-static Ticks GC_tot_cpu = 0;
+static Time GC_tot_cpu = 0;
static StgWord64 GC_tot_alloc = 0;
static StgWord64 GC_tot_copied = 0;
static StgWord64 GC_par_max_copied = 0;
-static StgWord64 GC_par_avg_copied = 0;
+static StgWord64 GC_par_tot_copied = 0;
#ifdef PROFILING
-static Ticks RP_start_time = 0, RP_tot_time = 0; // retainer prof user time
-static Ticks RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time
+static Time RP_start_time = 0, RP_tot_time = 0; // retainer prof user time
+static Time RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time
-static Ticks HC_start_time, HC_tot_time = 0; // heap census prof user time
-static Ticks HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
+static Time HC_start_time, HC_tot_time = 0; // heap census prof user time
+static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
#endif
#ifdef PROFILING
#endif
// current = current as of last GC
-static lnat current_residency = 0; // in words; for stats only
-static lnat max_residency = 0;
-static lnat cumulative_residency = 0;
-static lnat residency_samples = 0; // for stats only
-static lnat current_slop = 0;
-static lnat max_slop = 0;
+static W_ current_residency = 0; // in words; for stats only
+static W_ max_residency = 0;
+static W_ cumulative_residency = 0;
+static W_ residency_samples = 0; // for stats only
+static W_ current_slop = 0;
+static W_ max_slop = 0;
-static lnat GC_end_faults = 0;
+static W_ GC_end_faults = 0;
-static Ticks *GC_coll_cpu = NULL;
-static Ticks *GC_coll_elapsed = NULL;
-static Ticks *GC_coll_max_pause = NULL;
+static Time *GC_coll_cpu = NULL;
+static Time *GC_coll_elapsed = NULL;
+static Time *GC_coll_max_pause = NULL;
static void statsFlush( void );
static void statsClose( void );
Current elapsed time
------------------------------------------------------------------------- */
-Ticks stat_getElapsedTime(void)
+Time stat_getElapsedTime(void)
{
return getProcessElapsedTime() - start_init_elapsed;
}
------------------------------------------------------------------------ */
double
-mut_user_time_until( Ticks t )
+mut_user_time_until( Time t )
{
- return TICK_TO_DBL(t - GC_tot_cpu - PROF_VAL(RP_tot_time));
+ return TimeToSecondsDbl(t - GC_tot_cpu);
+ // heapCensus() time is included in GC_tot_cpu, so we don't need
+ // to subtract it here.
}
double
mut_user_time( void )
{
- Ticks cpu;
+ Time cpu;
cpu = getProcessCPUTime();
return mut_user_time_until(cpu);
}
double
mut_user_time_during_RP( void )
{
- return TICK_TO_DBL(RP_start_time - GC_tot_cpu - RP_tot_time);
+ return TimeToSecondsDbl(RP_start_time - GC_tot_cpu - RP_tot_time);
}
double
mut_user_time_during_heap_census( void )
{
- return TICK_TO_DBL(HC_start_time - GC_tot_cpu - RP_tot_time);
+ return TimeToSecondsDbl(HC_start_time - GC_tot_cpu - RP_tot_time);
}
#endif /* PROFILING */
GC_tot_alloc = 0;
GC_tot_copied = 0;
GC_par_max_copied = 0;
- GC_par_avg_copied = 0;
+ GC_par_tot_copied = 0;
GC_tot_cpu = 0;
#ifdef PROFILING
max_slop = 0;
GC_end_faults = 0;
-}
+}
/* ---------------------------------------------------------------------------
initStats1() can be called after setupRtsFlags()
initStats1 (void)
{
nat i;
-
+
if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
- statsPrintf(" Alloc Copied Live GC GC TOT TOT Page Flts\n");
- statsPrintf(" bytes bytes bytes user elap user elap\n");
+ statsPrintf(" Alloc Copied Live GC GC TOT TOT Page Flts\n");
+ statsPrintf(" bytes bytes bytes user elap user elap\n");
}
- GC_coll_cpu =
- (Ticks *)stgMallocBytes(
- sizeof(Ticks)*RtsFlags.GcFlags.generations,
- "initStats");
- GC_coll_elapsed =
- (Ticks *)stgMallocBytes(
- sizeof(Ticks)*RtsFlags.GcFlags.generations,
- "initStats");
+ GC_coll_cpu =
+ (Time *)stgMallocBytes(
+ sizeof(Time)*RtsFlags.GcFlags.generations,
+ "initStats");
+ GC_coll_elapsed =
+ (Time *)stgMallocBytes(
+ sizeof(Time)*RtsFlags.GcFlags.generations,
+ "initStats");
GC_coll_max_pause =
- (Ticks *)stgMallocBytes(
- sizeof(Ticks)*RtsFlags.GcFlags.generations,
- "initStats");
+ (Time *)stgMallocBytes(
+ sizeof(Time)*RtsFlags.GcFlags.generations,
+ "initStats");
for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
- GC_coll_cpu[i] = 0;
+ GC_coll_cpu[i] = 0;
GC_coll_elapsed[i] = 0;
GC_coll_max_pause[i] = 0;
}
getProcessTimes(&start_init_cpu, &start_init_elapsed);
}
-void
+void
stat_endInit(void)
{
getProcessTimes(&end_init_cpu, &end_init_elapsed);
/* -----------------------------------------------------------------------------
stat_startExit and stat_endExit
-
+
These two measure the time taken in shutdownHaskell().
-------------------------------------------------------------------------- */
static nat rub_bell = 0;
void
-stat_startGC (gc_thread *gct)
+stat_startGC (Capability *cap, gc_thread *gct)
{
nat bell = RtsFlags.GcFlags.ringBell;
if (bell) {
- if (bell > 1) {
- debugBelch(" GC ");
- rub_bell = 1;
- } else {
- debugBelch("\007");
- }
+ if (bell > 1) {
+ debugBelch(" GC ");
+ rub_bell = 1;
+ } else {
+ debugBelch("\007");
+ }
}
#if USE_PAPI
#endif
getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
- gct->gc_start_thread_cpu = getThreadCPUTime();
+
+ // Post EVENT_GC_START with the same timestamp as used for stats
+ // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
+ // Here, as opposed to other places, the event is emitted on the cap
+ // that initiates the GC and external tools expect it to have the same
+ // timestamp as used in +RTS -s calculcations.
+ traceEventGcStartAtT(cap,
+ TimeToNS(gct->gc_start_elapsed - start_init_elapsed));
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
{
gct->gc_start_faults = getPageFaults();
}
-}
-void
-stat_gcWorkerThreadStart (gc_thread *gct)
-{
- if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
- {
- getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
- gct->gc_start_thread_cpu = getThreadCPUTime();
- }
-}
-
-void
-stat_gcWorkerThreadDone (gc_thread *gct)
-{
- Ticks thread_cpu, elapsed, gc_cpu, gc_elapsed;
-
- if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
- {
- elapsed = getProcessElapsedTime();
- thread_cpu = getThreadCPUTime();
-
- gc_cpu = thread_cpu - gct->gc_start_thread_cpu;
- gc_elapsed = elapsed - gct->gc_start_elapsed;
-
- taskDoneGC(gct->cap->running_task, gc_cpu, gc_elapsed);
- }
+ updateNurseriesStats();
}
/* -----------------------------------------------------------------------------
-------------------------------------------------------------------------- */
void
-stat_endGC (gc_thread *gct,
- lnat alloc, lnat live, lnat copied, nat gen,
- lnat max_copied, lnat avg_copied, lnat slop)
+stat_endGC (Capability *cap, gc_thread *gct,
+ W_ live, W_ copied, W_ slop, nat gen,
+ nat par_n_threads, W_ par_max_copied, W_ par_tot_copied)
{
+ W_ tot_alloc;
+ W_ alloc;
+
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
RtsFlags.ProfFlags.doHeapProfile)
// heap profiling needs GC_tot_time
{
- Ticks cpu, elapsed, thread_gc_cpu, gc_cpu, gc_elapsed;
-
+ Time cpu, elapsed, gc_cpu, gc_elapsed;
+
+ // Has to be emitted while all caps stopped for GC, but before GC_END.
+ // See trac.haskell.org/ThreadScope/wiki/RTSsummaryEvents
+ // for a detailed design rationale of the current setup
+ // of GC eventlog events.
+ traceEventGcGlobalSync(cap);
+
+ // Emitted before GC_END on all caps, which simplifies tools code.
+ traceEventGcStats(cap,
+ CAPSET_HEAP_DEFAULT,
+ gen,
+ copied * sizeof(W_),
+ slop * sizeof(W_),
+ /* current loss due to fragmentation */
+ (mblocks_allocated * BLOCKS_PER_MBLOCK - n_alloc_blocks)
+ * BLOCK_SIZE_W * sizeof(W_),
+ par_n_threads,
+ par_max_copied * sizeof(W_),
+ par_tot_copied * sizeof(W_));
+
getProcessTimes(&cpu, &elapsed);
- gc_elapsed = elapsed - gct->gc_start_elapsed;
- thread_gc_cpu = getThreadCPUTime() - gct->gc_start_thread_cpu;
+ // Post EVENT_GC_END with the same timestamp as used for stats
+ // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
+ // Here, as opposed to other places, the event is emitted on the cap
+ // that initiates the GC and external tools expect it to have the same
+ // timestamp as used in +RTS -s calculcations.
+ traceEventGcEndAtT(cap, TimeToNS(elapsed - start_init_elapsed));
+ gc_elapsed = elapsed - gct->gc_start_elapsed;
gc_cpu = cpu - gct->gc_start_cpu;
- taskDoneGC(gct->cap->running_task, thread_gc_cpu, gc_elapsed);
+ /* For the moment we calculate both per-HEC and total allocation.
+ * There is thus redundancy here, but for the moment we will calculate
+ * it both the old and new way and assert they're the same.
+ * When we're sure it's working OK then we can simplify things.
+ */
+ tot_alloc = calcTotalAllocated();
+
+ // allocated since the last GC
+ alloc = tot_alloc - GC_tot_alloc;
+ GC_tot_alloc = tot_alloc;
if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
- nat faults = getPageFaults();
-
- statsPrintf("%9ld %9ld %9ld",
- alloc*sizeof(W_), copied*sizeof(W_),
- live*sizeof(W_));
- statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2d)\n",
- TICK_TO_DBL(gc_cpu),
- TICK_TO_DBL(gc_elapsed),
- TICK_TO_DBL(cpu),
- TICK_TO_DBL(elapsed - start_init_elapsed),
- faults - gct->gc_start_faults,
+ W_ faults = getPageFaults();
+
+ statsPrintf("%9" FMT_SizeT " %9" FMT_SizeT " %9" FMT_SizeT,
+ alloc*sizeof(W_), copied*sizeof(W_),
+ live*sizeof(W_));
+ statsPrintf(" %6.3f %6.3f %8.3f %8.3f %4" FMT_Word " %4" FMT_Word " (Gen: %2d)\n",
+ TimeToSecondsDbl(gc_cpu),
+ TimeToSecondsDbl(gc_elapsed),
+ TimeToSecondsDbl(cpu),
+ TimeToSecondsDbl(elapsed - start_init_elapsed),
+ faults - gct->gc_start_faults,
gct->gc_start_faults - GC_end_faults,
gen);
GC_end_faults = faults;
- statsFlush();
- }
+ statsFlush();
+ }
GC_coll_cpu[gen] += gc_cpu;
GC_coll_elapsed[gen] += gc_elapsed;
GC_coll_max_pause[gen] = gc_elapsed;
}
- GC_tot_copied += (StgWord64) copied;
- GC_tot_alloc += (StgWord64) alloc;
- GC_par_max_copied += (StgWord64) max_copied;
- GC_par_avg_copied += (StgWord64) avg_copied;
- GC_tot_cpu += gc_cpu;
+ GC_tot_copied += (StgWord64) copied;
+ GC_par_max_copied += (StgWord64) par_max_copied;
+ GC_par_tot_copied += (StgWord64) par_tot_copied;
+ GC_tot_cpu += gc_cpu;
- if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
- if (live > max_residency) {
- max_residency = live;
- }
+ traceEventHeapSize(cap,
+ CAPSET_HEAP_DEFAULT,
+ mblocks_allocated * MBLOCK_SIZE_W * sizeof(W_));
+
+ if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
+ if (live > max_residency) {
+ max_residency = live;
+ }
current_residency = live;
- residency_samples++;
- cumulative_residency += live;
- }
+ residency_samples++;
+ cumulative_residency += live;
+ traceEventHeapLive(cap,
+ CAPSET_HEAP_DEFAULT,
+ live * sizeof(W_));
+ }
if (slop > max_slop) max_slop = slop;
}
if (rub_bell) {
- debugBelch("\b\b\b \b\b\b");
- rub_bell = 0;
+ debugBelch("\b\b\b \b\b\b");
+ rub_bell = 0;
}
#if USE_PAPI
void
stat_startRP(void)
{
- Ticks user, elapsed;
+ Time user, elapsed;
getProcessTimes( &user, &elapsed );
RP_start_time = user;
#endif
double averageNumVisit)
{
- Ticks user, elapsed;
+ Time user, elapsed;
getProcessTimes( &user, &elapsed );
RP_tot_time += user - RP_start_time;
RPe_tot_time += elapsed - RPe_start_time;
- fprintf(prof_file, "Retainer Profiling: %d, at %f seconds\n",
+ fprintf(prof_file, "Retainer Profiling: %d, at %f seconds\n",
retainerGeneration, mut_user_time_during_RP());
#ifdef DEBUG_RETAINER
fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
void
stat_startHeapCensus(void)
{
- Ticks user, elapsed;
+ Time user, elapsed;
getProcessTimes( &user, &elapsed );
HC_start_time = user;
-------------------------------------------------------------------------- */
#ifdef PROFILING
void
-stat_endHeapCensus(void)
+stat_endHeapCensus(void)
{
- Ticks user, elapsed;
+ Time user, elapsed;
getProcessTimes( &user, &elapsed );
HC_tot_time += user - HC_start_time;
#define REPORT(counter) \
{ \
showStgWord64(counter,temp,rtsTrue/*commas*/); \
- statsPrintf(" (" #counter ") : %s\n",temp); \
+ statsPrintf(" (" #counter ") : %s\n",temp); \
}
/* Report the value of a counter as a percentage of another counter */
#define REPORT_PCT(counter,countertot) \
statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
- counter*100.0/countertot)
+ counter*100.0/countertot)
#define TICK_PRINT(arity) \
REPORT(SLOW_CALLS_##arity); \
#define TICK_PRINT_TOT(arity) \
statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
- SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
+ SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
+
+static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; }
+static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; }
-static inline Ticks get_init_cpu(void) { return end_init_cpu - start_init_cpu; }
-static inline Ticks get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; }
void
-stat_exit(int alloc)
+stat_exit (void)
{
generation *gen;
- Ticks gc_cpu = 0;
- Ticks gc_elapsed = 0;
- Ticks init_cpu = 0;
- Ticks init_elapsed = 0;
- Ticks mut_cpu = 0;
- Ticks mut_elapsed = 0;
- Ticks exit_cpu = 0;
- Ticks exit_elapsed = 0;
+ Time gc_cpu = 0;
+ Time gc_elapsed = 0;
+ Time init_cpu = 0;
+ Time init_elapsed = 0;
+ Time mut_cpu = 0;
+ Time mut_elapsed = 0;
+ Time exit_cpu = 0;
+ Time exit_elapsed = 0;
+ W_ tot_alloc;
+ W_ alloc;
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
- char temp[BIG_STRING_LEN];
- Ticks tot_cpu;
- Ticks tot_elapsed;
- nat i, g, total_collections = 0;
+ char temp[BIG_STRING_LEN];
+ Time tot_cpu;
+ Time tot_elapsed;
+ nat i, g, total_collections = 0;
+
+ getProcessTimes( &tot_cpu, &tot_elapsed );
+ tot_elapsed -= start_init_elapsed;
+
+ tot_alloc = calcTotalAllocated();
- getProcessTimes( &tot_cpu, &tot_elapsed );
- tot_elapsed -= start_init_elapsed;
+ // allocated since the last GC
+ alloc = tot_alloc - GC_tot_alloc;
+ GC_tot_alloc = tot_alloc;
- GC_tot_alloc += alloc;
+ /* Count total garbage collections */
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++)
+ total_collections += generations[g].collections;
- /* Count total garbage collections */
- for (g = 0; g < RtsFlags.GcFlags.generations; g++)
- total_collections += generations[g].collections;
+ /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
+ if (tot_cpu == 0.0) tot_cpu = 1;
+ if (tot_elapsed == 0.0) tot_elapsed = 1;
- /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
- if (tot_cpu == 0.0) tot_cpu = 1;
- if (tot_elapsed == 0.0) tot_elapsed = 1;
-
- if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
- statsPrintf("%9ld %9.9s %9.9s", (lnat)alloc*sizeof(W_), "", "");
- statsPrintf(" %5.2f %5.2f\n\n", 0.0, 0.0);
- }
+ if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
+ statsPrintf("%9" FMT_SizeT " %9.9s %9.9s", (W_)alloc*sizeof(W_), "", "");
+ statsPrintf(" %6.3f %6.3f\n\n", 0.0, 0.0);
+ }
for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
gc_cpu += GC_coll_cpu[i];
gc_elapsed += GC_coll_elapsed[i];
}
+ // heapCensus() is called by the GC, so RP and HC time are
+ // included in the GC stats. We therefore subtract them to
+ // obtain the actual GC cpu time.
+ gc_cpu -= PROF_VAL(RP_tot_time + HC_tot_time);
+ gc_elapsed -= PROF_VAL(RPe_tot_time + HCe_tot_time);
+
init_cpu = get_init_cpu();
init_elapsed = get_init_elapsed();
- PROF_VAL(RP_tot_time + HC_tot_time);
if (mut_cpu < 0) { mut_cpu = 0; }
- if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
- showStgWord64(GC_tot_alloc*sizeof(W_),
- temp, rtsTrue/*commas*/);
- statsPrintf("%16s bytes allocated in the heap\n", temp);
+ if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
+ showStgWord64(GC_tot_alloc*sizeof(W_),
+ temp, rtsTrue/*commas*/);
+ statsPrintf("%16s bytes allocated in the heap\n", temp);
- showStgWord64(GC_tot_copied*sizeof(W_),
- temp, rtsTrue/*commas*/);
- statsPrintf("%16s bytes copied during GC\n", temp);
+ showStgWord64(GC_tot_copied*sizeof(W_),
+ temp, rtsTrue/*commas*/);
+ statsPrintf("%16s bytes copied during GC\n", temp);
if ( residency_samples > 0 ) {
- showStgWord64(max_residency*sizeof(W_),
- temp, rtsTrue/*commas*/);
- statsPrintf("%16s bytes maximum residency (%ld sample(s))\n",
- temp, residency_samples);
- }
+ showStgWord64(max_residency*sizeof(W_),
+ temp, rtsTrue/*commas*/);
+ statsPrintf("%16s bytes maximum residency (%" FMT_Word " sample(s))\n",
+ temp, residency_samples);
+ }
- showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
- statsPrintf("%16s bytes maximum slop\n", temp);
+ showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
+ statsPrintf("%16s bytes maximum slop\n", temp);
- statsPrintf("%16ld MB total memory in use (%ld MB lost due to fragmentation)\n\n",
- peak_mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)),
- (peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
+ statsPrintf("%16" FMT_SizeT " MB total memory in use (%" FMT_SizeT " MB lost due to fragmentation)\n\n",
+ (size_t)(peak_mblocks_allocated * MBLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)),
+ (size_t)(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
- /* Print garbage collections in each gen */
- statsPrintf(" Tot time (elapsed) Avg pause Max pause\n");
+ /* Print garbage collections in each gen */
+ statsPrintf(" Tot time (elapsed) Avg pause Max pause\n");
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
gen = &generations[g];
- statsPrintf(" Gen %2d %5d colls, %5d par %5.2fs %5.2fs %3.4fs %3.4fs\n",
+ statsPrintf(" Gen %2d %5d colls, %5d par %6.3fs %6.3fs %3.4fs %3.4fs\n",
gen->no,
gen->collections,
gen->par_collections,
- TICK_TO_DBL(GC_coll_cpu[g]),
- TICK_TO_DBL(GC_coll_elapsed[g]),
- gen->collections == 0 ? 0 : TICK_TO_DBL(GC_coll_elapsed[g] / gen->collections),
- TICK_TO_DBL(GC_coll_max_pause[g]));
+ TimeToSecondsDbl(GC_coll_cpu[g]),
+ TimeToSecondsDbl(GC_coll_elapsed[g]),
+ gen->collections == 0 ? 0 : TimeToSecondsDbl(GC_coll_elapsed[g] / gen->collections),
+ TimeToSecondsDbl(GC_coll_max_pause[g]));
}
#if defined(THREADED_RTS)
- if (RtsFlags.ParFlags.parGcEnabled) {
- statsPrintf("\n Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n",
- (double)GC_par_avg_copied / (double)GC_par_max_copied,
- (lnat)GC_par_avg_copied, (lnat)GC_par_max_copied,
- RtsFlags.ParFlags.nNodes
+ if (RtsFlags.ParFlags.parGcEnabled && n_capabilities > 1) {
+ statsPrintf("\n Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n",
+ 100 * (((double)GC_par_tot_copied / (double)GC_par_max_copied) - 1)
+ / (n_capabilities - 1)
);
}
#endif
statsPrintf("\n");
#if defined(THREADED_RTS)
- {
- nat i;
- Task *task;
- statsPrintf(" MUT time (elapsed) GC time (elapsed)\n");
- for (i = 0, task = all_tasks;
- task != NULL;
- i++, task = task->all_link) {
- statsPrintf(" Task %2d %-8s : %6.2fs (%6.2fs) %6.2fs (%6.2fs)\n",
- i,
- (task->worker) ? "(worker)" : "(bound)",
- TICK_TO_DBL(task->mut_time),
- TICK_TO_DBL(task->mut_etime),
- TICK_TO_DBL(task->gc_time),
- TICK_TO_DBL(task->gc_etime));
- }
- }
-
- statsPrintf("\n");
+ statsPrintf(" TASKS: %d (%d bound, %d peak workers (%d total), using -N%d)\n",
+ taskCount, taskCount - workerCount,
+ peakWorkerCount, workerCount,
+ n_capabilities);
+
+ statsPrintf("\n");
{
nat i;
SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
for (i = 0; i < n_capabilities; i++) {
- sparks.created += capabilities[i].spark_stats.created;
- sparks.dud += capabilities[i].spark_stats.dud;
- sparks.overflowed+= capabilities[i].spark_stats.overflowed;
- sparks.converted += capabilities[i].spark_stats.converted;
- sparks.gcd += capabilities[i].spark_stats.gcd;
- sparks.fizzled += capabilities[i].spark_stats.fizzled;
+ sparks.created += capabilities[i]->spark_stats.created;
+ sparks.dud += capabilities[i]->spark_stats.dud;
+ sparks.overflowed+= capabilities[i]->spark_stats.overflowed;
+ sparks.converted += capabilities[i]->spark_stats.converted;
+ sparks.gcd += capabilities[i]->spark_stats.gcd;
+ sparks.fizzled += capabilities[i]->spark_stats.fizzled;
}
- statsPrintf(" SPARKS: %ld (%ld converted, %ld overflowed, %ld dud, %ld GC'd, %ld fizzled)\n\n",
+ statsPrintf(" SPARKS: %" FMT_Word " (%" FMT_Word " converted, %" FMT_Word " overflowed, %" FMT_Word " dud, %" FMT_Word " GC'd, %" FMT_Word " fizzled)\n\n",
sparks.created + sparks.dud + sparks.overflowed,
sparks.converted, sparks.overflowed, sparks.dud,
sparks.gcd, sparks.fizzled);
}
#endif
- statsPrintf(" INIT time %6.2fs (%6.2fs elapsed)\n",
- TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed));
+ statsPrintf(" INIT time %7.3fs (%7.3fs elapsed)\n",
+ TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed));
- statsPrintf(" MUT time %6.2fs (%6.2fs elapsed)\n",
- TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed));
- statsPrintf(" GC time %6.2fs (%6.2fs elapsed)\n",
- TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed));
+ statsPrintf(" MUT time %7.3fs (%7.3fs elapsed)\n",
+ TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed));
+ statsPrintf(" GC time %7.3fs (%7.3fs elapsed)\n",
+ TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
#ifdef PROFILING
- statsPrintf(" RP time %6.2fs (%6.2fs elapsed)\n",
- TICK_TO_DBL(RP_tot_time), TICK_TO_DBL(RPe_tot_time));
- statsPrintf(" PROF time %6.2fs (%6.2fs elapsed)\n",
- TICK_TO_DBL(HC_tot_time), TICK_TO_DBL(HCe_tot_time));
-#endif
- statsPrintf(" EXIT time %6.2fs (%6.2fs elapsed)\n",
- TICK_TO_DBL(exit_cpu), TICK_TO_DBL(exit_elapsed));
- statsPrintf(" Total time %6.2fs (%6.2fs elapsed)\n\n",
- TICK_TO_DBL(tot_cpu), TICK_TO_DBL(tot_elapsed));
+ statsPrintf(" RP time %7.3fs (%7.3fs elapsed)\n",
+ TimeToSecondsDbl(RP_tot_time), TimeToSecondsDbl(RPe_tot_time));
+ statsPrintf(" PROF time %7.3fs (%7.3fs elapsed)\n",
+ TimeToSecondsDbl(HC_tot_time), TimeToSecondsDbl(HCe_tot_time));
+#endif
+ statsPrintf(" EXIT time %7.3fs (%7.3fs elapsed)\n",
+ TimeToSecondsDbl(exit_cpu), TimeToSecondsDbl(exit_elapsed));
+ statsPrintf(" Total time %7.3fs (%7.3fs elapsed)\n\n",
+ TimeToSecondsDbl(tot_cpu), TimeToSecondsDbl(tot_elapsed));
#ifndef THREADED_RTS
- statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n",
- TICK_TO_DBL(gc_cpu)*100/TICK_TO_DBL(tot_cpu),
- TICK_TO_DBL(gc_elapsed)*100/TICK_TO_DBL(tot_elapsed));
+ statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n",
+ TimeToSecondsDbl(gc_cpu)*100/TimeToSecondsDbl(tot_cpu),
+ TimeToSecondsDbl(gc_elapsed)*100/TimeToSecondsDbl(tot_elapsed));
#endif
- if (tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time) == 0)
- showStgWord64(0, temp, rtsTrue/*commas*/);
- else
- showStgWord64(
- (StgWord64)((GC_tot_alloc*sizeof(W_))/
- TICK_TO_DBL(tot_cpu - GC_tot_cpu -
- PROF_VAL(RP_tot_time + HC_tot_time))),
- temp, rtsTrue/*commas*/);
-
- statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp);
-
- statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
- TICK_TO_DBL(tot_cpu - GC_tot_cpu -
- PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
- / TICK_TO_DBL(tot_cpu),
- TICK_TO_DBL(tot_cpu - GC_tot_cpu -
- PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
- / TICK_TO_DBL(tot_elapsed));
+ if (mut_cpu == 0) {
+ showStgWord64(0, temp, rtsTrue/*commas*/);
+ } else {
+ showStgWord64(
+ (StgWord64)((GC_tot_alloc*sizeof(W_)) / TimeToSecondsDbl(mut_cpu)),
+ temp, rtsTrue/*commas*/);
+ }
+
+ statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp);
+
+ statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
+ TimeToSecondsDbl(tot_cpu - gc_cpu -
+ PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
+ / TimeToSecondsDbl(tot_cpu),
+ TimeToSecondsDbl(tot_cpu - gc_cpu -
+ PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
+ / TimeToSecondsDbl(tot_elapsed));
/*
TICK_PRINT(1);
TICK_PRINT(2);
- REPORT(TOTAL_CALLS);
+ REPORT(TOTAL_CALLS);
TICK_PRINT_TOT(1);
TICK_PRINT_TOT(2);
*/
#if defined(THREADED_RTS) && defined(PROF_SPIN)
{
nat g;
-
+
statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
}
}
#endif
- }
+ }
- if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
+ if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
char *fmt1, *fmt2;
if (RtsFlags.MiscFlags.machineReadable) {
fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
" ,(\"max_bytes_used\", \"%ld\")\n"
" ,(\"num_byte_usage_samples\", \"%ld\")\n"
" ,(\"peak_megabytes_allocated\", \"%lu\")\n"
- " ,(\"init_cpu_seconds\", \"%.2f\")\n"
- " ,(\"init_wall_seconds\", \"%.2f\")\n"
- " ,(\"mutator_cpu_seconds\", \"%.2f\")\n"
- " ,(\"mutator_wall_seconds\", \"%.2f\")\n"
- " ,(\"GC_cpu_seconds\", \"%.2f\")\n"
- " ,(\"GC_wall_seconds\", \"%.2f\")\n"
+ " ,(\"init_cpu_seconds\", \"%.3f\")\n"
+ " ,(\"init_wall_seconds\", \"%.3f\")\n"
+ " ,(\"mutator_cpu_seconds\", \"%.3f\")\n"
+ " ,(\"mutator_wall_seconds\", \"%.3f\")\n"
+ " ,(\"GC_cpu_seconds\", \"%.3f\")\n"
+ " ,(\"GC_wall_seconds\", \"%.3f\")\n"
" ]\n";
}
else {
fmt1 = "<<ghc: %llu bytes, ";
- fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.2f INIT (%.2f elapsed), %.2f MUT (%.2f elapsed), %.2f GC (%.2f elapsed) :ghc>>\n";
+ fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.3f INIT (%.3f elapsed), %.3f MUT (%.3f elapsed), %.3f GC (%.3f elapsed) :ghc>>\n";
}
- /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
- statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
- statsPrintf(fmt2,
- total_collections,
- residency_samples == 0 ? 0 :
- cumulative_residency*sizeof(W_)/residency_samples,
- max_residency*sizeof(W_),
- residency_samples,
- (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
- TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed),
- TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed),
- TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed));
- }
-
- statsFlush();
- statsClose();
+ /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
+ statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
+ statsPrintf(fmt2,
+ total_collections,
+ residency_samples == 0 ? 0 :
+ cumulative_residency*sizeof(W_)/residency_samples,
+ max_residency*sizeof(W_),
+ residency_samples,
+ (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
+ TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed),
+ TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed),
+ TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
+ }
+
+ statsFlush();
+ statsClose();
}
if (GC_coll_cpu) {
statDescribeGens(void)
{
nat g, mut, lge, i;
- lnat gen_slop;
- lnat tot_live, tot_slop;
- lnat gen_live, gen_blocks;
+ W_ gen_slop;
+ W_ tot_live, tot_slop;
+ W_ gen_live, gen_blocks;
bdescr *bd;
generation *gen;
-
+
debugBelch(
"----------------------------------------------------------\n"
" Gen Max Mut-list Blocks Large Live Slop\n"
mut = 0;
for (i = 0; i < n_capabilities; i++) {
- mut += countOccupied(capabilities[i].mut_lists[g]);
+ mut += countOccupied(capabilities[i]->mut_lists[g]);
// Add the pinned object block.
- bd = capabilities[i].pinned_object_block;
+ bd = capabilities[i]->pinned_object_block;
if (bd != NULL) {
gen_live += bd->free - bd->start;
gen_blocks += bd->blocks;
gen_blocks += gcThreadLiveBlocks(i,g);
}
- debugBelch("%5d %7ld %9d", g, (lnat)gen->max_blocks, mut);
+ debugBelch("%5d %7" FMT_Word " %9d", g, (W_)gen->max_blocks, mut);
gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;
- debugBelch("%8ld %8d %8ld %8ld\n", gen_blocks, lge,
- gen_live*sizeof(W_), gen_slop*sizeof(W_));
+ debugBelch("%8" FMT_Word " %8d %8" FMT_Word " %8" FMT_Word "\n", gen_blocks, lge,
+ gen_live*(W_)sizeof(W_), gen_slop*(W_)sizeof(W_));
tot_live += gen_live;
tot_slop += gen_slop;
}
debugBelch("----------------------------------------------------------\n");
- debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_));
+ debugBelch("%41s%8" FMT_SizeT " %8" FMT_SizeT "\n",
+ "",tot_live*sizeof(W_),tot_slop*sizeof(W_));
debugBelch("----------------------------------------------------------\n");
debugBelch("\n");
}
each compilation and expression evaluation.
-------------------------------------------------------------------------- */
-extern HsInt64 getAllocations( void )
+extern HsInt64 getAllocations( void )
{ return (HsInt64)GC_tot_alloc * sizeof(W_); }
/* EZY: I'm not convinced I got all the casting right. */
+extern rtsBool getGCStatsEnabled( void )
+{
+ return RtsFlags.GcFlags.giveStats != NO_GC_STATS;
+}
+
extern void getGCStats( GCStats *s )
{
nat total_collections = 0;
nat g;
- Ticks gc_cpu = 0;
- Ticks gc_elapsed = 0;
- Ticks current_elapsed = 0;
- Ticks current_cpu = 0;
+ Time gc_cpu = 0;
+ Time gc_elapsed = 0;
+ Time current_elapsed = 0;
+ Time current_cpu = 0;
getProcessTimes(¤t_cpu, ¤t_elapsed);
s->current_bytes_used = current_residency*(StgWord64)sizeof(W_);
s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_);
/*
- s->init_cpu_seconds = TICK_TO_DBL(get_init_cpu());
- s->init_wall_seconds = TICK_TO_DBL(get_init_elapsed());
+ s->init_cpu_seconds = TimeToSecondsDbl(get_init_cpu());
+ s->init_wall_seconds = TimeToSecondsDbl(get_init_elapsed());
*/
- s->mutator_cpu_seconds = TICK_TO_DBL(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
- s->mutator_wall_seconds = TICK_TO_DBL(current_elapsed- end_init_elapsed - gc_elapsed);
- s->gc_cpu_seconds = TICK_TO_DBL(gc_cpu);
- s->gc_wall_seconds = TICK_TO_DBL(gc_elapsed);
- s->par_avg_bytes_copied = GC_par_avg_copied*(StgWord64)sizeof(W_);
+ s->mutator_cpu_seconds = TimeToSecondsDbl(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
+ s->mutator_wall_seconds = TimeToSecondsDbl(current_elapsed- end_init_elapsed - gc_elapsed);
+ s->gc_cpu_seconds = TimeToSecondsDbl(gc_cpu);
+ s->gc_wall_seconds = TimeToSecondsDbl(gc_elapsed);
+ /* EZY: Being consistent with incremental output, but maybe should also discount init */
+ s->cpu_seconds = TimeToSecondsDbl(current_cpu);
+ s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed);
+ s->par_tot_bytes_copied = GC_par_tot_copied*(StgWord64)sizeof(W_);
s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_);
}
// extern void getTaskStats( TaskStats **s ) {}
s->gcd = 0;
s->fizzled = 0;
for (i = 0; i < n_capabilities; i++) {
- s->created += capabilities[i].spark_stats.created;
- s->dud += capabilities[i].spark_stats.dud;
- s->overflowed+= capabilities[i].spark_stats.overflowed;
- s->converted += capabilities[i].spark_stats.converted;
- s->gcd += capabilities[i].spark_stats.gcd;
- s->fizzled += capabilities[i].spark_stats.fizzled;
+ s->created += capabilities[i]->spark_stats.created;
+ s->dud += capabilities[i]->spark_stats.dud;
+ s->overflowed+= capabilities[i]->spark_stats.overflowed;
+ s->converted += capabilities[i]->spark_stats.converted;
+ s->gcd += capabilities[i]->spark_stats.gcd;
+ s->fizzled += capabilities[i]->spark_stats.fizzled;
}
}
#endif
{
FILE *sf = RtsFlags.GcFlags.statsFile;
va_list ap;
-
+
va_start(ap,s);
if (sf == NULL) {
- vdebugBelch(s,ap);
+ vdebugBelch(s,ap);
} else {
- vfprintf(sf, s, ap);
+ vfprintf(sf, s, ap);
}
va_end(ap);
}
{
FILE *sf = RtsFlags.GcFlags.statsFile;
if (sf != NULL) {
- fflush(sf);
+ fflush(sf);
}
}
{
FILE *sf = RtsFlags.GcFlags.statsFile;
if (sf != NULL) {
- fclose(sf);
+ fclose(sf);
}
}