Revert "rts, base: Refactor stats.c to improve --machine-readable report"
[ghc.git] / rts / Stats.c
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2005
 *
 * Statistics and timing-related functions.
 *
 * ---------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"

#include "RtsFlags.h"
#include "RtsUtils.h"
#include "Schedule.h"
#include "Stats.h"
#include "Profiling.h"
#include "GetTime.h"
#include "sm/Storage.h"
#include "sm/GCThread.h"
#include "sm/BlockAlloc.h"

// for spin/yield counters
#include "sm/GC.h"
#include "ThreadPaused.h"
#include "Messages.h"


#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)

static Time
    start_init_cpu, start_init_elapsed,
    end_init_cpu, end_init_elapsed,
    start_exit_cpu, start_exit_elapsed,
    start_exit_gc_elapsed, start_exit_gc_cpu,
    end_exit_cpu, end_exit_elapsed;

#if defined(PROFILING)
static Time RP_start_time = 0, RP_tot_time = 0;   // retainer prof user time
static Time RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time

static Time HC_start_time, HC_tot_time = 0;   // heap census prof user time
static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
#endif

#if defined(PROFILING)
#define PROF_VAL(x)   (x)
#else
#define PROF_VAL(x)   0
#endif

#if defined(PROF_SPIN)
volatile StgWord64 whitehole_lockClosure_spin = 0;
volatile StgWord64 whitehole_lockClosure_yield = 0;
volatile StgWord64 whitehole_threadPaused_spin = 0;
volatile StgWord64 whitehole_executeMessage_spin = 0;
#endif

//
// All the stats!
//
// This is where we accumulate all the stats during execution, and it's also
// in a convenient form that we can copy over to a caller of getRTSStats().
//
static RTSStats stats;

static W_ GC_end_faults = 0;

static Time *GC_coll_cpu = NULL;
static Time *GC_coll_elapsed = NULL;
static Time *GC_coll_max_pause = NULL;

static void statsPrintf( char *s, ... ) GNUC3_ATTRIBUTE(format (PRINTF, 1, 2));
static void statsFlush( void );
static void statsClose( void );

/* -----------------------------------------------------------------------------
   Current elapsed time
   ------------------------------------------------------------------------- */

Time stat_getElapsedTime(void)
{
    return getProcessElapsedTime() - start_init_elapsed;
}

/* ---------------------------------------------------------------------------
   Measure the current MUT time, for profiling
   ------------------------------------------------------------------------ */

double
mut_user_time_until( Time t )
{
    return TimeToSecondsDbl(t - stats.gc_cpu_ns);
    // heapCensus() time is included in GC_tot_cpu, so we don't need
    // to subtract it here.
}

double
mut_user_time( void )
{
    Time cpu;
    cpu = getProcessCPUTime();
    return mut_user_time_until(cpu);
}
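// Worked example (illustrative numbers): if the process has used 10.0s of
// CPU so far, and stats.gc_cpu_ns accounts for 3.0s of that, then
// mut_user_time() == 7.0 -- MUT time is simply total CPU time minus the
// CPU time attributed to GC.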

#if defined(PROFILING)
/*
  mut_user_time_during_RP() returns the MUT time during retainer profiling,
  i.e. the mutator CPU time accumulated up to the point at which the current
  retainer-profiling pass started.
*/
static double
mut_user_time_during_RP( void )
{
    return TimeToSecondsDbl(RP_start_time - stats.gc_cpu_ns - RP_tot_time);
}

#endif /* PROFILING */

/* ---------------------------------------------------------------------------
   initStats0() has no dependencies, it can be called right at the beginning
   ------------------------------------------------------------------------ */

void
initStats0(void)
{
    start_init_cpu = 0;
    start_init_elapsed = 0;
    end_init_cpu = 0;
    end_init_elapsed = 0;

    start_exit_cpu = 0;
    start_exit_elapsed = 0;
    start_exit_gc_cpu = 0;
    start_exit_gc_elapsed = 0;
    end_exit_cpu = 0;
    end_exit_elapsed = 0;

#if defined(PROFILING)
    RP_start_time = 0;
    RP_tot_time = 0;
    RPe_start_time = 0;
    RPe_tot_time = 0;

    HC_start_time = 0;
    HC_tot_time = 0;
    HCe_start_time = 0;
    HCe_tot_time = 0;
#endif

    GC_end_faults = 0;

    stats = (RTSStats) {
        .gcs = 0,
        .major_gcs = 0,
        .allocated_bytes = 0,
        .max_live_bytes = 0,
        .max_large_objects_bytes = 0,
        .max_compact_bytes = 0,
        .max_slop_bytes = 0,
        .max_mem_in_use_bytes = 0,
        .cumulative_live_bytes = 0,
        .copied_bytes = 0,
        .par_copied_bytes = 0,
        .cumulative_par_max_copied_bytes = 0,
        .cumulative_par_balanced_copied_bytes = 0,
        .gc_spin_spin = 0,
        .gc_spin_yield = 0,
        .mut_spin_spin = 0,
        .mut_spin_yield = 0,
        .any_work = 0,
        .no_work = 0,
        .scav_find_work = 0,
        .mutator_cpu_ns = 0,
        .mutator_elapsed_ns = 0,
        .gc_cpu_ns = 0,
        .gc_elapsed_ns = 0,
        .cpu_ns = 0,
        .elapsed_ns = 0,
        .gc = {
            .gen = 0,
            .threads = 0,
            .allocated_bytes = 0,
            .live_bytes = 0,
            .large_objects_bytes = 0,
            .compact_bytes = 0,
            .slop_bytes = 0,
            .mem_in_use_bytes = 0,
            .copied_bytes = 0,
            .par_max_copied_bytes = 0,
            .par_balanced_copied_bytes = 0,
            .sync_elapsed_ns = 0,
            .cpu_ns = 0,
            .elapsed_ns = 0
        }
    };
}

/* ---------------------------------------------------------------------------
   initStats1() can be called after setupRtsFlags()
   ------------------------------------------------------------------------ */

void
initStats1 (void)
{
    uint32_t i;

    if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
        statsPrintf("    Alloc    Copied      Live    GC    GC     TOT     TOT  Page Flts\n");
        statsPrintf("    bytes     bytes     bytes  user  elap    user    elap\n");
    }
    GC_coll_cpu =
        (Time *)stgMallocBytes(
            sizeof(Time)*RtsFlags.GcFlags.generations,
            "initStats");
    GC_coll_elapsed =
        (Time *)stgMallocBytes(
            sizeof(Time)*RtsFlags.GcFlags.generations,
            "initStats");
    GC_coll_max_pause =
        (Time *)stgMallocBytes(
            sizeof(Time)*RtsFlags.GcFlags.generations,
            "initStats");
    for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
        GC_coll_cpu[i] = 0;
        GC_coll_elapsed[i] = 0;
        GC_coll_max_pause[i] = 0;
    }
}

/* -----------------------------------------------------------------------------
   Initialisation time...
   -------------------------------------------------------------------------- */

void
stat_startInit(void)
{
    getProcessTimes(&start_init_cpu, &start_init_elapsed);
}

void
stat_endInit(void)
{
    getProcessTimes(&end_init_cpu, &end_init_elapsed);
}

/* -----------------------------------------------------------------------------
   stat_startExit and stat_endExit

   These two measure the time taken in shutdownHaskell().
   -------------------------------------------------------------------------- */

void
stat_startExit(void)
{
    getProcessTimes(&start_exit_cpu, &start_exit_elapsed);
    start_exit_gc_elapsed = stats.gc_elapsed_ns;
    start_exit_gc_cpu = stats.gc_cpu_ns;
}

void
stat_endExit(void)
{
    getProcessTimes(&end_exit_cpu, &end_exit_elapsed);
}

void
stat_startGCSync (gc_thread *gct)
{
    gct->gc_sync_start_elapsed = getProcessElapsedTime();
}

/* -----------------------------------------------------------------------------
   Called at the beginning of each GC
   -------------------------------------------------------------------------- */

void
stat_startGC (Capability *cap, gc_thread *gct)
{
    if (RtsFlags.GcFlags.ringBell) {
        debugBelch("\007");
    }

    getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);

    // Post EVENT_GC_START with the same timestamp as used for stats
    // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
    // Here, as opposed to other places, the event is emitted on the cap
    // that initiates the GC and external tools expect it to have the same
    // timestamp as used in +RTS -s calculations.
    traceEventGcStartAtT(cap,
                         TimeToNS(gct->gc_start_elapsed - start_init_elapsed));

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
    {
        gct->gc_start_faults = getPageFaults();
    }

    updateNurseriesStats();
}

/* -----------------------------------------------------------------------------
   Called at the end of each GC
   -------------------------------------------------------------------------- */

void
stat_endGC (Capability *cap, gc_thread *gct, W_ live, W_ copied, W_ slop,
            uint32_t gen, uint32_t par_n_threads, W_ par_max_copied,
            W_ par_balanced_copied, W_ gc_spin_spin, W_ gc_spin_yield,
            W_ mut_spin_spin, W_ mut_spin_yield, W_ any_work, W_ no_work,
            W_ scav_find_work)
{
    // -------------------------------------------------
    // Collect all the stats about this GC in stats.gc. We always do this since
    // it's relatively cheap and we need allocated_bytes to catch heap
    // overflows.

    stats.gc.gen = gen;
    stats.gc.threads = par_n_threads;

    uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_);

    // allocated since the last GC
    stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes;

    stats.gc.live_bytes = live * sizeof(W_);
    stats.gc.large_objects_bytes = calcTotalLargeObjectsW() * sizeof(W_);
    stats.gc.compact_bytes = calcTotalCompactW() * sizeof(W_);
    stats.gc.slop_bytes = slop * sizeof(W_);
    stats.gc.mem_in_use_bytes = mblocks_allocated * MBLOCK_SIZE;
    stats.gc.copied_bytes = copied * sizeof(W_);
    stats.gc.par_max_copied_bytes = par_max_copied * sizeof(W_);
    stats.gc.par_balanced_copied_bytes = par_balanced_copied * sizeof(W_);

    bool stats_enabled =
        RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
        rtsConfig.gcDoneHook != NULL;

    if (stats_enabled
      || RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time
    {
        // We only update the times when stats are explicitly enabled since
        // getProcessTimes (e.g. requiring a system call) can be expensive on
        // some platforms.
        Time current_cpu, current_elapsed;
        getProcessTimes(&current_cpu, &current_elapsed);
        stats.cpu_ns = current_cpu - start_init_cpu;
        stats.elapsed_ns = current_elapsed - start_init_elapsed;

        stats.gc.sync_elapsed_ns =
            gct->gc_start_elapsed - gct->gc_sync_start_elapsed;
        stats.gc.elapsed_ns = current_elapsed - gct->gc_start_elapsed;
        stats.gc.cpu_ns = current_cpu - gct->gc_start_cpu;
    }
    // -------------------------------------------------
    // Update the cumulative stats

    stats.gcs++;
    stats.allocated_bytes = tot_alloc_bytes;
    stats.max_mem_in_use_bytes = peak_mblocks_allocated * MBLOCK_SIZE;

    GC_coll_cpu[gen] += stats.gc.cpu_ns;
    GC_coll_elapsed[gen] += stats.gc.elapsed_ns;
    if (GC_coll_max_pause[gen] < stats.gc.elapsed_ns) {
        GC_coll_max_pause[gen] = stats.gc.elapsed_ns;
    }

    stats.copied_bytes += stats.gc.copied_bytes;
    if (par_n_threads > 1) {
        stats.par_copied_bytes += stats.gc.copied_bytes;
        stats.cumulative_par_max_copied_bytes +=
            stats.gc.par_max_copied_bytes;
        stats.cumulative_par_balanced_copied_bytes +=
            stats.gc.par_balanced_copied_bytes;
        stats.any_work += any_work;
        stats.no_work += no_work;
        stats.scav_find_work += scav_find_work;
        stats.gc_spin_spin += gc_spin_spin;
        stats.gc_spin_yield += gc_spin_yield;
        stats.mut_spin_spin += mut_spin_spin;
        stats.mut_spin_yield += mut_spin_yield;
    }
    stats.gc_cpu_ns += stats.gc.cpu_ns;
    stats.gc_elapsed_ns += stats.gc.elapsed_ns;

    if (gen == RtsFlags.GcFlags.generations-1) { // major GC?
        stats.major_gcs++;
        if (stats.gc.live_bytes > stats.max_live_bytes) {
            stats.max_live_bytes = stats.gc.live_bytes;
        }
        if (stats.gc.large_objects_bytes > stats.max_large_objects_bytes) {
            stats.max_large_objects_bytes = stats.gc.large_objects_bytes;
        }
        if (stats.gc.compact_bytes > stats.max_compact_bytes) {
            stats.max_compact_bytes = stats.gc.compact_bytes;
        }
        if (stats.gc.slop_bytes > stats.max_slop_bytes) {
            stats.max_slop_bytes = stats.gc.slop_bytes;
        }
        stats.cumulative_live_bytes += stats.gc.live_bytes;
    }

    // -------------------------------------------------
    // Do the more expensive bits only when stats are enabled.

    if (stats_enabled)
    {
        // -------------------------------------------------
        // Emit events to the event log

        // Has to be emitted while all caps stopped for GC, but before GC_END.
        // See trac.haskell.org/ThreadScope/wiki/RTSsummaryEvents
        // for a detailed design rationale of the current setup
        // of GC eventlog events.
        traceEventGcGlobalSync(cap);

        // Emitted before GC_END on all caps, which simplifies tools code.
        traceEventGcStats(cap,
                          CAPSET_HEAP_DEFAULT,
                          stats.gc.gen,
                          stats.gc.copied_bytes,
                          stats.gc.slop_bytes,
                          /* current loss due to fragmentation */
                          (mblocks_allocated * BLOCKS_PER_MBLOCK - n_alloc_blocks)
                              * BLOCK_SIZE,
                          par_n_threads,
                          stats.gc.par_max_copied_bytes,
                          stats.gc.copied_bytes,
                          stats.gc.par_balanced_copied_bytes);

        // Post EVENT_GC_END with the same timestamp as used for stats
        // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
        // Here, as opposed to other places, the event is emitted on the cap
        // that initiates the GC and external tools expect it to have the same
        // timestamp as used in +RTS -s calculations.
        traceEventGcEndAtT(cap, TimeToNS(stats.elapsed_ns));

        if (gen == RtsFlags.GcFlags.generations-1) { // major GC?
            traceEventHeapLive(cap,
                               CAPSET_HEAP_DEFAULT,
                               stats.gc.live_bytes);
        }

        // -------------------------------------------------
        // Print GC stats to stdout or a file (+RTS -S/-s)

        if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
            W_ faults = getPageFaults();

            statsPrintf("%9" FMT_Word64 " %9" FMT_Word64 " %9" FMT_Word64,
                        stats.gc.allocated_bytes, stats.gc.copied_bytes,
                        stats.gc.live_bytes);

            statsPrintf(" %6.3f %6.3f %8.3f %8.3f %4"
                        FMT_Word " %4" FMT_Word " (Gen: %2d)\n",
                        TimeToSecondsDbl(stats.gc.cpu_ns),
                        TimeToSecondsDbl(stats.gc.elapsed_ns),
                        TimeToSecondsDbl(stats.cpu_ns),
                        TimeToSecondsDbl(stats.elapsed_ns),
                        faults - gct->gc_start_faults,
                        gct->gc_start_faults - GC_end_faults,
                        gen);

            GC_end_faults = faults;
            statsFlush();
        }


        if (rtsConfig.gcDoneHook != NULL) {
            rtsConfig.gcDoneHook(&stats.gc);
        }

        traceEventHeapSize(cap,
                           CAPSET_HEAP_DEFAULT,
                           mblocks_allocated * MBLOCK_SIZE);
    }
}

/* -----------------------------------------------------------------------------
   Called at the beginning of each Retainer Profiling
   -------------------------------------------------------------------------- */
#if defined(PROFILING)
void
stat_startRP(void)
{
    Time user, elapsed;
    getProcessTimes( &user, &elapsed );

    RP_start_time = user;
    RPe_start_time = elapsed;
}
#endif /* PROFILING */

/* -----------------------------------------------------------------------------
   Called at the end of each Retainer Profiling
   -------------------------------------------------------------------------- */

#if defined(PROFILING)
void
stat_endRP(
    uint32_t retainerGeneration,
#if defined(DEBUG_RETAINER)
    uint32_t maxCStackSize,
    int maxStackSize,
#endif
    double averageNumVisit)
{
    Time user, elapsed;
    getProcessTimes( &user, &elapsed );

    RP_tot_time += user - RP_start_time;
    RPe_tot_time += elapsed - RPe_start_time;

    fprintf(prof_file, "Retainer Profiling: %u, at %f seconds\n",
            retainerGeneration, mut_user_time_during_RP());
#if defined(DEBUG_RETAINER)
    fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
    fprintf(prof_file, "\tMax auxiliary stack size = %d\n", maxStackSize);
#endif
    fprintf(prof_file, "\tAverage number of visits per object = %f\n",
            averageNumVisit);
}
#endif /* PROFILING */
/* -----------------------------------------------------------------------------
   Called at the beginning of each heap census
   -------------------------------------------------------------------------- */
#if defined(PROFILING)
void
stat_startHeapCensus(void)
{
    Time user, elapsed;
    getProcessTimes( &user, &elapsed );

    HC_start_time = user;
    HCe_start_time = elapsed;
}
#endif /* PROFILING */

/* -----------------------------------------------------------------------------
   Called at the end of each heap census
   -------------------------------------------------------------------------- */
#if defined(PROFILING)
void
stat_endHeapCensus(void)
{
    Time user, elapsed;
    getProcessTimes( &user, &elapsed );

    HC_tot_time += user - HC_start_time;
    HCe_tot_time += elapsed - HCe_start_time;
}
#endif /* PROFILING */

/* -----------------------------------------------------------------------------
   Called at the end of execution

   NOTE: number of allocations is not entirely accurate: it doesn't
   take into account the few bytes at the end of the heap that
   were left unused when the heap-check failed.
   -------------------------------------------------------------------------- */

#if defined(DEBUG)
#define TICK_VAR_INI(arity) \
  StgInt SLOW_CALLS_##arity = 1; \
  StgInt RIGHT_ARITY_##arity = 1; \
  StgInt TAGGED_PTR_##arity = 0;

TICK_VAR_INI(1)
TICK_VAR_INI(2)

StgInt TOTAL_CALLS=1;
#endif

/* Report the value of a counter */
#define REPORT(counter) \
  { \
    showStgWord64(counter,temp,true/*commas*/); \
    statsPrintf(" (" #counter ") : %s\n",temp); \
  }

/* Report the value of a counter as a percentage of another counter */
#define REPORT_PCT(counter,countertot) \
  statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
              counter*100.0/countertot)

#define TICK_PRINT(arity) \
  REPORT(SLOW_CALLS_##arity); \
  REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \
  REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \
  REPORT(RIGHT_ARITY_##arity); \
  REPORT(TAGGED_PTR_##arity)

#define TICK_PRINT_TOT(arity) \
  statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
              SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
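
// For instance (illustrative expansion), TICK_PRINT_TOT(1) expands to:
//
//     statsPrintf(" (SLOW_CALLS_1) %% of (TOTAL_CALLS) : %.1f%%\n",
//                 SLOW_CALLS_1 * 100.0/TOTAL_CALLS);
//
// i.e. each of these macros just stamps out a statsPrintf call with the
// counter's name spliced into the format string via # stringisation.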

STATIC_INLINE Time get_init_cpu(void) {
    return end_init_cpu - start_init_cpu;
}

STATIC_INLINE Time get_init_elapsed(void) {
    return end_init_elapsed - start_init_elapsed;
}


void
stat_exit (void)
{
    generation *gen;
    Time gc_cpu = 0;
    Time gc_elapsed = 0;
    Time init_cpu = 0;
    Time init_elapsed = 0;
    Time mut_cpu = 0;
    Time mut_elapsed = 0;
    Time exit_cpu = 0;
    Time exit_elapsed = 0;
    Time exit_gc_cpu = 0;
    Time exit_gc_elapsed = 0;

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {

        char temp[512];
        Time tot_cpu;
        Time tot_elapsed;
        uint32_t g;

        getProcessTimes( &tot_cpu, &tot_elapsed );
        tot_cpu -= start_init_cpu;
        tot_elapsed -= start_init_elapsed;

        uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_);

        // allocated since the last GC
        stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes;
        stats.allocated_bytes = tot_alloc_bytes;

        /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
        if (tot_cpu <= 0) tot_cpu = 1;
        if (tot_elapsed <= 0) tot_elapsed = 1;

        if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
            statsPrintf("%9" FMT_Word " %9.9s %9.9s",
                        (W_)stats.gc.allocated_bytes, "", "");
            statsPrintf(" %6.3f %6.3f\n\n", 0.0, 0.0);
        }

        // heapCensus() is called by the GC, so RP and HC time are
        // included in the GC stats. We therefore subtract them to
        // obtain the actual GC cpu time.
        gc_cpu = stats.gc_cpu_ns - PROF_VAL(RP_tot_time + HC_tot_time);
        gc_elapsed = stats.gc_elapsed_ns - PROF_VAL(RPe_tot_time + HCe_tot_time);

        init_cpu = get_init_cpu();
        init_elapsed = get_init_elapsed();

        // We do a GC during the EXIT phase. We'll attribute the cost of that
        // to GC instead of EXIT, so carefully subtract it from the EXIT time.
        exit_gc_cpu = stats.gc_cpu_ns - start_exit_gc_cpu;
        exit_gc_elapsed = stats.gc_elapsed_ns - start_exit_gc_elapsed;
        exit_cpu = end_exit_cpu - start_exit_cpu - exit_gc_cpu;
        exit_elapsed = end_exit_elapsed - start_exit_elapsed - exit_gc_elapsed;

        mut_elapsed = start_exit_elapsed - end_init_elapsed -
            (gc_elapsed - exit_gc_elapsed) -
            PROF_VAL(RPe_tot_time + HCe_tot_time);

        mut_cpu = start_exit_cpu - end_init_cpu - (gc_cpu - exit_gc_cpu)
            - PROF_VAL(RP_tot_time + HC_tot_time);
        if (mut_cpu < 0) { mut_cpu = 0; }

        // The subdivision of runtime into INIT/EXIT/GC/MUT is just adding and
        // subtracting, so the parts should add up to the total exactly. Note
        // that tot_elapsed is captured a tiny bit later than end_exit_elapsed,
        // so we don't use it here.
        ASSERT(init_elapsed + mut_elapsed + gc_elapsed + exit_elapsed
               == end_exit_elapsed - start_init_elapsed);


        if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
            showStgWord64(stats.allocated_bytes, temp, true/*commas*/);
            statsPrintf("%16s bytes allocated in the heap\n", temp);

            showStgWord64(stats.copied_bytes, temp, true/*commas*/);
            statsPrintf("%16s bytes copied during GC\n", temp);

            if ( stats.major_gcs > 0 ) {
                showStgWord64(stats.max_live_bytes, temp, true/*commas*/);
                statsPrintf("%16s bytes maximum residency (%" FMT_Word32
                            " sample(s))\n",
                            temp, stats.major_gcs);
            }

            showStgWord64(stats.max_slop_bytes, temp, true/*commas*/);
            statsPrintf("%16s bytes maximum slop\n", temp);

            statsPrintf("%16" FMT_SizeT " MB total memory in use (%"
                        FMT_SizeT " MB lost due to fragmentation)\n\n",
                        (size_t)(peak_mblocks_allocated * MBLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)),
                        (size_t)(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));

            /* Print garbage collections in each gen */
            statsPrintf("                                     Tot time (elapsed)  Avg pause  Max pause\n");
            for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                gen = &generations[g];
                statsPrintf("  Gen %2d     %5d colls, %5d par   %6.3fs  %6.3fs     %3.4fs    %3.4fs\n",
                            gen->no,
                            gen->collections,
                            gen->par_collections,
                            TimeToSecondsDbl(GC_coll_cpu[g]),
                            TimeToSecondsDbl(GC_coll_elapsed[g]),
                            gen->collections == 0 ? 0 : TimeToSecondsDbl(GC_coll_elapsed[g] / gen->collections),
                            TimeToSecondsDbl(GC_coll_max_pause[g]));
            }

#if defined(THREADED_RTS)
            if (RtsFlags.ParFlags.parGcEnabled && stats.par_copied_bytes > 0) {
                // See Note [Work Balance]
                statsPrintf("\n  Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n",
                            100 * (double)stats.cumulative_par_balanced_copied_bytes /
                            (double)stats.par_copied_bytes);
            }
#endif
            statsPrintf("\n");

#if defined(THREADED_RTS)
            statsPrintf("  TASKS: %d (%d bound, %d peak workers (%d total), using -N%d)\n",
                        taskCount, taskCount - workerCount,
                        peakWorkerCount, workerCount,
                        n_capabilities);

            statsPrintf("\n");

            {
                uint32_t i;
                SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
                for (i = 0; i < n_capabilities; i++) {
                    sparks.created   += capabilities[i]->spark_stats.created;
                    sparks.dud       += capabilities[i]->spark_stats.dud;
                    sparks.overflowed+= capabilities[i]->spark_stats.overflowed;
                    sparks.converted += capabilities[i]->spark_stats.converted;
                    sparks.gcd       += capabilities[i]->spark_stats.gcd;
                    sparks.fizzled   += capabilities[i]->spark_stats.fizzled;
                }

                statsPrintf("  SPARKS: %" FMT_Word " (%" FMT_Word " converted, %" FMT_Word " overflowed, %" FMT_Word " dud, %" FMT_Word " GC'd, %" FMT_Word " fizzled)\n\n",
                            sparks.created + sparks.dud + sparks.overflowed,
                            sparks.converted, sparks.overflowed, sparks.dud,
                            sparks.gcd, sparks.fizzled);
            }
#endif

            statsPrintf("  INIT    time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed));

            statsPrintf("  MUT     time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed));
            statsPrintf("  GC      time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));

#if defined(PROFILING)
            statsPrintf("  RP      time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(RP_tot_time), TimeToSecondsDbl(RPe_tot_time));
            statsPrintf("  PROF    time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(HC_tot_time), TimeToSecondsDbl(HCe_tot_time));
#endif
            statsPrintf("  EXIT    time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(exit_cpu), TimeToSecondsDbl(exit_elapsed));
            statsPrintf("  Total   time  %7.3fs  (%7.3fs elapsed)\n\n",
                        TimeToSecondsDbl(tot_cpu), TimeToSecondsDbl(tot_elapsed));
#if !defined(THREADED_RTS)
            statsPrintf("  %%GC     time  %5.1f%%  (%.1f%% elapsed)\n\n",
                        TimeToSecondsDbl(gc_cpu)*100/TimeToSecondsDbl(tot_cpu),
                        TimeToSecondsDbl(gc_elapsed)*100/TimeToSecondsDbl(tot_elapsed));
#endif

            if (mut_cpu == 0) {
                showStgWord64(0, temp, true/*commas*/);
            } else {
                showStgWord64(
                    (StgWord64)((double)stats.allocated_bytes /
                                TimeToSecondsDbl(mut_cpu)),
                    temp, true/*commas*/);
            }

            statsPrintf("  Alloc rate    %s bytes per MUT second\n\n", temp);

            statsPrintf("  Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
                        TimeToSecondsDbl(tot_cpu - gc_cpu -
                                         PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                        / TimeToSecondsDbl(tot_cpu),
                        TimeToSecondsDbl(tot_elapsed - gc_elapsed -
                                         PROF_VAL(RPe_tot_time + HCe_tot_time) - init_elapsed) * 100
                        / TimeToSecondsDbl(tot_elapsed));
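
            // Worked example (illustrative numbers): with tot_cpu = 10.0s,
            // gc_cpu = 2.0s, init_cpu = 0.5s and no profiling overhead, user
            // productivity is (10.0 - 2.0 - 0.5) / 10.0 = 75.0%, i.e. the
            // share of CPU time spent running the mutator itself.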

            // See Note [Internal Counter Stats] for a description of the
            // following counters. If you add a counter here, please remember
            // to update the Note.
            if (RtsFlags.MiscFlags.internalCounters) {
#if defined(THREADED_RTS) && defined(PROF_SPIN)
                uint32_t g;
                const int32_t col_width[] = {4, -30, 14, 14};
                statsPrintf("Internal Counters:\n");
                statsPrintf("%*s" "%*s" "%*s" "%*s" "\n"
                            , col_width[0], ""
                            , col_width[1], "SpinLock"
                            , col_width[2], "Spins"
                            , col_width[3], "Yields");
                statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
                            , col_width[0], ""
                            , col_width[1], "gc_alloc_block_sync"
                            , col_width[2], gc_alloc_block_sync.spin
                            , col_width[3], gc_alloc_block_sync.yield);
                statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
                            , col_width[0], ""
                            , col_width[1], "gc_spin"
                            , col_width[2], stats.gc_spin_spin
                            , col_width[3], stats.gc_spin_yield);
                statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
                            , col_width[0], ""
                            , col_width[1], "mut_spin"
                            , col_width[2], stats.mut_spin_spin
                            , col_width[3], stats.mut_spin_yield);
                statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n"
                            , col_width[0], ""
                            , col_width[1], "whitehole_gc"
                            , col_width[2], whitehole_gc_spin
                            , col_width[3], "n/a");
                statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n"
                            , col_width[0], ""
                            , col_width[1], "whitehole_threadPaused"
                            , col_width[2], whitehole_threadPaused_spin
                            , col_width[3], "n/a");
                statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n"
                            , col_width[0], ""
                            , col_width[1], "whitehole_executeMessage"
                            , col_width[2], whitehole_executeMessage_spin
                            , col_width[3], "n/a");
                statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
                            , col_width[0], ""
                            , col_width[1], "whitehole_lockClosure"
                            , col_width[2], whitehole_lockClosure_spin
                            , col_width[3], whitehole_lockClosure_yield);
                // waitForGcThreads isn't really spin-locking (see the
                // function) but these numbers still seem useful.
                statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
                            , col_width[0], ""
                            , col_width[1], "waitForGcThreads"
                            , col_width[2], waitForGcThreads_spin
                            , col_width[3], waitForGcThreads_yield);

                for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                    int prefix_length = 0;
                    statsPrintf("%*s" "gen[%" FMT_Word32 "%n",
                                col_width[0], "", g, &prefix_length);
                    prefix_length -= col_width[0];
                    int suffix_length = col_width[1] + prefix_length;
                    suffix_length =
                        suffix_length > 0 ? col_width[1] : suffix_length;

                    statsPrintf("%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
                                , suffix_length, "].sync"
                                , col_width[2], generations[g].sync.spin
                                , col_width[3], generations[g].sync.yield);
                }
                statsPrintf("\n");
                statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n"
                            , col_width[0], ""
                            , col_width[1], "any_work"
                            , col_width[2], stats.any_work);
                statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n"
                            , col_width[0], ""
                            , col_width[1], "no_work"
                            , col_width[2], stats.no_work);
                statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n"
                            , col_width[0], ""
                            , col_width[1], "scav_find_work"
                            , col_width[2], stats.scav_find_work);
#elif defined(THREADED_RTS) // THREADED_RTS && PROF_SPIN
                statsPrintf("Internal Counters require the RTS to be built "
                            "with PROF_SPIN"); // PROF_SPIN is not #defined here
#else // THREADED_RTS
                statsPrintf("Internal Counters require the threaded RTS");
#endif
            }
        }

        if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
            char *fmt;
            if (RtsFlags.MiscFlags.machineReadable) {
                fmt =
                    " [(\"bytes allocated\", \"%" FMT_Word64 "\")\n"
                    " ,(\"num_GCs\", \"%" FMT_Word32 "\")\n"
                    " ,(\"average_bytes_used\", \"%" FMT_Word64 "\")\n"
                    " ,(\"max_bytes_used\", \"%" FMT_Word64 "\")\n"
                    " ,(\"num_byte_usage_samples\", \"%" FMT_Word32 "\")\n"
                    " ,(\"peak_megabytes_allocated\", \"%" FMT_Word64 "\")\n"
                    " ,(\"init_cpu_seconds\", \"%.3f\")\n"
                    " ,(\"init_wall_seconds\", \"%.3f\")\n"
                    " ,(\"mutator_cpu_seconds\", \"%.3f\")\n"
                    " ,(\"mutator_wall_seconds\", \"%.3f\")\n"
                    " ,(\"GC_cpu_seconds\", \"%.3f\")\n"
                    " ,(\"GC_wall_seconds\", \"%.3f\")\n"
                    " ]\n";
            }
            else {
                fmt =
                    "<<ghc: %" FMT_Word64 " bytes, "
                    "%" FMT_Word32 " GCs, "
                    "%" FMT_Word64 "/%" FMT_Word64 " avg/max bytes residency (%" FMT_Word32 " samples), "
                    "%" FMT_Word64 "M in use, "
                    "%.3f INIT (%.3f elapsed), "
                    "%.3f MUT (%.3f elapsed), "
                    "%.3f GC (%.3f elapsed) :ghc>>\n";
            }
            /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
            statsPrintf(fmt,
                        stats.allocated_bytes,
                        stats.gcs,
                        (uint64_t)
                            (stats.major_gcs == 0 ? 0 :
                             stats.cumulative_live_bytes/stats.major_gcs),
                        stats.max_live_bytes,
                        stats.major_gcs,
                        (uint64_t) (peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
                        TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed),
                        TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed),
                        TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
        }

        statsFlush();
        statsClose();
    }
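
    // For reference, with +RTS -t --machine-readable the block above prints
    // an association list shaped like this (numbers here are made up):
    //
    //      [("bytes allocated", "2500178448")
    //      ,("num_GCs", "13")
    //      ...
    //      ,("GC_wall_seconds", "0.431")
    //      ]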

    if (GC_coll_cpu) {
        stgFree(GC_coll_cpu);
        GC_coll_cpu = NULL;
    }
    if (GC_coll_elapsed) {
        stgFree(GC_coll_elapsed);
        GC_coll_elapsed = NULL;
    }
    if (GC_coll_max_pause) {
        stgFree(GC_coll_max_pause);
        GC_coll_max_pause = NULL;
    }
}

/* Note [Work Balance]
   ----------------------
   Work balance is a measure of how evenly the work done during parallel
   garbage collection is spread across threads. To compute work balance we
   must take care to account for the number of GC threads changing between
   GCs. The statistics we track must have the number of GC threads
   "integrated out".

   We accumulate two values from each garbage collection:
   * par_copied: a measure of the total work done during parallel garbage
     collection
   * par_balanced_copied: a measure of the balanced work done during
     parallel garbage collection.

   par_copied is simple to compute, but par_balanced_copied_bytes is somewhat
   more complicated:

   For a given garbage collection:
   Let gc_copied := total copies during the gc
       gc_copied_i := copies by the ith thread during the gc
       num_gc_threads := the number of threads participating in the gc
       balance_limit := (gc_copied / num_gc_threads)

   If we were to graph gc_copied_i, sorted from largest to smallest, we would
   see something like:

          |X
     ^    |X X
     |    |X X X           X: unbalanced copies
   copies |-----------     Y: balanced copies by the busiest GC thread
          |Y Z Z Z         Z: other balanced copies
          |Y Z Z Z
          |Y Z Z Z Z
          |Y Z Z Z Z Z
          |===========
          |1 2 3 4 5 6
              i ->

   where the --- line is at balance_limit. Balanced copies are those under
   the --- line, i.e. the area of the Ys and Zs. Note that the area occupied
   by the Ys will always equal balance_limit. A completely balanced gc has
   every thread copying balance_limit, and a completely unbalanced gc has a
   single thread copying gc_copied.

   One could define par_balance_copied as the areas of the Ys and Zs in the
   graph above, however we would like the ratio (par_balance_copied /
   gc_copied) to range from 0 to 1, so that work_balance will be a nice
   percentage, also ranging from 0 to 1. We therefore define
   par_balanced_copied as:

                                                             (  num_gc_threads  )
   {Sum [Min(gc_copied_i, balance_limit)] - balance_limit} * (------------------)
     i                                                       (num_gc_threads - 1)
                         vvv                                          vvv
                          S                                            T

   where the S and T terms serve to remove the area of the Ys, and
   to normalize the result to lie between 0 and gc_copied.

   Note that the implementation orders these operations differently to
   minimize error due to integer rounding.
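
   As an illustrative sketch (the real accumulation lives in the GC, not in
   this file; names n, C and c[] are hypothetical), the per-collection
   computation is equivalent to:

       // n = num_gc_threads, C = gc_copied, c[i] = gc_copied_i (bytes)
       uint64_t acc = 0;
       for (uint32_t i = 0; i < n; i++) {
           // Min(c[i], C/n) scaled by n; scaling first avoids rounding
           // C/n down prematurely
           acc += (n * c[i] < C) ? n * c[i] : C;
       }
       // (acc - C) / (n - 1), rounded to nearest
       uint64_t par_balanced_copied = (acc - C + (n - 1) / 2) / (n - 1);

   which is the formula above with both sides scaled by num_gc_threads.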

   Then cumulative work balance is computed as
     (cumulative_par_balanced_copied_bytes / par_copied_bytes)

   Previously, cumulative work balance was computed as:

     (cumulative_par_max_copied_bytes)
     (-------------------------------) - 1
     (       par_copied_bytes        )
     -------------------------------------
              (n_capabilities - 1)

   This was less accurate than the current method, and invalid whenever a
   garbage collection had occurred with num_gc_threads /= n_capabilities,
   which can happen when setNumCapabilities is called, when -qn is passed as
   an RTS option, or when the number of gc threads is limited to the number
   of cores.
   See #13830
*/

/*
Note [Internal Counter Stats]
-----------------------------
What do the counts at the end of a '+RTS -s --internal-counters' report mean?
They are detailed below. Most of these counters are used by multiple threads
with no attempt at synchronisation. This means that reported values may be
lower than the true value, and this becomes more likely and more severe as
contention increases.

The first counters are for various SpinLock-like constructs in the RTS. See
Spinlock.h for the definition of a SpinLock. We maintain up to two counters
per SpinLock:
* spin: The number of busy-spins over the length of the program.
* yield: The number of times the SpinLock spun SPIN_COUNT times without
  success and called yieldThread().
Not all of these are actual SpinLocks, see the details below.
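
As a rough sketch of where those two counters come from (the authoritative
definition is ACQUIRE_SPIN_LOCK in Spinlock.h), acquisition behaves like:

    for (;;) {
        for (int i = 0; i < SPIN_COUNT; i++) {
            if (try_acquire(lock)) return;   // hypothetical helper for the
                                             // underlying compare-and-swap
            lock->spin++;                    // one busy-spin
        }
        lock->yield++;                       // SPIN_COUNT spins, no luck
        yieldThread();                       // give up our timeslice
    }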

Actual SpinLocks:
* gc_alloc_block_sync:
    This SpinLock protects the block allocator and free list manager. See
    BlockAlloc.c.
* gc_spin and mut_spin:
    These SpinLocks are used to herd gc worker threads during parallel garbage
    collection. See gcWorkerThread, wakeup_gc_threads and releaseGCThreads.
* gen[g].sync:
    These SpinLocks, one per generation, protect the generations[g] data
    structure during garbage collection.

waitForGcThreads:
  These counters are incremented while we wait for all threads to be ready
  for a parallel garbage collection. We yield more than we spin in this case.

In several places in the runtime we must take a lock on a closure. To do this,
we replace its info table with stg_WHITEHOLE_info, spinning if it is already
a white-hole. Sometimes we yieldThread() if we spin too long, sometimes we
don't. We count these white-hole spins and include them in the SpinLocks
table. If a particular loop does not yield, we put "n/a" in the table. They
are named for the function that has the spinning loop, except that several
loops in the garbage collector accumulate into whitehole_gc.
TODO: Should these counters be more or less granular?

white-hole spin counters:
* whitehole_gc
* whitehole_lockClosure
* whitehole_executeMessage
* whitehole_threadPaused


We count the number of calls of several functions in the parallel garbage
collector.

Parallel garbage collector counters:
* any_work:
    A cheap function called whenever a gc_thread is ready for work. Does
    not do any work.
* no_work:
    Incremented whenever any_work finds no work.
* scav_find_work:
    Called to do work when any_work returns true.

*/

/* -----------------------------------------------------------------------------
   stat_describe_gens

   Produce some detailed info on the state of the generational GC.
   -------------------------------------------------------------------------- */
void
statDescribeGens(void)
{
    uint32_t g, mut, lge, compacts, i;
    W_ gen_slop;
    W_ tot_live, tot_slop;
    W_ gen_live, gen_blocks;
    bdescr *bd;
    generation *gen;

    debugBelch(
"----------------------------------------------------------------------\n"
"  Gen     Max  Mut-list  Blocks    Large  Compacts      Live      Slop\n"
"       Blocks     Bytes          Objects                              \n"
"----------------------------------------------------------------------\n");

    tot_live = 0;
    tot_slop = 0;

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gen = &generations[g];

        for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
            lge++;
        }

        for (bd = gen->compact_objects, compacts = 0; bd; bd = bd->link) {
            compacts++;
        }

        gen_live   = genLiveWords(gen);
        gen_blocks = genLiveBlocks(gen);

        mut = 0;
        for (i = 0; i < n_capabilities; i++) {
            mut += countOccupied(capabilities[i]->mut_lists[g]);

            // Add the pinned object block.
            bd = capabilities[i]->pinned_object_block;
            if (bd != NULL) {
                gen_live   += bd->free - bd->start;
                gen_blocks += bd->blocks;
            }

            gen_live   += gcThreadLiveWords(i,g);
            gen_blocks += gcThreadLiveBlocks(i,g);
        }

        debugBelch("%5d %7" FMT_Word " %9d", g, (W_)gen->max_blocks, mut);

        gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;

        debugBelch("%8" FMT_Word " %8d %8d %9" FMT_Word " %9" FMT_Word "\n",
                   gen_blocks, lge, compacts,
                   gen_live*(W_)sizeof(W_), gen_slop*(W_)sizeof(W_));
        tot_live += gen_live;
        tot_slop += gen_slop;
    }
    debugBelch("----------------------------------------------------------------------\n");
    debugBelch("%51s%9" FMT_Word " %9" FMT_Word "\n",
               "",tot_live*sizeof(W_),tot_slop*sizeof(W_));
    debugBelch("----------------------------------------------------------------------\n");
    debugBelch("\n");
}

/* -----------------------------------------------------------------------------
   Stats available via a programmatic interface, so e.g. GHCi can time
   each compilation and expression evaluation.
   -------------------------------------------------------------------------- */

uint64_t getAllocations( void )
{
    return stats.allocated_bytes;
}

int getRTSStatsEnabled( void )
{
    return RtsFlags.GcFlags.giveStats != NO_GC_STATS;
}

void getRTSStats( RTSStats *s )
{
    Time current_elapsed = 0;
    Time current_cpu = 0;

    *s = stats;

    getProcessTimes(&current_cpu, &current_elapsed);
    s->cpu_ns = current_cpu - end_init_cpu;
    s->elapsed_ns = current_elapsed - end_init_elapsed;

    s->mutator_cpu_ns = current_cpu - end_init_cpu - stats.gc_cpu_ns;
    s->mutator_elapsed_ns = current_elapsed - end_init_elapsed -
        stats.gc_elapsed_ns;
}
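
/* A minimal usage sketch (illustrative, not part of this file): a client
   can snapshot the stats before and after a workload and diff the fields,
   e.g.

       RTSStats before, after;
       getRTSStats(&before);
       runTheWorkload();                    // hypothetical workload
       getRTSStats(&after);
       uint64_t allocated = after.allocated_bytes - before.allocated_bytes;
       Time     gc_cpu    = after.gc_cpu_ns - before.gc_cpu_ns;

   This is essentially what GHC.Stats.getRTSStats exposes to Haskell code. */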

/* -----------------------------------------------------------------------------
   Dumping stuff in the stats file, or via the debug message interface
   -------------------------------------------------------------------------- */

void
statsPrintf( char *s, ... )
{
    FILE *sf = RtsFlags.GcFlags.statsFile;
    va_list ap;

    va_start(ap,s);
    if (sf == NULL) {
        vdebugBelch(s,ap);
    } else {
        vfprintf(sf, s, ap);
    }
    va_end(ap);
}

static void
statsFlush( void )
{
    FILE *sf = RtsFlags.GcFlags.statsFile;
    if (sf != NULL) {
        fflush(sf);
    }
}

static void
statsClose( void )
{
    FILE *sf = RtsFlags.GcFlags.statsFile;
    if (sf != NULL) {
        fclose(sf);
    }
}