b0c1be05820c793b0f6f4c1e15d0033ebeb4af1d
[ghc.git] / rts / Stats.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2005
4 *
5 * Statistics and timing-related functions.
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "PosixSource.h"
10 #include "Rts.h"
11
12 #include "RtsFlags.h"
13 #include "RtsUtils.h"
14 #include "Schedule.h"
15 #include "Stats.h"
16 #include "Profiling.h"
17 #include "GetTime.h"
18 #include "sm/Storage.h"
19 #include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
20 #include "sm/GCThread.h"
21 #include "sm/BlockAlloc.h"
22
23 #define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)
24
25 static Time
26 start_init_cpu, start_init_elapsed,
27 end_init_cpu, end_init_elapsed,
28 start_exit_cpu, start_exit_elapsed,
29 start_exit_gc_elapsed, start_exit_gc_cpu,
30 end_exit_cpu, end_exit_elapsed;
31
32 #if defined(PROFILING)
33 static Time RP_start_time = 0, RP_tot_time = 0; // retainer prof user time
34 static Time RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time
35
36 static Time HC_start_time, HC_tot_time = 0; // heap census prof user time
37 static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
38 #endif
39
40 #if defined(PROFILING)
41 #define PROF_VAL(x) (x)
42 #else
43 #define PROF_VAL(x) 0
44 #endif
45
46 //
47 // All the stats!
48 //
49 // This is where we accumulate all the stats during execution, and it's also
50 // in a convenient form that we can copy over to a caller of getRTSStats().
51 //
52 static RTSStats stats;
53
54 static W_ GC_end_faults = 0;
55
56 static Time *GC_coll_cpu = NULL;
57 static Time *GC_coll_elapsed = NULL;
58 static Time *GC_coll_max_pause = NULL;
59
60 static void statsPrintf( char *s, ... ) GNUC3_ATTRIBUTE(format (PRINTF, 1, 2));
61 static void statsFlush( void );
62 static void statsClose( void );
63
64 /* -----------------------------------------------------------------------------
65 Current elapsed time
66 ------------------------------------------------------------------------- */
67
68 Time stat_getElapsedTime(void)
69 {
70 return getProcessElapsedTime() - start_init_elapsed;
71 }
72
73 /* ---------------------------------------------------------------------------
74 Measure the current MUT time, for profiling
75 ------------------------------------------------------------------------ */
76
77 double
78 mut_user_time_until( Time t )
79 {
80 return TimeToSecondsDbl(t - stats.gc_cpu_ns);
81 // heapCensus() time is included in GC_tot_cpu, so we don't need
82 // to subtract it here.
83 }
84
/* Mutator CPU time (seconds) consumed so far by this process. */
double
mut_user_time( void )
{
    return mut_user_time_until(getProcessCPUTime());
}
92
#if defined(PROFILING)
/*
 * MUT time at the point the current retainer-profiling pass started:
 * CPU time up to RP_start_time, minus GC time and minus time spent in
 * earlier retainer-profiling passes.  (The heap-census analogue would
 * be mut_user_time_during_HC().)
 */
static double
mut_user_time_during_RP( void )
{
    Time mut_ns = RP_start_time - stats.gc_cpu_ns - RP_tot_time;
    return TimeToSecondsDbl(mut_ns);
}

#endif /* PROFILING */
105
106 /* ---------------------------------------------------------------------------
107 initStats0() has no dependencies, it can be called right at the beginning
108 ------------------------------------------------------------------------ */
109
110 void
111 initStats0(void)
112 {
113 start_init_cpu = 0;
114 start_init_elapsed = 0;
115 end_init_cpu = 0;
116 end_init_elapsed = 0;
117
118 start_exit_cpu = 0;
119 start_exit_elapsed = 0;
120 start_exit_gc_cpu = 0;
121 start_exit_gc_elapsed = 0;
122 end_exit_cpu = 0;
123 end_exit_elapsed = 0;
124
125 #if defined(PROFILING)
126 RP_start_time = 0;
127 RP_tot_time = 0;
128 RPe_start_time = 0;
129 RPe_tot_time = 0;
130
131 HC_start_time = 0;
132 HC_tot_time = 0;
133 HCe_start_time = 0;
134 HCe_tot_time = 0;
135 #endif
136
137 GC_end_faults = 0;
138
139 stats = (RTSStats) {
140 .gcs = 0,
141 .major_gcs = 0,
142 .allocated_bytes = 0,
143 .max_live_bytes = 0,
144 .max_large_objects_bytes = 0,
145 .max_compact_bytes = 0,
146 .max_slop_bytes = 0,
147 .max_mem_in_use_bytes = 0,
148 .cumulative_live_bytes = 0,
149 .copied_bytes = 0,
150 .par_copied_bytes = 0,
151 .cumulative_par_max_copied_bytes = 0,
152 .mutator_cpu_ns = 0,
153 .mutator_elapsed_ns = 0,
154 .gc_cpu_ns = 0,
155 .gc_elapsed_ns = 0,
156 .cpu_ns = 0,
157 .elapsed_ns = 0,
158 .gc = {
159 .gen = 0,
160 .threads = 0,
161 .allocated_bytes = 0,
162 .live_bytes = 0,
163 .large_objects_bytes = 0,
164 .compact_bytes = 0,
165 .slop_bytes = 0,
166 .mem_in_use_bytes = 0,
167 .copied_bytes = 0,
168 .par_max_copied_bytes = 0,
169 .sync_elapsed_ns = 0,
170 .cpu_ns = 0,
171 .elapsed_ns = 0
172 }
173 };
174 }
175
176 /* ---------------------------------------------------------------------------
177 initStats1() can be called after setupRtsFlags()
178 ------------------------------------------------------------------------ */
179
/*
 * Second stage of stats initialisation; must run after setupRtsFlags()
 * because it reads RtsFlags.GcFlags.  Prints the per-GC column headings
 * (verbose mode) and allocates the per-generation GC timing arrays.
 */
void
initStats1 (void)
{
    uint32_t i;

    // Column headings for the one-line-per-GC output (+RTS -S).
    if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
        statsPrintf(" Alloc Copied Live GC GC TOT TOT Page Flts\n");
        statsPrintf(" bytes bytes bytes user elap user elap\n");
    }
    // One slot per generation: accumulated GC CPU time, accumulated
    // elapsed time, and the longest single pause seen so far.
    GC_coll_cpu =
        (Time *)stgMallocBytes(
            sizeof(Time)*RtsFlags.GcFlags.generations,
            "initStats");
    GC_coll_elapsed =
        (Time *)stgMallocBytes(
            sizeof(Time)*RtsFlags.GcFlags.generations,
            "initStats");
    GC_coll_max_pause =
        (Time *)stgMallocBytes(
            sizeof(Time)*RtsFlags.GcFlags.generations,
            "initStats");
    for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
        GC_coll_cpu[i] = 0;
        GC_coll_elapsed[i] = 0;
        GC_coll_max_pause[i] = 0;
    }
}
207
208 /* -----------------------------------------------------------------------------
209 Initialisation time...
210 -------------------------------------------------------------------------- */
211
212 void
213 stat_startInit(void)
214 {
215 getProcessTimes(&start_init_cpu, &start_init_elapsed);
216 }
217
218 void
219 stat_endInit(void)
220 {
221 getProcessTimes(&end_init_cpu, &end_init_elapsed);
222 }
223
224 /* -----------------------------------------------------------------------------
225 stat_startExit and stat_endExit
226
227 These two measure the time taken in shutdownHaskell().
228 -------------------------------------------------------------------------- */
229
230 void
231 stat_startExit(void)
232 {
233 getProcessTimes(&start_exit_cpu, &start_exit_elapsed);
234 start_exit_gc_elapsed = stats.gc_elapsed_ns;
235 start_exit_gc_cpu = stats.gc_cpu_ns;
236 }
237
238 void
239 stat_endExit(void)
240 {
241 getProcessTimes(&end_exit_cpu, &end_exit_elapsed);
242 }
243
244 void
245 stat_startGCSync (gc_thread *gct)
246 {
247 gct->gc_sync_start_elapsed = getProcessElapsedTime();
248 }
249
250 /* -----------------------------------------------------------------------------
251 Called at the beginning of each GC
252 -------------------------------------------------------------------------- */
253
/* Called at the beginning of each GC: records start times/page faults
 * on the gc_thread and emits the GC_START event. */
void
stat_startGC (Capability *cap, gc_thread *gct)
{
    // Ring the terminal bell (BEL) at the start of GC if requested.
    if (RtsFlags.GcFlags.ringBell) {
        debugBelch("\007");
    }

    getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);

    // Post EVENT_GC_START with the same timestamp as used for stats
    // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
    // Here, as opposed to other places, the event is emitted on the cap
    // that initiates the GC and external tools expect it to have the same
    // timestamp as used in +RTS -s calculations.
    traceEventGcStartAtT(cap,
                         TimeToNS(gct->gc_start_elapsed - start_init_elapsed));

    // Page-fault counting is only needed for the verbose per-GC report.
    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
    {
        gct->gc_start_faults = getPageFaults();
    }

    updateNurseriesStats();
}
278
279 /* -----------------------------------------------------------------------------
280 Called at the end of each GC
281 -------------------------------------------------------------------------- */
282
/*
 * Called at the end of each GC.
 *
 *   cap            - capability that initiated the GC (events are posted here)
 *   gct            - gc_thread carrying the start times recorded by
 *                    stat_startGCSync()/stat_startGC()
 *   live/copied/slop - word counts gathered by the collector
 *   gen            - oldest generation collected
 *   par_n_threads  - number of GC threads that took part
 *   par_max_copied - most words copied by any single GC thread
 */
void
stat_endGC (Capability *cap, gc_thread *gct,
            W_ live, W_ copied, W_ slop, uint32_t gen,
            uint32_t par_n_threads, W_ par_max_copied)
{
    // -------------------------------------------------
    // Collect all the stats about this GC in stats.gc. We always do this since
    // it's relatively cheap and we need allocated_bytes to catch heap
    // overflows.

    stats.gc.gen = gen;
    stats.gc.threads = par_n_threads;

    uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_);

    // allocated since the last GC
    stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes;

    stats.gc.live_bytes = live * sizeof(W_);
    stats.gc.large_objects_bytes = calcTotalLargeObjectsW() * sizeof(W_);
    stats.gc.compact_bytes = calcTotalCompactW() * sizeof(W_);
    stats.gc.slop_bytes = slop * sizeof(W_);
    stats.gc.mem_in_use_bytes = mblocks_allocated * MBLOCK_SIZE;
    stats.gc.copied_bytes = copied * sizeof(W_);
    stats.gc.par_max_copied_bytes = par_max_copied * sizeof(W_);

    // -------------------------------------------------
    // Update the cumulative stats

    stats.gcs++;
    stats.allocated_bytes = tot_alloc_bytes;
    stats.max_mem_in_use_bytes = peak_mblocks_allocated * MBLOCK_SIZE;

    // NOTE(review): stats.gc.cpu_ns / stats.gc.elapsed_ns are only
    // refreshed for *this* GC further down (and only when stats are
    // enabled), so the three accumulations below appear to read the
    // timings of the previous GC — confirm whether this ordering is
    // intentional.
    GC_coll_cpu[gen] += stats.gc.cpu_ns;
    GC_coll_elapsed[gen] += stats.gc.elapsed_ns;
    if (GC_coll_max_pause[gen] < stats.gc.elapsed_ns) {
        GC_coll_max_pause[gen] = stats.gc.elapsed_ns;
    }

    stats.copied_bytes += stats.gc.copied_bytes;
    // Parallel-copy totals feed the "work balance" figure in stat_exit().
    if (par_n_threads > 1) {
        stats.par_copied_bytes += stats.gc.copied_bytes;
        stats.cumulative_par_max_copied_bytes +=
            stats.gc.par_max_copied_bytes;
    }
    stats.gc_cpu_ns += stats.gc.cpu_ns;
    stats.gc_elapsed_ns += stats.gc.elapsed_ns;

    if (gen == RtsFlags.GcFlags.generations-1) { // major GC?
        stats.major_gcs++;
        if (stats.gc.live_bytes > stats.max_live_bytes) {
            stats.max_live_bytes = stats.gc.live_bytes;
        }
        if (stats.gc.large_objects_bytes > stats.max_large_objects_bytes) {
            stats.max_large_objects_bytes = stats.gc.large_objects_bytes;
        }
        if (stats.gc.compact_bytes > stats.max_compact_bytes) {
            stats.max_compact_bytes = stats.gc.compact_bytes;
        }
        if (stats.gc.slop_bytes > stats.max_slop_bytes) {
            stats.max_slop_bytes = stats.gc.slop_bytes;
        }
        stats.cumulative_live_bytes += stats.gc.live_bytes;
    }

    // -------------------------------------------------
    // Do the more expensive bits only when stats are enabled.

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
        rtsConfig.gcDoneHook != NULL ||
        RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time
    {
        // We only update the times when stats are explicitly enabled since
        // getProcessTimes (e.g. requiring a system call) can be expensive on
        // some platforms.
        Time current_cpu, current_elapsed;
        getProcessTimes(&current_cpu, &current_elapsed);
        stats.cpu_ns = current_cpu - start_init_cpu;
        stats.elapsed_ns = current_elapsed - start_init_elapsed;

        stats.gc.sync_elapsed_ns =
            gct->gc_start_elapsed - gct->gc_sync_start_elapsed;
        stats.gc.elapsed_ns = current_elapsed - gct->gc_start_elapsed;
        stats.gc.cpu_ns = current_cpu - gct->gc_start_cpu;

        // -------------------------------------------------
        // Emit events to the event log

        // Has to be emitted while all caps stopped for GC, but before GC_END.
        // See trac.haskell.org/ThreadScope/wiki/RTSsummaryEvents
        // for a detailed design rationale of the current setup
        // of GC eventlog events.
        traceEventGcGlobalSync(cap);

        // Emitted before GC_END on all caps, which simplifies tools code.
        traceEventGcStats(cap,
                          CAPSET_HEAP_DEFAULT,
                          stats.gc.gen,
                          stats.gc.copied_bytes,
                          stats.gc.slop_bytes,
                          /* current loss due to fragmentation */
                          (mblocks_allocated * BLOCKS_PER_MBLOCK - n_alloc_blocks)
                          * BLOCK_SIZE,
                          par_n_threads,
                          stats.gc.par_max_copied_bytes,
                          stats.gc.copied_bytes);

        // Post EVENT_GC_END with the same timestamp as used for stats
        // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
        // Here, as opposed to other places, the event is emitted on the cap
        // that initiates the GC and external tools expect it to have the same
        // timestamp as used in +RTS -s calculations.
        traceEventGcEndAtT(cap, TimeToNS(stats.elapsed_ns));

        if (gen == RtsFlags.GcFlags.generations-1) { // major GC?
            traceEventHeapLive(cap,
                               CAPSET_HEAP_DEFAULT,
                               stats.gc.live_bytes);
        }

        // -------------------------------------------------
        // Print GC stats to stdout or a file (+RTS -S/-s)

        if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
            W_ faults = getPageFaults();

            statsPrintf("%9" FMT_Word64 " %9" FMT_Word64 " %9" FMT_Word64,
                        stats.gc.allocated_bytes, stats.gc.copied_bytes,
                        stats.gc.live_bytes);

            statsPrintf(" %6.3f %6.3f %8.3f %8.3f %4"
                        FMT_Word " %4" FMT_Word " (Gen: %2d)\n",
                        TimeToSecondsDbl(stats.gc.cpu_ns),
                        TimeToSecondsDbl(stats.gc.elapsed_ns),
                        TimeToSecondsDbl(stats.cpu_ns),
                        TimeToSecondsDbl(stats.elapsed_ns),
                        faults - gct->gc_start_faults,
                        gct->gc_start_faults - GC_end_faults,
                        gen);

            GC_end_faults = faults;
            statsFlush();
        }


        // Notify the embedding application, if it registered a hook.
        if (rtsConfig.gcDoneHook != NULL) {
            rtsConfig.gcDoneHook(&stats.gc);
        }

        traceEventHeapSize(cap,
                           CAPSET_HEAP_DEFAULT,
                           mblocks_allocated * MBLOCK_SIZE);
    }
}
437
438 /* -----------------------------------------------------------------------------
439 Called at the beginning of each Retainer Profiliing
440 -------------------------------------------------------------------------- */
#if defined(PROFILING)
/* Record the CPU/elapsed times at the start of a retainer-profiling pass. */
void
stat_startRP(void)
{
    Time cpu, wall;
    getProcessTimes( &cpu, &wall );

    RP_start_time  = cpu;
    RPe_start_time = wall;
}
#endif /* PROFILING */
452
453 /* -----------------------------------------------------------------------------
454 Called at the end of each Retainer Profiliing
455 -------------------------------------------------------------------------- */
456
#if defined(PROFILING)
/*
 * Called at the end of each retainer-profiling pass: accumulates the
 * CPU/elapsed time spent in the pass and writes a summary to prof_file.
 *
 *   retainerGeneration - ordinal number of this retainer profile
 *   maxCStackSize / maxStackSize - stack high-water marks
 *                                  (DEBUG_RETAINER builds only)
 *   averageNumVisit    - average number of visits per object
 */
void
stat_endRP(
  uint32_t retainerGeneration,
#if defined(DEBUG_RETAINER)
  uint32_t maxCStackSize,
  int maxStackSize,
#endif
  double averageNumVisit)
{
    Time user, elapsed;
    getProcessTimes( &user, &elapsed );

    RP_tot_time += user - RP_start_time;
    RPe_tot_time += elapsed - RPe_start_time;

    // %u for the uint32_t (was %d — format/argument type mismatch).
    fprintf(prof_file, "Retainer Profiling: %u, at %f seconds\n",
            retainerGeneration, mut_user_time_during_RP());
#if defined(DEBUG_RETAINER)
    fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
    // %d for the signed int (was %u — undefined behaviour per C11
    // 7.21.6.1p9 if maxStackSize were negative).
    fprintf(prof_file, "\tMax auxiliary stack size = %d\n", maxStackSize);
#endif
    fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit);
}
#endif /* PROFILING */
482
483 /* -----------------------------------------------------------------------------
484 Called at the beginning of each heap census
485 -------------------------------------------------------------------------- */
#if defined(PROFILING)
/* Record the CPU/elapsed times at the start of a heap census. */
void
stat_startHeapCensus(void)
{
    Time cpu, wall;
    getProcessTimes( &cpu, &wall );

    HC_start_time  = cpu;
    HCe_start_time = wall;
}
#endif /* PROFILING */
497
498 /* -----------------------------------------------------------------------------
499 Called at the end of each heap census
500 -------------------------------------------------------------------------- */
#if defined(PROFILING)
/* Accumulate the CPU/elapsed time spent in the heap census just finished. */
void
stat_endHeapCensus(void)
{
    Time cpu, wall;
    getProcessTimes( &cpu, &wall );

    HC_tot_time  += cpu  - HC_start_time;
    HCe_tot_time += wall - HCe_start_time;
}
#endif /* PROFILING */
512
513 /* -----------------------------------------------------------------------------
514 Called at the end of execution
515
516 NOTE: number of allocations is not entirely accurate: it doesn't
517 take into account the few bytes at the end of the heap that
518 were left unused when the heap-check failed.
519 -------------------------------------------------------------------------- */
520
#if defined(DEBUG)
// Counters for the slow-call statistics (DEBUG builds only).
// NOTE(review): the counters start at 1, presumably so the percentage
// reports below never divide by zero — confirm.
#define TICK_VAR_INI(arity) \
  StgInt SLOW_CALLS_##arity = 1; \
  StgInt RIGHT_ARITY_##arity = 1; \
  StgInt TAGGED_PTR_##arity = 0;

TICK_VAR_INI(1)
TICK_VAR_INI(2)

StgInt TOTAL_CALLS=1;
#endif

/* Report the value of a counter */
// Relies on a 'temp' char buffer being in scope at the expansion site.
#define REPORT(counter) \
  { \
    showStgWord64(counter,temp,true/*commas*/); \
    statsPrintf(" (" #counter ") : %s\n",temp); \
  }

/* Report the value of a counter as a percentage of another counter */
#define REPORT_PCT(counter,countertot) \
  statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
              counter*100.0/countertot)

// Print the full slow-call breakdown for one arity.
#define TICK_PRINT(arity) \
  REPORT(SLOW_CALLS_##arity); \
  REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \
  REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \
  REPORT(RIGHT_ARITY_##arity); \
  REPORT(TAGGED_PTR_##arity)

// Slow calls of one arity as a percentage of all calls.
#define TICK_PRINT_TOT(arity) \
  statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
              SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
555
556 STATIC_INLINE Time get_init_cpu(void) {
557 return end_init_cpu - start_init_cpu;
558 }
559
560 STATIC_INLINE Time get_init_elapsed(void) {
561 return end_init_elapsed - start_init_elapsed;
562 }
563
564
/*
 * Called once at RTS shutdown.  Prints the +RTS -s summary and/or the
 * one-line (-t / --machine-readable) form, then frees the
 * per-generation GC timing arrays.
 *
 * Total runtime is decomposed as INIT + MUT + GC + EXIT; GC work done
 * during shutdown is attributed to GC, not EXIT.
 */
void
stat_exit (void)
{
    generation *gen;
    Time gc_cpu = 0;
    Time gc_elapsed = 0;
    Time init_cpu = 0;
    Time init_elapsed = 0;
    Time mut_cpu = 0;
    Time mut_elapsed = 0;
    Time exit_cpu = 0;
    Time exit_elapsed = 0;
    Time exit_gc_cpu = 0;
    Time exit_gc_elapsed = 0;

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {

        char temp[512];
        Time tot_cpu;
        Time tot_elapsed;
        uint32_t g;

        getProcessTimes( &tot_cpu, &tot_elapsed );
        tot_cpu -= start_init_cpu;
        tot_elapsed -= start_init_elapsed;

        uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_);

        // allocated since the last GC
        stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes;
        stats.allocated_bytes = tot_alloc_bytes;

        /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
        if (tot_cpu <= 0) tot_cpu = 1;
        if (tot_elapsed <= 0) tot_elapsed = 1;

        // Final line of the verbose per-GC table: allocation since the
        // last GC, with no GC times.
        if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
            statsPrintf("%9" FMT_Word " %9.9s %9.9s",
                        (W_)stats.gc.allocated_bytes, "", "");
            statsPrintf(" %6.3f %6.3f\n\n", 0.0, 0.0);
        }

        // heapCensus() is called by the GC, so RP and HC time are
        // included in the GC stats. We therefore subtract them to
        // obtain the actual GC cpu time.
        gc_cpu = stats.gc_cpu_ns - PROF_VAL(RP_tot_time + HC_tot_time);
        gc_elapsed = stats.gc_elapsed_ns - PROF_VAL(RPe_tot_time + HCe_tot_time);

        init_cpu = get_init_cpu();
        init_elapsed = get_init_elapsed();

        // We do a GC during the EXIT phase. We'll attribute the cost of that
        // to GC instead of EXIT, so carefully subtract it from the EXIT time.
        exit_gc_cpu = stats.gc_cpu_ns - start_exit_gc_cpu;
        exit_gc_elapsed = stats.gc_elapsed_ns - start_exit_gc_elapsed;
        exit_cpu = end_exit_cpu - start_exit_cpu - exit_gc_cpu;
        exit_elapsed = end_exit_elapsed - start_exit_elapsed - exit_gc_elapsed;

        mut_elapsed = start_exit_elapsed - end_init_elapsed -
            (gc_elapsed - exit_gc_elapsed);

        mut_cpu = start_exit_cpu - end_init_cpu - (gc_cpu - exit_gc_cpu)
            - PROF_VAL(RP_tot_time + HC_tot_time);
        if (mut_cpu < 0) { mut_cpu = 0; }

        // The subdivision of runtime into INIT/EXIT/GC/MUT is just adding and
        // subtracting, so the parts should add up to the total exactly. Note
        // that tot_elapsed is captured a tiny bit later than end_exit_elapsed,
        // so we don't use it here.
        ASSERT(init_elapsed + mut_elapsed + gc_elapsed + exit_elapsed
               == end_exit_elapsed - start_init_elapsed);


        if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
            showStgWord64(stats.allocated_bytes, temp, true/*commas*/);
            statsPrintf("%16s bytes allocated in the heap\n", temp);

            showStgWord64(stats.copied_bytes, temp, true/*commas*/);
            statsPrintf("%16s bytes copied during GC\n", temp);

            // Residency is only sampled at major GCs.
            if ( stats.major_gcs > 0 ) {
                showStgWord64(stats.max_live_bytes, temp, true/*commas*/);
                statsPrintf("%16s bytes maximum residency (%" FMT_Word32
                            " sample(s))\n",
                            temp, stats.major_gcs);
            }

            showStgWord64(stats.max_slop_bytes, temp, true/*commas*/);
            statsPrintf("%16s bytes maximum slop\n", temp);

            statsPrintf("%16" FMT_SizeT " MB total memory in use (%"
                        FMT_SizeT " MB lost due to fragmentation)\n\n",
                        (size_t)(peak_mblocks_allocated * MBLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)),
                        (size_t)(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));

            /* Print garbage collections in each gen */
            statsPrintf(" Tot time (elapsed) Avg pause Max pause\n");
            for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                gen = &generations[g];
                statsPrintf(" Gen %2d %5d colls, %5d par %6.3fs %6.3fs %3.4fs %3.4fs\n",
                            gen->no,
                            gen->collections,
                            gen->par_collections,
                            TimeToSecondsDbl(GC_coll_cpu[g]),
                            TimeToSecondsDbl(GC_coll_elapsed[g]),
                            gen->collections == 0 ? 0 : TimeToSecondsDbl(GC_coll_elapsed[g] / gen->collections),
                            TimeToSecondsDbl(GC_coll_max_pause[g]));
            }

#if defined(THREADED_RTS)
            // Work balance (serial 0%, perfect 100%), from the
            // cumulative parallel-copy counters kept by stat_endGC().
            if (RtsFlags.ParFlags.parGcEnabled && n_capabilities > 1) {
                statsPrintf("\n Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n",
                            100 * (((double)stats.par_copied_bytes / (double)stats.cumulative_par_max_copied_bytes) - 1)
                            / (n_capabilities - 1)
                    );
            }
#endif
            statsPrintf("\n");

#if defined(THREADED_RTS)
            statsPrintf(" TASKS: %d (%d bound, %d peak workers (%d total), using -N%d)\n",
                        taskCount, taskCount - workerCount,
                        peakWorkerCount, workerCount,
                        n_capabilities);

            statsPrintf("\n");

            {
                uint32_t i;
                SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
                // Sum the per-capability spark counters.
                for (i = 0; i < n_capabilities; i++) {
                    sparks.created += capabilities[i]->spark_stats.created;
                    sparks.dud += capabilities[i]->spark_stats.dud;
                    sparks.overflowed+= capabilities[i]->spark_stats.overflowed;
                    sparks.converted += capabilities[i]->spark_stats.converted;
                    sparks.gcd += capabilities[i]->spark_stats.gcd;
                    sparks.fizzled += capabilities[i]->spark_stats.fizzled;
                }

                statsPrintf(" SPARKS: %" FMT_Word " (%" FMT_Word " converted, %" FMT_Word " overflowed, %" FMT_Word " dud, %" FMT_Word " GC'd, %" FMT_Word " fizzled)\n\n",
                            sparks.created + sparks.dud + sparks.overflowed,
                            sparks.converted, sparks.overflowed, sparks.dud,
                            sparks.gcd, sparks.fizzled);
            }
#endif

            statsPrintf(" INIT time %7.3fs (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed));

            statsPrintf(" MUT time %7.3fs (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed));
            statsPrintf(" GC time %7.3fs (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));

#if defined(PROFILING)
            statsPrintf(" RP time %7.3fs (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(RP_tot_time), TimeToSecondsDbl(RPe_tot_time));
            statsPrintf(" PROF time %7.3fs (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(HC_tot_time), TimeToSecondsDbl(HCe_tot_time));
#endif
            statsPrintf(" EXIT time %7.3fs (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(exit_cpu), TimeToSecondsDbl(exit_elapsed));
            statsPrintf(" Total time %7.3fs (%7.3fs elapsed)\n\n",
                        TimeToSecondsDbl(tot_cpu), TimeToSecondsDbl(tot_elapsed));
#if !defined(THREADED_RTS)
            statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n",
                        TimeToSecondsDbl(gc_cpu)*100/TimeToSecondsDbl(tot_cpu),
                        TimeToSecondsDbl(gc_elapsed)*100/TimeToSecondsDbl(tot_elapsed));
#endif

            // Allocation rate is measured against mutator CPU time only.
            if (mut_cpu == 0) {
                showStgWord64(0, temp, true/*commas*/);
            } else {
                showStgWord64(
                    (StgWord64)((double)stats.allocated_bytes /
                                TimeToSecondsDbl(mut_cpu)),
                    temp, true/*commas*/);
            }

            statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp);

            // Productivity: time not spent in GC, profiling, or INIT.
            statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
                        TimeToSecondsDbl(tot_cpu - gc_cpu -
                                         PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                        / TimeToSecondsDbl(tot_cpu),
                        TimeToSecondsDbl(tot_elapsed - gc_elapsed -
                                         PROF_VAL(RPe_tot_time + HCe_tot_time) - init_elapsed) * 100
                        / TimeToSecondsDbl(tot_elapsed));

#if defined(THREADED_RTS) && defined(PROF_SPIN)
            // Spin-lock contention counters (PROF_SPIN builds only).
            {
                uint32_t g;

                statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
                statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
                for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                    statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
                }
            }
#endif
        }

        if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
            char *fmt;
            if (RtsFlags.MiscFlags.machineReadable) {
                fmt =
                    " [(\"bytes allocated\", \"%" FMT_Word64 "\")\n"
                    " ,(\"num_GCs\", \"%" FMT_Word32 "\")\n"
                    " ,(\"average_bytes_used\", \"%" FMT_Word64 "\")\n"
                    " ,(\"max_bytes_used\", \"%" FMT_Word64 "\")\n"
                    " ,(\"num_byte_usage_samples\", \"%" FMT_Word32 "\")\n"
                    " ,(\"peak_megabytes_allocated\", \"%" FMT_Word64 "\")\n"
                    " ,(\"init_cpu_seconds\", \"%.3f\")\n"
                    " ,(\"init_wall_seconds\", \"%.3f\")\n"
                    " ,(\"mutator_cpu_seconds\", \"%.3f\")\n"
                    " ,(\"mutator_wall_seconds\", \"%.3f\")\n"
                    " ,(\"GC_cpu_seconds\", \"%.3f\")\n"
                    " ,(\"GC_wall_seconds\", \"%.3f\")\n"
                    " ]\n";
            }
            else {
                fmt =
                    "<<ghc: %" FMT_Word64 " bytes, "
                    "%" FMT_Word32 " GCs, "
                    "%" FMT_Word64 "/%" FMT_Word64 " avg/max bytes residency (%" FMT_Word32 " samples), "
                    "%" FMT_Word64 "M in use, "
                    "%.3f INIT (%.3f elapsed), "
                    "%.3f MUT (%.3f elapsed), "
                    "%.3f GC (%.3f elapsed) :ghc>>\n";
            }
            /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
            statsPrintf(fmt,
                        stats.allocated_bytes,
                        stats.gcs,
                        (uint64_t)
                            (stats.major_gcs == 0 ? 0 :
                             stats.cumulative_live_bytes/stats.major_gcs),
                        stats.max_live_bytes,
                        stats.major_gcs,
                        (uint64_t) (peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
                        TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed),
                        TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed),
                        TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
        }

        statsFlush();
        statsClose();
    }

    // Release the per-generation timing arrays allocated in initStats1().
    if (GC_coll_cpu) {
        stgFree(GC_coll_cpu);
        GC_coll_cpu = NULL;
    }
    if (GC_coll_elapsed) {
        stgFree(GC_coll_elapsed);
        GC_coll_elapsed = NULL;
    }
    if (GC_coll_max_pause) {
        stgFree(GC_coll_max_pause);
        GC_coll_max_pause = NULL;
    }
}
827
828 /* -----------------------------------------------------------------------------
829 stat_describe_gens
830
831 Produce some detailed info on the state of the generational GC.
832 -------------------------------------------------------------------------- */
/*
 * Dump a table describing the state of every generation — max blocks,
 * mutable-list bytes, live blocks, large/compact object counts, live
 * data and slop — to the debug output channel.
 */
void
statDescribeGens(void)
{
    uint32_t g, mut, lge, compacts, i;
    W_ gen_slop;
    W_ tot_live, tot_slop;
    W_ gen_live, gen_blocks;
    bdescr *bd;
    generation *gen;

    debugBelch(
        "----------------------------------------------------------------------\n"
        " Gen Max Mut-list Blocks Large Compacts Live Slop\n"
        " Blocks Bytes Objects \n"
        "----------------------------------------------------------------------\n");

    tot_live = 0;
    tot_slop = 0;

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gen = &generations[g];

        // Walk the large-object list to count its entries.
        for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
            lge++;
        }

        // Likewise for compact objects.
        for (bd = gen->compact_objects, compacts = 0; bd; bd = bd->link) {
            compacts++;
        }

        gen_live = genLiveWords(gen);
        gen_blocks = genLiveBlocks(gen);

        mut = 0;
        for (i = 0; i < n_capabilities; i++) {
            // Space occupied by this capability's mutable list for gen g.
            mut += countOccupied(capabilities[i]->mut_lists[g]);

            // Add the pinned object block.
            bd = capabilities[i]->pinned_object_block;
            if (bd != NULL) {
                gen_live += bd->free - bd->start;
                gen_blocks += bd->blocks;
            }

            // Include per-GC-thread workspaces in the totals.
            gen_live += gcThreadLiveWords(i,g);
            gen_blocks += gcThreadLiveBlocks(i,g);
        }

        debugBelch("%5d %7" FMT_Word " %9d", g, (W_)gen->max_blocks, mut);

        // Slop: block space not occupied by live words.
        gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;

        debugBelch("%8" FMT_Word " %8d %8d %9" FMT_Word " %9" FMT_Word "\n", gen_blocks, lge, compacts,
                   gen_live*(W_)sizeof(W_), gen_slop*(W_)sizeof(W_));
        tot_live += gen_live;
        tot_slop += gen_slop;
    }
    debugBelch("----------------------------------------------------------------------\n");
    debugBelch("%51s%9" FMT_Word " %9" FMT_Word "\n",
               "",tot_live*sizeof(W_),tot_slop*sizeof(W_));
    debugBelch("----------------------------------------------------------------------\n");
    debugBelch("\n");
}
896
897 /* -----------------------------------------------------------------------------
898 Stats available via a programmatic interface, so eg. GHCi can time
899 each compilation and expression evaluation.
900 -------------------------------------------------------------------------- */
901
902 uint64_t getAllocations( void )
903 {
904 return stats.allocated_bytes;
905 }
906
907 int getRTSStatsEnabled( void )
908 {
909 return RtsFlags.GcFlags.giveStats != NO_GC_STATS;
910 }
911
912 void getRTSStats( RTSStats *s )
913 {
914 Time current_elapsed = 0;
915 Time current_cpu = 0;
916
917 *s = stats;
918
919 getProcessTimes(&current_cpu, &current_elapsed);
920 s->cpu_ns = current_cpu - end_init_cpu;
921 s->elapsed_ns = current_elapsed - end_init_elapsed;
922
923 s->mutator_cpu_ns = current_cpu - end_init_cpu - stats.gc_cpu_ns -
924 PROF_VAL(RP_tot_time + HC_tot_time);
925 s->mutator_elapsed_ns = current_elapsed - end_init_elapsed -
926 stats.gc_elapsed_ns;
927 }
928
929 /* -----------------------------------------------------------------------------
930 Dumping stuff in the stats file, or via the debug message interface
931 -------------------------------------------------------------------------- */
932
933 void
934 statsPrintf( char *s, ... )
935 {
936 FILE *sf = RtsFlags.GcFlags.statsFile;
937 va_list ap;
938
939 va_start(ap,s);
940 if (sf == NULL) {
941 vdebugBelch(s,ap);
942 } else {
943 vfprintf(sf, s, ap);
944 }
945 va_end(ap);
946 }
947
948 static void
949 statsFlush( void )
950 {
951 FILE *sf = RtsFlags.GcFlags.statsFile;
952 if (sf != NULL) {
953 fflush(sf);
954 }
955 }
956
957 static void
958 statsClose( void )
959 {
960 FILE *sf = RtsFlags.GcFlags.statsFile;
961 if (sf != NULL) {
962 fclose(sf);
963 }
964 }