Support more sphinx-build versions in configure script
[ghc.git] / rts / Stats.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2005
4 *
5 * Statistics and timing-related functions.
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "PosixSource.h"
10 #include "Rts.h"
11
12 #include "RtsFlags.h"
13 #include "RtsUtils.h"
14 #include "Schedule.h"
15 #include "Stats.h"
16 #include "Profiling.h"
17 #include "GetTime.h"
18 #include "sm/Storage.h"
19 #include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
20 #include "sm/GCThread.h"
21 #include "sm/BlockAlloc.h"
22
23 #if USE_PAPI
24 #include "Papi.h"
25 #endif
26
/* Scratch-buffer size for strings formatted by showStgWord64 in the
 * report printers below. */
#define BIG_STRING_LEN 512

/* Convert a Time (TIME_RESOLUTION ticks per second) to seconds. */
#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)

/* CPU and wall-clock timestamps bracketing the INIT and EXIT phases of
 * the RTS; everything in between is mutator + GC time. */
static Time
    start_init_cpu, start_init_elapsed,
    end_init_cpu, end_init_elapsed,
    start_exit_cpu, start_exit_elapsed,
    end_exit_cpu, end_exit_elapsed;

/* Total CPU time spent in GC so far (accumulated in stat_endGC). */
static Time GC_tot_cpu = 0;

/* Cumulative words allocated / copied over the whole run. */
static StgWord64 GC_tot_alloc = 0;
static StgWord64 GC_tot_copied = 0;

/* Parallel-GC copy counters, used for the work-balance figure in stat_exit. */
static StgWord64 GC_par_max_copied = 0;
static StgWord64 GC_par_tot_copied = 0;

#ifdef PROFILING
static Time RP_start_time = 0, RP_tot_time = 0; // retainer prof user time
static Time RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time

static Time HC_start_time, HC_tot_time = 0; // heap census prof user time
static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
#endif

/* PROF_VAL(x) evaluates to x in profiling builds and 0 otherwise. */
#ifdef PROFILING
#define PROF_VAL(x) (x)
#else
#define PROF_VAL(x) 0
#endif

// current = current as of last GC
static W_ current_residency = 0; // in words; for stats only
static W_ max_residency = 0;
static W_ cumulative_residency = 0;
static W_ residency_samples = 0; // for stats only
// NOTE(review): current_slop is never assigned anywhere in this file, so
// getGCStats() will always report current_bytes_slop == 0 -- confirm whether
// stat_endGC was meant to update it alongside current_residency.
static W_ current_slop = 0;
static W_ max_slop = 0;

/* Page-fault count as of the end of the last GC line printed (verbose mode). */
static W_ GC_end_faults = 0;

/* Per-generation accumulators; allocated and zeroed in initStats1(). */
static Time *GC_coll_cpu = NULL;
static Time *GC_coll_elapsed = NULL;
static Time *GC_coll_max_pause = NULL;

static void statsFlush( void );
static void statsClose( void );
76
77 /* -----------------------------------------------------------------------------
78 Current elapsed time
79 ------------------------------------------------------------------------- */
80
81 Time stat_getElapsedTime(void)
82 {
83 return getProcessElapsedTime() - start_init_elapsed;
84 }
85
86 /* ---------------------------------------------------------------------------
87 Measure the current MUT time, for profiling
88 ------------------------------------------------------------------------ */
89
90 double
91 mut_user_time_until( Time t )
92 {
93 return TimeToSecondsDbl(t - GC_tot_cpu);
94 // heapCensus() time is included in GC_tot_cpu, so we don't need
95 // to subtract it here.
96 }
97
/* MUT CPU seconds as of now. */
double
mut_user_time( void )
{
    return mut_user_time_until(getProcessCPUTime());
}
105
106 #ifdef PROFILING
107 /*
108 mut_user_time_during_RP() returns the MUT time during retainer profiling.
109 The same is for mut_user_time_during_HC();
110 */
111 double
112 mut_user_time_during_RP( void )
113 {
114 return TimeToSecondsDbl(RP_start_time - GC_tot_cpu - RP_tot_time);
115 }
116
/* MUT time as of the start of the current heap census.
 * NOTE(review): this subtracts RP_tot_time (not HC_tot_time), mirroring
 * mut_user_time_during_RP(); presumably heap-census time is already part
 * of GC_tot_cpu -- confirm before changing. */
double
mut_user_time_during_heap_census( void )
{
    return TimeToSecondsDbl(HC_start_time - GC_tot_cpu - RP_tot_time);
}
122 #endif /* PROFILING */
123
124 /* ---------------------------------------------------------------------------
125 initStats0() has no dependencies, it can be called right at the beginning
126 ------------------------------------------------------------------------ */
127
128 void
129 initStats0(void)
130 {
131 start_init_cpu = 0;
132 start_init_elapsed = 0;
133 end_init_cpu = 0;
134 end_init_elapsed = 0;
135
136 start_exit_cpu = 0;
137 start_exit_elapsed = 0;
138 end_exit_cpu = 0;
139 end_exit_elapsed = 0;
140
141 GC_tot_alloc = 0;
142 GC_tot_copied = 0;
143 GC_par_max_copied = 0;
144 GC_par_tot_copied = 0;
145 GC_tot_cpu = 0;
146
147 #ifdef PROFILING
148 RP_start_time = 0;
149 RP_tot_time = 0;
150 RPe_start_time = 0;
151 RPe_tot_time = 0;
152
153 HC_start_time = 0;
154 HC_tot_time = 0;
155 HCe_start_time = 0;
156 HCe_tot_time = 0;
157 #endif
158
159 max_residency = 0;
160 cumulative_residency = 0;
161 residency_samples = 0;
162 max_slop = 0;
163
164 GC_end_faults = 0;
165 }
166
167 /* ---------------------------------------------------------------------------
168 initStats1() can be called after setupRtsFlags()
169 ------------------------------------------------------------------------ */
170
171 void
172 initStats1 (void)
173 {
174 nat i;
175
176 if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
177 statsPrintf(" Alloc Copied Live GC GC TOT TOT Page Flts\n");
178 statsPrintf(" bytes bytes bytes user elap user elap\n");
179 }
180 GC_coll_cpu =
181 (Time *)stgMallocBytes(
182 sizeof(Time)*RtsFlags.GcFlags.generations,
183 "initStats");
184 GC_coll_elapsed =
185 (Time *)stgMallocBytes(
186 sizeof(Time)*RtsFlags.GcFlags.generations,
187 "initStats");
188 GC_coll_max_pause =
189 (Time *)stgMallocBytes(
190 sizeof(Time)*RtsFlags.GcFlags.generations,
191 "initStats");
192 for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
193 GC_coll_cpu[i] = 0;
194 GC_coll_elapsed[i] = 0;
195 GC_coll_max_pause[i] = 0;
196 }
197 }
198
199 /* -----------------------------------------------------------------------------
200 Initialisation time...
201 -------------------------------------------------------------------------- */
202
203 void
204 stat_startInit(void)
205 {
206 getProcessTimes(&start_init_cpu, &start_init_elapsed);
207 }
208
209 void
210 stat_endInit(void)
211 {
212 getProcessTimes(&end_init_cpu, &end_init_elapsed);
213
214 #if USE_PAPI
215 /* We start counting events for the mutator
216 * when garbage collection starts
217 * we switch to the GC event set. */
218 papi_start_mutator_count();
219
220 /* This flag is needed to avoid counting the last GC */
221 papi_is_reporting = 1;
222
223 #endif
224 }
225
226 /* -----------------------------------------------------------------------------
227 stat_startExit and stat_endExit
228
229 These two measure the time taken in shutdownHaskell().
230 -------------------------------------------------------------------------- */
231
232 void
233 stat_startExit(void)
234 {
235 getProcessTimes(&start_exit_cpu, &start_exit_elapsed);
236
237 #if USE_PAPI
238 /* We stop counting mutator events
239 * GC events are not being counted at this point */
240 papi_stop_mutator_count();
241
242 /* This flag is needed, because GC is run once more after this function */
243 papi_is_reporting = 0;
244 #endif
245 }
246
247 void
248 stat_endExit(void)
249 {
250 getProcessTimes(&end_exit_cpu, &end_exit_elapsed);
251 }
252
253 void
254 stat_startGCSync (gc_thread *gct)
255 {
256 gct->gc_sync_start_elapsed = getProcessElapsedTime();
257 }
258
259 /* -----------------------------------------------------------------------------
260 Called at the beginning of each GC
261 -------------------------------------------------------------------------- */
262
// Non-zero when " GC " has been printed by the -B2 bell option and still
// needs rubbing out at the end of the collection (see stat_endGC).
static nat rub_bell = 0;

/*
 * Record the start of a GC: ring the bell if requested (-B), switch PAPI
 * counters from mutator to GC events, timestamp the GC start on this
 * gc_thread, emit the GC_START eventlog record, and snapshot the
 * page-fault count for the verbose per-GC report.
 */
void
stat_startGC (Capability *cap, gc_thread *gct)
{
    nat bell = RtsFlags.GcFlags.ringBell;

    if (bell) {
        if (bell > 1) {
            // -B given more than once: print a visible marker instead
            debugBelch(" GC ");
            rub_bell = 1;
        } else {
            debugBelch("\007"); // ASCII BEL
        }
    }

#if USE_PAPI
    if(papi_is_reporting) {
        /* Switch to counting GC events */
        papi_stop_mutator_count();
        papi_start_gc_count();
    }
#endif

    getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);

    // Post EVENT_GC_START with the same timestamp as used for stats
    // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
    // Here, as opposed to other places, the event is emitted on the cap
    // that initiates the GC and external tools expect it to have the same
    // timestamp as used in +RTS -s calculations.
    traceEventGcStartAtT(cap,
                         TimeToNS(gct->gc_start_elapsed - start_init_elapsed));

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
    {
        gct->gc_start_faults = getPageFaults();
    }

    updateNurseriesStats();
}
304
305 /* -----------------------------------------------------------------------------
306 Called at the end of each GC
307 -------------------------------------------------------------------------- */
308
/*
 * Record the end of a GC: emit the eventlog records (global sync, per-GC
 * stats, GC_END), fold this collection's times into the per-generation and
 * global accumulators, print a per-GC line in verbose (+RTS -S) mode,
 * invoke the user's gcDoneHook, and sample residency after a major GC.
 *
 * live/copied/slop and the par_* figures are in words; gen is the oldest
 * generation collected; par_n_threads is the number of GC threads used.
 */
void
stat_endGC (Capability *cap, gc_thread *gct,
            W_ live, W_ copied, W_ slop, nat gen,
            nat par_n_threads, W_ par_max_copied, W_ par_tot_copied)
{
    W_ tot_alloc;
    W_ alloc;

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
        rtsConfig.gcDoneHook != NULL ||
        RtsFlags.ProfFlags.doHeapProfile)
        // heap profiling needs GC_tot_time
    {
        Time cpu, elapsed, gc_cpu, gc_elapsed, gc_sync_elapsed;

        // Has to be emitted while all caps stopped for GC, but before GC_END.
        // See trac.haskell.org/ThreadScope/wiki/RTSsummaryEvents
        // for a detailed design rationale of the current setup
        // of GC eventlog events.
        traceEventGcGlobalSync(cap);

        // Emitted before GC_END on all caps, which simplifies tools code.
        traceEventGcStats(cap,
                          CAPSET_HEAP_DEFAULT,
                          gen,
                          copied * sizeof(W_),
                          slop * sizeof(W_),
                          /* current loss due to fragmentation */
                          (mblocks_allocated * BLOCKS_PER_MBLOCK - n_alloc_blocks)
                          * BLOCK_SIZE_W * sizeof(W_),
                          par_n_threads,
                          par_max_copied * sizeof(W_),
                          par_tot_copied * sizeof(W_));

        getProcessTimes(&cpu, &elapsed);

        // Post EVENT_GC_END with the same timestamp as used for stats
        // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
        // Here, as opposed to other places, the event is emitted on the cap
        // that initiates the GC and external tools expect it to have the same
        // timestamp as used in +RTS -s calculations.
        traceEventGcEndAtT(cap, TimeToNS(elapsed - start_init_elapsed));

        // Sync time = time this thread spent waiting for all caps to stop;
        // GC time proper starts at gc_start_elapsed.
        gc_sync_elapsed = gct->gc_start_elapsed - gct->gc_sync_start_elapsed;
        gc_elapsed = elapsed - gct->gc_start_elapsed;
        gc_cpu = cpu - gct->gc_start_cpu;

        /* For the moment we calculate both per-HEC and total allocation.
         * There is thus redundancy here, but for the moment we will calculate
         * it both the old and new way and assert they're the same.
         * When we're sure it's working OK then we can simplify things.
         */
        tot_alloc = calcTotalAllocated();

        // allocated since the last GC
        alloc = tot_alloc - GC_tot_alloc;
        GC_tot_alloc = tot_alloc;

        if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
            W_ faults = getPageFaults();

            statsPrintf("%9" FMT_SizeT " %9" FMT_SizeT " %9" FMT_SizeT,
                        alloc*sizeof(W_), copied*sizeof(W_),
                        live*sizeof(W_));
            statsPrintf(" %6.3f %6.3f %8.3f %8.3f %4" FMT_Word " %4" FMT_Word " (Gen: %2d)\n",
                        TimeToSecondsDbl(gc_cpu),
                        TimeToSecondsDbl(gc_elapsed),
                        TimeToSecondsDbl(cpu),
                        TimeToSecondsDbl(elapsed - start_init_elapsed),
                        faults - gct->gc_start_faults,
                        gct->gc_start_faults - GC_end_faults,
                        gen);

            GC_end_faults = faults;
            statsFlush();
        }


        if (rtsConfig.gcDoneHook != NULL) {
            // User callback; all sizes in bytes, times in nanoseconds.
            rtsConfig.gcDoneHook(gen,
                                 alloc*sizeof(W_),
                                 live*sizeof(W_),
                                 copied*sizeof(W_),
                                 par_max_copied * sizeof(W_),
                                 mblocks_allocated * BLOCKS_PER_MBLOCK
                                 * BLOCK_SIZE_W * sizeof(W_),
                                 slop * sizeof(W_),
                                 TimeToNS(gc_sync_elapsed),
                                 TimeToNS(gc_elapsed),
                                 TimeToNS(gc_cpu));
        }

        // Fold this collection into the per-generation accumulators.
        GC_coll_cpu[gen] += gc_cpu;
        GC_coll_elapsed[gen] += gc_elapsed;
        if (GC_coll_max_pause[gen] < gc_elapsed) {
            GC_coll_max_pause[gen] = gc_elapsed;
        }

        GC_tot_copied += (StgWord64) copied;
        GC_par_max_copied += (StgWord64) par_max_copied;
        GC_par_tot_copied += (StgWord64) par_tot_copied;
        GC_tot_cpu += gc_cpu;

        traceEventHeapSize(cap,
                           CAPSET_HEAP_DEFAULT,
                           mblocks_allocated * MBLOCK_SIZE_W * sizeof(W_));

        if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
            // Residency is only meaningful after a major collection.
            if (live > max_residency) {
                max_residency = live;
            }
            current_residency = live;
            residency_samples++;
            cumulative_residency += live;
            traceEventHeapLive(cap,
                               CAPSET_HEAP_DEFAULT,
                               live * sizeof(W_));
        }

        if (slop > max_slop) max_slop = slop;
    }

    if (rub_bell) {
        // Erase the " GC " marker printed by stat_startGC (-B2).
        debugBelch("\b\b\b  \b\b\b");
        rub_bell = 0;
    }

#if USE_PAPI
    if(papi_is_reporting) {
        /* Switch to counting mutator events */
        if (gen == 0) {
            papi_stop_gc0_count();
        } else {
            papi_stop_gc1_count();
        }
        papi_start_mutator_count();
    }
#endif
}
448
/* -----------------------------------------------------------------------------
   Called at the beginning of each Retainer Profiling
   -------------------------------------------------------------------------- */
452 #ifdef PROFILING
453 void
454 stat_startRP(void)
455 {
456 Time user, elapsed;
457 getProcessTimes( &user, &elapsed );
458
459 RP_start_time = user;
460 RPe_start_time = elapsed;
461 }
462 #endif /* PROFILING */
463
/* -----------------------------------------------------------------------------
   Called at the end of each Retainer Profiling
   -------------------------------------------------------------------------- */
467
468 #ifdef PROFILING
469 void
470 stat_endRP(
471 nat retainerGeneration,
472 #ifdef DEBUG_RETAINER
473 nat maxCStackSize,
474 int maxStackSize,
475 #endif
476 double averageNumVisit)
477 {
478 Time user, elapsed;
479 getProcessTimes( &user, &elapsed );
480
481 RP_tot_time += user - RP_start_time;
482 RPe_tot_time += elapsed - RPe_start_time;
483
484 fprintf(prof_file, "Retainer Profiling: %d, at %f seconds\n",
485 retainerGeneration, mut_user_time_during_RP());
486 #ifdef DEBUG_RETAINER
487 fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
488 fprintf(prof_file, "\tMax auxiliary stack size = %u\n", maxStackSize);
489 #endif
490 fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit);
491 }
492 #endif /* PROFILING */
493
494 /* -----------------------------------------------------------------------------
495 Called at the beginning of each heap census
496 -------------------------------------------------------------------------- */
497 #ifdef PROFILING
498 void
499 stat_startHeapCensus(void)
500 {
501 Time user, elapsed;
502 getProcessTimes( &user, &elapsed );
503
504 HC_start_time = user;
505 HCe_start_time = elapsed;
506 }
507 #endif /* PROFILING */
508
509 /* -----------------------------------------------------------------------------
510 Called at the end of each heap census
511 -------------------------------------------------------------------------- */
512 #ifdef PROFILING
513 void
514 stat_endHeapCensus(void)
515 {
516 Time user, elapsed;
517 getProcessTimes( &user, &elapsed );
518
519 HC_tot_time += user - HC_start_time;
520 HCe_tot_time += elapsed - HCe_start_time;
521 }
522 #endif /* PROFILING */
523
524 /* -----------------------------------------------------------------------------
525 Called at the end of execution
526
527 NOTE: number of allocations is not entirely accurate: it doesn't
528 take into account the few bytes at the end of the heap that
529 were left unused when the heap-check failed.
530 -------------------------------------------------------------------------- */
531
#ifdef DEBUG
/* DEBUG-only call counters (updated elsewhere in the RTS); initialised to
 * 1 so the percentage reports below never divide by zero. */
#define TICK_VAR_INI(arity) \
  StgInt SLOW_CALLS_##arity = 1; \
  StgInt RIGHT_ARITY_##arity = 1; \
  StgInt TAGGED_PTR_##arity = 0;

TICK_VAR_INI(1)
TICK_VAR_INI(2)

StgInt TOTAL_CALLS=1;
#endif

/* Report the value of a counter */
/* Relies on a 'temp' char buffer being in scope at the expansion site
 * (see stat_exit). */
#define REPORT(counter) \
  { \
    showStgWord64(counter,temp,rtsTrue/*commas*/); \
    statsPrintf(" (" #counter ") : %s\n",temp); \
  }

/* Report the value of a counter as a percentage of another counter */
#define REPORT_PCT(counter,countertot) \
  statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
              counter*100.0/countertot)

#define TICK_PRINT(arity) \
  REPORT(SLOW_CALLS_##arity); \
  REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \
  REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \
  REPORT(RIGHT_ARITY_##arity); \
  REPORT(TAGGED_PTR_##arity)

#define TICK_PRINT_TOT(arity) \
  statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
              SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)

/* Duration of the INIT phase, in CPU / wall-clock time respectively. */
static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; }
static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; }
569
570
/*
 * Called once at RTS shutdown. Computes the final breakdown of CPU and
 * elapsed time into INIT/MUT/GC/EXIT, then prints whichever report the
 * flags ask for: the verbose per-GC table footer, the +RTS -s summary,
 * or the +RTS -t one-liner. Finally frees the per-generation accumulator
 * arrays allocated by initStats1().
 */
void
stat_exit (void)
{
    generation *gen;
    Time gc_cpu = 0;
    Time gc_elapsed = 0;
    Time init_cpu = 0;
    Time init_elapsed = 0;
    Time mut_cpu = 0;
    Time mut_elapsed = 0;
    Time exit_cpu = 0;
    Time exit_elapsed = 0;
    W_ tot_alloc;
    W_ alloc;

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {

        char temp[BIG_STRING_LEN];
        Time tot_cpu;
        Time tot_elapsed;
        nat i, g, total_collections = 0;

        getProcessTimes( &tot_cpu, &tot_elapsed );
        tot_elapsed -= start_init_elapsed;

        tot_alloc = calcTotalAllocated();

        // allocated since the last GC
        alloc = tot_alloc - GC_tot_alloc;
        GC_tot_alloc = tot_alloc;

        /* Count total garbage collections */
        for (g = 0; g < RtsFlags.GcFlags.generations; g++)
            total_collections += generations[g].collections;

        /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
        if (tot_cpu == 0.0) tot_cpu = 1;
        if (tot_elapsed == 0.0) tot_elapsed = 1;

        if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
            // Final row of the -S table: allocation since the last GC.
            statsPrintf("%9" FMT_SizeT " %9.9s %9.9s", (W_)alloc*sizeof(W_), "", "");
            statsPrintf(" %6.3f %6.3f\n\n", 0.0, 0.0);
        }

        // Sum GC time over all generations.
        for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
            gc_cpu += GC_coll_cpu[i];
            gc_elapsed += GC_coll_elapsed[i];
        }

        // heapCensus() is called by the GC, so RP and HC time are
        // included in the GC stats. We therefore subtract them to
        // obtain the actual GC cpu time.
        gc_cpu -= PROF_VAL(RP_tot_time + HC_tot_time);
        gc_elapsed -= PROF_VAL(RPe_tot_time + HCe_tot_time);

        init_cpu = get_init_cpu();
        init_elapsed = get_init_elapsed();

        exit_cpu = end_exit_cpu - start_exit_cpu;
        exit_elapsed = end_exit_elapsed - start_exit_elapsed;

        // MUT = everything between end-of-init and start-of-exit that
        // wasn't GC (or, for CPU time, profiling).
        mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed;

        mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu
            - PROF_VAL(RP_tot_time + HC_tot_time);
        if (mut_cpu < 0) { mut_cpu = 0; }

        if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
            showStgWord64(GC_tot_alloc*sizeof(W_),
                          temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes allocated in the heap\n", temp);

            showStgWord64(GC_tot_copied*sizeof(W_),
                          temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes copied during GC\n", temp);

            if ( residency_samples > 0 ) {
                showStgWord64(max_residency*sizeof(W_),
                              temp, rtsTrue/*commas*/);
                statsPrintf("%16s bytes maximum residency (%" FMT_Word " sample(s))\n",
                            temp, residency_samples);
            }

            showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes maximum slop\n", temp);

            statsPrintf("%16" FMT_SizeT " MB total memory in use (%" FMT_SizeT " MB lost due to fragmentation)\n\n",
                        (size_t)(peak_mblocks_allocated * MBLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)),
                        (size_t)(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));

            /* Print garbage collections in each gen */
            statsPrintf(" Tot time (elapsed) Avg pause Max pause\n");
            for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                gen = &generations[g];
                statsPrintf(" Gen %2d %5d colls, %5d par %6.3fs %6.3fs %3.4fs %3.4fs\n",
                            gen->no,
                            gen->collections,
                            gen->par_collections,
                            TimeToSecondsDbl(GC_coll_cpu[g]),
                            TimeToSecondsDbl(GC_coll_elapsed[g]),
                            gen->collections == 0 ? 0 : TimeToSecondsDbl(GC_coll_elapsed[g] / gen->collections),
                            TimeToSecondsDbl(GC_coll_max_pause[g]));
            }

#if defined(THREADED_RTS)
            if (RtsFlags.ParFlags.parGcEnabled && n_capabilities > 1) {
                // 0% = one thread did all copying; 100% = perfectly even.
                statsPrintf("\n Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n",
                            100 * (((double)GC_par_tot_copied / (double)GC_par_max_copied) - 1)
                            / (n_capabilities - 1)
                    );
            }
#endif
            statsPrintf("\n");

#if defined(THREADED_RTS)
            statsPrintf(" TASKS: %d (%d bound, %d peak workers (%d total), using -N%d)\n",
                        taskCount, taskCount - workerCount,
                        peakWorkerCount, workerCount,
                        n_capabilities);

            statsPrintf("\n");

            {
                // Sum spark counters across all capabilities.
                nat i;
                SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
                for (i = 0; i < n_capabilities; i++) {
                    sparks.created += capabilities[i]->spark_stats.created;
                    sparks.dud += capabilities[i]->spark_stats.dud;
                    sparks.overflowed+= capabilities[i]->spark_stats.overflowed;
                    sparks.converted += capabilities[i]->spark_stats.converted;
                    sparks.gcd += capabilities[i]->spark_stats.gcd;
                    sparks.fizzled += capabilities[i]->spark_stats.fizzled;
                }

                statsPrintf(" SPARKS: %" FMT_Word " (%" FMT_Word " converted, %" FMT_Word " overflowed, %" FMT_Word " dud, %" FMT_Word " GC'd, %" FMT_Word " fizzled)\n\n",
                            sparks.created + sparks.dud + sparks.overflowed,
                            sparks.converted, sparks.overflowed, sparks.dud,
                            sparks.gcd, sparks.fizzled);
            }
#endif

            statsPrintf(" INIT time %7.3fs (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed));

            statsPrintf(" MUT time %7.3fs (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed));
            statsPrintf(" GC time %7.3fs (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));

#ifdef PROFILING
            statsPrintf(" RP time %7.3fs (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(RP_tot_time), TimeToSecondsDbl(RPe_tot_time));
            statsPrintf(" PROF time %7.3fs (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(HC_tot_time), TimeToSecondsDbl(HCe_tot_time));
#endif
            statsPrintf(" EXIT time %7.3fs (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(exit_cpu), TimeToSecondsDbl(exit_elapsed));
            statsPrintf(" Total time %7.3fs (%7.3fs elapsed)\n\n",
                        TimeToSecondsDbl(tot_cpu), TimeToSecondsDbl(tot_elapsed));
#ifndef THREADED_RTS
            statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n",
                        TimeToSecondsDbl(gc_cpu)*100/TimeToSecondsDbl(tot_cpu),
                        TimeToSecondsDbl(gc_elapsed)*100/TimeToSecondsDbl(tot_elapsed));
#endif

            if (mut_cpu == 0) {
                showStgWord64(0, temp, rtsTrue/*commas*/);
            } else {
                showStgWord64(
                    (StgWord64)((GC_tot_alloc*sizeof(W_)) / TimeToSecondsDbl(mut_cpu)),
                    temp, rtsTrue/*commas*/);
            }

            statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp);

            statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
                        TimeToSecondsDbl(tot_cpu - gc_cpu -
                                         PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                        / TimeToSecondsDbl(tot_cpu),
                        TimeToSecondsDbl(tot_cpu - gc_cpu -
                                         PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                        / TimeToSecondsDbl(tot_elapsed));

            /*
            TICK_PRINT(1);
            TICK_PRINT(2);
            REPORT(TOTAL_CALLS);
            TICK_PRINT_TOT(1);
            TICK_PRINT_TOT(2);
            */

#if USE_PAPI
            papi_stats_report();
#endif
#if defined(THREADED_RTS) && defined(PROF_SPIN)
            {
                nat g;

                statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
                statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
                for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                    statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
                }
            }
#endif
        }

        if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
            // +RTS -t: one machine-parsable line (or association list with
            // --machine-readable).
            char *fmt1, *fmt2;
            if (RtsFlags.MiscFlags.machineReadable) {
                fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
                fmt2 = " ,(\"num_GCs\", \"%d\")\n"
                       " ,(\"average_bytes_used\", \"%ld\")\n"
                       " ,(\"max_bytes_used\", \"%ld\")\n"
                       " ,(\"num_byte_usage_samples\", \"%ld\")\n"
                       " ,(\"peak_megabytes_allocated\", \"%lu\")\n"
                       " ,(\"init_cpu_seconds\", \"%.3f\")\n"
                       " ,(\"init_wall_seconds\", \"%.3f\")\n"
                       " ,(\"mutator_cpu_seconds\", \"%.3f\")\n"
                       " ,(\"mutator_wall_seconds\", \"%.3f\")\n"
                       " ,(\"GC_cpu_seconds\", \"%.3f\")\n"
                       " ,(\"GC_wall_seconds\", \"%.3f\")\n"
                       " ]\n";
            }
            else {
                fmt1 = "<<ghc: %llu bytes, ";
                fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.3f INIT (%.3f elapsed), %.3f MUT (%.3f elapsed), %.3f GC (%.3f elapsed) :ghc>>\n";
            }
            /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
            statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
            statsPrintf(fmt2,
                        total_collections,
                        residency_samples == 0 ? 0 :
                            cumulative_residency*sizeof(W_)/residency_samples,
                        max_residency*sizeof(W_),
                        residency_samples,
                        (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
                        TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed),
                        TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed),
                        TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
        }

        statsFlush();
        statsClose();
    }

    // Release the per-generation accumulators from initStats1().
    if (GC_coll_cpu) {
        stgFree(GC_coll_cpu);
        GC_coll_cpu = NULL;
    }
    if (GC_coll_elapsed) {
        stgFree(GC_coll_elapsed);
        GC_coll_elapsed = NULL;
    }
    if (GC_coll_max_pause) {
        stgFree(GC_coll_max_pause);
        GC_coll_max_pause = NULL;
    }
}
830
831 /* -----------------------------------------------------------------------------
832 stat_describe_gens
833
834 Produce some detailed info on the state of the generational GC.
835 -------------------------------------------------------------------------- */
/*
 * Debug dump (via debugBelch) of per-generation occupancy: max blocks,
 * mutable-list bytes, block count, large-object count, live bytes and slop.
 * Each capability's mut-lists, pinned-object block and GC-thread-local data
 * are folded into the owning generation's figures.
 */
void
statDescribeGens(void)
{
  nat g, mut, lge, i;
  W_ gen_slop;
  W_ tot_live, tot_slop;
  W_ gen_live, gen_blocks;
  bdescr *bd;
  generation *gen;

  debugBelch(
"----------------------------------------------------------\n"
"  Gen     Max  Mut-list  Blocks    Large     Live     Slop\n"
"       Blocks     Bytes          Objects                  \n"
"----------------------------------------------------------\n");

  tot_live = 0;
  tot_slop = 0;

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      gen = &generations[g];

      // Count large objects by walking the list.
      for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
          lge++;
      }

      gen_live   = genLiveWords(gen);
      gen_blocks = genLiveBlocks(gen);

      mut = 0;
      for (i = 0; i < n_capabilities; i++) {
          mut += countOccupied(capabilities[i]->mut_lists[g]);

          // Add the pinned object block.
          bd = capabilities[i]->pinned_object_block;
          if (bd != NULL) {
              gen_live   += bd->free - bd->start;
              gen_blocks += bd->blocks;
          }

          gen_live   += gcThreadLiveWords(i,g);
          gen_blocks += gcThreadLiveBlocks(i,g);
      }

      debugBelch("%5d %7" FMT_Word " %9d", g, (W_)gen->max_blocks, mut);

      // Slop = space held in blocks but not occupied by live words.
      gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;

      debugBelch("%8" FMT_Word " %8d %8" FMT_Word " %8" FMT_Word "\n", gen_blocks, lge,
                 gen_live*(W_)sizeof(W_), gen_slop*(W_)sizeof(W_));
      tot_live += gen_live;
      tot_slop += gen_slop;
  }
  debugBelch("----------------------------------------------------------\n");
  debugBelch("%41s%8" FMT_SizeT " %8" FMT_SizeT "\n",
             "",tot_live*sizeof(W_),tot_slop*sizeof(W_));
  debugBelch("----------------------------------------------------------\n");
  debugBelch("\n");
}
895
896 /* -----------------------------------------------------------------------------
897 Stats available via a programmatic interface, so eg. GHCi can time
898 each compilation and expression evaluation.
899 -------------------------------------------------------------------------- */
900
901 extern HsInt64 getAllocations( void )
902 { return (HsInt64)GC_tot_alloc * sizeof(W_); }
903
904 /* EZY: I'm not convinced I got all the casting right. */
905
906 extern rtsBool getGCStatsEnabled( void )
907 {
908 return RtsFlags.GcFlags.giveStats != NO_GC_STATS;
909 }
910
/*
 * Fill in *s with a snapshot of the GC statistics, for the programmatic
 * interface (GHC.Stats). All byte figures are converted from words here.
 */
extern void getGCStats( GCStats *s )
{
    nat total_collections = 0;
    nat g;
    Time gc_cpu = 0;
    Time gc_elapsed = 0;
    Time current_elapsed = 0;
    Time current_cpu = 0;

    getProcessTimes(&current_cpu, &current_elapsed);

    /* EZY: static inline'ify these */
    for (g = 0; g < RtsFlags.GcFlags.generations; g++)
        total_collections += generations[g].collections;

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gc_cpu += GC_coll_cpu[g];
        gc_elapsed += GC_coll_elapsed[g];
    }

    s->bytes_allocated = GC_tot_alloc*(StgWord64)sizeof(W_);
    s->num_gcs = total_collections;
    s->num_byte_usage_samples = residency_samples;
    s->max_bytes_used = max_residency*sizeof(W_);
    s->cumulative_bytes_used = cumulative_residency*(StgWord64)sizeof(W_);
    s->peak_megabytes_allocated = (StgWord64)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L));
    s->bytes_copied = GC_tot_copied*(StgWord64)sizeof(W_);
    s->max_bytes_slop = max_slop*(StgWord64)sizeof(W_);
    s->current_bytes_used = current_residency*(StgWord64)sizeof(W_);
    // NOTE(review): current_slop is never written in this file, so this
    // field is always 0 -- confirm whether that is intended.
    s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_);
    /*
    s->init_cpu_seconds = TimeToSecondsDbl(get_init_cpu());
    s->init_wall_seconds = TimeToSecondsDbl(get_init_elapsed());
    */
    s->mutator_cpu_seconds = TimeToSecondsDbl(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
    s->mutator_wall_seconds = TimeToSecondsDbl(current_elapsed- end_init_elapsed - gc_elapsed);
    s->gc_cpu_seconds = TimeToSecondsDbl(gc_cpu);
    s->gc_wall_seconds = TimeToSecondsDbl(gc_elapsed);
    /* EZY: Being consistent with incremental output, but maybe should also discount init */
    s->cpu_seconds = TimeToSecondsDbl(current_cpu);
    s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed);
    s->par_tot_bytes_copied = GC_par_tot_copied*(StgWord64)sizeof(W_);
    s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_);
}
955 // extern void getTaskStats( TaskStats **s ) {}
956 #if 0
957 extern void getSparkStats( SparkCounters *s ) {
958 nat i;
959 s->created = 0;
960 s->dud = 0;
961 s->overflowed = 0;
962 s->converted = 0;
963 s->gcd = 0;
964 s->fizzled = 0;
965 for (i = 0; i < n_capabilities; i++) {
966 s->created += capabilities[i]->spark_stats.created;
967 s->dud += capabilities[i]->spark_stats.dud;
968 s->overflowed+= capabilities[i]->spark_stats.overflowed;
969 s->converted += capabilities[i]->spark_stats.converted;
970 s->gcd += capabilities[i]->spark_stats.gcd;
971 s->fizzled += capabilities[i]->spark_stats.fizzled;
972 }
973 }
974 #endif
975
976 /* -----------------------------------------------------------------------------
977 Dumping stuff in the stats file, or via the debug message interface
978 -------------------------------------------------------------------------- */
979
980 void
981 statsPrintf( char *s, ... )
982 {
983 FILE *sf = RtsFlags.GcFlags.statsFile;
984 va_list ap;
985
986 va_start(ap,s);
987 if (sf == NULL) {
988 vdebugBelch(s,ap);
989 } else {
990 vfprintf(sf, s, ap);
991 }
992 va_end(ap);
993 }
994
995 static void
996 statsFlush( void )
997 {
998 FILE *sf = RtsFlags.GcFlags.statsFile;
999 if (sf != NULL) {
1000 fflush(sf);
1001 }
1002 }
1003
1004 static void
1005 statsClose( void )
1006 {
1007 FILE *sf = RtsFlags.GcFlags.statsFile;
1008 if (sf != NULL) {
1009 fclose(sf);
1010 }
1011 }