Use a struct for the set of spark counters
[ghc.git] / rts / Stats.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2005
4 *
5 * Statistics and timing-related functions.
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "PosixSource.h"
10 #include "Rts.h"
11
12 #include "RtsUtils.h"
13 #include "Schedule.h"
14 #include "Stats.h"
15 #include "Profiling.h"
16 #include "GetTime.h"
17 #include "sm/Storage.h"
18 #include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
19 #include "sm/GCThread.h"
20 #include "sm/BlockAlloc.h"
21
22 #if USE_PAPI
23 #include "Papi.h"
24 #endif
25
26 /* huh? */
27 #define BIG_STRING_LEN 512
28
29 #define TICK_TO_DBL(t) ((double)(t) / TICKS_PER_SECOND)
30
31 static Ticks
32 start_init_cpu, start_init_elapsed,
33 end_init_cpu, end_init_elapsed,
34 start_exit_cpu, start_exit_elapsed,
35 end_exit_cpu, end_exit_elapsed;
36
37 static Ticks GC_tot_cpu = 0;
38
39 static StgWord64 GC_tot_alloc = 0;
40 static StgWord64 GC_tot_copied = 0;
41
42 static StgWord64 GC_par_max_copied = 0;
43 static StgWord64 GC_par_avg_copied = 0;
44
45 #ifdef PROFILING
46 static Ticks RP_start_time = 0, RP_tot_time = 0; // retainer prof user time
47 static Ticks RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time
48
49 static Ticks HC_start_time, HC_tot_time = 0; // heap census prof user time
50 static Ticks HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
51 #endif
52
53 #ifdef PROFILING
54 #define PROF_VAL(x) (x)
55 #else
56 #define PROF_VAL(x) 0
57 #endif
58
59 static lnat max_residency = 0; // in words; for stats only
60 static lnat avg_residency = 0;
61 static lnat residency_samples = 0; // for stats only
62 static lnat max_slop = 0;
63
64 static lnat GC_end_faults = 0;
65
66 static Ticks *GC_coll_cpu = NULL;
67 static Ticks *GC_coll_elapsed = NULL;
68 static Ticks *GC_coll_max_pause = NULL;
69
70 static void statsFlush( void );
71 static void statsClose( void );
72
73 /* -----------------------------------------------------------------------------
74 Current elapsed time
75 ------------------------------------------------------------------------- */
76
77 Ticks stat_getElapsedTime(void)
78 {
79 return getProcessElapsedTime() - start_init_elapsed;
80 }
81
82 /* ---------------------------------------------------------------------------
83 Measure the current MUT time, for profiling
84 ------------------------------------------------------------------------ */
85
86 double
87 mut_user_time( void )
88 {
89 Ticks cpu;
90 cpu = getProcessCPUTime();
91 return TICK_TO_DBL(cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
92 }
93
94 #ifdef PROFILING
95 /*
96 mut_user_time_during_RP() returns the MUT time during retainer profiling.
97 The same is for mut_user_time_during_HC();
98 */
99 double
100 mut_user_time_during_RP( void )
101 {
102 return TICK_TO_DBL(RP_start_time - GC_tot_cpu - RP_tot_time - HC_tot_time);
103 }
104
105 double
106 mut_user_time_during_heap_census( void )
107 {
108 return TICK_TO_DBL(HC_start_time - GC_tot_cpu - RP_tot_time - HC_tot_time);
109 }
110 #endif /* PROFILING */
111
112 /* ---------------------------------------------------------------------------
113 initStats0() has no dependencies, it can be called right at the beginning
114 ------------------------------------------------------------------------ */
115
116 void
117 initStats0(void)
118 {
119 start_init_cpu = 0;
120 start_init_elapsed = 0;
121 end_init_cpu = 0;
122 end_init_elapsed = 0;
123
124 start_exit_cpu = 0;
125 start_exit_elapsed = 0;
126 end_exit_cpu = 0;
127 end_exit_elapsed = 0;
128
129 GC_tot_alloc = 0;
130 GC_tot_copied = 0;
131 GC_par_max_copied = 0;
132 GC_par_avg_copied = 0;
133 GC_tot_cpu = 0;
134
135 #ifdef PROFILING
136 RP_start_time = 0;
137 RP_tot_time = 0;
138 RPe_start_time = 0;
139 RPe_tot_time = 0;
140
141 HC_start_time = 0;
142 HC_tot_time = 0;
143 HCe_start_time = 0;
144 HCe_tot_time = 0;
145 #endif
146
147 max_residency = 0;
148 avg_residency = 0;
149 residency_samples = 0;
150 max_slop = 0;
151
152 GC_end_faults = 0;
153 }
154
155 /* ---------------------------------------------------------------------------
156 initStats1() can be called after setupRtsFlags()
157 ------------------------------------------------------------------------ */
158
159 void
160 initStats1 (void)
161 {
162 nat i;
163
164 if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
165 statsPrintf(" Alloc Copied Live GC GC TOT TOT Page Flts\n");
166 statsPrintf(" bytes bytes bytes user elap user elap\n");
167 }
168 GC_coll_cpu =
169 (Ticks *)stgMallocBytes(
170 sizeof(Ticks)*RtsFlags.GcFlags.generations,
171 "initStats");
172 GC_coll_elapsed =
173 (Ticks *)stgMallocBytes(
174 sizeof(Ticks)*RtsFlags.GcFlags.generations,
175 "initStats");
176 GC_coll_max_pause =
177 (Ticks *)stgMallocBytes(
178 sizeof(Ticks)*RtsFlags.GcFlags.generations,
179 "initStats");
180 for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
181 GC_coll_cpu[i] = 0;
182 GC_coll_elapsed[i] = 0;
183 GC_coll_max_pause[i] = 0;
184 }
185 }
186
187 /* -----------------------------------------------------------------------------
188 Initialisation time...
189 -------------------------------------------------------------------------- */
190
191 void
192 stat_startInit(void)
193 {
194 getProcessTimes(&start_init_cpu, &start_init_elapsed);
195 }
196
197 void
198 stat_endInit(void)
199 {
200 getProcessTimes(&end_init_cpu, &end_init_elapsed);
201
202 #if USE_PAPI
203 /* We start counting events for the mutator
204 * when garbage collection starts
205 * we switch to the GC event set. */
206 papi_start_mutator_count();
207
208 /* This flag is needed to avoid counting the last GC */
209 papi_is_reporting = 1;
210
211 #endif
212 }
213
214 /* -----------------------------------------------------------------------------
215 stat_startExit and stat_endExit
216
217 These two measure the time taken in shutdownHaskell().
218 -------------------------------------------------------------------------- */
219
220 void
221 stat_startExit(void)
222 {
223 getProcessTimes(&start_exit_cpu, &start_exit_elapsed);
224
225 #if USE_PAPI
226 /* We stop counting mutator events
227 * GC events are not being counted at this point */
228 papi_stop_mutator_count();
229
230 /* This flag is needed, because GC is run once more after this function */
231 papi_is_reporting = 0;
232 #endif
233 }
234
235 void
236 stat_endExit(void)
237 {
238 getProcessTimes(&end_exit_cpu, &end_exit_elapsed);
239 }
240
241 /* -----------------------------------------------------------------------------
242 Called at the beginning of each GC
243 -------------------------------------------------------------------------- */
244
245 static nat rub_bell = 0;
246
247 void
248 stat_startGC (gc_thread *gct)
249 {
250 nat bell = RtsFlags.GcFlags.ringBell;
251
252 if (bell) {
253 if (bell > 1) {
254 debugBelch(" GC ");
255 rub_bell = 1;
256 } else {
257 debugBelch("\007");
258 }
259 }
260
261 #if USE_PAPI
262 if(papi_is_reporting) {
263 /* Switch to counting GC events */
264 papi_stop_mutator_count();
265 papi_start_gc_count();
266 }
267 #endif
268
269 getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
270 gct->gc_start_thread_cpu = getThreadCPUTime();
271
272 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
273 {
274 gct->gc_start_faults = getPageFaults();
275 }
276 }
277
278 void
279 stat_gcWorkerThreadStart (gc_thread *gct)
280 {
281 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
282 {
283 getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
284 gct->gc_start_thread_cpu = getThreadCPUTime();
285 }
286 }
287
288 void
289 stat_gcWorkerThreadDone (gc_thread *gct)
290 {
291 Ticks thread_cpu, elapsed, gc_cpu, gc_elapsed;
292
293 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
294 {
295 elapsed = getProcessElapsedTime();
296 thread_cpu = getThreadCPUTime();
297
298 gc_cpu = thread_cpu - gct->gc_start_thread_cpu;
299 gc_elapsed = elapsed - gct->gc_start_elapsed;
300
301 taskDoneGC(gct->cap->running_task, gc_cpu, gc_elapsed);
302 }
303 }
304
305 /* -----------------------------------------------------------------------------
306 Called at the end of each GC
307 -------------------------------------------------------------------------- */
308
309 void
310 stat_endGC (gc_thread *gct,
311 lnat alloc, lnat live, lnat copied, nat gen,
312 lnat max_copied, lnat avg_copied, lnat slop)
313 {
314 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
315 RtsFlags.ProfFlags.doHeapProfile)
316 // heap profiling needs GC_tot_time
317 {
318 Ticks cpu, elapsed, thread_gc_cpu, gc_cpu, gc_elapsed;
319
320 getProcessTimes(&cpu, &elapsed);
321 gc_elapsed = elapsed - gct->gc_start_elapsed;
322
323 thread_gc_cpu = getThreadCPUTime() - gct->gc_start_thread_cpu;
324
325 gc_cpu = cpu - gct->gc_start_cpu;
326
327 taskDoneGC(gct->cap->running_task, thread_gc_cpu, gc_elapsed);
328
329 if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
330 nat faults = getPageFaults();
331
332 statsPrintf("%9ld %9ld %9ld",
333 alloc*sizeof(W_), copied*sizeof(W_),
334 live*sizeof(W_));
335 statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2d)\n",
336 TICK_TO_DBL(gc_cpu),
337 TICK_TO_DBL(gc_elapsed),
338 TICK_TO_DBL(cpu),
339 TICK_TO_DBL(elapsed - start_init_elapsed),
340 faults - gct->gc_start_faults,
341 gct->gc_start_faults - GC_end_faults,
342 gen);
343
344 GC_end_faults = faults;
345 statsFlush();
346 }
347
348 GC_coll_cpu[gen] += gc_cpu;
349 GC_coll_elapsed[gen] += gc_elapsed;
350 if (GC_coll_max_pause[gen] < gc_elapsed) {
351 GC_coll_max_pause[gen] = gc_elapsed;
352 }
353
354 GC_tot_copied += (StgWord64) copied;
355 GC_tot_alloc += (StgWord64) alloc;
356 GC_par_max_copied += (StgWord64) max_copied;
357 GC_par_avg_copied += (StgWord64) avg_copied;
358 GC_tot_cpu += gc_cpu;
359
360 if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
361 if (live > max_residency) {
362 max_residency = live;
363 }
364 residency_samples++;
365 avg_residency += live;
366 }
367
368 if (slop > max_slop) max_slop = slop;
369 }
370
371 if (rub_bell) {
372 debugBelch("\b\b\b \b\b\b");
373 rub_bell = 0;
374 }
375
376 #if USE_PAPI
377 if(papi_is_reporting) {
378 /* Switch to counting mutator events */
379 if (gen == 0) {
380 papi_stop_gc0_count();
381 } else {
382 papi_stop_gc1_count();
383 }
384 papi_start_mutator_count();
385 }
386 #endif
387 }
388
389 /* -----------------------------------------------------------------------------
390 Called at the beginning of each Retainer Profiliing
391 -------------------------------------------------------------------------- */
392 #ifdef PROFILING
393 void
394 stat_startRP(void)
395 {
396 Ticks user, elapsed;
397 getProcessTimes( &user, &elapsed );
398
399 RP_start_time = user;
400 RPe_start_time = elapsed;
401 }
402 #endif /* PROFILING */
403
404 /* -----------------------------------------------------------------------------
405 Called at the end of each Retainer Profiliing
406 -------------------------------------------------------------------------- */
407
408 #ifdef PROFILING
409 void
410 stat_endRP(
411 nat retainerGeneration,
412 #ifdef DEBUG_RETAINER
413 nat maxCStackSize,
414 int maxStackSize,
415 #endif
416 double averageNumVisit)
417 {
418 Ticks user, elapsed;
419 getProcessTimes( &user, &elapsed );
420
421 RP_tot_time += user - RP_start_time;
422 RPe_tot_time += elapsed - RPe_start_time;
423
424 fprintf(prof_file, "Retainer Profiling: %d, at %f seconds\n",
425 retainerGeneration, mut_user_time_during_RP());
426 #ifdef DEBUG_RETAINER
427 fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
428 fprintf(prof_file, "\tMax auxiliary stack size = %u\n", maxStackSize);
429 #endif
430 fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit);
431 }
432 #endif /* PROFILING */
433
434 /* -----------------------------------------------------------------------------
435 Called at the beginning of each heap census
436 -------------------------------------------------------------------------- */
437 #ifdef PROFILING
438 void
439 stat_startHeapCensus(void)
440 {
441 Ticks user, elapsed;
442 getProcessTimes( &user, &elapsed );
443
444 HC_start_time = user;
445 HCe_start_time = elapsed;
446 }
447 #endif /* PROFILING */
448
449 /* -----------------------------------------------------------------------------
450 Called at the end of each heap census
451 -------------------------------------------------------------------------- */
452 #ifdef PROFILING
453 void
454 stat_endHeapCensus(void)
455 {
456 Ticks user, elapsed;
457 getProcessTimes( &user, &elapsed );
458
459 HC_tot_time += user - HC_start_time;
460 HCe_tot_time += elapsed - HCe_start_time;
461 }
462 #endif /* PROFILING */
463
464 /* -----------------------------------------------------------------------------
465 Called at the end of execution
466
467 NOTE: number of allocations is not entirely accurate: it doesn't
468 take into account the few bytes at the end of the heap that
469 were left unused when the heap-check failed.
470 -------------------------------------------------------------------------- */
471
472 #ifdef DEBUG
473 #define TICK_VAR_INI(arity) \
474 StgInt SLOW_CALLS_##arity = 1; \
475 StgInt RIGHT_ARITY_##arity = 1; \
476 StgInt TAGGED_PTR_##arity = 0;
477
478 TICK_VAR_INI(1)
479 TICK_VAR_INI(2)
480
481 StgInt TOTAL_CALLS=1;
482 #endif
483
484 /* Report the value of a counter */
485 #define REPORT(counter) \
486 { \
487 showStgWord64(counter,temp,rtsTrue/*commas*/); \
488 statsPrintf(" (" #counter ") : %s\n",temp); \
489 }
490
491 /* Report the value of a counter as a percentage of another counter */
492 #define REPORT_PCT(counter,countertot) \
493 statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
494 counter*100.0/countertot)
495
496 #define TICK_PRINT(arity) \
497 REPORT(SLOW_CALLS_##arity); \
498 REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \
499 REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \
500 REPORT(RIGHT_ARITY_##arity); \
501 REPORT(TAGGED_PTR_##arity)
502
503 #define TICK_PRINT_TOT(arity) \
504 statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
505 SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
506
507 void
508 stat_exit(int alloc)
509 {
510 generation *gen;
511 Ticks gc_cpu = 0;
512 Ticks gc_elapsed = 0;
513 Ticks init_cpu = 0;
514 Ticks init_elapsed = 0;
515 Ticks mut_cpu = 0;
516 Ticks mut_elapsed = 0;
517 Ticks exit_cpu = 0;
518 Ticks exit_elapsed = 0;
519
520 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
521
522 char temp[BIG_STRING_LEN];
523 Ticks tot_cpu;
524 Ticks tot_elapsed;
525 nat i, g, total_collections = 0;
526
527 getProcessTimes( &tot_cpu, &tot_elapsed );
528 tot_elapsed -= start_init_elapsed;
529
530 GC_tot_alloc += alloc;
531
532 /* Count total garbage collections */
533 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
534 total_collections += generations[g].collections;
535
536 /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
537 if (tot_cpu == 0.0) tot_cpu = 1;
538 if (tot_elapsed == 0.0) tot_elapsed = 1;
539
540 if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
541 statsPrintf("%9ld %9.9s %9.9s", (lnat)alloc*sizeof(W_), "", "");
542 statsPrintf(" %5.2f %5.2f\n\n", 0.0, 0.0);
543 }
544
545 for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
546 gc_cpu += GC_coll_cpu[i];
547 gc_elapsed += GC_coll_elapsed[i];
548 }
549
550 init_cpu = end_init_cpu - start_init_cpu;
551 init_elapsed = end_init_elapsed - start_init_elapsed;
552
553 exit_cpu = end_exit_cpu - start_exit_cpu;
554 exit_elapsed = end_exit_elapsed - start_exit_elapsed;
555
556 mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed;
557
558 mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu
559 - PROF_VAL(RP_tot_time + HC_tot_time);
560 if (mut_cpu < 0) { mut_cpu = 0; }
561
562 if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
563 showStgWord64(GC_tot_alloc*sizeof(W_),
564 temp, rtsTrue/*commas*/);
565 statsPrintf("%16s bytes allocated in the heap\n", temp);
566
567 showStgWord64(GC_tot_copied*sizeof(W_),
568 temp, rtsTrue/*commas*/);
569 statsPrintf("%16s bytes copied during GC\n", temp);
570
571 if ( residency_samples > 0 ) {
572 showStgWord64(max_residency*sizeof(W_),
573 temp, rtsTrue/*commas*/);
574 statsPrintf("%16s bytes maximum residency (%ld sample(s))\n",
575 temp, residency_samples);
576 }
577
578 showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
579 statsPrintf("%16s bytes maximum slop\n", temp);
580
581 statsPrintf("%16ld MB total memory in use (%ld MB lost due to fragmentation)\n\n",
582 peak_mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)),
583 (peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
584
585 /* Print garbage collections in each gen */
586 statsPrintf(" Tot time (elapsed) Avg pause Max pause\n");
587 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
588 gen = &generations[g];
589 statsPrintf(" Gen %2d %5d colls, %5d par %5.2fs %5.2fs %3.4fs %3.4fs\n",
590 gen->no,
591 gen->collections,
592 gen->par_collections,
593 TICK_TO_DBL(GC_coll_cpu[g]),
594 TICK_TO_DBL(GC_coll_elapsed[g]),
595 gen->collections == 0 ? 0 : TICK_TO_DBL(GC_coll_elapsed[g] / gen->collections),
596 TICK_TO_DBL(GC_coll_max_pause[g]));
597 }
598
599 #if defined(THREADED_RTS)
600 if (RtsFlags.ParFlags.parGcEnabled) {
601 statsPrintf("\n Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n",
602 (double)GC_par_avg_copied / (double)GC_par_max_copied,
603 (lnat)GC_par_avg_copied, (lnat)GC_par_max_copied,
604 RtsFlags.ParFlags.nNodes
605 );
606 }
607 #endif
608 statsPrintf("\n");
609
610 #if defined(THREADED_RTS)
611 {
612 nat i;
613 Task *task;
614 statsPrintf(" MUT time (elapsed) GC time (elapsed)\n");
615 for (i = 0, task = all_tasks;
616 task != NULL;
617 i++, task = task->all_link) {
618 statsPrintf(" Task %2d %-8s : %6.2fs (%6.2fs) %6.2fs (%6.2fs)\n",
619 i,
620 (task->worker) ? "(worker)" : "(bound)",
621 TICK_TO_DBL(task->mut_time),
622 TICK_TO_DBL(task->mut_etime),
623 TICK_TO_DBL(task->gc_time),
624 TICK_TO_DBL(task->gc_etime));
625 }
626 }
627
628 statsPrintf("\n");
629
630 {
631 nat i;
632 SparkCounters sparks = { 0, 0, 0, 0, 0};
633 for (i = 0; i < n_capabilities; i++) {
634 sparks.created += capabilities[i].spark_stats.created;
635 sparks.dud += capabilities[i].spark_stats.dud;
636 sparks.converted += capabilities[i].spark_stats.converted;
637 sparks.gcd += capabilities[i].spark_stats.gcd;
638 sparks.fizzled += capabilities[i].spark_stats.fizzled;
639 }
640
641 statsPrintf(" SPARKS: %ld (%ld converted, %ld dud, %ld GC'd, %ld fizzled)\n\n",
642 sparks.created + sparks.dud,
643 sparks.converted, sparks.dud,
644 sparks.gcd, sparks.fizzled);
645 }
646 #endif
647
648 statsPrintf(" INIT time %6.2fs (%6.2fs elapsed)\n",
649 TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed));
650
651 statsPrintf(" MUT time %6.2fs (%6.2fs elapsed)\n",
652 TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed));
653 statsPrintf(" GC time %6.2fs (%6.2fs elapsed)\n",
654 TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed));
655
656 #ifdef PROFILING
657 statsPrintf(" RP time %6.2fs (%6.2fs elapsed)\n",
658 TICK_TO_DBL(RP_tot_time), TICK_TO_DBL(RPe_tot_time));
659 statsPrintf(" PROF time %6.2fs (%6.2fs elapsed)\n",
660 TICK_TO_DBL(HC_tot_time), TICK_TO_DBL(HCe_tot_time));
661 #endif
662 statsPrintf(" EXIT time %6.2fs (%6.2fs elapsed)\n",
663 TICK_TO_DBL(exit_cpu), TICK_TO_DBL(exit_elapsed));
664 statsPrintf(" Total time %6.2fs (%6.2fs elapsed)\n\n",
665 TICK_TO_DBL(tot_cpu), TICK_TO_DBL(tot_elapsed));
666 #ifndef THREADED_RTS
667 statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n",
668 TICK_TO_DBL(gc_cpu)*100/TICK_TO_DBL(tot_cpu),
669 TICK_TO_DBL(gc_elapsed)*100/TICK_TO_DBL(tot_elapsed));
670 #endif
671
672 if (tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time) == 0)
673 showStgWord64(0, temp, rtsTrue/*commas*/);
674 else
675 showStgWord64(
676 (StgWord64)((GC_tot_alloc*sizeof(W_))/
677 TICK_TO_DBL(tot_cpu - GC_tot_cpu -
678 PROF_VAL(RP_tot_time + HC_tot_time))),
679 temp, rtsTrue/*commas*/);
680
681 statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp);
682
683 statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
684 TICK_TO_DBL(tot_cpu - GC_tot_cpu -
685 PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
686 / TICK_TO_DBL(tot_cpu),
687 TICK_TO_DBL(tot_cpu - GC_tot_cpu -
688 PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
689 / TICK_TO_DBL(tot_elapsed));
690
691 /*
692 TICK_PRINT(1);
693 TICK_PRINT(2);
694 REPORT(TOTAL_CALLS);
695 TICK_PRINT_TOT(1);
696 TICK_PRINT_TOT(2);
697 */
698
699 #if USE_PAPI
700 papi_stats_report();
701 #endif
702 #if defined(THREADED_RTS) && defined(PROF_SPIN)
703 {
704 nat g;
705
706 statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
707 statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
708 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
709 statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
710 }
711 }
712 #endif
713 }
714
715 if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
716 char *fmt1, *fmt2;
717 if (RtsFlags.MiscFlags.machineReadable) {
718 fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
719 fmt2 = " ,(\"num_GCs\", \"%d\")\n"
720 " ,(\"average_bytes_used\", \"%ld\")\n"
721 " ,(\"max_bytes_used\", \"%ld\")\n"
722 " ,(\"num_byte_usage_samples\", \"%ld\")\n"
723 " ,(\"peak_megabytes_allocated\", \"%lu\")\n"
724 " ,(\"init_cpu_seconds\", \"%.2f\")\n"
725 " ,(\"init_wall_seconds\", \"%.2f\")\n"
726 " ,(\"mutator_cpu_seconds\", \"%.2f\")\n"
727 " ,(\"mutator_wall_seconds\", \"%.2f\")\n"
728 " ,(\"GC_cpu_seconds\", \"%.2f\")\n"
729 " ,(\"GC_wall_seconds\", \"%.2f\")\n"
730 " ]\n";
731 }
732 else {
733 fmt1 = "<<ghc: %llu bytes, ";
734 fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.2f INIT (%.2f elapsed), %.2f MUT (%.2f elapsed), %.2f GC (%.2f elapsed) :ghc>>\n";
735 }
736 /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
737 statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
738 statsPrintf(fmt2,
739 total_collections,
740 residency_samples == 0 ? 0 :
741 avg_residency*sizeof(W_)/residency_samples,
742 max_residency*sizeof(W_),
743 residency_samples,
744 (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
745 TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed),
746 TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed),
747 TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed));
748 }
749
750 statsFlush();
751 statsClose();
752 }
753
754 if (GC_coll_cpu) {
755 stgFree(GC_coll_cpu);
756 GC_coll_cpu = NULL;
757 }
758 if (GC_coll_elapsed) {
759 stgFree(GC_coll_elapsed);
760 GC_coll_elapsed = NULL;
761 }
762 if (GC_coll_max_pause) {
763 stgFree(GC_coll_max_pause);
764 GC_coll_max_pause = NULL;
765 }
766 }
767
768 /* -----------------------------------------------------------------------------
769 stat_describe_gens
770
771 Produce some detailed info on the state of the generational GC.
772 -------------------------------------------------------------------------- */
773 void
774 statDescribeGens(void)
775 {
776 nat g, mut, lge, i;
777 lnat gen_slop;
778 lnat tot_live, tot_slop;
779 lnat gen_live, gen_blocks;
780 bdescr *bd;
781 generation *gen;
782
783 debugBelch(
784 "----------------------------------------------------------\n"
785 " Gen Max Mut-list Blocks Large Live Slop\n"
786 " Blocks Bytes Objects \n"
787 "----------------------------------------------------------\n");
788
789 tot_live = 0;
790 tot_slop = 0;
791
792 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
793 gen = &generations[g];
794
795 for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
796 lge++;
797 }
798
799 gen_live = genLiveWords(gen);
800 gen_blocks = genLiveBlocks(gen);
801
802 mut = 0;
803 for (i = 0; i < n_capabilities; i++) {
804 mut += countOccupied(capabilities[i].mut_lists[g]);
805
806 // Add the pinned object block.
807 bd = capabilities[i].pinned_object_block;
808 if (bd != NULL) {
809 gen_live += bd->free - bd->start;
810 gen_blocks += bd->blocks;
811 }
812
813 gen_live += gcThreadLiveWords(i,g);
814 gen_blocks += gcThreadLiveBlocks(i,g);
815 }
816
817 debugBelch("%5d %7ld %9d", g, (lnat)gen->max_blocks, mut);
818
819 gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;
820
821 debugBelch("%8ld %8d %8ld %8ld\n", gen_blocks, lge,
822 gen_live*sizeof(W_), gen_slop*sizeof(W_));
823 tot_live += gen_live;
824 tot_slop += gen_slop;
825 }
826 debugBelch("----------------------------------------------------------\n");
827 debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_));
828 debugBelch("----------------------------------------------------------\n");
829 debugBelch("\n");
830 }
831
832 /* -----------------------------------------------------------------------------
833 Stats available via a programmatic interface, so eg. GHCi can time
834 each compilation and expression evaluation.
835 -------------------------------------------------------------------------- */
836
837 extern HsInt64 getAllocations( void )
838 { return (HsInt64)GC_tot_alloc * sizeof(W_); }
839
840 /* -----------------------------------------------------------------------------
841 Dumping stuff in the stats file, or via the debug message interface
842 -------------------------------------------------------------------------- */
843
844 void
845 statsPrintf( char *s, ... )
846 {
847 FILE *sf = RtsFlags.GcFlags.statsFile;
848 va_list ap;
849
850 va_start(ap,s);
851 if (sf == NULL) {
852 vdebugBelch(s,ap);
853 } else {
854 vfprintf(sf, s, ap);
855 }
856 va_end(ap);
857 }
858
859 static void
860 statsFlush( void )
861 {
862 FILE *sf = RtsFlags.GcFlags.statsFile;
863 if (sf != NULL) {
864 fflush(sf);
865 }
866 }
867
868 static void
869 statsClose( void )
870 {
871 FILE *sf = RtsFlags.GcFlags.statsFile;
872 if (sf != NULL) {
873 fclose(sf);
874 }
875 }