/* Merge branch 'master' of http://darcs.haskell.org/ghc
   [ghc.git] / rts / Stats.c */
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2005
4 *
5 * Statistics and timing-related functions.
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "PosixSource.h"
10 #include "Rts.h"
11
12 #include "RtsUtils.h"
13 #include "Schedule.h"
14 #include "Stats.h"
15 #include "Profiling.h"
16 #include "GetTime.h"
17 #include "sm/Storage.h"
18 #include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
19 #include "sm/GCThread.h"
20 #include "sm/BlockAlloc.h"
21
22 #if USE_PAPI
23 #include "Papi.h"
24 #endif
25
/* Scratch-buffer size used for formatting numbers via showStgWord64. */
#define BIG_STRING_LEN 512

/* Convert a Ticks value into seconds, as a double. */
#define TICK_TO_DBL(t) ((double)(t) / TICKS_PER_SECOND)

/* CPU and wall-clock timestamps bracketing the RTS init and exit phases;
   set by stat_startInit/stat_endInit/stat_startExit/stat_endExit. */
static Ticks
    start_init_cpu, start_init_elapsed,
    end_init_cpu, end_init_elapsed,
    start_exit_cpu, start_exit_elapsed,
    end_exit_cpu, end_exit_elapsed;

/* Total CPU time spent in GC, summed over all collections (stat_endGC). */
static Ticks GC_tot_cpu = 0;

/* Cumulative words allocated / copied across all GCs. */
static StgWord64 GC_tot_alloc = 0;
static StgWord64 GC_tot_copied = 0;

/* Sums of per-GC max-copied-by-one-thread and average-copied-per-thread;
   used by stat_exit to report parallel GC work balance. */
static StgWord64 GC_par_max_copied = 0;
static StgWord64 GC_par_avg_copied = 0;

#ifdef PROFILING
static Ticks RP_start_time = 0, RP_tot_time = 0; // retainer prof user time
static Ticks RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time

static Ticks HC_start_time, HC_tot_time = 0; // heap census prof user time
static Ticks HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
#endif

/* PROF_VAL(x) is x when profiling, 0 otherwise, so profiling times can be
   used unconditionally in the timing arithmetic below. */
#ifdef PROFILING
#define PROF_VAL(x) (x)
#else
#define PROF_VAL(x) 0
#endif

static lnat max_residency = 0; // in words; for stats only
static lnat cumulative_residency = 0;
static lnat residency_samples = 0; // for stats only
static lnat max_slop = 0; // in words

static lnat GC_end_faults = 0; // page-fault count at the end of the last GC

/* Per-generation GC timing arrays, allocated in initStats1 and freed in
   stat_exit. */
static Ticks *GC_coll_cpu = NULL;
static Ticks *GC_coll_elapsed = NULL;
static Ticks *GC_coll_max_pause = NULL;

static void statsFlush( void );
static void statsClose( void );
72
73 /* -----------------------------------------------------------------------------
74 Current elapsed time
75 ------------------------------------------------------------------------- */
76
77 Ticks stat_getElapsedTime(void)
78 {
79 return getProcessElapsedTime() - start_init_elapsed;
80 }
81
82 /* ---------------------------------------------------------------------------
83 Measure the current MUT time, for profiling
84 ------------------------------------------------------------------------ */
85
86 double
87 mut_user_time_until( Ticks t )
88 {
89 return TICK_TO_DBL(t - GC_tot_cpu - PROF_VAL(RP_tot_time));
90 }
91
/* Mutator CPU time consumed so far by this process, in seconds. */
double
mut_user_time( void )
{
    return mut_user_time_until(getProcessCPUTime());
}
99
#ifdef PROFILING
/*
  mut_user_time_during_RP() returns the MUT time during retainer profiling.
  The same is for mut_user_time_during_HC();
*/
double
mut_user_time_during_RP( void )
{
    // MUT time as of the start of the current retainer-profiling pass,
    // excluding GC time and earlier retainer-profiling passes.
    return TICK_TO_DBL(RP_start_time - GC_tot_cpu - RP_tot_time);
}

double
mut_user_time_during_heap_census( void )
{
    // NOTE(review): subtracts RP_tot_time rather than HC_tot_time —
    // presumably deliberate (census time accounted elsewhere); confirm.
    return TICK_TO_DBL(HC_start_time - GC_tot_cpu - RP_tot_time);
}
#endif /* PROFILING */
117
118 /* ---------------------------------------------------------------------------
119 initStats0() has no dependencies, it can be called right at the beginning
120 ------------------------------------------------------------------------ */
121
122 void
123 initStats0(void)
124 {
125 start_init_cpu = 0;
126 start_init_elapsed = 0;
127 end_init_cpu = 0;
128 end_init_elapsed = 0;
129
130 start_exit_cpu = 0;
131 start_exit_elapsed = 0;
132 end_exit_cpu = 0;
133 end_exit_elapsed = 0;
134
135 GC_tot_alloc = 0;
136 GC_tot_copied = 0;
137 GC_par_max_copied = 0;
138 GC_par_avg_copied = 0;
139 GC_tot_cpu = 0;
140
141 #ifdef PROFILING
142 RP_start_time = 0;
143 RP_tot_time = 0;
144 RPe_start_time = 0;
145 RPe_tot_time = 0;
146
147 HC_start_time = 0;
148 HC_tot_time = 0;
149 HCe_start_time = 0;
150 HCe_tot_time = 0;
151 #endif
152
153 max_residency = 0;
154 cumulative_residency = 0;
155 residency_samples = 0;
156 max_slop = 0;
157
158 GC_end_faults = 0;
159 }
160
161 /* ---------------------------------------------------------------------------
162 initStats1() can be called after setupRtsFlags()
163 ------------------------------------------------------------------------ */
164
165 void
166 initStats1 (void)
167 {
168 nat i;
169
170 if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
171 statsPrintf(" Alloc Copied Live GC GC TOT TOT Page Flts\n");
172 statsPrintf(" bytes bytes bytes user elap user elap\n");
173 }
174 GC_coll_cpu =
175 (Ticks *)stgMallocBytes(
176 sizeof(Ticks)*RtsFlags.GcFlags.generations,
177 "initStats");
178 GC_coll_elapsed =
179 (Ticks *)stgMallocBytes(
180 sizeof(Ticks)*RtsFlags.GcFlags.generations,
181 "initStats");
182 GC_coll_max_pause =
183 (Ticks *)stgMallocBytes(
184 sizeof(Ticks)*RtsFlags.GcFlags.generations,
185 "initStats");
186 for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
187 GC_coll_cpu[i] = 0;
188 GC_coll_elapsed[i] = 0;
189 GC_coll_max_pause[i] = 0;
190 }
191 }
192
193 /* -----------------------------------------------------------------------------
194 Initialisation time...
195 -------------------------------------------------------------------------- */
196
/* Record CPU and wall-clock time at the start of RTS initialisation. */
void
stat_startInit(void)
{
    getProcessTimes(&start_init_cpu, &start_init_elapsed);
}
202
/* Record CPU and wall-clock time at the end of RTS initialisation, and
   (with PAPI) begin counting mutator hardware events. */
void
stat_endInit(void)
{
    getProcessTimes(&end_init_cpu, &end_init_elapsed);

#if USE_PAPI
    /* We start counting events for the mutator
     * when garbage collection starts
     * we switch to the GC event set. */
    papi_start_mutator_count();

    /* This flag is needed to avoid counting the last GC */
    papi_is_reporting = 1;

#endif
}
219
220 /* -----------------------------------------------------------------------------
221 stat_startExit and stat_endExit
222
223 These two measure the time taken in shutdownHaskell().
224 -------------------------------------------------------------------------- */
225
/* Record CPU and wall-clock time at the start of shutdownHaskell(), and
   (with PAPI) stop counting mutator events. */
void
stat_startExit(void)
{
    getProcessTimes(&start_exit_cpu, &start_exit_elapsed);

#if USE_PAPI
    /* We stop counting mutator events
     * GC events are not being counted at this point */
    papi_stop_mutator_count();

    /* This flag is needed, because GC is run once more after this function */
    papi_is_reporting = 0;
#endif
}
240
/* Record CPU and wall-clock time at the end of shutdownHaskell(). */
void
stat_endExit(void)
{
    getProcessTimes(&end_exit_cpu, &end_exit_elapsed);
}
246
247 /* -----------------------------------------------------------------------------
248 Called at the beginning of each GC
249 -------------------------------------------------------------------------- */
250
/* Set when the -B "ring bell on GC" output needs erasing in stat_endGC. */
static nat rub_bell = 0;

/* Called at the beginning of each GC: ring the bell if requested, switch
   PAPI to GC event counting, and snapshot the per-thread start times. */
void
stat_startGC (gc_thread *gct)
{
    nat bell = RtsFlags.GcFlags.ringBell;

    if (bell) {
        if (bell > 1) {
            // ringBell > 1: print a visible " GC " marker instead of beeping;
            // stat_endGC will rub it out (rub_bell).
            debugBelch(" GC ");
            rub_bell = 1;
        } else {
            debugBelch("\007");
        }
    }

#if USE_PAPI
    if(papi_is_reporting) {
        /* Switch to counting GC events */
        papi_stop_mutator_count();
        papi_start_gc_count();
    }
#endif

    // Snapshot start-of-GC times on the gc_thread so stat_endGC and the
    // worker-thread hooks can compute deltas.
    getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
    gct->gc_start_thread_cpu = getThreadCPUTime();

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
    {
        gct->gc_start_faults = getPageFaults();
    }
}
283
284 void
285 stat_gcWorkerThreadStart (gc_thread *gct)
286 {
287 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
288 {
289 getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
290 gct->gc_start_thread_cpu = getThreadCPUTime();
291 }
292 }
293
294 void
295 stat_gcWorkerThreadDone (gc_thread *gct)
296 {
297 Ticks thread_cpu, elapsed, gc_cpu, gc_elapsed;
298
299 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
300 {
301 elapsed = getProcessElapsedTime();
302 thread_cpu = getThreadCPUTime();
303
304 gc_cpu = thread_cpu - gct->gc_start_thread_cpu;
305 gc_elapsed = elapsed - gct->gc_start_elapsed;
306
307 taskDoneGC(gct->cap->running_task, gc_cpu, gc_elapsed);
308 }
309 }
310
311 /* -----------------------------------------------------------------------------
312 Called at the end of each GC
313 -------------------------------------------------------------------------- */
314
/*
 * Called at the end of each GC.
 *
 * alloc, live, copied, max_copied, avg_copied and slop are in words;
 * gen is the oldest generation collected by this GC.
 */
void
stat_endGC (gc_thread *gct,
            lnat alloc, lnat live, lnat copied, nat gen,
            lnat max_copied, lnat avg_copied, lnat slop)
{
    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
        RtsFlags.ProfFlags.doHeapProfile)
        // heap profiling needs GC_tot_time
    {
        Ticks cpu, elapsed, thread_gc_cpu, gc_cpu, gc_elapsed;

        getProcessTimes(&cpu, &elapsed);
        gc_elapsed = elapsed - gct->gc_start_elapsed;

        // CPU time used by this OS thread alone during the GC...
        thread_gc_cpu = getThreadCPUTime() - gct->gc_start_thread_cpu;

        // ...versus process-wide CPU time over the same interval.
        gc_cpu = cpu - gct->gc_start_cpu;

        taskDoneGC(gct->cap->running_task, thread_gc_cpu, gc_elapsed);

        if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
            nat faults = getPageFaults();

            // One line per GC (+RTS -S): alloc/copied/live in bytes,
            // then times, fault deltas, and the generation collected.
            statsPrintf("%9ld %9ld %9ld",
                        alloc*sizeof(W_), copied*sizeof(W_),
                        live*sizeof(W_));
            statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2d)\n",
                    TICK_TO_DBL(gc_cpu),
                    TICK_TO_DBL(gc_elapsed),
                    TICK_TO_DBL(cpu),
                    TICK_TO_DBL(elapsed - start_init_elapsed),
                    faults - gct->gc_start_faults,
                    gct->gc_start_faults - GC_end_faults,
                    gen);

            GC_end_faults = faults;
            statsFlush();
        }

        // Accumulate per-generation timing and track the max pause.
        GC_coll_cpu[gen] += gc_cpu;
        GC_coll_elapsed[gen] += gc_elapsed;
        if (GC_coll_max_pause[gen] < gc_elapsed) {
            GC_coll_max_pause[gen] = gc_elapsed;
        }

        // Accumulate global totals used by stat_exit.
        GC_tot_copied += (StgWord64) copied;
        GC_tot_alloc += (StgWord64) alloc;
        GC_par_max_copied += (StgWord64) max_copied;
        GC_par_avg_copied += (StgWord64) avg_copied;
        GC_tot_cpu += gc_cpu;

        if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
            // Residency is only sampled at major GCs, when the whole
            // heap's live data is known.
            if (live > max_residency) {
                max_residency = live;
            }
            residency_samples++;
            cumulative_residency += live;
        }

        if (slop > max_slop) max_slop = slop;
    }

    // Erase the " GC " marker printed by stat_startGC (ringBell > 1).
    if (rub_bell) {
        debugBelch("\b\b\b \b\b\b");
        rub_bell = 0;
    }

#if USE_PAPI
    if(papi_is_reporting) {
        /* Switch to counting mutator events */
        if (gen == 0) {
            papi_stop_gc0_count();
        } else {
            papi_stop_gc1_count();
        }
        papi_start_mutator_count();
    }
#endif
}
394
/* -----------------------------------------------------------------------------
   Called at the beginning of each Retainer Profiling
   -------------------------------------------------------------------------- */
#ifdef PROFILING
/* Snapshot CPU and wall-clock time at the start of a retainer-profiling
   pass; stat_endRP accumulates the deltas. */
void
stat_startRP(void)
{
    Ticks user, elapsed;
    getProcessTimes( &user, &elapsed );

    RP_start_time = user;
    RPe_start_time = elapsed;
}
#endif /* PROFILING */
409
/* -----------------------------------------------------------------------------
   Called at the end of each Retainer Profiling
   -------------------------------------------------------------------------- */
413
#ifdef PROFILING
/*
 * Called at the end of a retainer-profiling pass: accumulate the pass's
 * CPU and elapsed time into RP_tot_time/RPe_tot_time and write a summary
 * to the profile file.
 *
 * retainerGeneration: sequence number of this retainer-profiling pass.
 * maxCStackSize / maxStackSize (DEBUG_RETAINER only): stack high-water marks.
 * averageNumVisit: average number of visits per object during the pass.
 */
void
stat_endRP(
  nat retainerGeneration,
#ifdef DEBUG_RETAINER
  nat maxCStackSize,
  int maxStackSize,
#endif
  double averageNumVisit)
{
    Ticks user, elapsed;
    getProcessTimes( &user, &elapsed );

    RP_tot_time += user - RP_start_time;
    RPe_tot_time += elapsed - RPe_start_time;

    fprintf(prof_file, "Retainer Profiling: %d, at %f seconds\n",
      retainerGeneration, mut_user_time_during_RP());
#ifdef DEBUG_RETAINER
    fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
    // maxStackSize is a signed int, so it must be printed with %d;
    // the previous %u was a format-specifier mismatch (UB in C).
    fprintf(prof_file, "\tMax auxiliary stack size = %d\n", maxStackSize);
#endif
    fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit);
}
#endif /* PROFILING */
439
440 /* -----------------------------------------------------------------------------
441 Called at the beginning of each heap census
442 -------------------------------------------------------------------------- */
#ifdef PROFILING
/* Snapshot CPU and wall-clock time at the start of a heap census;
   stat_endHeapCensus accumulates the deltas. */
void
stat_startHeapCensus(void)
{
    Ticks user, elapsed;
    getProcessTimes( &user, &elapsed );

    HC_start_time = user;
    HCe_start_time = elapsed;
}
#endif /* PROFILING */
454
455 /* -----------------------------------------------------------------------------
456 Called at the end of each heap census
457 -------------------------------------------------------------------------- */
#ifdef PROFILING
/* Accumulate the CPU and elapsed time spent in the heap census that
   stat_startHeapCensus began. */
void
stat_endHeapCensus(void)
{
    Ticks user, elapsed;
    getProcessTimes( &user, &elapsed );

    HC_tot_time += user - HC_start_time;
    HCe_tot_time += elapsed - HCe_start_time;
}
#endif /* PROFILING */
469
470 /* -----------------------------------------------------------------------------
471 Called at the end of execution
472
473 NOTE: number of allocations is not entirely accurate: it doesn't
474 take into account the few bytes at the end of the heap that
475 were left unused when the heap-check failed.
476 -------------------------------------------------------------------------- */
477
#ifdef DEBUG
/* Ticky-style call counters for DEBUG builds.  The counters start at 1
   rather than 0, presumably so the percentage reports below (which divide
   by them) cannot divide by zero. */
#define TICK_VAR_INI(arity) \
  StgInt SLOW_CALLS_##arity = 1; \
  StgInt RIGHT_ARITY_##arity = 1; \
  StgInt TAGGED_PTR_##arity = 0;

TICK_VAR_INI(1)
TICK_VAR_INI(2)

StgInt TOTAL_CALLS=1;
#endif

/* Report the value of a counter */
#define REPORT(counter) \
  { \
    showStgWord64(counter,temp,rtsTrue/*commas*/); \
    statsPrintf(" (" #counter ") : %s\n",temp); \
  }

/* Report the value of a counter as a percentage of another counter */
#define REPORT_PCT(counter,countertot) \
  statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
              counter*100.0/countertot)

/* Report one arity's slow-call counters and their percentages. */
#define TICK_PRINT(arity) \
  REPORT(SLOW_CALLS_##arity); \
  REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \
  REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \
  REPORT(RIGHT_ARITY_##arity); \
  REPORT(TAGGED_PTR_##arity)

/* Report one arity's slow calls as a percentage of all calls. */
#define TICK_PRINT_TOT(arity) \
  statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
              SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
512
/*
 * Called at the end of execution: print the requested statistics summary
 * (+RTS -s / -S / -t / --machine-readable) and free the per-generation
 * timing arrays.
 *
 * alloc: words allocated since the last GC (see the NOTE above about
 * this figure not being entirely accurate).
 */
void
stat_exit(int alloc)
{
    generation *gen;
    Ticks gc_cpu = 0;
    Ticks gc_elapsed = 0;
    Ticks init_cpu = 0;
    Ticks init_elapsed = 0;
    Ticks mut_cpu = 0;
    Ticks mut_elapsed = 0;
    Ticks exit_cpu = 0;
    Ticks exit_elapsed = 0;

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {

        char temp[BIG_STRING_LEN];
        Ticks tot_cpu;
        Ticks tot_elapsed;
        nat i, g, total_collections = 0;

        getProcessTimes( &tot_cpu, &tot_elapsed );
        tot_elapsed -= start_init_elapsed;

        // fold in the allocation since the last GC
        GC_tot_alloc += alloc;

        /* Count total garbage collections */
        for (g = 0; g < RtsFlags.GcFlags.generations; g++)
            total_collections += generations[g].collections;

        /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
        if (tot_cpu == 0.0) tot_cpu = 1;
        if (tot_elapsed == 0.0) tot_elapsed = 1;

        // final line of the -S per-GC table: allocation with no GC
        if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
            statsPrintf("%9ld %9.9s %9.9s", (lnat)alloc*sizeof(W_), "", "");
            statsPrintf(" %5.2f %5.2f\n\n", 0.0, 0.0);
        }

        // total GC time = sum over generations
        for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
            gc_cpu += GC_coll_cpu[i];
            gc_elapsed += GC_coll_elapsed[i];
        }

        init_cpu = end_init_cpu - start_init_cpu;
        init_elapsed = end_init_elapsed - start_init_elapsed;

        exit_cpu = end_exit_cpu - start_exit_cpu;
        exit_elapsed = end_exit_elapsed - start_exit_elapsed;

        // mutator time is what's left between init and exit after
        // subtracting GC (and, when profiling, RP/HC) time
        mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed;

        mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu
            - PROF_VAL(RP_tot_time + HC_tot_time);
        if (mut_cpu < 0) { mut_cpu = 0; }

        if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
            showStgWord64(GC_tot_alloc*sizeof(W_),
                          temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes allocated in the heap\n", temp);

            showStgWord64(GC_tot_copied*sizeof(W_),
                          temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes copied during GC\n", temp);

            if ( residency_samples > 0 ) {
                showStgWord64(max_residency*sizeof(W_),
                              temp, rtsTrue/*commas*/);
                statsPrintf("%16s bytes maximum residency (%ld sample(s))\n",
                            temp, residency_samples);
            }

            showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes maximum slop\n", temp);

            statsPrintf("%16ld MB total memory in use (%ld MB lost due to fragmentation)\n\n",
                        peak_mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)),
                        (peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));

            /* Print garbage collections in each gen */
            statsPrintf(" Tot time (elapsed) Avg pause Max pause\n");
            for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                gen = &generations[g];
                statsPrintf(" Gen %2d %5d colls, %5d par %5.2fs %5.2fs %3.4fs %3.4fs\n",
                            gen->no,
                            gen->collections,
                            gen->par_collections,
                            TICK_TO_DBL(GC_coll_cpu[g]),
                            TICK_TO_DBL(GC_coll_elapsed[g]),
                            gen->collections == 0 ? 0 : TICK_TO_DBL(GC_coll_elapsed[g] / gen->collections),
                            TICK_TO_DBL(GC_coll_max_pause[g]));
            }

#if defined(THREADED_RTS)
            // work balance: avg copied per thread / max copied by one
            // thread; 1.0 would be a perfectly balanced parallel GC
            if (RtsFlags.ParFlags.parGcEnabled) {
                statsPrintf("\n Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n",
                            (double)GC_par_avg_copied / (double)GC_par_max_copied,
                            (lnat)GC_par_avg_copied, (lnat)GC_par_max_copied,
                            RtsFlags.ParFlags.nNodes
                    );
            }
#endif
            statsPrintf("\n");

#if defined(THREADED_RTS)
            // per-task MUT/GC time breakdown
            {
                nat i;
                Task *task;
                statsPrintf(" MUT time (elapsed) GC time (elapsed)\n");
                for (i = 0, task = all_tasks;
                     task != NULL;
                     i++, task = task->all_link) {
                    statsPrintf(" Task %2d %-8s : %6.2fs (%6.2fs) %6.2fs (%6.2fs)\n",
                                i,
                                (task->worker) ? "(worker)" : "(bound)",
                                TICK_TO_DBL(task->mut_time),
                                TICK_TO_DBL(task->mut_etime),
                                TICK_TO_DBL(task->gc_time),
                                TICK_TO_DBL(task->gc_etime));
                }
            }

            statsPrintf("\n");

            // spark statistics, summed across capabilities
            {
                nat i;
                SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
                for (i = 0; i < n_capabilities; i++) {
                    sparks.created += capabilities[i].spark_stats.created;
                    sparks.dud += capabilities[i].spark_stats.dud;
                    sparks.overflowed+= capabilities[i].spark_stats.overflowed;
                    sparks.converted += capabilities[i].spark_stats.converted;
                    sparks.gcd += capabilities[i].spark_stats.gcd;
                    sparks.fizzled += capabilities[i].spark_stats.fizzled;
                }

                statsPrintf(" SPARKS: %ld (%ld converted, %ld overflowed, %ld dud, %ld GC'd, %ld fizzled)\n\n",
                            sparks.created + sparks.dud + sparks.overflowed,
                            sparks.converted, sparks.overflowed, sparks.dud,
                            sparks.gcd, sparks.fizzled);
            }
#endif

            statsPrintf(" INIT time %6.2fs (%6.2fs elapsed)\n",
                        TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed));

            statsPrintf(" MUT time %6.2fs (%6.2fs elapsed)\n",
                        TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed));
            statsPrintf(" GC time %6.2fs (%6.2fs elapsed)\n",
                        TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed));

#ifdef PROFILING
            statsPrintf(" RP time %6.2fs (%6.2fs elapsed)\n",
                        TICK_TO_DBL(RP_tot_time), TICK_TO_DBL(RPe_tot_time));
            statsPrintf(" PROF time %6.2fs (%6.2fs elapsed)\n",
                        TICK_TO_DBL(HC_tot_time), TICK_TO_DBL(HCe_tot_time));
#endif
            statsPrintf(" EXIT time %6.2fs (%6.2fs elapsed)\n",
                        TICK_TO_DBL(exit_cpu), TICK_TO_DBL(exit_elapsed));
            statsPrintf(" Total time %6.2fs (%6.2fs elapsed)\n\n",
                        TICK_TO_DBL(tot_cpu), TICK_TO_DBL(tot_elapsed));
#ifndef THREADED_RTS
            statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n",
                        TICK_TO_DBL(gc_cpu)*100/TICK_TO_DBL(tot_cpu),
                        TICK_TO_DBL(gc_elapsed)*100/TICK_TO_DBL(tot_elapsed));
#endif

            // allocation rate = bytes allocated / mutator CPU seconds
            // (guard against a zero mutator time)
            if (tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time) == 0)
                showStgWord64(0, temp, rtsTrue/*commas*/);
            else
                showStgWord64(
                    (StgWord64)((GC_tot_alloc*sizeof(W_))/
                                TICK_TO_DBL(tot_cpu - GC_tot_cpu -
                                            PROF_VAL(RP_tot_time + HC_tot_time))),
                    temp, rtsTrue/*commas*/);

            statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp);

            statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
                        TICK_TO_DBL(tot_cpu - GC_tot_cpu -
                                    PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                        / TICK_TO_DBL(tot_cpu),
                        TICK_TO_DBL(tot_cpu - GC_tot_cpu -
                                    PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                        / TICK_TO_DBL(tot_elapsed));

            /*
            TICK_PRINT(1);
            TICK_PRINT(2);
            REPORT(TOTAL_CALLS);
            TICK_PRINT_TOT(1);
            TICK_PRINT_TOT(2);
            */

#if USE_PAPI
            papi_stats_report();
#endif
#if defined(THREADED_RTS) && defined(PROF_SPIN)
            // spin-lock contention counters (PROF_SPIN builds only)
            {
                nat g;

                statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
                statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
                for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                    statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
                }
            }
#endif
        }

        // one-line summary (+RTS -t / --machine-readable)
        if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
            char *fmt1, *fmt2;
            if (RtsFlags.MiscFlags.machineReadable) {
                fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
                fmt2 = " ,(\"num_GCs\", \"%d\")\n"
                       " ,(\"average_bytes_used\", \"%ld\")\n"
                       " ,(\"max_bytes_used\", \"%ld\")\n"
                       " ,(\"num_byte_usage_samples\", \"%ld\")\n"
                       " ,(\"peak_megabytes_allocated\", \"%lu\")\n"
                       " ,(\"init_cpu_seconds\", \"%.2f\")\n"
                       " ,(\"init_wall_seconds\", \"%.2f\")\n"
                       " ,(\"mutator_cpu_seconds\", \"%.2f\")\n"
                       " ,(\"mutator_wall_seconds\", \"%.2f\")\n"
                       " ,(\"GC_cpu_seconds\", \"%.2f\")\n"
                       " ,(\"GC_wall_seconds\", \"%.2f\")\n"
                       " ]\n";
            }
            else {
                fmt1 = "<<ghc: %llu bytes, ";
                fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.2f INIT (%.2f elapsed), %.2f MUT (%.2f elapsed), %.2f GC (%.2f elapsed) :ghc>>\n";
            }
            /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
            statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
            statsPrintf(fmt2,
                        total_collections,
                        residency_samples == 0 ? 0 :
                            cumulative_residency*sizeof(W_)/residency_samples,
                        max_residency*sizeof(W_),
                        residency_samples,
                        (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
                        TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed),
                        TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed),
                        TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed));
        }

        statsFlush();
        statsClose();
    }

    // release the per-generation timing arrays allocated in initStats1
    if (GC_coll_cpu) {
        stgFree(GC_coll_cpu);
        GC_coll_cpu = NULL;
    }
    if (GC_coll_elapsed) {
        stgFree(GC_coll_elapsed);
        GC_coll_elapsed = NULL;
    }
    if (GC_coll_max_pause) {
        stgFree(GC_coll_max_pause);
        GC_coll_max_pause = NULL;
    }
}
774
775 /* -----------------------------------------------------------------------------
776 stat_describe_gens
777
778 Produce some detailed info on the state of the generational GC.
779 -------------------------------------------------------------------------- */
/* Print a per-generation table of block counts, mutable-list sizes, large
   objects, live data and slop, via the debug message interface. */
void
statDescribeGens(void)
{
  nat g, mut, lge, i;
  lnat gen_slop;
  lnat tot_live, tot_slop;
  lnat gen_live, gen_blocks;
  bdescr *bd;
  generation *gen;

  debugBelch(
"----------------------------------------------------------\n"
" Gen Max Mut-list Blocks Large Live Slop\n"
" Blocks Bytes Objects \n"
"----------------------------------------------------------\n");

  tot_live = 0;
  tot_slop = 0;

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      gen = &generations[g];

      // count this generation's large objects
      for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
          lge++;
      }

      gen_live = genLiveWords(gen);
      gen_blocks = genLiveBlocks(gen);

      // add per-capability contributions: mutable lists, pinned object
      // blocks, and each GC thread's live data for this generation
      mut = 0;
      for (i = 0; i < n_capabilities; i++) {
          mut += countOccupied(capabilities[i].mut_lists[g]);

          // Add the pinned object block.
          bd = capabilities[i].pinned_object_block;
          if (bd != NULL) {
              gen_live += bd->free - bd->start;
              gen_blocks += bd->blocks;
          }

          gen_live += gcThreadLiveWords(i,g);
          gen_blocks += gcThreadLiveBlocks(i,g);
      }

      debugBelch("%5d %7ld %9d", g, (lnat)gen->max_blocks, mut);

      // slop = space held in blocks but not occupied by live data
      gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;

      debugBelch("%8ld %8d %8ld %8ld\n", gen_blocks, lge,
                 gen_live*sizeof(W_), gen_slop*sizeof(W_));
      tot_live += gen_live;
      tot_slop += gen_slop;
  }
  debugBelch("----------------------------------------------------------\n");
  debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_));
  debugBelch("----------------------------------------------------------\n");
  debugBelch("\n");
}
838
839 /* -----------------------------------------------------------------------------
840 Stats available via a programmatic interface, so eg. GHCi can time
841 each compilation and expression evaluation.
842 -------------------------------------------------------------------------- */
843
844 extern HsInt64 getAllocations( void )
845 { return (HsInt64)GC_tot_alloc * sizeof(W_); }
846
847 /* -----------------------------------------------------------------------------
848 Dumping stuff in the stats file, or via the debug message interface
849 -------------------------------------------------------------------------- */
850
851 void
852 statsPrintf( char *s, ... )
853 {
854 FILE *sf = RtsFlags.GcFlags.statsFile;
855 va_list ap;
856
857 va_start(ap,s);
858 if (sf == NULL) {
859 vdebugBelch(s,ap);
860 } else {
861 vfprintf(sf, s, ap);
862 }
863 va_end(ap);
864 }
865
866 static void
867 statsFlush( void )
868 {
869 FILE *sf = RtsFlags.GcFlags.statsFile;
870 if (sf != NULL) {
871 fflush(sf);
872 }
873 }
874
875 static void
876 statsClose( void )
877 {
878 FILE *sf = RtsFlags.GcFlags.statsFile;
879 if (sf != NULL) {
880 fclose(sf);
881 }
882 }