Merge branch 'master' of http://darcs.haskell.org/ghc
[ghc.git] / rts / Stats.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2005
4 *
5 * Statistics and timing-related functions.
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "PosixSource.h"
10 #include "Rts.h"
11
12 #include "RtsUtils.h"
13 #include "Schedule.h"
14 #include "Stats.h"
15 #include "Profiling.h"
16 #include "GetTime.h"
17 #include "sm/Storage.h"
18 #include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
19 #include "sm/GCThread.h"
20 #include "sm/BlockAlloc.h"
21
22 #if USE_PAPI
23 #include "Papi.h"
24 #endif
25
/* Size of the scratch buffer used for formatting numbers (with commas)
   via showStgWord64 in stat_exit() and the REPORT macro. */
#define BIG_STRING_LEN              512

#define TICK_TO_DBL(t) ((double)(t) / TICKS_PER_SECOND)

/* Process CPU/elapsed timestamps bracketing RTS init and shutdown,
   filled in by stat_startInit/stat_endInit/stat_startExit/stat_endExit. */
static Ticks
    start_init_cpu, start_init_elapsed,
    end_init_cpu, end_init_elapsed,
    start_exit_cpu, start_exit_elapsed,
    end_exit_cpu, end_exit_elapsed;

// Total CPU time spent in GC, accumulated by stat_endGC().
static Ticks GC_tot_cpu  = 0;

// Words allocated / copied summed over all GCs (see stat_endGC, stat_exit).
static StgWord64 GC_tot_alloc      = 0;
static StgWord64 GC_tot_copied     = 0;

// Parallel-GC copy totals used for the work-balance line in stat_exit().
static StgWord64 GC_par_max_copied = 0;
static StgWord64 GC_par_avg_copied = 0;

#ifdef PROFILING
static Ticks RP_start_time  = 0, RP_tot_time  = 0;  // retainer prof user time
static Ticks RPe_start_time = 0, RPe_tot_time = 0;  // retainer prof elap time

static Ticks HC_start_time, HC_tot_time = 0;     // heap census prof user time
static Ticks HCe_start_time, HCe_tot_time = 0;   // heap census prof elap time
#endif

#ifdef PROFILING
// PROF_VAL(x) is x when profiling, 0 otherwise, so profiling overheads
// can be subtracted in expressions without further #ifdefs.
#define PROF_VAL(x)   (x)
#else
#define PROF_VAL(x)   0
#endif

// Residency/slop statistics: max_residency is the peak live-word count
// seen at a major GC; avg_residency is the running *sum* of live words,
// divided by residency_samples when reported in stat_exit().
static lnat max_residency     = 0; // in words; for stats only
static lnat avg_residency     = 0;
static lnat residency_samples = 0; // for stats only
static lnat max_slop          = 0;

// Page-fault count recorded at the end of the previous GC (verbose stats).
static lnat GC_end_faults = 0;

// Per-generation accumulators, allocated/zeroed in initStats1(),
// freed in stat_exit().
static Ticks *GC_coll_cpu = NULL;
static Ticks *GC_coll_elapsed = NULL;
static Ticks *GC_coll_max_pause = NULL;

static void statsFlush( void );
static void statsClose( void );
72
73 /* -----------------------------------------------------------------------------
74 Current elapsed time
75 ------------------------------------------------------------------------- */
76
77 Ticks stat_getElapsedTime(void)
78 {
79 return getProcessElapsedTime() - start_init_elapsed;
80 }
81
82 /* ---------------------------------------------------------------------------
83 Measure the current MUT time, for profiling
84 ------------------------------------------------------------------------ */
85
86 double
87 mut_user_time( void )
88 {
89 Ticks cpu;
90 cpu = getProcessCPUTime();
91 return TICK_TO_DBL(cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
92 }
93
#ifdef PROFILING
/*
   mut_user_time_during_RP() returns the MUT time during retainer profiling.
   The same is for mut_user_time_during_HC();

   Both compute mutator seconds as of the *start* of the current
   profiling pass (RP_start_time / HC_start_time), with GC and prior
   profiler time subtracted — used to timestamp profiler output.
*/
double
mut_user_time_during_RP( void )
{
    // CPU time at the start of this retainer-profiling pass, net of
    // GC and previously accumulated profiling overheads.
    return TICK_TO_DBL(RP_start_time - GC_tot_cpu - RP_tot_time - HC_tot_time);
}

double
mut_user_time_during_heap_census( void )
{
    // Same as above, but relative to the start of the current heap census.
    return TICK_TO_DBL(HC_start_time - GC_tot_cpu - RP_tot_time - HC_tot_time);
}
#endif /* PROFILING */
111
112 /* ---------------------------------------------------------------------------
113 initStats0() has no dependencies, it can be called right at the beginning
114 ------------------------------------------------------------------------ */
115
/* Reset every statistics accumulator to its initial state.  Has no
   dependencies on RTS flags or storage, so it can run first thing. */
void
initStats0(void)
{
    // init/exit phase timestamps
    start_init_cpu = 0;
    start_init_elapsed = 0;
    end_init_cpu = 0;
    end_init_elapsed = 0;

    start_exit_cpu = 0;
    start_exit_elapsed = 0;
    end_exit_cpu = 0;
    end_exit_elapsed = 0;

    // GC totals
    GC_tot_alloc = 0;
    GC_tot_copied = 0;
    GC_par_max_copied = 0;
    GC_par_avg_copied = 0;
    GC_tot_cpu = 0;

#ifdef PROFILING
    // retainer-profiling and heap-census timers
    RP_start_time = 0;
    RP_tot_time = 0;
    RPe_start_time = 0;
    RPe_tot_time = 0;

    HC_start_time = 0;
    HC_tot_time = 0;
    HCe_start_time = 0;
    HCe_tot_time = 0;
#endif

    // residency/slop statistics
    max_residency = 0;
    avg_residency = 0;
    residency_samples = 0;
    max_slop = 0;

    GC_end_faults = 0;
}
154
155 /* ---------------------------------------------------------------------------
156 initStats1() can be called after setupRtsFlags()
157 ------------------------------------------------------------------------ */
158
159 void
160 initStats1 (void)
161 {
162 nat i;
163
164 if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
165 statsPrintf(" Alloc Copied Live GC GC TOT TOT Page Flts\n");
166 statsPrintf(" bytes bytes bytes user elap user elap\n");
167 }
168 GC_coll_cpu =
169 (Ticks *)stgMallocBytes(
170 sizeof(Ticks)*RtsFlags.GcFlags.generations,
171 "initStats");
172 GC_coll_elapsed =
173 (Ticks *)stgMallocBytes(
174 sizeof(Ticks)*RtsFlags.GcFlags.generations,
175 "initStats");
176 GC_coll_max_pause =
177 (Ticks *)stgMallocBytes(
178 sizeof(Ticks)*RtsFlags.GcFlags.generations,
179 "initStats");
180 for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
181 GC_coll_cpu[i] = 0;
182 GC_coll_elapsed[i] = 0;
183 GC_coll_max_pause[i] = 0;
184 }
185 }
186
187 /* -----------------------------------------------------------------------------
188 Initialisation time...
189 -------------------------------------------------------------------------- */
190
/* Record the CPU/elapsed timestamps at the start of RTS initialisation. */
void
stat_startInit(void)
{
    getProcessTimes(&start_init_cpu, &start_init_elapsed);
}
196
/* Record the timestamps at the end of RTS initialisation; from here on
   elapsed time counts as mutator time until stat_startExit(). */
void
stat_endInit(void)
{
    getProcessTimes(&end_init_cpu, &end_init_elapsed);

#if USE_PAPI
    /* Start counting events for the mutator; when a garbage collection
     * starts, stat_startGC() switches to the GC event set. */
    papi_start_mutator_count();

    /* This flag is needed to avoid counting the last GC */
    papi_is_reporting = 1;

#endif
}
213
214 /* -----------------------------------------------------------------------------
215 stat_startExit and stat_endExit
216
217 These two measure the time taken in shutdownHaskell().
218 -------------------------------------------------------------------------- */
219
/* Mark the beginning of shutdownHaskell(): time after this point is
   attributed to EXIT rather than MUT in the final report. */
void
stat_startExit(void)
{
    getProcessTimes(&start_exit_cpu, &start_exit_elapsed);

#if USE_PAPI
    /* We stop counting mutator events
     * GC events are not being counted at this point */
    papi_stop_mutator_count();

    /* This flag is needed, because GC is run once more after this function */
    papi_is_reporting = 0;
#endif
}
234
/* Record the timestamps at the end of shutdownHaskell(). */
void
stat_endExit(void)
{
    getProcessTimes(&end_exit_cpu, &end_exit_elapsed);
}
240
241 /* -----------------------------------------------------------------------------
242 Called at the beginning of each GC
243 -------------------------------------------------------------------------- */
244
// Set by stat_startGC() when it prints " GC " (ringBell > 1); tells
// stat_endGC() to rub the text out again with backspaces.
static nat rub_bell = 0;
246
/* Called at the start of each GC: optionally ring/print the GC bell,
   switch PAPI counters to the GC event set, and stamp the gc_thread
   with start times (and the page-fault count if stats are on). */
void
stat_startGC (gc_thread *gct)
{
    nat bell = RtsFlags.GcFlags.ringBell;

    if (bell) {
        if (bell > 1) {
            // ringBell > 1: print a visible marker, removed in stat_endGC
            debugBelch(" GC ");
            rub_bell = 1;
        } else {
            // ringBell == 1: just sound the terminal bell
            debugBelch("\007");
        }
    }

#if USE_PAPI
    if(papi_is_reporting) {
        /* Switch to counting GC events */
        papi_stop_mutator_count();
        papi_start_gc_count();
    }
#endif

    // per-thread GC start timestamps, read back in stat_endGC()
    getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
    gct->gc_start_thread_cpu = getThreadCPUTime();

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
    {
        gct->gc_start_faults = getPageFaults();
    }
}
277
278 void
279 stat_gcWorkerThreadStart (gc_thread *gct)
280 {
281 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
282 {
283 getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
284 gct->gc_start_thread_cpu = getThreadCPUTime();
285 }
286 }
287
288 void
289 stat_gcWorkerThreadDone (gc_thread *gct)
290 {
291 Ticks thread_cpu, elapsed, gc_cpu, gc_elapsed;
292
293 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
294 {
295 elapsed = getProcessElapsedTime();
296 thread_cpu = getThreadCPUTime();
297
298 gc_cpu = thread_cpu - gct->gc_start_thread_cpu;
299 gc_elapsed = elapsed - gct->gc_start_elapsed;
300
301 taskDoneGC(gct->cap->running_task, gc_cpu, gc_elapsed);
302 }
303 }
304
305 /* -----------------------------------------------------------------------------
306 Called at the end of each GC
307 -------------------------------------------------------------------------- */
308
/* Called at the end of each GC: accumulate per-generation and total
   statistics, print a per-GC line in verbose mode, and restore the
   bell / PAPI state set up by stat_startGC().  alloc/live/copied/slop
   are in words (scaled by sizeof(W_) when printed). */
void
stat_endGC (gc_thread *gct,
            lnat alloc, lnat live, lnat copied, nat gen,
            lnat max_copied, lnat avg_copied, lnat slop)
{
    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
        RtsFlags.ProfFlags.doHeapProfile)
        // heap profiling needs GC_tot_time
    {
        Ticks cpu, elapsed, thread_gc_cpu, gc_cpu, gc_elapsed;

        getProcessTimes(&cpu, &elapsed);
        gc_elapsed = elapsed - gct->gc_start_elapsed;

        // CPU time of this OS thread only, for the per-task accounting
        thread_gc_cpu = getThreadCPUTime() - gct->gc_start_thread_cpu;

        // whole-process CPU time, for the global GC accounting below
        gc_cpu = cpu - gct->gc_start_cpu;

        taskDoneGC(gct->cap->running_task, thread_gc_cpu, gc_elapsed);

        if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
            nat faults = getPageFaults();

            statsPrintf("%9ld %9ld %9ld",
                        alloc*sizeof(W_), copied*sizeof(W_),
                        live*sizeof(W_));
            statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2d)\n",
                        TICK_TO_DBL(gc_cpu),
                        TICK_TO_DBL(gc_elapsed),
                        TICK_TO_DBL(cpu),
                        TICK_TO_DBL(elapsed - start_init_elapsed),
                        faults - gct->gc_start_faults,
                        gct->gc_start_faults - GC_end_faults,
                        gen);

            GC_end_faults = faults;
            statsFlush();
        }

        // per-generation accumulators (reported in stat_exit)
        GC_coll_cpu[gen] += gc_cpu;
        GC_coll_elapsed[gen] += gc_elapsed;
        if (GC_coll_max_pause[gen] < gc_elapsed) {
            GC_coll_max_pause[gen] = gc_elapsed;
        }

        // program-wide totals
        GC_tot_copied += (StgWord64) copied;
        GC_tot_alloc += (StgWord64) alloc;
        GC_par_max_copied += (StgWord64) max_copied;
        GC_par_avg_copied += (StgWord64) avg_copied;
        GC_tot_cpu += gc_cpu;

        if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
            // residency is only sampled at major collections
            if (live > max_residency) {
                max_residency = live;
            }
            residency_samples++;
            avg_residency += live;   // running sum; averaged in stat_exit
        }

        if (slop > max_slop) max_slop = slop;
    }

    if (rub_bell) {
        // erase the " GC " printed by stat_startGC
        debugBelch("\b\b\b  \b\b\b");
        rub_bell = 0;
    }

#if USE_PAPI
    if(papi_is_reporting) {
        /* Switch to counting mutator events */
        if (gen == 0) {
            papi_stop_gc0_count();
        } else {
            papi_stop_gc1_count();
        }
        papi_start_mutator_count();
    }
#endif
}
388
389 /* -----------------------------------------------------------------------------
   Called at the beginning of each Retainer Profiling
391 -------------------------------------------------------------------------- */
#ifdef PROFILING
/* Record the CPU/elapsed timestamps at the start of a retainer-
   profiling pass; stat_endRP() reads these back. */
void
stat_startRP(void)
{
    Ticks cpu, wall;

    getProcessTimes( &cpu, &wall );
    RP_start_time  = cpu;
    RPe_start_time = wall;
}
#endif /* PROFILING */
403
404 /* -----------------------------------------------------------------------------
   Called at the end of each Retainer Profiling
406 -------------------------------------------------------------------------- */
407
#ifdef PROFILING
/* Close off one retainer-profiling pass: accumulate the CPU and
   elapsed time spent in it, and emit a summary to the profiling file.
   retainerGeneration identifies the pass; averageNumVisit is the mean
   number of visits per object reported by the retainer profiler. */
void
stat_endRP(
    nat retainerGeneration,
#ifdef DEBUG_RETAINER
    nat maxCStackSize,
    int maxStackSize,
#endif
    double averageNumVisit)
{
    Ticks user, elapsed;
    getProcessTimes( &user, &elapsed );

    RP_tot_time += user - RP_start_time;
    RPe_tot_time += elapsed - RPe_start_time;

    /* retainerGeneration is a nat (unsigned): use %u, not %d —
       a printf format/argument type mismatch is undefined behavior
       (C99 7.19.6.1p9). */
    fprintf(prof_file, "Retainer Profiling: %u, at %f seconds\n",
            retainerGeneration, mut_user_time_during_RP());
#ifdef DEBUG_RETAINER
    fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
    /* maxStackSize is a signed int: use %d (was %u). */
    fprintf(prof_file, "\tMax auxiliary stack size = %d\n", maxStackSize);
#endif
    fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit);
}
#endif /* PROFILING */
433
434 /* -----------------------------------------------------------------------------
435 Called at the beginning of each heap census
436 -------------------------------------------------------------------------- */
#ifdef PROFILING
/* Record the CPU/elapsed timestamps at the start of a heap census;
   stat_endHeapCensus() reads these back. */
void
stat_startHeapCensus(void)
{
    Ticks cpu, wall;

    getProcessTimes( &cpu, &wall );
    HC_start_time  = cpu;
    HCe_start_time = wall;
}
#endif /* PROFILING */
448
449 /* -----------------------------------------------------------------------------
450 Called at the end of each heap census
451 -------------------------------------------------------------------------- */
#ifdef PROFILING
/* Close off one heap census: fold the time it took into the census
   totals used by the final report. */
void
stat_endHeapCensus(void)
{
    Ticks cpu, wall;

    getProcessTimes( &cpu, &wall );
    HC_tot_time  += cpu  - HC_start_time;
    HCe_tot_time += wall - HCe_start_time;
}
#endif /* PROFILING */
463
464 /* -----------------------------------------------------------------------------
465 Called at the end of execution
466
467 NOTE: number of allocations is not entirely accurate: it doesn't
468 take into account the few bytes at the end of the heap that
469 were left unused when the heap-check failed.
470 -------------------------------------------------------------------------- */
471
#ifdef DEBUG
/* Ticky-style call counters for slow calls of a given arity, consumed
   by the TICK_PRINT / TICK_PRINT_TOT macros below.  Initialised to 1
   (not 0) so the percentage reports never divide by zero. */
#define TICK_VAR_INI(arity) \
  StgInt SLOW_CALLS_##arity = 1; \
  StgInt RIGHT_ARITY_##arity = 1; \
  StgInt TAGGED_PTR_##arity = 0;

TICK_VAR_INI(1)
TICK_VAR_INI(2)

StgInt TOTAL_CALLS=1;
#endif
483
/* Report the value of a counter.  NOTE: relies on a buffer named
   `temp` being in scope at the expansion site (see stat_exit). */
#define REPORT(counter) \
  { \
    showStgWord64((counter),temp,rtsTrue/*commas*/); \
    statsPrintf(" (" #counter ") : %s\n",temp); \
  }

/* Report the value of a counter as a percentage of another counter.
   Parameters are parenthesized in the expansion so that expression
   arguments keep their intended precedence (CERT PRE01-C); the
   existing TICK_PRINT uses pass single tokens, so behavior for current
   callers is unchanged. */
#define REPORT_PCT(counter,countertot) \
  statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
              (counter)*100.0/(countertot))

/* Print the full set of slow-call counters for one arity. */
#define TICK_PRINT(arity) \
  REPORT(SLOW_CALLS_##arity); \
  REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \
  REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \
  REPORT(RIGHT_ARITY_##arity); \
  REPORT(TAGGED_PTR_##arity)

/* Slow calls of one arity as a percentage of all calls. */
#define TICK_PRINT_TOT(arity) \
  statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
              SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
506
/* Produce the end-of-run statistics report (-s / -S / -t / --machine-
   readable) and release the per-generation accumulators.  `alloc` is
   the number of words allocated since the last GC (added to
   GC_tot_alloc before reporting; scaled by sizeof(W_) when printed). */
void
stat_exit(int alloc)
{
    generation *gen;
    Ticks gc_cpu = 0;
    Ticks gc_elapsed = 0;
    Ticks init_cpu = 0;
    Ticks init_elapsed = 0;
    Ticks mut_cpu = 0;
    Ticks mut_elapsed = 0;
    Ticks exit_cpu = 0;
    Ticks exit_elapsed = 0;

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {

        char temp[BIG_STRING_LEN];   // scratch buffer for showStgWord64
        Ticks tot_cpu;
        Ticks tot_elapsed;
        nat i, g, total_collections = 0;

        getProcessTimes( &tot_cpu, &tot_elapsed );
        tot_elapsed -= start_init_elapsed;

        GC_tot_alloc += alloc;

        /* Count total garbage collections */
        for (g = 0; g < RtsFlags.GcFlags.generations; g++)
            total_collections += generations[g].collections;

        /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
        // NOTE(review): Ticks is compared against the double literal 0.0;
        // this works (integer is promoted) but `== 0` would be clearer.
        if (tot_cpu == 0.0) tot_cpu = 1;
        if (tot_elapsed == 0.0) tot_elapsed = 1;

        // final line of the verbose (-S) table: allocation since last GC
        if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
            statsPrintf("%9ld %9.9s %9.9s", (lnat)alloc*sizeof(W_), "", "");
            statsPrintf(" %5.2f %5.2f\n\n", 0.0, 0.0);
        }

        // sum per-generation GC times into overall totals
        for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
            gc_cpu += GC_coll_cpu[i];
            gc_elapsed += GC_coll_elapsed[i];
        }

        init_cpu = end_init_cpu - start_init_cpu;
        init_elapsed = end_init_elapsed - start_init_elapsed;

        exit_cpu = end_exit_cpu - start_exit_cpu;
        exit_elapsed = end_exit_elapsed - start_exit_elapsed;

        // MUT = everything between end-of-init and start-of-exit that
        // wasn't GC (or, for CPU time, profiling overhead)
        mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed;

        mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu
            - PROF_VAL(RP_tot_time + HC_tot_time);
        if (mut_cpu < 0) { mut_cpu = 0; }

        if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
            showStgWord64(GC_tot_alloc*sizeof(W_),
                          temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes allocated in the heap\n", temp);

            showStgWord64(GC_tot_copied*sizeof(W_),
                          temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes copied during GC\n", temp);

            if ( residency_samples > 0 ) {
                showStgWord64(max_residency*sizeof(W_),
                              temp, rtsTrue/*commas*/);
                statsPrintf("%16s bytes maximum residency (%ld sample(s))\n",
                            temp, residency_samples);
            }

            showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes maximum slop\n", temp);

            // fragmentation = space in megablocks not covered by blocks
            statsPrintf("%16ld MB total memory in use (%ld MB lost due to fragmentation)\n\n",
                        peak_mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)),
                        (peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));

            /* Print garbage collections in each gen */
            statsPrintf(" Tot time (elapsed) Avg pause Max pause\n");
            for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                gen = &generations[g];
                statsPrintf(" Gen %2d %5d colls, %5d par %5.2fs %5.2fs %3.4fs %3.4fs\n",
                            gen->no,
                            gen->collections,
                            gen->par_collections,
                            TICK_TO_DBL(GC_coll_cpu[g]),
                            TICK_TO_DBL(GC_coll_elapsed[g]),
                            gen->collections == 0 ? 0 : TICK_TO_DBL(GC_coll_elapsed[g] / gen->collections),
                            TICK_TO_DBL(GC_coll_max_pause[g]));
            }

#if defined(THREADED_RTS)
            // work balance: avg copied per thread / max copied by any thread
            if (RtsFlags.ParFlags.parGcEnabled) {
                statsPrintf("\n Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n",
                            (double)GC_par_avg_copied / (double)GC_par_max_copied,
                            (lnat)GC_par_avg_copied, (lnat)GC_par_max_copied,
                            RtsFlags.ParFlags.nNodes
                            );
            }
#endif
            statsPrintf("\n");

#if defined(THREADED_RTS)
            // per-task MUT/GC time breakdown
            {
                nat i;
                Task *task;
                statsPrintf(" MUT time (elapsed) GC time (elapsed)\n");
                for (i = 0, task = all_tasks;
                     task != NULL;
                     i++, task = task->all_link) {
                    statsPrintf(" Task %2d %-8s : %6.2fs (%6.2fs) %6.2fs (%6.2fs)\n",
                                i,
                                (task->worker) ? "(worker)" : "(bound)",
                                TICK_TO_DBL(task->mut_time),
                                TICK_TO_DBL(task->mut_etime),
                                TICK_TO_DBL(task->gc_time),
                                TICK_TO_DBL(task->gc_etime));
                }
            }

            statsPrintf("\n");

            // spark statistics, summed over all capabilities
            {
                nat i;
                SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
                for (i = 0; i < n_capabilities; i++) {
                    sparks.created += capabilities[i].spark_stats.created;
                    sparks.dud += capabilities[i].spark_stats.dud;
                    sparks.overflowed+= capabilities[i].spark_stats.overflowed;
                    sparks.converted += capabilities[i].spark_stats.converted;
                    sparks.gcd += capabilities[i].spark_stats.gcd;
                    sparks.fizzled += capabilities[i].spark_stats.fizzled;
                }

                statsPrintf(" SPARKS: %ld (%ld converted, %ld overflowed, %ld dud, %ld GC'd, %ld fizzled)\n\n",
                            sparks.created + sparks.dud + sparks.overflowed,
                            sparks.converted, sparks.overflowed, sparks.dud,
                            sparks.gcd, sparks.fizzled);
            }
#endif

            statsPrintf(" INIT time %6.2fs (%6.2fs elapsed)\n",
                        TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed));

            statsPrintf(" MUT time %6.2fs (%6.2fs elapsed)\n",
                        TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed));
            statsPrintf(" GC time %6.2fs (%6.2fs elapsed)\n",
                        TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed));

#ifdef PROFILING
            statsPrintf(" RP time %6.2fs (%6.2fs elapsed)\n",
                        TICK_TO_DBL(RP_tot_time), TICK_TO_DBL(RPe_tot_time));
            statsPrintf(" PROF time %6.2fs (%6.2fs elapsed)\n",
                        TICK_TO_DBL(HC_tot_time), TICK_TO_DBL(HCe_tot_time));
#endif
            statsPrintf(" EXIT time %6.2fs (%6.2fs elapsed)\n",
                        TICK_TO_DBL(exit_cpu), TICK_TO_DBL(exit_elapsed));
            statsPrintf(" Total time %6.2fs (%6.2fs elapsed)\n\n",
                        TICK_TO_DBL(tot_cpu), TICK_TO_DBL(tot_elapsed));
#ifndef THREADED_RTS
            statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n",
                        TICK_TO_DBL(gc_cpu)*100/TICK_TO_DBL(tot_cpu),
                        TICK_TO_DBL(gc_elapsed)*100/TICK_TO_DBL(tot_elapsed));
#endif

            // allocation rate = bytes allocated / mutator CPU seconds
            if (tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time) == 0)
                showStgWord64(0, temp, rtsTrue/*commas*/);
            else
                showStgWord64(
                    (StgWord64)((GC_tot_alloc*sizeof(W_))/
                                TICK_TO_DBL(tot_cpu - GC_tot_cpu -
                                            PROF_VAL(RP_tot_time + HC_tot_time))),
                    temp, rtsTrue/*commas*/);

            statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp);

            statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
                        TICK_TO_DBL(tot_cpu - GC_tot_cpu -
                                    PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                        / TICK_TO_DBL(tot_cpu),
                        TICK_TO_DBL(tot_cpu - GC_tot_cpu -
                                    PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                        / TICK_TO_DBL(tot_elapsed));

            /*
            TICK_PRINT(1);
            TICK_PRINT(2);
            REPORT(TOTAL_CALLS);
            TICK_PRINT_TOT(1);
            TICK_PRINT_TOT(2);
            */

#if USE_PAPI
            papi_stats_report();
#endif
#if defined(THREADED_RTS) && defined(PROF_SPIN)
            // spinlock contention counters (only with PROF_SPIN builds)
            {
                nat g;

                statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
                statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
                for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                    statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
                }
            }
#endif
        }

        // -t / --machine-readable: single summary line / key-value list
        if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
            char *fmt1, *fmt2;
            if (RtsFlags.MiscFlags.machineReadable) {
                fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
                fmt2 = " ,(\"num_GCs\", \"%d\")\n"
                    " ,(\"average_bytes_used\", \"%ld\")\n"
                    " ,(\"max_bytes_used\", \"%ld\")\n"
                    " ,(\"num_byte_usage_samples\", \"%ld\")\n"
                    " ,(\"peak_megabytes_allocated\", \"%lu\")\n"
                    " ,(\"init_cpu_seconds\", \"%.2f\")\n"
                    " ,(\"init_wall_seconds\", \"%.2f\")\n"
                    " ,(\"mutator_cpu_seconds\", \"%.2f\")\n"
                    " ,(\"mutator_wall_seconds\", \"%.2f\")\n"
                    " ,(\"GC_cpu_seconds\", \"%.2f\")\n"
                    " ,(\"GC_wall_seconds\", \"%.2f\")\n"
                    " ]\n";
            }
            else {
                fmt1 = "<<ghc: %llu bytes, ";
                fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.2f INIT (%.2f elapsed), %.2f MUT (%.2f elapsed), %.2f GC (%.2f elapsed) :ghc>>\n";
            }
            /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
            statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
            statsPrintf(fmt2,
                        total_collections,
                        residency_samples == 0 ? 0 :
                        avg_residency*sizeof(W_)/residency_samples,
                        max_residency*sizeof(W_),
                        residency_samples,
                        (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
                        TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed),
                        TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed),
                        TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed));
        }

        statsFlush();
        statsClose();
    }

    // release the per-generation accumulators allocated in initStats1()
    if (GC_coll_cpu) {
        stgFree(GC_coll_cpu);
        GC_coll_cpu = NULL;
    }
    if (GC_coll_elapsed) {
        stgFree(GC_coll_elapsed);
        GC_coll_elapsed = NULL;
    }
    if (GC_coll_max_pause) {
        stgFree(GC_coll_max_pause);
        GC_coll_max_pause = NULL;
    }
}
768
769 /* -----------------------------------------------------------------------------
770 stat_describe_gens
771
772 Produce some detailed info on the state of the generational GC.
773 -------------------------------------------------------------------------- */
/* Debug aid: print a table describing each generation's state — max
   blocks, mutable-list occupancy, live blocks/words (including GC
   thread-local data and any pinned object blocks), and slop. */
void
statDescribeGens(void)
{
  nat g, mut, lge, i;
  lnat gen_slop;
  lnat tot_live, tot_slop;
  lnat gen_live, gen_blocks;
  bdescr *bd;
  generation *gen;

  debugBelch(
"----------------------------------------------------------\n"
" Gen Max Mut-list Blocks Large Live Slop\n"
" Blocks Bytes Objects \n"
"----------------------------------------------------------\n");

  tot_live = 0;
  tot_slop = 0;

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      gen = &generations[g];

      // count the large objects in this generation
      for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
          lge++;
      }

      gen_live = genLiveWords(gen);
      gen_blocks = genLiveBlocks(gen);

      // add per-capability contributions: mutable lists, the pinned
      // object block, and each GC thread's local live data
      mut = 0;
      for (i = 0; i < n_capabilities; i++) {
          mut += countOccupied(capabilities[i].mut_lists[g]);

          // Add the pinned object block.
          bd = capabilities[i].pinned_object_block;
          if (bd != NULL) {
              gen_live += bd->free - bd->start;
              gen_blocks += bd->blocks;
          }

          gen_live += gcThreadLiveWords(i,g);
          gen_blocks += gcThreadLiveBlocks(i,g);
      }

      debugBelch("%5d %7ld %9d", g, (lnat)gen->max_blocks, mut);

      // slop = space in the generation's blocks not occupied by live data
      gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;

      debugBelch("%8ld %8d %8ld %8ld\n", gen_blocks, lge,
                 gen_live*sizeof(W_), gen_slop*sizeof(W_));
      tot_live += gen_live;
      tot_slop += gen_slop;
  }
  debugBelch("----------------------------------------------------------\n");
  debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_));
  debugBelch("----------------------------------------------------------\n");
  debugBelch("\n");
}
832
833 /* -----------------------------------------------------------------------------
834 Stats available via a programmatic interface, so eg. GHCi can time
835 each compilation and expression evaluation.
836 -------------------------------------------------------------------------- */
837
838 extern HsInt64 getAllocations( void )
839 { return (HsInt64)GC_tot_alloc * sizeof(W_); }
840
841 /* -----------------------------------------------------------------------------
842 Dumping stuff in the stats file, or via the debug message interface
843 -------------------------------------------------------------------------- */
844
845 void
846 statsPrintf( char *s, ... )
847 {
848 FILE *sf = RtsFlags.GcFlags.statsFile;
849 va_list ap;
850
851 va_start(ap,s);
852 if (sf == NULL) {
853 vdebugBelch(s,ap);
854 } else {
855 vfprintf(sf, s, ap);
856 }
857 va_end(ap);
858 }
859
860 static void
861 statsFlush( void )
862 {
863 FILE *sf = RtsFlags.GcFlags.statsFile;
864 if (sf != NULL) {
865 fflush(sf);
866 }
867 }
868
869 static void
870 statsClose( void )
871 {
872 FILE *sf = RtsFlags.GcFlags.statsFile;
873 if (sf != NULL) {
874 fclose(sf);
875 }
876 }