Merge branch 'master' of http://darcs.haskell.org/ghc
[ghc.git] / rts / Stats.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2005
4 *
5 * Statistics and timing-related functions.
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "PosixSource.h"
10 #include "Rts.h"
11
12 #include "RtsUtils.h"
13 #include "Schedule.h"
14 #include "Stats.h"
15 #include "Profiling.h"
16 #include "GetTime.h"
17 #include "sm/Storage.h"
18 #include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
19 #include "sm/GCThread.h"
20 #include "sm/BlockAlloc.h"
21
22 #if USE_PAPI
23 #include "Papi.h"
24 #endif
25
/* Scratch-buffer size used when formatting statistics output. */
/* Scratch-buffer size for formatting statistics output lines. */
#define BIG_STRING_LEN              512

/* Convert a Ticks value to seconds as a double. */
#define TICK_TO_DBL(t) ((double)(t) / TICKS_PER_SECOND)

/* Process CPU/elapsed times snapshotted at the boundaries of the init
 * and exit phases; stat_exit() uses these to apportion total run time
 * between INIT, MUT, GC and EXIT. */
static Ticks
    start_init_cpu, start_init_elapsed,
    end_init_cpu, end_init_elapsed,
    start_exit_cpu, start_exit_elapsed,
    end_exit_cpu, end_exit_elapsed;

/* Total CPU time spent in GC so far (all generations). */
static Ticks GC_tot_cpu  = 0;

/* Total words allocated / copied over all GCs so far. */
static StgWord64 GC_tot_alloc      = 0;
static StgWord64 GC_tot_copied     = 0;

/* Sums over all GCs of the per-GC maximum and average words copied by a
 * GC thread; their ratio gives the parallel GC work-balance figure. */
static StgWord64 GC_par_max_copied = 0;
static StgWord64 GC_par_avg_copied = 0;

#ifdef PROFILING
static Ticks RP_start_time  = 0, RP_tot_time  = 0;  // retainer prof user time
static Ticks RPe_start_time = 0, RPe_tot_time = 0;  // retainer prof elap time

static Ticks HC_start_time, HC_tot_time = 0;     // heap census prof user time
static Ticks HCe_start_time, HCe_tot_time = 0;   // heap census prof elap time
#endif

/* PROF_VAL(x) is x when profiling, otherwise 0 — lets timing formulas
 * mention the profiling counters unconditionally. */
#ifdef PROFILING
#define PROF_VAL(x)   (x)
#else
#define PROF_VAL(x)   0
#endif

// current = current as of last GC
static lnat current_residency = 0; // in words; for stats only
static lnat max_residency     = 0;
static lnat cumulative_residency = 0;
static lnat residency_samples = 0; // for stats only
static lnat current_slop = 0;
static lnat max_slop = 0;

/* Page-fault count at the end of the previous GC (verbose stats only). */
static lnat GC_end_faults = 0;

/* Per-generation accumulators, allocated in initStats1(), freed in
 * stat_exit(); indexed by generation number. */
static Ticks *GC_coll_cpu = NULL;
static Ticks *GC_coll_elapsed = NULL;
static Ticks *GC_coll_max_pause = NULL;

static void statsFlush( void );
static void statsClose( void );
76 /* -----------------------------------------------------------------------------
77 Current elapsed time
78 ------------------------------------------------------------------------- */
79
80 Ticks stat_getElapsedTime(void)
81 {
82 return getProcessElapsedTime() - start_init_elapsed;
83 }
84
85 /* ---------------------------------------------------------------------------
86 Measure the current MUT time, for profiling
87 ------------------------------------------------------------------------ */
88
89 double
90 mut_user_time_until( Ticks t )
91 {
92 return TICK_TO_DBL(t - GC_tot_cpu - PROF_VAL(RP_tot_time));
93 }
94
/* Mutator CPU time so far, in seconds. */
double
mut_user_time( void )
{
    return mut_user_time_until(getProcessCPUTime());
}
102
#ifdef PROFILING
/*
 * mut_user_time_during_RP() returns the MUT time as of the start of the
 * current retainer-profiling pass; mut_user_time_during_heap_census()
 * does the same for the current heap census.
 */
double
mut_user_time_during_RP( void )
{
    Ticks mut = RP_start_time - GC_tot_cpu - RP_tot_time;
    return TICK_TO_DBL(mut);
}

double
mut_user_time_during_heap_census( void )
{
    /* NOTE(review): subtracts RP_tot_time rather than HC_tot_time —
     * presumably intentional (census time is charged separately);
     * confirm against the profiler. */
    Ticks mut = HC_start_time - GC_tot_cpu - RP_tot_time;
    return TICK_TO_DBL(mut);
}
#endif /* PROFILING */
120
121 /* ---------------------------------------------------------------------------
122 initStats0() has no dependencies, it can be called right at the beginning
123 ------------------------------------------------------------------------ */
124
125 void
126 initStats0(void)
127 {
128 start_init_cpu = 0;
129 start_init_elapsed = 0;
130 end_init_cpu = 0;
131 end_init_elapsed = 0;
132
133 start_exit_cpu = 0;
134 start_exit_elapsed = 0;
135 end_exit_cpu = 0;
136 end_exit_elapsed = 0;
137
138 GC_tot_alloc = 0;
139 GC_tot_copied = 0;
140 GC_par_max_copied = 0;
141 GC_par_avg_copied = 0;
142 GC_tot_cpu = 0;
143
144 #ifdef PROFILING
145 RP_start_time = 0;
146 RP_tot_time = 0;
147 RPe_start_time = 0;
148 RPe_tot_time = 0;
149
150 HC_start_time = 0;
151 HC_tot_time = 0;
152 HCe_start_time = 0;
153 HCe_tot_time = 0;
154 #endif
155
156 max_residency = 0;
157 cumulative_residency = 0;
158 residency_samples = 0;
159 max_slop = 0;
160
161 GC_end_faults = 0;
162 }
163
164 /* ---------------------------------------------------------------------------
165 initStats1() can be called after setupRtsFlags()
166 ------------------------------------------------------------------------ */
167
168 void
169 initStats1 (void)
170 {
171 nat i;
172
173 if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
174 statsPrintf(" Alloc Copied Live GC GC TOT TOT Page Flts\n");
175 statsPrintf(" bytes bytes bytes user elap user elap\n");
176 }
177 GC_coll_cpu =
178 (Ticks *)stgMallocBytes(
179 sizeof(Ticks)*RtsFlags.GcFlags.generations,
180 "initStats");
181 GC_coll_elapsed =
182 (Ticks *)stgMallocBytes(
183 sizeof(Ticks)*RtsFlags.GcFlags.generations,
184 "initStats");
185 GC_coll_max_pause =
186 (Ticks *)stgMallocBytes(
187 sizeof(Ticks)*RtsFlags.GcFlags.generations,
188 "initStats");
189 for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
190 GC_coll_cpu[i] = 0;
191 GC_coll_elapsed[i] = 0;
192 GC_coll_max_pause[i] = 0;
193 }
194 }
195
196 /* -----------------------------------------------------------------------------
197 Initialisation time...
198 -------------------------------------------------------------------------- */
199
200 void
201 stat_startInit(void)
202 {
203 getProcessTimes(&start_init_cpu, &start_init_elapsed);
204 }
205
206 void
207 stat_endInit(void)
208 {
209 getProcessTimes(&end_init_cpu, &end_init_elapsed);
210
211 #if USE_PAPI
212 /* We start counting events for the mutator
213 * when garbage collection starts
214 * we switch to the GC event set. */
215 papi_start_mutator_count();
216
217 /* This flag is needed to avoid counting the last GC */
218 papi_is_reporting = 1;
219
220 #endif
221 }
222
223 /* -----------------------------------------------------------------------------
224 stat_startExit and stat_endExit
225
226 These two measure the time taken in shutdownHaskell().
227 -------------------------------------------------------------------------- */
228
229 void
230 stat_startExit(void)
231 {
232 getProcessTimes(&start_exit_cpu, &start_exit_elapsed);
233
234 #if USE_PAPI
235 /* We stop counting mutator events
236 * GC events are not being counted at this point */
237 papi_stop_mutator_count();
238
239 /* This flag is needed, because GC is run once more after this function */
240 papi_is_reporting = 0;
241 #endif
242 }
243
244 void
245 stat_endExit(void)
246 {
247 getProcessTimes(&end_exit_cpu, &end_exit_elapsed);
248 }
249
250 /* -----------------------------------------------------------------------------
251 Called at the beginning of each GC
252 -------------------------------------------------------------------------- */
253
254 static nat rub_bell = 0;
255
256 void
257 stat_startGC (gc_thread *gct)
258 {
259 nat bell = RtsFlags.GcFlags.ringBell;
260
261 if (bell) {
262 if (bell > 1) {
263 debugBelch(" GC ");
264 rub_bell = 1;
265 } else {
266 debugBelch("\007");
267 }
268 }
269
270 #if USE_PAPI
271 if(papi_is_reporting) {
272 /* Switch to counting GC events */
273 papi_stop_mutator_count();
274 papi_start_gc_count();
275 }
276 #endif
277
278 getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
279 gct->gc_start_thread_cpu = getThreadCPUTime();
280
281 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
282 {
283 gct->gc_start_faults = getPageFaults();
284 }
285 }
286
287 void
288 stat_gcWorkerThreadStart (gc_thread *gct)
289 {
290 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
291 {
292 getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
293 gct->gc_start_thread_cpu = getThreadCPUTime();
294 }
295 }
296
297 void
298 stat_gcWorkerThreadDone (gc_thread *gct)
299 {
300 Ticks thread_cpu, elapsed, gc_cpu, gc_elapsed;
301
302 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
303 {
304 elapsed = getProcessElapsedTime();
305 thread_cpu = getThreadCPUTime();
306
307 gc_cpu = thread_cpu - gct->gc_start_thread_cpu;
308 gc_elapsed = elapsed - gct->gc_start_elapsed;
309
310 taskDoneGC(gct->cap->running_task, gc_cpu, gc_elapsed);
311 }
312 }
313
314 /* -----------------------------------------------------------------------------
315 Called at the end of each GC
316 -------------------------------------------------------------------------- */
317
/*
 * Called at the end of each GC.
 *
 * alloc/live/copied/max_copied/avg_copied/slop are in words (they are
 * scaled by sizeof(W_) when printed below); 'gen' is the number of the
 * generation just collected.  Updates the per-generation and global
 * accumulators, emits a -S line in verbose mode, and restores the
 * bell/PAPI state set up by stat_startGC().
 */
void
stat_endGC (gc_thread *gct,
            lnat alloc, lnat live, lnat copied, nat gen,
            lnat max_copied, lnat avg_copied, lnat slop)
{
    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
        RtsFlags.ProfFlags.doHeapProfile)
        // heap profiling needs GC_tot_time
    {
        Ticks cpu, elapsed, thread_gc_cpu, gc_cpu, gc_elapsed;

        getProcessTimes(&cpu, &elapsed);
        gc_elapsed = elapsed - gct->gc_start_elapsed;

        /* CPU consumed by this OS thread alone, for per-task accounting. */
        thread_gc_cpu = getThreadCPUTime() - gct->gc_start_thread_cpu;

        /* Whole-process CPU for this GC, for the global accumulators. */
        gc_cpu = cpu - gct->gc_start_cpu;

        taskDoneGC(gct->cap->running_task, thread_gc_cpu, gc_elapsed);

        if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
            nat faults = getPageFaults();

            statsPrintf("%9ld %9ld %9ld",
                    alloc*sizeof(W_), copied*sizeof(W_),
                        live*sizeof(W_));
            statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2d)\n",
                    TICK_TO_DBL(gc_cpu),
                    TICK_TO_DBL(gc_elapsed),
                    TICK_TO_DBL(cpu),
                    TICK_TO_DBL(elapsed - start_init_elapsed),
                    faults - gct->gc_start_faults,
                        gct->gc_start_faults - GC_end_faults,
                    gen);

            GC_end_faults = faults;
            statsFlush();
        }

        /* Per-generation totals and worst pause. */
        GC_coll_cpu[gen] += gc_cpu;
        GC_coll_elapsed[gen] += gc_elapsed;
        if (GC_coll_max_pause[gen] < gc_elapsed) {
            GC_coll_max_pause[gen] = gc_elapsed;
        }

        GC_tot_copied += (StgWord64) copied;
        GC_tot_alloc  += (StgWord64) alloc;
        GC_par_max_copied += (StgWord64) max_copied;
        GC_par_avg_copied += (StgWord64) avg_copied;
        GC_tot_cpu   += gc_cpu;

        if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
            /* Residency is only sampled at major GCs, when everything
             * live has been traced. */
            if (live > max_residency) {
                max_residency = live;
            }
            current_residency = live;
            residency_samples++;
            cumulative_residency += live;
        }

        if (slop > max_slop) max_slop = slop;
    }

    /* Rub out the " GC " marker printed by stat_startGC (-B2). */
    if (rub_bell) {
        debugBelch("\b\b\b \b\b\b");
        rub_bell = 0;
    }

#if USE_PAPI
    if(papi_is_reporting) {
        /* Switch back to counting mutator events. */
        if (gen == 0) {
            papi_stop_gc0_count();
        } else {
            papi_stop_gc1_count();
        }
        papi_start_mutator_count();
    }
#endif
}
398
399 /* -----------------------------------------------------------------------------
Called at the beginning of each Retainer Profiling
401 -------------------------------------------------------------------------- */
#ifdef PROFILING
/* Called at the beginning of each retainer-profiling pass: snapshot the
 * CPU and elapsed clocks so stat_endRP() can charge the pass. */
void
stat_startRP(void)
{
    Ticks user, elapsed;

    getProcessTimes(&user, &elapsed);
    RP_start_time  = user;
    RPe_start_time = elapsed;
}
#endif /* PROFILING */
413
414 /* -----------------------------------------------------------------------------
Called at the end of each Retainer Profiling
416 -------------------------------------------------------------------------- */
417
#ifdef PROFILING
/*
 * Called at the end of each retainer-profiling pass.  Accumulates the
 * CPU/elapsed time of the pass into RP_tot_time/RPe_tot_time and writes
 * a summary (plus stack statistics under DEBUG_RETAINER) to prof_file.
 */
void
stat_endRP(
  nat retainerGeneration,
#ifdef DEBUG_RETAINER
  nat maxCStackSize,
  int maxStackSize,
#endif
  double averageNumVisit)
{
    Ticks user, elapsed;
    getProcessTimes( &user, &elapsed );

    RP_tot_time += user - RP_start_time;
    RPe_tot_time += elapsed - RPe_start_time;

    fprintf(prof_file, "Retainer Profiling: %d, at %f seconds\n",
      retainerGeneration, mut_user_time_during_RP());
#ifdef DEBUG_RETAINER
    fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
    /* maxStackSize is an int, so it must be printed with %d; passing an
     * int for %u is a printf format/argument mismatch. */
    fprintf(prof_file, "\tMax auxiliary stack size = %d\n", maxStackSize);
#endif
    fprintf(prof_file, "\tAverage number of visits per object = %f\n",
            averageNumVisit);
}
#endif /* PROFILING */
443
444 /* -----------------------------------------------------------------------------
445 Called at the beginning of each heap census
446 -------------------------------------------------------------------------- */
#ifdef PROFILING
/* Called at the beginning of each heap census: snapshot the CPU and
 * elapsed clocks so stat_endHeapCensus() can charge the census. */
void
stat_startHeapCensus(void)
{
    Ticks user, elapsed;

    getProcessTimes(&user, &elapsed);
    HC_start_time  = user;
    HCe_start_time = elapsed;
}
#endif /* PROFILING */
458
459 /* -----------------------------------------------------------------------------
460 Called at the end of each heap census
461 -------------------------------------------------------------------------- */
#ifdef PROFILING
/* Called at the end of each heap census: accumulate its CPU and elapsed
 * time into the census totals. */
void
stat_endHeapCensus(void)
{
    Ticks user, elapsed;

    getProcessTimes(&user, &elapsed);
    HC_tot_time  += user - HC_start_time;
    HCe_tot_time += elapsed - HCe_start_time;
}
#endif /* PROFILING */
473
474 /* -----------------------------------------------------------------------------
475 Called at the end of execution
476
477 NOTE: number of allocations is not entirely accurate: it doesn't
478 take into account the few bytes at the end of the heap that
479 were left unused when the heap-check failed.
480 -------------------------------------------------------------------------- */
481
#ifdef DEBUG
/* Counters for the slow-call statistics reported below.  Initialised to
 * 1 (not 0) so the percentage reports never divide by zero. */
#define TICK_VAR_INI(arity) \
  StgInt SLOW_CALLS_##arity = 1; \
  StgInt RIGHT_ARITY_##arity = 1; \
  StgInt TAGGED_PTR_##arity = 0;

TICK_VAR_INI(1)
TICK_VAR_INI(2)

StgInt TOTAL_CALLS=1;
#endif

/* Report the value of a counter */
/* Relies on a 'temp' char buffer being in scope at the expansion site. */
#define REPORT(counter) \
  { \
    showStgWord64(counter,temp,rtsTrue/*commas*/); \
    statsPrintf(" (" #counter ") : %s\n",temp); \
  }

/* Report the value of a counter as a percentage of another counter */
#define REPORT_PCT(counter,countertot) \
  statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
              counter*100.0/countertot)

#define TICK_PRINT(arity) \
  REPORT(SLOW_CALLS_##arity); \
  REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \
  REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \
  REPORT(RIGHT_ARITY_##arity); \
  REPORT(TAGGED_PTR_##arity)

#define TICK_PRINT_TOT(arity) \
  statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
              SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)

/* CPU / elapsed time spent in the RTS init phase. */
static inline Ticks get_init_cpu(void) { return end_init_cpu - start_init_cpu; }
static inline Ticks get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; }
519
/*
 * Called at the end of execution.  Prints the summary (-s), verbose
 * (-S) or one-line (-t) GC statistics, then frees the per-generation
 * accumulator arrays.
 *
 * 'alloc' is the allocation (in words) of the final, partially-filled
 * nursery block, which has not yet been folded into GC_tot_alloc.
 *
 * NOTE: number of allocations is not entirely accurate: it doesn't
 * take into account the few bytes at the end of the heap that
 * were left unused when the heap-check failed.
 */
void
stat_exit(int alloc)
{
    generation *gen;
    Ticks gc_cpu = 0;
    Ticks gc_elapsed = 0;
    Ticks init_cpu = 0;
    Ticks init_elapsed = 0;
    Ticks mut_cpu = 0;
    Ticks mut_elapsed = 0;
    Ticks exit_cpu = 0;
    Ticks exit_elapsed = 0;

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {

        char temp[BIG_STRING_LEN];
        Ticks tot_cpu;
        Ticks tot_elapsed;
        nat i, g, total_collections = 0;

        getProcessTimes( &tot_cpu, &tot_elapsed );
        tot_elapsed -= start_init_elapsed;

        /* Fold in the allocation of the final nursery block. */
        GC_tot_alloc += alloc;

        /* Count total garbage collections */
        for (g = 0; g < RtsFlags.GcFlags.generations; g++)
            total_collections += generations[g].collections;

        /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
        if (tot_cpu == 0.0) tot_cpu = 1;
        if (tot_elapsed == 0.0) tot_elapsed = 1;

        /* Verbose mode: one final line for the leftover allocation. */
        if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
            statsPrintf("%9ld %9.9s %9.9s", (lnat)alloc*sizeof(W_), "", "");
            statsPrintf(" %5.2f %5.2f\n\n", 0.0, 0.0);
        }

        /* Total GC time over all generations. */
        for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
            gc_cpu += GC_coll_cpu[i];
            gc_elapsed += GC_coll_elapsed[i];
        }

        init_cpu     = get_init_cpu();
        init_elapsed = get_init_elapsed();

        exit_cpu     = end_exit_cpu - start_exit_cpu;
        exit_elapsed = end_exit_elapsed - start_exit_elapsed;

        /* Mutator time = everything between init and exit that isn't GC
         * (or, for CPU, profiling). */
        mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed;

        mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu
            - PROF_VAL(RP_tot_time + HC_tot_time);
        if (mut_cpu < 0) { mut_cpu = 0; }

        if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
            showStgWord64(GC_tot_alloc*sizeof(W_),
                                 temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes allocated in the heap\n", temp);

            showStgWord64(GC_tot_copied*sizeof(W_),
                                 temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes copied during GC\n", temp);

            if ( residency_samples > 0 ) {
                showStgWord64(max_residency*sizeof(W_),
                                     temp, rtsTrue/*commas*/);
                statsPrintf("%16s bytes maximum residency (%ld sample(s))\n",
                        temp, residency_samples);
            }

            showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes maximum slop\n", temp);

            /* Peak memory in use; fragmentation = blocks reserved but
             * never handed out at the high-water mark. */
            statsPrintf("%16ld MB total memory in use (%ld MB lost due to fragmentation)\n\n",
                        peak_mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)),
                        (peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));

            /* Print garbage collections in each gen */
            statsPrintf("                                    Tot time (elapsed)  Avg pause  Max pause\n");
            for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                gen = &generations[g];
                statsPrintf("  Gen %2d %5d colls, %5d par %5.2fs %5.2fs %3.4fs %3.4fs\n",
                            gen->no,
                            gen->collections,
                            gen->par_collections,
                            TICK_TO_DBL(GC_coll_cpu[g]),
                            TICK_TO_DBL(GC_coll_elapsed[g]),
                            gen->collections == 0 ? 0 : TICK_TO_DBL(GC_coll_elapsed[g] / gen->collections),
                            TICK_TO_DBL(GC_coll_max_pause[g]));
            }

#if defined(THREADED_RTS)
            if (RtsFlags.ParFlags.parGcEnabled) {
                /* avg/max copied ratio: 1.0 = perfectly balanced. */
                statsPrintf("\n  Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n",
                            (double)GC_par_avg_copied / (double)GC_par_max_copied,
                            (lnat)GC_par_avg_copied, (lnat)GC_par_max_copied,
                            RtsFlags.ParFlags.nNodes
                    );
            }
#endif
            statsPrintf("\n");

#if defined(THREADED_RTS)
            /* Per-task MUT/GC time breakdown. */
            {
                nat i;
                Task *task;
                statsPrintf("                        MUT time (elapsed)       GC time  (elapsed)\n");
                for (i = 0, task = all_tasks;
                     task != NULL;
                     i++, task = task->all_link) {
                    statsPrintf("  Task %2d %-8s :  %6.2fs    (%6.2fs)     %6.2fs    (%6.2fs)\n",
                                i,
                                (task->worker) ? "(worker)" : "(bound)",
                                TICK_TO_DBL(task->mut_time),
                                TICK_TO_DBL(task->mut_etime),
                                TICK_TO_DBL(task->gc_time),
                                TICK_TO_DBL(task->gc_etime));
                }
            }

            statsPrintf("\n");

            /* Spark totals summed over all capabilities. */
            {
                nat i;
                SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
                for (i = 0; i < n_capabilities; i++) {
                    sparks.created   += capabilities[i].spark_stats.created;
                    sparks.dud       += capabilities[i].spark_stats.dud;
                    sparks.overflowed+= capabilities[i].spark_stats.overflowed;
                    sparks.converted += capabilities[i].spark_stats.converted;
                    sparks.gcd       += capabilities[i].spark_stats.gcd;
                    sparks.fizzled   += capabilities[i].spark_stats.fizzled;
                }

                statsPrintf("  SPARKS: %ld (%ld converted, %ld overflowed, %ld dud, %ld GC'd, %ld fizzled)\n\n",
                            sparks.created + sparks.dud + sparks.overflowed,
                            sparks.converted, sparks.overflowed, sparks.dud,
                            sparks.gcd, sparks.fizzled);
            }
#endif

            statsPrintf("  INIT    time  %6.2fs  (%6.2fs elapsed)\n",
                        TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed));

            statsPrintf("  MUT     time  %6.2fs  (%6.2fs elapsed)\n",
                        TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed));
            statsPrintf("  GC      time  %6.2fs  (%6.2fs elapsed)\n",
                        TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed));

#ifdef PROFILING
            statsPrintf("  RP      time  %6.2fs  (%6.2fs elapsed)\n",
                    TICK_TO_DBL(RP_tot_time), TICK_TO_DBL(RPe_tot_time));
            statsPrintf("  PROF    time  %6.2fs  (%6.2fs elapsed)\n",
                    TICK_TO_DBL(HC_tot_time), TICK_TO_DBL(HCe_tot_time));
#endif
            statsPrintf("  EXIT    time  %6.2fs  (%6.2fs elapsed)\n",
                        TICK_TO_DBL(exit_cpu), TICK_TO_DBL(exit_elapsed));
            statsPrintf("  Total   time  %6.2fs  (%6.2fs elapsed)\n\n",
                        TICK_TO_DBL(tot_cpu), TICK_TO_DBL(tot_elapsed));
#ifndef THREADED_RTS
            statsPrintf("  %%GC     time  %5.1f%%  (%.1f%% elapsed)\n\n",
                        TICK_TO_DBL(gc_cpu)*100/TICK_TO_DBL(tot_cpu),
                        TICK_TO_DBL(gc_elapsed)*100/TICK_TO_DBL(tot_elapsed));
#endif

            /* Allocation rate per second of mutator CPU time; guard the
             * division when the mutator CPU time rounds to zero. */
            if (tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time) == 0)
                showStgWord64(0, temp, rtsTrue/*commas*/);
            else
                showStgWord64(
                    (StgWord64)((GC_tot_alloc*sizeof(W_))/
                                TICK_TO_DBL(tot_cpu - GC_tot_cpu -
                                            PROF_VAL(RP_tot_time + HC_tot_time))),
                    temp, rtsTrue/*commas*/);

            statsPrintf("  Alloc rate    %s bytes per MUT second\n\n", temp);

            statsPrintf("  Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
                        TICK_TO_DBL(tot_cpu - GC_tot_cpu -
                                    PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                        / TICK_TO_DBL(tot_cpu),
                        TICK_TO_DBL(tot_cpu - GC_tot_cpu -
                                    PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                        / TICK_TO_DBL(tot_elapsed));

            /*
            TICK_PRINT(1);
            TICK_PRINT(2);
            REPORT(TOTAL_CALLS);
            TICK_PRINT_TOT(1);
            TICK_PRINT_TOT(2);
            */

#if USE_PAPI
            papi_stats_report();
#endif
#if defined(THREADED_RTS) && defined(PROF_SPIN)
            /* Spin-lock contention counters (PROF_SPIN builds only). */
            {
                nat g;

                statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
                statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
                for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                    statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
                }
            }
#endif
        }

        /* One-line (-t) output, machine-readable with --machine-readable. */
        if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
            char *fmt1, *fmt2;
            if (RtsFlags.MiscFlags.machineReadable) {
                fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
                fmt2 = " ,(\"num_GCs\", \"%d\")\n"
                       " ,(\"average_bytes_used\", \"%ld\")\n"
                       " ,(\"max_bytes_used\", \"%ld\")\n"
                       " ,(\"num_byte_usage_samples\", \"%ld\")\n"
                       " ,(\"peak_megabytes_allocated\", \"%lu\")\n"
                       " ,(\"init_cpu_seconds\", \"%.2f\")\n"
                       " ,(\"init_wall_seconds\", \"%.2f\")\n"
                       " ,(\"mutator_cpu_seconds\", \"%.2f\")\n"
                       " ,(\"mutator_wall_seconds\", \"%.2f\")\n"
                       " ,(\"GC_cpu_seconds\", \"%.2f\")\n"
                       " ,(\"GC_wall_seconds\", \"%.2f\")\n"
                       " ]\n";
            }
            else {
                fmt1 = "<<ghc: %llu bytes, ";
                fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.2f INIT (%.2f elapsed), %.2f MUT (%.2f elapsed), %.2f GC (%.2f elapsed) :ghc>>\n";
            }
            /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
            statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
            statsPrintf(fmt2,
                        total_collections,
                        residency_samples == 0 ? 0 :
                        cumulative_residency*sizeof(W_)/residency_samples,
                        max_residency*sizeof(W_),
                        residency_samples,
                        (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
                        TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed),
                        TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed),
                        TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed));
        }

        statsFlush();
        statsClose();
    }

    /* Free the per-generation accumulators allocated in initStats1(). */
    if (GC_coll_cpu) {
        stgFree(GC_coll_cpu);
        GC_coll_cpu = NULL;
    }
    if (GC_coll_elapsed) {
        stgFree(GC_coll_elapsed);
        GC_coll_elapsed = NULL;
    }
    if (GC_coll_max_pause) {
        stgFree(GC_coll_max_pause);
        GC_coll_max_pause = NULL;
    }
}
781
782 /* -----------------------------------------------------------------------------
783 stat_describe_gens
784
785 Produce some detailed info on the state of the generational GC.
786 -------------------------------------------------------------------------- */
/*
 * Produce some detailed info on the state of the generational GC:
 * one row per generation (blocks, mutable-list bytes, large objects,
 * live words, slop), plus totals.  Output goes to the debug channel.
 */
void
statDescribeGens(void)
{
  nat g, mut, lge, i;
  lnat gen_slop;
  lnat tot_live, tot_slop;
  lnat gen_live, gen_blocks;
  bdescr *bd;
  generation *gen;

  debugBelch(
"----------------------------------------------------------\n"
"  Gen     Max  Mut-list  Blocks    Large     Live     Slop\n"
"       Blocks     Bytes          Objects                  \n"
"----------------------------------------------------------\n");

  tot_live = 0;
  tot_slop = 0;

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      gen = &generations[g];

      /* Count this generation's large objects. */
      for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
          lge++;
      }

      gen_live   = genLiveWords(gen);
      gen_blocks = genLiveBlocks(gen);

      mut = 0;
      for (i = 0; i < n_capabilities; i++) {
          /* Mutable-list occupancy, per capability. */
          mut += countOccupied(capabilities[i].mut_lists[g]);

          // Add the pinned object block.
          bd = capabilities[i].pinned_object_block;
          if (bd != NULL) {
              gen_live   += bd->free - bd->start;
              gen_blocks += bd->blocks;
          }

          /* Data still held in per-GC-thread workspaces. */
          gen_live   += gcThreadLiveWords(i,g);
          gen_blocks += gcThreadLiveBlocks(i,g);
      }

      debugBelch("%5d %7ld %9d", g, (lnat)gen->max_blocks, mut);

      /* Slop = space reserved in blocks but not occupied by live data. */
      gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;

      debugBelch("%8ld %8d %8ld %8ld\n", gen_blocks, lge,
                 gen_live*sizeof(W_), gen_slop*sizeof(W_));
      tot_live += gen_live;
      tot_slop += gen_slop;
  }
  debugBelch("----------------------------------------------------------\n");
  debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_));
  debugBelch("----------------------------------------------------------\n");
  debugBelch("\n");
}
845
846 /* -----------------------------------------------------------------------------
847 Stats available via a programmatic interface, so eg. GHCi can time
848 each compilation and expression evaluation.
849 -------------------------------------------------------------------------- */
850
851 extern HsInt64 getAllocations( void )
852 { return (HsInt64)GC_tot_alloc * sizeof(W_); }
853
854 /* EZY: I'm not convinced I got all the casting right. */
855
/* Fill *s with a snapshot of the statistics gathered so far, scaled to
 * bytes/seconds.  Programmatic counterpart of the -s report. */
extern void getGCStats( GCStats *s )
{
    nat total_collections = 0;
    nat g;
    Ticks gc_cpu = 0;
    Ticks gc_elapsed = 0;
    Ticks current_elapsed = 0;
    Ticks current_cpu = 0;

    getProcessTimes(&current_cpu, &current_elapsed);

    /* EZY: static inline'ify these */
    for (g = 0; g < RtsFlags.GcFlags.generations; g++)
        total_collections += generations[g].collections;

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gc_cpu += GC_coll_cpu[g];
        gc_elapsed += GC_coll_elapsed[g];
    }

    /* Word counts are scaled by sizeof(W_) to give bytes. */
    s->bytes_allocated = GC_tot_alloc*(StgWord64)sizeof(W_);
    s->num_gcs = total_collections;
    s->num_byte_usage_samples = residency_samples;
    s->max_bytes_used = max_residency*sizeof(W_);
    s->cumulative_bytes_used = cumulative_residency*(StgWord64)sizeof(W_);
    s->peak_megabytes_allocated = (StgWord64)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L));
    s->bytes_copied = GC_tot_copied*(StgWord64)sizeof(W_);
    s->max_bytes_slop = max_slop*(StgWord64)sizeof(W_);
    s->current_bytes_used = current_residency*(StgWord64)sizeof(W_);
    s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_);
    /*
    s->init_cpu_seconds = TICK_TO_DBL(get_init_cpu());
    s->init_wall_seconds = TICK_TO_DBL(get_init_elapsed());
    */
    /* Mutator time = now minus end-of-init minus GC (and profiling) time. */
    s->mutator_cpu_seconds = TICK_TO_DBL(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
    s->mutator_wall_seconds = TICK_TO_DBL(current_elapsed- end_init_elapsed - gc_elapsed);
    s->gc_cpu_seconds = TICK_TO_DBL(gc_cpu);
    s->gc_wall_seconds = TICK_TO_DBL(gc_elapsed);
    s->par_avg_bytes_copied = GC_par_avg_copied*(StgWord64)sizeof(W_);
    s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_);
}
// extern void getTaskStats( TaskStats **s ) {}
#if 0
/* Disabled (never compiled): would sum the per-capability spark
 * counters into *s.  Kept for reference. */
extern void getSparkStats( SparkCounters *s ) {
    nat i;
    s->created = 0;
    s->dud = 0;
    s->overflowed = 0;
    s->converted = 0;
    s->gcd = 0;
    s->fizzled = 0;
    for (i = 0; i < n_capabilities; i++) {
        s->created += capabilities[i].spark_stats.created;
        s->dud += capabilities[i].spark_stats.dud;
        s->overflowed+= capabilities[i].spark_stats.overflowed;
        s->converted += capabilities[i].spark_stats.converted;
        s->gcd += capabilities[i].spark_stats.gcd;
        s->fizzled += capabilities[i].spark_stats.fizzled;
    }
}
#endif
917
918 /* -----------------------------------------------------------------------------
919 Dumping stuff in the stats file, or via the debug message interface
920 -------------------------------------------------------------------------- */
921
922 void
923 statsPrintf( char *s, ... )
924 {
925 FILE *sf = RtsFlags.GcFlags.statsFile;
926 va_list ap;
927
928 va_start(ap,s);
929 if (sf == NULL) {
930 vdebugBelch(s,ap);
931 } else {
932 vfprintf(sf, s, ap);
933 }
934 va_end(ap);
935 }
936
937 static void
938 statsFlush( void )
939 {
940 FILE *sf = RtsFlags.GcFlags.statsFile;
941 if (sf != NULL) {
942 fflush(sf);
943 }
944 }
945
946 static void
947 statsClose( void )
948 {
949 FILE *sf = RtsFlags.GcFlags.statsFile;
950 if (sf != NULL) {
951 fclose(sf);
952 }
953 }