Merge remote-tracking branch 'origin/master' into type-nats
[ghc.git] / rts / Stats.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2005
4 *
5 * Statistics and timing-related functions.
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "PosixSource.h"
10 #include "Rts.h"
11
12 #include "RtsUtils.h"
13 #include "Schedule.h"
14 #include "Stats.h"
15 #include "Profiling.h"
16 #include "GetTime.h"
17 #include "sm/Storage.h"
18 #include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
19 #include "sm/GCThread.h"
20 #include "sm/BlockAlloc.h"
21
22 #if USE_PAPI
23 #include "Papi.h"
24 #endif
25
/* Size of the scratch buffer used when formatting statistics strings. */
#define BIG_STRING_LEN              512

/* Convert a Time value (TIME_RESOLUTION ticks) to seconds as a double. */
#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)

/* CPU/elapsed timestamps bracketing RTS startup and shutdown; written by
 * stat_startInit/stat_endInit and stat_startExit/stat_endExit. */
static Time
    start_init_cpu, start_init_elapsed,
    end_init_cpu,   end_init_elapsed,
    start_exit_cpu, start_exit_elapsed,
    end_exit_cpu,   end_exit_elapsed;

/* Total CPU time spent in GC so far (heap census time is folded in too;
 * see mut_user_time_until). */
static Time GC_tot_cpu = 0;

static StgWord64 GC_tot_alloc      = 0;  // total words allocated (accumulated in stat_endGC)
static StgWord64 GC_tot_copied     = 0;  // total words copied by GC

/* Accumulated per-GC copy counts, used to report parallel GC work balance. */
static StgWord64 GC_par_max_copied = 0;
static StgWord64 GC_par_avg_copied = 0;

#ifdef PROFILING
static Time RP_start_time  = 0, RP_tot_time  = 0;  // retainer prof user time
static Time RPe_start_time = 0, RPe_tot_time = 0;  // retainer prof elap time

static Time HC_start_time, HC_tot_time = 0;     // heap census prof user time
static Time HCe_start_time, HCe_tot_time = 0;   // heap census prof elap time
#endif

/* PROF_VAL(x) is x when profiling, 0 otherwise, so profiling terms can
 * appear unconditionally in the timing arithmetic below. */
#ifdef PROFILING
#define PROF_VAL(x)   (x)
#else
#define PROF_VAL(x)   0
#endif

// current = current as of last GC
static lnat current_residency = 0; // in words; for stats only
static lnat max_residency = 0;
static lnat cumulative_residency = 0;
static lnat residency_samples = 0; // for stats only
static lnat current_slop = 0;
static lnat max_slop = 0;

/* Page-fault count recorded at the end of the previous GC
 * (verbose-stats output only). */
static lnat GC_end_faults = 0;

/* Per-generation GC timing, allocated in initStats1, freed in stat_exit;
 * indexed by generation number. */
static Time *GC_coll_cpu = NULL;
static Time *GC_coll_elapsed = NULL;
static Time *GC_coll_max_pause = NULL;

static void statsFlush( void );
static void statsClose( void );
75
76 /* -----------------------------------------------------------------------------
77 Current elapsed time
78 ------------------------------------------------------------------------- */
79
80 Time stat_getElapsedTime(void)
81 {
82 return getProcessElapsedTime() - start_init_elapsed;
83 }
84
85 /* ---------------------------------------------------------------------------
86 Measure the current MUT time, for profiling
87 ------------------------------------------------------------------------ */
88
89 double
90 mut_user_time_until( Time t )
91 {
92 return TimeToSecondsDbl(t - GC_tot_cpu);
93 // heapCensus() time is included in GC_tot_cpu, so we don't need
94 // to subtract it here.
95 }
96
/* Mutator CPU seconds as of now. */
double
mut_user_time( void )
{
    return mut_user_time_until(getProcessCPUTime());
}
104
105 #ifdef PROFILING
106 /*
107 mut_user_time_during_RP() returns the MUT time during retainer profiling.
108 The same is for mut_user_time_during_HC();
109 */
110 double
111 mut_user_time_during_RP( void )
112 {
113 return TimeToSecondsDbl(RP_start_time - GC_tot_cpu - RP_tot_time);
114 }
115
double
mut_user_time_during_heap_census( void )
{
    // MUT time as of the start of the current heap census.
    // NOTE(review): this subtracts RP_tot_time (retainer profiling), not
    // HC_tot_time -- presumably because heap census time is itself part of
    // GC_tot_cpu already; confirm this asymmetry is intended.
    return TimeToSecondsDbl(HC_start_time - GC_tot_cpu - RP_tot_time);
}
121 #endif /* PROFILING */
122
123 /* ---------------------------------------------------------------------------
124 initStats0() has no dependencies, it can be called right at the beginning
125 ------------------------------------------------------------------------ */
126
127 void
128 initStats0(void)
129 {
130 start_init_cpu = 0;
131 start_init_elapsed = 0;
132 end_init_cpu = 0;
133 end_init_elapsed = 0;
134
135 start_exit_cpu = 0;
136 start_exit_elapsed = 0;
137 end_exit_cpu = 0;
138 end_exit_elapsed = 0;
139
140 GC_tot_alloc = 0;
141 GC_tot_copied = 0;
142 GC_par_max_copied = 0;
143 GC_par_avg_copied = 0;
144 GC_tot_cpu = 0;
145
146 #ifdef PROFILING
147 RP_start_time = 0;
148 RP_tot_time = 0;
149 RPe_start_time = 0;
150 RPe_tot_time = 0;
151
152 HC_start_time = 0;
153 HC_tot_time = 0;
154 HCe_start_time = 0;
155 HCe_tot_time = 0;
156 #endif
157
158 max_residency = 0;
159 cumulative_residency = 0;
160 residency_samples = 0;
161 max_slop = 0;
162
163 GC_end_faults = 0;
164 }
165
166 /* ---------------------------------------------------------------------------
167 initStats1() can be called after setupRtsFlags()
168 ------------------------------------------------------------------------ */
169
170 void
171 initStats1 (void)
172 {
173 nat i;
174
175 if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
176 statsPrintf(" Alloc Copied Live GC GC TOT TOT Page Flts\n");
177 statsPrintf(" bytes bytes bytes user elap user elap\n");
178 }
179 GC_coll_cpu =
180 (Time *)stgMallocBytes(
181 sizeof(Time)*RtsFlags.GcFlags.generations,
182 "initStats");
183 GC_coll_elapsed =
184 (Time *)stgMallocBytes(
185 sizeof(Time)*RtsFlags.GcFlags.generations,
186 "initStats");
187 GC_coll_max_pause =
188 (Time *)stgMallocBytes(
189 sizeof(Time)*RtsFlags.GcFlags.generations,
190 "initStats");
191 for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
192 GC_coll_cpu[i] = 0;
193 GC_coll_elapsed[i] = 0;
194 GC_coll_max_pause[i] = 0;
195 }
196 }
197
198 /* -----------------------------------------------------------------------------
199 Initialisation time...
200 -------------------------------------------------------------------------- */
201
void
stat_startInit(void)
{
    // Timestamp the very start of RTS initialisation; paired with
    // stat_endInit to produce the INIT figures in stat_exit.
    getProcessTimes(&start_init_cpu, &start_init_elapsed);
}
207
void
stat_endInit(void)
{
    // Timestamp the end of RTS initialisation.
    getProcessTimes(&end_init_cpu, &end_init_elapsed);

#if USE_PAPI
    /* We start counting events for the mutator;
     * when garbage collection starts
     * we switch to the GC event set. */
    papi_start_mutator_count();

    /* This flag is needed to avoid counting the last GC */
    papi_is_reporting = 1;

#endif
}
224
225 /* -----------------------------------------------------------------------------
226 stat_startExit and stat_endExit
227
228 These two measure the time taken in shutdownHaskell().
229 -------------------------------------------------------------------------- */
230
void
stat_startExit(void)
{
    // Timestamp the start of shutdownHaskell(); paired with stat_endExit.
    getProcessTimes(&start_exit_cpu, &start_exit_elapsed);

#if USE_PAPI
    /* We stop counting mutator events;
     * GC events are not being counted at this point */
    papi_stop_mutator_count();

    /* This flag is needed, because GC is run once more after this function */
    papi_is_reporting = 0;
#endif
}
245
void
stat_endExit(void)
{
    // Timestamp the end of shutdownHaskell().
    getProcessTimes(&end_exit_cpu, &end_exit_elapsed);
}
251
252 /* -----------------------------------------------------------------------------
253 Called at the beginning of each GC
254 -------------------------------------------------------------------------- */
255
256 static nat rub_bell = 0;
257
258 void
259 stat_startGC (gc_thread *gct)
260 {
261 nat bell = RtsFlags.GcFlags.ringBell;
262
263 if (bell) {
264 if (bell > 1) {
265 debugBelch(" GC ");
266 rub_bell = 1;
267 } else {
268 debugBelch("\007");
269 }
270 }
271
272 #if USE_PAPI
273 if(papi_is_reporting) {
274 /* Switch to counting GC events */
275 papi_stop_mutator_count();
276 papi_start_gc_count();
277 }
278 #endif
279
280 getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
281 gct->gc_start_thread_cpu = getThreadCPUTime();
282
283 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
284 {
285 gct->gc_start_faults = getPageFaults();
286 }
287 }
288
289 void
290 stat_gcWorkerThreadStart (gc_thread *gct)
291 {
292 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
293 {
294 getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
295 gct->gc_start_thread_cpu = getThreadCPUTime();
296 }
297 }
298
299 void
300 stat_gcWorkerThreadDone (gc_thread *gct)
301 {
302 Time thread_cpu, elapsed, gc_cpu, gc_elapsed;
303
304 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
305 {
306 elapsed = getProcessElapsedTime();
307 thread_cpu = getThreadCPUTime();
308
309 gc_cpu = thread_cpu - gct->gc_start_thread_cpu;
310 gc_elapsed = elapsed - gct->gc_start_elapsed;
311
312 taskDoneGC(gct->cap->running_task, gc_cpu, gc_elapsed);
313 }
314 }
315
316 /* -----------------------------------------------------------------------------
317 Called at the end of each GC
318 -------------------------------------------------------------------------- */
319
/*
 * Called at the end of each GC by the leading gc_thread.  Updates the
 * per-generation and global GC accumulators, samples residency on major
 * collections, and emits a line of verbose (-S) output.
 *
 * alloc/live/copied/max_copied/avg_copied/slop are in words; gen is the
 * oldest generation collected.
 */
void
stat_endGC (gc_thread *gct,
            lnat alloc, lnat live, lnat copied, nat gen,
            lnat max_copied, lnat avg_copied, lnat slop)
{
    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
        RtsFlags.ProfFlags.doHeapProfile)
        // heap profiling needs GC_tot_time
    {
        Time cpu, elapsed, thread_gc_cpu, gc_cpu, gc_elapsed;

        getProcessTimes(&cpu, &elapsed);
        gc_elapsed = elapsed - gct->gc_start_elapsed;

        // CPU time consumed by this OS thread alone during the GC;
        // this is what gets charged to the task below.
        thread_gc_cpu = getThreadCPUTime() - gct->gc_start_thread_cpu;

        // process-wide CPU time spanning the GC; used for the global
        // accumulators and the -S output.
        gc_cpu = cpu - gct->gc_start_cpu;

        taskDoneGC(gct->cap->running_task, thread_gc_cpu, gc_elapsed);

        if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
            nat faults = getPageFaults();

            statsPrintf("%9ld %9ld %9ld",
                    alloc*sizeof(W_), copied*sizeof(W_),
                        live*sizeof(W_));
            statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2d)\n",
                    TimeToSecondsDbl(gc_cpu),
                    TimeToSecondsDbl(gc_elapsed),
                    TimeToSecondsDbl(cpu),
                    TimeToSecondsDbl(elapsed - start_init_elapsed),
                    faults - gct->gc_start_faults,
                        gct->gc_start_faults - GC_end_faults,
                    gen);

            GC_end_faults = faults;
            statsFlush();
        }

        // per-generation accumulators
        GC_coll_cpu[gen] += gc_cpu;
        GC_coll_elapsed[gen] += gc_elapsed;
        if (GC_coll_max_pause[gen] < gc_elapsed) {
            GC_coll_max_pause[gen] = gc_elapsed;
        }

        GC_tot_copied += (StgWord64) copied;
        GC_tot_alloc  += (StgWord64) alloc;
        GC_par_max_copied += (StgWord64) max_copied;
        GC_par_avg_copied += (StgWord64) avg_copied;
        GC_tot_cpu   += gc_cpu;

        if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
            // residency is only sampled on major collections
            if (live > max_residency) {
                max_residency = live;
            }
            current_residency = live;
            residency_samples++;
            cumulative_residency += live;
        }

        if (slop > max_slop) max_slop = slop;
    }

    if (rub_bell) {
        // erase the " GC " printed by stat_startGC
        debugBelch("\b\b\b \b\b\b");
        rub_bell = 0;
    }

#if USE_PAPI
    if(papi_is_reporting) {
        /* Switch to counting mutator events */
        if (gen == 0) {
            papi_stop_gc0_count();
        } else {
            papi_stop_gc1_count();
        }
        papi_start_mutator_count();
    }
#endif
}
400
401 /* -----------------------------------------------------------------------------
402 Called at the beginning of each Retainer Profiliing
403 -------------------------------------------------------------------------- */
404 #ifdef PROFILING
405 void
406 stat_startRP(void)
407 {
408 Time user, elapsed;
409 getProcessTimes( &user, &elapsed );
410
411 RP_start_time = user;
412 RPe_start_time = elapsed;
413 }
414 #endif /* PROFILING */
415
416 /* -----------------------------------------------------------------------------
417 Called at the end of each Retainer Profiliing
418 -------------------------------------------------------------------------- */
419
420 #ifdef PROFILING
421 void
422 stat_endRP(
423 nat retainerGeneration,
424 #ifdef DEBUG_RETAINER
425 nat maxCStackSize,
426 int maxStackSize,
427 #endif
428 double averageNumVisit)
429 {
430 Time user, elapsed;
431 getProcessTimes( &user, &elapsed );
432
433 RP_tot_time += user - RP_start_time;
434 RPe_tot_time += elapsed - RPe_start_time;
435
436 fprintf(prof_file, "Retainer Profiling: %d, at %f seconds\n",
437 retainerGeneration, mut_user_time_during_RP());
438 #ifdef DEBUG_RETAINER
439 fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
440 fprintf(prof_file, "\tMax auxiliary stack size = %u\n", maxStackSize);
441 #endif
442 fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit);
443 }
444 #endif /* PROFILING */
445
446 /* -----------------------------------------------------------------------------
447 Called at the beginning of each heap census
448 -------------------------------------------------------------------------- */
449 #ifdef PROFILING
450 void
451 stat_startHeapCensus(void)
452 {
453 Time user, elapsed;
454 getProcessTimes( &user, &elapsed );
455
456 HC_start_time = user;
457 HCe_start_time = elapsed;
458 }
459 #endif /* PROFILING */
460
461 /* -----------------------------------------------------------------------------
462 Called at the end of each heap census
463 -------------------------------------------------------------------------- */
464 #ifdef PROFILING
465 void
466 stat_endHeapCensus(void)
467 {
468 Time user, elapsed;
469 getProcessTimes( &user, &elapsed );
470
471 HC_tot_time += user - HC_start_time;
472 HCe_tot_time += elapsed - HCe_start_time;
473 }
474 #endif /* PROFILING */
475
476 /* -----------------------------------------------------------------------------
477 Called at the end of execution
478
479 NOTE: number of allocations is not entirely accurate: it doesn't
480 take into account the few bytes at the end of the heap that
481 were left unused when the heap-check failed.
482 -------------------------------------------------------------------------- */
483
#ifdef DEBUG
/* Ticky-style slow-call counters, defined only in DEBUG builds.
 * Initialised to 1 (not 0) so the percentage REPORTs below never
 * divide by zero. */
#define TICK_VAR_INI(arity) \
  StgInt SLOW_CALLS_##arity = 1; \
  StgInt RIGHT_ARITY_##arity = 1; \
  StgInt TAGGED_PTR_##arity = 0;

TICK_VAR_INI(1)
TICK_VAR_INI(2)

StgInt TOTAL_CALLS=1;
#endif

/* Report the value of a counter */
#define REPORT(counter) \
  { \
    showStgWord64(counter,temp,rtsTrue/*commas*/); \
    statsPrintf(" (" #counter ") : %s\n",temp); \
  }

/* Report the value of a counter as a percentage of another counter */
#define REPORT_PCT(counter,countertot) \
  statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
	      counter*100.0/countertot)

#define TICK_PRINT(arity) \
  REPORT(SLOW_CALLS_##arity); \
  REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \
  REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \
  REPORT(RIGHT_ARITY_##arity); \
  REPORT(TAGGED_PTR_##arity)

#define TICK_PRINT_TOT(arity) \
  statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
	      SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)

/* CPU and elapsed time spent in RTS initialisation, from the timestamps
 * recorded by stat_startInit/stat_endInit. */
static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; }
static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; }
521
/*
 * Called once at RTS shutdown.  Computes the final INIT/MUT/GC/EXIT time
 * breakdown and emits either the summary (-s / +RTS -s), verbose (-S),
 * or one-line (-t) statistics report, then frees the per-generation
 * timing arrays.
 *
 * alloc: words allocated since the last GC, to be added to GC_tot_alloc.
 */
void
stat_exit(int alloc)
{
    generation *gen;
    Time gc_cpu = 0;
    Time gc_elapsed = 0;
    Time init_cpu = 0;
    Time init_elapsed = 0;
    Time mut_cpu = 0;
    Time mut_elapsed = 0;
    Time exit_cpu = 0;
    Time exit_elapsed = 0;

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {

	char temp[BIG_STRING_LEN];
	Time tot_cpu;
	Time tot_elapsed;
	nat i, g, total_collections = 0;

	getProcessTimes( &tot_cpu, &tot_elapsed );
	tot_elapsed -= start_init_elapsed;

	GC_tot_alloc += alloc;

	/* Count total garbage collections */
	for (g = 0; g < RtsFlags.GcFlags.generations; g++)
	    total_collections += generations[g].collections;

	/* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
	if (tot_cpu == 0.0) tot_cpu = 1;
	if (tot_elapsed == 0.0) tot_elapsed = 1;

	if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
	    statsPrintf("%9ld %9.9s %9.9s", (lnat)alloc*sizeof(W_), "", "");
	    statsPrintf(" %5.2f %5.2f\n\n", 0.0, 0.0);
	}

	// total GC time across all generations
	for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
	    gc_cpu     += GC_coll_cpu[i];
	    gc_elapsed += GC_coll_elapsed[i];
	}

	// heapCensus() is called by the GC, so RP and HC time are
	// included in the GC stats.  We therefore subtract them to
	// obtain the actual GC cpu time.  XXX: we aren't doing this
	// for elapsed time.
	gc_cpu     -= 0 + PROF_VAL(RP_tot_time + HC_tot_time);
	gc_elapsed -= 0 + PROF_VAL(RPe_tot_time + HCe_tot_time);

	init_cpu     = get_init_cpu();
	init_elapsed = get_init_elapsed();

	exit_cpu     = end_exit_cpu - start_exit_cpu;
	exit_elapsed = end_exit_elapsed - start_exit_elapsed;

	mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed;

	// MUT time is whatever is left after subtracting GC and
	// profiling time from the span between init end and exit start.
	mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu
	    - PROF_VAL(RP_tot_time + HC_tot_time);
	if (mut_cpu < 0) { mut_cpu = 0; }

	if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
	    showStgWord64(GC_tot_alloc*sizeof(W_),
				 temp, rtsTrue/*commas*/);
	    statsPrintf("%16s bytes allocated in the heap\n", temp);

	    showStgWord64(GC_tot_copied*sizeof(W_),
				 temp, rtsTrue/*commas*/);
	    statsPrintf("%16s bytes copied during GC\n", temp);

	    if ( residency_samples > 0 ) {
		showStgWord64(max_residency*sizeof(W_),
				     temp, rtsTrue/*commas*/);
		statsPrintf("%16s bytes maximum residency (%ld sample(s))\n",
			temp, residency_samples);
	    }

	    showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
	    statsPrintf("%16s bytes maximum slop\n", temp);

	    statsPrintf("%16ld MB total memory in use (%ld MB lost due to fragmentation)\n\n",
                        peak_mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)),
                        (peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));

	    /* Print garbage collections in each gen */
            statsPrintf("                                    Tot time (elapsed)  Avg pause  Max pause\n");
            for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                gen = &generations[g];
                statsPrintf("  Gen %2d     %5d colls, %5d par   %5.2fs   %5.2fs     %3.4fs    %3.4fs\n",
                            gen->no,
                            gen->collections,
                            gen->par_collections,
                            TimeToSecondsDbl(GC_coll_cpu[g]),
                            TimeToSecondsDbl(GC_coll_elapsed[g]),
                            gen->collections == 0 ? 0 : TimeToSecondsDbl(GC_coll_elapsed[g] / gen->collections),
                            TimeToSecondsDbl(GC_coll_max_pause[g]));
            }

#if defined(THREADED_RTS)
            if (RtsFlags.ParFlags.parGcEnabled) {
                // work balance: ratio of average to maximum words copied
                // per GC thread (1.0 = perfectly balanced)
                statsPrintf("\n  Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n",
                            (double)GC_par_avg_copied / (double)GC_par_max_copied,
                            (lnat)GC_par_avg_copied, (lnat)GC_par_max_copied,
                            n_capabilities
                    );
            }
#endif
            statsPrintf("\n");

#if defined(THREADED_RTS)
            {
                nat i;
                Task *task;
                statsPrintf("                        MUT time (elapsed)       GC time  (elapsed)\n");
                for (i = 0, task = all_tasks;
                     task != NULL;
                     i++, task = task->all_link) {
                    statsPrintf("  Task %2d %-8s :  %6.2fs    (%6.2fs)     %6.2fs    (%6.2fs)\n",
                                i,
                                (task->worker) ? "(worker)" : "(bound)",
                                TimeToSecondsDbl(task->mut_time),
                                TimeToSecondsDbl(task->mut_etime),
                                TimeToSecondsDbl(task->gc_time),
                                TimeToSecondsDbl(task->gc_etime));
                }
            }

            statsPrintf("\n");

            {
                // aggregate spark counters across all capabilities
                nat i;
                SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
                for (i = 0; i < n_capabilities; i++) {
                    sparks.created   += capabilities[i].spark_stats.created;
                    sparks.dud       += capabilities[i].spark_stats.dud;
                    sparks.overflowed+= capabilities[i].spark_stats.overflowed;
                    sparks.converted += capabilities[i].spark_stats.converted;
                    sparks.gcd       += capabilities[i].spark_stats.gcd;
                    sparks.fizzled   += capabilities[i].spark_stats.fizzled;
                }

                statsPrintf("  SPARKS: %ld (%ld converted, %ld overflowed, %ld dud, %ld GC'd, %ld fizzled)\n\n",
                            sparks.created + sparks.dud + sparks.overflowed,
                            sparks.converted, sparks.overflowed, sparks.dud,
                            sparks.gcd, sparks.fizzled);
            }
#endif

	    statsPrintf("  INIT    time  %6.2fs  (%6.2fs elapsed)\n",
                        TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed));

            statsPrintf("  MUT     time  %6.2fs  (%6.2fs elapsed)\n",
                        TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed));
            statsPrintf("  GC      time  %6.2fs  (%6.2fs elapsed)\n",
                        TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));

#ifdef PROFILING
	    statsPrintf("  RP      time  %6.2fs  (%6.2fs elapsed)\n",
		    TimeToSecondsDbl(RP_tot_time), TimeToSecondsDbl(RPe_tot_time));
	    statsPrintf("  PROF    time  %6.2fs  (%6.2fs elapsed)\n",
		    TimeToSecondsDbl(HC_tot_time), TimeToSecondsDbl(HCe_tot_time));
#endif
	    statsPrintf("  EXIT    time  %6.2fs  (%6.2fs elapsed)\n",
		    TimeToSecondsDbl(exit_cpu), TimeToSecondsDbl(exit_elapsed));
	    statsPrintf("  Total   time  %6.2fs  (%6.2fs elapsed)\n\n",
		    TimeToSecondsDbl(tot_cpu), TimeToSecondsDbl(tot_elapsed));
#ifndef THREADED_RTS
	    statsPrintf("  %%GC     time     %5.1f%%  (%.1f%% elapsed)\n\n",
		    TimeToSecondsDbl(gc_cpu)*100/TimeToSecondsDbl(tot_cpu),
		    TimeToSecondsDbl(gc_elapsed)*100/TimeToSecondsDbl(tot_elapsed));
#endif

	    if (mut_cpu == 0) {
		showStgWord64(0, temp, rtsTrue/*commas*/);
	    } else {
		showStgWord64(
                    (StgWord64)((GC_tot_alloc*sizeof(W_)) / TimeToSecondsDbl(mut_cpu)),
                    temp, rtsTrue/*commas*/);
	    }

	    statsPrintf("  Alloc rate    %s bytes per MUT second\n\n", temp);

	    statsPrintf("  Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
		    TimeToSecondsDbl(tot_cpu - gc_cpu -
				PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
		    / TimeToSecondsDbl(tot_cpu),
		    TimeToSecondsDbl(tot_cpu - gc_cpu -
				PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
		    / TimeToSecondsDbl(tot_elapsed));

            /*
            TICK_PRINT(1);
            TICK_PRINT(2);
	    REPORT(TOTAL_CALLS);
            TICK_PRINT_TOT(1);
            TICK_PRINT_TOT(2);
            */

#if USE_PAPI
            papi_stats_report();
#endif
#if defined(THREADED_RTS) && defined(PROF_SPIN)
            {
                nat g;

                statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
                statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
                for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                    statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
                }
            }
#endif
	}

	if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
      char *fmt1, *fmt2;
      if (RtsFlags.MiscFlags.machineReadable) {
          fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
          fmt2 = " ,(\"num_GCs\", \"%d\")\n"
                 " ,(\"average_bytes_used\", \"%ld\")\n"
                 " ,(\"max_bytes_used\", \"%ld\")\n"
                 " ,(\"num_byte_usage_samples\", \"%ld\")\n"
                 " ,(\"peak_megabytes_allocated\", \"%lu\")\n"
                 " ,(\"init_cpu_seconds\", \"%.2f\")\n"
                 " ,(\"init_wall_seconds\", \"%.2f\")\n"
                 " ,(\"mutator_cpu_seconds\", \"%.2f\")\n"
                 " ,(\"mutator_wall_seconds\", \"%.2f\")\n"
                 " ,(\"GC_cpu_seconds\", \"%.2f\")\n"
                 " ,(\"GC_wall_seconds\", \"%.2f\")\n"
                 " ]\n";
      }
      else {
          fmt1 = "<<ghc: %llu bytes, ";
          fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.2f INIT (%.2f elapsed), %.2f MUT (%.2f elapsed), %.2f GC (%.2f elapsed) :ghc>>\n";
      }
	  /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
	  statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
	  statsPrintf(fmt2,
		    total_collections,
		    residency_samples == 0 ? 0 :
		        cumulative_residency*sizeof(W_)/residency_samples,
		    max_residency*sizeof(W_),
		    residency_samples,
		    (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
		    TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed),
		    TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed),
		    TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
	}

	statsFlush();
	statsClose();
    }

    // free the per-generation timing arrays allocated in initStats1
    if (GC_coll_cpu) {
        stgFree(GC_coll_cpu);
        GC_coll_cpu = NULL;
    }
    if (GC_coll_elapsed) {
        stgFree(GC_coll_elapsed);
        GC_coll_elapsed = NULL;
    }
    if (GC_coll_max_pause) {
        stgFree(GC_coll_max_pause);
        GC_coll_max_pause = NULL;
    }
}
789
790 /* -----------------------------------------------------------------------------
791 stat_describe_gens
792
793 Produce some detailed info on the state of the generational GC.
794 -------------------------------------------------------------------------- */
/*
 * Debug helper: print a per-generation table of max blocks, mut-list
 * bytes, block count, large objects, live bytes and slop, followed by
 * totals.  All byte figures are converted from words via sizeof(W_).
 */
void
statDescribeGens(void)
{
  nat g, mut, lge, i;
  lnat gen_slop;
  lnat tot_live, tot_slop;
  lnat gen_live, gen_blocks;
  bdescr *bd;
  generation *gen;

  debugBelch(
"----------------------------------------------------------\n"
"  Gen     Max  Mut-list  Blocks    Large     Live     Slop\n"
"       Blocks     Bytes          Objects                  \n"
"----------------------------------------------------------\n");

  tot_live = 0;
  tot_slop = 0;

  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      gen = &generations[g];

      // count large objects by walking the list
      for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
          lge++;
      }

      gen_live   = genLiveWords(gen);
      gen_blocks = genLiveBlocks(gen);

      mut = 0;
      for (i = 0; i < n_capabilities; i++) {
          mut += countOccupied(capabilities[i].mut_lists[g]);

          // Add the pinned object block.
          bd = capabilities[i].pinned_object_block;
          if (bd != NULL) {
              gen_live   += bd->free - bd->start;
              gen_blocks += bd->blocks;
          }

          // per-GC-thread live data for this generation
          gen_live   += gcThreadLiveWords(i,g);
          gen_blocks += gcThreadLiveBlocks(i,g);
      }

      debugBelch("%5d %7ld %9d", g, (lnat)gen->max_blocks, mut);

      gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;

      debugBelch("%8ld %8d %8ld %8ld\n", gen_blocks, lge,
                 gen_live*sizeof(W_), gen_slop*sizeof(W_));
      tot_live += gen_live;
      tot_slop += gen_slop;
  }
  debugBelch("----------------------------------------------------------\n");
  debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_));
  debugBelch("----------------------------------------------------------\n");
  debugBelch("\n");
}
853
854 /* -----------------------------------------------------------------------------
855 Stats available via a programmatic interface, so eg. GHCi can time
856 each compilation and expression evaluation.
857 -------------------------------------------------------------------------- */
858
859 extern HsInt64 getAllocations( void )
860 { return (HsInt64)GC_tot_alloc * sizeof(W_); }
861
862 /* EZY: I'm not convinced I got all the casting right. */
863
/* Fill in a GCStats record with a snapshot of the current statistics
 * (programmatic interface backing GHC.Stats). */
extern void getGCStats( GCStats *s )
{
    nat total_collections = 0;
    nat g;
    Time gc_cpu = 0;
    Time gc_elapsed = 0;
    Time current_elapsed = 0;
    Time current_cpu = 0;

    getProcessTimes(&current_cpu, &current_elapsed);

    /* EZY: static inline'ify these */
    for (g = 0; g < RtsFlags.GcFlags.generations; g++)
        total_collections += generations[g].collections;

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gc_cpu     += GC_coll_cpu[g];
        gc_elapsed += GC_coll_elapsed[g];
    }

    // word counts are converted to bytes via sizeof(W_)
    s->bytes_allocated = GC_tot_alloc*(StgWord64)sizeof(W_);
    s->num_gcs = total_collections;
    s->num_byte_usage_samples = residency_samples;
    s->max_bytes_used = max_residency*sizeof(W_);
    s->cumulative_bytes_used = cumulative_residency*(StgWord64)sizeof(W_);
    s->peak_megabytes_allocated = (StgWord64)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L));
    s->bytes_copied = GC_tot_copied*(StgWord64)sizeof(W_);
    s->max_bytes_slop = max_slop*(StgWord64)sizeof(W_);
    s->current_bytes_used = current_residency*(StgWord64)sizeof(W_);
    s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_);
    /*
    s->init_cpu_seconds = TimeToSecondsDbl(get_init_cpu());
    s->init_wall_seconds = TimeToSecondsDbl(get_init_elapsed());
    */
    // mutator time = everything since init end, minus GC and profiling
    s->mutator_cpu_seconds = TimeToSecondsDbl(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
    s->mutator_wall_seconds = TimeToSecondsDbl(current_elapsed- end_init_elapsed - gc_elapsed);
    s->gc_cpu_seconds = TimeToSecondsDbl(gc_cpu);
    s->gc_wall_seconds = TimeToSecondsDbl(gc_elapsed);
    /* EZY: Being consistent with incremental output, but maybe should also discount init */
    s->cpu_seconds = TimeToSecondsDbl(current_cpu);
    s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed);
    s->par_avg_bytes_copied = GC_par_avg_copied*(StgWord64)sizeof(W_);
    s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_);
}
// extern void getTaskStats( TaskStats **s ) {}
#if 0
/* Aggregate spark counters across all capabilities.  Currently compiled
 * out (#if 0); kept for reference. */
extern void getSparkStats( SparkCounters *s ) {
    nat i;
    s->created = 0;
    s->dud = 0;
    s->overflowed = 0;
    s->converted = 0;
    s->gcd = 0;
    s->fizzled = 0;
    for (i = 0; i < n_capabilities; i++) {
        s->created   += capabilities[i].spark_stats.created;
        s->dud       += capabilities[i].spark_stats.dud;
        s->overflowed+= capabilities[i].spark_stats.overflowed;
        s->converted += capabilities[i].spark_stats.converted;
        s->gcd       += capabilities[i].spark_stats.gcd;
        s->fizzled   += capabilities[i].spark_stats.fizzled;
    }
}
#endif
928
929 /* -----------------------------------------------------------------------------
930 Dumping stuff in the stats file, or via the debug message interface
931 -------------------------------------------------------------------------- */
932
933 void
934 statsPrintf( char *s, ... )
935 {
936 FILE *sf = RtsFlags.GcFlags.statsFile;
937 va_list ap;
938
939 va_start(ap,s);
940 if (sf == NULL) {
941 vdebugBelch(s,ap);
942 } else {
943 vfprintf(sf, s, ap);
944 }
945 va_end(ap);
946 }
947
948 static void
949 statsFlush( void )
950 {
951 FILE *sf = RtsFlags.GcFlags.statsFile;
952 if (sf != NULL) {
953 fflush(sf);
954 }
955 }
956
957 static void
958 statsClose( void )
959 {
960 FILE *sf = RtsFlags.GcFlags.statsFile;
961 if (sf != NULL) {
962 fclose(sf);
963 }
964 }