Add getGCStatsEnabled function.
[ghc.git] / rts / Stats.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2005
4 *
5 * Statistics and timing-related functions.
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "PosixSource.h"
10 #include "Rts.h"
11
12 #include "RtsUtils.h"
13 #include "Schedule.h"
14 #include "Stats.h"
15 #include "Profiling.h"
16 #include "GetTime.h"
17 #include "sm/Storage.h"
18 #include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
19 #include "sm/GCThread.h"
20 #include "sm/BlockAlloc.h"
21
22 #if USE_PAPI
23 #include "Papi.h"
24 #endif
25
/* Scratch-buffer size used when formatting numbers for stats output */
#define BIG_STRING_LEN 512

/* Convert a Time (in TIME_RESOLUTION units) to seconds, as a double */
#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)

// Process CPU / elapsed timestamps captured at the boundaries of RTS
// startup (stat_startInit/stat_endInit) and shutdown
// (stat_startExit/stat_endExit); stat_exit() uses these to apportion
// INIT/MUT/EXIT time.
static Time
start_init_cpu, start_init_elapsed,
end_init_cpu, end_init_elapsed,
start_exit_cpu, start_exit_elapsed,
end_exit_cpu, end_exit_elapsed;

// Total CPU time spent in GC (accumulated in stat_endGC; when
// profiling, heap-census time is folded in too -- see mut_user_time_until).
static Time GC_tot_cpu = 0;

static StgWord64 GC_tot_alloc = 0;   // total allocation, in words
static StgWord64 GC_tot_copied = 0;  // total words copied by the GC

// Accumulated per-GC "most copied by one thread" / "total copied"
// figures, used for the parallel-GC work-balance line in stat_exit().
static StgWord64 GC_par_max_copied = 0;
static StgWord64 GC_par_tot_copied = 0;

#ifdef PROFILING
static Time RP_start_time = 0, RP_tot_time = 0;   // retainer prof user time
static Time RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time

static Time HC_start_time, HC_tot_time = 0;   // heap census prof user time
static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
#endif

// PROF_VAL(x) expands to x when profiling and to 0 otherwise, so
// profiling times can appear in arithmetic without #ifdefs at each use.
#ifdef PROFILING
#define PROF_VAL(x) (x)
#else
#define PROF_VAL(x) 0
#endif

// current = current as of last GC
static lnat current_residency = 0; // in words; for stats only
static lnat max_residency = 0;
static lnat cumulative_residency = 0;
static lnat residency_samples = 0; // for stats only
static lnat current_slop = 0;
static lnat max_slop = 0;

// Page-fault count as of the end of the last GC (VERBOSE stats only).
static lnat GC_end_faults = 0;

// Per-generation GC timing arrays, allocated in initStats1() once the
// generation count is known and freed at the end of stat_exit().
static Time *GC_coll_cpu = NULL;
static Time *GC_coll_elapsed = NULL;
static Time *GC_coll_max_pause = NULL;

static void statsFlush( void );
static void statsClose( void );
75
76 /* -----------------------------------------------------------------------------
77 Current elapsed time
78 ------------------------------------------------------------------------- */
79
80 Time stat_getElapsedTime(void)
81 {
82 return getProcessElapsedTime() - start_init_elapsed;
83 }
84
85 /* ---------------------------------------------------------------------------
86 Measure the current MUT time, for profiling
87 ------------------------------------------------------------------------ */
88
89 double
90 mut_user_time_until( Time t )
91 {
92 return TimeToSecondsDbl(t - GC_tot_cpu);
93 // heapCensus() time is included in GC_tot_cpu, so we don't need
94 // to subtract it here.
95 }
96
// Mutator CPU time as of "now", in seconds.
double
mut_user_time( void )
{
    return mut_user_time_until(getProcessCPUTime());
}
104
#ifdef PROFILING
/*
  mut_user_time_during_RP() returns the MUT time as of the start of the
  current retainer-profiling pass; mut_user_time_during_heap_census()
  does the same for the current heap census.
*/
double
mut_user_time_during_RP( void )
{
    // CPU time at RP start, minus GC time and earlier RP passes.
    return TimeToSecondsDbl(RP_start_time - GC_tot_cpu - RP_tot_time);
}

double
mut_user_time_during_heap_census( void )
{
    // NOTE(review): this subtracts RP_tot_time but not HC_tot_time --
    // presumably because completed census time is already folded into
    // GC_tot_cpu (see mut_user_time_until), but worth confirming
    // against stat_endHeapCensus().
    return TimeToSecondsDbl(HC_start_time - GC_tot_cpu - RP_tot_time);
}
#endif /* PROFILING */
122
123 /* ---------------------------------------------------------------------------
124 initStats0() has no dependencies, it can be called right at the beginning
125 ------------------------------------------------------------------------ */
126
127 void
128 initStats0(void)
129 {
130 start_init_cpu = 0;
131 start_init_elapsed = 0;
132 end_init_cpu = 0;
133 end_init_elapsed = 0;
134
135 start_exit_cpu = 0;
136 start_exit_elapsed = 0;
137 end_exit_cpu = 0;
138 end_exit_elapsed = 0;
139
140 GC_tot_alloc = 0;
141 GC_tot_copied = 0;
142 GC_par_max_copied = 0;
143 GC_par_tot_copied = 0;
144 GC_tot_cpu = 0;
145
146 #ifdef PROFILING
147 RP_start_time = 0;
148 RP_tot_time = 0;
149 RPe_start_time = 0;
150 RPe_tot_time = 0;
151
152 HC_start_time = 0;
153 HC_tot_time = 0;
154 HCe_start_time = 0;
155 HCe_tot_time = 0;
156 #endif
157
158 max_residency = 0;
159 cumulative_residency = 0;
160 residency_samples = 0;
161 max_slop = 0;
162
163 GC_end_faults = 0;
164 }
165
166 /* ---------------------------------------------------------------------------
167 initStats1() can be called after setupRtsFlags()
168 ------------------------------------------------------------------------ */
169
170 void
171 initStats1 (void)
172 {
173 nat i;
174
175 if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
176 statsPrintf(" Alloc Copied Live GC GC TOT TOT Page Flts\n");
177 statsPrintf(" bytes bytes bytes user elap user elap\n");
178 }
179 GC_coll_cpu =
180 (Time *)stgMallocBytes(
181 sizeof(Time)*RtsFlags.GcFlags.generations,
182 "initStats");
183 GC_coll_elapsed =
184 (Time *)stgMallocBytes(
185 sizeof(Time)*RtsFlags.GcFlags.generations,
186 "initStats");
187 GC_coll_max_pause =
188 (Time *)stgMallocBytes(
189 sizeof(Time)*RtsFlags.GcFlags.generations,
190 "initStats");
191 for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
192 GC_coll_cpu[i] = 0;
193 GC_coll_elapsed[i] = 0;
194 GC_coll_max_pause[i] = 0;
195 }
196 }
197
198 /* -----------------------------------------------------------------------------
199 Initialisation time...
200 -------------------------------------------------------------------------- */
201
202 void
203 stat_startInit(void)
204 {
205 getProcessTimes(&start_init_cpu, &start_init_elapsed);
206 }
207
// Capture process CPU/elapsed time once RTS initialisation is complete,
// and (with PAPI) begin counting mutator hardware events.
void
stat_endInit(void)
{
    getProcessTimes(&end_init_cpu, &end_init_elapsed);

#if USE_PAPI
    /* We start counting events for the mutator
     * when garbage collection starts
     * we switch to the GC event set. */
    papi_start_mutator_count();

    /* This flag is needed to avoid counting the last GC */
    papi_is_reporting = 1;

#endif
}
224
225 /* -----------------------------------------------------------------------------
226 stat_startExit and stat_endExit
227
228 These two measure the time taken in shutdownHaskell().
229 -------------------------------------------------------------------------- */
230
// Mark the start of shutdownHaskell(): record timestamps used to
// compute EXIT time, and (with PAPI) stop counting mutator events.
void
stat_startExit(void)
{
    getProcessTimes(&start_exit_cpu, &start_exit_elapsed);

#if USE_PAPI
    /* We stop counting mutator events
     * GC events are not being counted at this point */
    papi_stop_mutator_count();

    /* This flag is needed, because GC is run once more after this function */
    papi_is_reporting = 0;
#endif
}
245
246 void
247 stat_endExit(void)
248 {
249 getProcessTimes(&end_exit_cpu, &end_exit_elapsed);
250 }
251
252 /* -----------------------------------------------------------------------------
253 Called at the beginning of each GC
254 -------------------------------------------------------------------------- */
255
// Set when "-B -B" style bell output printed " GC " that must later be
// rubbed out by stat_endGC().
static nat rub_bell = 0;

// Called at the beginning of each GC: optional bell, PAPI event-set
// switch, and capture of the per-gc_thread start timestamps that
// stat_endGC() will subtract from.
void
stat_startGC (Capability *cap, gc_thread *gct)
{
    nat bell = RtsFlags.GcFlags.ringBell;

    if (bell) {
        if (bell > 1) {
            // bell > 1: print a visible " GC " marker instead of beeping;
            // remember to erase it again in stat_endGC().
            debugBelch(" GC ");
            rub_bell = 1;
        } else {
            debugBelch("\007");
        }
    }

#if USE_PAPI
    if(papi_is_reporting) {
        /* Switch to counting GC events */
        papi_stop_mutator_count();
        papi_start_gc_count();
    }
#endif

    getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);

    // Post EVENT_GC_START with the same timestamp as used for stats
    // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
    // Here, as opposed to other places, the event is emitted on the cap
    // that initiates the GC and external tools expect it to have the same
    // timestamp as used in +RTS -s calculations.
    traceEventGcStartAtT(cap,
                         TimeToNS(gct->gc_start_elapsed - start_init_elapsed));

    gct->gc_start_thread_cpu = getThreadCPUTime();

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
    {
        gct->gc_start_faults = getPageFaults();
    }
}
297
// Per-worker GC start hook.  Currently a no-op: the body is kept under
// #if 0 as a template for re-enabling per-thread GC stats.
void
stat_gcWorkerThreadStart (gc_thread *gct STG_UNUSED)
{
#if 0
    /*
     * We don't collect per-thread GC stats any more, but this code
     * could be used to do that if we want to in the future:
     */
    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
    {
        getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
        gct->gc_start_thread_cpu = getThreadCPUTime();
    }
#endif
}
313
// Per-worker GC completion hook.  Currently a no-op: the body is kept
// under #if 0 as a template for re-enabling per-thread GC stats.
void
stat_gcWorkerThreadDone (gc_thread *gct STG_UNUSED)
{
#if 0
    /*
     * We don't collect per-thread GC stats any more, but this code
     * could be used to do that if we want to in the future:
     */
    Time thread_cpu, elapsed, gc_cpu, gc_elapsed;

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
    {
        elapsed = getProcessElapsedTime();
        thread_cpu = getThreadCPUTime();

        gc_cpu = thread_cpu - gct->gc_start_thread_cpu;
        gc_elapsed = elapsed - gct->gc_start_elapsed;

        taskDoneGC(gct->cap->running_task, gc_cpu, gc_elapsed);
    }
#endif
}
336
337 /* -----------------------------------------------------------------------------
338 Called at the end of each GC
339 -------------------------------------------------------------------------- */
340
341 void
342 stat_endGC (Capability *cap, gc_thread *gct,
343 lnat alloc, lnat live, lnat copied, lnat slop, nat gen,
344 nat par_n_threads, lnat par_max_copied, lnat par_tot_copied)
345 {
346 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
347 RtsFlags.ProfFlags.doHeapProfile)
348 // heap profiling needs GC_tot_time
349 {
350 Time cpu, elapsed, gc_cpu, gc_elapsed;
351
352 // Has to be emitted while all caps stopped for GC, but before GC_END.
353 // See trac.haskell.org/ThreadScope/wiki/RTSsummaryEvents
354 // for a detailed design rationale of the current setup
355 // of GC eventlog events.
356 traceEventGcGlobalSync(cap);
357
358 // Emitted before GC_END on all caps, which simplifies tools code.
359 traceEventGcStats(cap,
360 CAPSET_HEAP_DEFAULT,
361 gen,
362 copied * sizeof(W_),
363 slop * sizeof(W_),
364 /* current loss due to fragmentation */
365 (mblocks_allocated * BLOCKS_PER_MBLOCK - n_alloc_blocks)
366 * BLOCK_SIZE_W * sizeof(W_),
367 par_n_threads,
368 par_max_copied * sizeof(W_),
369 par_tot_copied * sizeof(W_));
370
371 getProcessTimes(&cpu, &elapsed);
372
373 // Post EVENT_GC_END with the same timestamp as used for stats
374 // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
375 // Here, as opposed to other places, the event is emitted on the cap
376 // that initiates the GC and external tools expect it to have the same
377 // timestamp as used in +RTS -s calculcations.
378 traceEventGcEndAtT(cap, TimeToNS(elapsed - start_init_elapsed));
379
380 gc_elapsed = elapsed - gct->gc_start_elapsed;
381 gc_cpu = cpu - gct->gc_start_cpu;
382
383 if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
384 nat faults = getPageFaults();
385
386 statsPrintf("%9" FMT_SizeT " %9" FMT_SizeT " %9" FMT_SizeT,
387 alloc*sizeof(W_), copied*sizeof(W_),
388 live*sizeof(W_));
389 statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4" FMT_SizeT " %4" FMT_SizeT " (Gen: %2d)\n",
390 TimeToSecondsDbl(gc_cpu),
391 TimeToSecondsDbl(gc_elapsed),
392 TimeToSecondsDbl(cpu),
393 TimeToSecondsDbl(elapsed - start_init_elapsed),
394 faults - gct->gc_start_faults,
395 gct->gc_start_faults - GC_end_faults,
396 gen);
397
398 GC_end_faults = faults;
399 statsFlush();
400 }
401
402 GC_coll_cpu[gen] += gc_cpu;
403 GC_coll_elapsed[gen] += gc_elapsed;
404 if (GC_coll_max_pause[gen] < gc_elapsed) {
405 GC_coll_max_pause[gen] = gc_elapsed;
406 }
407
408 GC_tot_copied += (StgWord64) copied;
409 GC_tot_alloc += (StgWord64) alloc;
410 GC_par_max_copied += (StgWord64) par_max_copied;
411 GC_par_tot_copied += (StgWord64) par_tot_copied;
412 GC_tot_cpu += gc_cpu;
413
414 /* For the moment we calculate both per-HEC and total allocation.
415 * There is thus redundancy here, but for the moment we will calculate
416 * it both the old and new way and assert they're the same.
417 * When we're sure it's working OK then we can simplify things.
418 * TODO: simplify calcAllocated and clearNurseries so they don't have
419 * to calculate the total
420 */
421 {
422 lnat tot_alloc = 0;
423 lnat n;
424 for (n = 0; n < n_capabilities; n++) {
425 tot_alloc += capabilities[n].total_allocated;
426 traceEventHeapAllocated(&capabilities[n],
427 CAPSET_HEAP_DEFAULT,
428 capabilities[n].total_allocated * sizeof(W_));
429 }
430 ASSERT(GC_tot_alloc == tot_alloc);
431 }
432 traceEventHeapSize(cap,
433 CAPSET_HEAP_DEFAULT,
434 mblocks_allocated * MBLOCK_SIZE_W * sizeof(W_));
435
436 if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
437 if (live > max_residency) {
438 max_residency = live;
439 }
440 current_residency = live;
441 residency_samples++;
442 cumulative_residency += live;
443 traceEventHeapLive(cap,
444 CAPSET_HEAP_DEFAULT,
445 live * sizeof(W_));
446 }
447
448 if (slop > max_slop) max_slop = slop;
449 }
450
451 if (rub_bell) {
452 debugBelch("\b\b\b \b\b\b");
453 rub_bell = 0;
454 }
455
456 #if USE_PAPI
457 if(papi_is_reporting) {
458 /* Switch to counting mutator events */
459 if (gen == 0) {
460 papi_stop_gc0_count();
461 } else {
462 papi_stop_gc1_count();
463 }
464 papi_start_mutator_count();
465 }
466 #endif
467 }
468
469 /* -----------------------------------------------------------------------------
470 Called at the beginning of each Retainer Profiliing
471 -------------------------------------------------------------------------- */
#ifdef PROFILING
// Record the CPU/elapsed timestamps at the start of a retainer-profiling
// pass; stat_endRP() subtracts these to accumulate RP totals.
void
stat_startRP(void)
{
    Time cpu, wall;

    getProcessTimes(&cpu, &wall);
    RP_start_time = cpu;
    RPe_start_time = wall;
}
#endif /* PROFILING */
483
484 /* -----------------------------------------------------------------------------
485 Called at the end of each Retainer Profiliing
486 -------------------------------------------------------------------------- */
487
#ifdef PROFILING
/*
 * Called at the end of each retainer-profiling pass: accumulate the
 * pass's CPU and elapsed time, and report the pass to prof_file.
 *
 * Fix in this revision: the fprintf conversion specifiers now match
 * their argument types (retainerGeneration is a nat, i.e. unsigned,
 * so %u; maxStackSize is a signed int, so %d).  A mismatched
 * specifier/argument pair is undefined behaviour per C11 7.21.6.1.
 */
void
stat_endRP(
  nat retainerGeneration,
#ifdef DEBUG_RETAINER
  nat maxCStackSize,
  int maxStackSize,
#endif
  double averageNumVisit)
{
    Time user, elapsed;
    getProcessTimes( &user, &elapsed );

    RP_tot_time += user - RP_start_time;
    RPe_tot_time += elapsed - RPe_start_time;

    fprintf(prof_file, "Retainer Profiling: %u, at %f seconds\n",
            retainerGeneration, mut_user_time_during_RP());
#ifdef DEBUG_RETAINER
    fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
    fprintf(prof_file, "\tMax auxiliary stack size = %d\n", maxStackSize);
#endif
    fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit);
}
#endif /* PROFILING */
513
514 /* -----------------------------------------------------------------------------
515 Called at the beginning of each heap census
516 -------------------------------------------------------------------------- */
#ifdef PROFILING
// Record the CPU/elapsed timestamps at the start of a heap census;
// stat_endHeapCensus() subtracts these to accumulate HC totals.
void
stat_startHeapCensus(void)
{
    Time cpu, wall;

    getProcessTimes(&cpu, &wall);
    HC_start_time = cpu;
    HCe_start_time = wall;
}
#endif /* PROFILING */
528
529 /* -----------------------------------------------------------------------------
530 Called at the end of each heap census
531 -------------------------------------------------------------------------- */
#ifdef PROFILING
// Accumulate the CPU/elapsed time spent in the heap census that just
// finished (paired with stat_startHeapCensus()).
void
stat_endHeapCensus(void)
{
    Time cpu, wall;

    getProcessTimes(&cpu, &wall);
    HC_tot_time += cpu - HC_start_time;
    HCe_tot_time += wall - HCe_start_time;
}
#endif /* PROFILING */
543
544 /* -----------------------------------------------------------------------------
545 Called at the end of execution
546
547 NOTE: number of allocations is not entirely accurate: it doesn't
548 take into account the few bytes at the end of the heap that
549 were left unused when the heap-check failed.
550 -------------------------------------------------------------------------- */
551
#ifdef DEBUG
// Ticky-style slow-call counters.  The denominators used by the
// percentage reports below are initialised to 1, presumably to avoid
// division by zero in REPORT_PCT/TICK_PRINT_TOT -- TODO confirm.
#define TICK_VAR_INI(arity) \
  StgInt SLOW_CALLS_##arity = 1; \
  StgInt RIGHT_ARITY_##arity = 1; \
  StgInt TAGGED_PTR_##arity = 0;

TICK_VAR_INI(1)
TICK_VAR_INI(2)

StgInt TOTAL_CALLS=1;
#endif

/* Report the value of a counter */
#define REPORT(counter) \
  { \
    showStgWord64(counter,temp,rtsTrue/*commas*/); \
    statsPrintf("  (" #counter ")  : %s\n",temp); \
  }

/* Report the value of a counter as a percentage of another counter */
#define REPORT_PCT(counter,countertot) \
  statsPrintf("  (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
              counter*100.0/countertot)

/* Print the full set of counters for one call arity */
#define TICK_PRINT(arity) \
  REPORT(SLOW_CALLS_##arity); \
  REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \
  REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \
  REPORT(RIGHT_ARITY_##arity); \
  REPORT(TAGGED_PTR_##arity)

/* Print one arity's slow calls as a percentage of all calls */
#define TICK_PRINT_TOT(arity) \
  statsPrintf("  (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
              SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
586
587 static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; }
588 static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; }
589
// Called once at the end of execution: computes the INIT/MUT/GC/EXIT
// time breakdown from the timestamps recorded during the run, prints
// the +RTS -s/-S summary (or the -t one-line form), then frees the
// per-generation timing arrays.
//
// 'alloc' is the residual allocation (in words) not yet accounted to
// GC_tot_alloc; note it doesn't include the few bytes at the end of
// the heap left unused when a heap-check failed (see header comment).
void
stat_exit(int alloc)
{
    generation *gen;
    Time gc_cpu = 0;
    Time gc_elapsed = 0;
    Time init_cpu = 0;
    Time init_elapsed = 0;
    Time mut_cpu = 0;
    Time mut_elapsed = 0;
    Time exit_cpu = 0;
    Time exit_elapsed = 0;

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {

        char temp[BIG_STRING_LEN];
        Time tot_cpu;
        Time tot_elapsed;
        nat i, g, total_collections = 0;

        getProcessTimes( &tot_cpu, &tot_elapsed );
        tot_elapsed -= start_init_elapsed;

        GC_tot_alloc += alloc;

        for (i = 0; i < n_capabilities; i++) {
            traceEventHeapAllocated(&capabilities[i],
                                    CAPSET_HEAP_DEFAULT,
                                    capabilities[i].total_allocated * sizeof(W_));
        }

        /* Count total garbage collections */
        for (g = 0; g < RtsFlags.GcFlags.generations; g++)
            total_collections += generations[g].collections;

        /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
        if (tot_cpu == 0.0) tot_cpu = 1;
        if (tot_elapsed == 0.0) tot_elapsed = 1;

        if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
            // Final line of the -S table: the leftover allocation.
            statsPrintf("%9" FMT_SizeT " %9.9s %9.9s", (lnat)alloc*sizeof(W_), "", "");
            statsPrintf(" %5.2f %5.2f\n\n", 0.0, 0.0);
        }

        for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
            gc_cpu += GC_coll_cpu[i];
            gc_elapsed += GC_coll_elapsed[i];
        }

        // heapCensus() is called by the GC, so RP and HC time are
        // included in the GC stats.  We therefore subtract them to
        // obtain the actual GC cpu time.
        gc_cpu -= PROF_VAL(RP_tot_time + HC_tot_time);
        gc_elapsed -= PROF_VAL(RPe_tot_time + HCe_tot_time);

        init_cpu = get_init_cpu();
        init_elapsed = get_init_elapsed();

        exit_cpu = end_exit_cpu - start_exit_cpu;
        exit_elapsed = end_exit_elapsed - start_exit_elapsed;

        // MUT time is what remains between end-of-init and
        // start-of-exit after GC (and profiling) time is removed.
        mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed;

        mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu
            - PROF_VAL(RP_tot_time + HC_tot_time);
        if (mut_cpu < 0) { mut_cpu = 0; }

        if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
            showStgWord64(GC_tot_alloc*sizeof(W_),
                          temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes allocated in the heap\n", temp);

            showStgWord64(GC_tot_copied*sizeof(W_),
                          temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes copied during GC\n", temp);

            if ( residency_samples > 0 ) {
                showStgWord64(max_residency*sizeof(W_),
                              temp, rtsTrue/*commas*/);
                statsPrintf("%16s bytes maximum residency (%" FMT_SizeT " sample(s))\n",
                            temp, residency_samples);
            }

            showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes maximum slop\n", temp);

            statsPrintf("%16" FMT_SizeT " MB total memory in use (%" FMT_SizeT " MB lost due to fragmentation)\n\n",
                        peak_mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)),
                        (lnat)(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));

            /* Print garbage collections in each gen */
            statsPrintf(" Tot time (elapsed) Avg pause Max pause\n");
            for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                gen = &generations[g];
                statsPrintf(" Gen %2d %5d colls, %5d par %5.2fs %5.2fs %3.4fs %3.4fs\n",
                            gen->no,
                            gen->collections,
                            gen->par_collections,
                            TimeToSecondsDbl(GC_coll_cpu[g]),
                            TimeToSecondsDbl(GC_coll_elapsed[g]),
                            gen->collections == 0 ? 0 : TimeToSecondsDbl(GC_coll_elapsed[g] / gen->collections),
                            TimeToSecondsDbl(GC_coll_max_pause[g]));
            }

#if defined(THREADED_RTS)
            if (RtsFlags.ParFlags.parGcEnabled && n_capabilities > 1) {
                // 0% when one thread did all the copying, 100% when the
                // copying was spread evenly over all threads.
                statsPrintf("\n Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n",
                            100 * (((double)GC_par_tot_copied / (double)GC_par_max_copied) - 1)
                            / (n_capabilities - 1)
                    );
            }
#endif
            statsPrintf("\n");

#if defined(THREADED_RTS)
            statsPrintf(" TASKS: %d (%d bound, %d peak workers (%d total), using -N%d)\n",
                        taskCount, taskCount - workerCount,
                        peakWorkerCount, workerCount,
                        n_capabilities);

            statsPrintf("\n");

            {
                // Sum spark statistics over all capabilities.
                nat i;
                SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
                for (i = 0; i < n_capabilities; i++) {
                    sparks.created += capabilities[i].spark_stats.created;
                    sparks.dud += capabilities[i].spark_stats.dud;
                    sparks.overflowed+= capabilities[i].spark_stats.overflowed;
                    sparks.converted += capabilities[i].spark_stats.converted;
                    sparks.gcd += capabilities[i].spark_stats.gcd;
                    sparks.fizzled += capabilities[i].spark_stats.fizzled;
                }

                statsPrintf(" SPARKS: %" FMT_Word " (%" FMT_Word " converted, %" FMT_Word " overflowed, %" FMT_Word " dud, %" FMT_Word " GC'd, %" FMT_Word " fizzled)\n\n",
                            sparks.created + sparks.dud + sparks.overflowed,
                            sparks.converted, sparks.overflowed, sparks.dud,
                            sparks.gcd, sparks.fizzled);
            }
#endif

            statsPrintf(" INIT time %6.2fs (%6.2fs elapsed)\n",
                        TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed));

            statsPrintf(" MUT time %6.2fs (%6.2fs elapsed)\n",
                        TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed));
            statsPrintf(" GC time %6.2fs (%6.2fs elapsed)\n",
                        TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));

#ifdef PROFILING
            statsPrintf(" RP time %6.2fs (%6.2fs elapsed)\n",
                        TimeToSecondsDbl(RP_tot_time), TimeToSecondsDbl(RPe_tot_time));
            statsPrintf(" PROF time %6.2fs (%6.2fs elapsed)\n",
                        TimeToSecondsDbl(HC_tot_time), TimeToSecondsDbl(HCe_tot_time));
#endif
            statsPrintf(" EXIT time %6.2fs (%6.2fs elapsed)\n",
                        TimeToSecondsDbl(exit_cpu), TimeToSecondsDbl(exit_elapsed));
            statsPrintf(" Total time %6.2fs (%6.2fs elapsed)\n\n",
                        TimeToSecondsDbl(tot_cpu), TimeToSecondsDbl(tot_elapsed));
#ifndef THREADED_RTS
            statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n",
                        TimeToSecondsDbl(gc_cpu)*100/TimeToSecondsDbl(tot_cpu),
                        TimeToSecondsDbl(gc_elapsed)*100/TimeToSecondsDbl(tot_elapsed));
#endif

            if (mut_cpu == 0) {
                // Avoid dividing by zero when no mutator CPU time was measured.
                showStgWord64(0, temp, rtsTrue/*commas*/);
            } else {
                showStgWord64(
                    (StgWord64)((GC_tot_alloc*sizeof(W_)) / TimeToSecondsDbl(mut_cpu)),
                    temp, rtsTrue/*commas*/);
            }

            statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp);

            statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
                        TimeToSecondsDbl(tot_cpu - gc_cpu -
                                         PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                        / TimeToSecondsDbl(tot_cpu),
                        TimeToSecondsDbl(tot_cpu - gc_cpu -
                                         PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                        / TimeToSecondsDbl(tot_elapsed));

            /*
            TICK_PRINT(1);
            TICK_PRINT(2);
            REPORT(TOTAL_CALLS);
            TICK_PRINT_TOT(1);
            TICK_PRINT_TOT(2);
            */

#if USE_PAPI
            papi_stats_report();
#endif
#if defined(THREADED_RTS) && defined(PROF_SPIN)
            {
                nat g;

                statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
                statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
                for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                    statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
                }
            }
#endif
        }

        if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
            char *fmt1, *fmt2;
            if (RtsFlags.MiscFlags.machineReadable) {
                fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
                fmt2 = " ,(\"num_GCs\", \"%d\")\n"
                       " ,(\"average_bytes_used\", \"%ld\")\n"
                       " ,(\"max_bytes_used\", \"%ld\")\n"
                       " ,(\"num_byte_usage_samples\", \"%ld\")\n"
                       " ,(\"peak_megabytes_allocated\", \"%lu\")\n"
                       " ,(\"init_cpu_seconds\", \"%.2f\")\n"
                       " ,(\"init_wall_seconds\", \"%.2f\")\n"
                       " ,(\"mutator_cpu_seconds\", \"%.2f\")\n"
                       " ,(\"mutator_wall_seconds\", \"%.2f\")\n"
                       " ,(\"GC_cpu_seconds\", \"%.2f\")\n"
                       " ,(\"GC_wall_seconds\", \"%.2f\")\n"
                       " ]\n";
            }
            else {
                fmt1 = "<<ghc: %llu bytes, ";
                fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.2f INIT (%.2f elapsed), %.2f MUT (%.2f elapsed), %.2f GC (%.2f elapsed) :ghc>>\n";
            }
            /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
            statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
            statsPrintf(fmt2,
                        total_collections,
                        residency_samples == 0 ? 0 :
                        cumulative_residency*sizeof(W_)/residency_samples,
                        max_residency*sizeof(W_),
                        residency_samples,
                        (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
                        TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed),
                        TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed),
                        TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
        }

        statsFlush();
        statsClose();
    }

    // Release the per-generation timing arrays allocated by initStats1().
    if (GC_coll_cpu) {
        stgFree(GC_coll_cpu);
        GC_coll_cpu = NULL;
    }
    if (GC_coll_elapsed) {
        stgFree(GC_coll_elapsed);
        GC_coll_elapsed = NULL;
    }
    if (GC_coll_max_pause) {
        stgFree(GC_coll_max_pause);
        GC_coll_max_pause = NULL;
    }
}
849
850 /* -----------------------------------------------------------------------------
851 stat_describe_gens
852
853 Produce some detailed info on the state of the generational GC.
854 -------------------------------------------------------------------------- */
// Produce some detailed info on the state of the generational GC:
// one row per generation (max blocks, mut-list bytes, blocks, large
// objects, live bytes, slop bytes), via the debug message interface.
void
statDescribeGens(void)
{
    nat g, mut, lge, i;
    lnat gen_slop;
    lnat tot_live, tot_slop;
    lnat gen_live, gen_blocks;
    bdescr *bd;
    generation *gen;

    debugBelch(
        "----------------------------------------------------------\n"
        " Gen Max Mut-list Blocks Large Live Slop\n"
        " Blocks Bytes Objects \n"
        "----------------------------------------------------------\n");

    tot_live = 0;
    tot_slop = 0;

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gen = &generations[g];

        // Count the large objects linked onto this generation.
        for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
            lge++;
        }

        gen_live = genLiveWords(gen);
        gen_blocks = genLiveBlocks(gen);

        mut = 0;
        for (i = 0; i < n_capabilities; i++) {
            mut += countOccupied(capabilities[i].mut_lists[g]);

            // Add the pinned object block.
            bd = capabilities[i].pinned_object_block;
            if (bd != NULL) {
                gen_live += bd->free - bd->start;
                gen_blocks += bd->blocks;
            }

            // Per-GC-thread workspaces also hold live data for this gen.
            gen_live += gcThreadLiveWords(i,g);
            gen_blocks += gcThreadLiveBlocks(i,g);
        }

        debugBelch("%5d %7" FMT_SizeT " %9d", g, (lnat)gen->max_blocks, mut);

        // Slop = space held in blocks but not occupied by live words.
        gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;

        debugBelch("%8" FMT_SizeT " %8d %8" FMT_SizeT " %8" FMT_SizeT "\n", gen_blocks, lge,
                   gen_live*sizeof(W_), gen_slop*sizeof(W_));
        tot_live += gen_live;
        tot_slop += gen_slop;
    }
    debugBelch("----------------------------------------------------------\n");
    debugBelch("%41s%8" FMT_SizeT " %8" FMT_SizeT "\n",
               "",tot_live*sizeof(W_),tot_slop*sizeof(W_));
    debugBelch("----------------------------------------------------------\n");
    debugBelch("\n");
}
914
915 /* -----------------------------------------------------------------------------
916 Stats available via a programmatic interface, so eg. GHCi can time
917 each compilation and expression evaluation.
918 -------------------------------------------------------------------------- */
919
920 extern HsInt64 getAllocations( void )
921 { return (HsInt64)GC_tot_alloc * sizeof(W_); }
922
923 /* EZY: I'm not convinced I got all the casting right. */
924
925 extern rtsBool getGCStatsEnabled( void )
926 {
927 return RtsFlags.GcFlags.giveStats != NO_GC_STATS;
928 }
929
// Fill *s with a snapshot of the GC statistics accumulated so far;
// the programmatic counterpart of the +RTS -s summary.
extern void getGCStats( GCStats *s )
{
    nat total_collections = 0;
    nat g;
    Time gc_cpu = 0;
    Time gc_elapsed = 0;
    Time current_elapsed = 0;
    Time current_cpu = 0;

    getProcessTimes(&current_cpu, &current_elapsed);

    /* EZY: static inline'ify these */
    for (g = 0; g < RtsFlags.GcFlags.generations; g++)
        total_collections += generations[g].collections;

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gc_cpu += GC_coll_cpu[g];
        gc_elapsed += GC_coll_elapsed[g];
    }

    // Word counts are converted to bytes here (* sizeof(W_)).
    s->bytes_allocated = GC_tot_alloc*(StgWord64)sizeof(W_);
    s->num_gcs = total_collections;
    s->num_byte_usage_samples = residency_samples;
    s->max_bytes_used = max_residency*sizeof(W_);
    s->cumulative_bytes_used = cumulative_residency*(StgWord64)sizeof(W_);
    s->peak_megabytes_allocated = (StgWord64)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L));
    s->bytes_copied = GC_tot_copied*(StgWord64)sizeof(W_);
    s->max_bytes_slop = max_slop*(StgWord64)sizeof(W_);
    s->current_bytes_used = current_residency*(StgWord64)sizeof(W_);
    s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_);
    /*
    s->init_cpu_seconds = TimeToSecondsDbl(get_init_cpu());
    s->init_wall_seconds = TimeToSecondsDbl(get_init_elapsed());
    */
    // Mutator time = everything since end of init, minus GC (and,
    // when profiling, RP/HC) time.
    s->mutator_cpu_seconds = TimeToSecondsDbl(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
    s->mutator_wall_seconds = TimeToSecondsDbl(current_elapsed- end_init_elapsed - gc_elapsed);
    s->gc_cpu_seconds = TimeToSecondsDbl(gc_cpu);
    s->gc_wall_seconds = TimeToSecondsDbl(gc_elapsed);
    /* EZY: Being consistent with incremental output, but maybe should also discount init */
    s->cpu_seconds = TimeToSecondsDbl(current_cpu);
    s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed);
    s->par_tot_bytes_copied = GC_par_tot_copied*(StgWord64)sizeof(W_);
    s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_);
}
// extern void getTaskStats( TaskStats **s ) {}
#if 0
/* Disabled code, kept for reference: would sum the per-capability
   spark counters into *s. */
extern void getSparkStats( SparkCounters *s ) {
    nat i;
    s->created = 0;
    s->dud = 0;
    s->overflowed = 0;
    s->converted = 0;
    s->gcd = 0;
    s->fizzled = 0;
    for (i = 0; i < n_capabilities; i++) {
        s->created += capabilities[i].spark_stats.created;
        s->dud += capabilities[i].spark_stats.dud;
        s->overflowed+= capabilities[i].spark_stats.overflowed;
        s->converted += capabilities[i].spark_stats.converted;
        s->gcd += capabilities[i].spark_stats.gcd;
        s->fizzled += capabilities[i].spark_stats.fizzled;
    }
}
#endif
994
995 /* -----------------------------------------------------------------------------
996 Dumping stuff in the stats file, or via the debug message interface
997 -------------------------------------------------------------------------- */
998
999 void
1000 statsPrintf( char *s, ... )
1001 {
1002 FILE *sf = RtsFlags.GcFlags.statsFile;
1003 va_list ap;
1004
1005 va_start(ap,s);
1006 if (sf == NULL) {
1007 vdebugBelch(s,ap);
1008 } else {
1009 vfprintf(sf, s, ap);
1010 }
1011 va_end(ap);
1012 }
1013
1014 static void
1015 statsFlush( void )
1016 {
1017 FILE *sf = RtsFlags.GcFlags.statsFile;
1018 if (sf != NULL) {
1019 fflush(sf);
1020 }
1021 }
1022
1023 static void
1024 statsClose( void )
1025 {
1026 FILE *sf = RtsFlags.GcFlags.statsFile;
1027 if (sf != NULL) {
1028 fclose(sf);
1029 }
1030 }