1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2005
4 *
5 * Statistics and timing-related functions.
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "PosixSource.h"
10 #include "Rts.h"
11
12 #include "RtsUtils.h"
13 #include "Schedule.h"
14 #include "Stats.h"
15 #include "Profiling.h"
16 #include "GetTime.h"
17 #include "sm/Storage.h"
18 #include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
19 #include "sm/GCThread.h"
20 #include "sm/BlockAlloc.h"
21
22 #if USE_PAPI
23 #include "Papi.h"
24 #endif
25
26 /* Size of the scratch buffer used for formatting numbers (see the showStgWord64 calls below) */
27 #define BIG_STRING_LEN 512
28
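/* Time values are integer ticks, TIME_RESOLUTION of them per second;
 * this converts a Time to floating-point seconds for printing. */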
29 #define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)
30
31 static Time
32 start_init_cpu, start_init_elapsed,
33 end_init_cpu, end_init_elapsed,
34 start_exit_cpu, start_exit_elapsed,
35 end_exit_cpu, end_exit_elapsed;
36
37 static Time GC_tot_cpu = 0;
38
39 static StgWord64 GC_tot_alloc = 0;
40 static StgWord64 GC_tot_copied = 0;
41
42 static StgWord64 GC_par_max_copied = 0;
43 static StgWord64 GC_par_tot_copied = 0;
44
45 #ifdef PROFILING
46 static Time RP_start_time = 0, RP_tot_time = 0; // retainer prof user time
47 static Time RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time
48
49 static Time HC_start_time, HC_tot_time = 0; // heap census prof user time
50 static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
51 #endif
52
53 #ifdef PROFILING
54 #define PROF_VAL(x) (x)
55 #else
56 #define PROF_VAL(x) 0
57 #endif
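/* PROF_VAL(x) lets the profiling-only counters above appear in shared
 * arithmetic below without #ifdefs at every use site: it evaluates its
 * argument in PROFILING builds and contributes 0 otherwise. */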
58
59 // current = current as of last GC
60 static W_ current_residency = 0; // in words; for stats only
61 static W_ max_residency = 0;
62 static W_ cumulative_residency = 0;
63 static W_ residency_samples = 0; // for stats only
64 static W_ current_slop = 0;
65 static W_ max_slop = 0;
66
67 static W_ GC_end_faults = 0;
68
69 static Time *GC_coll_cpu = NULL;
70 static Time *GC_coll_elapsed = NULL;
71 static Time *GC_coll_max_pause = NULL;
72
73 static void statsFlush( void );
74 static void statsClose( void );
75
76 /* -----------------------------------------------------------------------------
77 Current elapsed time
78 ------------------------------------------------------------------------- */
79
80 Time stat_getElapsedTime(void)
81 {
82 return getProcessElapsedTime() - start_init_elapsed;
83 }
84
85 /* ---------------------------------------------------------------------------
86 Measure the current MUT time, for profiling
87 ------------------------------------------------------------------------ */
88
89 double
90 mut_user_time_until( Time t )
91 {
92 return TimeToSecondsDbl(t - GC_tot_cpu);
93 // heapCensus() time is included in GC_tot_cpu, so we don't need
94 // to subtract it here.
95 }
96
97 double
98 mut_user_time( void )
99 {
100 Time cpu;
101 cpu = getProcessCPUTime();
102 return mut_user_time_until(cpu);
103 }
104
105 #ifdef PROFILING
106 /*
107 mut_user_time_during_RP() returns the MUT time during retainer profiling.
108   The same applies to mut_user_time_during_heap_census().
109 */
110 double
111 mut_user_time_during_RP( void )
112 {
113 return TimeToSecondsDbl(RP_start_time - GC_tot_cpu - RP_tot_time);
114 }
115
116 double
117 mut_user_time_during_heap_census( void )
118 {
119 return TimeToSecondsDbl(HC_start_time - GC_tot_cpu - RP_tot_time);
120 }
121 #endif /* PROFILING */
122
123 /* ---------------------------------------------------------------------------
124 initStats0() has no dependencies; it can be called right at the beginning
125 ------------------------------------------------------------------------ */
126
127 void
128 initStats0(void)
129 {
130 start_init_cpu = 0;
131 start_init_elapsed = 0;
132 end_init_cpu = 0;
133 end_init_elapsed = 0;
134
135 start_exit_cpu = 0;
136 start_exit_elapsed = 0;
137 end_exit_cpu = 0;
138 end_exit_elapsed = 0;
139
140 GC_tot_alloc = 0;
141 GC_tot_copied = 0;
142 GC_par_max_copied = 0;
143 GC_par_tot_copied = 0;
144 GC_tot_cpu = 0;
145
146 #ifdef PROFILING
147 RP_start_time = 0;
148 RP_tot_time = 0;
149 RPe_start_time = 0;
150 RPe_tot_time = 0;
151
152 HC_start_time = 0;
153 HC_tot_time = 0;
154 HCe_start_time = 0;
155 HCe_tot_time = 0;
156 #endif
157
158 max_residency = 0;
159 cumulative_residency = 0;
160 residency_samples = 0;
161 max_slop = 0;
162
163 GC_end_faults = 0;
164 }
165
166 /* ---------------------------------------------------------------------------
167 initStats1() can be called after setupRtsFlags()
168 ------------------------------------------------------------------------ */
169
170 void
171 initStats1 (void)
172 {
173 nat i;
174
175 if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
176 statsPrintf(" Alloc Copied Live GC GC TOT TOT Page Flts\n");
177 statsPrintf(" bytes bytes bytes user elap user elap\n");
178 }
179 GC_coll_cpu =
180 (Time *)stgMallocBytes(
181 sizeof(Time)*RtsFlags.GcFlags.generations,
182 "initStats");
183 GC_coll_elapsed =
184 (Time *)stgMallocBytes(
185 sizeof(Time)*RtsFlags.GcFlags.generations,
186 "initStats");
187 GC_coll_max_pause =
188 (Time *)stgMallocBytes(
189 sizeof(Time)*RtsFlags.GcFlags.generations,
190 "initStats");
191 for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
192 GC_coll_cpu[i] = 0;
193 GC_coll_elapsed[i] = 0;
194 GC_coll_max_pause[i] = 0;
195 }
196 }
197
198 /* -----------------------------------------------------------------------------
199 Initialisation time...
200 -------------------------------------------------------------------------- */
201
202 void
203 stat_startInit(void)
204 {
205 getProcessTimes(&start_init_cpu, &start_init_elapsed);
206 }
207
208 void
209 stat_endInit(void)
210 {
211 getProcessTimes(&end_init_cpu, &end_init_elapsed);
212
213 #if USE_PAPI
214 /* We start counting events for the mutator here;
215 * when garbage collection starts
216 * we switch to the GC event set. */
217 papi_start_mutator_count();
218
219 /* This flag is needed to avoid counting the last GC */
220 papi_is_reporting = 1;
221
222 #endif
223 }
224
225 /* -----------------------------------------------------------------------------
226 stat_startExit and stat_endExit
227
228 These two measure the time taken in shutdownHaskell().
229 -------------------------------------------------------------------------- */
230
231 void
232 stat_startExit(void)
233 {
234 getProcessTimes(&start_exit_cpu, &start_exit_elapsed);
235
236 #if USE_PAPI
237 /* We stop counting mutator events;
238 * GC events are not being counted at this point */
239 papi_stop_mutator_count();
240
241 /* This flag is needed because GC is run once more after this function */
242 papi_is_reporting = 0;
243 #endif
244 }
245
246 void
247 stat_endExit(void)
248 {
249 getProcessTimes(&end_exit_cpu, &end_exit_elapsed);
250 }
251
252 /* -----------------------------------------------------------------------------
253 Called at the beginning of each GC
254 -------------------------------------------------------------------------- */
255
256 static nat rub_bell = 0;
257
258 void
259 stat_startGC (Capability *cap, gc_thread *gct)
260 {
261 nat bell = RtsFlags.GcFlags.ringBell;
262
263 if (bell) {
264 if (bell > 1) {
265 debugBelch(" GC ");
266 rub_bell = 1;
267 } else {
268 debugBelch("\007");
269 }
270 }
271
272 #if USE_PAPI
273 if(papi_is_reporting) {
274 /* Switch to counting GC events */
275 papi_stop_mutator_count();
276 papi_start_gc_count();
277 }
278 #endif
279
280 getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
281
282 // Post EVENT_GC_START with the same timestamp as used for stats
283 // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
284 // Here, as opposed to other places, the event is emitted on the cap
285 // that initiates the GC and external tools expect it to have the same
286 // timestamp as used in +RTS -s calculations.
287 traceEventGcStartAtT(cap,
288 TimeToNS(gct->gc_start_elapsed - start_init_elapsed));
289
290 gct->gc_start_thread_cpu = getThreadCPUTime();
291
292 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
293 {
294 gct->gc_start_faults = getPageFaults();
295 }
296 }
297
298 void
299 stat_gcWorkerThreadStart (gc_thread *gct STG_UNUSED)
300 {
301 #if 0
302 /*
303 * We don't collect per-thread GC stats any more, but this code
304 * could be used to do that if we want to in the future:
305 */
306 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
307 {
308 getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
309 gct->gc_start_thread_cpu = getThreadCPUTime();
310 }
311 #endif
312 }
313
314 void
315 stat_gcWorkerThreadDone (gc_thread *gct STG_UNUSED)
316 {
317 #if 0
318 /*
319 * We don't collect per-thread GC stats any more, but this code
320 * could be used to do that if we want to in the future:
321 */
322 Time thread_cpu, elapsed, gc_cpu, gc_elapsed;
323
324 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
325 {
326 elapsed = getProcessElapsedTime();
327 thread_cpu = getThreadCPUTime();
328
329 gc_cpu = thread_cpu - gct->gc_start_thread_cpu;
330 gc_elapsed = elapsed - gct->gc_start_elapsed;
331
332 taskDoneGC(gct->cap->running_task, gc_cpu, gc_elapsed);
333 }
334 #endif
335 }
336
337 /* -----------------------------------------------------------------------------
338 * Calculate the total allocated memory since the start of the
339 * program. Also emits events reporting the per-cap allocation
340 * totals.
341 * -------------------------------------------------------------------------- */
342
343 static StgWord
344 calcTotalAllocated(void)
345 {
346 W_ tot_alloc = 0;
347 W_ n;
348 for (n = 0; n < n_capabilities; n++) {
349 tot_alloc += capabilities[n]->total_allocated;
350 traceEventHeapAllocated(capabilities[n],
351 CAPSET_HEAP_DEFAULT,
352 capabilities[n]->total_allocated * sizeof(W_));
353 }
354
355 return tot_alloc;
356 }
357
358 /* -----------------------------------------------------------------------------
359 Called at the end of each GC
360 -------------------------------------------------------------------------- */
361
362 void
363 stat_endGC (Capability *cap, gc_thread *gct,
364 W_ live, W_ copied, W_ slop, nat gen,
365 nat par_n_threads, W_ par_max_copied, W_ par_tot_copied)
366 {
367 W_ tot_alloc;
368 W_ alloc;
369
370 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
371 RtsFlags.ProfFlags.doHeapProfile)
372 // heap profiling needs GC_tot_time
373 {
374 Time cpu, elapsed, gc_cpu, gc_elapsed;
375
376 // Has to be emitted while all caps stopped for GC, but before GC_END.
377 // See trac.haskell.org/ThreadScope/wiki/RTSsummaryEvents
378 // for a detailed design rationale of the current setup
379 // of GC eventlog events.
380 traceEventGcGlobalSync(cap);
381
382 // Emitted before GC_END on all caps, which simplifies tools code.
383 traceEventGcStats(cap,
384 CAPSET_HEAP_DEFAULT,
385 gen,
386 copied * sizeof(W_),
387 slop * sizeof(W_),
388 /* current loss due to fragmentation */
389 (mblocks_allocated * BLOCKS_PER_MBLOCK - n_alloc_blocks)
390 * BLOCK_SIZE_W * sizeof(W_),
391 par_n_threads,
392 par_max_copied * sizeof(W_),
393 par_tot_copied * sizeof(W_));
394
395 getProcessTimes(&cpu, &elapsed);
396
397 // Post EVENT_GC_END with the same timestamp as used for stats
398 // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
399 // Here, as opposed to other places, the event is emitted on the cap
400 // that initiates the GC and external tools expect it to have the same
401 // timestamp as used in +RTS -s calculations.
402 traceEventGcEndAtT(cap, TimeToNS(elapsed - start_init_elapsed));
403
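// Cost of this GC, measured against the start times recorded in stat_startGC().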
404 gc_elapsed = elapsed - gct->gc_start_elapsed;
405 gc_cpu = cpu - gct->gc_start_cpu;
406
407 /* For the moment we calculate both per-HEC and total allocation.
408 * There is thus redundancy here, but for now we calculate it both
409 * the old way and the new way, and assert they're the same.
410 * When we're sure it's working OK we can simplify things.
411 */
412 tot_alloc = calcTotalAllocated();
413
414 // allocated since the last GC
415 alloc = tot_alloc - GC_tot_alloc;
416 GC_tot_alloc = tot_alloc;
417
418 if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
419 W_ faults = getPageFaults();
420
421 statsPrintf("%9" FMT_SizeT " %9" FMT_SizeT " %9" FMT_SizeT,
422 alloc*sizeof(W_), copied*sizeof(W_),
423 live*sizeof(W_));
424 statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4" FMT_Word " %4" FMT_Word " (Gen: %2d)\n",
425 TimeToSecondsDbl(gc_cpu),
426 TimeToSecondsDbl(gc_elapsed),
427 TimeToSecondsDbl(cpu),
428 TimeToSecondsDbl(elapsed - start_init_elapsed),
429 faults - gct->gc_start_faults,
430 gct->gc_start_faults - GC_end_faults,
431 gen);
432
433 GC_end_faults = faults;
434 statsFlush();
435 }
436
437 GC_coll_cpu[gen] += gc_cpu;
438 GC_coll_elapsed[gen] += gc_elapsed;
439 if (GC_coll_max_pause[gen] < gc_elapsed) {
440 GC_coll_max_pause[gen] = gc_elapsed;
441 }
442
443 GC_tot_copied += (StgWord64) copied;
444 GC_par_max_copied += (StgWord64) par_max_copied;
445 GC_par_tot_copied += (StgWord64) par_tot_copied;
446 GC_tot_cpu += gc_cpu;
447
448 traceEventHeapSize(cap,
449 CAPSET_HEAP_DEFAULT,
450 mblocks_allocated * MBLOCK_SIZE_W * sizeof(W_));
451
452 if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
453 if (live > max_residency) {
454 max_residency = live;
455 }
456 current_residency = live;
457 residency_samples++;
458 cumulative_residency += live;
459 traceEventHeapLive(cap,
460 CAPSET_HEAP_DEFAULT,
461 live * sizeof(W_));
462 }
463
464 if (slop > max_slop) max_slop = slop;
465 }
466
467 if (rub_bell) {
468 debugBelch("\b\b\b \b\b\b");
469 rub_bell = 0;
470 }
471
472 #if USE_PAPI
473 if(papi_is_reporting) {
474 /* Switch to counting mutator events */
475 if (gen == 0) {
476 papi_stop_gc0_count();
477 } else {
478 papi_stop_gc1_count();
479 }
480 papi_start_mutator_count();
481 }
482 #endif
483 }
484
485 /* -----------------------------------------------------------------------------
486 Called at the beginning of each retainer profiling run
487 -------------------------------------------------------------------------- */
488 #ifdef PROFILING
489 void
490 stat_startRP(void)
491 {
492 Time user, elapsed;
493 getProcessTimes( &user, &elapsed );
494
495 RP_start_time = user;
496 RPe_start_time = elapsed;
497 }
498 #endif /* PROFILING */
499
500 /* -----------------------------------------------------------------------------
501 Called at the end of each retainer profiling run
502 -------------------------------------------------------------------------- */
503
504 #ifdef PROFILING
505 void
506 stat_endRP(
507 nat retainerGeneration,
508 #ifdef DEBUG_RETAINER
509 nat maxCStackSize,
510 int maxStackSize,
511 #endif
512 double averageNumVisit)
513 {
514 Time user, elapsed;
515 getProcessTimes( &user, &elapsed );
516
517 RP_tot_time += user - RP_start_time;
518 RPe_tot_time += elapsed - RPe_start_time;
519
520 fprintf(prof_file, "Retainer Profiling: %d, at %f seconds\n",
521 retainerGeneration, mut_user_time_during_RP());
522 #ifdef DEBUG_RETAINER
523 fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
524 fprintf(prof_file, "\tMax auxiliary stack size = %u\n", maxStackSize);
525 #endif
526 fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit);
527 }
528 #endif /* PROFILING */
529
530 /* -----------------------------------------------------------------------------
531 Called at the beginning of each heap census
532 -------------------------------------------------------------------------- */
533 #ifdef PROFILING
534 void
535 stat_startHeapCensus(void)
536 {
537 Time user, elapsed;
538 getProcessTimes( &user, &elapsed );
539
540 HC_start_time = user;
541 HCe_start_time = elapsed;
542 }
543 #endif /* PROFILING */
544
545 /* -----------------------------------------------------------------------------
546 Called at the end of each heap census
547 -------------------------------------------------------------------------- */
548 #ifdef PROFILING
549 void
550 stat_endHeapCensus(void)
551 {
552 Time user, elapsed;
553 getProcessTimes( &user, &elapsed );
554
555 HC_tot_time += user - HC_start_time;
556 HCe_tot_time += elapsed - HCe_start_time;
557 }
558 #endif /* PROFILING */
559
560 /* -----------------------------------------------------------------------------
561 Called at the end of execution
562
563 NOTE: number of allocations is not entirely accurate: it doesn't
564 take into account the few bytes at the end of the heap that
565 were left unused when the heap-check failed.
566 -------------------------------------------------------------------------- */
567
568 #ifdef DEBUG
569 #define TICK_VAR_INI(arity) \
570 StgInt SLOW_CALLS_##arity = 1; \
571 StgInt RIGHT_ARITY_##arity = 1; \
572 StgInt TAGGED_PTR_##arity = 0;
573
574 TICK_VAR_INI(1)
575 TICK_VAR_INI(2)
576
577 StgInt TOTAL_CALLS=1;
578 #endif
579
580 /* Report the value of a counter */
581 #define REPORT(counter) \
582 { \
583 showStgWord64(counter,temp,rtsTrue/*commas*/); \
584 statsPrintf(" (" #counter ") : %s\n",temp); \
585 }
586
587 /* Report the value of a counter as a percentage of another counter */
588 #define REPORT_PCT(counter,countertot) \
589 statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
590 counter*100.0/countertot)
591
592 #define TICK_PRINT(arity) \
593 REPORT(SLOW_CALLS_##arity); \
594 REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \
595 REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \
596 REPORT(RIGHT_ARITY_##arity); \
597 REPORT(TAGGED_PTR_##arity)
598
599 #define TICK_PRINT_TOT(arity) \
600 statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
601 SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
602
603 static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; }
604 static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; }
605
606
607 void
608 stat_exit (void)
609 {
610 generation *gen;
611 Time gc_cpu = 0;
612 Time gc_elapsed = 0;
613 Time init_cpu = 0;
614 Time init_elapsed = 0;
615 Time mut_cpu = 0;
616 Time mut_elapsed = 0;
617 Time exit_cpu = 0;
618 Time exit_elapsed = 0;
619 W_ tot_alloc;
620 W_ alloc;
621
622 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
623
624 char temp[BIG_STRING_LEN];
625 Time tot_cpu;
626 Time tot_elapsed;
627 nat i, g, total_collections = 0;
628
629 getProcessTimes( &tot_cpu, &tot_elapsed );
630 tot_elapsed -= start_init_elapsed;
631
632 tot_alloc = calcTotalAllocated();
633
634 // allocated since the last GC
635 alloc = tot_alloc - GC_tot_alloc;
636 GC_tot_alloc = tot_alloc;
637
638 /* Count total garbage collections */
639 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
640 total_collections += generations[g].collections;
641
642 /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
643 if (tot_cpu == 0.0) tot_cpu = 1;
644 if (tot_elapsed == 0.0) tot_elapsed = 1;
645
646 if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
647 statsPrintf("%9" FMT_SizeT " %9.9s %9.9s", (W_)alloc*sizeof(W_), "", "");
648 statsPrintf(" %5.2f %5.2f\n\n", 0.0, 0.0);
649 }
650
651 for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
652 gc_cpu += GC_coll_cpu[i];
653 gc_elapsed += GC_coll_elapsed[i];
654 }
655
656 // heapCensus() is called by the GC, so RP and HC time are
657 // included in the GC stats. We therefore subtract them to
658 // obtain the actual GC cpu time.
659 gc_cpu -= PROF_VAL(RP_tot_time + HC_tot_time);
660 gc_elapsed -= PROF_VAL(RPe_tot_time + HCe_tot_time);
661
662 init_cpu = get_init_cpu();
663 init_elapsed = get_init_elapsed();
664
665 exit_cpu = end_exit_cpu - start_exit_cpu;
666 exit_elapsed = end_exit_elapsed - start_exit_elapsed;
667
668 mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed;
669
670 mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu
671 - PROF_VAL(RP_tot_time + HC_tot_time);
672 if (mut_cpu < 0) { mut_cpu = 0; }
673
674 if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
675 showStgWord64(GC_tot_alloc*sizeof(W_),
676 temp, rtsTrue/*commas*/);
677 statsPrintf("%16s bytes allocated in the heap\n", temp);
678
679 showStgWord64(GC_tot_copied*sizeof(W_),
680 temp, rtsTrue/*commas*/);
681 statsPrintf("%16s bytes copied during GC\n", temp);
682
683 if ( residency_samples > 0 ) {
684 showStgWord64(max_residency*sizeof(W_),
685 temp, rtsTrue/*commas*/);
686 statsPrintf("%16s bytes maximum residency (%" FMT_Word " sample(s))\n",
687 temp, residency_samples);
688 }
689
690 showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
691 statsPrintf("%16s bytes maximum slop\n", temp);
692
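// MBLOCK_SIZE_W is in words and (1024 * 1024 / sizeof(W_)) is words per
// megabyte, so these expressions convert word counts to MB. The
// fragmentation figure is, roughly, block space reserved inside
// megablocks that was never handed out as allocated blocks.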
693 statsPrintf("%16" FMT_SizeT " MB total memory in use (%" FMT_SizeT " MB lost due to fragmentation)\n\n",
694 peak_mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)),
695 (W_)(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
696
697 /* Print garbage collections in each gen */
698 statsPrintf(" Tot time (elapsed) Avg pause Max pause\n");
699 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
700 gen = &generations[g];
701 statsPrintf(" Gen %2d %5d colls, %5d par %5.2fs %5.2fs %3.4fs %3.4fs\n",
702 gen->no,
703 gen->collections,
704 gen->par_collections,
705 TimeToSecondsDbl(GC_coll_cpu[g]),
706 TimeToSecondsDbl(GC_coll_elapsed[g]),
707 gen->collections == 0 ? 0 : TimeToSecondsDbl(GC_coll_elapsed[g] / gen->collections),
708 TimeToSecondsDbl(GC_coll_max_pause[g]));
709 }
710
711 #if defined(THREADED_RTS)
712 if (RtsFlags.ParFlags.parGcEnabled && n_capabilities > 1) {
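// Work balance: GC_par_tot_copied / GC_par_max_copied is 1 when one
// thread did all of the copying (fully serial) and n_capabilities when
// the copying was spread evenly, so this rescales it to 0%..100%.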
713 statsPrintf("\n Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n",
714 100 * (((double)GC_par_tot_copied / (double)GC_par_max_copied) - 1)
715 / (n_capabilities - 1)
716 );
717 }
718 #endif
719 statsPrintf("\n");
720
721 #if defined(THREADED_RTS)
722 statsPrintf(" TASKS: %d (%d bound, %d peak workers (%d total), using -N%d)\n",
723 taskCount, taskCount - workerCount,
724 peakWorkerCount, workerCount,
725 n_capabilities);
726
727 statsPrintf("\n");
728
729 {
730 nat i;
731 SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
732 for (i = 0; i < n_capabilities; i++) {
733 sparks.created += capabilities[i]->spark_stats.created;
734 sparks.dud += capabilities[i]->spark_stats.dud;
735 sparks.overflowed+= capabilities[i]->spark_stats.overflowed;
736 sparks.converted += capabilities[i]->spark_stats.converted;
737 sparks.gcd += capabilities[i]->spark_stats.gcd;
738 sparks.fizzled += capabilities[i]->spark_stats.fizzled;
739 }
740
741 statsPrintf(" SPARKS: %" FMT_Word " (%" FMT_Word " converted, %" FMT_Word " overflowed, %" FMT_Word " dud, %" FMT_Word " GC'd, %" FMT_Word " fizzled)\n\n",
742 sparks.created + sparks.dud + sparks.overflowed,
743 sparks.converted, sparks.overflowed, sparks.dud,
744 sparks.gcd, sparks.fizzled);
745 }
746 #endif
747
748 statsPrintf(" INIT time %6.2fs (%6.2fs elapsed)\n",
749 TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed));
750
751 statsPrintf(" MUT time %6.2fs (%6.2fs elapsed)\n",
752 TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed));
753 statsPrintf(" GC time %6.2fs (%6.2fs elapsed)\n",
754 TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
755
756 #ifdef PROFILING
757 statsPrintf(" RP time %6.2fs (%6.2fs elapsed)\n",
758 TimeToSecondsDbl(RP_tot_time), TimeToSecondsDbl(RPe_tot_time));
759 statsPrintf(" PROF time %6.2fs (%6.2fs elapsed)\n",
760 TimeToSecondsDbl(HC_tot_time), TimeToSecondsDbl(HCe_tot_time));
761 #endif
762 statsPrintf(" EXIT time %6.2fs (%6.2fs elapsed)\n",
763 TimeToSecondsDbl(exit_cpu), TimeToSecondsDbl(exit_elapsed));
764 statsPrintf(" Total time %6.2fs (%6.2fs elapsed)\n\n",
765 TimeToSecondsDbl(tot_cpu), TimeToSecondsDbl(tot_elapsed));
766 #ifndef THREADED_RTS
767 statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n",
768 TimeToSecondsDbl(gc_cpu)*100/TimeToSecondsDbl(tot_cpu),
769 TimeToSecondsDbl(gc_elapsed)*100/TimeToSecondsDbl(tot_elapsed));
770 #endif
771
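// Alloc rate = total bytes allocated divided by mutator CPU time; the
// mut_cpu == 0 case just avoids a division by zero.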
772 if (mut_cpu == 0) {
773 showStgWord64(0, temp, rtsTrue/*commas*/);
774 } else {
775 showStgWord64(
776 (StgWord64)((GC_tot_alloc*sizeof(W_)) / TimeToSecondsDbl(mut_cpu)),
777 temp, rtsTrue/*commas*/);
778 }
779
780 statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp);
781
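// Productivity = CPU time spent in the mutator (total minus GC,
// profiling overhead and RTS init) as a percentage of total CPU time
// and of total elapsed time.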
782 statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
783 TimeToSecondsDbl(tot_cpu - gc_cpu -
784 PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
785 / TimeToSecondsDbl(tot_cpu),
786 TimeToSecondsDbl(tot_cpu - gc_cpu -
787 PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
788 / TimeToSecondsDbl(tot_elapsed));
789
790 /*
791 TICK_PRINT(1);
792 TICK_PRINT(2);
793 REPORT(TOTAL_CALLS);
794 TICK_PRINT_TOT(1);
795 TICK_PRINT_TOT(2);
796 */
797
798 #if USE_PAPI
799 papi_stats_report();
800 #endif
801 #if defined(THREADED_RTS) && defined(PROF_SPIN)
802 {
803 nat g;
804
805 statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
806 statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
807 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
808 statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
809 }
810 }
811 #endif
812 }
813
814 if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
815 char *fmt1, *fmt2;
816 if (RtsFlags.MiscFlags.machineReadable) {
817 fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
818 fmt2 = " ,(\"num_GCs\", \"%d\")\n"
819 " ,(\"average_bytes_used\", \"%ld\")\n"
820 " ,(\"max_bytes_used\", \"%ld\")\n"
821 " ,(\"num_byte_usage_samples\", \"%ld\")\n"
822 " ,(\"peak_megabytes_allocated\", \"%lu\")\n"
823 " ,(\"init_cpu_seconds\", \"%.2f\")\n"
824 " ,(\"init_wall_seconds\", \"%.2f\")\n"
825 " ,(\"mutator_cpu_seconds\", \"%.2f\")\n"
826 " ,(\"mutator_wall_seconds\", \"%.2f\")\n"
827 " ,(\"GC_cpu_seconds\", \"%.2f\")\n"
828 " ,(\"GC_wall_seconds\", \"%.2f\")\n"
829 " ]\n";
830 }
831 else {
832 fmt1 = "<<ghc: %llu bytes, ";
833 fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.2f INIT (%.2f elapsed), %.2f MUT (%.2f elapsed), %.2f GC (%.2f elapsed) :ghc>>\n";
834 }
835 /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
836 statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
837 statsPrintf(fmt2,
838 total_collections,
839 residency_samples == 0 ? 0 :
840 cumulative_residency*sizeof(W_)/residency_samples,
841 max_residency*sizeof(W_),
842 residency_samples,
843 (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
844 TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed),
845 TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed),
846 TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
847 }
848
849 statsFlush();
850 statsClose();
851 }
852
853 if (GC_coll_cpu) {
854 stgFree(GC_coll_cpu);
855 GC_coll_cpu = NULL;
856 }
857 if (GC_coll_elapsed) {
858 stgFree(GC_coll_elapsed);
859 GC_coll_elapsed = NULL;
860 }
861 if (GC_coll_max_pause) {
862 stgFree(GC_coll_max_pause);
863 GC_coll_max_pause = NULL;
864 }
865 }
866
867 /* -----------------------------------------------------------------------------
868 stat_describe_gens
869
870 Produce some detailed info on the state of the generational GC.
871 -------------------------------------------------------------------------- */
872 void
873 statDescribeGens(void)
874 {
875 nat g, mut, lge, i;
876 W_ gen_slop;
877 W_ tot_live, tot_slop;
878 W_ gen_live, gen_blocks;
879 bdescr *bd;
880 generation *gen;
881
882 debugBelch(
883 "----------------------------------------------------------\n"
884 " Gen Max Mut-list Blocks Large Live Slop\n"
885 " Blocks Bytes Objects \n"
886 "----------------------------------------------------------\n");
887
888 tot_live = 0;
889 tot_slop = 0;
890
891 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
892 gen = &generations[g];
893
894 for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
895 lge++;
896 }
897
898 gen_live = genLiveWords(gen);
899 gen_blocks = genLiveBlocks(gen);
900
901 mut = 0;
902 for (i = 0; i < n_capabilities; i++) {
903 mut += countOccupied(capabilities[i]->mut_lists[g]);
904
905 // Add the pinned object block.
906 bd = capabilities[i]->pinned_object_block;
907 if (bd != NULL) {
908 gen_live += bd->free - bd->start;
909 gen_blocks += bd->blocks;
910 }
911
912 gen_live += gcThreadLiveWords(i,g);
913 gen_blocks += gcThreadLiveBlocks(i,g);
914 }
915
916 debugBelch("%5d %7" FMT_Word " %9d", g, (W_)gen->max_blocks, mut);
917
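// Slop: words reserved in this generation's blocks but not occupied by live data.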
918 gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;
919
920 debugBelch("%8" FMT_Word " %8d %8" FMT_Word " %8" FMT_Word "\n", gen_blocks, lge,
921 gen_live*(W_)sizeof(W_), gen_slop*(W_)sizeof(W_));
922 tot_live += gen_live;
923 tot_slop += gen_slop;
924 }
925 debugBelch("----------------------------------------------------------\n");
926 debugBelch("%41s%8" FMT_SizeT " %8" FMT_SizeT "\n",
927 "",tot_live*sizeof(W_),tot_slop*sizeof(W_));
928 debugBelch("----------------------------------------------------------\n");
929 debugBelch("\n");
930 }
931
932 /* -----------------------------------------------------------------------------
933 Stats available via a programmatic interface, so e.g. GHCi can time
934 each compilation and expression evaluation.
935 -------------------------------------------------------------------------- */
936
937 extern HsInt64 getAllocations( void )
938 { return (HsInt64)GC_tot_alloc * sizeof(W_); }
939
940 /* EZY: I'm not convinced I got all the casting right. */
941
942 extern rtsBool getGCStatsEnabled( void )
943 {
944 return RtsFlags.GcFlags.giveStats != NO_GC_STATS;
945 }
946
947 extern void getGCStats( GCStats *s )
948 {
949 nat total_collections = 0;
950 nat g;
951 Time gc_cpu = 0;
952 Time gc_elapsed = 0;
953 Time current_elapsed = 0;
954 Time current_cpu = 0;
955
956 getProcessTimes(&current_cpu, &current_elapsed);
957
958 /* EZY: static inline'ify these */
959 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
960 total_collections += generations[g].collections;
961
962 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
963 gc_cpu += GC_coll_cpu[g];
964 gc_elapsed += GC_coll_elapsed[g];
965 }
966
967 s->bytes_allocated = GC_tot_alloc*(StgWord64)sizeof(W_);
968 s->num_gcs = total_collections;
969 s->num_byte_usage_samples = residency_samples;
970 s->max_bytes_used = max_residency*sizeof(W_);
971 s->cumulative_bytes_used = cumulative_residency*(StgWord64)sizeof(W_);
972 s->peak_megabytes_allocated = (StgWord64)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L));
973 s->bytes_copied = GC_tot_copied*(StgWord64)sizeof(W_);
974 s->max_bytes_slop = max_slop*(StgWord64)sizeof(W_);
975 s->current_bytes_used = current_residency*(StgWord64)sizeof(W_);
976 s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_);
977 /*
978 s->init_cpu_seconds = TimeToSecondsDbl(get_init_cpu());
979 s->init_wall_seconds = TimeToSecondsDbl(get_init_elapsed());
980 */
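    // Mutator time here is everything since the end of RTS init, minus GC
    // time (and retainer/heap-census profiling time in profiling builds).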
981 s->mutator_cpu_seconds = TimeToSecondsDbl(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
982 s->mutator_wall_seconds = TimeToSecondsDbl(current_elapsed- end_init_elapsed - gc_elapsed);
983 s->gc_cpu_seconds = TimeToSecondsDbl(gc_cpu);
984 s->gc_wall_seconds = TimeToSecondsDbl(gc_elapsed);
985 /* EZY: Being consistent with incremental output, but maybe should also discount init */
986 s->cpu_seconds = TimeToSecondsDbl(current_cpu);
987 s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed);
988 s->par_tot_bytes_copied = GC_par_tot_copied*(StgWord64)sizeof(W_);
989 s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_);
990 }
991 // extern void getTaskStats( TaskStats **s ) {}
992 #if 0
993 extern void getSparkStats( SparkCounters *s ) {
994 nat i;
995 s->created = 0;
996 s->dud = 0;
997 s->overflowed = 0;
998 s->converted = 0;
999 s->gcd = 0;
1000 s->fizzled = 0;
1001 for (i = 0; i < n_capabilities; i++) {
1002 s->created += capabilities[i]->spark_stats.created;
1003 s->dud += capabilities[i]->spark_stats.dud;
1004 s->overflowed+= capabilities[i]->spark_stats.overflowed;
1005 s->converted += capabilities[i]->spark_stats.converted;
1006 s->gcd += capabilities[i]->spark_stats.gcd;
1007 s->fizzled += capabilities[i]->spark_stats.fizzled;
1008 }
1009 }
1010 #endif
1011
1012 /* -----------------------------------------------------------------------------
1013 Dumping stuff in the stats file, or via the debug message interface
1014 -------------------------------------------------------------------------- */
1015
1016 void
1017 statsPrintf( char *s, ... )
1018 {
1019 FILE *sf = RtsFlags.GcFlags.statsFile;
1020 va_list ap;
1021
1022 va_start(ap,s);
1023 if (sf == NULL) {
1024 vdebugBelch(s,ap);
1025 } else {
1026 vfprintf(sf, s, ap);
1027 }
1028 va_end(ap);
1029 }
1030
1031 static void
1032 statsFlush( void )
1033 {
1034 FILE *sf = RtsFlags.GcFlags.statsFile;
1035 if (sf != NULL) {
1036 fflush(sf);
1037 }
1038 }
1039
1040 static void
1041 statsClose( void )
1042 {
1043 FILE *sf = RtsFlags.GcFlags.statsFile;
1044 if (sf != NULL) {
1045 fclose(sf);
1046 }
1047 }