/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2005
 *
 * Statistics and timing-related functions.
 *
 * ---------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"

#include "RtsFlags.h"
#include "RtsUtils.h"
#include "Schedule.h"
#include "Stats.h"
#include "Profiling.h"
#include "GetTime.h"
#include "sm/Storage.h"
#include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
#include "sm/GCThread.h"
#include "sm/BlockAlloc.h"

/* huh? */
#define BIG_STRING_LEN 512

#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)
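/* Worked example (assuming TIME_RESOLUTION is 10^9, i.e. Time values count
 * nanoseconds, which is the usual configuration): a Time of 2500000000
 * converts to 2.5 seconds. */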

static Time
    start_init_cpu, start_init_elapsed,
    end_init_cpu, end_init_elapsed,
    start_exit_cpu, start_exit_elapsed,
    end_exit_cpu, end_exit_elapsed;

static Time GC_tot_cpu = 0;

static StgWord64 GC_tot_alloc  = 0;
static StgWord64 GC_tot_copied = 0;

static StgWord64 GC_par_max_copied = 0;
static StgWord64 GC_par_tot_copied = 0;

#ifdef PROFILING
static Time RP_start_time = 0, RP_tot_time = 0;    // retainer prof user time
static Time RPe_start_time = 0, RPe_tot_time = 0;  // retainer prof elap time

static Time HC_start_time, HC_tot_time = 0;        // heap census prof user time
static Time HCe_start_time, HCe_tot_time = 0;      // heap census prof elap time
#endif

#ifdef PROFILING
#define PROF_VAL(x) (x)
#else
#define PROF_VAL(x) 0
#endif

// current = current as of last GC
static W_ current_residency = 0;    // in words; for stats only
static W_ max_residency = 0;
static W_ cumulative_residency = 0;
static W_ residency_samples = 0;    // for stats only
static W_ current_slop = 0;
static W_ max_slop = 0;

static W_ GC_end_faults = 0;

static Time *GC_coll_cpu = NULL;
static Time *GC_coll_elapsed = NULL;
static Time *GC_coll_max_pause = NULL;

static void statsPrintf( char *s, ... ) GNUC3_ATTRIBUTE(format (PRINTF, 1, 2));
static void statsFlush( void );
static void statsClose( void );

/* -----------------------------------------------------------------------------
   Current elapsed time
   ------------------------------------------------------------------------- */

Time stat_getElapsedTime(void)
{
    return getProcessElapsedTime() - start_init_elapsed;
}

/* ---------------------------------------------------------------------------
   Measure the current MUT time, for profiling
   ------------------------------------------------------------------------ */

double
mut_user_time_until( Time t )
{
    return TimeToSecondsDbl(t - GC_tot_cpu);
    // heapCensus() time is included in GC_tot_cpu, so we don't need
    // to subtract it here.
}

double
mut_user_time( void )
{
    Time cpu;
    cpu = getProcessCPUTime();
    return mut_user_time_until(cpu);
}
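
/* Illustrative sketch only (not part of the RTS API): since MUT time is just
 * process CPU time minus accumulated GC CPU time, a hypothetical profiler
 * tick could sample mutator progress between two points like this:
 *
 *     static double last_mut = 0.0;
 *     double now = mut_user_time();
 *     emitSample(now - last_mut);   // emitSample() is a made-up helper
 *     last_mut = now;
 */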

#ifdef PROFILING
/*
  mut_user_time_during_RP() returns the MUT time during retainer profiling.
  The same applies to mut_user_time_during_HC().
*/
static double
mut_user_time_during_RP( void )
{
    return TimeToSecondsDbl(RP_start_time - GC_tot_cpu - RP_tot_time);
}

#endif /* PROFILING */

/* ---------------------------------------------------------------------------
   initStats0() has no dependencies, it can be called right at the beginning
   ------------------------------------------------------------------------ */

void
initStats0(void)
{
    start_init_cpu = 0;
    start_init_elapsed = 0;
    end_init_cpu = 0;
    end_init_elapsed = 0;

    start_exit_cpu = 0;
    start_exit_elapsed = 0;
    end_exit_cpu = 0;
    end_exit_elapsed = 0;

    GC_tot_alloc = 0;
    GC_tot_copied = 0;
    GC_par_max_copied = 0;
    GC_par_tot_copied = 0;
    GC_tot_cpu = 0;

#ifdef PROFILING
    RP_start_time = 0;
    RP_tot_time = 0;
    RPe_start_time = 0;
    RPe_tot_time = 0;

    HC_start_time = 0;
    HC_tot_time = 0;
    HCe_start_time = 0;
    HCe_tot_time = 0;
#endif

    max_residency = 0;
    cumulative_residency = 0;
    residency_samples = 0;
    max_slop = 0;

    GC_end_faults = 0;
}

/* ---------------------------------------------------------------------------
   initStats1() can be called after setupRtsFlags()
   ------------------------------------------------------------------------ */

void
initStats1 (void)
{
    uint32_t i;

    if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
        statsPrintf("    Alloc    Copied     Live     GC     GC      TOT      TOT  Page Flts\n");
        statsPrintf("    bytes     bytes     bytes   user   elap     user     elap\n");
    }
    GC_coll_cpu =
        (Time *)stgMallocBytes(
            sizeof(Time)*RtsFlags.GcFlags.generations,
            "initStats");
    GC_coll_elapsed =
        (Time *)stgMallocBytes(
            sizeof(Time)*RtsFlags.GcFlags.generations,
            "initStats");
    GC_coll_max_pause =
        (Time *)stgMallocBytes(
            sizeof(Time)*RtsFlags.GcFlags.generations,
            "initStats");
    for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
        GC_coll_cpu[i] = 0;
        GC_coll_elapsed[i] = 0;
        GC_coll_max_pause[i] = 0;
    }
}

/* -----------------------------------------------------------------------------
   Initialisation time...
   -------------------------------------------------------------------------- */

void
stat_startInit(void)
{
    getProcessTimes(&start_init_cpu, &start_init_elapsed);
}

void
stat_endInit(void)
{
    getProcessTimes(&end_init_cpu, &end_init_elapsed);
}

/* -----------------------------------------------------------------------------
   stat_startExit and stat_endExit

   These two measure the time taken in shutdownHaskell().
   -------------------------------------------------------------------------- */

void
stat_startExit(void)
{
    getProcessTimes(&start_exit_cpu, &start_exit_elapsed);
}

void
stat_endExit(void)
{
    getProcessTimes(&end_exit_cpu, &end_exit_elapsed);
}
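
/* Call-pattern sketch (illustration only; the real call sites live in the RTS
 * startup/shutdown code, e.g. hs_exit()):
 *
 *     stat_startExit();
 *     ... run outstanding finalizers, stop capabilities, free the heap ...
 *     stat_endExit();
 *
 * so that (end_exit_cpu - start_exit_cpu) measures the CPU cost of shutdown.
 */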

void
stat_startGCSync (gc_thread *gct)
{
    gct->gc_sync_start_elapsed = getProcessElapsedTime();
}

/* -----------------------------------------------------------------------------
   Called at the beginning of each GC
   -------------------------------------------------------------------------- */

static uint32_t rub_bell = 0;

void
stat_startGC (Capability *cap, gc_thread *gct)
{
    uint32_t bell = RtsFlags.GcFlags.ringBell;

    if (bell) {
        if (bell > 1) {
            debugBelch(" GC ");
            rub_bell = 1;
        } else {
            debugBelch("\007");
        }
    }

    getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);

    // Post EVENT_GC_START with the same timestamp as used for stats
    // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
    // Here, as opposed to other places, the event is emitted on the cap
    // that initiates the GC and external tools expect it to have the same
    // timestamp as used in +RTS -s calculations.
    traceEventGcStartAtT(cap,
                         TimeToNS(gct->gc_start_elapsed - start_init_elapsed));

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
    {
        gct->gc_start_faults = getPageFaults();
    }

    updateNurseriesStats();
}

/* -----------------------------------------------------------------------------
   Called at the end of each GC
   -------------------------------------------------------------------------- */

void
stat_endGC (Capability *cap, gc_thread *gct,
            W_ live, W_ copied, W_ slop, uint32_t gen,
            uint32_t par_n_threads, W_ par_max_copied, W_ par_tot_copied)
{
    W_ tot_alloc;
    W_ alloc;

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
        rtsConfig.gcDoneHook != NULL ||
        RtsFlags.ProfFlags.doHeapProfile)
        // heap profiling needs GC_tot_time
    {
        Time cpu, elapsed, gc_cpu, gc_elapsed, gc_sync_elapsed;

        // Has to be emitted while all caps stopped for GC, but before GC_END.
        // See trac.haskell.org/ThreadScope/wiki/RTSsummaryEvents
        // for a detailed design rationale of the current setup
        // of GC eventlog events.
        traceEventGcGlobalSync(cap);

        // Emitted before GC_END on all caps, which simplifies tools code.
        traceEventGcStats(cap,
                          CAPSET_HEAP_DEFAULT,
                          gen,
                          copied * sizeof(W_),
                          slop * sizeof(W_),
                          /* current loss due to fragmentation */
                          (mblocks_allocated * BLOCKS_PER_MBLOCK - n_alloc_blocks)
                              * BLOCK_SIZE,
                          par_n_threads,
                          par_max_copied * sizeof(W_),
                          par_tot_copied * sizeof(W_));

        getProcessTimes(&cpu, &elapsed);

        // Post EVENT_GC_END with the same timestamp as used for stats
        // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
        // Here, as opposed to other places, the event is emitted on the cap
        // that initiates the GC and external tools expect it to have the same
        // timestamp as used in +RTS -s calculations.
        traceEventGcEndAtT(cap, TimeToNS(elapsed - start_init_elapsed));

        gc_sync_elapsed = gct->gc_start_elapsed - gct->gc_sync_start_elapsed;
        gc_elapsed = elapsed - gct->gc_start_elapsed;
        gc_cpu = cpu - gct->gc_start_cpu;

        /* For the moment we calculate both per-HEC and total allocation.
         * There is thus redundancy here, but for the moment we will calculate
         * it both the old and new way and assert they're the same.
         * When we're sure it's working OK then we can simplify things.
         */
        tot_alloc = calcTotalAllocated();

        // allocated since the last GC
        alloc = tot_alloc - GC_tot_alloc;
        GC_tot_alloc = tot_alloc;

        if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
            W_ faults = getPageFaults();

            statsPrintf("%9" FMT_Word " %9" FMT_Word " %9" FMT_Word,
                        alloc*sizeof(W_), copied*sizeof(W_),
                        live*sizeof(W_));
            statsPrintf(" %6.3f %6.3f %8.3f %8.3f %4" FMT_Word " %4" FMT_Word " (Gen: %2d)\n",
                        TimeToSecondsDbl(gc_cpu),
                        TimeToSecondsDbl(gc_elapsed),
                        TimeToSecondsDbl(cpu),
                        TimeToSecondsDbl(elapsed - start_init_elapsed),
                        faults - gct->gc_start_faults,
                        gct->gc_start_faults - GC_end_faults,
                        gen);

            GC_end_faults = faults;
            statsFlush();
        }


        if (rtsConfig.gcDoneHook != NULL) {
            rtsConfig.gcDoneHook(gen,
                                 alloc*sizeof(W_),
                                 live*sizeof(W_),
                                 copied*sizeof(W_),
                                 par_max_copied * sizeof(W_),
                                 mblocks_allocated * BLOCKS_PER_MBLOCK
                                     * BLOCK_SIZE,
                                 slop * sizeof(W_),
                                 TimeToNS(gc_sync_elapsed),
                                 TimeToNS(gc_elapsed),
                                 TimeToNS(gc_cpu));
        }

        GC_coll_cpu[gen] += gc_cpu;
        GC_coll_elapsed[gen] += gc_elapsed;
        if (GC_coll_max_pause[gen] < gc_elapsed) {
            GC_coll_max_pause[gen] = gc_elapsed;
        }

        GC_tot_copied += (StgWord64) copied;
        GC_par_max_copied += (StgWord64) par_max_copied;
        GC_par_tot_copied += (StgWord64) par_tot_copied;
        GC_tot_cpu += gc_cpu;

        traceEventHeapSize(cap,
                           CAPSET_HEAP_DEFAULT,
                           mblocks_allocated * MBLOCK_SIZE_W * sizeof(W_));

        if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
            if (live > max_residency) {
                max_residency = live;
            }
            current_residency = live;
            residency_samples++;
            cumulative_residency += live;
            traceEventHeapLive(cap,
                               CAPSET_HEAP_DEFAULT,
                               live * sizeof(W_));
        }

        if (slop > max_slop) max_slop = slop;
    }

    if (rub_bell) {
        debugBelch("\b\b\b \b\b\b");
        rub_bell = 0;
    }
}
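
#if 0
/* Illustration only (not compiled): a program embedding the RTS can observe
 * every collection by installing a callback in the RtsConfig it passes to
 * hs_init_ghc().  The sketch below simply mirrors the argument list passed to
 * rtsConfig.gcDoneHook above; the parameter types here are assumptions, so
 * check the RtsConfig declaration in the RTS headers for the authoritative
 * prototype.  myGcDoneHook is a made-up name.
 */
static void myGcDoneHook (unsigned int gen,
                          W_ allocated_bytes, W_ live_bytes, W_ copied_bytes,
                          W_ par_max_copied_bytes, W_ mem_in_use_bytes,
                          W_ slop_bytes,
                          StgWord64 sync_elapsed_ns, StgWord64 gc_elapsed_ns,
                          StgWord64 gc_cpu_ns)
{
    // e.g. forward the pause time of each collection to a monitoring system
    debugBelch("GC in gen %u: %lu bytes live, pause %lu ns\n",
               gen, (unsigned long)live_bytes, (unsigned long)gc_elapsed_ns);
}
#endif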

/* -----------------------------------------------------------------------------
   Called at the beginning of each retainer profiling run
   -------------------------------------------------------------------------- */
#ifdef PROFILING
void
stat_startRP(void)
{
    Time user, elapsed;
    getProcessTimes( &user, &elapsed );

    RP_start_time = user;
    RPe_start_time = elapsed;
}
#endif /* PROFILING */

/* -----------------------------------------------------------------------------
   Called at the end of each retainer profiling run
   -------------------------------------------------------------------------- */

#ifdef PROFILING
void
stat_endRP(
    uint32_t retainerGeneration,
#ifdef DEBUG_RETAINER
    uint32_t maxCStackSize,
    int maxStackSize,
#endif
    double averageNumVisit)
{
    Time user, elapsed;
    getProcessTimes( &user, &elapsed );

    RP_tot_time += user - RP_start_time;
    RPe_tot_time += elapsed - RPe_start_time;

    fprintf(prof_file, "Retainer Profiling: %d, at %f seconds\n",
            retainerGeneration, mut_user_time_during_RP());
#ifdef DEBUG_RETAINER
    fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
    fprintf(prof_file, "\tMax auxiliary stack size = %d\n", maxStackSize);
#endif
    fprintf(prof_file, "\tAverage number of visits per object = %f\n",
            averageNumVisit);
}
#endif /* PROFILING */

/* -----------------------------------------------------------------------------
   Called at the beginning of each heap census
   -------------------------------------------------------------------------- */
#ifdef PROFILING
void
stat_startHeapCensus(void)
{
    Time user, elapsed;
    getProcessTimes( &user, &elapsed );

    HC_start_time = user;
    HCe_start_time = elapsed;
}
#endif /* PROFILING */

/* -----------------------------------------------------------------------------
   Called at the end of each heap census
   -------------------------------------------------------------------------- */
#ifdef PROFILING
void
stat_endHeapCensus(void)
{
    Time user, elapsed;
    getProcessTimes( &user, &elapsed );

    HC_tot_time += user - HC_start_time;
    HCe_tot_time += elapsed - HCe_start_time;
}
#endif /* PROFILING */

/* -----------------------------------------------------------------------------
   Called at the end of execution

   NOTE: number of allocations is not entirely accurate: it doesn't
   take into account the few bytes at the end of the heap that
   were left unused when the heap-check failed.
   -------------------------------------------------------------------------- */

#ifdef DEBUG
#define TICK_VAR_INI(arity) \
  StgInt SLOW_CALLS_##arity = 1; \
  StgInt RIGHT_ARITY_##arity = 1; \
  StgInt TAGGED_PTR_##arity = 0;

TICK_VAR_INI(1)
TICK_VAR_INI(2)

StgInt TOTAL_CALLS=1;
#endif

/* Report the value of a counter */
#define REPORT(counter) \
  { \
    showStgWord64(counter,temp,rtsTrue/*commas*/); \
    statsPrintf(" (" #counter ") : %s\n",temp); \
  }

/* Report the value of a counter as a percentage of another counter */
#define REPORT_PCT(counter,countertot) \
  statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
              counter*100.0/countertot)

#define TICK_PRINT(arity) \
  REPORT(SLOW_CALLS_##arity); \
  REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \
  REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \
  REPORT(RIGHT_ARITY_##arity); \
  REPORT(TAGGED_PTR_##arity)

#define TICK_PRINT_TOT(arity) \
  statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
              SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
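
/* For illustration (numbers and spacing made up): with the DEBUG tick counters
 * enabled, TICK_PRINT(1) expands to a REPORT/REPORT_PCT sequence that prints
 * lines along these lines:
 *
 *     (SLOW_CALLS_1) : 1,234
 *     (RIGHT_ARITY_1) % of (SLOW_CALLS_1) : 95.0%
 *     (TAGGED_PTR_1) % of (RIGHT_ARITY_1) : 40.0%
 *     (RIGHT_ARITY_1) : 1,172
 *     (TAGGED_PTR_1) : 469
 */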

static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; }
static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; }


void
stat_exit (void)
{
    generation *gen;
    Time gc_cpu = 0;
    Time gc_elapsed = 0;
    Time init_cpu = 0;
    Time init_elapsed = 0;
    Time mut_cpu = 0;
    Time mut_elapsed = 0;
    Time exit_cpu = 0;
    Time exit_elapsed = 0;
    W_ tot_alloc;
    W_ alloc;

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {

        char temp[BIG_STRING_LEN];
        Time tot_cpu;
        Time tot_elapsed;
        uint32_t i, g, total_collections = 0;

        getProcessTimes( &tot_cpu, &tot_elapsed );
        tot_elapsed -= start_init_elapsed;

        tot_alloc = calcTotalAllocated();

        // allocated since the last GC
        alloc = tot_alloc - GC_tot_alloc;
        GC_tot_alloc = tot_alloc;

        /* Count total garbage collections */
        for (g = 0; g < RtsFlags.GcFlags.generations; g++)
            total_collections += generations[g].collections;

        /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
        if (tot_cpu == 0.0) tot_cpu = 1;
        if (tot_elapsed == 0.0) tot_elapsed = 1;

        if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
            statsPrintf("%9" FMT_Word " %9.9s %9.9s", (W_)alloc*sizeof(W_), "", "");
            statsPrintf(" %6.3f %6.3f\n\n", 0.0, 0.0);
        }

        for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
            gc_cpu += GC_coll_cpu[i];
            gc_elapsed += GC_coll_elapsed[i];
        }

        // heapCensus() is called by the GC, so RP and HC time are
        // included in the GC stats.  We therefore subtract them to
        // obtain the actual GC cpu time.
        gc_cpu -= PROF_VAL(RP_tot_time + HC_tot_time);
        gc_elapsed -= PROF_VAL(RPe_tot_time + HCe_tot_time);

        init_cpu = get_init_cpu();
        init_elapsed = get_init_elapsed();

        exit_cpu = end_exit_cpu - start_exit_cpu;
        exit_elapsed = end_exit_elapsed - start_exit_elapsed;

        mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed;

        mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu
            - PROF_VAL(RP_tot_time + HC_tot_time);
        if (mut_cpu < 0) { mut_cpu = 0; }

        if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
            showStgWord64(GC_tot_alloc*sizeof(W_),
                          temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes allocated in the heap\n", temp);

            showStgWord64(GC_tot_copied*sizeof(W_),
                          temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes copied during GC\n", temp);

            if ( residency_samples > 0 ) {
                showStgWord64(max_residency*sizeof(W_),
                              temp, rtsTrue/*commas*/);
                statsPrintf("%16s bytes maximum residency (%" FMT_Word " sample(s))\n",
                            temp, residency_samples);
            }

            showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes maximum slop\n", temp);

            statsPrintf("%16" FMT_SizeT " MB total memory in use (%" FMT_SizeT " MB lost due to fragmentation)\n\n",
                        (size_t)(peak_mblocks_allocated * MBLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)),
                        (size_t)(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
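            /* Unit check for the line above (illustrative figures):
             * peak_mblocks_allocated * MBLOCK_SIZE_W is a size in words, and
             * dividing by (1024*1024 / sizeof(W_)) converts words to MB.
             * E.g. assuming 1 MB megablocks and 8-byte words, 512 megablocks
             * is 512 * 131072 words = 67,108,864 words = 512 MB. */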

            /* Print garbage collections in each gen */
            statsPrintf("                                     Tot time (elapsed)  Avg pause  Max pause\n");
            for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                gen = &generations[g];
                statsPrintf("  Gen %2d     %5d colls, %5d par   %6.3fs  %6.3fs     %3.4fs    %3.4fs\n",
                            gen->no,
                            gen->collections,
                            gen->par_collections,
                            TimeToSecondsDbl(GC_coll_cpu[g]),
                            TimeToSecondsDbl(GC_coll_elapsed[g]),
                            gen->collections == 0 ? 0 : TimeToSecondsDbl(GC_coll_elapsed[g] / gen->collections),
                            TimeToSecondsDbl(GC_coll_max_pause[g]));
            }

#if defined(THREADED_RTS)
            if (RtsFlags.ParFlags.parGcEnabled && n_capabilities > 1) {
                statsPrintf("\n  Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n",
                            100 * (((double)GC_par_tot_copied / (double)GC_par_max_copied) - 1)
                                / (n_capabilities - 1)
                           );
            }
#endif
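            /* Worked example for the work-balance figure above (numbers made
             * up): with n_capabilities = 4, GC_par_tot_copied = 300 and
             * GC_par_max_copied = 100 (in words), tot/max = 3, so the balance
             * is 100 * (3 - 1) / (4 - 1) = 66.7%.  A perfectly balanced GC has
             * max = tot/4, giving 100%; a serial GC has max = tot, giving 0%. */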
            statsPrintf("\n");

#if defined(THREADED_RTS)
            statsPrintf("  TASKS: %d (%d bound, %d peak workers (%d total), using -N%d)\n",
                        taskCount, taskCount - workerCount,
                        peakWorkerCount, workerCount,
                        n_capabilities);

            statsPrintf("\n");

            {
                uint32_t i;
                SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
                for (i = 0; i < n_capabilities; i++) {
                    sparks.created   += capabilities[i]->spark_stats.created;
                    sparks.dud       += capabilities[i]->spark_stats.dud;
                    sparks.overflowed+= capabilities[i]->spark_stats.overflowed;
                    sparks.converted += capabilities[i]->spark_stats.converted;
                    sparks.gcd       += capabilities[i]->spark_stats.gcd;
                    sparks.fizzled   += capabilities[i]->spark_stats.fizzled;
                }

                statsPrintf("  SPARKS: %" FMT_Word " (%" FMT_Word " converted, %" FMT_Word " overflowed, %" FMT_Word " dud, %" FMT_Word " GC'd, %" FMT_Word " fizzled)\n\n",
                            sparks.created + sparks.dud + sparks.overflowed,
                            sparks.converted, sparks.overflowed, sparks.dud,
                            sparks.gcd, sparks.fizzled);
            }
#endif

            statsPrintf("  INIT    time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed));

            statsPrintf("  MUT     time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed));
            statsPrintf("  GC      time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));

#ifdef PROFILING
            statsPrintf("  RP      time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(RP_tot_time), TimeToSecondsDbl(RPe_tot_time));
            statsPrintf("  PROF    time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(HC_tot_time), TimeToSecondsDbl(HCe_tot_time));
#endif
            statsPrintf("  EXIT    time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(exit_cpu), TimeToSecondsDbl(exit_elapsed));
            statsPrintf("  Total   time  %7.3fs  (%7.3fs elapsed)\n\n",
                        TimeToSecondsDbl(tot_cpu), TimeToSecondsDbl(tot_elapsed));
#ifndef THREADED_RTS
            statsPrintf("  %%GC     time  %5.1f%%  (%.1f%% elapsed)\n\n",
                        TimeToSecondsDbl(gc_cpu)*100/TimeToSecondsDbl(tot_cpu),
                        TimeToSecondsDbl(gc_elapsed)*100/TimeToSecondsDbl(tot_elapsed));
#endif

            if (mut_cpu == 0) {
                showStgWord64(0, temp, rtsTrue/*commas*/);
            } else {
                showStgWord64(
                    (StgWord64)((GC_tot_alloc*sizeof(W_)) / TimeToSecondsDbl(mut_cpu)),
                    temp, rtsTrue/*commas*/);
            }

            statsPrintf("  Alloc rate    %s bytes per MUT second\n\n", temp);

            statsPrintf("  Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
                        TimeToSecondsDbl(tot_cpu - gc_cpu -
                                         PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                            / TimeToSecondsDbl(tot_cpu),
                        TimeToSecondsDbl(tot_elapsed - gc_elapsed -
                                         PROF_VAL(RPe_tot_time + HCe_tot_time) - init_elapsed) * 100
                            / TimeToSecondsDbl(tot_elapsed));
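            /* Worked example for the productivity figure (illustrative
             * numbers): with tot_cpu = 10s, gc_cpu = 2s, init_cpu = 0.1s and
             * no profiling overhead, productivity is
             * (10 - 2 - 0.1) / 10 * 100 = 79%, i.e. the share of CPU time
             * spent running mutator code. */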

            /*
            TICK_PRINT(1);
            TICK_PRINT(2);
            REPORT(TOTAL_CALLS);
            TICK_PRINT_TOT(1);
            TICK_PRINT_TOT(2);
            */

#if defined(THREADED_RTS) && defined(PROF_SPIN)
            {
                uint32_t g;

                statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
                statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
                for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                    statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
                }
            }
#endif
        }

        if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
            char *fmt1, *fmt2;
            if (RtsFlags.MiscFlags.machineReadable) {
                fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
                fmt2 = " ,(\"num_GCs\", \"%d\")\n"
                       " ,(\"average_bytes_used\", \"%ld\")\n"
                       " ,(\"max_bytes_used\", \"%ld\")\n"
                       " ,(\"num_byte_usage_samples\", \"%ld\")\n"
                       " ,(\"peak_megabytes_allocated\", \"%lu\")\n"
                       " ,(\"init_cpu_seconds\", \"%.3f\")\n"
                       " ,(\"init_wall_seconds\", \"%.3f\")\n"
                       " ,(\"mutator_cpu_seconds\", \"%.3f\")\n"
                       " ,(\"mutator_wall_seconds\", \"%.3f\")\n"
                       " ,(\"GC_cpu_seconds\", \"%.3f\")\n"
                       " ,(\"GC_wall_seconds\", \"%.3f\")\n"
                       " ]\n";
            }
            else {
                fmt1 = "<<ghc: %llu bytes, ";
                fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.3f INIT (%.3f elapsed), %.3f MUT (%.3f elapsed), %.3f GC (%.3f elapsed) :ghc>>\n";
            }
            /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
            statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
            statsPrintf(fmt2,
                        total_collections,
                        residency_samples == 0 ? 0 :
                            cumulative_residency*sizeof(W_)/residency_samples,
                        max_residency*sizeof(W_),
                        residency_samples,
                        (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
                        TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed),
                        TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed),
                        TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
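            /* For illustration (numbers made up), the non-machine-readable
             * one-line form looks like:
             *
             *   <<ghc: 2500000000 bytes, 1200 GCs, 48000/96000 avg/max bytes
             *     residency (5 samples), 12M in use, 0.001 INIT (0.002 elapsed),
             *     1.950 MUT (2.001 elapsed), 0.320 GC (0.330 elapsed) :ghc>>
             *
             * (printed as a single line; wrapped here for readability). */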
        }

        statsFlush();
        statsClose();
    }

    if (GC_coll_cpu) {
        stgFree(GC_coll_cpu);
        GC_coll_cpu = NULL;
    }
    if (GC_coll_elapsed) {
        stgFree(GC_coll_elapsed);
        GC_coll_elapsed = NULL;
    }
    if (GC_coll_max_pause) {
        stgFree(GC_coll_max_pause);
        GC_coll_max_pause = NULL;
    }
}

/* -----------------------------------------------------------------------------
   statDescribeGens

   Produce some detailed info on the state of the generational GC.
   -------------------------------------------------------------------------- */
void
statDescribeGens(void)
{
    uint32_t g, mut, lge, i;
    W_ gen_slop;
    W_ tot_live, tot_slop;
    W_ gen_live, gen_blocks;
    bdescr *bd;
    generation *gen;

    debugBelch(
"----------------------------------------------------------\n"
"  Gen     Max  Mut-list  Blocks    Large     Live     Slop\n"
"       Blocks     Bytes          Objects                  \n"
"----------------------------------------------------------\n");

    tot_live = 0;
    tot_slop = 0;

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gen = &generations[g];

        for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
            lge++;
        }

        gen_live   = genLiveWords(gen);
        gen_blocks = genLiveBlocks(gen);

        mut = 0;
        for (i = 0; i < n_capabilities; i++) {
            mut += countOccupied(capabilities[i]->mut_lists[g]);

            // Add the pinned object block.
            bd = capabilities[i]->pinned_object_block;
            if (bd != NULL) {
                gen_live   += bd->free - bd->start;
                gen_blocks += bd->blocks;
            }

            gen_live   += gcThreadLiveWords(i,g);
            gen_blocks += gcThreadLiveBlocks(i,g);
        }

        debugBelch("%5d %7" FMT_Word " %9d", g, (W_)gen->max_blocks, mut);

        gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;

        debugBelch("%8" FMT_Word " %8d %8" FMT_Word " %8" FMT_Word "\n", gen_blocks, lge,
                   gen_live*(W_)sizeof(W_), gen_slop*(W_)sizeof(W_));
        tot_live += gen_live;
        tot_slop += gen_slop;
    }
    debugBelch("----------------------------------------------------------\n");
    debugBelch("%41s%8" FMT_Word " %8" FMT_Word "\n",
               "",tot_live*sizeof(W_),tot_slop*sizeof(W_));
    debugBelch("----------------------------------------------------------\n");
    debugBelch("\n");
}

/* -----------------------------------------------------------------------------
   Stats available via a programmatic interface, so e.g. GHCi can time
   each compilation and expression evaluation.
   -------------------------------------------------------------------------- */

extern HsInt64 getAllocations( void )
{ return (HsInt64)GC_tot_alloc * sizeof(W_); }

/* EZY: I'm not convinced I got all the casting right. */

extern rtsBool getGCStatsEnabled( void )
{
    return RtsFlags.GcFlags.giveStats != NO_GC_STATS;
}

extern void getGCStats( GCStats *s )
{
    uint32_t total_collections = 0;
    uint32_t g;
    Time gc_cpu = 0;
    Time gc_elapsed = 0;
    Time current_elapsed = 0;
    Time current_cpu = 0;

    getProcessTimes(&current_cpu, &current_elapsed);

    /* EZY: static inline'ify these */
    for (g = 0; g < RtsFlags.GcFlags.generations; g++)
        total_collections += generations[g].collections;

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gc_cpu += GC_coll_cpu[g];
        gc_elapsed += GC_coll_elapsed[g];
    }

    s->bytes_allocated = GC_tot_alloc*(StgWord64)sizeof(W_);
    s->num_gcs = total_collections;
    s->num_byte_usage_samples = residency_samples;
    s->max_bytes_used = max_residency*sizeof(W_);
    s->cumulative_bytes_used = cumulative_residency*(StgWord64)sizeof(W_);
    s->peak_megabytes_allocated = (StgWord64)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L));
    s->mblocks_allocated = (StgWord64)mblocks_allocated;
    s->bytes_copied = GC_tot_copied*(StgWord64)sizeof(W_);
    s->max_bytes_slop = max_slop*(StgWord64)sizeof(W_);
    s->current_bytes_used = current_residency*(StgWord64)sizeof(W_);
    s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_);
    /*
    s->init_cpu_seconds = TimeToSecondsDbl(get_init_cpu());
    s->init_wall_seconds = TimeToSecondsDbl(get_init_elapsed());
    */
    s->mutator_cpu_seconds = TimeToSecondsDbl(current_cpu - end_init_cpu - gc_cpu -
                                              PROF_VAL(RP_tot_time + HC_tot_time));
    s->mutator_wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed - gc_elapsed);
    s->gc_cpu_seconds = TimeToSecondsDbl(gc_cpu);
    s->gc_wall_seconds = TimeToSecondsDbl(gc_elapsed);
    /* EZY: Being consistent with incremental output, but maybe should also discount init */
    s->cpu_seconds = TimeToSecondsDbl(current_cpu);
    s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed);
    s->par_tot_bytes_copied = GC_par_tot_copied*(StgWord64)sizeof(W_);
    s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_);
}
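
#if 0
/* Usage sketch (illustration only, not compiled): a program embedding the RTS
 * could diff two getGCStats() snapshots around a workload.  GCStats and
 * getGCStats() are the real type and entry point declared in the RTS headers;
 * runWorkload() and report() are made-up names.
 */
static void timeWorkload (void)
{
    GCStats before, after;
    getGCStats(&before);
    runWorkload();
    getGCStats(&after);
    report("allocated %llu bytes over %u GCs",
           (unsigned long long)(after.bytes_allocated - before.bytes_allocated),
           (unsigned)(after.num_gcs - before.num_gcs));
}
#endif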
// extern void getTaskStats( TaskStats **s ) {}
#if 0
extern void getSparkStats( SparkCounters *s ) {
    uint32_t i;
    s->created = 0;
    s->dud = 0;
    s->overflowed = 0;
    s->converted = 0;
    s->gcd = 0;
    s->fizzled = 0;
    for (i = 0; i < n_capabilities; i++) {
        s->created   += capabilities[i]->spark_stats.created;
        s->dud       += capabilities[i]->spark_stats.dud;
        s->overflowed+= capabilities[i]->spark_stats.overflowed;
        s->converted += capabilities[i]->spark_stats.converted;
        s->gcd       += capabilities[i]->spark_stats.gcd;
        s->fizzled   += capabilities[i]->spark_stats.fizzled;
    }
}
#endif

/* -----------------------------------------------------------------------------
   Dumping stuff in the stats file, or via the debug message interface
   -------------------------------------------------------------------------- */

void
statsPrintf( char *s, ... )
{
    FILE *sf = RtsFlags.GcFlags.statsFile;
    va_list ap;

    va_start(ap,s);
    if (sf == NULL) {
        vdebugBelch(s,ap);
    } else {
        vfprintf(sf, s, ap);
    }
    va_end(ap);
}

static void
statsFlush( void )
{
    FILE *sf = RtsFlags.GcFlags.statsFile;
    if (sf != NULL) {
        fflush(sf);
    }
}

static void
statsClose( void )
{
    FILE *sf = RtsFlags.GcFlags.statsFile;
    if (sf != NULL) {
        fclose(sf);
    }
}