rts: Don't use strndup
[ghc.git] / rts / Stats.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2005
4 *
5 * Statistics and timing-related functions.
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "PosixSource.h"
10 #include "Rts.h"
11
12 #include "RtsFlags.h"
13 #include "RtsUtils.h"
14 #include "Schedule.h"
15 #include "Stats.h"
16 #include "Profiling.h"
17 #include "GetTime.h"
18 #include "sm/Storage.h"
19 #include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
20 #include "sm/GCThread.h"
21 #include "sm/BlockAlloc.h"
22
23 /* scratch-buffer size used when formatting numbers for stats output */
24 #define BIG_STRING_LEN 512
25
26 #define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)
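// Illustrative example, assuming TIME_RESOLUTION is 10^9 (i.e. Time is kept
// in nanoseconds): TimeToSecondsDbl(2500000000) evaluates to 2.5 seconds.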
27
28 static Time
29 start_init_cpu, start_init_elapsed,
30 end_init_cpu, end_init_elapsed,
31 start_exit_cpu, start_exit_elapsed,
32 end_exit_cpu, end_exit_elapsed;
33
34 static Time GC_tot_cpu = 0;
35
36 static StgWord64 GC_tot_alloc = 0;
37 static StgWord64 GC_tot_copied = 0;
38
39 static StgWord64 GC_par_max_copied = 0;
40 static StgWord64 GC_par_tot_copied = 0;
41
42 #ifdef PROFILING
43 static Time RP_start_time = 0, RP_tot_time = 0; // retainer prof user time
44 static Time RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time
45
46 static Time HC_start_time, HC_tot_time = 0; // heap census prof user time
47 static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
48 #endif
49
50 #ifdef PROFILING
51 #define PROF_VAL(x) (x)
52 #else
53 #define PROF_VAL(x) 0
54 #endif
55
56 // current = current as of last GC
57 static W_ current_residency = 0; // in words; for stats only
58 static W_ max_residency = 0;
59 static W_ cumulative_residency = 0;
60 static W_ residency_samples = 0; // for stats only
61 static W_ current_slop = 0;
62 static W_ max_slop = 0;
63
64 static W_ GC_end_faults = 0;
65
66 static Time *GC_coll_cpu = NULL;
67 static Time *GC_coll_elapsed = NULL;
68 static Time *GC_coll_max_pause = NULL;
69
70 static void statsPrintf( char *s, ... ) GNUC3_ATTRIBUTE(format (PRINTF, 1, 2));
71 static void statsFlush( void );
72 static void statsClose( void );
73
74 /* -----------------------------------------------------------------------------
75 Current elapsed time
76 ------------------------------------------------------------------------- */
77
78 Time stat_getElapsedTime(void)
79 {
80 return getProcessElapsedTime() - start_init_elapsed;
81 }
82
83 /* ---------------------------------------------------------------------------
84 Measure the current MUT time, for profiling
85 ------------------------------------------------------------------------ */
86
87 double
88 mut_user_time_until( Time t )
89 {
90 return TimeToSecondsDbl(t - GC_tot_cpu);
91 // heapCensus() time is included in GC_tot_cpu, so we don't need
92 // to subtract it here.
93 }
94
95 double
96 mut_user_time( void )
97 {
98 Time cpu;
99 cpu = getProcessCPUTime();
100 return mut_user_time_until(cpu);
101 }
102
103 #ifdef PROFILING
104 /*
105   mut_user_time_during_RP() returns the MUT time during retainer profiling;
106   mut_user_time_during_heap_census() does the same for a heap census.
107 */
108 double
109 mut_user_time_during_RP( void )
110 {
111 return TimeToSecondsDbl(RP_start_time - GC_tot_cpu - RP_tot_time);
112 }
113
114 double
115 mut_user_time_during_heap_census( void )
116 {
117 return TimeToSecondsDbl(HC_start_time - GC_tot_cpu - RP_tot_time);
118 }
119 #endif /* PROFILING */
120
121 /* ---------------------------------------------------------------------------
122    initStats0() has no dependencies; it can be called right at the beginning
123 ------------------------------------------------------------------------ */
124
125 void
126 initStats0(void)
127 {
128 start_init_cpu = 0;
129 start_init_elapsed = 0;
130 end_init_cpu = 0;
131 end_init_elapsed = 0;
132
133 start_exit_cpu = 0;
134 start_exit_elapsed = 0;
135 end_exit_cpu = 0;
136 end_exit_elapsed = 0;
137
138 GC_tot_alloc = 0;
139 GC_tot_copied = 0;
140 GC_par_max_copied = 0;
141 GC_par_tot_copied = 0;
142 GC_tot_cpu = 0;
143
144 #ifdef PROFILING
145 RP_start_time = 0;
146 RP_tot_time = 0;
147 RPe_start_time = 0;
148 RPe_tot_time = 0;
149
150 HC_start_time = 0;
151 HC_tot_time = 0;
152 HCe_start_time = 0;
153 HCe_tot_time = 0;
154 #endif
155
156 max_residency = 0;
157 cumulative_residency = 0;
158 residency_samples = 0;
159 max_slop = 0;
160
161 GC_end_faults = 0;
162 }
163
164 /* ---------------------------------------------------------------------------
165 initStats1() can be called after setupRtsFlags()
166 ------------------------------------------------------------------------ */
167
168 void
169 initStats1 (void)
170 {
171 nat i;
172
173 if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
174 statsPrintf(" Alloc Copied Live GC GC TOT TOT Page Flts\n");
175 statsPrintf(" bytes bytes bytes user elap user elap\n");
176 }
177 GC_coll_cpu =
178 (Time *)stgMallocBytes(
179 sizeof(Time)*RtsFlags.GcFlags.generations,
180 "initStats");
181 GC_coll_elapsed =
182 (Time *)stgMallocBytes(
183 sizeof(Time)*RtsFlags.GcFlags.generations,
184 "initStats");
185 GC_coll_max_pause =
186 (Time *)stgMallocBytes(
187 sizeof(Time)*RtsFlags.GcFlags.generations,
188 "initStats");
189 for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
190 GC_coll_cpu[i] = 0;
191 GC_coll_elapsed[i] = 0;
192 GC_coll_max_pause[i] = 0;
193 }
194 }
195
196 /* -----------------------------------------------------------------------------
197 Initialisation time...
198 -------------------------------------------------------------------------- */
199
200 void
201 stat_startInit(void)
202 {
203 getProcessTimes(&start_init_cpu, &start_init_elapsed);
204 }
205
206 void
207 stat_endInit(void)
208 {
209 getProcessTimes(&end_init_cpu, &end_init_elapsed);
210 }
211
212 /* -----------------------------------------------------------------------------
213 stat_startExit and stat_endExit
214
215 These two measure the time taken in shutdownHaskell().
216 -------------------------------------------------------------------------- */
217
218 void
219 stat_startExit(void)
220 {
221 getProcessTimes(&start_exit_cpu, &start_exit_elapsed);
222 }
223
224 void
225 stat_endExit(void)
226 {
227 getProcessTimes(&end_exit_cpu, &end_exit_elapsed);
228 }
229
230 void
231 stat_startGCSync (gc_thread *gct)
232 {
233 gct->gc_sync_start_elapsed = getProcessElapsedTime();
234 }
235
236 /* -----------------------------------------------------------------------------
237 Called at the beginning of each GC
238 -------------------------------------------------------------------------- */
239
240 static nat rub_bell = 0;
241
242 void
243 stat_startGC (Capability *cap, gc_thread *gct)
244 {
245 nat bell = RtsFlags.GcFlags.ringBell;
246
247 if (bell) {
248 if (bell > 1) {
249 debugBelch(" GC ");
250 rub_bell = 1;
251 } else {
252 debugBelch("\007");
253 }
254 }
255
256 getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
257
258 // Post EVENT_GC_START with the same timestamp as used for stats
259 // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
260 // Here, as opposed to other places, the event is emitted on the cap
261 // that initiates the GC and external tools expect it to have the same
262     // timestamp as used in +RTS -s calculations.
263 traceEventGcStartAtT(cap,
264 TimeToNS(gct->gc_start_elapsed - start_init_elapsed));
265
266 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
267 {
268 gct->gc_start_faults = getPageFaults();
269 }
270
271 updateNurseriesStats();
272 }
273
274 /* -----------------------------------------------------------------------------
275 Called at the end of each GC
276 -------------------------------------------------------------------------- */
277
278 void
279 stat_endGC (Capability *cap, gc_thread *gct,
280 W_ live, W_ copied, W_ slop, nat gen,
281 nat par_n_threads, W_ par_max_copied, W_ par_tot_copied)
282 {
283 W_ tot_alloc;
284 W_ alloc;
285
286 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
287 rtsConfig.gcDoneHook != NULL ||
288 RtsFlags.ProfFlags.doHeapProfile)
289 // heap profiling needs GC_tot_time
290 {
291 Time cpu, elapsed, gc_cpu, gc_elapsed, gc_sync_elapsed;
292
293         // Has to be emitted while all caps are stopped for GC, but before GC_END.
294 // See trac.haskell.org/ThreadScope/wiki/RTSsummaryEvents
295 // for a detailed design rationale of the current setup
296 // of GC eventlog events.
297 traceEventGcGlobalSync(cap);
298
299 // Emitted before GC_END on all caps, which simplifies tools code.
300 traceEventGcStats(cap,
301 CAPSET_HEAP_DEFAULT,
302 gen,
303 copied * sizeof(W_),
304 slop * sizeof(W_),
305 /* current loss due to fragmentation */
306 (mblocks_allocated * BLOCKS_PER_MBLOCK - n_alloc_blocks)
307 * BLOCK_SIZE_W * sizeof(W_),
308 par_n_threads,
309 par_max_copied * sizeof(W_),
310 par_tot_copied * sizeof(W_));
311
312 getProcessTimes(&cpu, &elapsed);
313
314 // Post EVENT_GC_END with the same timestamp as used for stats
315 // (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
316 // Here, as opposed to other places, the event is emitted on the cap
317 // that initiates the GC and external tools expect it to have the same
318         // timestamp as used in +RTS -s calculations.
319 traceEventGcEndAtT(cap, TimeToNS(elapsed - start_init_elapsed));
320
321 gc_sync_elapsed = gct->gc_start_elapsed - gct->gc_sync_start_elapsed;
322 gc_elapsed = elapsed - gct->gc_start_elapsed;
323 gc_cpu = cpu - gct->gc_start_cpu;
324
325         /* For the moment we calculate both per-HEC and total allocation.
326          * There is thus some redundancy here, but for now we calculate it
327          * both the old way and the new way and assert that they agree.
328          * When we're sure it's working OK we can simplify things.
329          */
330 tot_alloc = calcTotalAllocated();
331
332 // allocated since the last GC
333 alloc = tot_alloc - GC_tot_alloc;
334 GC_tot_alloc = tot_alloc;
335
336 if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
337 W_ faults = getPageFaults();
338
339 statsPrintf("%9" FMT_SizeT " %9" FMT_SizeT " %9" FMT_SizeT,
340 alloc*sizeof(W_), copied*sizeof(W_),
341 live*sizeof(W_));
342 statsPrintf(" %6.3f %6.3f %8.3f %8.3f %4" FMT_Word " %4" FMT_Word " (Gen: %2d)\n",
343 TimeToSecondsDbl(gc_cpu),
344 TimeToSecondsDbl(gc_elapsed),
345 TimeToSecondsDbl(cpu),
346 TimeToSecondsDbl(elapsed - start_init_elapsed),
347 faults - gct->gc_start_faults,
348 gct->gc_start_faults - GC_end_faults,
349 gen);
350
351 GC_end_faults = faults;
352 statsFlush();
353 }
354
355
356 if (rtsConfig.gcDoneHook != NULL) {
357 rtsConfig.gcDoneHook(gen,
358 alloc*sizeof(W_),
359 live*sizeof(W_),
360 copied*sizeof(W_),
361 par_max_copied * sizeof(W_),
362 mblocks_allocated * BLOCKS_PER_MBLOCK
363 * BLOCK_SIZE_W * sizeof(W_),
364 slop * sizeof(W_),
365 TimeToNS(gc_sync_elapsed),
366 TimeToNS(gc_elapsed),
367 TimeToNS(gc_cpu));
368 }
369
370 GC_coll_cpu[gen] += gc_cpu;
371 GC_coll_elapsed[gen] += gc_elapsed;
372 if (GC_coll_max_pause[gen] < gc_elapsed) {
373 GC_coll_max_pause[gen] = gc_elapsed;
374 }
375
376 GC_tot_copied += (StgWord64) copied;
377 GC_par_max_copied += (StgWord64) par_max_copied;
378 GC_par_tot_copied += (StgWord64) par_tot_copied;
379 GC_tot_cpu += gc_cpu;
380
381 traceEventHeapSize(cap,
382 CAPSET_HEAP_DEFAULT,
383 mblocks_allocated * MBLOCK_SIZE_W * sizeof(W_));
384
385 if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
386 if (live > max_residency) {
387 max_residency = live;
388 }
389 current_residency = live;
390 residency_samples++;
391 cumulative_residency += live;
392 traceEventHeapLive(cap,
393 CAPSET_HEAP_DEFAULT,
394 live * sizeof(W_));
395 }
396
397 if (slop > max_slop) max_slop = slop;
398 }
399
400 if (rub_bell) {
401 debugBelch("\b\b\b \b\b\b");
402 rub_bell = 0;
403 }
404 }
405
406 /* -----------------------------------------------------------------------------
407    Called at the beginning of each retainer profiling pass
408 -------------------------------------------------------------------------- */
409 #ifdef PROFILING
410 void
411 stat_startRP(void)
412 {
413 Time user, elapsed;
414 getProcessTimes( &user, &elapsed );
415
416 RP_start_time = user;
417 RPe_start_time = elapsed;
418 }
419 #endif /* PROFILING */
420
421 /* -----------------------------------------------------------------------------
422    Called at the end of each retainer profiling pass
423 -------------------------------------------------------------------------- */
424
425 #ifdef PROFILING
426 void
427 stat_endRP(
428 nat retainerGeneration,
429 #ifdef DEBUG_RETAINER
430 nat maxCStackSize,
431 int maxStackSize,
432 #endif
433 double averageNumVisit)
434 {
435 Time user, elapsed;
436 getProcessTimes( &user, &elapsed );
437
438 RP_tot_time += user - RP_start_time;
439 RPe_tot_time += elapsed - RPe_start_time;
440
441 fprintf(prof_file, "Retainer Profiling: %d, at %f seconds\n",
442 retainerGeneration, mut_user_time_during_RP());
443 #ifdef DEBUG_RETAINER
444 fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
445 fprintf(prof_file, "\tMax auxiliary stack size = %u\n", maxStackSize);
446 #endif
447 fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit);
448 }
449 #endif /* PROFILING */
450
451 /* -----------------------------------------------------------------------------
452 Called at the beginning of each heap census
453 -------------------------------------------------------------------------- */
454 #ifdef PROFILING
455 void
456 stat_startHeapCensus(void)
457 {
458 Time user, elapsed;
459 getProcessTimes( &user, &elapsed );
460
461 HC_start_time = user;
462 HCe_start_time = elapsed;
463 }
464 #endif /* PROFILING */
465
466 /* -----------------------------------------------------------------------------
467 Called at the end of each heap census
468 -------------------------------------------------------------------------- */
469 #ifdef PROFILING
470 void
471 stat_endHeapCensus(void)
472 {
473 Time user, elapsed;
474 getProcessTimes( &user, &elapsed );
475
476 HC_tot_time += user - HC_start_time;
477 HCe_tot_time += elapsed - HCe_start_time;
478 }
479 #endif /* PROFILING */
480
481 /* -----------------------------------------------------------------------------
482 Called at the end of execution
483
484    NOTE: the allocation figures are not entirely accurate: they don't
485 take into account the few bytes at the end of the heap that
486 were left unused when the heap-check failed.
487 -------------------------------------------------------------------------- */
488
489 #ifdef DEBUG
490 #define TICK_VAR_INI(arity) \
491 StgInt SLOW_CALLS_##arity = 1; \
492 StgInt RIGHT_ARITY_##arity = 1; \
493 StgInt TAGGED_PTR_##arity = 0;
494
495 TICK_VAR_INI(1)
496 TICK_VAR_INI(2)
497
498 StgInt TOTAL_CALLS=1;
499 #endif
500
501 /* Report the value of a counter */
502 #define REPORT(counter) \
503 { \
504 showStgWord64(counter,temp,rtsTrue/*commas*/); \
505 statsPrintf(" (" #counter ") : %s\n",temp); \
506 }
507
508 /* Report the value of a counter as a percentage of another counter */
509 #define REPORT_PCT(counter,countertot) \
510 statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
511 counter*100.0/countertot)
512
513 #define TICK_PRINT(arity) \
514 REPORT(SLOW_CALLS_##arity); \
515 REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \
516 REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \
517 REPORT(RIGHT_ARITY_##arity); \
518 REPORT(TAGGED_PTR_##arity)
519
520 #define TICK_PRINT_TOT(arity) \
521 statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
522 SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
523
524 static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; }
525 static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; }
526
527
528 void
529 stat_exit (void)
530 {
531 generation *gen;
532 Time gc_cpu = 0;
533 Time gc_elapsed = 0;
534 Time init_cpu = 0;
535 Time init_elapsed = 0;
536 Time mut_cpu = 0;
537 Time mut_elapsed = 0;
538 Time exit_cpu = 0;
539 Time exit_elapsed = 0;
540 W_ tot_alloc;
541 W_ alloc;
542
543 if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
544
545 char temp[BIG_STRING_LEN];
546 Time tot_cpu;
547 Time tot_elapsed;
548 nat i, g, total_collections = 0;
549
550 getProcessTimes( &tot_cpu, &tot_elapsed );
551 tot_elapsed -= start_init_elapsed;
552
553 tot_alloc = calcTotalAllocated();
554
555 // allocated since the last GC
556 alloc = tot_alloc - GC_tot_alloc;
557 GC_tot_alloc = tot_alloc;
558
559 /* Count total garbage collections */
560 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
561 total_collections += generations[g].collections;
562
563 /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
564 if (tot_cpu == 0.0) tot_cpu = 1;
565 if (tot_elapsed == 0.0) tot_elapsed = 1;
566
567 if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
568 statsPrintf("%9" FMT_SizeT " %9.9s %9.9s", (W_)alloc*sizeof(W_), "", "");
569 statsPrintf(" %6.3f %6.3f\n\n", 0.0, 0.0);
570 }
571
572 for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
573 gc_cpu += GC_coll_cpu[i];
574 gc_elapsed += GC_coll_elapsed[i];
575 }
576
577 // heapCensus() is called by the GC, so RP and HC time are
578 // included in the GC stats. We therefore subtract them to
579 // obtain the actual GC cpu time.
580 gc_cpu -= PROF_VAL(RP_tot_time + HC_tot_time);
581 gc_elapsed -= PROF_VAL(RPe_tot_time + HCe_tot_time);
582
583 init_cpu = get_init_cpu();
584 init_elapsed = get_init_elapsed();
585
586 exit_cpu = end_exit_cpu - start_exit_cpu;
587 exit_elapsed = end_exit_elapsed - start_exit_elapsed;
588
589 mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed;
590
591 mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu
592 - PROF_VAL(RP_tot_time + HC_tot_time);
593 if (mut_cpu < 0) { mut_cpu = 0; }
594
595 if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
596 showStgWord64(GC_tot_alloc*sizeof(W_),
597 temp, rtsTrue/*commas*/);
598 statsPrintf("%16s bytes allocated in the heap\n", temp);
599
600 showStgWord64(GC_tot_copied*sizeof(W_),
601 temp, rtsTrue/*commas*/);
602 statsPrintf("%16s bytes copied during GC\n", temp);
603
604 if ( residency_samples > 0 ) {
605 showStgWord64(max_residency*sizeof(W_),
606 temp, rtsTrue/*commas*/);
607 statsPrintf("%16s bytes maximum residency (%" FMT_Word " sample(s))\n",
608 temp, residency_samples);
609 }
610
611 showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
612 statsPrintf("%16s bytes maximum slop\n", temp);
613
614 statsPrintf("%16" FMT_SizeT " MB total memory in use (%" FMT_SizeT " MB lost due to fragmentation)\n\n",
615 (size_t)(peak_mblocks_allocated * MBLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)),
616 (size_t)(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
617
618 /* Print garbage collections in each gen */
619 statsPrintf(" Tot time (elapsed) Avg pause Max pause\n");
620 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
621 gen = &generations[g];
622 statsPrintf(" Gen %2d %5d colls, %5d par %6.3fs %6.3fs %3.4fs %3.4fs\n",
623 gen->no,
624 gen->collections,
625 gen->par_collections,
626 TimeToSecondsDbl(GC_coll_cpu[g]),
627 TimeToSecondsDbl(GC_coll_elapsed[g]),
628 gen->collections == 0 ? 0 : TimeToSecondsDbl(GC_coll_elapsed[g] / gen->collections),
629 TimeToSecondsDbl(GC_coll_max_pause[g]));
630 }
631
632 #if defined(THREADED_RTS)
633 if (RtsFlags.ParFlags.parGcEnabled && n_capabilities > 1) {
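            /* The balance figure printed below is
             *   ((tot_copied / max_copied) - 1) / (n_capabilities - 1):
             * if a single GC thread copied everything then tot == max and the
             * result is 0%; if the copying was spread evenly over all N
             * threads then tot == N * max and the result is 100%. */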
634 statsPrintf("\n Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n",
635 100 * (((double)GC_par_tot_copied / (double)GC_par_max_copied) - 1)
636 / (n_capabilities - 1)
637 );
638 }
639 #endif
640 statsPrintf("\n");
641
642 #if defined(THREADED_RTS)
643 statsPrintf(" TASKS: %d (%d bound, %d peak workers (%d total), using -N%d)\n",
644 taskCount, taskCount - workerCount,
645 peakWorkerCount, workerCount,
646 n_capabilities);
647
648 statsPrintf("\n");
649
650 {
651 nat i;
652 SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
653 for (i = 0; i < n_capabilities; i++) {
654 sparks.created += capabilities[i]->spark_stats.created;
655 sparks.dud += capabilities[i]->spark_stats.dud;
656 sparks.overflowed+= capabilities[i]->spark_stats.overflowed;
657 sparks.converted += capabilities[i]->spark_stats.converted;
658 sparks.gcd += capabilities[i]->spark_stats.gcd;
659 sparks.fizzled += capabilities[i]->spark_stats.fizzled;
660 }
661
662 statsPrintf(" SPARKS: %" FMT_Word " (%" FMT_Word " converted, %" FMT_Word " overflowed, %" FMT_Word " dud, %" FMT_Word " GC'd, %" FMT_Word " fizzled)\n\n",
663 sparks.created + sparks.dud + sparks.overflowed,
664 sparks.converted, sparks.overflowed, sparks.dud,
665 sparks.gcd, sparks.fizzled);
666 }
667 #endif
668
669 statsPrintf(" INIT time %7.3fs (%7.3fs elapsed)\n",
670 TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed));
671
672 statsPrintf(" MUT time %7.3fs (%7.3fs elapsed)\n",
673 TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed));
674 statsPrintf(" GC time %7.3fs (%7.3fs elapsed)\n",
675 TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
676
677 #ifdef PROFILING
678 statsPrintf(" RP time %7.3fs (%7.3fs elapsed)\n",
679 TimeToSecondsDbl(RP_tot_time), TimeToSecondsDbl(RPe_tot_time));
680 statsPrintf(" PROF time %7.3fs (%7.3fs elapsed)\n",
681 TimeToSecondsDbl(HC_tot_time), TimeToSecondsDbl(HCe_tot_time));
682 #endif
683 statsPrintf(" EXIT time %7.3fs (%7.3fs elapsed)\n",
684 TimeToSecondsDbl(exit_cpu), TimeToSecondsDbl(exit_elapsed));
685 statsPrintf(" Total time %7.3fs (%7.3fs elapsed)\n\n",
686 TimeToSecondsDbl(tot_cpu), TimeToSecondsDbl(tot_elapsed));
687 #ifndef THREADED_RTS
688 statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n",
689 TimeToSecondsDbl(gc_cpu)*100/TimeToSecondsDbl(tot_cpu),
690 TimeToSecondsDbl(gc_elapsed)*100/TimeToSecondsDbl(tot_elapsed));
691 #endif
692
693 if (mut_cpu == 0) {
694 showStgWord64(0, temp, rtsTrue/*commas*/);
695 } else {
696 showStgWord64(
697 (StgWord64)((GC_tot_alloc*sizeof(W_)) / TimeToSecondsDbl(mut_cpu)),
698 temp, rtsTrue/*commas*/);
699 }
700
701 statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp);
702
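            /* Productivity is the mutator's share of CPU time: total CPU time
             * minus GC, profiling (RP and heap census) and INIT time,
             * expressed first as a percentage of total CPU time and then of
             * total elapsed time. */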
703 statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
704 TimeToSecondsDbl(tot_cpu - gc_cpu -
705 PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
706 / TimeToSecondsDbl(tot_cpu),
707 TimeToSecondsDbl(tot_cpu - gc_cpu -
708 PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
709 / TimeToSecondsDbl(tot_elapsed));
710
711 /*
712 TICK_PRINT(1);
713 TICK_PRINT(2);
714 REPORT(TOTAL_CALLS);
715 TICK_PRINT_TOT(1);
716 TICK_PRINT_TOT(2);
717 */
718
719 #if defined(THREADED_RTS) && defined(PROF_SPIN)
720 {
721 nat g;
722
723 statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
724 statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
725 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
726 statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
727 }
728 }
729 #endif
730 }
731
732 if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
733 char *fmt1, *fmt2;
734 if (RtsFlags.MiscFlags.machineReadable) {
735 fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
736 fmt2 = " ,(\"num_GCs\", \"%d\")\n"
737 " ,(\"average_bytes_used\", \"%ld\")\n"
738 " ,(\"max_bytes_used\", \"%ld\")\n"
739 " ,(\"num_byte_usage_samples\", \"%ld\")\n"
740 " ,(\"peak_megabytes_allocated\", \"%lu\")\n"
741 " ,(\"init_cpu_seconds\", \"%.3f\")\n"
742 " ,(\"init_wall_seconds\", \"%.3f\")\n"
743 " ,(\"mutator_cpu_seconds\", \"%.3f\")\n"
744 " ,(\"mutator_wall_seconds\", \"%.3f\")\n"
745 " ,(\"GC_cpu_seconds\", \"%.3f\")\n"
746 " ,(\"GC_wall_seconds\", \"%.3f\")\n"
747 " ]\n";
748 }
749 else {
750 fmt1 = "<<ghc: %llu bytes, ";
751 fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.3f INIT (%.3f elapsed), %.3f MUT (%.3f elapsed), %.3f GC (%.3f elapsed) :ghc>>\n";
752 }
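        /* Illustrative example (made-up numbers, shown wrapped here) of the
         * non-machine-readable one-line form built from fmt1/fmt2:
         *   <<ghc: 421440 bytes, 3 GCs, 36756/36756 avg/max bytes residency
         *   (1 samples), 1M in use, 0.001 INIT (0.001 elapsed), 0.002 MUT
         *   (0.002 elapsed), 0.001 GC (0.001 elapsed) :ghc>>
         */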
753 /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
754 statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
755 statsPrintf(fmt2,
756 total_collections,
757 residency_samples == 0 ? 0 :
758 cumulative_residency*sizeof(W_)/residency_samples,
759 max_residency*sizeof(W_),
760 residency_samples,
761 (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
762 TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed),
763 TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed),
764 TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
765 }
766
767 statsFlush();
768 statsClose();
769 }
770
771 if (GC_coll_cpu) {
772 stgFree(GC_coll_cpu);
773 GC_coll_cpu = NULL;
774 }
775 if (GC_coll_elapsed) {
776 stgFree(GC_coll_elapsed);
777 GC_coll_elapsed = NULL;
778 }
779 if (GC_coll_max_pause) {
780 stgFree(GC_coll_max_pause);
781 GC_coll_max_pause = NULL;
782 }
783 }
784
785 /* -----------------------------------------------------------------------------
786    statDescribeGens
787
788 Produce some detailed info on the state of the generational GC.
789 -------------------------------------------------------------------------- */
790 void
791 statDescribeGens(void)
792 {
793 nat g, mut, lge, i;
794 W_ gen_slop;
795 W_ tot_live, tot_slop;
796 W_ gen_live, gen_blocks;
797 bdescr *bd;
798 generation *gen;
799
800 debugBelch(
801 "----------------------------------------------------------\n"
802 " Gen Max Mut-list Blocks Large Live Slop\n"
803 " Blocks Bytes Objects \n"
804 "----------------------------------------------------------\n");
805
806 tot_live = 0;
807 tot_slop = 0;
808
809 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
810 gen = &generations[g];
811
812 for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
813 lge++;
814 }
815
816 gen_live = genLiveWords(gen);
817 gen_blocks = genLiveBlocks(gen);
818
819 mut = 0;
820 for (i = 0; i < n_capabilities; i++) {
821 mut += countOccupied(capabilities[i]->mut_lists[g]);
822
823 // Add the pinned object block.
824 bd = capabilities[i]->pinned_object_block;
825 if (bd != NULL) {
826 gen_live += bd->free - bd->start;
827 gen_blocks += bd->blocks;
828 }
829
830 gen_live += gcThreadLiveWords(i,g);
831 gen_blocks += gcThreadLiveBlocks(i,g);
832 }
833
834 debugBelch("%5d %7" FMT_Word " %9d", g, (W_)gen->max_blocks, mut);
835
836 gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;
837
838 debugBelch("%8" FMT_Word " %8d %8" FMT_Word " %8" FMT_Word "\n", gen_blocks, lge,
839 gen_live*(W_)sizeof(W_), gen_slop*(W_)sizeof(W_));
840 tot_live += gen_live;
841 tot_slop += gen_slop;
842 }
843 debugBelch("----------------------------------------------------------\n");
844 debugBelch("%41s%8" FMT_SizeT " %8" FMT_SizeT "\n",
845 "",tot_live*sizeof(W_),tot_slop*sizeof(W_));
846 debugBelch("----------------------------------------------------------\n");
847 debugBelch("\n");
848 }
849
850 /* -----------------------------------------------------------------------------
851    Stats available via a programmatic interface, so that e.g. GHCi can time
852 each compilation and expression evaluation.
853 -------------------------------------------------------------------------- */
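/* A minimal usage sketch (illustrative, not part of this file): a caller
 * checks that stats collection is enabled and then asks the RTS to fill in a
 * GCStats record, roughly what GHC.Stats does on the Haskell side:
 *
 *     GCStats s;
 *     if (getGCStatsEnabled()) {
 *         getGCStats(&s);
 *         // s.bytes_allocated, s.gc_cpu_seconds, ... are now populated
 *     }
 */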
854
855 extern HsInt64 getAllocations( void )
856 { return (HsInt64)GC_tot_alloc * sizeof(W_); }
857
858 /* EZY: I'm not convinced I got all the casting right. */
859
860 extern rtsBool getGCStatsEnabled( void )
861 {
862 return RtsFlags.GcFlags.giveStats != NO_GC_STATS;
863 }
864
865 extern void getGCStats( GCStats *s )
866 {
867 nat total_collections = 0;
868 nat g;
869 Time gc_cpu = 0;
870 Time gc_elapsed = 0;
871 Time current_elapsed = 0;
872 Time current_cpu = 0;
873
874 getProcessTimes(&current_cpu, &current_elapsed);
875
876 /* EZY: static inline'ify these */
877 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
878 total_collections += generations[g].collections;
879
880 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
881 gc_cpu += GC_coll_cpu[g];
882 gc_elapsed += GC_coll_elapsed[g];
883 }
884
885 s->bytes_allocated = GC_tot_alloc*(StgWord64)sizeof(W_);
886 s->num_gcs = total_collections;
887 s->num_byte_usage_samples = residency_samples;
888 s->max_bytes_used = max_residency*sizeof(W_);
889 s->cumulative_bytes_used = cumulative_residency*(StgWord64)sizeof(W_);
890 s->peak_megabytes_allocated = (StgWord64)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L));
891 s->bytes_copied = GC_tot_copied*(StgWord64)sizeof(W_);
892 s->max_bytes_slop = max_slop*(StgWord64)sizeof(W_);
893 s->current_bytes_used = current_residency*(StgWord64)sizeof(W_);
894 s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_);
895 /*
896 s->init_cpu_seconds = TimeToSecondsDbl(get_init_cpu());
897 s->init_wall_seconds = TimeToSecondsDbl(get_init_elapsed());
898 */
899 s->mutator_cpu_seconds = TimeToSecondsDbl(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
900 s->mutator_wall_seconds = TimeToSecondsDbl(current_elapsed- end_init_elapsed - gc_elapsed);
901 s->gc_cpu_seconds = TimeToSecondsDbl(gc_cpu);
902 s->gc_wall_seconds = TimeToSecondsDbl(gc_elapsed);
903 /* EZY: Being consistent with incremental output, but maybe should also discount init */
904 s->cpu_seconds = TimeToSecondsDbl(current_cpu);
905 s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed);
906 s->par_tot_bytes_copied = GC_par_tot_copied*(StgWord64)sizeof(W_);
907 s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_);
908 }
909 // extern void getTaskStats( TaskStats **s ) {}
910 #if 0
911 extern void getSparkStats( SparkCounters *s ) {
912 nat i;
913 s->created = 0;
914 s->dud = 0;
915 s->overflowed = 0;
916 s->converted = 0;
917 s->gcd = 0;
918 s->fizzled = 0;
919 for (i = 0; i < n_capabilities; i++) {
920 s->created += capabilities[i]->spark_stats.created;
921 s->dud += capabilities[i]->spark_stats.dud;
922 s->overflowed+= capabilities[i]->spark_stats.overflowed;
923 s->converted += capabilities[i]->spark_stats.converted;
924 s->gcd += capabilities[i]->spark_stats.gcd;
925 s->fizzled += capabilities[i]->spark_stats.fizzled;
926 }
927 }
928 #endif
929
930 /* -----------------------------------------------------------------------------
931 Dumping stuff in the stats file, or via the debug message interface
932 -------------------------------------------------------------------------- */
933
934 void
935 statsPrintf( char *s, ... )
936 {
937 FILE *sf = RtsFlags.GcFlags.statsFile;
938 va_list ap;
939
940 va_start(ap,s);
941 if (sf == NULL) {
942 vdebugBelch(s,ap);
943 } else {
944 vfprintf(sf, s, ap);
945 }
946 va_end(ap);
947 }
948
949 static void
950 statsFlush( void )
951 {
952 FILE *sf = RtsFlags.GcFlags.statsFile;
953 if (sf != NULL) {
954 fflush(sf);
955 }
956 }
957
958 static void
959 statsClose( void )
960 {
961 FILE *sf = RtsFlags.GcFlags.statsFile;
962 if (sf != NULL) {
963 fclose(sf);
964 }
965 }