Fix the +RTS -V0 option introduced recently; it didn't work at all, now it does.
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13 #include "RtsUtils.h"
14 #include "RtsFlags.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "Timer.h"
18 #include "ProfHeap.h"
19 #include "Arena.h"
20 #include "RetainerProfile.h"
21 #include "LdvProfile.h"
22
23 #include <string.h>
24
25 #ifdef DEBUG
26 #include "Trace.h"
27 #endif
28
29 /*
30 * Profiling allocation arena.
31 */
32 Arena *prof_arena;
33
34 /*
35 * Global variables used to assign unique IDs to cc's, ccs's, and
36 * closure_cats
37 */
38
39 unsigned int CC_ID;
40 unsigned int CCS_ID;
41 unsigned int HP_ID;
42
43 /* figures for the profiling report.
44 */
45 static ullong total_alloc;
46 static lnat total_prof_ticks;
47
48 /* Globals for opening the profiling log file(s)
49 */
50 static char *prof_filename; /* prof report file name = <program>.prof */
51 FILE *prof_file;
52
53 static char *hp_filename; /* heap profile (hp2ps style) log file */
54 FILE *hp_file;
55
56 /* The Current Cost Centre Stack (for attributing costs)
57 */
58 CostCentreStack *CCCS;
59
60 /* Linked lists to keep track of cc's and ccs's that haven't
61 * been declared in the log file yet
62 */
63 CostCentre *CC_LIST;
64 CostCentreStack *CCS_LIST;
65
66 /*
67 * Built-in cost centres and cost-centre stacks:
68 *
69 * MAIN is the root of the cost-centre stack tree. If there are
70 * no _scc_s in the program, all costs will be attributed
71 * to MAIN.
72 *
73 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
74 * RTS operations apart from garbage collection are attributed
75 * to SYSTEM.
76 *
77 * GC is the storage manager / garbage collector.
78 *
79 * OVERHEAD gets all costs generated by the profiling system
80 * itself. These are costs that would not be incurred
81 * during non-profiled execution of the program.
82 *
83 * SUBSUMED is the one-and-only CCS placed on top-level functions.
84 * It indicates that all costs are to be attributed to the
85 * enclosing cost centre stack. SUBSUMED never accumulates
86 * any costs. The is_caf flag is set on the subsumed cost
87 * centre.
88 *
89 * DONT_CARE is a placeholder cost-centre we assign to static
90 * constructors. It should *never* accumulate any costs.
91 */
92
93 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_IS_BORING, );
94 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", CC_IS_BORING, );
95 CC_DECLARE(CC_GC, "GC", "GC", CC_IS_BORING, );
96 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_IS_CAF, );
97 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", CC_IS_CAF, );
98 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_IS_BORING, );
99
100 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
101 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
102 CCS_DECLARE(CCS_GC, CC_GC, );
103 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
104 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, );
105 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
106
107 /*
108 * Uniques for the XML log-file format
109 */
110 #define CC_UQ 1
111 #define CCS_UQ 2
112 #define TC_UQ 3
113 #define HEAP_OBJ_UQ 4
114 #define TIME_UPD_UQ 5
115 #define HEAP_UPD_UQ 6
116
117 /*
118 * Static Functions
119 */
120
121 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
122 CostCentreStack *new_ccs );
123 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
124 static void count_ticks ( CostCentreStack *ccs );
125 static void inherit_costs ( CostCentreStack *ccs );
126 static void reportCCS ( CostCentreStack *ccs, nat indent );
127 static void DecCCS ( CostCentreStack *ccs );
128 static void DecBackEdge ( CostCentreStack *ccs,
129 CostCentreStack *oldccs );
130 static CostCentreStack * CheckLoop ( CostCentreStack *ccs, CostCentre *cc );
131 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
132 static CostCentreStack * ActualPush ( CostCentreStack *, CostCentre * );
133 static CostCentreStack * IsInIndexTable ( IndexTable *, CostCentre * );
134 static IndexTable * AddToIndexTable ( IndexTable *, CostCentreStack *,
135 CostCentre *, unsigned int );
136 static void ccsSetSelected ( CostCentreStack *ccs );
137
138 static void initTimeProfiling ( void );
139 static void initProfilingLogFile( void );
140
141 static void reportCCS_XML ( CostCentreStack *ccs );
142
143 /* -----------------------------------------------------------------------------
144 Initialise the profiling environment
145 -------------------------------------------------------------------------- */
146
147 void
148 initProfiling1 (void)
149 {
150 // initialise our arena
151 prof_arena = newArena();
152
153 /* for the benefit of allocate()... */
154 CCCS = CCS_SYSTEM;
155
156 /* Initialize counters for IDs */
157 CC_ID = 1;
158 CCS_ID = 1;
159 HP_ID = 1;
160
161 /* Initialize Declaration lists to NULL */
162 CC_LIST = NULL;
163 CCS_LIST = NULL;
164
165 /* Register all the cost centres / stacks in the program
166 * CC_MAIN gets link = 0, all others have non-zero link.
167 */
168 REGISTER_CC(CC_MAIN);
169 REGISTER_CC(CC_SYSTEM);
170 REGISTER_CC(CC_GC);
171 REGISTER_CC(CC_OVERHEAD);
172 REGISTER_CC(CC_SUBSUMED);
173 REGISTER_CC(CC_DONT_CARE);
174 REGISTER_CCS(CCS_MAIN);
175 REGISTER_CCS(CCS_SYSTEM);
176 REGISTER_CCS(CCS_GC);
177 REGISTER_CCS(CCS_OVERHEAD);
178 REGISTER_CCS(CCS_SUBSUMED);
179 REGISTER_CCS(CCS_DONT_CARE);
180
181 CCCS = CCS_OVERHEAD;
182
183 /* cost centres are registered by the per-module
184 * initialisation code now...
185 */
186 }
187
188 void
189 freeProfiling1 (void)
190 {
191 arenaFree(prof_arena);
192 }
193
194 void
195 initProfiling2 (void)
196 {
197 CostCentreStack *ccs, *next;
198
199 CCCS = CCS_SYSTEM;
200
201 /* Set up the log file, and dump the header and cost centre
202 * information into it. */
203 initProfilingLogFile();
204
205 /* find all the "special" cost centre stacks, and make them children
206 * of CCS_MAIN.
207 */
208 ASSERT(CCS_MAIN->prevStack == 0);
209 CCS_MAIN->root = CC_MAIN;
210 ccsSetSelected(CCS_MAIN);
211 DecCCS(CCS_MAIN);
212
213 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
214 next = ccs->prevStack;
215 ccs->prevStack = 0;
216 ActualPush_(CCS_MAIN,ccs->cc,ccs);
217 ccs->root = ccs->cc;
218 ccs = next;
219 }
220
221 if (RtsFlags.CcFlags.doCostCentres) {
222 initTimeProfiling();
223 }
224
225 if (RtsFlags.ProfFlags.doHeapProfile) {
226 initHeapProfiling();
227 }
228 }
229
230 // Decide whether closures with this CCS should contribute to the heap
231 // profile.
232 static void
233 ccsSetSelected( CostCentreStack *ccs )
234 {
235 if (RtsFlags.ProfFlags.modSelector) {
236 if (! strMatchesSelector( ccs->cc->module,
237 RtsFlags.ProfFlags.modSelector ) ) {
238 ccs->selected = 0;
239 return;
240 }
241 }
242 if (RtsFlags.ProfFlags.ccSelector) {
243 if (! strMatchesSelector( ccs->cc->label,
244 RtsFlags.ProfFlags.ccSelector ) ) {
245 ccs->selected = 0;
246 return;
247 }
248 }
249 if (RtsFlags.ProfFlags.ccsSelector) {
250 CostCentreStack *c;
251 for (c = ccs; c != NULL; c = c->prevStack) {
252 if ( strMatchesSelector( c->cc->label,
253 RtsFlags.ProfFlags.ccsSelector )) {
254 break;
255 }
256 }
257 if (c == NULL) {
258 ccs->selected = 0;
259 return;
260 }
261 }
262
263 ccs->selected = 1;
264 return;
265 }
266
267
268 static void
269 initProfilingLogFile(void)
270 {
271 /* Initialise the log file name */
272 prof_filename = arenaAlloc(prof_arena, strlen(prog_name) + 6);
273 sprintf(prof_filename, "%s.prof", prog_name);
274
275 /* open the log file */
276 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
277 debugBelch("Can't open profiling report file %s\n", prof_filename);
278 RtsFlags.CcFlags.doCostCentres = 0;
279 // The following line was added by Sung; retainer/LDV profiling may need
280 // two output files, i.e., <program>.prof/hp.
281 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
282 RtsFlags.ProfFlags.doHeapProfile = 0;
283 return;
284 }
285
286 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
287 /* dump the time, and the profiling interval */
288 fprintf(prof_file, "\"%s\"\n", time_str());
289 fprintf(prof_file, "\"%d ms\"\n", RtsFlags.MiscFlags.tickInterval);
290
291 /* declare all the cost centres */
292 {
293 CostCentre *cc;
294 for (cc = CC_LIST; cc != NULL; cc = cc->link) {
295 fprintf(prof_file, "%d %ld \"%s\" \"%s\"\n",
296 CC_UQ, cc->ccID, cc->label, cc->module);
297 }
298 }
299 }
300
301 if (RtsFlags.ProfFlags.doHeapProfile) {
302 /* Initialise the log file name */
303 hp_filename = arenaAlloc(prof_arena, strlen(prog_name) + 6);
304 sprintf(hp_filename, "%s.hp", prog_name);
305
306 /* open the log file */
307 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
308 debugBelch("Can't open profiling report file %s\n",
309 hp_filename);
310 RtsFlags.ProfFlags.doHeapProfile = 0;
311 return;
312 }
313 }
314 }
315
316 void
317 initTimeProfiling(void)
318 {
319 /* Start ticking */
320 startProfTimer();
321 };
322
323 void
324 endProfiling ( void )
325 {
326 if (RtsFlags.CcFlags.doCostCentres) {
327 stopProfTimer();
328 }
329 if (RtsFlags.ProfFlags.doHeapProfile) {
330 endHeapProfiling();
331 }
332 }
333
334 /* -----------------------------------------------------------------------------
335 Set cost centre stack when entering a function.
336 -------------------------------------------------------------------------- */
337 rtsBool entering_PAP;
338
339 void
340 EnterFunCCS ( CostCentreStack *ccsfn )
341 {
342 /* PAP_entry has already set CCCS for us */
343 if (entering_PAP) {
344 entering_PAP = rtsFalse;
345 return;
346 }
347
348 if (ccsfn->root->is_caf == CC_IS_CAF) {
349 CCCS = AppendCCS(CCCS,ccsfn);
350 } else {
351 CCCS = ccsfn;
352 }
353 }
354
355 /* -----------------------------------------------------------------------------
356 Cost-centre stack manipulation
357 -------------------------------------------------------------------------- */
358
359 #ifdef DEBUG
360 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
361 CostCentreStack *
362 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
363 #define PushCostCentre _PushCostCentre
364 {
365 IF_DEBUG(prof,
366 traceBegin("pushing %s on ", cc->label);
367 debugCCS(ccs);
368 traceEnd(););
369
370 return PushCostCentre(ccs,cc);
371 }
372 #endif
373
374 CostCentreStack *
375 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
376 {
377 CostCentreStack *temp_ccs;
378
379 if (ccs == EMPTY_STACK)
380 return ActualPush(ccs,cc);
381 else {
382 if (ccs->cc == cc)
383 return ccs;
384 else {
385 /* check if we've already memoized this stack */
386 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
387
388 if (temp_ccs != EMPTY_STACK)
389 return temp_ccs;
390 else {
391 temp_ccs = CheckLoop(ccs,cc);
392 if (temp_ccs != NULL) {
393 /* we have recursed to an older CCS. Mark this in
394 * the index table, and emit a "back edge" into the
395 * log file.
396 */
397 ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1);
398 DecBackEdge(temp_ccs,ccs);
399 return temp_ccs;
400 } else {
401 return ActualPush(ccs,cc);
402 }
403 }
404 }
405 }
406 }
407
408 static CostCentreStack *
409 CheckLoop ( CostCentreStack *ccs, CostCentre *cc )
410 {
411 while (ccs != EMPTY_STACK) {
412 if (ccs->cc == cc)
413 return ccs;
414 ccs = ccs->prevStack;
415 }
416 return NULL;
417 }
418
419 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
420
421 #ifdef DEBUG
422 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
423 CostCentreStack *
424 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
425 #define AppendCCS _AppendCCS
426 {
427 IF_DEBUG(prof,
428 if (ccs1 != ccs2) {
429 debugBelch("Appending ");
430 debugCCS(ccs1);
431 debugBelch(" to ");
432 debugCCS(ccs2);
433 debugBelch("\n");});
434 return AppendCCS(ccs1,ccs2);
435 }
436 #endif
437
438 CostCentreStack *
439 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
440 {
441 CostCentreStack *ccs = NULL;
442
443 if (ccs1 == ccs2) {
444 return ccs1;
445 }
446
447 if (ccs2->cc->is_caf == CC_IS_CAF) {
448 return ccs1;
449 }
450
451 if (ccs2->prevStack != NULL) {
452 ccs = AppendCCS(ccs1, ccs2->prevStack);
453 }
454
455 return PushCostCentre(ccs,ccs2->cc);
456 }
457
458 static CostCentreStack *
459 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
460 {
461 CostCentreStack *new_ccs;
462
463 /* allocate space for a new CostCentreStack */
464 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
465
466 return ActualPush_(ccs, cc, new_ccs);
467 }
468
469 static CostCentreStack *
470 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
471 {
472 /* assign values to each member of the structure */
473 new_ccs->ccsID = CCS_ID++;
474 new_ccs->cc = cc;
475 new_ccs->prevStack = ccs;
476
477 new_ccs->indexTable = EMPTY_TABLE;
478
479 /* Initialise the various _scc_ counters to zero
480 */
481 new_ccs->scc_count = 0;
482
483 /* Initialize all other stats here. There should be a quick way
484 * that's easily used elsewhere too
485 */
486 new_ccs->time_ticks = 0;
487 new_ccs->mem_alloc = 0;
488 new_ccs->inherited_ticks = 0;
489 new_ccs->inherited_alloc = 0;
490
491 new_ccs->root = ccs->root;
492
493 // Set the selected field.
494 ccsSetSelected(new_ccs);
495
496 /* update the memoization table for the parent stack */
497 if (ccs != EMPTY_STACK)
498 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc,
499 0/*not a back edge*/);
500
501 /* make sure this CC is declared at the next heap/time sample */
502 DecCCS(new_ccs);
503
504 /* return a pointer to the new stack */
505 return new_ccs;
506 }
507
508
509 static CostCentreStack *
510 IsInIndexTable(IndexTable *it, CostCentre *cc)
511 {
512 while (it!=EMPTY_TABLE)
513 {
514 if (it->cc==cc)
515 return it->ccs;
516 else
517 it = it->next;
518 }
519
520 /* otherwise we never found it so return EMPTY_TABLE */
521 return EMPTY_TABLE;
522 }
523
524
525 static IndexTable *
526 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs,
527 CostCentre *cc, unsigned int back_edge)
528 {
529 IndexTable *new_it;
530
531 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
532
533 new_it->cc = cc;
534 new_it->ccs = new_ccs;
535 new_it->next = it;
536 new_it->back_edge = back_edge;
537 return new_it;
538 }
539
540
541 static void
542 DecCCS(CostCentreStack *ccs)
543 {
544 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
545 if (ccs->prevStack == EMPTY_STACK)
546 fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ,
547 ccs->ccsID, ccs->cc->ccID);
548 else
549 fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ,
550 ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID);
551 }
552 }
553
554 static void
555 DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs )
556 {
557 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
558 if (ccs->prevStack == EMPTY_STACK)
559 fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ,
560 ccs->ccsID, ccs->cc->ccID);
561 else
562 fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ,
563 ccs->ccsID, ccs->cc->ccID, oldccs->ccsID);
564 }
565 }
566
567 /* -----------------------------------------------------------------------------
568 Generating a time & allocation profiling report.
569 -------------------------------------------------------------------------- */
570
571 /* We omit certain system-related CCs and CCSs from the default
572 * reports, so as not to cause confusion.
573 */
574 static rtsBool
575 cc_to_ignore (CostCentre *cc)
576 {
577 if ( cc == CC_OVERHEAD
578 || cc == CC_DONT_CARE
579 || cc == CC_GC
580 || cc == CC_SYSTEM) {
581 return rtsTrue;
582 } else {
583 return rtsFalse;
584 }
585 }
586
587 static rtsBool
588 ccs_to_ignore (CostCentreStack *ccs)
589 {
590 if ( ccs == CCS_OVERHEAD
591 || ccs == CCS_DONT_CARE
592 || ccs == CCS_GC
593 || ccs == CCS_SYSTEM) {
594 return rtsTrue;
595 } else {
596 return rtsFalse;
597 }
598 }
599
600 /* -----------------------------------------------------------------------------
601 Generating the aggregated per-cost-centre time/alloc report.
602 -------------------------------------------------------------------------- */
603
604 static CostCentre *sorted_cc_list;
605
606 static void
607 aggregate_cc_costs( CostCentreStack *ccs )
608 {
609 IndexTable *i;
610
611 ccs->cc->mem_alloc += ccs->mem_alloc;
612 ccs->cc->time_ticks += ccs->time_ticks;
613
614 for (i = ccs->indexTable; i != 0; i = i->next) {
615 if (!i->back_edge) {
616 aggregate_cc_costs(i->ccs);
617 }
618 }
619 }
620
621 static void
622 insert_cc_in_sorted_list( CostCentre *new_cc )
623 {
624 CostCentre **prev, *cc;
625
626 prev = &sorted_cc_list;
627 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
628 if (new_cc->time_ticks > cc->time_ticks) {
629 new_cc->link = cc;
630 *prev = new_cc;
631 return;
632 } else {
633 prev = &(cc->link);
634 }
635 }
636 new_cc->link = NULL;
637 *prev = new_cc;
638 }
639
640 static void
641 report_per_cc_costs( void )
642 {
643 CostCentre *cc, *next;
644
645 aggregate_cc_costs(CCS_MAIN);
646 sorted_cc_list = NULL;
647
648 for (cc = CC_LIST; cc != NULL; cc = next) {
649 next = cc->link;
650 if (cc->time_ticks > total_prof_ticks/100
651 || cc->mem_alloc > total_alloc/100
652 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
653 insert_cc_in_sorted_list(cc);
654 }
655 }
656
657 fprintf(prof_file, "%-30s %-20s", "COST CENTRE", "MODULE");
658 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
659 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
660 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
661 }
662 fprintf(prof_file, "\n\n");
663
664 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
665 if (cc_to_ignore(cc)) {
666 continue;
667 }
668 fprintf(prof_file, "%-30s %-20s", cc->label, cc->module);
669 fprintf(prof_file, "%6.1f %6.1f",
670 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
671 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
672 total_alloc * 100)
673 );
674
675 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
676 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
677 (StgWord64)(cc->time_ticks), cc->mem_alloc);
678 }
679 fprintf(prof_file, "\n");
680 }
681
682 fprintf(prof_file,"\n\n");
683 }
684
685 /* -----------------------------------------------------------------------------
686 Generate the cost-centre-stack time/alloc report
687 -------------------------------------------------------------------------- */
688
689 static void
690 fprint_header( void )
691 {
692 fprintf(prof_file, "%-24s %-10s individual inherited\n", "", "");
693
694 fprintf(prof_file, "%-24s %-50s", "COST CENTRE", "MODULE");
695 fprintf(prof_file, "%6s %10s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc");
696
697 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
698 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
699 #if defined(PROFILING_DETAIL_COUNTS)
700 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
701 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
702 #endif
703 }
704
705 fprintf(prof_file, "\n\n");
706 }
707
708 void
709 reportCCSProfiling( void )
710 {
711 nat count;
712 char temp[128]; /* sigh: magic constant */
713
714 stopProfTimer();
715
716 total_prof_ticks = 0;
717 total_alloc = 0;
718 count_ticks(CCS_MAIN);
719
720 switch (RtsFlags.CcFlags.doCostCentres) {
721 case 0:
722 return;
723 case COST_CENTRES_XML:
724 gen_XML_logfile();
725 return;
726 default:
727 break;
728 }
729
730 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
731 time_str(), "Final");
732
733 fprintf(prof_file, "\n\t ");
734 fprintf(prof_file, " %s", prog_name);
735 fprintf(prof_file, " +RTS");
736 for (count = 0; rts_argv[count]; count++)
737 fprintf(prof_file, " %s", rts_argv[count]);
738 fprintf(prof_file, " -RTS");
739 for (count = 1; prog_argv[count]; count++)
740 fprintf(prof_file, " %s", prog_argv[count]);
741 fprintf(prof_file, "\n\n");
742
743 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
744 (double) total_prof_ticks *
745 (double) RtsFlags.MiscFlags.tickInterval / 1000,
746 (unsigned long) total_prof_ticks,
747 (int) RtsFlags.MiscFlags.tickInterval);
748
749 fprintf(prof_file, "\ttotal alloc = %11s bytes",
750 ullong_format_string(total_alloc * sizeof(W_),
751 temp, rtsTrue/*commas*/));
752
753 #if defined(PROFILING_DETAIL_COUNTS)
754 fprintf(prof_file, " (%lu closures)", total_allocs);
755 #endif
756 fprintf(prof_file, " (excludes profiling overheads)\n\n");
757
758 report_per_cc_costs();
759
760 inherit_costs(CCS_MAIN);
761
762 fprint_header();
763 reportCCS(pruneCCSTree(CCS_MAIN), 0);
764 }
765
766 static void
767 reportCCS(CostCentreStack *ccs, nat indent)
768 {
769 CostCentre *cc;
770 IndexTable *i;
771
772 cc = ccs->cc;
773
774 /* Only print cost centres with non 0 data ! */
775
776 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
777 ! ccs_to_ignore(ccs))
778 /* force printing of *all* cost centres if -P -P */
779 {
780
781 fprintf(prof_file, "%-*s%-*s %-50s",
782 indent, "", 24-indent, cc->label, cc->module);
783
784 fprintf(prof_file, "%6ld %11.0f %5.1f %5.1f %5.1f %5.1f",
785 ccs->ccsID, (double) ccs->scc_count,
786 total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
787 total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
788 total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
789 total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
790 );
791
792 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
793 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
794 (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
795 #if defined(PROFILING_DETAIL_COUNTS)
796 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
797 ccs->mem_allocs, ccs->thunk_count,
798 ccs->function_count, ccs->pap_count,
799 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
800 ccs->caffun_subsumed);
801 #endif
802 }
803 fprintf(prof_file, "\n");
804 }
805
806 for (i = ccs->indexTable; i != 0; i = i->next) {
807 if (!i->back_edge) {
808 reportCCS(i->ccs, indent+1);
809 }
810 }
811 }
812
813
814 /* Traverse the cost centre stack tree and accumulate
815 * ticks/allocations.
816 */
817 static void
818 count_ticks(CostCentreStack *ccs)
819 {
820 IndexTable *i;
821
822 if (!ccs_to_ignore(ccs)) {
823 total_alloc += ccs->mem_alloc;
824 total_prof_ticks += ccs->time_ticks;
825 }
826 for (i = ccs->indexTable; i != NULL; i = i->next)
827 if (!i->back_edge) {
828 count_ticks(i->ccs);
829 }
830 }
831
832 /* Traverse the cost centre stack tree and inherit ticks & allocs.
833 */
834 static void
835 inherit_costs(CostCentreStack *ccs)
836 {
837 IndexTable *i;
838
839 if (ccs_to_ignore(ccs)) { return; }
840
841 ccs->inherited_ticks += ccs->time_ticks;
842 ccs->inherited_alloc += ccs->mem_alloc;
843
844 for (i = ccs->indexTable; i != NULL; i = i->next)
845 if (!i->back_edge) {
846 inherit_costs(i->ccs);
847 ccs->inherited_ticks += i->ccs->inherited_ticks;
848 ccs->inherited_alloc += i->ccs->inherited_alloc;
849 }
850
851 return;
852 }
853
854 static CostCentreStack *
855 pruneCCSTree( CostCentreStack *ccs )
856 {
857 CostCentreStack *ccs1;
858 IndexTable *i, **prev;
859
860 prev = &ccs->indexTable;
861 for (i = ccs->indexTable; i != 0; i = i->next) {
862 if (i->back_edge) { continue; }
863
864 ccs1 = pruneCCSTree(i->ccs);
865 if (ccs1 == NULL) {
866 *prev = i->next;
867 } else {
868 prev = &(i->next);
869 }
870 }
871
872 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
873 /* force printing of *all* cost centres if -P -P */ )
874
875 || ( ccs->indexTable != 0 )
876 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
877 ) {
878 return ccs;
879 } else {
880 return NULL;
881 }
882 }
883
884 /* -----------------------------------------------------------------------------
885 Generate the XML time/allocation profile
886 -------------------------------------------------------------------------- */
887
888 void
889 gen_XML_logfile( void )
890 {
891 fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks);
892
893 reportCCS_XML(pruneCCSTree(CCS_MAIN));
894
895 fprintf(prof_file, " 0\n");
896 }
897
898 static void
899 reportCCS_XML(CostCentreStack *ccs)
900 {
901 CostCentre *cc;
902 IndexTable *i;
903
904 if (ccs_to_ignore(ccs)) { return; }
905
906 cc = ccs->cc;
907
908 fprintf(prof_file, " 1 %ld %" FMT_Word64 " %" FMT_Word64 " %" FMT_Word64,
909 ccs->ccsID, ccs->scc_count, (StgWord64)(ccs->time_ticks), ccs->mem_alloc);
910
911 for (i = ccs->indexTable; i != 0; i = i->next) {
912 if (!i->back_edge) {
913 reportCCS_XML(i->ccs);
914 }
915 }
916 }
917
918 void
919 fprintCCS( FILE *f, CostCentreStack *ccs )
920 {
921 fprintf(f,"<");
922 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
923 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
924 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
925 fprintf(f,",");
926 }
927 }
928 fprintf(f,">");
929 }
930
931 /* For calling from .cmm code, where we can't reliably refer to stderr */
932 void
933 fprintCCS_stderr( CostCentreStack *ccs )
934 {
935 fprintCCS(stderr, ccs);
936 }
937
938 #ifdef DEBUG
939 void
940 debugCCS( CostCentreStack *ccs )
941 {
942 debugBelch("<");
943 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
944 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
945 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
946 debugBelch(",");
947 }
948 }
949 debugBelch(">");
950 }
951 #endif /* DEBUG */
952
953 #endif /* PROFILING */