Scale columns in cost-centre-stack report to their contents
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20
21 #include <string.h>
22
23 #ifdef DEBUG
24 #include "Trace.h"
25 #endif
26
27 /*
28 * Profiling allocation arena.
29 */
30 Arena *prof_arena;
31
32 /*
33 * Global variables used to assign unique IDs to cc's, ccs's, and
34 * closure_cats
35 */
36
37 unsigned int CC_ID;
38 unsigned int CCS_ID;
39 unsigned int HP_ID;
40
41 /* figures for the profiling report.
42 */
43 static StgWord64 total_alloc;
44 static lnat total_prof_ticks;
45
46 /* Globals for opening the profiling log file(s)
47 */
48 static char *prof_filename; /* prof report file name = <program>.prof */
49 FILE *prof_file;
50
51 static char *hp_filename; /* heap profile (hp2ps style) log file */
52 FILE *hp_file;
53
54 /* The Current Cost Centre Stack (for attributing costs)
55 */
56 CostCentreStack *CCCS;
57
58 /* Linked lists to keep track of cc's and ccs's that haven't
59 * been declared in the log file yet
60 */
61 CostCentre *CC_LIST;
62 CostCentreStack *CCS_LIST;
63
64 /*
65 * Built-in cost centres and cost-centre stacks:
66 *
67 * MAIN is the root of the cost-centre stack tree. If there are
68 * no _scc_s in the program, all costs will be attributed
69 * to MAIN.
70 *
71 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
72 * RTS operations apart from garbage collection are attributed
73 * to SYSTEM.
74 *
75 * GC is the storage manager / garbage collector.
76 *
77 * OVERHEAD gets all costs generated by the profiling system
78 * itself. These are costs that would not be incurred
79 * during non-profiled execution of the program.
80 *
81 * SUBSUMED is the one-and-only CCS placed on top-level functions.
82 * It indicates that all costs are to be attributed to the
83 * enclosing cost centre stack. SUBSUMED never accumulates
84 * any costs. The is_caf flag is set on the subsumed cost
85 * centre.
86 *
87 * DONT_CARE is a placeholder cost-centre we assign to static
88 * constructors. It should *never* accumulate any costs.
89 */
90
91 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_IS_BORING, );
92 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", CC_IS_BORING, );
93 CC_DECLARE(CC_GC, "GC", "GC", CC_IS_BORING, );
94 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_IS_CAF, );
95 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", CC_IS_CAF, );
96 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_IS_BORING, );
97
98 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
99 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
100 CCS_DECLARE(CCS_GC, CC_GC, );
101 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
102 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, );
103 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
104
105 /*
106 * Uniques for the XML log-file format
107 */
108 #define CC_UQ 1
109 #define CCS_UQ 2
110 #define TC_UQ 3
111 #define HEAP_OBJ_UQ 4
112 #define TIME_UPD_UQ 5
113 #define HEAP_UPD_UQ 6
114
115 /*
116 * Static Functions
117 */
118
119 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
120 CostCentreStack *new_ccs );
121 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
122 static void count_ticks ( CostCentreStack *ccs );
123 static void inherit_costs ( CostCentreStack *ccs );
124 static void findCCSMaxLens ( CostCentreStack *ccs, nat indent, nat *max_label_len, nat *max_module_len );
125 static void logCCS ( CostCentreStack *ccs, nat indent, nat max_label_len, nat max_module_len );
126 static void reportCCS ( CostCentreStack *ccs );
127 static void DecCCS ( CostCentreStack *ccs );
128 static void DecBackEdge ( CostCentreStack *ccs,
129 CostCentreStack *oldccs );
130 static CostCentreStack * CheckLoop ( CostCentreStack *ccs, CostCentre *cc );
131 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
132 static CostCentreStack * ActualPush ( CostCentreStack *, CostCentre * );
133 static CostCentreStack * IsInIndexTable ( IndexTable *, CostCentre * );
134 static IndexTable * AddToIndexTable ( IndexTable *, CostCentreStack *,
135 CostCentre *, unsigned int );
136 static void ccsSetSelected ( CostCentreStack *ccs );
137
138 static void initTimeProfiling ( void );
139 static void initProfilingLogFile( void );
140
141 static void reportCCS_XML ( CostCentreStack *ccs );
142
143 /* -----------------------------------------------------------------------------
144 Initialise the profiling environment
145 -------------------------------------------------------------------------- */
146
147 void
148 initProfiling1 (void)
149 {
150 // initialise our arena
151 prof_arena = newArena();
152
153 /* for the benefit of allocate()... */
154 CCCS = CCS_SYSTEM;
155
156 /* Initialize counters for IDs */
157 CC_ID = 1;
158 CCS_ID = 1;
159 HP_ID = 1;
160
161 /* Initialize Declaration lists to NULL */
162 CC_LIST = NULL;
163 CCS_LIST = NULL;
164
165 /* Register all the cost centres / stacks in the program
166 * CC_MAIN gets link = 0, all others have non-zero link.
167 */
168 REGISTER_CC(CC_MAIN);
169 REGISTER_CC(CC_SYSTEM);
170 REGISTER_CC(CC_GC);
171 REGISTER_CC(CC_OVERHEAD);
172 REGISTER_CC(CC_SUBSUMED);
173 REGISTER_CC(CC_DONT_CARE);
174 REGISTER_CCS(CCS_MAIN);
175 REGISTER_CCS(CCS_SYSTEM);
176 REGISTER_CCS(CCS_GC);
177 REGISTER_CCS(CCS_OVERHEAD);
178 REGISTER_CCS(CCS_SUBSUMED);
179 REGISTER_CCS(CCS_DONT_CARE);
180
181 CCCS = CCS_OVERHEAD;
182
183 /* cost centres are registered by the per-module
184 * initialisation code now...
185 */
186 }
187
188 void
189 freeProfiling1 (void)
190 {
191 arenaFree(prof_arena);
192 }
193
194 void
195 initProfiling2 (void)
196 {
197 CostCentreStack *ccs, *next;
198
199 CCCS = CCS_SYSTEM;
200
201 /* Set up the log file, and dump the header and cost centre
202 * information into it. */
203 initProfilingLogFile();
204
205 /* find all the "special" cost centre stacks, and make them children
206 * of CCS_MAIN.
207 */
208 ASSERT(CCS_MAIN->prevStack == 0);
209 CCS_MAIN->root = CC_MAIN;
210 ccsSetSelected(CCS_MAIN);
211 DecCCS(CCS_MAIN);
212
213 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
214 next = ccs->prevStack;
215 ccs->prevStack = 0;
216 ActualPush_(CCS_MAIN,ccs->cc,ccs);
217 ccs->root = ccs->cc;
218 ccs = next;
219 }
220
221 if (RtsFlags.CcFlags.doCostCentres) {
222 initTimeProfiling();
223 }
224
225 if (RtsFlags.ProfFlags.doHeapProfile) {
226 initHeapProfiling();
227 }
228 }
229
230 // Decide whether closures with this CCS should contribute to the heap
231 // profile.
232 static void
233 ccsSetSelected( CostCentreStack *ccs )
234 {
235 if (RtsFlags.ProfFlags.modSelector) {
236 if (! strMatchesSelector( ccs->cc->module,
237 RtsFlags.ProfFlags.modSelector ) ) {
238 ccs->selected = 0;
239 return;
240 }
241 }
242 if (RtsFlags.ProfFlags.ccSelector) {
243 if (! strMatchesSelector( ccs->cc->label,
244 RtsFlags.ProfFlags.ccSelector ) ) {
245 ccs->selected = 0;
246 return;
247 }
248 }
249 if (RtsFlags.ProfFlags.ccsSelector) {
250 CostCentreStack *c;
251 for (c = ccs; c != NULL; c = c->prevStack) {
252 if ( strMatchesSelector( c->cc->label,
253 RtsFlags.ProfFlags.ccsSelector )) {
254 break;
255 }
256 }
257 if (c == NULL) {
258 ccs->selected = 0;
259 return;
260 }
261 }
262
263 ccs->selected = 1;
264 return;
265 }
266
267
268 static void
269 initProfilingLogFile(void)
270 {
271 char *prog;
272
273 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
274 strcpy(prog, prog_name);
275 #ifdef mingw32_HOST_OS
276 // on Windows, drop the .exe suffix if there is one
277 {
278 char *suff;
279 suff = strrchr(prog,'.');
280 if (suff != NULL && !strcmp(suff,".exe")) {
281 *suff = '\0';
282 }
283 }
284 #endif
285
286 if (RtsFlags.CcFlags.doCostCentres == 0 &&
287 RtsFlags.ProfFlags.doHeapProfile != HEAP_BY_RETAINER)
288 {
289 /* No need for the <prog>.prof file */
290 prof_filename = NULL;
291 prof_file = NULL;
292 }
293 else
294 {
295 /* Initialise the log file name */
296 prof_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
297 sprintf(prof_filename, "%s.prof", prog);
298
299 /* open the log file */
300 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
301 debugBelch("Can't open profiling report file %s\n", prof_filename);
302 RtsFlags.CcFlags.doCostCentres = 0;
303 // The following line was added by Sung; retainer/LDV profiling may need
304 // two output files, i.e., <program>.prof/hp.
305 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
306 RtsFlags.ProfFlags.doHeapProfile = 0;
307 return;
308 }
309
310 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
311 /* dump the time, and the profiling interval */
312 fprintf(prof_file, "\"%s\"\n", time_str());
313 fprintf(prof_file, "\"%d ms\"\n", RtsFlags.MiscFlags.tickInterval);
314
315 /* declare all the cost centres */
316 {
317 CostCentre *cc;
318 for (cc = CC_LIST; cc != NULL; cc = cc->link) {
319 fprintf(prof_file, "%d %ld \"%s\" \"%s\"\n",
320 CC_UQ, cc->ccID, cc->label, cc->module);
321 }
322 }
323 }
324 }
325
326 if (RtsFlags.ProfFlags.doHeapProfile) {
327 /* Initialise the log file name */
328 hp_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
329 sprintf(hp_filename, "%s.hp", prog);
330
331 /* open the log file */
332 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
333 debugBelch("Can't open profiling report file %s\n",
334 hp_filename);
335 RtsFlags.ProfFlags.doHeapProfile = 0;
336 return;
337 }
338 }
339 }
340
341 void
342 initTimeProfiling(void)
343 {
344 /* Start ticking */
345 startProfTimer();
346 };
347
348 void
349 endProfiling ( void )
350 {
351 if (RtsFlags.CcFlags.doCostCentres) {
352 stopProfTimer();
353 }
354 if (RtsFlags.ProfFlags.doHeapProfile) {
355 endHeapProfiling();
356 }
357 }
358
359 /* -----------------------------------------------------------------------------
360 Set cost centre stack when entering a function.
361 -------------------------------------------------------------------------- */
362 rtsBool entering_PAP;
363
364 void
365 EnterFunCCS ( CostCentreStack *ccsfn )
366 {
367 /* PAP_entry has already set CCCS for us */
368 if (entering_PAP) {
369 entering_PAP = rtsFalse;
370 return;
371 }
372
373 if (ccsfn->root->is_caf == CC_IS_CAF) {
374 CCCS = AppendCCS(CCCS,ccsfn);
375 } else {
376 CCCS = ccsfn;
377 }
378 }
379
380 /* -----------------------------------------------------------------------------
381 Cost-centre stack manipulation
382 -------------------------------------------------------------------------- */
383
384 #ifdef DEBUG
385 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
386 CostCentreStack *
387 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
388 #define PushCostCentre _PushCostCentre
389 {
390 IF_DEBUG(prof,
391 traceBegin("pushing %s on ", cc->label);
392 debugCCS(ccs);
393 traceEnd(););
394
395 return PushCostCentre(ccs,cc);
396 }
397 #endif
398
399 CostCentreStack *
400 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
401 {
402 CostCentreStack *temp_ccs;
403
404 if (ccs == EMPTY_STACK)
405 return ActualPush(ccs,cc);
406 else {
407 if (ccs->cc == cc)
408 return ccs;
409 else {
410 /* check if we've already memoized this stack */
411 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
412
413 if (temp_ccs != EMPTY_STACK)
414 return temp_ccs;
415 else {
416 temp_ccs = CheckLoop(ccs,cc);
417 if (temp_ccs != NULL) {
418 /* we have recursed to an older CCS. Mark this in
419 * the index table, and emit a "back edge" into the
420 * log file.
421 */
422 ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1);
423 DecBackEdge(temp_ccs,ccs);
424 return temp_ccs;
425 } else {
426 return ActualPush(ccs,cc);
427 }
428 }
429 }
430 }
431 }
432
433 static CostCentreStack *
434 CheckLoop ( CostCentreStack *ccs, CostCentre *cc )
435 {
436 while (ccs != EMPTY_STACK) {
437 if (ccs->cc == cc)
438 return ccs;
439 ccs = ccs->prevStack;
440 }
441 return NULL;
442 }
443
444 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
445
446 #ifdef DEBUG
447 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
448 CostCentreStack *
449 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
450 #define AppendCCS _AppendCCS
451 {
452 IF_DEBUG(prof,
453 if (ccs1 != ccs2) {
454 debugBelch("Appending ");
455 debugCCS(ccs1);
456 debugBelch(" to ");
457 debugCCS(ccs2);
458 debugBelch("\n");});
459 return AppendCCS(ccs1,ccs2);
460 }
461 #endif
462
463 CostCentreStack *
464 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
465 {
466 CostCentreStack *ccs = NULL;
467
468 if (ccs1 == ccs2) {
469 return ccs1;
470 }
471
472 if (ccs2->cc->is_caf == CC_IS_CAF) {
473 return ccs1;
474 }
475
476 if (ccs2->prevStack != NULL) {
477 ccs = AppendCCS(ccs1, ccs2->prevStack);
478 }
479
480 return PushCostCentre(ccs,ccs2->cc);
481 }
482
483 static CostCentreStack *
484 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
485 {
486 CostCentreStack *new_ccs;
487
488 /* allocate space for a new CostCentreStack */
489 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
490
491 return ActualPush_(ccs, cc, new_ccs);
492 }
493
494 static CostCentreStack *
495 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
496 {
497 /* assign values to each member of the structure */
498 new_ccs->ccsID = CCS_ID++;
499 new_ccs->cc = cc;
500 new_ccs->prevStack = ccs;
501
502 new_ccs->indexTable = EMPTY_TABLE;
503
504 /* Initialise the various _scc_ counters to zero
505 */
506 new_ccs->scc_count = 0;
507
508 /* Initialize all other stats here. There should be a quick way
509 * that's easily used elsewhere too
510 */
511 new_ccs->time_ticks = 0;
512 new_ccs->mem_alloc = 0;
513 new_ccs->inherited_ticks = 0;
514 new_ccs->inherited_alloc = 0;
515
516 new_ccs->root = ccs->root;
517
518 // Set the selected field.
519 ccsSetSelected(new_ccs);
520
521 /* update the memoization table for the parent stack */
522 if (ccs != EMPTY_STACK)
523 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc,
524 0/*not a back edge*/);
525
526 /* make sure this CC is declared at the next heap/time sample */
527 DecCCS(new_ccs);
528
529 /* return a pointer to the new stack */
530 return new_ccs;
531 }
532
533
534 static CostCentreStack *
535 IsInIndexTable(IndexTable *it, CostCentre *cc)
536 {
537 while (it!=EMPTY_TABLE)
538 {
539 if (it->cc==cc)
540 return it->ccs;
541 else
542 it = it->next;
543 }
544
545 /* otherwise we never found it so return EMPTY_TABLE */
546 return EMPTY_TABLE;
547 }
548
549
550 static IndexTable *
551 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs,
552 CostCentre *cc, unsigned int back_edge)
553 {
554 IndexTable *new_it;
555
556 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
557
558 new_it->cc = cc;
559 new_it->ccs = new_ccs;
560 new_it->next = it;
561 new_it->back_edge = back_edge;
562 return new_it;
563 }
564
565
566 static void
567 DecCCS(CostCentreStack *ccs)
568 {
569 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
570 if (ccs->prevStack == EMPTY_STACK)
571 fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ,
572 ccs->ccsID, ccs->cc->ccID);
573 else
574 fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ,
575 ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID);
576 }
577 }
578
579 static void
580 DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs )
581 {
582 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
583 if (ccs->prevStack == EMPTY_STACK)
584 fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ,
585 ccs->ccsID, ccs->cc->ccID);
586 else
587 fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ,
588 ccs->ccsID, ccs->cc->ccID, oldccs->ccsID);
589 }
590 }
591
592 /* -----------------------------------------------------------------------------
593 Generating a time & allocation profiling report.
594 -------------------------------------------------------------------------- */
595
596 /* We omit certain system-related CCs and CCSs from the default
597 * reports, so as not to cause confusion.
598 */
599 static rtsBool
600 cc_to_ignore (CostCentre *cc)
601 {
602 if ( cc == CC_OVERHEAD
603 || cc == CC_DONT_CARE
604 || cc == CC_GC
605 || cc == CC_SYSTEM) {
606 return rtsTrue;
607 } else {
608 return rtsFalse;
609 }
610 }
611
612 static rtsBool
613 ccs_to_ignore (CostCentreStack *ccs)
614 {
615 if ( ccs == CCS_OVERHEAD
616 || ccs == CCS_DONT_CARE
617 || ccs == CCS_GC
618 || ccs == CCS_SYSTEM) {
619 return rtsTrue;
620 } else {
621 return rtsFalse;
622 }
623 }
624
625 /* -----------------------------------------------------------------------------
626 Generating the aggregated per-cost-centre time/alloc report.
627 -------------------------------------------------------------------------- */
628
629 static CostCentre *sorted_cc_list;
630
631 static void
632 aggregate_cc_costs( CostCentreStack *ccs )
633 {
634 IndexTable *i;
635
636 ccs->cc->mem_alloc += ccs->mem_alloc;
637 ccs->cc->time_ticks += ccs->time_ticks;
638
639 for (i = ccs->indexTable; i != 0; i = i->next) {
640 if (!i->back_edge) {
641 aggregate_cc_costs(i->ccs);
642 }
643 }
644 }
645
646 static void
647 insert_cc_in_sorted_list( CostCentre *new_cc )
648 {
649 CostCentre **prev, *cc;
650
651 prev = &sorted_cc_list;
652 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
653 if (new_cc->time_ticks > cc->time_ticks) {
654 new_cc->link = cc;
655 *prev = new_cc;
656 return;
657 } else {
658 prev = &(cc->link);
659 }
660 }
661 new_cc->link = NULL;
662 *prev = new_cc;
663 }
664
665 static void
666 report_per_cc_costs( void )
667 {
668 CostCentre *cc, *next;
669 nat max_label_len, max_module_len;
670
671 aggregate_cc_costs(CCS_MAIN);
672 sorted_cc_list = NULL;
673
674 max_label_len = max_module_len = 0;
675
676 for (cc = CC_LIST; cc != NULL; cc = next) {
677 next = cc->link;
678 if (cc->time_ticks > total_prof_ticks/100
679 || cc->mem_alloc > total_alloc/100
680 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
681 insert_cc_in_sorted_list(cc);
682
683 max_label_len = stg_max(strlen(cc->label), max_label_len);
684 max_module_len = stg_max(strlen(cc->module), max_module_len);
685 }
686 }
687
688 fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
689 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
690 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
691 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
692 }
693 fprintf(prof_file, "\n\n");
694
695 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
696 if (cc_to_ignore(cc)) {
697 continue;
698 }
699 fprintf(prof_file, "%-*s %-*s", max_label_len, cc->label, max_module_len, cc->module);
700 fprintf(prof_file, "%6.1f %6.1f",
701 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
702 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
703 total_alloc * 100)
704 );
705
706 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
707 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
708 (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_));
709 }
710 fprintf(prof_file, "\n");
711 }
712
713 fprintf(prof_file,"\n\n");
714 }
715
716 /* -----------------------------------------------------------------------------
717 Generate the cost-centre-stack time/alloc report
718 -------------------------------------------------------------------------- */
719
720 static void
721 fprint_header( nat max_label_len, nat max_module_len )
722 {
723 fprintf(prof_file, "%-24s %-10s individual inherited\n", "", "");
724
725 fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
726 fprintf(prof_file, "%6s %10s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc");
727
728 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
729 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
730 #if defined(PROFILING_DETAIL_COUNTS)
731 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
732 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
733 #endif
734 }
735
736 fprintf(prof_file, "\n\n");
737 }
738
739 void
740 reportCCSProfiling( void )
741 {
742 nat count;
743 char temp[128]; /* sigh: magic constant */
744
745 stopProfTimer();
746
747 total_prof_ticks = 0;
748 total_alloc = 0;
749 count_ticks(CCS_MAIN);
750
751 switch (RtsFlags.CcFlags.doCostCentres) {
752 case 0:
753 return;
754 case COST_CENTRES_XML:
755 gen_XML_logfile();
756 return;
757 default:
758 break;
759 }
760
761 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
762 time_str(), "Final");
763
764 fprintf(prof_file, "\n\t ");
765 fprintf(prof_file, " %s", prog_name);
766 fprintf(prof_file, " +RTS");
767 for (count = 0; rts_argv[count]; count++)
768 fprintf(prof_file, " %s", rts_argv[count]);
769 fprintf(prof_file, " -RTS");
770 for (count = 1; prog_argv[count]; count++)
771 fprintf(prof_file, " %s", prog_argv[count]);
772 fprintf(prof_file, "\n\n");
773
774 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
775 (double) total_prof_ticks *
776 (double) RtsFlags.MiscFlags.tickInterval / 1000,
777 (unsigned long) total_prof_ticks,
778 (int) RtsFlags.MiscFlags.tickInterval);
779
780 fprintf(prof_file, "\ttotal alloc = %11s bytes",
781 showStgWord64(total_alloc * sizeof(W_),
782 temp, rtsTrue/*commas*/));
783
784 #if defined(PROFILING_DETAIL_COUNTS)
785 fprintf(prof_file, " (%lu closures)", total_allocs);
786 #endif
787 fprintf(prof_file, " (excludes profiling overheads)\n\n");
788
789 report_per_cc_costs();
790
791 inherit_costs(CCS_MAIN);
792
793 reportCCS(pruneCCSTree(CCS_MAIN));
794 }
795
796 static void
797 findCCSMaxLens(CostCentreStack *ccs, nat indent, nat *max_label_len, nat *max_module_len) {
798 CostCentre *cc;
799 IndexTable *i;
800
801 cc = ccs->cc;
802
803 *max_label_len = stg_max(*max_label_len, indent + strlen(cc->label));
804 *max_module_len = stg_max(*max_module_len, strlen(cc->module));
805
806 for (i = ccs->indexTable; i != 0; i = i->next) {
807 if (!i->back_edge) {
808 findCCSMaxLens(i->ccs, indent+1, max_label_len, max_module_len);
809 }
810 }
811 }
812
813 static void
814 logCCS(CostCentreStack *ccs, nat indent, nat max_label_len, nat max_module_len)
815 {
816 CostCentre *cc;
817 IndexTable *i;
818
819 cc = ccs->cc;
820
821 /* Only print cost centres with non 0 data ! */
822
823 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
824 ! ccs_to_ignore(ccs))
825 /* force printing of *all* cost centres if -P -P */
826 {
827
828 fprintf(prof_file, "%-*s%-*s %-*s",
829 indent, "", max_label_len-indent, cc->label, max_module_len, cc->module);
830
831 fprintf(prof_file, "%6ld %11.0f %5.1f %5.1f %5.1f %5.1f",
832 ccs->ccsID, (double) ccs->scc_count,
833 total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
834 total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
835 total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
836 total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
837 );
838
839 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
840 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
841 (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
842 #if defined(PROFILING_DETAIL_COUNTS)
843 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
844 ccs->mem_allocs, ccs->thunk_count,
845 ccs->function_count, ccs->pap_count,
846 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
847 ccs->caffun_subsumed);
848 #endif
849 }
850 fprintf(prof_file, "\n");
851 }
852
853 for (i = ccs->indexTable; i != 0; i = i->next) {
854 if (!i->back_edge) {
855 logCCS(i->ccs, indent+1, max_label_len, max_module_len);
856 }
857 }
858 }
859
860 static void
861 reportCCS(CostCentreStack *ccs)
862 {
863 nat max_label_len, max_module_len;
864 max_label_len = max_module_len = 0;
865
866 findCCSMaxLens(ccs, 0, &max_label_len, &max_module_len);
867
868 fprint_header(max_label_len, max_module_len);
869 logCCS(ccs, 0, max_label_len, max_module_len);
870 }
871
872
873 /* Traverse the cost centre stack tree and accumulate
874 * ticks/allocations.
875 */
876 static void
877 count_ticks(CostCentreStack *ccs)
878 {
879 IndexTable *i;
880
881 if (!ccs_to_ignore(ccs)) {
882 total_alloc += ccs->mem_alloc;
883 total_prof_ticks += ccs->time_ticks;
884 }
885 for (i = ccs->indexTable; i != NULL; i = i->next)
886 if (!i->back_edge) {
887 count_ticks(i->ccs);
888 }
889 }
890
891 /* Traverse the cost centre stack tree and inherit ticks & allocs.
892 */
893 static void
894 inherit_costs(CostCentreStack *ccs)
895 {
896 IndexTable *i;
897
898 if (ccs_to_ignore(ccs)) { return; }
899
900 ccs->inherited_ticks += ccs->time_ticks;
901 ccs->inherited_alloc += ccs->mem_alloc;
902
903 for (i = ccs->indexTable; i != NULL; i = i->next)
904 if (!i->back_edge) {
905 inherit_costs(i->ccs);
906 ccs->inherited_ticks += i->ccs->inherited_ticks;
907 ccs->inherited_alloc += i->ccs->inherited_alloc;
908 }
909
910 return;
911 }
912
913 static CostCentreStack *
914 pruneCCSTree( CostCentreStack *ccs )
915 {
916 CostCentreStack *ccs1;
917 IndexTable *i, **prev;
918
919 prev = &ccs->indexTable;
920 for (i = ccs->indexTable; i != 0; i = i->next) {
921 if (i->back_edge) { continue; }
922
923 ccs1 = pruneCCSTree(i->ccs);
924 if (ccs1 == NULL) {
925 *prev = i->next;
926 } else {
927 prev = &(i->next);
928 }
929 }
930
931 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
932 /* force printing of *all* cost centres if -P -P */ )
933
934 || ( ccs->indexTable != 0 )
935 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
936 ) {
937 return ccs;
938 } else {
939 return NULL;
940 }
941 }
942
943 /* -----------------------------------------------------------------------------
944 Generate the XML time/allocation profile
945 -------------------------------------------------------------------------- */
946
947 void
948 gen_XML_logfile( void )
949 {
950 fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks);
951
952 reportCCS_XML(pruneCCSTree(CCS_MAIN));
953
954 fprintf(prof_file, " 0\n");
955 }
956
957 static void
958 reportCCS_XML(CostCentreStack *ccs)
959 {
960 CostCentre *cc;
961 IndexTable *i;
962
963 if (ccs_to_ignore(ccs)) { return; }
964
965 cc = ccs->cc;
966
967 fprintf(prof_file, " 1 %ld %" FMT_Word64 " %" FMT_Word64 " %" FMT_Word64,
968 ccs->ccsID, ccs->scc_count, (StgWord64)(ccs->time_ticks), ccs->mem_alloc);
969
970 for (i = ccs->indexTable; i != 0; i = i->next) {
971 if (!i->back_edge) {
972 reportCCS_XML(i->ccs);
973 }
974 }
975 }
976
977 void
978 fprintCCS( FILE *f, CostCentreStack *ccs )
979 {
980 fprintf(f,"<");
981 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
982 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
983 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
984 fprintf(f,",");
985 }
986 }
987 fprintf(f,">");
988 }
989
990 /* For calling from .cmm code, where we can't reliably refer to stderr */
991 void
992 fprintCCS_stderr( CostCentreStack *ccs )
993 {
994 fprintCCS(stderr, ccs);
995 }
996
997 #ifdef DEBUG
998 void
999 debugCCS( CostCentreStack *ccs )
1000 {
1001 debugBelch("<");
1002 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1003 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
1004 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1005 debugBelch(",");
1006 }
1007 }
1008 debugBelch(">");
1009 }
1010 #endif /* DEBUG */
1011
1012 #endif /* PROFILING */