remove unused includes, now that Storage.h & Stable.h are included by Rts.h
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13 #include "RtsUtils.h"
14 #include "RtsFlags.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "Timer.h"
18 #include "ProfHeap.h"
19 #include "Arena.h"
20 #include "RetainerProfile.h"
21 #include "LdvProfile.h"
22
23 #include <string.h>
24
25 #ifdef DEBUG
26 #include "Trace.h"
27 #endif
28
29 /*
30 * Profiling allocation arena.
31 */
32 Arena *prof_arena;
33
34 /*
35 * Global variables used to assign unique IDs to cc's, ccs's, and
36 * closure_cats
37 */
38
39 unsigned int CC_ID;
40 unsigned int CCS_ID;
41 unsigned int HP_ID;
42
43 /* figures for the profiling report.
44 */
45 static ullong total_alloc;
46 static lnat total_prof_ticks;
47
48 /* Globals for opening the profiling log file(s)
49 */
50 static char *prof_filename; /* prof report file name = <program>.prof */
51 FILE *prof_file;
52
53 static char *hp_filename; /* heap profile (hp2ps style) log file */
54 FILE *hp_file;
55
56 /* The Current Cost Centre Stack (for attributing costs)
57 */
58 CostCentreStack *CCCS;
59
60 /* Linked lists to keep track of cc's and ccs's that haven't
61 * been declared in the log file yet
62 */
63 CostCentre *CC_LIST;
64 CostCentreStack *CCS_LIST;
65
66 /*
67 * Built-in cost centres and cost-centre stacks:
68 *
69 * MAIN is the root of the cost-centre stack tree. If there are
70 * no _scc_s in the program, all costs will be attributed
71 * to MAIN.
72 *
73 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
74 * RTS operations apart from garbage collection are attributed
75 * to SYSTEM.
76 *
77 * GC is the storage manager / garbage collector.
78 *
79 * OVERHEAD gets all costs generated by the profiling system
80 * itself. These are costs that would not be incurred
81 * during non-profiled execution of the program.
82 *
83 * SUBSUMED is the one-and-only CCS placed on top-level functions.
84 * It indicates that all costs are to be attributed to the
85 * enclosing cost centre stack. SUBSUMED never accumulates
86 * any costs. The is_caf flag is set on the subsumed cost
87 * centre.
88 *
89 * DONT_CARE is a placeholder cost-centre we assign to static
90 * constructors. It should *never* accumulate any costs.
91 */
92
93 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_IS_BORING, );
94 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", CC_IS_BORING, );
95 CC_DECLARE(CC_GC, "GC", "GC", CC_IS_BORING, );
96 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_IS_CAF, );
97 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", CC_IS_CAF, );
98 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_IS_BORING, );
99
100 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
101 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
102 CCS_DECLARE(CCS_GC, CC_GC, );
103 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
104 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, );
105 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
106
/*
 * Uniques for the XML log-file format.  Each record written to the
 * XML-format log starts with one of these numbers.
 */
#define CC_UQ         1
#define CCS_UQ        2
#define TC_UQ         3
#define HEAP_OBJ_UQ   4
#define TIME_UPD_UQ   5
#define HEAP_UPD_UQ   6
116
117 /*
118 * Static Functions
119 */
120
121 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
122 CostCentreStack *new_ccs );
123 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
124 static void count_ticks ( CostCentreStack *ccs );
125 static void inherit_costs ( CostCentreStack *ccs );
126 static void reportCCS ( CostCentreStack *ccs, nat indent );
127 static void DecCCS ( CostCentreStack *ccs );
128 static void DecBackEdge ( CostCentreStack *ccs,
129 CostCentreStack *oldccs );
130 static CostCentreStack * CheckLoop ( CostCentreStack *ccs, CostCentre *cc );
131 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
132 static CostCentreStack * ActualPush ( CostCentreStack *, CostCentre * );
133 static CostCentreStack * IsInIndexTable ( IndexTable *, CostCentre * );
134 static IndexTable * AddToIndexTable ( IndexTable *, CostCentreStack *,
135 CostCentre *, unsigned int );
136 static void ccsSetSelected ( CostCentreStack *ccs );
137
138 static void initTimeProfiling ( void );
139 static void initProfilingLogFile( void );
140
141 static void reportCCS_XML ( CostCentreStack *ccs );
142
143 /* -----------------------------------------------------------------------------
144 Initialise the profiling environment
145 -------------------------------------------------------------------------- */
146
147 void
148 initProfiling1 (void)
149 {
150 // initialise our arena
151 prof_arena = newArena();
152
153 /* for the benefit of allocate()... */
154 CCCS = CCS_SYSTEM;
155
156 /* Initialize counters for IDs */
157 CC_ID = 1;
158 CCS_ID = 1;
159 HP_ID = 1;
160
161 /* Initialize Declaration lists to NULL */
162 CC_LIST = NULL;
163 CCS_LIST = NULL;
164
165 /* Register all the cost centres / stacks in the program
166 * CC_MAIN gets link = 0, all others have non-zero link.
167 */
168 REGISTER_CC(CC_MAIN);
169 REGISTER_CC(CC_SYSTEM);
170 REGISTER_CC(CC_GC);
171 REGISTER_CC(CC_OVERHEAD);
172 REGISTER_CC(CC_SUBSUMED);
173 REGISTER_CC(CC_DONT_CARE);
174 REGISTER_CCS(CCS_MAIN);
175 REGISTER_CCS(CCS_SYSTEM);
176 REGISTER_CCS(CCS_GC);
177 REGISTER_CCS(CCS_OVERHEAD);
178 REGISTER_CCS(CCS_SUBSUMED);
179 REGISTER_CCS(CCS_DONT_CARE);
180
181 CCCS = CCS_OVERHEAD;
182
183 /* cost centres are registered by the per-module
184 * initialisation code now...
185 */
186 }
187
188 void
189 initProfiling2 (void)
190 {
191 CostCentreStack *ccs, *next;
192
193 CCCS = CCS_SYSTEM;
194
195 /* Set up the log file, and dump the header and cost centre
196 * information into it. */
197 initProfilingLogFile();
198
199 /* find all the "special" cost centre stacks, and make them children
200 * of CCS_MAIN.
201 */
202 ASSERT(CCS_MAIN->prevStack == 0);
203 CCS_MAIN->root = CC_MAIN;
204 ccsSetSelected(CCS_MAIN);
205 DecCCS(CCS_MAIN);
206
207 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
208 next = ccs->prevStack;
209 ccs->prevStack = 0;
210 ActualPush_(CCS_MAIN,ccs->cc,ccs);
211 ccs->root = ccs->cc;
212 ccs = next;
213 }
214
215 if (RtsFlags.CcFlags.doCostCentres) {
216 initTimeProfiling();
217 }
218
219 if (RtsFlags.ProfFlags.doHeapProfile) {
220 initHeapProfiling();
221 }
222 }
223
224 // Decide whether closures with this CCS should contribute to the heap
225 // profile.
226 static void
227 ccsSetSelected( CostCentreStack *ccs )
228 {
229 if (RtsFlags.ProfFlags.modSelector) {
230 if (! strMatchesSelector( ccs->cc->module,
231 RtsFlags.ProfFlags.modSelector ) ) {
232 ccs->selected = 0;
233 return;
234 }
235 }
236 if (RtsFlags.ProfFlags.ccSelector) {
237 if (! strMatchesSelector( ccs->cc->label,
238 RtsFlags.ProfFlags.ccSelector ) ) {
239 ccs->selected = 0;
240 return;
241 }
242 }
243 if (RtsFlags.ProfFlags.ccsSelector) {
244 CostCentreStack *c;
245 for (c = ccs; c != NULL; c = c->prevStack) {
246 if ( strMatchesSelector( c->cc->label,
247 RtsFlags.ProfFlags.ccsSelector )) {
248 break;
249 }
250 }
251 if (c == NULL) {
252 ccs->selected = 0;
253 return;
254 }
255 }
256
257 ccs->selected = 1;
258 return;
259 }
260
261
262 static void
263 initProfilingLogFile(void)
264 {
265 /* Initialise the log file name */
266 prof_filename = arenaAlloc(prof_arena, strlen(prog_name) + 6);
267 sprintf(prof_filename, "%s.prof", prog_name);
268
269 /* open the log file */
270 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
271 debugBelch("Can't open profiling report file %s\n", prof_filename);
272 RtsFlags.CcFlags.doCostCentres = 0;
273 // The following line was added by Sung; retainer/LDV profiling may need
274 // two output files, i.e., <program>.prof/hp.
275 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
276 RtsFlags.ProfFlags.doHeapProfile = 0;
277 return;
278 }
279
280 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
281 /* dump the time, and the profiling interval */
282 fprintf(prof_file, "\"%s\"\n", time_str());
283 fprintf(prof_file, "\"%d ms\"\n", RtsFlags.MiscFlags.tickInterval);
284
285 /* declare all the cost centres */
286 {
287 CostCentre *cc;
288 for (cc = CC_LIST; cc != NULL; cc = cc->link) {
289 fprintf(prof_file, "%d %ld \"%s\" \"%s\"\n",
290 CC_UQ, cc->ccID, cc->label, cc->module);
291 }
292 }
293 }
294
295 if (RtsFlags.ProfFlags.doHeapProfile) {
296 /* Initialise the log file name */
297 hp_filename = arenaAlloc(prof_arena, strlen(prog_name) + 6);
298 sprintf(hp_filename, "%s.hp", prog_name);
299
300 /* open the log file */
301 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
302 debugBelch("Can't open profiling report file %s\n",
303 hp_filename);
304 RtsFlags.ProfFlags.doHeapProfile = 0;
305 return;
306 }
307 }
308 }
309
/*
 * Start time profiling by starting the profiling timer.
 *
 * Defined `static' to agree with the forward declaration earlier in
 * this file.
 */
static void
initTimeProfiling(void)
{
    /* Start ticking */
    startProfTimer();
}
316
317 void
318 endProfiling ( void )
319 {
320 if (RtsFlags.CcFlags.doCostCentres) {
321 stopProfTimer();
322 }
323 if (RtsFlags.ProfFlags.doHeapProfile) {
324 endHeapProfiling();
325 }
326 }
327
328 /* -----------------------------------------------------------------------------
329 Set cost centre stack when entering a function.
330 -------------------------------------------------------------------------- */
331 rtsBool entering_PAP;
332
333 void
334 EnterFunCCS ( CostCentreStack *ccsfn )
335 {
336 /* PAP_entry has already set CCCS for us */
337 if (entering_PAP) {
338 entering_PAP = rtsFalse;
339 return;
340 }
341
342 if (ccsfn->root->is_caf == CC_IS_CAF) {
343 CCCS = AppendCCS(CCCS,ccsfn);
344 } else {
345 CCCS = ccsfn;
346 }
347 }
348
349 /* -----------------------------------------------------------------------------
350 Cost-centre stack manipulation
351 -------------------------------------------------------------------------- */
352
#ifdef DEBUG
/*
 * Debug build only: a tracing wrapper around the real push operation.
 *
 * The wrapper is defined under the name PushCostCentre.  The #define
 * placed between its declarator and its body renames every later use
 * of PushCostCentre in this file to _PushCostCentre, so the call in
 * the body below reaches the real implementation, which is the
 * definition that follows the #endif.
 */
CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );

CostCentreStack *
PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
#define PushCostCentre _PushCostCentre
{
    IF_DEBUG(prof,
             traceBegin("pushing %s on ", cc->label);
             debugCCS(ccs);
             traceEnd(););

    return PushCostCentre(ccs,cc);
}
#endif
367
368 CostCentreStack *
369 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
370 {
371 CostCentreStack *temp_ccs;
372
373 if (ccs == EMPTY_STACK)
374 return ActualPush(ccs,cc);
375 else {
376 if (ccs->cc == cc)
377 return ccs;
378 else {
379 /* check if we've already memoized this stack */
380 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
381
382 if (temp_ccs != EMPTY_STACK)
383 return temp_ccs;
384 else {
385 temp_ccs = CheckLoop(ccs,cc);
386 if (temp_ccs != NULL) {
387 /* we have recursed to an older CCS. Mark this in
388 * the index table, and emit a "back edge" into the
389 * log file.
390 */
391 ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1);
392 DecBackEdge(temp_ccs,ccs);
393 return temp_ccs;
394 } else {
395 return ActualPush(ccs,cc);
396 }
397 }
398 }
399 }
400 }
401
402 static CostCentreStack *
403 CheckLoop ( CostCentreStack *ccs, CostCentre *cc )
404 {
405 while (ccs != EMPTY_STACK) {
406 if (ccs->cc == cc)
407 return ccs;
408 ccs = ccs->prevStack;
409 }
410 return NULL;
411 }
412
/* Append ccs2 to ccs1 (ignoring any CAF cost centre at the root of ccs2) */
414
#ifdef DEBUG
/*
 * Debug build only: a tracing wrapper around the real append
 * operation.
 *
 * The wrapper is defined under the name AppendCCS.  The #define
 * placed between its declarator and its body renames every later use
 * of AppendCCS in this file to _AppendCCS, so the call in the body
 * below reaches the real implementation, which is the definition that
 * follows the #endif.
 */
CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );

CostCentreStack *
AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
#define AppendCCS _AppendCCS
{
    IF_DEBUG(prof,
             if (ccs1 != ccs2) {
                 debugBelch("Appending ");
                 debugCCS(ccs1);
                 debugBelch(" to ");
                 debugCCS(ccs2);
                 debugBelch("\n");
             });

    return AppendCCS(ccs1,ccs2);
}
#endif
431
432 CostCentreStack *
433 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
434 {
435 CostCentreStack *ccs = NULL;
436
437 if (ccs1 == ccs2) {
438 return ccs1;
439 }
440
441 if (ccs2->cc->is_caf == CC_IS_CAF) {
442 return ccs1;
443 }
444
445 if (ccs2->prevStack != NULL) {
446 ccs = AppendCCS(ccs1, ccs2->prevStack);
447 }
448
449 return PushCostCentre(ccs,ccs2->cc);
450 }
451
452 static CostCentreStack *
453 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
454 {
455 CostCentreStack *new_ccs;
456
457 /* allocate space for a new CostCentreStack */
458 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
459
460 return ActualPush_(ccs, cc, new_ccs);
461 }
462
463 static CostCentreStack *
464 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
465 {
466 /* assign values to each member of the structure */
467 new_ccs->ccsID = CCS_ID++;
468 new_ccs->cc = cc;
469 new_ccs->prevStack = ccs;
470
471 new_ccs->indexTable = EMPTY_TABLE;
472
473 /* Initialise the various _scc_ counters to zero
474 */
475 new_ccs->scc_count = 0;
476
477 /* Initialize all other stats here. There should be a quick way
478 * that's easily used elsewhere too
479 */
480 new_ccs->time_ticks = 0;
481 new_ccs->mem_alloc = 0;
482 new_ccs->inherited_ticks = 0;
483 new_ccs->inherited_alloc = 0;
484
485 new_ccs->root = ccs->root;
486
487 // Set the selected field.
488 ccsSetSelected(new_ccs);
489
490 /* update the memoization table for the parent stack */
491 if (ccs != EMPTY_STACK)
492 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc,
493 0/*not a back edge*/);
494
495 /* make sure this CC is declared at the next heap/time sample */
496 DecCCS(new_ccs);
497
498 /* return a pointer to the new stack */
499 return new_ccs;
500 }
501
502
503 static CostCentreStack *
504 IsInIndexTable(IndexTable *it, CostCentre *cc)
505 {
506 while (it!=EMPTY_TABLE)
507 {
508 if (it->cc==cc)
509 return it->ccs;
510 else
511 it = it->next;
512 }
513
514 /* otherwise we never found it so return EMPTY_TABLE */
515 return EMPTY_TABLE;
516 }
517
518
519 static IndexTable *
520 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs,
521 CostCentre *cc, unsigned int back_edge)
522 {
523 IndexTable *new_it;
524
525 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
526
527 new_it->cc = cc;
528 new_it->ccs = new_ccs;
529 new_it->next = it;
530 new_it->back_edge = back_edge;
531 return new_it;
532 }
533
534
535 static void
536 DecCCS(CostCentreStack *ccs)
537 {
538 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
539 if (ccs->prevStack == EMPTY_STACK)
540 fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ,
541 ccs->ccsID, ccs->cc->ccID);
542 else
543 fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ,
544 ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID);
545 }
546 }
547
548 static void
549 DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs )
550 {
551 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
552 if (ccs->prevStack == EMPTY_STACK)
553 fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ,
554 ccs->ccsID, ccs->cc->ccID);
555 else
556 fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ,
557 ccs->ccsID, ccs->cc->ccID, oldccs->ccsID);
558 }
559 }
560
561 /* -----------------------------------------------------------------------------
562 Generating a time & allocation profiling report.
563 -------------------------------------------------------------------------- */
564
565 /* We omit certain system-related CCs and CCSs from the default
566 * reports, so as not to cause confusion.
567 */
568 static rtsBool
569 cc_to_ignore (CostCentre *cc)
570 {
571 if ( cc == CC_OVERHEAD
572 || cc == CC_DONT_CARE
573 || cc == CC_GC
574 || cc == CC_SYSTEM) {
575 return rtsTrue;
576 } else {
577 return rtsFalse;
578 }
579 }
580
581 static rtsBool
582 ccs_to_ignore (CostCentreStack *ccs)
583 {
584 if ( ccs == CCS_OVERHEAD
585 || ccs == CCS_DONT_CARE
586 || ccs == CCS_GC
587 || ccs == CCS_SYSTEM) {
588 return rtsTrue;
589 } else {
590 return rtsFalse;
591 }
592 }
593
594 /* -----------------------------------------------------------------------------
595 Generating the aggregated per-cost-centre time/alloc report.
596 -------------------------------------------------------------------------- */
597
598 static CostCentre *sorted_cc_list;
599
600 static void
601 aggregate_cc_costs( CostCentreStack *ccs )
602 {
603 IndexTable *i;
604
605 ccs->cc->mem_alloc += ccs->mem_alloc;
606 ccs->cc->time_ticks += ccs->time_ticks;
607
608 for (i = ccs->indexTable; i != 0; i = i->next) {
609 if (!i->back_edge) {
610 aggregate_cc_costs(i->ccs);
611 }
612 }
613 }
614
615 static void
616 insert_cc_in_sorted_list( CostCentre *new_cc )
617 {
618 CostCentre **prev, *cc;
619
620 prev = &sorted_cc_list;
621 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
622 if (new_cc->time_ticks > cc->time_ticks) {
623 new_cc->link = cc;
624 *prev = new_cc;
625 return;
626 } else {
627 prev = &(cc->link);
628 }
629 }
630 new_cc->link = NULL;
631 *prev = new_cc;
632 }
633
634 static void
635 report_per_cc_costs( void )
636 {
637 CostCentre *cc, *next;
638
639 aggregate_cc_costs(CCS_MAIN);
640 sorted_cc_list = NULL;
641
642 for (cc = CC_LIST; cc != NULL; cc = next) {
643 next = cc->link;
644 if (cc->time_ticks > total_prof_ticks/100
645 || cc->mem_alloc > total_alloc/100
646 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
647 insert_cc_in_sorted_list(cc);
648 }
649 }
650
651 fprintf(prof_file, "%-30s %-20s", "COST CENTRE", "MODULE");
652 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
653 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
654 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
655 }
656 fprintf(prof_file, "\n\n");
657
658 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
659 if (cc_to_ignore(cc)) {
660 continue;
661 }
662 fprintf(prof_file, "%-30s %-20s", cc->label, cc->module);
663 fprintf(prof_file, "%6.1f %6.1f",
664 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
665 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
666 total_alloc * 100)
667 );
668
669 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
670 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
671 (StgWord64)(cc->time_ticks), cc->mem_alloc);
672 }
673 fprintf(prof_file, "\n");
674 }
675
676 fprintf(prof_file,"\n\n");
677 }
678
679 /* -----------------------------------------------------------------------------
680 Generate the cost-centre-stack time/alloc report
681 -------------------------------------------------------------------------- */
682
683 static void
684 fprint_header( void )
685 {
686 fprintf(prof_file, "%-24s %-10s individual inherited\n", "", "");
687
688 fprintf(prof_file, "%-24s %-50s", "COST CENTRE", "MODULE");
689 fprintf(prof_file, "%6s %10s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc");
690
691 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
692 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
693 #if defined(PROFILING_DETAIL_COUNTS)
694 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
695 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
696 #endif
697 }
698
699 fprintf(prof_file, "\n\n");
700 }
701
702 void
703 reportCCSProfiling( void )
704 {
705 nat count;
706 char temp[128]; /* sigh: magic constant */
707
708 stopProfTimer();
709
710 total_prof_ticks = 0;
711 total_alloc = 0;
712 count_ticks(CCS_MAIN);
713
714 switch (RtsFlags.CcFlags.doCostCentres) {
715 case 0:
716 return;
717 case COST_CENTRES_XML:
718 gen_XML_logfile();
719 return;
720 default:
721 break;
722 }
723
724 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
725 time_str(), "Final");
726
727 fprintf(prof_file, "\n\t ");
728 fprintf(prof_file, " %s", prog_name);
729 fprintf(prof_file, " +RTS");
730 for (count = 0; rts_argv[count]; count++)
731 fprintf(prof_file, " %s", rts_argv[count]);
732 fprintf(prof_file, " -RTS");
733 for (count = 1; prog_argv[count]; count++)
734 fprintf(prof_file, " %s", prog_argv[count]);
735 fprintf(prof_file, "\n\n");
736
737 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
738 (double) total_prof_ticks *
739 (double) RtsFlags.MiscFlags.tickInterval / 1000,
740 (unsigned long) total_prof_ticks,
741 (int) RtsFlags.MiscFlags.tickInterval);
742
743 fprintf(prof_file, "\ttotal alloc = %11s bytes",
744 ullong_format_string(total_alloc * sizeof(W_),
745 temp, rtsTrue/*commas*/));
746
747 #if defined(PROFILING_DETAIL_COUNTS)
748 fprintf(prof_file, " (%lu closures)", total_allocs);
749 #endif
750 fprintf(prof_file, " (excludes profiling overheads)\n\n");
751
752 report_per_cc_costs();
753
754 inherit_costs(CCS_MAIN);
755
756 fprint_header();
757 reportCCS(pruneCCSTree(CCS_MAIN), 0);
758 }
759
760 static void
761 reportCCS(CostCentreStack *ccs, nat indent)
762 {
763 CostCentre *cc;
764 IndexTable *i;
765
766 cc = ccs->cc;
767
768 /* Only print cost centres with non 0 data ! */
769
770 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
771 ! ccs_to_ignore(ccs))
772 /* force printing of *all* cost centres if -P -P */
773 {
774
775 fprintf(prof_file, "%-*s%-*s %-50s",
776 indent, "", 24-indent, cc->label, cc->module);
777
778 fprintf(prof_file, "%6ld %11.0f %5.1f %5.1f %5.1f %5.1f",
779 ccs->ccsID, (double) ccs->scc_count,
780 total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
781 total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
782 total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
783 total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
784 );
785
786 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
787 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
788 (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
789 #if defined(PROFILING_DETAIL_COUNTS)
790 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
791 ccs->mem_allocs, ccs->thunk_count,
792 ccs->function_count, ccs->pap_count,
793 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
794 ccs->caffun_subsumed);
795 #endif
796 }
797 fprintf(prof_file, "\n");
798 }
799
800 for (i = ccs->indexTable; i != 0; i = i->next) {
801 if (!i->back_edge) {
802 reportCCS(i->ccs, indent+1);
803 }
804 }
805 }
806
807
808 /* Traverse the cost centre stack tree and accumulate
809 * ticks/allocations.
810 */
811 static void
812 count_ticks(CostCentreStack *ccs)
813 {
814 IndexTable *i;
815
816 if (!ccs_to_ignore(ccs)) {
817 total_alloc += ccs->mem_alloc;
818 total_prof_ticks += ccs->time_ticks;
819 }
820 for (i = ccs->indexTable; i != NULL; i = i->next)
821 if (!i->back_edge) {
822 count_ticks(i->ccs);
823 }
824 }
825
826 /* Traverse the cost centre stack tree and inherit ticks & allocs.
827 */
828 static void
829 inherit_costs(CostCentreStack *ccs)
830 {
831 IndexTable *i;
832
833 if (ccs_to_ignore(ccs)) { return; }
834
835 ccs->inherited_ticks += ccs->time_ticks;
836 ccs->inherited_alloc += ccs->mem_alloc;
837
838 for (i = ccs->indexTable; i != NULL; i = i->next)
839 if (!i->back_edge) {
840 inherit_costs(i->ccs);
841 ccs->inherited_ticks += i->ccs->inherited_ticks;
842 ccs->inherited_alloc += i->ccs->inherited_alloc;
843 }
844
845 return;
846 }
847
848 static CostCentreStack *
849 pruneCCSTree( CostCentreStack *ccs )
850 {
851 CostCentreStack *ccs1;
852 IndexTable *i, **prev;
853
854 prev = &ccs->indexTable;
855 for (i = ccs->indexTable; i != 0; i = i->next) {
856 if (i->back_edge) { continue; }
857
858 ccs1 = pruneCCSTree(i->ccs);
859 if (ccs1 == NULL) {
860 *prev = i->next;
861 } else {
862 prev = &(i->next);
863 }
864 }
865
866 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
867 /* force printing of *all* cost centres if -P -P */ )
868
869 || ( ccs->indexTable != 0 )
870 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
871 ) {
872 return ccs;
873 } else {
874 return NULL;
875 }
876 }
877
878 /* -----------------------------------------------------------------------------
879 Generate the XML time/allocation profile
880 -------------------------------------------------------------------------- */
881
882 void
883 gen_XML_logfile( void )
884 {
885 fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks);
886
887 reportCCS_XML(pruneCCSTree(CCS_MAIN));
888
889 fprintf(prof_file, " 0\n");
890 }
891
892 static void
893 reportCCS_XML(CostCentreStack *ccs)
894 {
895 CostCentre *cc;
896 IndexTable *i;
897
898 if (ccs_to_ignore(ccs)) { return; }
899
900 cc = ccs->cc;
901
902 fprintf(prof_file, " 1 %ld %" FMT_Word64 " %" FMT_Word64 " %" FMT_Word64,
903 ccs->ccsID, ccs->scc_count, (StgWord64)(ccs->time_ticks), ccs->mem_alloc);
904
905 for (i = ccs->indexTable; i != 0; i = i->next) {
906 if (!i->back_edge) {
907 reportCCS_XML(i->ccs);
908 }
909 }
910 }
911
912 void
913 fprintCCS( FILE *f, CostCentreStack *ccs )
914 {
915 fprintf(f,"<");
916 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
917 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
918 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
919 fprintf(f,",");
920 }
921 }
922 fprintf(f,">");
923 }
924
925 /* For calling from .cmm code, where we can't reliably refer to stderr */
926 void
927 fprintCCS_stderr( CostCentreStack *ccs )
928 {
929 fprintCCS(stderr, ccs);
930 }
931
#ifdef DEBUG
/*
 * Debug build only: print ccs with debugBelch as
 * "<module.label,module.label,...>", starting with the top of the
 * stack.  Printing stops at CCS_MAIN (which is not printed) or at the
 * bottom of the stack.
 */
void
debugCCS( CostCentreStack *ccs )
{
    CostCentreStack *below;

    debugBelch("<");
    while (ccs && ccs != CCS_MAIN) {
        below = ccs->prevStack;
        debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
        /* a separator only if another entry will be printed */
        if (below && below != CCS_MAIN) {
            debugBelch(",");
        }
        ccs = below;
    }
    debugBelch(">");
}
#endif /* DEBUG */
946
947 #endif /* PROFILING */