Fix warnings traceBegin/traceEnd implicitly declared
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13 #include "RtsUtils.h"
14 #include "RtsFlags.h"
15 #include "Profiling.h"
16 #include "Storage.h"
17 #include "Proftimer.h"
18 #include "Timer.h"
19 #include "ProfHeap.h"
20 #include "Arena.h"
21 #include "RetainerProfile.h"
22 #include "LdvProfile.h"
23
24 #include <string.h>
25
26 #ifdef DEBUG
27 #include "Trace.h"
28 #endif
29
30 /*
31 * Profiling allocation arena.
32 */
33 Arena *prof_arena;
34
35 /*
36 * Global variables used to assign unique IDs to cc's, ccs's, and
37 * closure_cats
38 */
39
40 unsigned int CC_ID;
41 unsigned int CCS_ID;
42 unsigned int HP_ID;
43
44 /* figures for the profiling report.
45 */
46 static ullong total_alloc;
47 static lnat total_prof_ticks;
48
49 /* Globals for opening the profiling log file(s)
50 */
51 static char *prof_filename; /* prof report file name = <program>.prof */
52 FILE *prof_file;
53
54 static char *hp_filename; /* heap profile (hp2ps style) log file */
55 FILE *hp_file;
56
57 /* The Current Cost Centre Stack (for attributing costs)
58 */
59 CostCentreStack *CCCS;
60
61 /* Linked lists to keep track of cc's and ccs's that haven't
62 * been declared in the log file yet
63 */
64 CostCentre *CC_LIST;
65 CostCentreStack *CCS_LIST;
66
67 /*
68 * Built-in cost centres and cost-centre stacks:
69 *
70 * MAIN is the root of the cost-centre stack tree. If there are
71 * no _scc_s in the program, all costs will be attributed
72 * to MAIN.
73 *
74 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
75 * RTS operations apart from garbage collection are attributed
76 * to SYSTEM.
77 *
78 * GC is the storage manager / garbage collector.
79 *
80 * OVERHEAD gets all costs generated by the profiling system
81 * itself. These are costs that would not be incurred
82 * during non-profiled execution of the program.
83 *
84 * SUBSUMED is the one-and-only CCS placed on top-level functions.
85 * It indicates that all costs are to be attributed to the
86 * enclosing cost centre stack. SUBSUMED never accumulates
87 * any costs. The is_caf flag is set on the subsumed cost
88 * centre.
89 *
90 * DONT_CARE is a placeholder cost-centre we assign to static
91 * constructors. It should *never* accumulate any costs.
92 */
93
94 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_IS_BORING, );
95 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", CC_IS_BORING, );
96 CC_DECLARE(CC_GC, "GC", "GC", CC_IS_BORING, );
97 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_IS_CAF, );
98 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", CC_IS_CAF, );
99 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_IS_BORING, );
100
101 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
102 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
103 CCS_DECLARE(CCS_GC, CC_GC, );
104 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
105 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, );
106 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
107
/*
 * Uniques for the XML log-file format: the tag written at the start of
 * each kind of record (see the fprintf calls that pass CC_UQ, CCS_UQ
 * and TIME_UPD_UQ).
 */
enum {
    CC_UQ       = 1,
    CCS_UQ      = 2,
    TC_UQ       = 3,
    HEAP_OBJ_UQ = 4,
    TIME_UPD_UQ = 5,
    HEAP_UPD_UQ = 6
};
117
118 /*
119 * Static Functions
120 */
121
122 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
123 CostCentreStack *new_ccs );
124 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
125 static void count_ticks ( CostCentreStack *ccs );
126 static void inherit_costs ( CostCentreStack *ccs );
127 static void reportCCS ( CostCentreStack *ccs, nat indent );
128 static void DecCCS ( CostCentreStack *ccs );
129 static void DecBackEdge ( CostCentreStack *ccs,
130 CostCentreStack *oldccs );
131 static CostCentreStack * CheckLoop ( CostCentreStack *ccs, CostCentre *cc );
132 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
133 static CostCentreStack * ActualPush ( CostCentreStack *, CostCentre * );
134 static CostCentreStack * IsInIndexTable ( IndexTable *, CostCentre * );
135 static IndexTable * AddToIndexTable ( IndexTable *, CostCentreStack *,
136 CostCentre *, unsigned int );
137 static void ccsSetSelected ( CostCentreStack *ccs );
138
139 static void initTimeProfiling ( void );
140 static void initProfilingLogFile( void );
141
142 static void reportCCS_XML ( CostCentreStack *ccs );
143
144 /* -----------------------------------------------------------------------------
145 Initialise the profiling environment
146 -------------------------------------------------------------------------- */
147
148 void
149 initProfiling1 (void)
150 {
151 // initialise our arena
152 prof_arena = newArena();
153
154 /* for the benefit of allocate()... */
155 CCCS = CCS_SYSTEM;
156
157 /* Initialize counters for IDs */
158 CC_ID = 1;
159 CCS_ID = 1;
160 HP_ID = 1;
161
162 /* Initialize Declaration lists to NULL */
163 CC_LIST = NULL;
164 CCS_LIST = NULL;
165
166 /* Register all the cost centres / stacks in the program
167 * CC_MAIN gets link = 0, all others have non-zero link.
168 */
169 REGISTER_CC(CC_MAIN);
170 REGISTER_CC(CC_SYSTEM);
171 REGISTER_CC(CC_GC);
172 REGISTER_CC(CC_OVERHEAD);
173 REGISTER_CC(CC_SUBSUMED);
174 REGISTER_CC(CC_DONT_CARE);
175 REGISTER_CCS(CCS_MAIN);
176 REGISTER_CCS(CCS_SYSTEM);
177 REGISTER_CCS(CCS_GC);
178 REGISTER_CCS(CCS_OVERHEAD);
179 REGISTER_CCS(CCS_SUBSUMED);
180 REGISTER_CCS(CCS_DONT_CARE);
181
182 CCCS = CCS_OVERHEAD;
183
184 /* cost centres are registered by the per-module
185 * initialisation code now...
186 */
187 }
188
189 void
190 initProfiling2 (void)
191 {
192 CostCentreStack *ccs, *next;
193
194 CCCS = CCS_SYSTEM;
195
196 /* Set up the log file, and dump the header and cost centre
197 * information into it. */
198 initProfilingLogFile();
199
200 /* find all the "special" cost centre stacks, and make them children
201 * of CCS_MAIN.
202 */
203 ASSERT(CCS_MAIN->prevStack == 0);
204 CCS_MAIN->root = CC_MAIN;
205 ccsSetSelected(CCS_MAIN);
206 DecCCS(CCS_MAIN);
207
208 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
209 next = ccs->prevStack;
210 ccs->prevStack = 0;
211 ActualPush_(CCS_MAIN,ccs->cc,ccs);
212 ccs->root = ccs->cc;
213 ccs = next;
214 }
215
216 if (RtsFlags.CcFlags.doCostCentres) {
217 initTimeProfiling();
218 }
219
220 if (RtsFlags.ProfFlags.doHeapProfile) {
221 initHeapProfiling();
222 }
223 }
224
225 // Decide whether closures with this CCS should contribute to the heap
226 // profile.
227 static void
228 ccsSetSelected( CostCentreStack *ccs )
229 {
230 if (RtsFlags.ProfFlags.modSelector) {
231 if (! strMatchesSelector( ccs->cc->module,
232 RtsFlags.ProfFlags.modSelector ) ) {
233 ccs->selected = 0;
234 return;
235 }
236 }
237 if (RtsFlags.ProfFlags.ccSelector) {
238 if (! strMatchesSelector( ccs->cc->label,
239 RtsFlags.ProfFlags.ccSelector ) ) {
240 ccs->selected = 0;
241 return;
242 }
243 }
244 if (RtsFlags.ProfFlags.ccsSelector) {
245 CostCentreStack *c;
246 for (c = ccs; c != NULL; c = c->prevStack) {
247 if ( strMatchesSelector( c->cc->label,
248 RtsFlags.ProfFlags.ccsSelector )) {
249 break;
250 }
251 }
252 if (c == NULL) {
253 ccs->selected = 0;
254 return;
255 }
256 }
257
258 ccs->selected = 1;
259 return;
260 }
261
262
263 static void
264 initProfilingLogFile(void)
265 {
266 /* Initialise the log file name */
267 prof_filename = arenaAlloc(prof_arena, strlen(prog_name) + 6);
268 sprintf(prof_filename, "%s.prof", prog_name);
269
270 /* open the log file */
271 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
272 debugBelch("Can't open profiling report file %s\n", prof_filename);
273 RtsFlags.CcFlags.doCostCentres = 0;
274 // The following line was added by Sung; retainer/LDV profiling may need
275 // two output files, i.e., <program>.prof/hp.
276 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
277 RtsFlags.ProfFlags.doHeapProfile = 0;
278 return;
279 }
280
281 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
282 /* dump the time, and the profiling interval */
283 fprintf(prof_file, "\"%s\"\n", time_str());
284 fprintf(prof_file, "\"%d ms\"\n", RtsFlags.MiscFlags.tickInterval);
285
286 /* declare all the cost centres */
287 {
288 CostCentre *cc;
289 for (cc = CC_LIST; cc != NULL; cc = cc->link) {
290 fprintf(prof_file, "%d %ld \"%s\" \"%s\"\n",
291 CC_UQ, cc->ccID, cc->label, cc->module);
292 }
293 }
294 }
295
296 if (RtsFlags.ProfFlags.doHeapProfile) {
297 /* Initialise the log file name */
298 hp_filename = arenaAlloc(prof_arena, strlen(prog_name) + 6);
299 sprintf(hp_filename, "%s.hp", prog_name);
300
301 /* open the log file */
302 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
303 debugBelch("Can't open profiling report file %s\n",
304 hp_filename);
305 RtsFlags.ProfFlags.doHeapProfile = 0;
306 return;
307 }
308 }
309 }
310
/*
 * Begin time profiling by starting the profiling timer.
 *
 * Defined 'static' to agree with the prototype earlier in this file, and
 * without a trailing ';' after the closing brace (an empty declaration
 * at file scope is not valid ISO C).
 */
static void
initTimeProfiling(void)
{
    /* Start ticking */
    startProfTimer();
}
317
318 void
319 endProfiling ( void )
320 {
321 if (RtsFlags.CcFlags.doCostCentres) {
322 stopProfTimer();
323 }
324 if (RtsFlags.ProfFlags.doHeapProfile) {
325 endHeapProfiling();
326 }
327 }
328
329 /* -----------------------------------------------------------------------------
330 Set cost centre stack when entering a function.
331 -------------------------------------------------------------------------- */
332 rtsBool entering_PAP;
333
334 void
335 EnterFunCCS ( CostCentreStack *ccsfn )
336 {
337 /* PAP_entry has already set CCCS for us */
338 if (entering_PAP) {
339 entering_PAP = rtsFalse;
340 return;
341 }
342
343 if (ccsfn->root->is_caf == CC_IS_CAF) {
344 CCCS = AppendCCS(CCCS,ccsfn);
345 } else {
346 CCCS = ccsfn;
347 }
348 }
349
350 /* -----------------------------------------------------------------------------
351 Cost-centre stack manipulation
352 -------------------------------------------------------------------------- */
353
354 #ifdef DEBUG
355 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
356 CostCentreStack *
357 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
358 #define PushCostCentre _PushCostCentre
359 {
360 IF_DEBUG(prof,
361 traceBegin("pushing %s on ", cc->label);
362 debugCCS(ccs);
363 traceEnd(););
364
365 return PushCostCentre(ccs,cc);
366 }
367 #endif
368
369 CostCentreStack *
370 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
371 {
372 CostCentreStack *temp_ccs;
373
374 if (ccs == EMPTY_STACK)
375 return ActualPush(ccs,cc);
376 else {
377 if (ccs->cc == cc)
378 return ccs;
379 else {
380 /* check if we've already memoized this stack */
381 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
382
383 if (temp_ccs != EMPTY_STACK)
384 return temp_ccs;
385 else {
386 temp_ccs = CheckLoop(ccs,cc);
387 if (temp_ccs != NULL) {
388 /* we have recursed to an older CCS. Mark this in
389 * the index table, and emit a "back edge" into the
390 * log file.
391 */
392 ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1);
393 DecBackEdge(temp_ccs,ccs);
394 return temp_ccs;
395 } else {
396 return ActualPush(ccs,cc);
397 }
398 }
399 }
400 }
401 }
402
403 static CostCentreStack *
404 CheckLoop ( CostCentreStack *ccs, CostCentre *cc )
405 {
406 while (ccs != EMPTY_STACK) {
407 if (ccs->cc == cc)
408 return ccs;
409 ccs = ccs->prevStack;
410 }
411 return NULL;
412 }
413
414 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
415
416 #ifdef DEBUG
417 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
418 CostCentreStack *
419 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
420 #define AppendCCS _AppendCCS
421 {
422 IF_DEBUG(prof,
423 if (ccs1 != ccs2) {
424 debugBelch("Appending ");
425 debugCCS(ccs1);
426 debugBelch(" to ");
427 debugCCS(ccs2);
428 debugBelch("\n");});
429 return AppendCCS(ccs1,ccs2);
430 }
431 #endif
432
433 CostCentreStack *
434 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
435 {
436 CostCentreStack *ccs = NULL;
437
438 if (ccs1 == ccs2) {
439 return ccs1;
440 }
441
442 if (ccs2->cc->is_caf == CC_IS_CAF) {
443 return ccs1;
444 }
445
446 if (ccs2->prevStack != NULL) {
447 ccs = AppendCCS(ccs1, ccs2->prevStack);
448 }
449
450 return PushCostCentre(ccs,ccs2->cc);
451 }
452
453 static CostCentreStack *
454 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
455 {
456 CostCentreStack *new_ccs;
457
458 /* allocate space for a new CostCentreStack */
459 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
460
461 return ActualPush_(ccs, cc, new_ccs);
462 }
463
464 static CostCentreStack *
465 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
466 {
467 /* assign values to each member of the structure */
468 new_ccs->ccsID = CCS_ID++;
469 new_ccs->cc = cc;
470 new_ccs->prevStack = ccs;
471
472 new_ccs->indexTable = EMPTY_TABLE;
473
474 /* Initialise the various _scc_ counters to zero
475 */
476 new_ccs->scc_count = 0;
477
478 /* Initialize all other stats here. There should be a quick way
479 * that's easily used elsewhere too
480 */
481 new_ccs->time_ticks = 0;
482 new_ccs->mem_alloc = 0;
483 new_ccs->inherited_ticks = 0;
484 new_ccs->inherited_alloc = 0;
485
486 new_ccs->root = ccs->root;
487
488 // Set the selected field.
489 ccsSetSelected(new_ccs);
490
491 /* update the memoization table for the parent stack */
492 if (ccs != EMPTY_STACK)
493 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc,
494 0/*not a back edge*/);
495
496 /* make sure this CC is declared at the next heap/time sample */
497 DecCCS(new_ccs);
498
499 /* return a pointer to the new stack */
500 return new_ccs;
501 }
502
503
504 static CostCentreStack *
505 IsInIndexTable(IndexTable *it, CostCentre *cc)
506 {
507 while (it!=EMPTY_TABLE)
508 {
509 if (it->cc==cc)
510 return it->ccs;
511 else
512 it = it->next;
513 }
514
515 /* otherwise we never found it so return EMPTY_TABLE */
516 return EMPTY_TABLE;
517 }
518
519
520 static IndexTable *
521 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs,
522 CostCentre *cc, unsigned int back_edge)
523 {
524 IndexTable *new_it;
525
526 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
527
528 new_it->cc = cc;
529 new_it->ccs = new_ccs;
530 new_it->next = it;
531 new_it->back_edge = back_edge;
532 return new_it;
533 }
534
535
536 static void
537 DecCCS(CostCentreStack *ccs)
538 {
539 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
540 if (ccs->prevStack == EMPTY_STACK)
541 fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ,
542 ccs->ccsID, ccs->cc->ccID);
543 else
544 fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ,
545 ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID);
546 }
547 }
548
549 static void
550 DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs )
551 {
552 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
553 if (ccs->prevStack == EMPTY_STACK)
554 fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ,
555 ccs->ccsID, ccs->cc->ccID);
556 else
557 fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ,
558 ccs->ccsID, ccs->cc->ccID, oldccs->ccsID);
559 }
560 }
561
562 /* -----------------------------------------------------------------------------
563 Generating a time & allocation profiling report.
564 -------------------------------------------------------------------------- */
565
566 /* We omit certain system-related CCs and CCSs from the default
567 * reports, so as not to cause confusion.
568 */
569 static rtsBool
570 cc_to_ignore (CostCentre *cc)
571 {
572 if ( cc == CC_OVERHEAD
573 || cc == CC_DONT_CARE
574 || cc == CC_GC
575 || cc == CC_SYSTEM) {
576 return rtsTrue;
577 } else {
578 return rtsFalse;
579 }
580 }
581
582 static rtsBool
583 ccs_to_ignore (CostCentreStack *ccs)
584 {
585 if ( ccs == CCS_OVERHEAD
586 || ccs == CCS_DONT_CARE
587 || ccs == CCS_GC
588 || ccs == CCS_SYSTEM) {
589 return rtsTrue;
590 } else {
591 return rtsFalse;
592 }
593 }
594
595 /* -----------------------------------------------------------------------------
596 Generating the aggregated per-cost-centre time/alloc report.
597 -------------------------------------------------------------------------- */
598
599 static CostCentre *sorted_cc_list;
600
601 static void
602 aggregate_cc_costs( CostCentreStack *ccs )
603 {
604 IndexTable *i;
605
606 ccs->cc->mem_alloc += ccs->mem_alloc;
607 ccs->cc->time_ticks += ccs->time_ticks;
608
609 for (i = ccs->indexTable; i != 0; i = i->next) {
610 if (!i->back_edge) {
611 aggregate_cc_costs(i->ccs);
612 }
613 }
614 }
615
616 static void
617 insert_cc_in_sorted_list( CostCentre *new_cc )
618 {
619 CostCentre **prev, *cc;
620
621 prev = &sorted_cc_list;
622 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
623 if (new_cc->time_ticks > cc->time_ticks) {
624 new_cc->link = cc;
625 *prev = new_cc;
626 return;
627 } else {
628 prev = &(cc->link);
629 }
630 }
631 new_cc->link = NULL;
632 *prev = new_cc;
633 }
634
635 static void
636 report_per_cc_costs( void )
637 {
638 CostCentre *cc, *next;
639
640 aggregate_cc_costs(CCS_MAIN);
641 sorted_cc_list = NULL;
642
643 for (cc = CC_LIST; cc != NULL; cc = next) {
644 next = cc->link;
645 if (cc->time_ticks > total_prof_ticks/100
646 || cc->mem_alloc > total_alloc/100
647 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
648 insert_cc_in_sorted_list(cc);
649 }
650 }
651
652 fprintf(prof_file, "%-30s %-20s", "COST CENTRE", "MODULE");
653 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
654 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
655 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
656 }
657 fprintf(prof_file, "\n\n");
658
659 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
660 if (cc_to_ignore(cc)) {
661 continue;
662 }
663 fprintf(prof_file, "%-30s %-20s", cc->label, cc->module);
664 fprintf(prof_file, "%6.1f %6.1f",
665 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
666 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
667 total_alloc * 100)
668 );
669
670 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
671 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
672 (StgWord64)(cc->time_ticks), cc->mem_alloc);
673 }
674 fprintf(prof_file, "\n");
675 }
676
677 fprintf(prof_file,"\n\n");
678 }
679
680 /* -----------------------------------------------------------------------------
681 Generate the cost-centre-stack time/alloc report
682 -------------------------------------------------------------------------- */
683
684 static void
685 fprint_header( void )
686 {
687 fprintf(prof_file, "%-24s %-10s individual inherited\n", "", "");
688
689 fprintf(prof_file, "%-24s %-50s", "COST CENTRE", "MODULE");
690 fprintf(prof_file, "%6s %10s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc");
691
692 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
693 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
694 #if defined(PROFILING_DETAIL_COUNTS)
695 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
696 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
697 #endif
698 }
699
700 fprintf(prof_file, "\n\n");
701 }
702
703 void
704 reportCCSProfiling( void )
705 {
706 nat count;
707 char temp[128]; /* sigh: magic constant */
708
709 stopProfTimer();
710
711 total_prof_ticks = 0;
712 total_alloc = 0;
713 count_ticks(CCS_MAIN);
714
715 switch (RtsFlags.CcFlags.doCostCentres) {
716 case 0:
717 return;
718 case COST_CENTRES_XML:
719 gen_XML_logfile();
720 return;
721 default:
722 break;
723 }
724
725 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
726 time_str(), "Final");
727
728 fprintf(prof_file, "\n\t ");
729 fprintf(prof_file, " %s", prog_name);
730 fprintf(prof_file, " +RTS");
731 for (count = 0; rts_argv[count]; count++)
732 fprintf(prof_file, " %s", rts_argv[count]);
733 fprintf(prof_file, " -RTS");
734 for (count = 1; prog_argv[count]; count++)
735 fprintf(prof_file, " %s", prog_argv[count]);
736 fprintf(prof_file, "\n\n");
737
738 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
739 (double) total_prof_ticks *
740 (double) RtsFlags.MiscFlags.tickInterval / 1000,
741 (unsigned long) total_prof_ticks,
742 (int) RtsFlags.MiscFlags.tickInterval);
743
744 fprintf(prof_file, "\ttotal alloc = %11s bytes",
745 ullong_format_string(total_alloc * sizeof(W_),
746 temp, rtsTrue/*commas*/));
747
748 #if defined(PROFILING_DETAIL_COUNTS)
749 fprintf(prof_file, " (%lu closures)", total_allocs);
750 #endif
751 fprintf(prof_file, " (excludes profiling overheads)\n\n");
752
753 report_per_cc_costs();
754
755 inherit_costs(CCS_MAIN);
756
757 fprint_header();
758 reportCCS(pruneCCSTree(CCS_MAIN), 0);
759 }
760
761 static void
762 reportCCS(CostCentreStack *ccs, nat indent)
763 {
764 CostCentre *cc;
765 IndexTable *i;
766
767 cc = ccs->cc;
768
769 /* Only print cost centres with non 0 data ! */
770
771 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
772 ! ccs_to_ignore(ccs))
773 /* force printing of *all* cost centres if -P -P */
774 {
775
776 fprintf(prof_file, "%-*s%-*s %-50s",
777 indent, "", 24-indent, cc->label, cc->module);
778
779 fprintf(prof_file, "%6ld %11.0f %5.1f %5.1f %5.1f %5.1f",
780 ccs->ccsID, (double) ccs->scc_count,
781 total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
782 total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
783 total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
784 total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
785 );
786
787 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
788 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
789 (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
790 #if defined(PROFILING_DETAIL_COUNTS)
791 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
792 ccs->mem_allocs, ccs->thunk_count,
793 ccs->function_count, ccs->pap_count,
794 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
795 ccs->caffun_subsumed);
796 #endif
797 }
798 fprintf(prof_file, "\n");
799 }
800
801 for (i = ccs->indexTable; i != 0; i = i->next) {
802 if (!i->back_edge) {
803 reportCCS(i->ccs, indent+1);
804 }
805 }
806 }
807
808
809 /* Traverse the cost centre stack tree and accumulate
810 * ticks/allocations.
811 */
812 static void
813 count_ticks(CostCentreStack *ccs)
814 {
815 IndexTable *i;
816
817 if (!ccs_to_ignore(ccs)) {
818 total_alloc += ccs->mem_alloc;
819 total_prof_ticks += ccs->time_ticks;
820 }
821 for (i = ccs->indexTable; i != NULL; i = i->next)
822 if (!i->back_edge) {
823 count_ticks(i->ccs);
824 }
825 }
826
827 /* Traverse the cost centre stack tree and inherit ticks & allocs.
828 */
829 static void
830 inherit_costs(CostCentreStack *ccs)
831 {
832 IndexTable *i;
833
834 if (ccs_to_ignore(ccs)) { return; }
835
836 ccs->inherited_ticks += ccs->time_ticks;
837 ccs->inherited_alloc += ccs->mem_alloc;
838
839 for (i = ccs->indexTable; i != NULL; i = i->next)
840 if (!i->back_edge) {
841 inherit_costs(i->ccs);
842 ccs->inherited_ticks += i->ccs->inherited_ticks;
843 ccs->inherited_alloc += i->ccs->inherited_alloc;
844 }
845
846 return;
847 }
848
849 static CostCentreStack *
850 pruneCCSTree( CostCentreStack *ccs )
851 {
852 CostCentreStack *ccs1;
853 IndexTable *i, **prev;
854
855 prev = &ccs->indexTable;
856 for (i = ccs->indexTable; i != 0; i = i->next) {
857 if (i->back_edge) { continue; }
858
859 ccs1 = pruneCCSTree(i->ccs);
860 if (ccs1 == NULL) {
861 *prev = i->next;
862 } else {
863 prev = &(i->next);
864 }
865 }
866
867 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
868 /* force printing of *all* cost centres if -P -P */ )
869
870 || ( ccs->indexTable != 0 )
871 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
872 ) {
873 return ccs;
874 } else {
875 return NULL;
876 }
877 }
878
879 /* -----------------------------------------------------------------------------
880 Generate the XML time/allocation profile
881 -------------------------------------------------------------------------- */
882
883 void
884 gen_XML_logfile( void )
885 {
886 fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks);
887
888 reportCCS_XML(pruneCCSTree(CCS_MAIN));
889
890 fprintf(prof_file, " 0\n");
891 }
892
893 static void
894 reportCCS_XML(CostCentreStack *ccs)
895 {
896 CostCentre *cc;
897 IndexTable *i;
898
899 if (ccs_to_ignore(ccs)) { return; }
900
901 cc = ccs->cc;
902
903 fprintf(prof_file, " 1 %ld %" FMT_Word64 " %" FMT_Word64 " %" FMT_Word64,
904 ccs->ccsID, ccs->scc_count, (StgWord64)(ccs->time_ticks), ccs->mem_alloc);
905
906 for (i = ccs->indexTable; i != 0; i = i->next) {
907 if (!i->back_edge) {
908 reportCCS_XML(i->ccs);
909 }
910 }
911 }
912
913 void
914 fprintCCS( FILE *f, CostCentreStack *ccs )
915 {
916 fprintf(f,"<");
917 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
918 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
919 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
920 fprintf(f,",");
921 }
922 }
923 fprintf(f,">");
924 }
925
926 /* For calling from .cmm code, where we can't reliably refer to stderr */
927 void
928 fprintCCS_stderr( CostCentreStack *ccs )
929 {
930 fprintCCS(stderr, ccs);
931 }
932
#ifdef DEBUG
/*
 * DEBUG builds only: write ccs through debugBelch() as
 * "<module.label,module.label,...>", starting with the top of the stack
 * and walking prevStack.  The walk stops at NULL or at CCS_MAIN, which
 * is not printed.
 */
void
debugCCS( CostCentreStack *ccs )
{
    CostCentreStack *parent;

    debugBelch("<");
    while (ccs && ccs != CCS_MAIN) {
        parent = ccs->prevStack;

        debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
        /* separator only when another element will follow */
        if (parent && parent != CCS_MAIN) {
            debugBelch(",");
        }
        ccs = parent;
    }
    debugBelch(">");
}
#endif /* DEBUG */
947
948 #endif /* PROFILING */