Merge branch 'coloured-core' of https://github.com/nominolo/ghc into coloured-core
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20
21 #include <string.h>
22
23 #ifdef DEBUG
24 #include "Trace.h"
25 #endif
26
27 /*
28 * Profiling allocation arena.
29 */
30 Arena *prof_arena;
31
32 /*
33 * Global variables used to assign unique IDs to cc's, ccs's, and
34 * closure_cats
35 */
36
37 unsigned int CC_ID = 1;
38 unsigned int CCS_ID = 1;
39 unsigned int HP_ID = 1;
40
41 /* figures for the profiling report.
42 */
43 static StgWord64 total_alloc;
44 static lnat total_prof_ticks;
45
46 /* Globals for opening the profiling log file(s)
47 */
48 static char *prof_filename; /* prof report file name = <program>.prof */
49 FILE *prof_file;
50
51 static char *hp_filename; /* heap profile (hp2ps style) log file */
52 FILE *hp_file;
53
54 /* The Current Cost Centre Stack (for attributing costs)
55 */
56 CostCentreStack *CCCS;
57
58 /* Linked lists to keep track of cc's and ccs's that haven't
59 * been declared in the log file yet
60 */
61 CostCentre *CC_LIST = NULL;
62 CostCentreStack *CCS_LIST = NULL;
63
64 /*
65 * Built-in cost centres and cost-centre stacks:
66 *
67 * MAIN is the root of the cost-centre stack tree. If there are
68 * no _scc_s in the program, all costs will be attributed
69 * to MAIN.
70 *
71 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
72 * RTS operations apart from garbage collection are attributed
73 * to SYSTEM.
74 *
75 * GC is the storage manager / garbage collector.
76 *
77 * OVERHEAD gets all costs generated by the profiling system
78 * itself. These are costs that would not be incurred
79 * during non-profiled execution of the program.
80 *
81 * SUBSUMED is the one-and-only CCS placed on top-level functions.
82 * It indicates that all costs are to be attributed to the
83 * enclosing cost centre stack. SUBSUMED never accumulates
84 * any costs. The is_caf flag is set on the subsumed cost
85 * centre.
86 *
87 * DONT_CARE is a placeholder cost-centre we assign to static
88 * constructors. It should *never* accumulate any costs.
89 */
90
91 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_IS_BORING, );
92 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", CC_IS_BORING, );
93 CC_DECLARE(CC_GC, "GC", "GC", CC_IS_BORING, );
94 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_IS_CAF, );
95 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", CC_IS_CAF, );
96 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_IS_BORING, );
97
98 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
99 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
100 CCS_DECLARE(CCS_GC, CC_GC, );
101 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
102 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, );
103 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
104
105 /*
106 * Uniques for the XML log-file format
107 */
108 #define CC_UQ 1
109 #define CCS_UQ 2
110 #define TC_UQ 3
111 #define HEAP_OBJ_UQ 4
112 #define TIME_UPD_UQ 5
113 #define HEAP_UPD_UQ 6
114
115 /*
116 * Static Functions
117 */
118
119 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
120 CostCentreStack *new_ccs );
121 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
122 static void count_ticks ( CostCentreStack *ccs );
123 static void inherit_costs ( CostCentreStack *ccs );
124 static void findCCSMaxLens ( CostCentreStack *ccs, nat indent, nat *max_label_len, nat *max_module_len );
125 static void logCCS ( CostCentreStack *ccs, nat indent, nat max_label_len, nat max_module_len );
126 static void reportCCS ( CostCentreStack *ccs );
127 static void DecCCS ( CostCentreStack *ccs );
128 static void DecBackEdge ( CostCentreStack *ccs,
129 CostCentreStack *oldccs );
130 static CostCentreStack * CheckLoop ( CostCentreStack *ccs, CostCentre *cc );
131 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
132 static CostCentreStack * ActualPush ( CostCentreStack *, CostCentre * );
133 static CostCentreStack * IsInIndexTable ( IndexTable *, CostCentre * );
134 static IndexTable * AddToIndexTable ( IndexTable *, CostCentreStack *,
135 CostCentre *, unsigned int );
136 static void ccsSetSelected ( CostCentreStack *ccs );
137
138 static void initTimeProfiling ( void );
139 static void initProfilingLogFile( void );
140
141 static void reportCCS_XML ( CostCentreStack *ccs );
142
143 /* -----------------------------------------------------------------------------
144 Initialise the profiling environment
145 -------------------------------------------------------------------------- */
146
147 void
148 initProfiling1 (void)
149 {
150 // initialise our arena
151 prof_arena = newArena();
152
153 /* for the benefit of allocate()... */
154 CCCS = CCS_SYSTEM;
155 }
156
157 void
158 freeProfiling (void)
159 {
160 arenaFree(prof_arena);
161 }
162
163 void
164 initProfiling2 (void)
165 {
166 CostCentreStack *ccs, *next;
167
168 CCCS = CCS_SYSTEM;
169
170 /* Set up the log file, and dump the header and cost centre
171 * information into it. */
172 initProfilingLogFile();
173
174 /* Register all the cost centres / stacks in the program
175 * CC_MAIN gets link = 0, all others have non-zero link.
176 */
177 REGISTER_CC(CC_MAIN);
178 REGISTER_CC(CC_SYSTEM);
179 REGISTER_CC(CC_GC);
180 REGISTER_CC(CC_OVERHEAD);
181 REGISTER_CC(CC_SUBSUMED);
182 REGISTER_CC(CC_DONT_CARE);
183
184 REGISTER_CCS(CCS_SYSTEM);
185 REGISTER_CCS(CCS_GC);
186 REGISTER_CCS(CCS_OVERHEAD);
187 REGISTER_CCS(CCS_SUBSUMED);
188 REGISTER_CCS(CCS_DONT_CARE);
189 REGISTER_CCS(CCS_MAIN);
190
191 /* find all the "special" cost centre stacks, and make them children
192 * of CCS_MAIN.
193 */
194 ASSERT(CCS_LIST == CCS_MAIN);
195 CCS_LIST = CCS_LIST->prevStack;
196 CCS_MAIN->prevStack = NULL;
197 CCS_MAIN->root = CC_MAIN;
198 ccsSetSelected(CCS_MAIN);
199 DecCCS(CCS_MAIN);
200
201 for (ccs = CCS_LIST; ccs != NULL; ) {
202 next = ccs->prevStack;
203 ccs->prevStack = NULL;
204 ActualPush_(CCS_MAIN,ccs->cc,ccs);
205 ccs->root = ccs->cc;
206 ccs = next;
207 }
208
209 if (RtsFlags.CcFlags.doCostCentres) {
210 initTimeProfiling();
211 }
212
213 if (RtsFlags.ProfFlags.doHeapProfile) {
214 initHeapProfiling();
215 }
216 }
217
218 // Decide whether closures with this CCS should contribute to the heap
219 // profile.
220 static void
221 ccsSetSelected( CostCentreStack *ccs )
222 {
223 if (RtsFlags.ProfFlags.modSelector) {
224 if (! strMatchesSelector( ccs->cc->module,
225 RtsFlags.ProfFlags.modSelector ) ) {
226 ccs->selected = 0;
227 return;
228 }
229 }
230 if (RtsFlags.ProfFlags.ccSelector) {
231 if (! strMatchesSelector( ccs->cc->label,
232 RtsFlags.ProfFlags.ccSelector ) ) {
233 ccs->selected = 0;
234 return;
235 }
236 }
237 if (RtsFlags.ProfFlags.ccsSelector) {
238 CostCentreStack *c;
239 for (c = ccs; c != NULL; c = c->prevStack) {
240 if ( strMatchesSelector( c->cc->label,
241 RtsFlags.ProfFlags.ccsSelector )) {
242 break;
243 }
244 }
245 if (c == NULL) {
246 ccs->selected = 0;
247 return;
248 }
249 }
250
251 ccs->selected = 1;
252 return;
253 }
254
255
256 static void
257 initProfilingLogFile(void)
258 {
259 char *prog;
260
261 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
262 strcpy(prog, prog_name);
263 #ifdef mingw32_HOST_OS
264 // on Windows, drop the .exe suffix if there is one
265 {
266 char *suff;
267 suff = strrchr(prog,'.');
268 if (suff != NULL && !strcmp(suff,".exe")) {
269 *suff = '\0';
270 }
271 }
272 #endif
273
274 if (RtsFlags.CcFlags.doCostCentres == 0 &&
275 RtsFlags.ProfFlags.doHeapProfile != HEAP_BY_RETAINER)
276 {
277 /* No need for the <prog>.prof file */
278 prof_filename = NULL;
279 prof_file = NULL;
280 }
281 else
282 {
283 /* Initialise the log file name */
284 prof_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
285 sprintf(prof_filename, "%s.prof", prog);
286
287 /* open the log file */
288 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
289 debugBelch("Can't open profiling report file %s\n", prof_filename);
290 RtsFlags.CcFlags.doCostCentres = 0;
291 // The following line was added by Sung; retainer/LDV profiling may need
292 // two output files, i.e., <program>.prof/hp.
293 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
294 RtsFlags.ProfFlags.doHeapProfile = 0;
295 return;
296 }
297
298 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
299 /* dump the time, and the profiling interval */
300 fprintf(prof_file, "\"%s\"\n", time_str());
301 fprintf(prof_file, "\"%d ms\"\n", RtsFlags.MiscFlags.tickInterval);
302
303 /* declare all the cost centres */
304 {
305 CostCentre *cc;
306 for (cc = CC_LIST; cc != NULL; cc = cc->link) {
307 fprintf(prof_file, "%d %ld \"%s\" \"%s\"\n",
308 CC_UQ, cc->ccID, cc->label, cc->module);
309 }
310 }
311 }
312 }
313
314 if (RtsFlags.ProfFlags.doHeapProfile) {
315 /* Initialise the log file name */
316 hp_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
317 sprintf(hp_filename, "%s.hp", prog);
318
319 /* open the log file */
320 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
321 debugBelch("Can't open profiling report file %s\n",
322 hp_filename);
323 RtsFlags.ProfFlags.doHeapProfile = 0;
324 return;
325 }
326 }
327 }
328
329 void
330 initTimeProfiling(void)
331 {
332 /* Start ticking */
333 startProfTimer();
334 };
335
336 void
337 endProfiling ( void )
338 {
339 if (RtsFlags.CcFlags.doCostCentres) {
340 stopProfTimer();
341 }
342 if (RtsFlags.ProfFlags.doHeapProfile) {
343 endHeapProfiling();
344 }
345 }
346
347 /* -----------------------------------------------------------------------------
348 Set cost centre stack when entering a function.
349 -------------------------------------------------------------------------- */
350 rtsBool entering_PAP;
351
352 void
353 EnterFunCCS ( CostCentreStack *ccsfn )
354 {
355 /* PAP_entry has already set CCCS for us */
356 if (entering_PAP) {
357 entering_PAP = rtsFalse;
358 return;
359 }
360
361 if (ccsfn->root->is_caf == CC_IS_CAF) {
362 CCCS = AppendCCS(CCCS,ccsfn);
363 } else {
364 CCCS = ccsfn;
365 }
366 }
367
368 /* -----------------------------------------------------------------------------
369 Cost-centre stack manipulation
370 -------------------------------------------------------------------------- */
371
372 #ifdef DEBUG
373 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
374 CostCentreStack *
375 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
376 #define PushCostCentre _PushCostCentre
377 {
378 IF_DEBUG(prof,
379 traceBegin("pushing %s on ", cc->label);
380 debugCCS(ccs);
381 traceEnd(););
382
383 return PushCostCentre(ccs,cc);
384 }
385 #endif
386
387 CostCentreStack *
388 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
389 {
390 CostCentreStack *temp_ccs;
391
392 if (ccs == EMPTY_STACK)
393 return ActualPush(ccs,cc);
394 else {
395 if (ccs->cc == cc)
396 return ccs;
397 else {
398 /* check if we've already memoized this stack */
399 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
400
401 if (temp_ccs != EMPTY_STACK)
402 return temp_ccs;
403 else {
404 temp_ccs = CheckLoop(ccs,cc);
405 if (temp_ccs != NULL) {
406 /* we have recursed to an older CCS. Mark this in
407 * the index table, and emit a "back edge" into the
408 * log file.
409 */
410 ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1);
411 DecBackEdge(temp_ccs,ccs);
412 return temp_ccs;
413 } else {
414 return ActualPush(ccs,cc);
415 }
416 }
417 }
418 }
419 }
420
421 static CostCentreStack *
422 CheckLoop ( CostCentreStack *ccs, CostCentre *cc )
423 {
424 while (ccs != EMPTY_STACK) {
425 if (ccs->cc == cc)
426 return ccs;
427 ccs = ccs->prevStack;
428 }
429 return NULL;
430 }
431
432 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
433
434 #ifdef DEBUG
435 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
436 CostCentreStack *
437 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
438 #define AppendCCS _AppendCCS
439 {
440 IF_DEBUG(prof,
441 if (ccs1 != ccs2) {
442 debugBelch("Appending ");
443 debugCCS(ccs1);
444 debugBelch(" to ");
445 debugCCS(ccs2);
446 debugBelch("\n");});
447 return AppendCCS(ccs1,ccs2);
448 }
449 #endif
450
451 CostCentreStack *
452 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
453 {
454 CostCentreStack *ccs = NULL;
455
456 if (ccs1 == ccs2) {
457 return ccs1;
458 }
459
460 if (ccs2->cc->is_caf == CC_IS_CAF) {
461 return ccs1;
462 }
463
464 if (ccs2->prevStack != NULL) {
465 ccs = AppendCCS(ccs1, ccs2->prevStack);
466 }
467
468 return PushCostCentre(ccs,ccs2->cc);
469 }
470
471 static CostCentreStack *
472 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
473 {
474 CostCentreStack *new_ccs;
475
476 /* allocate space for a new CostCentreStack */
477 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
478
479 return ActualPush_(ccs, cc, new_ccs);
480 }
481
482 static CostCentreStack *
483 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
484 {
485 /* assign values to each member of the structure */
486 new_ccs->ccsID = CCS_ID++;
487 new_ccs->cc = cc;
488 new_ccs->prevStack = ccs;
489
490 new_ccs->indexTable = EMPTY_TABLE;
491
492 /* Initialise the various _scc_ counters to zero
493 */
494 new_ccs->scc_count = 0;
495
496 /* Initialize all other stats here. There should be a quick way
497 * that's easily used elsewhere too
498 */
499 new_ccs->time_ticks = 0;
500 new_ccs->mem_alloc = 0;
501 new_ccs->inherited_ticks = 0;
502 new_ccs->inherited_alloc = 0;
503
504 new_ccs->root = ccs->root;
505
506 // Set the selected field.
507 ccsSetSelected(new_ccs);
508
509 /* update the memoization table for the parent stack */
510 if (ccs != EMPTY_STACK)
511 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc,
512 0/*not a back edge*/);
513
514 /* make sure this CC is declared at the next heap/time sample */
515 DecCCS(new_ccs);
516
517 /* return a pointer to the new stack */
518 return new_ccs;
519 }
520
521
522 static CostCentreStack *
523 IsInIndexTable(IndexTable *it, CostCentre *cc)
524 {
525 while (it!=EMPTY_TABLE)
526 {
527 if (it->cc==cc)
528 return it->ccs;
529 else
530 it = it->next;
531 }
532
533 /* otherwise we never found it so return EMPTY_TABLE */
534 return EMPTY_TABLE;
535 }
536
537
538 static IndexTable *
539 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs,
540 CostCentre *cc, unsigned int back_edge)
541 {
542 IndexTable *new_it;
543
544 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
545
546 new_it->cc = cc;
547 new_it->ccs = new_ccs;
548 new_it->next = it;
549 new_it->back_edge = back_edge;
550 return new_it;
551 }
552
553
554 static void
555 DecCCS(CostCentreStack *ccs)
556 {
557 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
558 if (ccs->prevStack == EMPTY_STACK)
559 fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ,
560 ccs->ccsID, ccs->cc->ccID);
561 else
562 fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ,
563 ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID);
564 }
565 }
566
567 static void
568 DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs )
569 {
570 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
571 if (ccs->prevStack == EMPTY_STACK)
572 fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ,
573 ccs->ccsID, ccs->cc->ccID);
574 else
575 fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ,
576 ccs->ccsID, ccs->cc->ccID, oldccs->ccsID);
577 }
578 }
579
580 /* -----------------------------------------------------------------------------
581 Generating a time & allocation profiling report.
582 -------------------------------------------------------------------------- */
583
584 /* We omit certain system-related CCs and CCSs from the default
585 * reports, so as not to cause confusion.
586 */
587 static rtsBool
588 cc_to_ignore (CostCentre *cc)
589 {
590 if ( cc == CC_OVERHEAD
591 || cc == CC_DONT_CARE
592 || cc == CC_GC
593 || cc == CC_SYSTEM) {
594 return rtsTrue;
595 } else {
596 return rtsFalse;
597 }
598 }
599
600 static rtsBool
601 ccs_to_ignore (CostCentreStack *ccs)
602 {
603 if ( ccs == CCS_OVERHEAD
604 || ccs == CCS_DONT_CARE
605 || ccs == CCS_GC
606 || ccs == CCS_SYSTEM) {
607 return rtsTrue;
608 } else {
609 return rtsFalse;
610 }
611 }
612
613 /* -----------------------------------------------------------------------------
614 Generating the aggregated per-cost-centre time/alloc report.
615 -------------------------------------------------------------------------- */
616
617 static CostCentre *sorted_cc_list;
618
619 static void
620 aggregate_cc_costs( CostCentreStack *ccs )
621 {
622 IndexTable *i;
623
624 ccs->cc->mem_alloc += ccs->mem_alloc;
625 ccs->cc->time_ticks += ccs->time_ticks;
626
627 for (i = ccs->indexTable; i != 0; i = i->next) {
628 if (!i->back_edge) {
629 aggregate_cc_costs(i->ccs);
630 }
631 }
632 }
633
634 static void
635 insert_cc_in_sorted_list( CostCentre *new_cc )
636 {
637 CostCentre **prev, *cc;
638
639 prev = &sorted_cc_list;
640 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
641 if (new_cc->time_ticks > cc->time_ticks) {
642 new_cc->link = cc;
643 *prev = new_cc;
644 return;
645 } else {
646 prev = &(cc->link);
647 }
648 }
649 new_cc->link = NULL;
650 *prev = new_cc;
651 }
652
653 static void
654 report_per_cc_costs( void )
655 {
656 CostCentre *cc, *next;
657 nat max_label_len, max_module_len;
658
659 aggregate_cc_costs(CCS_MAIN);
660 sorted_cc_list = NULL;
661
662 max_label_len = max_module_len = 0;
663
664 for (cc = CC_LIST; cc != NULL; cc = next) {
665 next = cc->link;
666 if (cc->time_ticks > total_prof_ticks/100
667 || cc->mem_alloc > total_alloc/100
668 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
669 insert_cc_in_sorted_list(cc);
670
671 max_label_len = stg_max(strlen(cc->label), max_label_len);
672 max_module_len = stg_max(strlen(cc->module), max_module_len);
673 }
674 }
675
676 fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
677 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
678 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
679 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
680 }
681 fprintf(prof_file, "\n\n");
682
683 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
684 if (cc_to_ignore(cc)) {
685 continue;
686 }
687 fprintf(prof_file, "%-*s %-*s", max_label_len, cc->label, max_module_len, cc->module);
688 fprintf(prof_file, "%6.1f %6.1f",
689 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
690 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
691 total_alloc * 100)
692 );
693
694 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
695 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
696 (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_));
697 }
698 fprintf(prof_file, "\n");
699 }
700
701 fprintf(prof_file,"\n\n");
702 }
703
704 /* -----------------------------------------------------------------------------
705 Generate the cost-centre-stack time/alloc report
706 -------------------------------------------------------------------------- */
707
708 static void
709 fprint_header( nat max_label_len, nat max_module_len )
710 {
711 fprintf(prof_file, "%-24s %-10s individual inherited\n", "", "");
712
713 fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
714 fprintf(prof_file, "%6s %10s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc");
715
716 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
717 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
718 #if defined(PROFILING_DETAIL_COUNTS)
719 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
720 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
721 #endif
722 }
723
724 fprintf(prof_file, "\n\n");
725 }
726
727 void
728 reportCCSProfiling( void )
729 {
730 nat count;
731 char temp[128]; /* sigh: magic constant */
732
733 stopProfTimer();
734
735 total_prof_ticks = 0;
736 total_alloc = 0;
737 count_ticks(CCS_MAIN);
738
739 switch (RtsFlags.CcFlags.doCostCentres) {
740 case 0:
741 return;
742 case COST_CENTRES_XML:
743 gen_XML_logfile();
744 return;
745 default:
746 break;
747 }
748
749 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
750 time_str(), "Final");
751
752 fprintf(prof_file, "\n\t ");
753 fprintf(prof_file, " %s", prog_name);
754 fprintf(prof_file, " +RTS");
755 for (count = 0; rts_argv[count]; count++)
756 fprintf(prof_file, " %s", rts_argv[count]);
757 fprintf(prof_file, " -RTS");
758 for (count = 1; prog_argv[count]; count++)
759 fprintf(prof_file, " %s", prog_argv[count]);
760 fprintf(prof_file, "\n\n");
761
762 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
763 (double) total_prof_ticks *
764 (double) RtsFlags.MiscFlags.tickInterval / 1000,
765 (unsigned long) total_prof_ticks,
766 (int) RtsFlags.MiscFlags.tickInterval);
767
768 fprintf(prof_file, "\ttotal alloc = %11s bytes",
769 showStgWord64(total_alloc * sizeof(W_),
770 temp, rtsTrue/*commas*/));
771
772 #if defined(PROFILING_DETAIL_COUNTS)
773 fprintf(prof_file, " (%lu closures)", total_allocs);
774 #endif
775 fprintf(prof_file, " (excludes profiling overheads)\n\n");
776
777 report_per_cc_costs();
778
779 inherit_costs(CCS_MAIN);
780
781 reportCCS(pruneCCSTree(CCS_MAIN));
782 }
783
784 static void
785 findCCSMaxLens(CostCentreStack *ccs, nat indent, nat *max_label_len, nat *max_module_len) {
786 CostCentre *cc;
787 IndexTable *i;
788
789 cc = ccs->cc;
790
791 *max_label_len = stg_max(*max_label_len, indent + strlen(cc->label));
792 *max_module_len = stg_max(*max_module_len, strlen(cc->module));
793
794 for (i = ccs->indexTable; i != 0; i = i->next) {
795 if (!i->back_edge) {
796 findCCSMaxLens(i->ccs, indent+1, max_label_len, max_module_len);
797 }
798 }
799 }
800
801 static void
802 logCCS(CostCentreStack *ccs, nat indent, nat max_label_len, nat max_module_len)
803 {
804 CostCentre *cc;
805 IndexTable *i;
806
807 cc = ccs->cc;
808
809 /* Only print cost centres with non 0 data ! */
810
811 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
812 ! ccs_to_ignore(ccs))
813 /* force printing of *all* cost centres if -P -P */
814 {
815
816 fprintf(prof_file, "%-*s%-*s %-*s",
817 indent, "", max_label_len-indent, cc->label, max_module_len, cc->module);
818
819 fprintf(prof_file, "%6ld %11.0f %5.1f %5.1f %5.1f %5.1f",
820 ccs->ccsID, (double) ccs->scc_count,
821 total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
822 total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
823 total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
824 total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
825 );
826
827 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
828 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
829 (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
830 #if defined(PROFILING_DETAIL_COUNTS)
831 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
832 ccs->mem_allocs, ccs->thunk_count,
833 ccs->function_count, ccs->pap_count,
834 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
835 ccs->caffun_subsumed);
836 #endif
837 }
838 fprintf(prof_file, "\n");
839 }
840
841 for (i = ccs->indexTable; i != 0; i = i->next) {
842 if (!i->back_edge) {
843 logCCS(i->ccs, indent+1, max_label_len, max_module_len);
844 }
845 }
846 }
847
848 static void
849 reportCCS(CostCentreStack *ccs)
850 {
851 nat max_label_len, max_module_len;
852 max_label_len = max_module_len = 0;
853
854 findCCSMaxLens(ccs, 0, &max_label_len, &max_module_len);
855
856 fprint_header(max_label_len, max_module_len);
857 logCCS(ccs, 0, max_label_len, max_module_len);
858 }
859
860
861 /* Traverse the cost centre stack tree and accumulate
862 * ticks/allocations.
863 */
864 static void
865 count_ticks(CostCentreStack *ccs)
866 {
867 IndexTable *i;
868
869 if (!ccs_to_ignore(ccs)) {
870 total_alloc += ccs->mem_alloc;
871 total_prof_ticks += ccs->time_ticks;
872 }
873 for (i = ccs->indexTable; i != NULL; i = i->next)
874 if (!i->back_edge) {
875 count_ticks(i->ccs);
876 }
877 }
878
879 /* Traverse the cost centre stack tree and inherit ticks & allocs.
880 */
881 static void
882 inherit_costs(CostCentreStack *ccs)
883 {
884 IndexTable *i;
885
886 if (ccs_to_ignore(ccs)) { return; }
887
888 ccs->inherited_ticks += ccs->time_ticks;
889 ccs->inherited_alloc += ccs->mem_alloc;
890
891 for (i = ccs->indexTable; i != NULL; i = i->next)
892 if (!i->back_edge) {
893 inherit_costs(i->ccs);
894 ccs->inherited_ticks += i->ccs->inherited_ticks;
895 ccs->inherited_alloc += i->ccs->inherited_alloc;
896 }
897
898 return;
899 }
900
901 static CostCentreStack *
902 pruneCCSTree( CostCentreStack *ccs )
903 {
904 CostCentreStack *ccs1;
905 IndexTable *i, **prev;
906
907 prev = &ccs->indexTable;
908 for (i = ccs->indexTable; i != 0; i = i->next) {
909 if (i->back_edge) { continue; }
910
911 ccs1 = pruneCCSTree(i->ccs);
912 if (ccs1 == NULL) {
913 *prev = i->next;
914 } else {
915 prev = &(i->next);
916 }
917 }
918
919 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
920 /* force printing of *all* cost centres if -P -P */ )
921
922 || ( ccs->indexTable != 0 )
923 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
924 ) {
925 return ccs;
926 } else {
927 return NULL;
928 }
929 }
930
931 /* -----------------------------------------------------------------------------
932 Generate the XML time/allocation profile
933 -------------------------------------------------------------------------- */
934
935 void
936 gen_XML_logfile( void )
937 {
938 fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks);
939
940 reportCCS_XML(pruneCCSTree(CCS_MAIN));
941
942 fprintf(prof_file, " 0\n");
943 }
944
945 static void
946 reportCCS_XML(CostCentreStack *ccs)
947 {
948 CostCentre *cc;
949 IndexTable *i;
950
951 if (ccs_to_ignore(ccs)) { return; }
952
953 cc = ccs->cc;
954
955 fprintf(prof_file, " 1 %ld %" FMT_Word64 " %" FMT_Word64 " %" FMT_Word64,
956 ccs->ccsID, ccs->scc_count, (StgWord64)(ccs->time_ticks), ccs->mem_alloc);
957
958 for (i = ccs->indexTable; i != 0; i = i->next) {
959 if (!i->back_edge) {
960 reportCCS_XML(i->ccs);
961 }
962 }
963 }
964
965 void
966 fprintCCS( FILE *f, CostCentreStack *ccs )
967 {
968 fprintf(f,"<");
969 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
970 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
971 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
972 fprintf(f,",");
973 }
974 }
975 fprintf(f,">");
976 }
977
978 /* For calling from .cmm code, where we can't reliably refer to stderr */
979 void
980 fprintCCS_stderr( CostCentreStack *ccs )
981 {
982 fprintCCS(stderr, ccs);
983 }
984
985 #ifdef DEBUG
986 void
987 debugCCS( CostCentreStack *ccs )
988 {
989 debugBelch("<");
990 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
991 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
992 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
993 debugBelch(",");
994 }
995 }
996 debugBelch(">");
997 }
998 #endif /* DEBUG */
999
1000 #endif /* PROFILING */