rts/Linker.c (loadArchive):
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20
21 #include <string.h>
22
23 #ifdef DEBUG
24 #include "Trace.h"
25 #endif
26
27 /*
28 * Profiling allocation arena.
29 */
30 Arena *prof_arena;
31
32 /*
33 * Global variables used to assign unique IDs to cc's, ccs's, and
34 * closure_cats
35 */
36
37 unsigned int CC_ID;
38 unsigned int CCS_ID;
39 unsigned int HP_ID;
40
41 /* figures for the profiling report.
42 */
43 static StgWord64 total_alloc;
44 static lnat total_prof_ticks;
45
46 /* Globals for opening the profiling log file(s)
47 */
48 static char *prof_filename; /* prof report file name = <program>.prof */
49 FILE *prof_file;
50
51 static char *hp_filename; /* heap profile (hp2ps style) log file */
52 FILE *hp_file;
53
54 /* The Current Cost Centre Stack (for attributing costs)
55 */
56 CostCentreStack *CCCS;
57
58 /* Linked lists to keep track of cc's and ccs's that haven't
59 * been declared in the log file yet
60 */
61 CostCentre *CC_LIST;
62 CostCentreStack *CCS_LIST;
63
64 /*
65 * Built-in cost centres and cost-centre stacks:
66 *
67 * MAIN is the root of the cost-centre stack tree. If there are
68 * no _scc_s in the program, all costs will be attributed
69 * to MAIN.
70 *
71 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
72 * RTS operations apart from garbage collection are attributed
73 * to SYSTEM.
74 *
75 * GC is the storage manager / garbage collector.
76 *
77 * OVERHEAD gets all costs generated by the profiling system
78 * itself. These are costs that would not be incurred
79 * during non-profiled execution of the program.
80 *
81 * SUBSUMED is the one-and-only CCS placed on top-level functions.
82 * It indicates that all costs are to be attributed to the
83 * enclosing cost centre stack. SUBSUMED never accumulates
84 * any costs. The is_caf flag is set on the subsumed cost
85 * centre.
86 *
87 * DONT_CARE is a placeholder cost-centre we assign to static
88 * constructors. It should *never* accumulate any costs.
89 */
90
91 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_IS_BORING, );
92 CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", CC_IS_BORING, );
93 CC_DECLARE(CC_GC, "GC", "GC", CC_IS_BORING, );
94 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_IS_CAF, );
95 CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", CC_IS_CAF, );
96 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_IS_BORING, );
97
98 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
99 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
100 CCS_DECLARE(CCS_GC, CC_GC, );
101 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
102 CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, );
103 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
104
105 /*
106 * Uniques for the XML log-file format
107 */
108 #define CC_UQ 1
109 #define CCS_UQ 2
110 #define TC_UQ 3
111 #define HEAP_OBJ_UQ 4
112 #define TIME_UPD_UQ 5
113 #define HEAP_UPD_UQ 6
114
115 /*
116 * Static Functions
117 */
118
119 static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
120 CostCentreStack *new_ccs );
121 static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
122 static void count_ticks ( CostCentreStack *ccs );
123 static void inherit_costs ( CostCentreStack *ccs );
124 static void reportCCS ( CostCentreStack *ccs, nat indent );
125 static void DecCCS ( CostCentreStack *ccs );
126 static void DecBackEdge ( CostCentreStack *ccs,
127 CostCentreStack *oldccs );
128 static CostCentreStack * CheckLoop ( CostCentreStack *ccs, CostCentre *cc );
129 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
130 static CostCentreStack * ActualPush ( CostCentreStack *, CostCentre * );
131 static CostCentreStack * IsInIndexTable ( IndexTable *, CostCentre * );
132 static IndexTable * AddToIndexTable ( IndexTable *, CostCentreStack *,
133 CostCentre *, unsigned int );
134 static void ccsSetSelected ( CostCentreStack *ccs );
135
136 static void initTimeProfiling ( void );
137 static void initProfilingLogFile( void );
138
139 static void reportCCS_XML ( CostCentreStack *ccs );
140
141 /* -----------------------------------------------------------------------------
142 Initialise the profiling environment
143 -------------------------------------------------------------------------- */
144
145 void
146 initProfiling1 (void)
147 {
148 // initialise our arena
149 prof_arena = newArena();
150
151 /* for the benefit of allocate()... */
152 CCCS = CCS_SYSTEM;
153
154 /* Initialize counters for IDs */
155 CC_ID = 1;
156 CCS_ID = 1;
157 HP_ID = 1;
158
159 /* Initialize Declaration lists to NULL */
160 CC_LIST = NULL;
161 CCS_LIST = NULL;
162
163 /* Register all the cost centres / stacks in the program
164 * CC_MAIN gets link = 0, all others have non-zero link.
165 */
166 REGISTER_CC(CC_MAIN);
167 REGISTER_CC(CC_SYSTEM);
168 REGISTER_CC(CC_GC);
169 REGISTER_CC(CC_OVERHEAD);
170 REGISTER_CC(CC_SUBSUMED);
171 REGISTER_CC(CC_DONT_CARE);
172 REGISTER_CCS(CCS_MAIN);
173 REGISTER_CCS(CCS_SYSTEM);
174 REGISTER_CCS(CCS_GC);
175 REGISTER_CCS(CCS_OVERHEAD);
176 REGISTER_CCS(CCS_SUBSUMED);
177 REGISTER_CCS(CCS_DONT_CARE);
178
179 CCCS = CCS_OVERHEAD;
180
181 /* cost centres are registered by the per-module
182 * initialisation code now...
183 */
184 }
185
186 void
187 freeProfiling1 (void)
188 {
189 arenaFree(prof_arena);
190 }
191
192 void
193 initProfiling2 (void)
194 {
195 CostCentreStack *ccs, *next;
196
197 CCCS = CCS_SYSTEM;
198
199 /* Set up the log file, and dump the header and cost centre
200 * information into it. */
201 initProfilingLogFile();
202
203 /* find all the "special" cost centre stacks, and make them children
204 * of CCS_MAIN.
205 */
206 ASSERT(CCS_MAIN->prevStack == 0);
207 CCS_MAIN->root = CC_MAIN;
208 ccsSetSelected(CCS_MAIN);
209 DecCCS(CCS_MAIN);
210
211 for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
212 next = ccs->prevStack;
213 ccs->prevStack = 0;
214 ActualPush_(CCS_MAIN,ccs->cc,ccs);
215 ccs->root = ccs->cc;
216 ccs = next;
217 }
218
219 if (RtsFlags.CcFlags.doCostCentres) {
220 initTimeProfiling();
221 }
222
223 if (RtsFlags.ProfFlags.doHeapProfile) {
224 initHeapProfiling();
225 }
226 }
227
228 // Decide whether closures with this CCS should contribute to the heap
229 // profile.
230 static void
231 ccsSetSelected( CostCentreStack *ccs )
232 {
233 if (RtsFlags.ProfFlags.modSelector) {
234 if (! strMatchesSelector( ccs->cc->module,
235 RtsFlags.ProfFlags.modSelector ) ) {
236 ccs->selected = 0;
237 return;
238 }
239 }
240 if (RtsFlags.ProfFlags.ccSelector) {
241 if (! strMatchesSelector( ccs->cc->label,
242 RtsFlags.ProfFlags.ccSelector ) ) {
243 ccs->selected = 0;
244 return;
245 }
246 }
247 if (RtsFlags.ProfFlags.ccsSelector) {
248 CostCentreStack *c;
249 for (c = ccs; c != NULL; c = c->prevStack) {
250 if ( strMatchesSelector( c->cc->label,
251 RtsFlags.ProfFlags.ccsSelector )) {
252 break;
253 }
254 }
255 if (c == NULL) {
256 ccs->selected = 0;
257 return;
258 }
259 }
260
261 ccs->selected = 1;
262 return;
263 }
264
265
266 static void
267 initProfilingLogFile(void)
268 {
269 char *prog;
270
271 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
272 strcpy(prog, prog_name);
273 #ifdef mingw32_HOST_OS
274 // on Windows, drop the .exe suffix if there is one
275 {
276 char *suff;
277 suff = strrchr(prog,'.');
278 if (suff != NULL && !strcmp(suff,".exe")) {
279 *suff = '\0';
280 }
281 }
282 #endif
283
284 if (RtsFlags.CcFlags.doCostCentres == 0 &&
285 RtsFlags.ProfFlags.doHeapProfile != HEAP_BY_RETAINER)
286 {
287 /* No need for the <prog>.prof file */
288 prof_filename = NULL;
289 prof_file = NULL;
290 }
291 else
292 {
293 /* Initialise the log file name */
294 prof_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
295 sprintf(prof_filename, "%s.prof", prog);
296
297 /* open the log file */
298 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
299 debugBelch("Can't open profiling report file %s\n", prof_filename);
300 RtsFlags.CcFlags.doCostCentres = 0;
301 // The following line was added by Sung; retainer/LDV profiling may need
302 // two output files, i.e., <program>.prof/hp.
303 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
304 RtsFlags.ProfFlags.doHeapProfile = 0;
305 return;
306 }
307
308 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
309 /* dump the time, and the profiling interval */
310 fprintf(prof_file, "\"%s\"\n", time_str());
311 fprintf(prof_file, "\"%d ms\"\n", RtsFlags.MiscFlags.tickInterval);
312
313 /* declare all the cost centres */
314 {
315 CostCentre *cc;
316 for (cc = CC_LIST; cc != NULL; cc = cc->link) {
317 fprintf(prof_file, "%d %ld \"%s\" \"%s\"\n",
318 CC_UQ, cc->ccID, cc->label, cc->module);
319 }
320 }
321 }
322 }
323
324 if (RtsFlags.ProfFlags.doHeapProfile) {
325 /* Initialise the log file name */
326 hp_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
327 sprintf(hp_filename, "%s.hp", prog);
328
329 /* open the log file */
330 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
331 debugBelch("Can't open profiling report file %s\n",
332 hp_filename);
333 RtsFlags.ProfFlags.doHeapProfile = 0;
334 return;
335 }
336 }
337 }
338
339 void
340 initTimeProfiling(void)
341 {
342 /* Start ticking */
343 startProfTimer();
344 };
345
346 void
347 endProfiling ( void )
348 {
349 if (RtsFlags.CcFlags.doCostCentres) {
350 stopProfTimer();
351 }
352 if (RtsFlags.ProfFlags.doHeapProfile) {
353 endHeapProfiling();
354 }
355 }
356
357 /* -----------------------------------------------------------------------------
358 Set cost centre stack when entering a function.
359 -------------------------------------------------------------------------- */
360 rtsBool entering_PAP;
361
362 void
363 EnterFunCCS ( CostCentreStack *ccsfn )
364 {
365 /* PAP_entry has already set CCCS for us */
366 if (entering_PAP) {
367 entering_PAP = rtsFalse;
368 return;
369 }
370
371 if (ccsfn->root->is_caf == CC_IS_CAF) {
372 CCCS = AppendCCS(CCCS,ccsfn);
373 } else {
374 CCCS = ccsfn;
375 }
376 }
377
378 /* -----------------------------------------------------------------------------
379 Cost-centre stack manipulation
380 -------------------------------------------------------------------------- */
381
382 #ifdef DEBUG
383 CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
384 CostCentreStack *
385 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
386 #define PushCostCentre _PushCostCentre
387 {
388 IF_DEBUG(prof,
389 traceBegin("pushing %s on ", cc->label);
390 debugCCS(ccs);
391 traceEnd(););
392
393 return PushCostCentre(ccs,cc);
394 }
395 #endif
396
397 CostCentreStack *
398 PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
399 {
400 CostCentreStack *temp_ccs;
401
402 if (ccs == EMPTY_STACK)
403 return ActualPush(ccs,cc);
404 else {
405 if (ccs->cc == cc)
406 return ccs;
407 else {
408 /* check if we've already memoized this stack */
409 temp_ccs = IsInIndexTable(ccs->indexTable,cc);
410
411 if (temp_ccs != EMPTY_STACK)
412 return temp_ccs;
413 else {
414 temp_ccs = CheckLoop(ccs,cc);
415 if (temp_ccs != NULL) {
416 /* we have recursed to an older CCS. Mark this in
417 * the index table, and emit a "back edge" into the
418 * log file.
419 */
420 ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1);
421 DecBackEdge(temp_ccs,ccs);
422 return temp_ccs;
423 } else {
424 return ActualPush(ccs,cc);
425 }
426 }
427 }
428 }
429 }
430
431 static CostCentreStack *
432 CheckLoop ( CostCentreStack *ccs, CostCentre *cc )
433 {
434 while (ccs != EMPTY_STACK) {
435 if (ccs->cc == cc)
436 return ccs;
437 ccs = ccs->prevStack;
438 }
439 return NULL;
440 }
441
442 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
443
444 #ifdef DEBUG
445 CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
446 CostCentreStack *
447 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
448 #define AppendCCS _AppendCCS
449 {
450 IF_DEBUG(prof,
451 if (ccs1 != ccs2) {
452 debugBelch("Appending ");
453 debugCCS(ccs1);
454 debugBelch(" to ");
455 debugCCS(ccs2);
456 debugBelch("\n");});
457 return AppendCCS(ccs1,ccs2);
458 }
459 #endif
460
461 CostCentreStack *
462 AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
463 {
464 CostCentreStack *ccs = NULL;
465
466 if (ccs1 == ccs2) {
467 return ccs1;
468 }
469
470 if (ccs2->cc->is_caf == CC_IS_CAF) {
471 return ccs1;
472 }
473
474 if (ccs2->prevStack != NULL) {
475 ccs = AppendCCS(ccs1, ccs2->prevStack);
476 }
477
478 return PushCostCentre(ccs,ccs2->cc);
479 }
480
481 static CostCentreStack *
482 ActualPush ( CostCentreStack *ccs, CostCentre *cc )
483 {
484 CostCentreStack *new_ccs;
485
486 /* allocate space for a new CostCentreStack */
487 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
488
489 return ActualPush_(ccs, cc, new_ccs);
490 }
491
492 static CostCentreStack *
493 ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
494 {
495 /* assign values to each member of the structure */
496 new_ccs->ccsID = CCS_ID++;
497 new_ccs->cc = cc;
498 new_ccs->prevStack = ccs;
499
500 new_ccs->indexTable = EMPTY_TABLE;
501
502 /* Initialise the various _scc_ counters to zero
503 */
504 new_ccs->scc_count = 0;
505
506 /* Initialize all other stats here. There should be a quick way
507 * that's easily used elsewhere too
508 */
509 new_ccs->time_ticks = 0;
510 new_ccs->mem_alloc = 0;
511 new_ccs->inherited_ticks = 0;
512 new_ccs->inherited_alloc = 0;
513
514 new_ccs->root = ccs->root;
515
516 // Set the selected field.
517 ccsSetSelected(new_ccs);
518
519 /* update the memoization table for the parent stack */
520 if (ccs != EMPTY_STACK)
521 ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc,
522 0/*not a back edge*/);
523
524 /* make sure this CC is declared at the next heap/time sample */
525 DecCCS(new_ccs);
526
527 /* return a pointer to the new stack */
528 return new_ccs;
529 }
530
531
532 static CostCentreStack *
533 IsInIndexTable(IndexTable *it, CostCentre *cc)
534 {
535 while (it!=EMPTY_TABLE)
536 {
537 if (it->cc==cc)
538 return it->ccs;
539 else
540 it = it->next;
541 }
542
543 /* otherwise we never found it so return EMPTY_TABLE */
544 return EMPTY_TABLE;
545 }
546
547
548 static IndexTable *
549 AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs,
550 CostCentre *cc, unsigned int back_edge)
551 {
552 IndexTable *new_it;
553
554 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
555
556 new_it->cc = cc;
557 new_it->ccs = new_ccs;
558 new_it->next = it;
559 new_it->back_edge = back_edge;
560 return new_it;
561 }
562
563
564 static void
565 DecCCS(CostCentreStack *ccs)
566 {
567 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
568 if (ccs->prevStack == EMPTY_STACK)
569 fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ,
570 ccs->ccsID, ccs->cc->ccID);
571 else
572 fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ,
573 ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID);
574 }
575 }
576
577 static void
578 DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs )
579 {
580 if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
581 if (ccs->prevStack == EMPTY_STACK)
582 fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ,
583 ccs->ccsID, ccs->cc->ccID);
584 else
585 fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ,
586 ccs->ccsID, ccs->cc->ccID, oldccs->ccsID);
587 }
588 }
589
590 /* -----------------------------------------------------------------------------
591 Generating a time & allocation profiling report.
592 -------------------------------------------------------------------------- */
593
594 /* We omit certain system-related CCs and CCSs from the default
595 * reports, so as not to cause confusion.
596 */
597 static rtsBool
598 cc_to_ignore (CostCentre *cc)
599 {
600 if ( cc == CC_OVERHEAD
601 || cc == CC_DONT_CARE
602 || cc == CC_GC
603 || cc == CC_SYSTEM) {
604 return rtsTrue;
605 } else {
606 return rtsFalse;
607 }
608 }
609
610 static rtsBool
611 ccs_to_ignore (CostCentreStack *ccs)
612 {
613 if ( ccs == CCS_OVERHEAD
614 || ccs == CCS_DONT_CARE
615 || ccs == CCS_GC
616 || ccs == CCS_SYSTEM) {
617 return rtsTrue;
618 } else {
619 return rtsFalse;
620 }
621 }
622
623 /* -----------------------------------------------------------------------------
624 Generating the aggregated per-cost-centre time/alloc report.
625 -------------------------------------------------------------------------- */
626
627 static CostCentre *sorted_cc_list;
628
629 static void
630 aggregate_cc_costs( CostCentreStack *ccs )
631 {
632 IndexTable *i;
633
634 ccs->cc->mem_alloc += ccs->mem_alloc;
635 ccs->cc->time_ticks += ccs->time_ticks;
636
637 for (i = ccs->indexTable; i != 0; i = i->next) {
638 if (!i->back_edge) {
639 aggregate_cc_costs(i->ccs);
640 }
641 }
642 }
643
644 static void
645 insert_cc_in_sorted_list( CostCentre *new_cc )
646 {
647 CostCentre **prev, *cc;
648
649 prev = &sorted_cc_list;
650 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
651 if (new_cc->time_ticks > cc->time_ticks) {
652 new_cc->link = cc;
653 *prev = new_cc;
654 return;
655 } else {
656 prev = &(cc->link);
657 }
658 }
659 new_cc->link = NULL;
660 *prev = new_cc;
661 }
662
663 static void
664 report_per_cc_costs( void )
665 {
666 CostCentre *cc, *next;
667
668 aggregate_cc_costs(CCS_MAIN);
669 sorted_cc_list = NULL;
670
671 for (cc = CC_LIST; cc != NULL; cc = next) {
672 next = cc->link;
673 if (cc->time_ticks > total_prof_ticks/100
674 || cc->mem_alloc > total_alloc/100
675 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
676 insert_cc_in_sorted_list(cc);
677 }
678 }
679
680 fprintf(prof_file, "%-30s %-20s", "COST CENTRE", "MODULE");
681 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
682 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
683 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
684 }
685 fprintf(prof_file, "\n\n");
686
687 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
688 if (cc_to_ignore(cc)) {
689 continue;
690 }
691 fprintf(prof_file, "%-30s %-20s", cc->label, cc->module);
692 fprintf(prof_file, "%6.1f %6.1f",
693 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
694 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
695 total_alloc * 100)
696 );
697
698 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
699 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
700 (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_));
701 }
702 fprintf(prof_file, "\n");
703 }
704
705 fprintf(prof_file,"\n\n");
706 }
707
708 /* -----------------------------------------------------------------------------
709 Generate the cost-centre-stack time/alloc report
710 -------------------------------------------------------------------------- */
711
712 static void
713 fprint_header( void )
714 {
715 fprintf(prof_file, "%-24s %-10s individual inherited\n", "", "");
716
717 fprintf(prof_file, "%-24s %-50s", "COST CENTRE", "MODULE");
718 fprintf(prof_file, "%6s %10s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc");
719
720 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
721 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
722 #if defined(PROFILING_DETAIL_COUNTS)
723 fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
724 "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
725 #endif
726 }
727
728 fprintf(prof_file, "\n\n");
729 }
730
731 void
732 reportCCSProfiling( void )
733 {
734 nat count;
735 char temp[128]; /* sigh: magic constant */
736
737 stopProfTimer();
738
739 total_prof_ticks = 0;
740 total_alloc = 0;
741 count_ticks(CCS_MAIN);
742
743 switch (RtsFlags.CcFlags.doCostCentres) {
744 case 0:
745 return;
746 case COST_CENTRES_XML:
747 gen_XML_logfile();
748 return;
749 default:
750 break;
751 }
752
753 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
754 time_str(), "Final");
755
756 fprintf(prof_file, "\n\t ");
757 fprintf(prof_file, " %s", prog_name);
758 fprintf(prof_file, " +RTS");
759 for (count = 0; rts_argv[count]; count++)
760 fprintf(prof_file, " %s", rts_argv[count]);
761 fprintf(prof_file, " -RTS");
762 for (count = 1; prog_argv[count]; count++)
763 fprintf(prof_file, " %s", prog_argv[count]);
764 fprintf(prof_file, "\n\n");
765
766 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
767 (double) total_prof_ticks *
768 (double) RtsFlags.MiscFlags.tickInterval / 1000,
769 (unsigned long) total_prof_ticks,
770 (int) RtsFlags.MiscFlags.tickInterval);
771
772 fprintf(prof_file, "\ttotal alloc = %11s bytes",
773 showStgWord64(total_alloc * sizeof(W_),
774 temp, rtsTrue/*commas*/));
775
776 #if defined(PROFILING_DETAIL_COUNTS)
777 fprintf(prof_file, " (%lu closures)", total_allocs);
778 #endif
779 fprintf(prof_file, " (excludes profiling overheads)\n\n");
780
781 report_per_cc_costs();
782
783 inherit_costs(CCS_MAIN);
784
785 fprint_header();
786 reportCCS(pruneCCSTree(CCS_MAIN), 0);
787 }
788
789 static void
790 reportCCS(CostCentreStack *ccs, nat indent)
791 {
792 CostCentre *cc;
793 IndexTable *i;
794
795 cc = ccs->cc;
796
797 /* Only print cost centres with non 0 data ! */
798
799 if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
800 ! ccs_to_ignore(ccs))
801 /* force printing of *all* cost centres if -P -P */
802 {
803
804 fprintf(prof_file, "%-*s%-*s %-50s",
805 indent, "", 24-indent, cc->label, cc->module);
806
807 fprintf(prof_file, "%6ld %11.0f %5.1f %5.1f %5.1f %5.1f",
808 ccs->ccsID, (double) ccs->scc_count,
809 total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
810 total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
811 total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
812 total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
813 );
814
815 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
816 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
817 (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
818 #if defined(PROFILING_DETAIL_COUNTS)
819 fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
820 ccs->mem_allocs, ccs->thunk_count,
821 ccs->function_count, ccs->pap_count,
822 ccs->subsumed_fun_count, ccs->subsumed_caf_count,
823 ccs->caffun_subsumed);
824 #endif
825 }
826 fprintf(prof_file, "\n");
827 }
828
829 for (i = ccs->indexTable; i != 0; i = i->next) {
830 if (!i->back_edge) {
831 reportCCS(i->ccs, indent+1);
832 }
833 }
834 }
835
836
837 /* Traverse the cost centre stack tree and accumulate
838 * ticks/allocations.
839 */
840 static void
841 count_ticks(CostCentreStack *ccs)
842 {
843 IndexTable *i;
844
845 if (!ccs_to_ignore(ccs)) {
846 total_alloc += ccs->mem_alloc;
847 total_prof_ticks += ccs->time_ticks;
848 }
849 for (i = ccs->indexTable; i != NULL; i = i->next)
850 if (!i->back_edge) {
851 count_ticks(i->ccs);
852 }
853 }
854
855 /* Traverse the cost centre stack tree and inherit ticks & allocs.
856 */
857 static void
858 inherit_costs(CostCentreStack *ccs)
859 {
860 IndexTable *i;
861
862 if (ccs_to_ignore(ccs)) { return; }
863
864 ccs->inherited_ticks += ccs->time_ticks;
865 ccs->inherited_alloc += ccs->mem_alloc;
866
867 for (i = ccs->indexTable; i != NULL; i = i->next)
868 if (!i->back_edge) {
869 inherit_costs(i->ccs);
870 ccs->inherited_ticks += i->ccs->inherited_ticks;
871 ccs->inherited_alloc += i->ccs->inherited_alloc;
872 }
873
874 return;
875 }
876
877 static CostCentreStack *
878 pruneCCSTree( CostCentreStack *ccs )
879 {
880 CostCentreStack *ccs1;
881 IndexTable *i, **prev;
882
883 prev = &ccs->indexTable;
884 for (i = ccs->indexTable; i != 0; i = i->next) {
885 if (i->back_edge) { continue; }
886
887 ccs1 = pruneCCSTree(i->ccs);
888 if (ccs1 == NULL) {
889 *prev = i->next;
890 } else {
891 prev = &(i->next);
892 }
893 }
894
895 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
896 /* force printing of *all* cost centres if -P -P */ )
897
898 || ( ccs->indexTable != 0 )
899 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
900 ) {
901 return ccs;
902 } else {
903 return NULL;
904 }
905 }
906
907 /* -----------------------------------------------------------------------------
908 Generate the XML time/allocation profile
909 -------------------------------------------------------------------------- */
910
911 void
912 gen_XML_logfile( void )
913 {
914 fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks);
915
916 reportCCS_XML(pruneCCSTree(CCS_MAIN));
917
918 fprintf(prof_file, " 0\n");
919 }
920
921 static void
922 reportCCS_XML(CostCentreStack *ccs)
923 {
924 CostCentre *cc;
925 IndexTable *i;
926
927 if (ccs_to_ignore(ccs)) { return; }
928
929 cc = ccs->cc;
930
931 fprintf(prof_file, " 1 %ld %" FMT_Word64 " %" FMT_Word64 " %" FMT_Word64,
932 ccs->ccsID, ccs->scc_count, (StgWord64)(ccs->time_ticks), ccs->mem_alloc);
933
934 for (i = ccs->indexTable; i != 0; i = i->next) {
935 if (!i->back_edge) {
936 reportCCS_XML(i->ccs);
937 }
938 }
939 }
940
941 void
942 fprintCCS( FILE *f, CostCentreStack *ccs )
943 {
944 fprintf(f,"<");
945 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
946 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
947 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
948 fprintf(f,",");
949 }
950 }
951 fprintf(f,">");
952 }
953
954 /* For calling from .cmm code, where we can't reliably refer to stderr */
955 void
956 fprintCCS_stderr( CostCentreStack *ccs )
957 {
958 fprintCCS(stderr, ccs);
959 }
960
961 #ifdef DEBUG
962 void
963 debugCCS( CostCentreStack *ccs )
964 {
965 debugBelch("<");
966 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
967 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
968 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
969 debugBelch(",");
970 }
971 }
972 debugBelch(">");
973 }
974 #endif /* DEBUG */
975
976 #endif /* PROFILING */