Merge branch 'master' of http://darcs.haskell.org/ghc
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "Printer.h"
21
22 #include <string.h>
23
24 #ifdef DEBUG
25 #include "Trace.h"
26 #endif
27
28 /*
29 * Profiling allocation arena.
30 */
31 Arena *prof_arena;
32
33 /*
34 * Global variables used to assign unique IDs to cc's, ccs's, and
35 * closure_cats
36 */
37
38 unsigned int CC_ID = 1;
39 unsigned int CCS_ID = 1;
40
41 /* figures for the profiling report.
42 */
43 static StgWord64 total_alloc;
44 static lnat total_prof_ticks;
45
46 /* Globals for opening the profiling log file(s)
47 */
48 static char *prof_filename; /* prof report file name = <program>.prof */
49 FILE *prof_file;
50
51 static char *hp_filename; /* heap profile (hp2ps style) log file */
52 FILE *hp_file;
53
54 /* The Current Cost Centre Stack (for attributing costs)
55 */
56 CostCentreStack *CCCS;
57
58 /* Linked lists to keep track of CCs and CCSs that haven't
59 * been declared in the log file yet
60 */
61 CostCentre *CC_LIST = NULL;
62 CostCentreStack *CCS_LIST = NULL;
63
64 /*
65 * Built-in cost centres and cost-centre stacks:
66 *
67 * MAIN is the root of the cost-centre stack tree. If there are
68 * no _scc_s in the program, all costs will be attributed
69 * to MAIN.
70 *
71 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
72 * RTS operations apart from garbage collection are attributed
73 * to SYSTEM.
74 *
75 * GC is the storage manager / garbage collector.
76 *
77 * OVERHEAD gets all costs generated by the profiling system
78 * itself. These are costs that would not be incurred
79 * during non-profiled execution of the program.
80 *
81 * DONT_CARE is a placeholder cost-centre we assign to static
82 * constructors. It should *never* accumulate any costs.
83 *
84 * PINNED accumulates memory allocated to pinned objects, which
85 * cannot be profiled separately because we cannot reliably
86 * traverse pinned memory.
87 */
88
89 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_NOT_CAF, );
90 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", CC_NOT_CAF, );
91 CC_DECLARE(CC_GC, "GC", "GC", CC_NOT_CAF, );
92 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_NOT_CAF, );
93 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_NOT_CAF, );
94 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", CC_NOT_CAF, );
95
96 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
97 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
98 CCS_DECLARE(CCS_GC, CC_GC, );
99 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
100 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
101 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
102
103 /*
104 * Static Functions
105 */
106
107 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
108 CostCentreStack *ccs2 );
109 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
110 CostCentreStack *new_ccs );
111 static rtsBool ignoreCCS ( CostCentreStack *ccs );
112 static void countTickss ( CostCentreStack *ccs );
113 static void inheritCosts ( CostCentreStack *ccs );
114 static void findCCSMaxLens ( CostCentreStack *ccs,
115 nat indent,
116 nat *max_label_len,
117 nat *max_module_len );
118 static void logCCS ( CostCentreStack *ccs,
119 nat indent,
120 nat max_label_len,
121 nat max_module_len );
122 static void reportCCS ( CostCentreStack *ccs );
123 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
124 CostCentre *cc );
125 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
126 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
127 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
128 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
129 CostCentre *, unsigned int );
130 static void ccsSetSelected ( CostCentreStack *ccs );
131
132 static void initTimeProfiling ( void );
133 static void initProfilingLogFile ( void );
134
135 /* -----------------------------------------------------------------------------
136 Initialise the profiling environment
137 -------------------------------------------------------------------------- */
138
139 void
140 initProfiling1 (void)
141 {
142 // initialise our arena
143 prof_arena = newArena();
144
145 /* for the benefit of allocate()... */
146 CCCS = CCS_SYSTEM;
147 }
148
149 void
150 freeProfiling (void)
151 {
152 arenaFree(prof_arena);
153 }
154
155 void
156 initProfiling2 (void)
157 {
158 CostCentreStack *ccs, *next;
159
160 CCCS = CCS_SYSTEM;
161
162 /* Set up the log file, and dump the header and cost centre
163 * information into it.
164 */
165 initProfilingLogFile();
166
167 /* Register all the cost centres / stacks in the program
168 * CC_MAIN gets link = 0, all others have non-zero link.
169 */
170 REGISTER_CC(CC_MAIN);
171 REGISTER_CC(CC_SYSTEM);
172 REGISTER_CC(CC_GC);
173 REGISTER_CC(CC_OVERHEAD);
174 REGISTER_CC(CC_DONT_CARE);
175 REGISTER_CC(CC_PINNED);
176
177 REGISTER_CCS(CCS_SYSTEM);
178 REGISTER_CCS(CCS_GC);
179 REGISTER_CCS(CCS_OVERHEAD);
180 REGISTER_CCS(CCS_DONT_CARE);
181 REGISTER_CCS(CCS_PINNED);
182 REGISTER_CCS(CCS_MAIN);
183
184 /* find all the registered cost centre stacks, and make them
185 * children of CCS_MAIN.
186 */
187 ASSERT(CCS_LIST == CCS_MAIN);
188 CCS_LIST = CCS_LIST->prevStack;
189 CCS_MAIN->prevStack = NULL;
190 CCS_MAIN->root = CCS_MAIN;
191 ccsSetSelected(CCS_MAIN);
192
193 // make CCS_MAIN the parent of all the pre-defined CCSs.
194 for (ccs = CCS_LIST; ccs != NULL; ) {
195 next = ccs->prevStack;
196 ccs->prevStack = NULL;
197 actualPush_(CCS_MAIN,ccs->cc,ccs);
198 ccs->root = ccs;
199 ccs = next;
200 }
201
202 if (RtsFlags.CcFlags.doCostCentres) {
203 initTimeProfiling();
204 }
205
206 if (RtsFlags.ProfFlags.doHeapProfile) {
207 initHeapProfiling();
208 }
209 }
210
211
212 static void
213 initProfilingLogFile(void)
214 {
215 char *prog;
216
217 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
218 strcpy(prog, prog_name);
219 #ifdef mingw32_HOST_OS
220 // on Windows, drop the .exe suffix if there is one
221 {
222 char *suff;
223 suff = strrchr(prog,'.');
224 if (suff != NULL && !strcmp(suff,".exe")) {
225 *suff = '\0';
226 }
227 }
228 #endif
229
230 if (RtsFlags.CcFlags.doCostCentres == 0 &&
231 RtsFlags.ProfFlags.doHeapProfile != HEAP_BY_RETAINER)
232 {
233 /* No need for the <prog>.prof file */
234 prof_filename = NULL;
235 prof_file = NULL;
236 }
237 else
238 {
239 /* Initialise the log file name */
240 prof_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
241 sprintf(prof_filename, "%s.prof", prog);
242
243 /* open the log file */
244 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
245 debugBelch("Can't open profiling report file %s\n", prof_filename);
246 RtsFlags.CcFlags.doCostCentres = 0;
247 // The following line was added by Sung; retainer/LDV profiling may need
248 // two output files, i.e., <program>.prof/hp.
249 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
250 RtsFlags.ProfFlags.doHeapProfile = 0;
251 return;
252 }
253 }
254
255 if (RtsFlags.ProfFlags.doHeapProfile) {
256 /* Initialise the log file name */
257 hp_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
258 sprintf(hp_filename, "%s.hp", prog);
259
260 /* open the log file */
261 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
262 debugBelch("Can't open profiling report file %s\n",
263 hp_filename);
264 RtsFlags.ProfFlags.doHeapProfile = 0;
265 return;
266 }
267 }
268 }
269
270 void
271 initTimeProfiling(void)
272 {
273 /* Start ticking */
274 startProfTimer();
275 };
276
277 void
278 endProfiling ( void )
279 {
280 if (RtsFlags.CcFlags.doCostCentres) {
281 stopProfTimer();
282 }
283 if (RtsFlags.ProfFlags.doHeapProfile) {
284 endHeapProfiling();
285 }
286 }
287
288 /* -----------------------------------------------------------------------------
289 Set CCCS when entering a function.
290
291 The algorithm is as follows.
292
293 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
294
295 where
296
297 dropCommonPrefix A B
298 -- returns the suffix of B after removing any prefix common
299 -- to both A and B.
300
301 e.g.
302
303 <a,b,c> ++> <> = <a,b,c>
304 <a,b,c> ++> <d> = <a,b,c,d>
305 <a,b,c> ++> <a,b> = <a,b,c>
306 <a,b> ++> <a,b,c> = <a,b,c>
307 <a,b,c> ++> <a,b,d> = <a,b,c,d>
308
309 -------------------------------------------------------------------------- */
310
311 // implements c1 ++> c2, where c1 and c2 are equal depth
312 //
313 static void enterFunEqualStacks (CostCentreStack *ccs, CostCentreStack *ccsfn)
314 {
315 ASSERT(ccs->depth == ccsfn->depth);
316 if (ccs == ccsfn) return;
317 enterFunEqualStacks(ccs->prevStack, ccsfn->prevStack);
318 CCCS = pushCostCentre(CCCS, ccsfn->cc);
319 }
320
321 // implements c1 ++> c2, where c2 is deeper than c1.
322 // Drop elements of c2 until we have equal stacks, call
323 // enterFunEqualStacks(), and then push on the elements that we
324 // dropped in reverse order.
325 //
326 static void enterFunCurShorter (CostCentreStack *ccsfn, StgWord n)
327 {
328 if (n == 0) {
329 ASSERT(ccsfn->depth == CCCS->depth);
330 enterFunEqualStacks(CCCS,ccsfn);
331 return;
332 }
333 enterFunCurShorter(ccsfn->prevStack, n-1);
334 CCCS = pushCostCentre(CCCS, ccsfn->cc);
335 }
336
337 void enterFunCCS ( CostCentreStack *ccsfn )
338 {
339 // common case 1: both stacks are the same
340 if (ccsfn == CCCS) {
341 return;
342 }
343
344 // common case 2: the function stack is empty, or just CAF
345 if (ccsfn->prevStack == CCS_MAIN) {
346 return;
347 }
348
349 // common case 3: the stacks are completely different (e.g. one is a
350 // descendent of MAIN and the other of a CAF): we append the whole
351 // of the function stack to the current CCS.
352 if (ccsfn->root != CCCS->root) {
353 CCCS = appendCCS(CCCS,ccsfn);
354 return;
355 }
356
357 // uncommon case 4: CCCS is deeper than ccsfn
358 if (CCCS->depth > ccsfn->depth) {
359 nat i, n;
360 CostCentreStack *tmp = CCCS;
361 n = CCCS->depth - ccsfn->depth;
362 for (i = 0; i < n; i++) {
363 tmp = tmp->prevStack;
364 }
365 enterFunEqualStacks(tmp,ccsfn);
366 return;
367 }
368
369 // uncommon case 5: ccsfn is deeper than CCCS
370 if (ccsfn->depth > CCCS->depth) {
371 enterFunCurShorter(ccsfn, ccsfn->depth - CCCS->depth);
372 return;
373 }
374
375 // uncommon case 6: stacks are equal depth, but different
376 enterFunEqualStacks(CCCS,ccsfn);
377 }
378
379 /* -----------------------------------------------------------------------------
380 Decide whether closures with this CCS should contribute to the heap
381 profile.
382 -------------------------------------------------------------------------- */
383
384 static void
385 ccsSetSelected (CostCentreStack *ccs)
386 {
387 if (RtsFlags.ProfFlags.modSelector) {
388 if (! strMatchesSelector (ccs->cc->module,
389 RtsFlags.ProfFlags.modSelector) ) {
390 ccs->selected = 0;
391 return;
392 }
393 }
394 if (RtsFlags.ProfFlags.ccSelector) {
395 if (! strMatchesSelector (ccs->cc->label,
396 RtsFlags.ProfFlags.ccSelector) ) {
397 ccs->selected = 0;
398 return;
399 }
400 }
401 if (RtsFlags.ProfFlags.ccsSelector) {
402 CostCentreStack *c;
403 for (c = ccs; c != NULL; c = c->prevStack)
404 {
405 if ( strMatchesSelector (c->cc->label,
406 RtsFlags.ProfFlags.ccsSelector) ) {
407 break;
408 }
409 }
410 if (c == NULL) {
411 ccs->selected = 0;
412 return;
413 }
414 }
415
416 ccs->selected = 1;
417 return;
418 }
419
420 /* -----------------------------------------------------------------------------
421 Cost-centre stack manipulation
422 -------------------------------------------------------------------------- */
423
424 #ifdef DEBUG
425 CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
426 CostCentreStack *
427 pushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
428 #define pushCostCentre _pushCostCentre
429 {
430 IF_DEBUG(prof,
431 traceBegin("pushing %s on ", cc->label);
432 debugCCS(ccs);
433 traceEnd(););
434
435 return pushCostCentre(ccs,cc);
436 }
437 #endif
438
439 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
440
441 #ifdef DEBUG
442 CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
443 CostCentreStack *
444 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
445 #define appendCCS _appendCCS
446 {
447 IF_DEBUG(prof,
448 if (ccs1 != ccs2) {
449 debugBelch("Appending ");
450 debugCCS(ccs1);
451 debugBelch(" to ");
452 debugCCS(ccs2);
453 debugBelch("\n");});
454 return appendCCS(ccs1,ccs2);
455 }
456 #endif
457
458 CostCentreStack *
459 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
460 {
461 if (ccs1 == ccs2) {
462 return ccs1;
463 }
464
465 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) {
466 // stop at a CAF element
467 return ccs1;
468 }
469
470 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
471 }
472
473 // Pick one:
474 // #define RECURSION_DROPS
475 #define RECURSION_TRUNCATES
476
477 CostCentreStack *
478 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
479 {
480 CostCentreStack *temp_ccs;
481
482 if (ccs == EMPTY_STACK)
483 return actualPush(ccs,cc);
484 else {
485 if (ccs->cc == cc)
486 return ccs;
487 else {
488 // check if we've already memoized this stack
489 temp_ccs = isInIndexTable(ccs->indexTable,cc);
490
491 if (temp_ccs != EMPTY_STACK)
492 return temp_ccs;
493 else {
494 temp_ccs = checkLoop(ccs,cc);
495 if (temp_ccs != NULL) {
496 // This CC is already in the stack somewhere.
497 // This could be recursion, or just calling
498 // another function with the same CC.
499 // A number of policies are possible at this
500 // point, we implement two here:
501 // - truncate the stack to the previous instance
502 // of this CC
503 // - ignore this push, return the same stack.
504 //
505 CostCentreStack *new_ccs;
506 #if defined(RECURSION_TRUNCATES)
507 new_ccs = temp_ccs;
508 #else // defined(RECURSION_DROPS)
509 new_ccs = ccs;
510 #endif
511 ccs->indexTable = addToIndexTable (ccs->indexTable,
512 new_ccs, cc, 1);
513 return new_ccs;
514 } else {
515 return actualPush (ccs,cc);
516 }
517 }
518 }
519 }
520 }
521
522 static CostCentreStack *
523 checkLoop (CostCentreStack *ccs, CostCentre *cc)
524 {
525 while (ccs != EMPTY_STACK) {
526 if (ccs->cc == cc)
527 return ccs;
528 ccs = ccs->prevStack;
529 }
530 return NULL;
531 }
532
533 static CostCentreStack *
534 actualPush (CostCentreStack *ccs, CostCentre *cc)
535 {
536 CostCentreStack *new_ccs;
537
538 // allocate space for a new CostCentreStack
539 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
540
541 return actualPush_(ccs, cc, new_ccs);
542 }
543
544 static CostCentreStack *
545 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
546 {
547 /* assign values to each member of the structure */
548 new_ccs->ccsID = CCS_ID++;
549 new_ccs->cc = cc;
550 new_ccs->prevStack = ccs;
551 new_ccs->root = ccs->root;
552 new_ccs->depth = ccs->depth + 1;
553
554 new_ccs->indexTable = EMPTY_TABLE;
555
556 /* Initialise the various _scc_ counters to zero
557 */
558 new_ccs->scc_count = 0;
559
560 /* Initialize all other stats here. There should be a quick way
561 * that's easily used elsewhere too
562 */
563 new_ccs->time_ticks = 0;
564 new_ccs->mem_alloc = 0;
565 new_ccs->inherited_ticks = 0;
566 new_ccs->inherited_alloc = 0;
567
568 // Set the selected field.
569 ccsSetSelected(new_ccs);
570
571 /* update the memoization table for the parent stack */
572 if (ccs != EMPTY_STACK) {
573 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
574 0/*not a back edge*/);
575 }
576
577 /* return a pointer to the new stack */
578 return new_ccs;
579 }
580
581
582 static CostCentreStack *
583 isInIndexTable(IndexTable *it, CostCentre *cc)
584 {
585 while (it!=EMPTY_TABLE)
586 {
587 if (it->cc == cc)
588 return it->ccs;
589 else
590 it = it->next;
591 }
592
593 /* otherwise we never found it so return EMPTY_TABLE */
594 return EMPTY_TABLE;
595 }
596
597
598 static IndexTable *
599 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
600 CostCentre *cc, unsigned int back_edge)
601 {
602 IndexTable *new_it;
603
604 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
605
606 new_it->cc = cc;
607 new_it->ccs = new_ccs;
608 new_it->next = it;
609 new_it->back_edge = back_edge;
610 return new_it;
611 }
612
613 /* -----------------------------------------------------------------------------
614 Generating a time & allocation profiling report.
615 -------------------------------------------------------------------------- */
616
617 /* We omit certain system-related CCs and CCSs from the default
618 * reports, so as not to cause confusion.
619 */
620 static rtsBool
621 ignoreCC (CostCentre *cc)
622 {
623 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
624 ( cc == CC_OVERHEAD
625 || cc == CC_DONT_CARE
626 || cc == CC_GC
627 || cc == CC_SYSTEM)) {
628 return rtsTrue;
629 } else {
630 return rtsFalse;
631 }
632 }
633
634 static rtsBool
635 ignoreCCS (CostCentreStack *ccs)
636 {
637 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
638 ( ccs == CCS_OVERHEAD
639 || ccs == CCS_DONT_CARE
640 || ccs == CCS_GC
641 || ccs == CCS_SYSTEM)) {
642 return rtsTrue;
643 } else {
644 return rtsFalse;
645 }
646 }
647
648 /* -----------------------------------------------------------------------------
649 Generating the aggregated per-cost-centre time/alloc report.
650 -------------------------------------------------------------------------- */
651
652 static CostCentre *sorted_cc_list;
653
654 static void
655 aggregateCCCosts( CostCentreStack *ccs )
656 {
657 IndexTable *i;
658
659 ccs->cc->mem_alloc += ccs->mem_alloc;
660 ccs->cc->time_ticks += ccs->time_ticks;
661
662 for (i = ccs->indexTable; i != 0; i = i->next) {
663 if (!i->back_edge) {
664 aggregateCCCosts(i->ccs);
665 }
666 }
667 }
668
669 static void
670 insertCCInSortedList( CostCentre *new_cc )
671 {
672 CostCentre **prev, *cc;
673
674 prev = &sorted_cc_list;
675 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
676 if (new_cc->time_ticks > cc->time_ticks) {
677 new_cc->link = cc;
678 *prev = new_cc;
679 return;
680 } else {
681 prev = &(cc->link);
682 }
683 }
684 new_cc->link = NULL;
685 *prev = new_cc;
686 }
687
688 static nat
689 strlen_utf8 (char *s)
690 {
691 nat n = 0;
692 unsigned char c;
693
694 for (; *s != '\0'; s++) {
695 c = *s;
696 if (c < 0x80 || c > 0xBF) n++;
697 }
698 return n;
699 }
700
701 static void
702 reportPerCCCosts( void )
703 {
704 CostCentre *cc, *next;
705 nat max_label_len, max_module_len;
706
707 aggregateCCCosts(CCS_MAIN);
708 sorted_cc_list = NULL;
709
710 max_label_len = 11; // no shorter than the "COST CENTRE" header
711 max_module_len = 7; // no shorter than the "MODULE" header
712
713 for (cc = CC_LIST; cc != NULL; cc = next) {
714 next = cc->link;
715 if (cc->time_ticks > total_prof_ticks/100
716 || cc->mem_alloc > total_alloc/100
717 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
718 insertCCInSortedList(cc);
719
720 max_label_len = stg_max(strlen_utf8(cc->label), max_label_len);
721 max_module_len = stg_max(strlen_utf8(cc->module), max_module_len);
722 }
723 }
724
725 fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
726 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
727 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
728 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
729 }
730 fprintf(prof_file, "\n\n");
731
732 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
733 if (ignoreCC(cc)) {
734 continue;
735 }
736 fprintf(prof_file, "%s%*s %s%*s",
737 cc->label,
738 max_label_len - strlen_utf8(cc->label), "",
739 cc->module,
740 max_module_len - strlen_utf8(cc->module), "");
741
742 fprintf(prof_file, "%6.1f %6.1f",
743 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
744 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
745 total_alloc * 100)
746 );
747
748 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
749 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
750 (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_));
751 }
752 fprintf(prof_file, "\n");
753 }
754
755 fprintf(prof_file,"\n\n");
756 }
757
758 /* -----------------------------------------------------------------------------
759 Generate the cost-centre-stack time/alloc report
760 -------------------------------------------------------------------------- */
761
762 static void
763 fprintHeader( nat max_label_len, nat max_module_len )
764 {
765 fprintf(prof_file, "%-*s %-*s%6s %11s %11s %11s\n", max_label_len, "", max_module_len, "", "", "", "individual", "inherited");
766
767 fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
768 fprintf(prof_file, "%6s %11s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc");
769
770 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
771 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
772 }
773
774 fprintf(prof_file, "\n\n");
775 }
776
777 void
778 reportCCSProfiling( void )
779 {
780 nat count;
781 char temp[128]; /* sigh: magic constant */
782
783 stopProfTimer();
784
785 total_prof_ticks = 0;
786 total_alloc = 0;
787 countTickss(CCS_MAIN);
788
789 if (RtsFlags.CcFlags.doCostCentres == 0) return;
790
791 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
792 time_str(), "Final");
793
794 fprintf(prof_file, "\n\t ");
795 fprintf(prof_file, " %s", prog_name);
796 fprintf(prof_file, " +RTS");
797 for (count = 0; rts_argv[count]; count++)
798 fprintf(prof_file, " %s", rts_argv[count]);
799 fprintf(prof_file, " -RTS");
800 for (count = 1; prog_argv[count]; count++)
801 fprintf(prof_file, " %s", prog_argv[count]);
802 fprintf(prof_file, "\n\n");
803
804 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d us)\n",
805 ((double) total_prof_ticks *
806 (double) RtsFlags.MiscFlags.tickInterval) / TIME_RESOLUTION,
807 (unsigned long) total_prof_ticks,
808 (int) TimeToUS(RtsFlags.MiscFlags.tickInterval));
809
810 fprintf(prof_file, "\ttotal alloc = %11s bytes",
811 showStgWord64(total_alloc * sizeof(W_),
812 temp, rtsTrue/*commas*/));
813
814 fprintf(prof_file, " (excludes profiling overheads)\n\n");
815
816 reportPerCCCosts();
817
818 inheritCosts(CCS_MAIN);
819
820 reportCCS(pruneCCSTree(CCS_MAIN));
821 }
822
823 static void
824 findCCSMaxLens(CostCentreStack *ccs, nat indent, nat *max_label_len, nat *max_module_len) {
825 CostCentre *cc;
826 IndexTable *i;
827
828 cc = ccs->cc;
829
830 *max_label_len = stg_max(*max_label_len, indent + strlen_utf8(cc->label));
831 *max_module_len = stg_max(*max_module_len, strlen_utf8(cc->module));
832
833 for (i = ccs->indexTable; i != 0; i = i->next) {
834 if (!i->back_edge) {
835 findCCSMaxLens(i->ccs, indent+1, max_label_len, max_module_len);
836 }
837 }
838 }
839
840 static void
841 logCCS(CostCentreStack *ccs, nat indent, nat max_label_len, nat max_module_len)
842 {
843 CostCentre *cc;
844 IndexTable *i;
845
846 cc = ccs->cc;
847
848 /* Only print cost centres with non 0 data ! */
849
850 if (!ignoreCCS(ccs))
851 /* force printing of *all* cost centres if -Pa */
852 {
853
854 fprintf(prof_file, "%-*s%s%*s %s%*s",
855 indent, "",
856 cc->label,
857 max_label_len-indent - strlen_utf8(cc->label), "",
858 cc->module,
859 max_module_len - strlen_utf8(cc->module), "");
860
861 fprintf(prof_file, "%6ld %11" FMT_Word64 " %5.1f %5.1f %5.1f %5.1f",
862 ccs->ccsID, ccs->scc_count,
863 total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
864 total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
865 total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
866 total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
867 );
868
869 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
870 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
871 (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
872 }
873 fprintf(prof_file, "\n");
874 }
875
876 for (i = ccs->indexTable; i != 0; i = i->next) {
877 if (!i->back_edge) {
878 logCCS(i->ccs, indent+1, max_label_len, max_module_len);
879 }
880 }
881 }
882
883 static void
884 reportCCS(CostCentreStack *ccs)
885 {
886 nat max_label_len, max_module_len;
887
888 max_label_len = 11; // no shorter than "COST CENTRE" header
889 max_module_len = 7; // no shorter than "MODULE" header
890
891 findCCSMaxLens(ccs, 0, &max_label_len, &max_module_len);
892
893 fprintHeader(max_label_len, max_module_len);
894 logCCS(ccs, 0, max_label_len, max_module_len);
895 }
896
897
898 /* Traverse the cost centre stack tree and accumulate
899 * ticks/allocations.
900 */
901 static void
902 countTickss(CostCentreStack *ccs)
903 {
904 IndexTable *i;
905
906 if (!ignoreCCS(ccs)) {
907 total_alloc += ccs->mem_alloc;
908 total_prof_ticks += ccs->time_ticks;
909 }
910 for (i = ccs->indexTable; i != NULL; i = i->next)
911 if (!i->back_edge) {
912 countTickss(i->ccs);
913 }
914 }
915
916 /* Traverse the cost centre stack tree and inherit ticks & allocs.
917 */
918 static void
919 inheritCosts(CostCentreStack *ccs)
920 {
921 IndexTable *i;
922
923 if (ignoreCCS(ccs)) { return; }
924
925 ccs->inherited_ticks += ccs->time_ticks;
926 ccs->inherited_alloc += ccs->mem_alloc;
927
928 for (i = ccs->indexTable; i != NULL; i = i->next)
929 if (!i->back_edge) {
930 inheritCosts(i->ccs);
931 ccs->inherited_ticks += i->ccs->inherited_ticks;
932 ccs->inherited_alloc += i->ccs->inherited_alloc;
933 }
934
935 return;
936 }
937
938 //
939 // Prune CCSs with zero entries, zero ticks or zero allocation from
940 // the tree, unless COST_CENTRES_ALL is on.
941 //
942 static CostCentreStack *
943 pruneCCSTree (CostCentreStack *ccs)
944 {
945 CostCentreStack *ccs1;
946 IndexTable *i, **prev;
947
948 prev = &ccs->indexTable;
949 for (i = ccs->indexTable; i != 0; i = i->next) {
950 if (i->back_edge) { continue; }
951
952 ccs1 = pruneCCSTree(i->ccs);
953 if (ccs1 == NULL) {
954 *prev = i->next;
955 } else {
956 prev = &(i->next);
957 }
958 }
959
960 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
961 /* force printing of *all* cost centres if -P -P */ )
962
963 || ( ccs->indexTable != 0 )
964 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
965 ) {
966 return ccs;
967 } else {
968 return NULL;
969 }
970 }
971
972 void
973 fprintCCS( FILE *f, CostCentreStack *ccs )
974 {
975 fprintf(f,"<");
976 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
977 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
978 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
979 fprintf(f,",");
980 }
981 }
982 fprintf(f,">");
983 }
984
985 // Returns: True if the call stack ended with CAF
986 static rtsBool fprintCallStack (CostCentreStack *ccs)
987 {
988 CostCentreStack *prev;
989
990 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
991 prev = ccs->prevStack;
992 while (prev && prev != CCS_MAIN) {
993 ccs = prev;
994 fprintf(stderr, ",\n called from %s.%s",
995 ccs->cc->module, ccs->cc->label);
996 prev = ccs->prevStack;
997 }
998 fprintf(stderr, "\n");
999
1000 return (!strncmp(ccs->cc->label, "CAF", 3));
1001 }
1002
1003 /* For calling from .cmm code, where we can't reliably refer to stderr */
1004 void
1005 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
1006 {
1007 rtsBool is_caf;
1008 StgPtr frame;
1009 StgStack *stack;
1010 CostCentreStack *prev_ccs;
1011 nat depth = 0;
1012 const nat MAX_DEPTH = 10; // don't print gigantic chains of stacks
1013
1014 {
1015 char *desc;
1016 StgInfoTable *info;
1017 info = get_itbl(exception);
1018 switch (info->type) {
1019 case CONSTR:
1020 case CONSTR_1_0:
1021 case CONSTR_0_1:
1022 case CONSTR_2_0:
1023 case CONSTR_1_1:
1024 case CONSTR_0_2:
1025 case CONSTR_STATIC:
1026 case CONSTR_NOCAF_STATIC:
1027 desc = GET_CON_DESC(itbl_to_con_itbl(info));
1028 default:
1029 desc = closure_type_names[info->type];
1030 }
1031 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
1032 }
1033
1034 is_caf = fprintCallStack(ccs);
1035
1036 // traverse the stack down to the enclosing update frame to
1037 // find out where this CCS was evaluated from...
1038
1039 stack = tso->stackobj;
1040 frame = stack->sp;
1041 prev_ccs = ccs;
1042
1043 for (; is_caf && depth < MAX_DEPTH; depth++)
1044 {
1045 switch (get_itbl((StgClosure*)frame)->type)
1046 {
1047 case UPDATE_FRAME:
1048 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
1049 frame += sizeofW(StgUpdateFrame);
1050 if (ccs == CCS_MAIN) {
1051 goto done;
1052 }
1053 if (ccs == prev_ccs) {
1054 // ignore if this is the same as the previous stack,
1055 // we're probably in library code and haven't
1056 // accumulated any more interesting stack items
1057 // since the last update frame.
1058 break;
1059 }
1060 prev_ccs = ccs;
1061 fprintf(stderr, " --> evaluated by: ");
1062 is_caf = fprintCallStack(ccs);
1063 break;
1064 case UNDERFLOW_FRAME:
1065 stack = ((StgUnderflowFrame*)frame)->next_chunk;
1066 frame = stack->sp;
1067 break;
1068 case STOP_FRAME:
1069 goto done;
1070 default:
1071 frame += stack_frame_sizeW((StgClosure*)frame);
1072 break;
1073 }
1074 }
1075 done:
1076 return;
1077 }
1078
1079 #ifdef DEBUG
1080 void
1081 debugCCS( CostCentreStack *ccs )
1082 {
1083 debugBelch("<");
1084 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1085 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
1086 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1087 debugBelch(",");
1088 }
1089 }
1090 debugBelch(">");
1091 }
1092 #endif /* DEBUG */
1093
1094 #endif /* PROFILING */