Overhaul of infrastructure for profiling, coverage (HPC) and breakpoints
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20
21 #include <string.h>
22
23 #ifdef DEBUG
24 #include "Trace.h"
25 #endif
26
27 /*
28 * Profiling allocation arena.
29 */
30 Arena *prof_arena;
31
32 /*
33 * Global variables used to assign unique IDs to cc's, ccs's, and
34 * closure_cats
35 */
36
37 unsigned int CC_ID = 1;
38 unsigned int CCS_ID = 1;
39
40 /* figures for the profiling report.
41 */
42 static StgWord64 total_alloc;
43 static lnat total_prof_ticks;
44
45 /* Globals for opening the profiling log file(s)
46 */
47 static char *prof_filename; /* prof report file name = <program>.prof */
48 FILE *prof_file;
49
50 static char *hp_filename; /* heap profile (hp2ps style) log file */
51 FILE *hp_file;
52
53 /* The Current Cost Centre Stack (for attributing costs)
54 */
55 CostCentreStack *CCCS;
56
57 /* Linked lists to keep track of CCs and CCSs that haven't
58 * been declared in the log file yet
59 */
60 CostCentre *CC_LIST = NULL;
61 CostCentreStack *CCS_LIST = NULL;
62
63 /*
64 * Built-in cost centres and cost-centre stacks:
65 *
66 * MAIN is the root of the cost-centre stack tree. If there are
67 * no _scc_s in the program, all costs will be attributed
68 * to MAIN.
69 *
70 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
71 * RTS operations apart from garbage collection are attributed
72 * to SYSTEM.
73 *
74 * GC is the storage manager / garbage collector.
75 *
76 * OVERHEAD gets all costs generated by the profiling system
77 * itself. These are costs that would not be incurred
78 * during non-profiled execution of the program.
79 *
80 * DONT_CARE is a placeholder cost-centre we assign to static
81 * constructors. It should *never* accumulate any costs.
82 *
83 * PINNED accumulates memory allocated to pinned objects, which
84 * cannot be profiled separately because we cannot reliably
85 * traverse pinned memory.
86 */
87
88 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_NOT_CAF, );
89 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", CC_NOT_CAF, );
90 CC_DECLARE(CC_GC, "GC", "GC", CC_NOT_CAF, );
91 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_NOT_CAF, );
92 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_NOT_CAF, );
93 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", CC_NOT_CAF, );
94
95 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
96 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
97 CCS_DECLARE(CCS_GC, CC_GC, );
98 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
99 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
100 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
101
102 /*
103 * Static Functions
104 */
105
106 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
107 CostCentreStack *ccs2 );
108 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
109 CostCentreStack *new_ccs );
110 static rtsBool ignoreCCS ( CostCentreStack *ccs );
111 static void countTickss ( CostCentreStack *ccs );
112 static void inheritCosts ( CostCentreStack *ccs );
113 static void findCCSMaxLens ( CostCentreStack *ccs,
114 nat indent,
115 nat *max_label_len,
116 nat *max_module_len );
117 static void logCCS ( CostCentreStack *ccs,
118 nat indent,
119 nat max_label_len,
120 nat max_module_len );
121 static void reportCCS ( CostCentreStack *ccs );
122 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
123 CostCentre *cc );
124 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
125 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
126 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
127 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
128 CostCentre *, unsigned int );
129 static void ccsSetSelected ( CostCentreStack *ccs );
130
131 static void initTimeProfiling ( void );
132 static void initProfilingLogFile ( void );
133
134 /* -----------------------------------------------------------------------------
135 Initialise the profiling environment
136 -------------------------------------------------------------------------- */
137
138 void
139 initProfiling1 (void)
140 {
141 // initialise our arena
142 prof_arena = newArena();
143
144 /* for the benefit of allocate()... */
145 CCCS = CCS_SYSTEM;
146 }
147
148 void
149 freeProfiling (void)
150 {
151 arenaFree(prof_arena);
152 }
153
154 void
155 initProfiling2 (void)
156 {
157 CostCentreStack *ccs, *next;
158
159 CCCS = CCS_SYSTEM;
160
161 /* Set up the log file, and dump the header and cost centre
162 * information into it.
163 */
164 initProfilingLogFile();
165
166 /* Register all the cost centres / stacks in the program
167 * CC_MAIN gets link = 0, all others have non-zero link.
168 */
169 REGISTER_CC(CC_MAIN);
170 REGISTER_CC(CC_SYSTEM);
171 REGISTER_CC(CC_GC);
172 REGISTER_CC(CC_OVERHEAD);
173 REGISTER_CC(CC_DONT_CARE);
174 REGISTER_CC(CC_PINNED);
175
176 REGISTER_CCS(CCS_SYSTEM);
177 REGISTER_CCS(CCS_GC);
178 REGISTER_CCS(CCS_OVERHEAD);
179 REGISTER_CCS(CCS_DONT_CARE);
180 REGISTER_CCS(CCS_PINNED);
181 REGISTER_CCS(CCS_MAIN);
182
183 /* find all the registered cost centre stacks, and make them
184 * children of CCS_MAIN.
185 */
186 ASSERT(CCS_LIST == CCS_MAIN);
187 CCS_LIST = CCS_LIST->prevStack;
188 CCS_MAIN->prevStack = NULL;
189 CCS_MAIN->root = CCS_MAIN;
190 ccsSetSelected(CCS_MAIN);
191
192 // make CCS_MAIN the parent of all the pre-defined CCSs.
193 for (ccs = CCS_LIST; ccs != NULL; ) {
194 next = ccs->prevStack;
195 ccs->prevStack = NULL;
196 actualPush_(CCS_MAIN,ccs->cc,ccs);
197 ccs->root = ccs;
198 ccs = next;
199 }
200
201 if (RtsFlags.CcFlags.doCostCentres) {
202 initTimeProfiling();
203 }
204
205 if (RtsFlags.ProfFlags.doHeapProfile) {
206 initHeapProfiling();
207 }
208 }
209
210
211 static void
212 initProfilingLogFile(void)
213 {
214 char *prog;
215
216 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
217 strcpy(prog, prog_name);
218 #ifdef mingw32_HOST_OS
219 // on Windows, drop the .exe suffix if there is one
220 {
221 char *suff;
222 suff = strrchr(prog,'.');
223 if (suff != NULL && !strcmp(suff,".exe")) {
224 *suff = '\0';
225 }
226 }
227 #endif
228
229 if (RtsFlags.CcFlags.doCostCentres == 0 &&
230 RtsFlags.ProfFlags.doHeapProfile != HEAP_BY_RETAINER)
231 {
232 /* No need for the <prog>.prof file */
233 prof_filename = NULL;
234 prof_file = NULL;
235 }
236 else
237 {
238 /* Initialise the log file name */
239 prof_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
240 sprintf(prof_filename, "%s.prof", prog);
241
242 /* open the log file */
243 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
244 debugBelch("Can't open profiling report file %s\n", prof_filename);
245 RtsFlags.CcFlags.doCostCentres = 0;
246 // The following line was added by Sung; retainer/LDV profiling may need
247 // two output files, i.e., <program>.prof/hp.
248 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
249 RtsFlags.ProfFlags.doHeapProfile = 0;
250 return;
251 }
252 }
253
254 if (RtsFlags.ProfFlags.doHeapProfile) {
255 /* Initialise the log file name */
256 hp_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
257 sprintf(hp_filename, "%s.hp", prog);
258
259 /* open the log file */
260 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
261 debugBelch("Can't open profiling report file %s\n",
262 hp_filename);
263 RtsFlags.ProfFlags.doHeapProfile = 0;
264 return;
265 }
266 }
267 }
268
269 void
270 initTimeProfiling(void)
271 {
272 /* Start ticking */
273 startProfTimer();
274 };
275
276 void
277 endProfiling ( void )
278 {
279 if (RtsFlags.CcFlags.doCostCentres) {
280 stopProfTimer();
281 }
282 if (RtsFlags.ProfFlags.doHeapProfile) {
283 endHeapProfiling();
284 }
285 }
286
287 /* -----------------------------------------------------------------------------
288 Set CCCS when entering a function.
289
290 The algorithm is as follows.
291
292 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
293
294 where
295
296 dropCommonPrefix A B
297 -- returns the suffix of B after removing any prefix common
298 -- to both A and B.
299
300 e.g.
301
302 <a,b,c> ++> <> = <a,b,c>
303 <a,b,c> ++> <d> = <a,b,c,d>
304 <a,b,c> ++> <a,b> = <a,b,c>
305 <a,b> ++> <a,b,c> = <a,b,c>
306 <a,b,c> ++> <a,b,d> = <a,b,c,d>
307
308 -------------------------------------------------------------------------- */
309
310 // implements c1 ++> c2, where c1 and c2 are equal depth
311 //
312 static void enterFunEqualStacks (CostCentreStack *ccs, CostCentreStack *ccsfn)
313 {
314 ASSERT(ccs->depth == ccsfn->depth);
315 if (ccs == ccsfn) return;
316 enterFunEqualStacks(ccs->prevStack, ccsfn->prevStack);
317 CCCS = pushCostCentre(CCCS, ccsfn->cc);
318 }
319
320 // implements c1 ++> c2, where c2 is deeper than c1.
321 // Drop elements of c2 until we have equal stacks, call
322 // enterFunEqualStacks(), and then push on the elements that we
323 // dropped in reverse order.
324 //
325 static void enterFunCurShorter (CostCentreStack *ccsfn, StgWord n)
326 {
327 if (n == 0) {
328 ASSERT(ccsfn->depth == CCCS->depth);
329 enterFunEqualStacks(CCCS,ccsfn);
330 return;
331 }
332 enterFunCurShorter(ccsfn->prevStack, n-1);
333 CCCS = pushCostCentre(CCCS, ccsfn->cc);
334 }
335
336 void enterFunCCS ( CostCentreStack *ccsfn )
337 {
338 // common case 1: both stacks are the same
339 if (ccsfn == CCCS) {
340 return;
341 }
342
343 // common case 2: the function stack is empty, or just CAF
344 if (ccsfn->prevStack == CCS_MAIN) {
345 return;
346 }
347
348 // common case 3: the stacks are completely different (e.g. one is a
349 // descendent of MAIN and the other of a CAF): we append the whole
350 // of the function stack to the current CCS.
351 if (ccsfn->root != CCCS->root) {
352 CCCS = appendCCS(CCCS,ccsfn);
353 return;
354 }
355
356 // uncommon case 4: CCCS is deeper than ccsfn
357 if (CCCS->depth > ccsfn->depth) {
358 nat i, n;
359 CostCentreStack *tmp = CCCS;
360 n = CCCS->depth - ccsfn->depth;
361 for (i = 0; i < n; i++) {
362 tmp = tmp->prevStack;
363 }
364 enterFunEqualStacks(tmp,ccsfn);
365 return;
366 }
367
368 // uncommon case 5: ccsfn is deeper than CCCS
369 if (ccsfn->depth > CCCS->depth) {
370 enterFunCurShorter(ccsfn, ccsfn->depth - CCCS->depth);
371 return;
372 }
373
374 // uncommon case 6: stacks are equal depth, but different
375 enterFunEqualStacks(CCCS,ccsfn);
376 }
377
378 /* -----------------------------------------------------------------------------
379 Decide whether closures with this CCS should contribute to the heap
380 profile.
381 -------------------------------------------------------------------------- */
382
383 static void
384 ccsSetSelected (CostCentreStack *ccs)
385 {
386 if (RtsFlags.ProfFlags.modSelector) {
387 if (! strMatchesSelector (ccs->cc->module,
388 RtsFlags.ProfFlags.modSelector) ) {
389 ccs->selected = 0;
390 return;
391 }
392 }
393 if (RtsFlags.ProfFlags.ccSelector) {
394 if (! strMatchesSelector (ccs->cc->label,
395 RtsFlags.ProfFlags.ccSelector) ) {
396 ccs->selected = 0;
397 return;
398 }
399 }
400 if (RtsFlags.ProfFlags.ccsSelector) {
401 CostCentreStack *c;
402 for (c = ccs; c != NULL; c = c->prevStack)
403 {
404 if ( strMatchesSelector (c->cc->label,
405 RtsFlags.ProfFlags.ccsSelector) ) {
406 break;
407 }
408 }
409 if (c == NULL) {
410 ccs->selected = 0;
411 return;
412 }
413 }
414
415 ccs->selected = 1;
416 return;
417 }
418
419 /* -----------------------------------------------------------------------------
420 Cost-centre stack manipulation
421 -------------------------------------------------------------------------- */
422
423 #ifdef DEBUG
424 CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
425 CostCentreStack *
426 pushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
427 #define pushCostCentre _pushCostCentre
428 {
429 IF_DEBUG(prof,
430 traceBegin("pushing %s on ", cc->label);
431 debugCCS(ccs);
432 traceEnd(););
433
434 return pushCostCentre(ccs,cc);
435 }
436 #endif
437
438 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
439
440 #ifdef DEBUG
441 CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
442 CostCentreStack *
443 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
444 #define appendCCS _appendCCS
445 {
446 IF_DEBUG(prof,
447 if (ccs1 != ccs2) {
448 debugBelch("Appending ");
449 debugCCS(ccs1);
450 debugBelch(" to ");
451 debugCCS(ccs2);
452 debugBelch("\n");});
453 return appendCCS(ccs1,ccs2);
454 }
455 #endif
456
457 CostCentreStack *
458 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
459 {
460 if (ccs1 == ccs2) {
461 return ccs1;
462 }
463
464 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) {
465 // stop at a CAF element
466 return ccs1;
467 }
468
469 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
470 }
471
472 // Pick one:
473 // #define RECURSION_DROPS
474 #define RECURSION_TRUNCATES
475
476 CostCentreStack *
477 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
478 {
479 CostCentreStack *temp_ccs;
480
481 if (ccs == EMPTY_STACK)
482 return actualPush(ccs,cc);
483 else {
484 if (ccs->cc == cc)
485 return ccs;
486 else {
487 // check if we've already memoized this stack
488 temp_ccs = isInIndexTable(ccs->indexTable,cc);
489
490 if (temp_ccs != EMPTY_STACK)
491 return temp_ccs;
492 else {
493 temp_ccs = checkLoop(ccs,cc);
494 if (temp_ccs != NULL) {
495 // This CC is already in the stack somewhere.
496 // This could be recursion, or just calling
497 // another function with the same CC.
498 // A number of policies are possible at this
499 // point, we implement two here:
500 // - truncate the stack to the previous instance
501 // of this CC
502 // - ignore this push, return the same stack.
503 //
504 CostCentreStack *new_ccs;
505 #if defined(RECURSION_TRUNCATES)
506 new_ccs = temp_ccs;
507 #else // defined(RECURSION_DROPS)
508 new_ccs = ccs;
509 #endif
510 ccs->indexTable = addToIndexTable (ccs->indexTable,
511 new_ccs, cc, 1);
512 return new_ccs;
513 } else {
514 return actualPush (ccs,cc);
515 }
516 }
517 }
518 }
519 }
520
521 static CostCentreStack *
522 checkLoop (CostCentreStack *ccs, CostCentre *cc)
523 {
524 while (ccs != EMPTY_STACK) {
525 if (ccs->cc == cc)
526 return ccs;
527 ccs = ccs->prevStack;
528 }
529 return NULL;
530 }
531
532 static CostCentreStack *
533 actualPush (CostCentreStack *ccs, CostCentre *cc)
534 {
535 CostCentreStack *new_ccs;
536
537 // allocate space for a new CostCentreStack
538 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
539
540 return actualPush_(ccs, cc, new_ccs);
541 }
542
543 static CostCentreStack *
544 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
545 {
546 /* assign values to each member of the structure */
547 new_ccs->ccsID = CCS_ID++;
548 new_ccs->cc = cc;
549 new_ccs->prevStack = ccs;
550 new_ccs->root = ccs->root;
551 new_ccs->depth = ccs->depth + 1;
552
553 new_ccs->indexTable = EMPTY_TABLE;
554
555 /* Initialise the various _scc_ counters to zero
556 */
557 new_ccs->scc_count = 0;
558
559 /* Initialize all other stats here. There should be a quick way
560 * that's easily used elsewhere too
561 */
562 new_ccs->time_ticks = 0;
563 new_ccs->mem_alloc = 0;
564 new_ccs->inherited_ticks = 0;
565 new_ccs->inherited_alloc = 0;
566
567 // Set the selected field.
568 ccsSetSelected(new_ccs);
569
570 /* update the memoization table for the parent stack */
571 if (ccs != EMPTY_STACK) {
572 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
573 0/*not a back edge*/);
574 }
575
576 /* return a pointer to the new stack */
577 return new_ccs;
578 }
579
580
581 static CostCentreStack *
582 isInIndexTable(IndexTable *it, CostCentre *cc)
583 {
584 while (it!=EMPTY_TABLE)
585 {
586 if (it->cc == cc)
587 return it->ccs;
588 else
589 it = it->next;
590 }
591
592 /* otherwise we never found it so return EMPTY_TABLE */
593 return EMPTY_TABLE;
594 }
595
596
597 static IndexTable *
598 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
599 CostCentre *cc, unsigned int back_edge)
600 {
601 IndexTable *new_it;
602
603 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
604
605 new_it->cc = cc;
606 new_it->ccs = new_ccs;
607 new_it->next = it;
608 new_it->back_edge = back_edge;
609 return new_it;
610 }
611
612 /* -----------------------------------------------------------------------------
613 Generating a time & allocation profiling report.
614 -------------------------------------------------------------------------- */
615
616 /* We omit certain system-related CCs and CCSs from the default
617 * reports, so as not to cause confusion.
618 */
619 static rtsBool
620 ignoreCC (CostCentre *cc)
621 {
622 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
623 ( cc == CC_OVERHEAD
624 || cc == CC_DONT_CARE
625 || cc == CC_GC
626 || cc == CC_SYSTEM)) {
627 return rtsTrue;
628 } else {
629 return rtsFalse;
630 }
631 }
632
633 static rtsBool
634 ignoreCCS (CostCentreStack *ccs)
635 {
636 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
637 ( ccs == CCS_OVERHEAD
638 || ccs == CCS_DONT_CARE
639 || ccs == CCS_GC
640 || ccs == CCS_SYSTEM)) {
641 return rtsTrue;
642 } else {
643 return rtsFalse;
644 }
645 }
646
647 /* -----------------------------------------------------------------------------
648 Generating the aggregated per-cost-centre time/alloc report.
649 -------------------------------------------------------------------------- */
650
651 static CostCentre *sorted_cc_list;
652
653 static void
654 aggregateCCCosts( CostCentreStack *ccs )
655 {
656 IndexTable *i;
657
658 ccs->cc->mem_alloc += ccs->mem_alloc;
659 ccs->cc->time_ticks += ccs->time_ticks;
660
661 for (i = ccs->indexTable; i != 0; i = i->next) {
662 if (!i->back_edge) {
663 aggregateCCCosts(i->ccs);
664 }
665 }
666 }
667
668 static void
669 insertCCInSortedList( CostCentre *new_cc )
670 {
671 CostCentre **prev, *cc;
672
673 prev = &sorted_cc_list;
674 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
675 if (new_cc->time_ticks > cc->time_ticks) {
676 new_cc->link = cc;
677 *prev = new_cc;
678 return;
679 } else {
680 prev = &(cc->link);
681 }
682 }
683 new_cc->link = NULL;
684 *prev = new_cc;
685 }
686
687 static void
688 reportPerCCCosts( void )
689 {
690 CostCentre *cc, *next;
691 nat max_label_len, max_module_len;
692
693 aggregateCCCosts(CCS_MAIN);
694 sorted_cc_list = NULL;
695
696 max_label_len = 11; // no shorter than the "COST CENTRE" header
697 max_module_len = 7; // no shorter than the "MODULE" header
698
699 for (cc = CC_LIST; cc != NULL; cc = next) {
700 next = cc->link;
701 if (cc->time_ticks > total_prof_ticks/100
702 || cc->mem_alloc > total_alloc/100
703 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
704 insertCCInSortedList(cc);
705
706 max_label_len = stg_max(strlen(cc->label), max_label_len);
707 max_module_len = stg_max(strlen(cc->module), max_module_len);
708 }
709 }
710
711 fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
712 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
713 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
714 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
715 }
716 fprintf(prof_file, "\n\n");
717
718 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
719 if (ignoreCC(cc)) {
720 continue;
721 }
722 fprintf(prof_file, "%-*s %-*s", max_label_len, cc->label, max_module_len, cc->module);
723 fprintf(prof_file, "%6.1f %6.1f",
724 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
725 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
726 total_alloc * 100)
727 );
728
729 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
730 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
731 (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_));
732 }
733 fprintf(prof_file, "\n");
734 }
735
736 fprintf(prof_file,"\n\n");
737 }
738
739 /* -----------------------------------------------------------------------------
740 Generate the cost-centre-stack time/alloc report
741 -------------------------------------------------------------------------- */
742
743 static void
744 fprintHeader( nat max_label_len, nat max_module_len )
745 {
746 fprintf(prof_file, "%-*s %-*s%6s %11s %11s %11s\n", max_label_len, "", max_module_len, "", "", "", "individual", "inherited");
747
748 fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
749 fprintf(prof_file, "%6s %11s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc");
750
751 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
752 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
753 }
754
755 fprintf(prof_file, "\n\n");
756 }
757
758 void
759 reportCCSProfiling( void )
760 {
761 nat count;
762 char temp[128]; /* sigh: magic constant */
763
764 stopProfTimer();
765
766 total_prof_ticks = 0;
767 total_alloc = 0;
768 countTickss(CCS_MAIN);
769
770 if (RtsFlags.CcFlags.doCostCentres == 0) return;
771
772 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
773 time_str(), "Final");
774
775 fprintf(prof_file, "\n\t ");
776 fprintf(prof_file, " %s", prog_name);
777 fprintf(prof_file, " +RTS");
778 for (count = 0; rts_argv[count]; count++)
779 fprintf(prof_file, " %s", rts_argv[count]);
780 fprintf(prof_file, " -RTS");
781 for (count = 1; prog_argv[count]; count++)
782 fprintf(prof_file, " %s", prog_argv[count]);
783 fprintf(prof_file, "\n\n");
784
785 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
786 (double) total_prof_ticks *
787 (double) RtsFlags.MiscFlags.tickInterval / 1000,
788 (unsigned long) total_prof_ticks,
789 (int) RtsFlags.MiscFlags.tickInterval);
790
791 fprintf(prof_file, "\ttotal alloc = %11s bytes",
792 showStgWord64(total_alloc * sizeof(W_),
793 temp, rtsTrue/*commas*/));
794
795 fprintf(prof_file, " (excludes profiling overheads)\n\n");
796
797 reportPerCCCosts();
798
799 inheritCosts(CCS_MAIN);
800
801 reportCCS(pruneCCSTree(CCS_MAIN));
802 }
803
804 static void
805 findCCSMaxLens(CostCentreStack *ccs, nat indent, nat *max_label_len, nat *max_module_len) {
806 CostCentre *cc;
807 IndexTable *i;
808
809 cc = ccs->cc;
810
811 *max_label_len = stg_max(*max_label_len, indent + strlen(cc->label));
812 *max_module_len = stg_max(*max_module_len, strlen(cc->module));
813
814 for (i = ccs->indexTable; i != 0; i = i->next) {
815 if (!i->back_edge) {
816 findCCSMaxLens(i->ccs, indent+1, max_label_len, max_module_len);
817 }
818 }
819 }
820
821 static void
822 logCCS(CostCentreStack *ccs, nat indent, nat max_label_len, nat max_module_len)
823 {
824 CostCentre *cc;
825 IndexTable *i;
826
827 cc = ccs->cc;
828
829 /* Only print cost centres with non 0 data ! */
830
831 if (!ignoreCCS(ccs))
832 /* force printing of *all* cost centres if -Pa */
833 {
834
835 fprintf(prof_file, "%-*s%-*s %-*s",
836 indent, "", max_label_len-indent, cc->label, max_module_len, cc->module);
837
838 fprintf(prof_file, "%6ld %11" FMT_Word64 " %5.1f %5.1f %5.1f %5.1f",
839 ccs->ccsID, ccs->scc_count,
840 total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
841 total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
842 total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
843 total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
844 );
845
846 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
847 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
848 (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
849 }
850 fprintf(prof_file, "\n");
851 }
852
853 for (i = ccs->indexTable; i != 0; i = i->next) {
854 if (!i->back_edge) {
855 logCCS(i->ccs, indent+1, max_label_len, max_module_len);
856 }
857 }
858 }
859
860 static void
861 reportCCS(CostCentreStack *ccs)
862 {
863 nat max_label_len, max_module_len;
864
865 max_label_len = 11; // no shorter than "COST CENTRE" header
866 max_module_len = 7; // no shorter than "MODULE" header
867
868 findCCSMaxLens(ccs, 0, &max_label_len, &max_module_len);
869
870 fprintHeader(max_label_len, max_module_len);
871 logCCS(ccs, 0, max_label_len, max_module_len);
872 }
873
874
875 /* Traverse the cost centre stack tree and accumulate
876 * ticks/allocations.
877 */
878 static void
879 countTickss(CostCentreStack *ccs)
880 {
881 IndexTable *i;
882
883 if (!ignoreCCS(ccs)) {
884 total_alloc += ccs->mem_alloc;
885 total_prof_ticks += ccs->time_ticks;
886 }
887 for (i = ccs->indexTable; i != NULL; i = i->next)
888 if (!i->back_edge) {
889 countTickss(i->ccs);
890 }
891 }
892
893 /* Traverse the cost centre stack tree and inherit ticks & allocs.
894 */
895 static void
896 inheritCosts(CostCentreStack *ccs)
897 {
898 IndexTable *i;
899
900 if (ignoreCCS(ccs)) { return; }
901
902 ccs->inherited_ticks += ccs->time_ticks;
903 ccs->inherited_alloc += ccs->mem_alloc;
904
905 for (i = ccs->indexTable; i != NULL; i = i->next)
906 if (!i->back_edge) {
907 inheritCosts(i->ccs);
908 ccs->inherited_ticks += i->ccs->inherited_ticks;
909 ccs->inherited_alloc += i->ccs->inherited_alloc;
910 }
911
912 return;
913 }
914
915 //
916 // Prune CCSs with zero entries, zero ticks or zero allocation from
917 // the tree, unless COST_CENTRES_ALL is on.
918 //
919 static CostCentreStack *
920 pruneCCSTree (CostCentreStack *ccs)
921 {
922 CostCentreStack *ccs1;
923 IndexTable *i, **prev;
924
925 prev = &ccs->indexTable;
926 for (i = ccs->indexTable; i != 0; i = i->next) {
927 if (i->back_edge) { continue; }
928
929 ccs1 = pruneCCSTree(i->ccs);
930 if (ccs1 == NULL) {
931 *prev = i->next;
932 } else {
933 prev = &(i->next);
934 }
935 }
936
937 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
938 /* force printing of *all* cost centres if -P -P */ )
939
940 || ( ccs->indexTable != 0 )
941 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
942 ) {
943 return ccs;
944 } else {
945 return NULL;
946 }
947 }
948
949 void
950 fprintCCS( FILE *f, CostCentreStack *ccs )
951 {
952 fprintf(f,"<");
953 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
954 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
955 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
956 fprintf(f,",");
957 }
958 }
959 fprintf(f,">");
960 }
961
962 // Returns: True if the call stack ended with CAF
963 static rtsBool fprintCallStack (CostCentreStack *ccs)
964 {
965 CostCentreStack *prev;
966
967 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
968 prev = ccs->prevStack;
969 while (prev && prev != CCS_MAIN) {
970 ccs = prev;
971 fprintf(stderr, ",\n called from %s.%s",
972 ccs->cc->module, ccs->cc->label);
973 prev = ccs->prevStack;
974 }
975 fprintf(stderr, "\n");
976
977 return (!strncmp(ccs->cc->label, "CAF", 3));
978 }
979
980 /* For calling from .cmm code, where we can't reliably refer to stderr */
981 void
982 fprintCCS_stderr (CostCentreStack *ccs, StgTSO *tso)
983 {
984 rtsBool is_caf;
985 StgPtr frame;
986 StgStack *stack;
987 CostCentreStack *prev_ccs;
988 nat depth = 0;
989 const nat MAX_DEPTH = 10; // don't print gigantic chains of stacks
990
991 fprintf(stderr, "*** Exception raised (reporting due to +RTS -xc), stack trace:\n ");
992 is_caf = fprintCallStack(ccs);
993
994 // traverse the stack down to the enclosing update frame to
995 // find out where this CCS was evaluated from...
996
997 stack = tso->stackobj;
998 frame = stack->sp;
999 prev_ccs = ccs;
1000
1001 for (; is_caf && depth < MAX_DEPTH; depth++)
1002 {
1003 switch (get_itbl((StgClosure*)frame)->type)
1004 {
1005 case UPDATE_FRAME:
1006 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
1007 frame += sizeofW(StgUpdateFrame);
1008 if (ccs == CCS_MAIN) {
1009 goto done;
1010 }
1011 if (ccs == prev_ccs) {
1012 // ignore if this is the same as the previous stack,
1013 // we're probably in library code and haven't
1014 // accumulated any more interesting stack items
1015 // since the last update frame.
1016 break;
1017 }
1018 prev_ccs = ccs;
1019 fprintf(stderr, " --> evaluated by: ");
1020 is_caf = fprintCallStack(ccs);
1021 break;
1022 case UNDERFLOW_FRAME:
1023 stack = ((StgUnderflowFrame*)frame)->next_chunk;
1024 frame = stack->sp;
1025 break;
1026 case STOP_FRAME:
1027 goto done;
1028 default:
1029 frame += stack_frame_sizeW((StgClosure*)frame);
1030 break;
1031 }
1032 }
1033 done:
1034 return;
1035 }
1036
1037 #ifdef DEBUG
1038 void
1039 debugCCS( CostCentreStack *ccs )
1040 {
1041 debugBelch("<");
1042 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1043 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
1044 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1045 debugBelch(",");
1046 }
1047 }
1048 debugBelch(">");
1049 }
1050 #endif /* DEBUG */
1051
1052 #endif /* PROFILING */