PPC NCG: Use liveness information in CmmCall
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if defined(PROFILING)
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "ProfilerReport.h"
21 #include "ProfilerReportJson.h"
22 #include "Printer.h"
23 #include "Capability.h"
24
25 #include <fs_rts.h>
26 #include <string.h>
27
28 #if defined(DEBUG)
29 #include "Trace.h"
30 #endif
31
32 /*
33 * Profiling allocation arena.
34 */
35 #if defined(DEBUG)
36 Arena *prof_arena;
37 #else
38 static Arena *prof_arena;
39 #endif
40
41 /*
42 * Global variables used to assign unique IDs to cc's, ccs's, and
43 * closure_cats
44 */
45
46 static unsigned int CC_ID = 1;
47 static unsigned int CCS_ID = 1;
48
49 /* Globals for opening the profiling log file(s)
50 */
51 static char *prof_filename; /* prof report file name = <program>.prof */
52 FILE *prof_file;
53
54 static char *hp_filename; /* heap profile (hp2ps style) log file */
55 FILE *hp_file;
56
57 // List of all cost centres. Used for reporting.
58 CostCentre *CC_LIST = NULL;
59 // All cost centre stacks temporarily appear here, to be able to make CCS_MAIN a
60 // parent of all cost centres stacks (done in initProfiling2()).
61 static CostCentreStack *CCS_LIST = NULL;
62
63 #if defined(THREADED_RTS)
64 static Mutex ccs_mutex;
65 #endif
66
67 /*
68 * Built-in cost centres and cost-centre stacks:
69 *
70 * MAIN is the root of the cost-centre stack tree. If there are
71 * no {-# SCC #-}s in the program, all costs will be attributed
72 * to MAIN.
73 *
74 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
75 * RTS operations apart from garbage collection are attributed
76 * to SYSTEM.
77 *
78 * GC is the storage manager / garbage collector.
79 *
80 * OVERHEAD gets all costs generated by the profiling system
81 * itself. These are costs that would not be incurred
82 * during non-profiled execution of the program.
83 *
84 * DONT_CARE is a placeholder cost-centre we assign to static
85 * constructors. It should *never* accumulate any costs.
86 *
87 * PINNED accumulates memory allocated to pinned objects, which
88 * cannot be profiled separately because we cannot reliably
89 * traverse pinned memory.
90 */
91
92 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "<built-in>", CC_NOT_CAF, );
93 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", "<built-in>", CC_NOT_CAF, );
94 CC_DECLARE(CC_GC, "GC", "GC", "<built-in>", CC_NOT_CAF, );
95 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "<built-in>", CC_NOT_CAF, );
96 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", "<built-in>", CC_NOT_CAF, );
97 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", "<built-in>", CC_NOT_CAF, );
98 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "<built-in>", CC_NOT_CAF, );
99
100 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
101 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
102 CCS_DECLARE(CCS_GC, CC_GC, );
103 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
104 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
105 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
106 CCS_DECLARE(CCS_IDLE, CC_IDLE, );
107
108 /*
109 * Static Functions
110 */
111
112 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
113 CostCentreStack *ccs2 );
114 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
115 CostCentreStack *new_ccs );
116 static void inheritCosts ( CostCentreStack *ccs );
117 static ProfilerTotals countTickss ( CostCentreStack const *ccs );
118 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
119 CostCentre *cc );
120 static void sortCCSTree ( CostCentreStack *ccs );
121 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
122 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
123 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
124 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
125 CostCentre *, bool );
126 static void ccsSetSelected ( CostCentreStack *ccs );
127 static void aggregateCCCosts( CostCentreStack *ccs );
128 static void registerCC ( CostCentre *cc );
129 static void registerCCS ( CostCentreStack *ccs );
130
131 static void initTimeProfiling ( void );
132 static void initProfilingLogFile ( void );
133
134 /* -----------------------------------------------------------------------------
135 Initialise the profiling environment
136 -------------------------------------------------------------------------- */
137
138 void initProfiling (void)
139 {
140 // initialise our arena
141 prof_arena = newArena();
142
143 /* for the benefit of allocate()... */
144 {
145 uint32_t n;
146 for (n=0; n < n_capabilities; n++) {
147 capabilities[n]->r.rCCCS = CCS_SYSTEM;
148 }
149 }
150
151 #if defined(THREADED_RTS)
152 initMutex(&ccs_mutex);
153 #endif
154
155 /* Set up the log file, and dump the header and cost centre
156 * information into it.
157 */
158 initProfilingLogFile();
159
160 /* Register all the cost centres / stacks in the program
161 * CC_MAIN gets link = 0, all others have non-zero link.
162 */
163 registerCC(CC_MAIN);
164 registerCC(CC_SYSTEM);
165 registerCC(CC_GC);
166 registerCC(CC_OVERHEAD);
167 registerCC(CC_DONT_CARE);
168 registerCC(CC_PINNED);
169 registerCC(CC_IDLE);
170
171 registerCCS(CCS_SYSTEM);
172 registerCCS(CCS_GC);
173 registerCCS(CCS_OVERHEAD);
174 registerCCS(CCS_DONT_CARE);
175 registerCCS(CCS_PINNED);
176 registerCCS(CCS_IDLE);
177 registerCCS(CCS_MAIN);
178
179 /* find all the registered cost centre stacks, and make them
180 * children of CCS_MAIN.
181 */
182 ASSERT(CCS_LIST == CCS_MAIN);
183 CCS_LIST = CCS_LIST->prevStack;
184 CCS_MAIN->prevStack = NULL;
185 CCS_MAIN->root = CCS_MAIN;
186 ccsSetSelected(CCS_MAIN);
187
188 initProfiling2();
189
190 if (RtsFlags.CcFlags.doCostCentres) {
191 initTimeProfiling();
192 }
193
194 if (RtsFlags.ProfFlags.doHeapProfile) {
195 initHeapProfiling();
196 }
197 }
198
199 //
200 // Should be called after loading any new Haskell code.
201 //
202 void initProfiling2 (void)
203 {
204 // make CCS_MAIN the parent of all the pre-defined CCSs.
205 CostCentreStack *next;
206 for (CostCentreStack *ccs = CCS_LIST; ccs != NULL; ) {
207 next = ccs->prevStack;
208 ccs->prevStack = NULL;
209 actualPush_(CCS_MAIN,ccs->cc,ccs);
210 ccs->root = ccs;
211 ccs = next;
212 }
213 CCS_LIST = NULL;
214 }
215
216 void
217 freeProfiling (void)
218 {
219 arenaFree(prof_arena);
220 }
221
222 CostCentre *mkCostCentre (char *label, char *module, char *srcloc)
223 {
224 CostCentre *cc = stgMallocBytes (sizeof(CostCentre), "mkCostCentre");
225 cc->label = label;
226 cc->module = module;
227 cc->srcloc = srcloc;
228 cc->is_caf = 0;
229 cc->mem_alloc = 0;
230 cc->time_ticks = 0;
231 cc->link = NULL;
232 return cc;
233 }
234
235 static void
236 initProfilingLogFile(void)
237 {
238 // Figure out output file name stem.
239 char const *stem;
240 if (RtsFlags.CcFlags.outputFileNameStem) {
241 stem = RtsFlags.CcFlags.outputFileNameStem;
242 } else {
243 char *prog;
244
245 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
246 strcpy(prog, prog_name);
247 #if defined(mingw32_HOST_OS)
248 // on Windows, drop the .exe suffix if there is one
249 {
250 char *suff;
251 suff = strrchr(prog,'.');
252 if (suff != NULL && !strcmp(suff,".exe")) {
253 *suff = '\0';
254 }
255 }
256 #endif
257 stem = prog;
258 }
259
260 if (RtsFlags.CcFlags.doCostCentres == 0 && !doingRetainerProfiling())
261 {
262 /* No need for the <stem>.prof file */
263 prof_filename = NULL;
264 prof_file = NULL;
265 }
266 else
267 {
268 /* Initialise the log file name */
269 prof_filename = arenaAlloc(prof_arena, strlen(stem) + 6);
270 sprintf(prof_filename, "%s.prof", stem);
271
272 /* open the log file */
273 if ((prof_file = __rts_fopen(prof_filename, "w")) == NULL) {
274 debugBelch("Can't open profiling report file %s\n", prof_filename);
275 RtsFlags.CcFlags.doCostCentres = 0;
276 // Retainer profiling (`-hr` or `-hr<cc> -h<x>`) writes to
277 // both <program>.hp as <program>.prof.
278 if (doingRetainerProfiling()) {
279 RtsFlags.ProfFlags.doHeapProfile = 0;
280 }
281 }
282 }
283
284 if (RtsFlags.ProfFlags.doHeapProfile) {
285 /* Initialise the log file name */
286 hp_filename = arenaAlloc(prof_arena, strlen(stem) + 6);
287 sprintf(hp_filename, "%s.hp", stem);
288
289 /* open the log file */
290 if ((hp_file = __rts_fopen(hp_filename, "w")) == NULL) {
291 debugBelch("Can't open profiling report file %s\n",
292 hp_filename);
293 RtsFlags.ProfFlags.doHeapProfile = 0;
294 }
295 }
296 }
297
298 void
299 initTimeProfiling(void)
300 {
301 /* Start ticking */
302 startProfTimer();
303 };
304
305 void
306 endProfiling ( void )
307 {
308 if (RtsFlags.CcFlags.doCostCentres) {
309 stopProfTimer();
310 }
311 if (RtsFlags.ProfFlags.doHeapProfile) {
312 endHeapProfiling();
313 }
314 }
315
316 /* -----------------------------------------------------------------------------
317 Registering CCs and CCSs
318
319 Registering a CC or CCS consists of
320 - assigning it a unique ID
321 - linking it onto the list of registered CCs/CCSs
322
323 Cost centres are registered at startup by a C constructor function
324 generated by the compiler (ProfInit.hs) in the _stub.c file for each module.
325 -------------------------------------------------------------------------- */
326
327 static void
328 registerCC(CostCentre *cc)
329 {
330 if (cc->link == NULL) {
331 cc->link = CC_LIST;
332 CC_LIST = cc;
333 cc->ccID = CC_ID++;
334 }
335 }
336
337 static void registerCCS(CostCentreStack *ccs)
338 {
339 if (ccs->prevStack == NULL) {
340 ccs->prevStack = CCS_LIST;
341 CCS_LIST = ccs;
342 ccs->ccsID = CCS_ID++;
343 }
344 }
345
346 void registerCcList(CostCentre **cc_list)
347 {
348 for (CostCentre **i = cc_list; *i != NULL; i++) {
349 registerCC(*i);
350 }
351 }
352
353 void registerCcsList(CostCentreStack **cc_list)
354 {
355 for (CostCentreStack **i = cc_list; *i != NULL; i++) {
356 registerCCS(*i);
357 }
358 }
359
360 /* -----------------------------------------------------------------------------
361 Set CCCS when entering a function.
362
363 The algorithm is as follows.
364
365 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
366
367 where
368
369 dropCommonPrefix A B
370 -- returns the suffix of B after removing any prefix common
371 -- to both A and B.
372
373 e.g.
374
375 <a,b,c> ++> <> = <a,b,c>
376 <a,b,c> ++> <d> = <a,b,c,d>
377 <a,b,c> ++> <a,b> = <a,b,c>
378 <a,b> ++> <a,b,c> = <a,b,c>
379 <a,b,c> ++> <a,b,d> = <a,b,c,d>
380
381 -------------------------------------------------------------------------- */
382
383 // implements c1 ++> c2, where c1 and c2 are equal depth
384 //
385 static CostCentreStack *
386 enterFunEqualStacks (CostCentreStack *ccs0,
387 CostCentreStack *ccsapp,
388 CostCentreStack *ccsfn)
389 {
390 ASSERT(ccsapp->depth == ccsfn->depth);
391 if (ccsapp == ccsfn) return ccs0;
392 return pushCostCentre(enterFunEqualStacks(ccs0,
393 ccsapp->prevStack,
394 ccsfn->prevStack),
395 ccsfn->cc);
396 }
397
398 // implements c1 ++> c2, where c2 is deeper than c1.
399 // Drop elements of c2 until we have equal stacks, call
400 // enterFunEqualStacks(), and then push on the elements that we
401 // dropped in reverse order.
402 //
403 static CostCentreStack *
404 enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
405 {
406 if (n == 0) {
407 ASSERT(ccsfn->depth == ccsapp->depth);
408 return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);;
409 } else {
410 ASSERT(ccsfn->depth > ccsapp->depth);
411 return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
412 ccsfn->cc);
413 }
414 }
415
416 void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
417 {
418 CostCentreStack *ccsapp;
419
420 // common case 1: both stacks are the same
421 if (ccsfn == reg->rCCCS) {
422 return;
423 }
424
425 // common case 2: the function stack is empty, or just CAF
426 if (ccsfn->cc->is_caf) {
427 return;
428 }
429
430 ccsapp = reg->rCCCS;
431 reg->rCCCS = CCS_OVERHEAD;
432
433 // common case 3: the stacks are completely different (e.g. one is a
434 // descendent of MAIN and the other of a CAF): we append the whole
435 // of the function stack to the current CCS.
436 if (ccsfn->root != ccsapp->root) {
437 reg->rCCCS = appendCCS(ccsapp,ccsfn);
438 return;
439 }
440
441 // uncommon case 4: ccsapp is deeper than ccsfn
442 if (ccsapp->depth > ccsfn->depth) {
443 uint32_t i, n;
444 CostCentreStack *tmp = ccsapp;
445 n = ccsapp->depth - ccsfn->depth;
446 for (i = 0; i < n; i++) {
447 tmp = tmp->prevStack;
448 }
449 reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
450 return;
451 }
452
453 // uncommon case 5: ccsfn is deeper than CCCS
454 if (ccsfn->depth > ccsapp->depth) {
455 reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
456 ccsfn->depth - ccsapp->depth);
457 return;
458 }
459
460 // uncommon case 6: stacks are equal depth, but different
461 reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
462 }
463
464 /* -----------------------------------------------------------------------------
465 Decide whether closures with this CCS should contribute to the heap
466 profile.
467 -------------------------------------------------------------------------- */
468
469 static void
470 ccsSetSelected (CostCentreStack *ccs)
471 {
472 if (RtsFlags.ProfFlags.modSelector) {
473 if (! strMatchesSelector (ccs->cc->module,
474 RtsFlags.ProfFlags.modSelector) ) {
475 ccs->selected = 0;
476 return;
477 }
478 }
479 if (RtsFlags.ProfFlags.ccSelector) {
480 if (! strMatchesSelector (ccs->cc->label,
481 RtsFlags.ProfFlags.ccSelector) ) {
482 ccs->selected = 0;
483 return;
484 }
485 }
486 if (RtsFlags.ProfFlags.ccsSelector) {
487 CostCentreStack *c;
488 for (c = ccs; c != NULL; c = c->prevStack)
489 {
490 if ( strMatchesSelector (c->cc->label,
491 RtsFlags.ProfFlags.ccsSelector) ) {
492 break;
493 }
494 }
495 if (c == NULL) {
496 ccs->selected = 0;
497 return;
498 }
499 }
500
501 ccs->selected = 1;
502 return;
503 }
504
505 /* -----------------------------------------------------------------------------
506 Cost-centre stack manipulation
507 -------------------------------------------------------------------------- */
508
509 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
510 CostCentreStack *
511 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
512 {
513 IF_DEBUG(prof,
514 if (ccs1 != ccs2) {
515 debugBelch("Appending ");
516 debugCCS(ccs1);
517 debugBelch(" to ");
518 debugCCS(ccs2);
519 debugBelch("\n");});
520
521 if (ccs1 == ccs2) {
522 return ccs1;
523 }
524
525 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf) {
526 // stop at a CAF element
527 return ccs1;
528 }
529
530 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
531 }
532
533 // Pick one:
534 // #define RECURSION_DROPS
535 #define RECURSION_TRUNCATES
536
537 CostCentreStack *
538 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
539 {
540 IF_DEBUG(prof,
541 traceBegin("pushing %s on ", cc->label);
542 debugCCS(ccs);
543 traceEnd(););
544
545 CostCentreStack *ret;
546
547 if (ccs == EMPTY_STACK) {
548 ACQUIRE_LOCK(&ccs_mutex);
549 ret = actualPush(ccs,cc);
550 }
551 else
552 {
553 if (ccs->cc == cc) {
554 return ccs;
555 } else {
556 // check if we've already memoized this stack
557 IndexTable *ixtable = ccs->indexTable;
558 CostCentreStack *temp_ccs = isInIndexTable(ixtable,cc);
559
560 if (temp_ccs != EMPTY_STACK) {
561 return temp_ccs;
562 } else {
563
564 // not in the IndexTable, now we take the lock:
565 ACQUIRE_LOCK(&ccs_mutex);
566
567 if (ccs->indexTable != ixtable)
568 {
569 // someone modified ccs->indexTable while
570 // we did not hold the lock, so we must
571 // check it again:
572 temp_ccs = isInIndexTable(ixtable,cc);
573 if (temp_ccs != EMPTY_STACK)
574 {
575 RELEASE_LOCK(&ccs_mutex);
576 return temp_ccs;
577 }
578 }
579 temp_ccs = checkLoop(ccs,cc);
580 if (temp_ccs != NULL) {
581 // This CC is already in the stack somewhere.
582 // This could be recursion, or just calling
583 // another function with the same CC.
584 // A number of policies are possible at this
585 // point, we implement two here:
586 // - truncate the stack to the previous instance
587 // of this CC
588 // - ignore this push, return the same stack.
589 //
590 CostCentreStack *new_ccs;
591 #if defined(RECURSION_TRUNCATES)
592 new_ccs = temp_ccs;
593 #else // defined(RECURSION_DROPS)
594 new_ccs = ccs;
595 #endif
596 ccs->indexTable = addToIndexTable (ccs->indexTable,
597 new_ccs, cc, true);
598 ret = new_ccs;
599 } else {
600 ret = actualPush (ccs,cc);
601 }
602 }
603 }
604 }
605
606 RELEASE_LOCK(&ccs_mutex);
607 return ret;
608 }
609
610 static CostCentreStack *
611 checkLoop (CostCentreStack *ccs, CostCentre *cc)
612 {
613 while (ccs != EMPTY_STACK) {
614 if (ccs->cc == cc)
615 return ccs;
616 ccs = ccs->prevStack;
617 }
618 return NULL;
619 }
620
621 static CostCentreStack *
622 actualPush (CostCentreStack *ccs, CostCentre *cc)
623 {
624 CostCentreStack *new_ccs;
625
626 // allocate space for a new CostCentreStack
627 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
628
629 return actualPush_(ccs, cc, new_ccs);
630 }
631
632 static CostCentreStack *
633 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
634 {
635 /* assign values to each member of the structure */
636 new_ccs->ccsID = CCS_ID++;
637 new_ccs->cc = cc;
638 new_ccs->prevStack = ccs;
639 new_ccs->root = ccs->root;
640 new_ccs->depth = ccs->depth + 1;
641
642 new_ccs->indexTable = EMPTY_TABLE;
643
644 /* Initialise the various _scc_ counters to zero
645 */
646 new_ccs->scc_count = 0;
647
648 /* Initialize all other stats here. There should be a quick way
649 * that's easily used elsewhere too
650 */
651 new_ccs->time_ticks = 0;
652 new_ccs->mem_alloc = 0;
653 new_ccs->inherited_ticks = 0;
654 new_ccs->inherited_alloc = 0;
655
656 // Set the selected field.
657 ccsSetSelected(new_ccs);
658
659 /* update the memoization table for the parent stack */
660 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
661 false/*not a back edge*/);
662
663 /* return a pointer to the new stack */
664 return new_ccs;
665 }
666
667
668 static CostCentreStack *
669 isInIndexTable(IndexTable *it, CostCentre *cc)
670 {
671 while (it!=EMPTY_TABLE)
672 {
673 if (it->cc == cc)
674 return it->ccs;
675 else
676 it = it->next;
677 }
678
679 /* otherwise we never found it so return EMPTY_TABLE */
680 return EMPTY_TABLE;
681 }
682
683
684 static IndexTable *
685 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
686 CostCentre *cc, bool back_edge)
687 {
688 IndexTable *new_it;
689
690 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
691
692 new_it->cc = cc;
693 new_it->ccs = new_ccs;
694 new_it->next = it;
695 new_it->back_edge = back_edge;
696 return new_it;
697 }
698
699 /* -----------------------------------------------------------------------------
700 Generating a time & allocation profiling report.
701 -------------------------------------------------------------------------- */
702
703 /* We omit certain system-related CCs and CCSs from the default
704 * reports, so as not to cause confusion.
705 */
706 bool
707 ignoreCC (CostCentre const *cc)
708 {
709 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
710 ( cc == CC_OVERHEAD
711 || cc == CC_DONT_CARE
712 || cc == CC_GC
713 || cc == CC_SYSTEM
714 || cc == CC_IDLE);
715 }
716
717 bool
718 ignoreCCS (CostCentreStack const *ccs)
719 {
720 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
721 ( ccs == CCS_OVERHEAD
722 || ccs == CCS_DONT_CARE
723 || ccs == CCS_GC
724 || ccs == CCS_SYSTEM
725 || ccs == CCS_IDLE);
726 }
727
728 void
729 reportCCSProfiling( void )
730 {
731 stopProfTimer();
732 if (RtsFlags.CcFlags.doCostCentres == 0) return;
733
734 ProfilerTotals totals = countTickss(CCS_MAIN);
735 aggregateCCCosts(CCS_MAIN);
736 inheritCosts(CCS_MAIN);
737 CostCentreStack *stack = pruneCCSTree(CCS_MAIN);
738 sortCCSTree(stack);
739
740 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_JSON) {
741 writeCCSReportJson(prof_file, stack, totals);
742 } else {
743 writeCCSReport(prof_file, stack, totals);
744 }
745 }
746
747 /* -----------------------------------------------------------------------------
748 * Accumulating total allocatinos and tick count
749 -------------------------------------------------------------------------- */
750
751 /* Helper */
752 static void
753 countTickss_(CostCentreStack const *ccs, ProfilerTotals *totals)
754 {
755 if (!ignoreCCS(ccs)) {
756 totals->total_alloc += ccs->mem_alloc;
757 totals->total_prof_ticks += ccs->time_ticks;
758 }
759 for (IndexTable *i = ccs->indexTable; i != NULL; i = i->next) {
760 if (!i->back_edge) {
761 countTickss_(i->ccs, totals);
762 }
763 }
764 }
765
766 /* Traverse the cost centre stack tree and accumulate
767 * total ticks/allocations.
768 */
769 static ProfilerTotals
770 countTickss(CostCentreStack const *ccs)
771 {
772 ProfilerTotals totals = {0,0};
773 countTickss_(ccs, &totals);
774 return totals;
775 }
776
777 /* Traverse the cost centre stack tree and inherit ticks & allocs.
778 */
779 static void
780 inheritCosts(CostCentreStack *ccs)
781 {
782 IndexTable *i;
783
784 if (ignoreCCS(ccs)) { return; }
785
786 ccs->inherited_ticks += ccs->time_ticks;
787 ccs->inherited_alloc += ccs->mem_alloc;
788
789 for (i = ccs->indexTable; i != NULL; i = i->next)
790 if (!i->back_edge) {
791 inheritCosts(i->ccs);
792 ccs->inherited_ticks += i->ccs->inherited_ticks;
793 ccs->inherited_alloc += i->ccs->inherited_alloc;
794 }
795
796 return;
797 }
798
799 static void
800 aggregateCCCosts( CostCentreStack *ccs )
801 {
802 IndexTable *i;
803
804 ccs->cc->mem_alloc += ccs->mem_alloc;
805 ccs->cc->time_ticks += ccs->time_ticks;
806
807 for (i = ccs->indexTable; i != 0; i = i->next) {
808 if (!i->back_edge) {
809 aggregateCCCosts(i->ccs);
810 }
811 }
812 }
813
814 //
815 // Prune CCSs with zero entries, zero ticks or zero allocation from
816 // the tree, unless COST_CENTRES_ALL is on.
817 //
818 static CostCentreStack *
819 pruneCCSTree (CostCentreStack *ccs)
820 {
821 CostCentreStack *ccs1;
822 IndexTable *i, **prev;
823
824 prev = &ccs->indexTable;
825 for (i = ccs->indexTable; i != 0; i = i->next) {
826 if (i->back_edge) { continue; }
827
828 ccs1 = pruneCCSTree(i->ccs);
829 if (ccs1 == NULL) {
830 *prev = i->next;
831 } else {
832 prev = &(i->next);
833 }
834 }
835
836 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
837 /* force printing of *all* cost centres if -P -P */ )
838
839 || ( ccs->indexTable != 0 )
840 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
841 ) {
842 return ccs;
843 } else {
844 return NULL;
845 }
846 }
847
848 static IndexTable*
849 insertIndexTableInSortedList(IndexTable* tbl, IndexTable* sortedList)
850 {
851 StgWord tbl_ticks = tbl->ccs->scc_count;
852 char* tbl_label = tbl->ccs->cc->label;
853
854 IndexTable *prev = NULL;
855 IndexTable *cursor = sortedList;
856
857 while (cursor != NULL) {
858 StgWord cursor_ticks = cursor->ccs->scc_count;
859 char* cursor_label = cursor->ccs->cc->label;
860
861 if (tbl_ticks > cursor_ticks ||
862 (tbl_ticks == cursor_ticks && strcmp(tbl_label, cursor_label) < 0)) {
863 if (prev == NULL) {
864 tbl->next = sortedList;
865 return tbl;
866 } else {
867 prev->next = tbl;
868 tbl->next = cursor;
869 return sortedList;
870 }
871 } else {
872 prev = cursor;
873 cursor = cursor->next;
874 }
875 }
876
877 prev->next = tbl;
878 return sortedList;
879 }
880
881 static void
882 sortCCSTree(CostCentreStack *ccs)
883 {
884 if (ccs->indexTable == NULL) return;
885
886 for (IndexTable *tbl = ccs->indexTable; tbl != NULL; tbl = tbl->next)
887 if (!tbl->back_edge)
888 sortCCSTree(tbl->ccs);
889
890 IndexTable *sortedList = ccs->indexTable;
891 IndexTable *nonSortedList = sortedList->next;
892 sortedList->next = NULL;
893
894 while (nonSortedList != NULL)
895 {
896 IndexTable *nonSortedTail = nonSortedList->next;
897 nonSortedList->next = NULL;
898 sortedList = insertIndexTableInSortedList(nonSortedList, sortedList);
899 nonSortedList = nonSortedTail;
900 }
901
902 ccs->indexTable = sortedList;
903 }
904
905 void
906 fprintCCS( FILE *f, CostCentreStack *ccs )
907 {
908 fprintf(f,"<");
909 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
910 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
911 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
912 fprintf(f,",");
913 }
914 }
915 fprintf(f,">");
916 }
917
918 // Returns: True if the call stack ended with CAF
919 static bool fprintCallStack (CostCentreStack *ccs)
920 {
921 CostCentreStack *prev;
922
923 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
924 prev = ccs->prevStack;
925 while (prev && prev != CCS_MAIN) {
926 ccs = prev;
927 fprintf(stderr, ",\n called from %s.%s",
928 ccs->cc->module, ccs->cc->label);
929 prev = ccs->prevStack;
930 }
931 fprintf(stderr, "\n");
932
933 return (!strncmp(ccs->cc->label, "CAF", 3));
934 }
935
936 /* For calling from .cmm code, where we can't reliably refer to stderr */
937 void
938 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
939 {
940 bool is_caf;
941 StgPtr frame;
942 StgStack *stack;
943 CostCentreStack *prev_ccs;
944 uint32_t depth = 0;
945 const uint32_t MAX_DEPTH = 10; // don't print gigantic chains of stacks
946
947 {
948 const char *desc;
949 const StgInfoTable *info;
950 info = get_itbl(UNTAG_CONST_CLOSURE(exception));
951 switch (info->type) {
952 case CONSTR:
953 case CONSTR_1_0:
954 case CONSTR_0_1:
955 case CONSTR_2_0:
956 case CONSTR_1_1:
957 case CONSTR_0_2:
958 case CONSTR_NOCAF:
959 desc = GET_CON_DESC(itbl_to_con_itbl(info));
960 break;
961 default:
962 desc = closure_type_names[info->type];
963 break;
964 }
965 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
966 }
967
968 is_caf = fprintCallStack(ccs);
969
970 // traverse the stack down to the enclosing update frame to
971 // find out where this CCS was evaluated from...
972
973 stack = tso->stackobj;
974 frame = stack->sp;
975 prev_ccs = ccs;
976
977 for (; is_caf && depth < MAX_DEPTH; depth++)
978 {
979 switch (get_itbl((StgClosure*)frame)->type)
980 {
981 case UPDATE_FRAME:
982 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
983 frame += sizeofW(StgUpdateFrame);
984 if (ccs == CCS_MAIN) {
985 goto done;
986 }
987 if (ccs == prev_ccs) {
988 // ignore if this is the same as the previous stack,
989 // we're probably in library code and haven't
990 // accumulated any more interesting stack items
991 // since the last update frame.
992 break;
993 }
994 prev_ccs = ccs;
995 fprintf(stderr, " --> evaluated by: ");
996 is_caf = fprintCallStack(ccs);
997 break;
998 case UNDERFLOW_FRAME:
999 stack = ((StgUnderflowFrame*)frame)->next_chunk;
1000 frame = stack->sp;
1001 break;
1002 case STOP_FRAME:
1003 goto done;
1004 default:
1005 frame += stack_frame_sizeW((StgClosure*)frame);
1006 break;
1007 }
1008 }
1009 done:
1010 return;
1011 }
1012
1013 #if defined(DEBUG)
1014 void
1015 debugCCS( CostCentreStack *ccs )
1016 {
1017 debugBelch("<");
1018 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1019 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
1020 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1021 debugBelch(",");
1022 }
1023 }
1024 debugBelch(">");
1025 }
1026 #endif /* DEBUG */
1027
1028 #endif /* PROFILING */