70bf3750e386573b2787e67ba14e3572010a0385
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if defined(PROFILING)
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "ProfilerReport.h"
21 #include "ProfilerReportJson.h"
22 #include "Printer.h"
23 #include "Capability.h"
24
25 #include <fs_rts.h>
26 #include <string.h>
27
28 #if defined(DEBUG)
29 #include "Trace.h"
30 #endif
31
32 /*
33 * Profiling allocation arena.
34 */
35 #if defined(DEBUG)
36 Arena *prof_arena;
37 #else
38 static Arena *prof_arena;
39 #endif
40
41 /*
42 * Global variables used to assign unique IDs to cc's, ccs's, and
43 * closure_cats
44 */
45
46 unsigned int CC_ID = 1;
47 unsigned int CCS_ID = 1;
48
49 /* Globals for opening the profiling log file(s)
50 */
51 static char *prof_filename; /* prof report file name = <program>.prof */
52 FILE *prof_file;
53
54 static char *hp_filename; /* heap profile (hp2ps style) log file */
55 FILE *hp_file;
56
57 /* Linked lists to keep track of CCs and CCSs that haven't
58 * been declared in the log file yet
59 */
60 CostCentre *CC_LIST = NULL;
61 CostCentreStack *CCS_LIST = NULL;
62
63 #if defined(THREADED_RTS)
64 static Mutex ccs_mutex;
65 #endif
66
67 /*
68 * Built-in cost centres and cost-centre stacks:
69 *
70 * MAIN is the root of the cost-centre stack tree. If there are
71 * no {-# SCC #-}s in the program, all costs will be attributed
72 * to MAIN.
73 *
74 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
75 * RTS operations apart from garbage collection are attributed
76 * to SYSTEM.
77 *
78 * GC is the storage manager / garbage collector.
79 *
80 * OVERHEAD gets all costs generated by the profiling system
81 * itself. These are costs that would not be incurred
82 * during non-profiled execution of the program.
83 *
84 * DONT_CARE is a placeholder cost-centre we assign to static
85 * constructors. It should *never* accumulate any costs.
86 *
87 * PINNED accumulates memory allocated to pinned objects, which
88 * cannot be profiled separately because we cannot reliably
89 * traverse pinned memory.
90 */
91
92 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "<built-in>", CC_NOT_CAF, );
93 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", "<built-in>", CC_NOT_CAF, );
94 CC_DECLARE(CC_GC, "GC", "GC", "<built-in>", CC_NOT_CAF, );
95 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "<built-in>", CC_NOT_CAF, );
96 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", "<built-in>", CC_NOT_CAF, );
97 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", "<built-in>", CC_NOT_CAF, );
98 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "<built-in>", CC_NOT_CAF, );
99
100 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
101 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
102 CCS_DECLARE(CCS_GC, CC_GC, );
103 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
104 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
105 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
106 CCS_DECLARE(CCS_IDLE, CC_IDLE, );
107
108 /*
109 * Static Functions
110 */
111
112 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
113 CostCentreStack *ccs2 );
114 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
115 CostCentreStack *new_ccs );
116 static void inheritCosts ( CostCentreStack *ccs );
117 static ProfilerTotals countTickss ( CostCentreStack const *ccs );
118 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
119 CostCentre *cc );
120 static void sortCCSTree ( CostCentreStack *ccs );
121 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
122 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
123 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
124 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
125 CostCentre *, bool );
126 static void ccsSetSelected ( CostCentreStack *ccs );
127 static void aggregateCCCosts( CostCentreStack *ccs );
128
129 static void initTimeProfiling ( void );
130 static void initProfilingLogFile ( void );
131
132 /* -----------------------------------------------------------------------------
133 Initialise the profiling environment
134 -------------------------------------------------------------------------- */
135
136 void initProfiling (void)
137 {
138 // initialise our arena
139 prof_arena = newArena();
140
141 /* for the benefit of allocate()... */
142 {
143 uint32_t n;
144 for (n=0; n < n_capabilities; n++) {
145 capabilities[n]->r.rCCCS = CCS_SYSTEM;
146 }
147 }
148
149 #if defined(THREADED_RTS)
150 initMutex(&ccs_mutex);
151 #endif
152
153 /* Set up the log file, and dump the header and cost centre
154 * information into it.
155 */
156 initProfilingLogFile();
157
158 /* Register all the cost centres / stacks in the program
159 * CC_MAIN gets link = 0, all others have non-zero link.
160 */
161 REGISTER_CC(CC_MAIN);
162 REGISTER_CC(CC_SYSTEM);
163 REGISTER_CC(CC_GC);
164 REGISTER_CC(CC_OVERHEAD);
165 REGISTER_CC(CC_DONT_CARE);
166 REGISTER_CC(CC_PINNED);
167 REGISTER_CC(CC_IDLE);
168
169 REGISTER_CCS(CCS_SYSTEM);
170 REGISTER_CCS(CCS_GC);
171 REGISTER_CCS(CCS_OVERHEAD);
172 REGISTER_CCS(CCS_DONT_CARE);
173 REGISTER_CCS(CCS_PINNED);
174 REGISTER_CCS(CCS_IDLE);
175 REGISTER_CCS(CCS_MAIN);
176
177 /* find all the registered cost centre stacks, and make them
178 * children of CCS_MAIN.
179 */
180 ASSERT(CCS_LIST == CCS_MAIN);
181 CCS_LIST = CCS_LIST->prevStack;
182 CCS_MAIN->prevStack = NULL;
183 CCS_MAIN->root = CCS_MAIN;
184 ccsSetSelected(CCS_MAIN);
185
186 initProfiling2();
187
188 if (RtsFlags.CcFlags.doCostCentres) {
189 initTimeProfiling();
190 }
191
192 if (RtsFlags.ProfFlags.doHeapProfile) {
193 initHeapProfiling();
194 }
195 }
196
197 //
198 // Should be called after loading any new Haskell code.
199 //
200 void initProfiling2 (void)
201 {
202 CostCentreStack *ccs, *next;
203
204 // make CCS_MAIN the parent of all the pre-defined CCSs.
205 for (ccs = CCS_LIST; ccs != NULL; ) {
206 next = ccs->prevStack;
207 ccs->prevStack = NULL;
208 actualPush_(CCS_MAIN,ccs->cc,ccs);
209 ccs->root = ccs;
210 ccs = next;
211 }
212 CCS_LIST = NULL;
213 }
214
215 void
216 freeProfiling (void)
217 {
218 arenaFree(prof_arena);
219 }
220
221 CostCentre *mkCostCentre (char *label, char *module, char *srcloc)
222 {
223 CostCentre *cc = stgMallocBytes (sizeof(CostCentre), "mkCostCentre");
224 cc->label = label;
225 cc->module = module;
226 cc->srcloc = srcloc;
227 cc->is_caf = 0;
228 cc->mem_alloc = 0;
229 cc->time_ticks = 0;
230 cc->link = NULL;
231 return cc;
232 }
233
234 static void
235 initProfilingLogFile(void)
236 {
237 // Figure out output file name stem.
238 char const *stem;
239 if (RtsFlags.CcFlags.outputFileNameStem) {
240 stem = RtsFlags.CcFlags.outputFileNameStem;
241 } else {
242 char *prog;
243
244 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
245 strcpy(prog, prog_name);
246 #if defined(mingw32_HOST_OS)
247 // on Windows, drop the .exe suffix if there is one
248 {
249 char *suff;
250 suff = strrchr(prog,'.');
251 if (suff != NULL && !strcmp(suff,".exe")) {
252 *suff = '\0';
253 }
254 }
255 #endif
256 stem = prog;
257 }
258
259 if (RtsFlags.CcFlags.doCostCentres == 0 && !doingRetainerProfiling())
260 {
261 /* No need for the <stem>.prof file */
262 prof_filename = NULL;
263 prof_file = NULL;
264 }
265 else
266 {
267 /* Initialise the log file name */
268 prof_filename = arenaAlloc(prof_arena, strlen(stem) + 6);
269 sprintf(prof_filename, "%s.prof", stem);
270
271 /* open the log file */
272 if ((prof_file = __rts_fopen(prof_filename, "w")) == NULL) {
273 debugBelch("Can't open profiling report file %s\n", prof_filename);
274 RtsFlags.CcFlags.doCostCentres = 0;
275 // Retainer profiling (`-hr` or `-hr<cc> -h<x>`) writes to
276 // both <program>.hp as <program>.prof.
277 if (doingRetainerProfiling()) {
278 RtsFlags.ProfFlags.doHeapProfile = 0;
279 }
280 }
281 }
282
283 if (RtsFlags.ProfFlags.doHeapProfile) {
284 /* Initialise the log file name */
285 hp_filename = arenaAlloc(prof_arena, strlen(stem) + 6);
286 sprintf(hp_filename, "%s.hp", stem);
287
288 /* open the log file */
289 if ((hp_file = __rts_fopen(hp_filename, "w")) == NULL) {
290 debugBelch("Can't open profiling report file %s\n",
291 hp_filename);
292 RtsFlags.ProfFlags.doHeapProfile = 0;
293 }
294 }
295 }
296
297 void
298 initTimeProfiling(void)
299 {
300 /* Start ticking */
301 startProfTimer();
302 };
303
304 void
305 endProfiling ( void )
306 {
307 if (RtsFlags.CcFlags.doCostCentres) {
308 stopProfTimer();
309 }
310 if (RtsFlags.ProfFlags.doHeapProfile) {
311 endHeapProfiling();
312 }
313 }
314
315
316 /*
317 These are used in the C stubs produced by the code generator
318 to register code.
319 */
320 void registerCcList(CostCentre **cc_list)
321 {
322 for (CostCentre **i = cc_list; *i != NULL; i++) {
323 REGISTER_CC(*i);
324 }
325 }
326
327 void registerCcsList(CostCentreStack **cc_list)
328 {
329 for (CostCentreStack **i = cc_list; *i != NULL; i++) {
330 REGISTER_CCS(*i);
331 }
332 }
333
334 /* -----------------------------------------------------------------------------
335 Set CCCS when entering a function.
336
337 The algorithm is as follows.
338
339 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
340
341 where
342
343 dropCommonPrefix A B
344 -- returns the suffix of B after removing any prefix common
345 -- to both A and B.
346
347 e.g.
348
349 <a,b,c> ++> <> = <a,b,c>
350 <a,b,c> ++> <d> = <a,b,c,d>
351 <a,b,c> ++> <a,b> = <a,b,c>
352 <a,b> ++> <a,b,c> = <a,b,c>
353 <a,b,c> ++> <a,b,d> = <a,b,c,d>
354
355 -------------------------------------------------------------------------- */
356
357 // implements c1 ++> c2, where c1 and c2 are equal depth
358 //
359 static CostCentreStack *
360 enterFunEqualStacks (CostCentreStack *ccs0,
361 CostCentreStack *ccsapp,
362 CostCentreStack *ccsfn)
363 {
364 ASSERT(ccsapp->depth == ccsfn->depth);
365 if (ccsapp == ccsfn) return ccs0;
366 return pushCostCentre(enterFunEqualStacks(ccs0,
367 ccsapp->prevStack,
368 ccsfn->prevStack),
369 ccsfn->cc);
370 }
371
372 // implements c1 ++> c2, where c2 is deeper than c1.
373 // Drop elements of c2 until we have equal stacks, call
374 // enterFunEqualStacks(), and then push on the elements that we
375 // dropped in reverse order.
376 //
377 static CostCentreStack *
378 enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
379 {
380 if (n == 0) {
381 ASSERT(ccsfn->depth == ccsapp->depth);
382 return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);;
383 } else {
384 ASSERT(ccsfn->depth > ccsapp->depth);
385 return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
386 ccsfn->cc);
387 }
388 }
389
390 void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
391 {
392 CostCentreStack *ccsapp;
393
394 // common case 1: both stacks are the same
395 if (ccsfn == reg->rCCCS) {
396 return;
397 }
398
399 // common case 2: the function stack is empty, or just CAF
400 if (ccsfn->cc->is_caf) {
401 return;
402 }
403
404 ccsapp = reg->rCCCS;
405 reg->rCCCS = CCS_OVERHEAD;
406
407 // common case 3: the stacks are completely different (e.g. one is a
408 // descendent of MAIN and the other of a CAF): we append the whole
409 // of the function stack to the current CCS.
410 if (ccsfn->root != ccsapp->root) {
411 reg->rCCCS = appendCCS(ccsapp,ccsfn);
412 return;
413 }
414
415 // uncommon case 4: ccsapp is deeper than ccsfn
416 if (ccsapp->depth > ccsfn->depth) {
417 uint32_t i, n;
418 CostCentreStack *tmp = ccsapp;
419 n = ccsapp->depth - ccsfn->depth;
420 for (i = 0; i < n; i++) {
421 tmp = tmp->prevStack;
422 }
423 reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
424 return;
425 }
426
427 // uncommon case 5: ccsfn is deeper than CCCS
428 if (ccsfn->depth > ccsapp->depth) {
429 reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
430 ccsfn->depth - ccsapp->depth);
431 return;
432 }
433
434 // uncommon case 6: stacks are equal depth, but different
435 reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
436 }
437
438 /* -----------------------------------------------------------------------------
439 Decide whether closures with this CCS should contribute to the heap
440 profile.
441 -------------------------------------------------------------------------- */
442
443 static void
444 ccsSetSelected (CostCentreStack *ccs)
445 {
446 if (RtsFlags.ProfFlags.modSelector) {
447 if (! strMatchesSelector (ccs->cc->module,
448 RtsFlags.ProfFlags.modSelector) ) {
449 ccs->selected = 0;
450 return;
451 }
452 }
453 if (RtsFlags.ProfFlags.ccSelector) {
454 if (! strMatchesSelector (ccs->cc->label,
455 RtsFlags.ProfFlags.ccSelector) ) {
456 ccs->selected = 0;
457 return;
458 }
459 }
460 if (RtsFlags.ProfFlags.ccsSelector) {
461 CostCentreStack *c;
462 for (c = ccs; c != NULL; c = c->prevStack)
463 {
464 if ( strMatchesSelector (c->cc->label,
465 RtsFlags.ProfFlags.ccsSelector) ) {
466 break;
467 }
468 }
469 if (c == NULL) {
470 ccs->selected = 0;
471 return;
472 }
473 }
474
475 ccs->selected = 1;
476 return;
477 }
478
479 /* -----------------------------------------------------------------------------
480 Cost-centre stack manipulation
481 -------------------------------------------------------------------------- */
482
483 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
484 CostCentreStack *
485 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
486 {
487 IF_DEBUG(prof,
488 if (ccs1 != ccs2) {
489 debugBelch("Appending ");
490 debugCCS(ccs1);
491 debugBelch(" to ");
492 debugCCS(ccs2);
493 debugBelch("\n");});
494
495 if (ccs1 == ccs2) {
496 return ccs1;
497 }
498
499 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf) {
500 // stop at a CAF element
501 return ccs1;
502 }
503
504 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
505 }
506
507 // Pick one:
508 // #define RECURSION_DROPS
509 #define RECURSION_TRUNCATES
510
511 CostCentreStack *
512 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
513 {
514 IF_DEBUG(prof,
515 traceBegin("pushing %s on ", cc->label);
516 debugCCS(ccs);
517 traceEnd(););
518
519 CostCentreStack *ret;
520
521 if (ccs == EMPTY_STACK) {
522 ACQUIRE_LOCK(&ccs_mutex);
523 ret = actualPush(ccs,cc);
524 }
525 else
526 {
527 if (ccs->cc == cc) {
528 return ccs;
529 } else {
530 // check if we've already memoized this stack
531 IndexTable *ixtable = ccs->indexTable;
532 CostCentreStack *temp_ccs = isInIndexTable(ixtable,cc);
533
534 if (temp_ccs != EMPTY_STACK) {
535 return temp_ccs;
536 } else {
537
538 // not in the IndexTable, now we take the lock:
539 ACQUIRE_LOCK(&ccs_mutex);
540
541 if (ccs->indexTable != ixtable)
542 {
543 // someone modified ccs->indexTable while
544 // we did not hold the lock, so we must
545 // check it again:
546 temp_ccs = isInIndexTable(ixtable,cc);
547 if (temp_ccs != EMPTY_STACK)
548 {
549 RELEASE_LOCK(&ccs_mutex);
550 return temp_ccs;
551 }
552 }
553 temp_ccs = checkLoop(ccs,cc);
554 if (temp_ccs != NULL) {
555 // This CC is already in the stack somewhere.
556 // This could be recursion, or just calling
557 // another function with the same CC.
558 // A number of policies are possible at this
559 // point, we implement two here:
560 // - truncate the stack to the previous instance
561 // of this CC
562 // - ignore this push, return the same stack.
563 //
564 CostCentreStack *new_ccs;
565 #if defined(RECURSION_TRUNCATES)
566 new_ccs = temp_ccs;
567 #else // defined(RECURSION_DROPS)
568 new_ccs = ccs;
569 #endif
570 ccs->indexTable = addToIndexTable (ccs->indexTable,
571 new_ccs, cc, true);
572 ret = new_ccs;
573 } else {
574 ret = actualPush (ccs,cc);
575 }
576 }
577 }
578 }
579
580 RELEASE_LOCK(&ccs_mutex);
581 return ret;
582 }
583
584 static CostCentreStack *
585 checkLoop (CostCentreStack *ccs, CostCentre *cc)
586 {
587 while (ccs != EMPTY_STACK) {
588 if (ccs->cc == cc)
589 return ccs;
590 ccs = ccs->prevStack;
591 }
592 return NULL;
593 }
594
595 static CostCentreStack *
596 actualPush (CostCentreStack *ccs, CostCentre *cc)
597 {
598 CostCentreStack *new_ccs;
599
600 // allocate space for a new CostCentreStack
601 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
602
603 return actualPush_(ccs, cc, new_ccs);
604 }
605
606 static CostCentreStack *
607 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
608 {
609 /* assign values to each member of the structure */
610 new_ccs->ccsID = CCS_ID++;
611 new_ccs->cc = cc;
612 new_ccs->prevStack = ccs;
613 new_ccs->root = ccs->root;
614 new_ccs->depth = ccs->depth + 1;
615
616 new_ccs->indexTable = EMPTY_TABLE;
617
618 /* Initialise the various _scc_ counters to zero
619 */
620 new_ccs->scc_count = 0;
621
622 /* Initialize all other stats here. There should be a quick way
623 * that's easily used elsewhere too
624 */
625 new_ccs->time_ticks = 0;
626 new_ccs->mem_alloc = 0;
627 new_ccs->inherited_ticks = 0;
628 new_ccs->inherited_alloc = 0;
629
630 // Set the selected field.
631 ccsSetSelected(new_ccs);
632
633 /* update the memoization table for the parent stack */
634 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
635 false/*not a back edge*/);
636
637 /* return a pointer to the new stack */
638 return new_ccs;
639 }
640
641
642 static CostCentreStack *
643 isInIndexTable(IndexTable *it, CostCentre *cc)
644 {
645 while (it!=EMPTY_TABLE)
646 {
647 if (it->cc == cc)
648 return it->ccs;
649 else
650 it = it->next;
651 }
652
653 /* otherwise we never found it so return EMPTY_TABLE */
654 return EMPTY_TABLE;
655 }
656
657
658 static IndexTable *
659 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
660 CostCentre *cc, bool back_edge)
661 {
662 IndexTable *new_it;
663
664 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
665
666 new_it->cc = cc;
667 new_it->ccs = new_ccs;
668 new_it->next = it;
669 new_it->back_edge = back_edge;
670 return new_it;
671 }
672
673 /* -----------------------------------------------------------------------------
674 Generating a time & allocation profiling report.
675 -------------------------------------------------------------------------- */
676
677 /* We omit certain system-related CCs and CCSs from the default
678 * reports, so as not to cause confusion.
679 */
680 bool
681 ignoreCC (CostCentre const *cc)
682 {
683 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
684 ( cc == CC_OVERHEAD
685 || cc == CC_DONT_CARE
686 || cc == CC_GC
687 || cc == CC_SYSTEM
688 || cc == CC_IDLE);
689 }
690
691 bool
692 ignoreCCS (CostCentreStack const *ccs)
693 {
694 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
695 ( ccs == CCS_OVERHEAD
696 || ccs == CCS_DONT_CARE
697 || ccs == CCS_GC
698 || ccs == CCS_SYSTEM
699 || ccs == CCS_IDLE);
700 }
701
702 void
703 reportCCSProfiling( void )
704 {
705 stopProfTimer();
706 if (RtsFlags.CcFlags.doCostCentres == 0) return;
707
708 ProfilerTotals totals = countTickss(CCS_MAIN);
709 aggregateCCCosts(CCS_MAIN);
710 inheritCosts(CCS_MAIN);
711 CostCentreStack *stack = pruneCCSTree(CCS_MAIN);
712 sortCCSTree(stack);
713
714 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_JSON) {
715 writeCCSReportJson(prof_file, stack, totals);
716 } else {
717 writeCCSReport(prof_file, stack, totals);
718 }
719 }
720
721 /* -----------------------------------------------------------------------------
722 * Accumulating total allocatinos and tick count
723 -------------------------------------------------------------------------- */
724
725 /* Helper */
726 static void
727 countTickss_(CostCentreStack const *ccs, ProfilerTotals *totals)
728 {
729 if (!ignoreCCS(ccs)) {
730 totals->total_alloc += ccs->mem_alloc;
731 totals->total_prof_ticks += ccs->time_ticks;
732 }
733 for (IndexTable *i = ccs->indexTable; i != NULL; i = i->next) {
734 if (!i->back_edge) {
735 countTickss_(i->ccs, totals);
736 }
737 }
738 }
739
740 /* Traverse the cost centre stack tree and accumulate
741 * total ticks/allocations.
742 */
743 static ProfilerTotals
744 countTickss(CostCentreStack const *ccs)
745 {
746 ProfilerTotals totals = {0,0};
747 countTickss_(ccs, &totals);
748 return totals;
749 }
750
751 /* Traverse the cost centre stack tree and inherit ticks & allocs.
752 */
753 static void
754 inheritCosts(CostCentreStack *ccs)
755 {
756 IndexTable *i;
757
758 if (ignoreCCS(ccs)) { return; }
759
760 ccs->inherited_ticks += ccs->time_ticks;
761 ccs->inherited_alloc += ccs->mem_alloc;
762
763 for (i = ccs->indexTable; i != NULL; i = i->next)
764 if (!i->back_edge) {
765 inheritCosts(i->ccs);
766 ccs->inherited_ticks += i->ccs->inherited_ticks;
767 ccs->inherited_alloc += i->ccs->inherited_alloc;
768 }
769
770 return;
771 }
772
773 static void
774 aggregateCCCosts( CostCentreStack *ccs )
775 {
776 IndexTable *i;
777
778 ccs->cc->mem_alloc += ccs->mem_alloc;
779 ccs->cc->time_ticks += ccs->time_ticks;
780
781 for (i = ccs->indexTable; i != 0; i = i->next) {
782 if (!i->back_edge) {
783 aggregateCCCosts(i->ccs);
784 }
785 }
786 }
787
788 //
789 // Prune CCSs with zero entries, zero ticks or zero allocation from
790 // the tree, unless COST_CENTRES_ALL is on.
791 //
792 static CostCentreStack *
793 pruneCCSTree (CostCentreStack *ccs)
794 {
795 CostCentreStack *ccs1;
796 IndexTable *i, **prev;
797
798 prev = &ccs->indexTable;
799 for (i = ccs->indexTable; i != 0; i = i->next) {
800 if (i->back_edge) { continue; }
801
802 ccs1 = pruneCCSTree(i->ccs);
803 if (ccs1 == NULL) {
804 *prev = i->next;
805 } else {
806 prev = &(i->next);
807 }
808 }
809
810 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
811 /* force printing of *all* cost centres if -P -P */ )
812
813 || ( ccs->indexTable != 0 )
814 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
815 ) {
816 return ccs;
817 } else {
818 return NULL;
819 }
820 }
821
822 static IndexTable*
823 insertIndexTableInSortedList(IndexTable* tbl, IndexTable* sortedList)
824 {
825 StgWord tbl_ticks = tbl->ccs->scc_count;
826 char* tbl_label = tbl->ccs->cc->label;
827
828 IndexTable *prev = NULL;
829 IndexTable *cursor = sortedList;
830
831 while (cursor != NULL) {
832 StgWord cursor_ticks = cursor->ccs->scc_count;
833 char* cursor_label = cursor->ccs->cc->label;
834
835 if (tbl_ticks > cursor_ticks ||
836 (tbl_ticks == cursor_ticks && strcmp(tbl_label, cursor_label) < 0)) {
837 if (prev == NULL) {
838 tbl->next = sortedList;
839 return tbl;
840 } else {
841 prev->next = tbl;
842 tbl->next = cursor;
843 return sortedList;
844 }
845 } else {
846 prev = cursor;
847 cursor = cursor->next;
848 }
849 }
850
851 prev->next = tbl;
852 return sortedList;
853 }
854
855 static void
856 sortCCSTree(CostCentreStack *ccs)
857 {
858 if (ccs->indexTable == NULL) return;
859
860 for (IndexTable *tbl = ccs->indexTable; tbl != NULL; tbl = tbl->next)
861 if (!tbl->back_edge)
862 sortCCSTree(tbl->ccs);
863
864 IndexTable *sortedList = ccs->indexTable;
865 IndexTable *nonSortedList = sortedList->next;
866 sortedList->next = NULL;
867
868 while (nonSortedList != NULL)
869 {
870 IndexTable *nonSortedTail = nonSortedList->next;
871 nonSortedList->next = NULL;
872 sortedList = insertIndexTableInSortedList(nonSortedList, sortedList);
873 nonSortedList = nonSortedTail;
874 }
875
876 ccs->indexTable = sortedList;
877 }
878
879 void
880 fprintCCS( FILE *f, CostCentreStack *ccs )
881 {
882 fprintf(f,"<");
883 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
884 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
885 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
886 fprintf(f,",");
887 }
888 }
889 fprintf(f,">");
890 }
891
892 // Returns: True if the call stack ended with CAF
893 static bool fprintCallStack (CostCentreStack *ccs)
894 {
895 CostCentreStack *prev;
896
897 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
898 prev = ccs->prevStack;
899 while (prev && prev != CCS_MAIN) {
900 ccs = prev;
901 fprintf(stderr, ",\n called from %s.%s",
902 ccs->cc->module, ccs->cc->label);
903 prev = ccs->prevStack;
904 }
905 fprintf(stderr, "\n");
906
907 return (!strncmp(ccs->cc->label, "CAF", 3));
908 }
909
910 /* For calling from .cmm code, where we can't reliably refer to stderr */
911 void
912 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
913 {
914 bool is_caf;
915 StgPtr frame;
916 StgStack *stack;
917 CostCentreStack *prev_ccs;
918 uint32_t depth = 0;
919 const uint32_t MAX_DEPTH = 10; // don't print gigantic chains of stacks
920
921 {
922 const char *desc;
923 const StgInfoTable *info;
924 info = get_itbl(UNTAG_CONST_CLOSURE(exception));
925 switch (info->type) {
926 case CONSTR:
927 case CONSTR_1_0:
928 case CONSTR_0_1:
929 case CONSTR_2_0:
930 case CONSTR_1_1:
931 case CONSTR_0_2:
932 case CONSTR_NOCAF:
933 desc = GET_CON_DESC(itbl_to_con_itbl(info));
934 break;
935 default:
936 desc = closure_type_names[info->type];
937 break;
938 }
939 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
940 }
941
942 is_caf = fprintCallStack(ccs);
943
944 // traverse the stack down to the enclosing update frame to
945 // find out where this CCS was evaluated from...
946
947 stack = tso->stackobj;
948 frame = stack->sp;
949 prev_ccs = ccs;
950
951 for (; is_caf && depth < MAX_DEPTH; depth++)
952 {
953 switch (get_itbl((StgClosure*)frame)->type)
954 {
955 case UPDATE_FRAME:
956 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
957 frame += sizeofW(StgUpdateFrame);
958 if (ccs == CCS_MAIN) {
959 goto done;
960 }
961 if (ccs == prev_ccs) {
962 // ignore if this is the same as the previous stack,
963 // we're probably in library code and haven't
964 // accumulated any more interesting stack items
965 // since the last update frame.
966 break;
967 }
968 prev_ccs = ccs;
969 fprintf(stderr, " --> evaluated by: ");
970 is_caf = fprintCallStack(ccs);
971 break;
972 case UNDERFLOW_FRAME:
973 stack = ((StgUnderflowFrame*)frame)->next_chunk;
974 frame = stack->sp;
975 break;
976 case STOP_FRAME:
977 goto done;
978 default:
979 frame += stack_frame_sizeW((StgClosure*)frame);
980 break;
981 }
982 }
983 done:
984 return;
985 }
986
987 #if defined(DEBUG)
988 void
989 debugCCS( CostCentreStack *ccs )
990 {
991 debugBelch("<");
992 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
993 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
994 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
995 debugBelch(",");
996 }
997 }
998 debugBelch(">");
999 }
1000 #endif /* DEBUG */
1001
1002 #endif /* PROFILING */