Minor refactoring and documentation in profiling RTS code
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if defined(PROFILING)
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "ProfilerReport.h"
21 #include "ProfilerReportJson.h"
22 #include "Printer.h"
23 #include "Capability.h"
24
25 #include <fs_rts.h>
26 #include <string.h>
27
28 #if defined(DEBUG)
29 #include "Trace.h"
30 #endif
31
32 /*
33 * Profiling allocation arena.
34 */
35 static Arena *prof_arena;
36
37 /*
38 * Global variables used to assign unique IDs to cost centres (CCs)
39 * and cost-centre stacks (CCSs)
40 */
41
42 unsigned int CC_ID = 1;
43 unsigned int CCS_ID = 1;
44
45 /* Globals for opening the profiling log file(s)
46 */
47 static char *prof_filename; /* prof report file name = <program>.prof */
48 FILE *prof_file;
49
50 static char *hp_filename; /* heap profile (hp2ps style) log file */
51 FILE *hp_file;
52
53 /* Linked lists to keep track of CCs and CCSs that haven't
54 * been declared in the log file yet
55 */
56 CostCentre *CC_LIST = NULL;
57 CostCentreStack *CCS_LIST = NULL;
58
59 #if defined(THREADED_RTS)
60 static Mutex ccs_mutex;
61 #endif
62
63 /*
64 * Built-in cost centres and cost-centre stacks:
65 *
66 * MAIN is the root of the cost-centre stack tree. If there are
67 * no {-# SCC #-}s in the program, all costs will be attributed
68 * to MAIN.
69 *
70 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
71 * RTS operations apart from garbage collection are attributed
72 * to SYSTEM.
73 *
74 * GC is the storage manager / garbage collector.
75 *
76 * OVERHEAD gets all costs generated by the profiling system
77 * itself. These are costs that would not be incurred
78 * during non-profiled execution of the program.
79 *
80 * DONT_CARE is a placeholder cost-centre we assign to static
81 * constructors. It should *never* accumulate any costs.
82 *
83 * PINNED accumulates memory allocated to pinned objects, which
84 * cannot be profiled separately because we cannot reliably
85 * traverse pinned memory.
86 */
87
88 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "<built-in>", CC_NOT_CAF, );
89 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", "<built-in>", CC_NOT_CAF, );
90 CC_DECLARE(CC_GC, "GC", "GC", "<built-in>", CC_NOT_CAF, );
91 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "<built-in>", CC_NOT_CAF, );
92 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", "<built-in>", CC_NOT_CAF, );
93 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", "<built-in>", CC_NOT_CAF, );
94 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "<built-in>", CC_NOT_CAF, );
95
96 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
97 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
98 CCS_DECLARE(CCS_GC, CC_GC, );
99 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
100 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
101 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
102 CCS_DECLARE(CCS_IDLE, CC_IDLE, );
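/* Each CC_DECLARE above statically allocates a CostCentre, and each
 * CCS_DECLARE a singleton CostCentreStack rooted at that cost centre.  They
 * only become known to the profiler once initProfiling() runs REGISTER_CC /
 * REGISTER_CCS on them, which (roughly) assigns fresh IDs from CC_ID / CCS_ID
 * and links them onto CC_LIST / CCS_LIST above.
 */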
103
104 /*
105 * Static Functions
106 */
107
108 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
109 CostCentreStack *ccs2 );
110 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
111 CostCentreStack *new_ccs );
112 static void inheritCosts ( CostCentreStack *ccs );
113 static ProfilerTotals countTicks ( CostCentreStack const *ccs );
114 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
115 CostCentre *cc );
116 static void sortCCSTree ( CostCentreStack *ccs );
117 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
118 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
119 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
120 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
121 CostCentre *, bool );
122 static void ccsSetSelected ( CostCentreStack *ccs );
123 static void aggregateCCCosts( CostCentreStack *ccs );
124
125 static void initTimeProfiling ( void );
126 static void initProfilingLogFile ( void );
127
128 /* -----------------------------------------------------------------------------
129 Initialise the profiling environment
130 -------------------------------------------------------------------------- */
131
132 void initProfiling (void)
133 {
134 // initialise our arena
135 prof_arena = newArena();
136
137 /* for the benefit of allocate(): attribute early RTS allocation to SYSTEM */
138 {
139 uint32_t n;
140 for (n=0; n < n_capabilities; n++) {
141 capabilities[n]->r.rCCCS = CCS_SYSTEM;
142 }
143 }
144
145 #if defined(THREADED_RTS)
146 initMutex(&ccs_mutex);
147 #endif
148
149 /* Set up the log file, and dump the header and cost centre
150 * information into it.
151 */
152 initProfilingLogFile();
153
154 /* Register all the cost centres / stacks in the program
155 * CC_MAIN gets link = 0, all others have non-zero link.
156 */
157 REGISTER_CC(CC_MAIN);
158 REGISTER_CC(CC_SYSTEM);
159 REGISTER_CC(CC_GC);
160 REGISTER_CC(CC_OVERHEAD);
161 REGISTER_CC(CC_DONT_CARE);
162 REGISTER_CC(CC_PINNED);
163 REGISTER_CC(CC_IDLE);
164
165 REGISTER_CCS(CCS_SYSTEM);
166 REGISTER_CCS(CCS_GC);
167 REGISTER_CCS(CCS_OVERHEAD);
168 REGISTER_CCS(CCS_DONT_CARE);
169 REGISTER_CCS(CCS_PINNED);
170 REGISTER_CCS(CCS_IDLE);
171 REGISTER_CCS(CCS_MAIN);
172
173 /* find all the registered cost centre stacks, and make them
174 * children of CCS_MAIN.
175 */
176 ASSERT(CCS_LIST == CCS_MAIN);
177 CCS_LIST = CCS_LIST->prevStack;
178 CCS_MAIN->prevStack = NULL;
179 CCS_MAIN->root = CCS_MAIN;
180 ccsSetSelected(CCS_MAIN);
181
182 initProfiling2();
183
184 if (RtsFlags.CcFlags.doCostCentres) {
185 initTimeProfiling();
186 }
187
188 if (RtsFlags.ProfFlags.doHeapProfile) {
189 initHeapProfiling();
190 }
191 }
192
193 //
194 // Should be called after loading any new Haskell code.
195 //
196 void initProfiling2 (void)
197 {
198 CostCentreStack *ccs, *next;
199
200 // make CCS_MAIN the parent of all the pre-defined CCSs.
201 for (ccs = CCS_LIST; ccs != NULL; ) {
202 next = ccs->prevStack;
203 ccs->prevStack = NULL;
204 actualPush_(CCS_MAIN,ccs->cc,ccs);
205 ccs->root = ccs;
206 ccs = next;
207 }
208 CCS_LIST = NULL;
209 }
210
211 void
212 freeProfiling (void)
213 {
214 arenaFree(prof_arena);
215 }
216
217 CostCentre *mkCostCentre (char *label, char *module, char *srcloc)
218 {
219 CostCentre *cc = stgMallocBytes (sizeof(CostCentre), "mkCostCentre");
220 cc->label = label;
221 cc->module = module;
222 cc->srcloc = srcloc;
223 cc->is_caf = 0;
224 cc->mem_alloc = 0;
225 cc->time_ticks = 0;
226 cc->link = NULL;
227 return cc;
228 }
229
230 static void
231 initProfilingLogFile(void)
232 {
233 // Figure out output file name stem.
234 char const *stem;
235 if (RtsFlags.CcFlags.outputFileNameStem) {
236 stem = RtsFlags.CcFlags.outputFileNameStem;
237 } else {
238 char *prog;
239
240 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
241 strcpy(prog, prog_name);
242 #if defined(mingw32_HOST_OS)
243 // on Windows, drop the .exe suffix if there is one
244 {
245 char *suff;
246 suff = strrchr(prog,'.');
247 if (suff != NULL && !strcmp(suff,".exe")) {
248 *suff = '\0';
249 }
250 }
251 #endif
252 stem = prog;
253 }
254
255 if (RtsFlags.CcFlags.doCostCentres == 0 && !doingRetainerProfiling())
256 {
257 /* No need for the <stem>.prof file */
258 prof_filename = NULL;
259 prof_file = NULL;
260 }
261 else
262 {
263 /* Initialise the log file name */
264 prof_filename = arenaAlloc(prof_arena, strlen(stem) + 6);
265 sprintf(prof_filename, "%s.prof", stem);
266
267 /* open the log file */
268 if ((prof_file = __rts_fopen(prof_filename, "w")) == NULL) {
269 debugBelch("Can't open profiling report file %s\n", prof_filename);
270 RtsFlags.CcFlags.doCostCentres = 0;
271 // Retainer profiling (`-hr` or `-hr<cc> -h<x>`) writes to
272 // both <program>.hp and <program>.prof.
273 if (doingRetainerProfiling()) {
274 RtsFlags.ProfFlags.doHeapProfile = 0;
275 }
276 }
277 }
278
279 if (RtsFlags.ProfFlags.doHeapProfile) {
280 /* Initialise the log file name */
281 hp_filename = arenaAlloc(prof_arena, strlen(stem) + 6);
282 sprintf(hp_filename, "%s.hp", stem);
283
284 /* open the log file */
285 if ((hp_file = __rts_fopen(hp_filename, "w")) == NULL) {
286 debugBelch("Can't open heap profile file %s\n",
287 hp_filename);
288 RtsFlags.ProfFlags.doHeapProfile = 0;
289 }
290 }
291 }
292
293 void
294 initTimeProfiling(void)
295 {
296 /* Start ticking */
297 startProfTimer();
298 }
299
300 void
301 endProfiling ( void )
302 {
303 if (RtsFlags.CcFlags.doCostCentres) {
304 stopProfTimer();
305 }
306 if (RtsFlags.ProfFlags.doHeapProfile) {
307 endHeapProfiling();
308 }
309 }
310
311
312 /*
313 These are used in the C stubs produced by the code generator
314 to register code.
315 */
316 void registerCcList(CostCentre **cc_list)
317 {
318 for (CostCentre **i = cc_list; *i != NULL; i++) {
319 REGISTER_CC(*i);
320 }
321 }
322
323 void registerCcsList(CostCentreStack **cc_list)
324 {
325 for (CostCentreStack **i = cc_list; *i != NULL; i++) {
326 REGISTER_CCS(*i);
327 }
328 }
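/* For illustration only -- the exact shape of the generated stub is up to the
 * code generator, and the names below are made up -- a module's registration
 * stub is expected to pass NULL-terminated arrays, along these lines:
 *
 *     static CostCentre      *my_module_ccs[]  = { ..., NULL };
 *     static CostCentreStack *my_module_ccss[] = { ..., NULL };
 *
 *     registerCcList(my_module_ccs);
 *     registerCcsList(my_module_ccss);
 */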
329
330 /* -----------------------------------------------------------------------------
331 Set CCCS when entering a function.
332
333 The algorithm is as follows.
334
335 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
336
337 where
338
339 dropCommonPrefix A B
340 -- returns the suffix of B after removing any prefix common
341 -- to both A and B.
342
343 e.g.
344
345 <a,b,c> ++> <> = <a,b,c>
346 <a,b,c> ++> <d> = <a,b,c,d>
347 <a,b,c> ++> <a,b> = <a,b,c>
348 <a,b> ++> <a,b,c> = <a,b,c>
349 <a,b,c> ++> <a,b,d> = <a,b,c,d>
350
351 -------------------------------------------------------------------------- */
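/* The implementation below splits ++> into cases: enterFunEqualStacks handles
 * stacks of equal depth, enterFunCurShorter peels a deeper ccsfn back to
 * equal depth and then re-pushes the dropped frames, and enterFunCCS
 * dispatches between these (plus a few shortcut cases) on function entry.
 */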
352
353 // implements c1 ++> c2, where c1 and c2 are of equal depth
354 //
355 static CostCentreStack *
356 enterFunEqualStacks (CostCentreStack *ccs0,
357 CostCentreStack *ccsapp,
358 CostCentreStack *ccsfn)
359 {
360 ASSERT(ccsapp->depth == ccsfn->depth);
361 if (ccsapp == ccsfn) return ccs0;
362 return pushCostCentre(enterFunEqualStacks(ccs0,
363 ccsapp->prevStack,
364 ccsfn->prevStack),
365 ccsfn->cc);
366 }
367
368 // implements c1 ++> c2, where c2 is deeper than c1.
369 // Drop elements of c2 until we have equal stacks, call
370 // enterFunEqualStacks(), and then push on the elements that we
371 // dropped in reverse order.
372 //
373 static CostCentreStack *
374 enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
375 {
376 if (n == 0) {
377 ASSERT(ccsfn->depth == ccsapp->depth);
378 return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
379 } else {
380 ASSERT(ccsfn->depth > ccsapp->depth);
381 return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
382 ccsfn->cc);
383 }
384 }
385
386 void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
387 {
388 CostCentreStack *ccsapp;
389
390 // common case 1: both stacks are the same
391 if (ccsfn == reg->rCCCS) {
392 return;
393 }
394
395 // common case 2: the function stack is empty, or just CAF
396 if (ccsfn->cc->is_caf) {
397 return;
398 }
399
400 ccsapp = reg->rCCCS;
401 reg->rCCCS = CCS_OVERHEAD;
402
403 // common case 3: the stacks are completely different (e.g. one is a
404 // descendant of MAIN and the other of a CAF): we append the whole
405 // of the function stack to the current CCS.
406 if (ccsfn->root != ccsapp->root) {
407 reg->rCCCS = appendCCS(ccsapp,ccsfn);
408 return;
409 }
410
411 // uncommon case 4: ccsapp is deeper than ccsfn
412 if (ccsapp->depth > ccsfn->depth) {
413 uint32_t i, n;
414 CostCentreStack *tmp = ccsapp;
415 n = ccsapp->depth - ccsfn->depth;
416 for (i = 0; i < n; i++) {
417 tmp = tmp->prevStack;
418 }
419 reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
420 return;
421 }
422
423 // uncommon case 5: ccsfn is deeper than ccsapp
424 if (ccsfn->depth > ccsapp->depth) {
425 reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
426 ccsfn->depth - ccsapp->depth);
427 return;
428 }
429
430 // uncommon case 6: stacks are equal depth, but different
431 reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
432 }
433
434 /* -----------------------------------------------------------------------------
435 Decide whether closures with this CCS should contribute to the heap
436 profile.
437 -------------------------------------------------------------------------- */
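/* A CCS is "selected" if its cost centre's module and label match the module
 * and CC selectors (when given), and at least one cost centre somewhere on
 * the stack matches the CCS selector (when given).  The selectors come from
 * the +RTS heap-profile restriction flags (RtsFlags.ProfFlags.*Selector).
 */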
438
439 static void
440 ccsSetSelected (CostCentreStack *ccs)
441 {
442 if (RtsFlags.ProfFlags.modSelector) {
443 if (! strMatchesSelector (ccs->cc->module,
444 RtsFlags.ProfFlags.modSelector) ) {
445 ccs->selected = 0;
446 return;
447 }
448 }
449 if (RtsFlags.ProfFlags.ccSelector) {
450 if (! strMatchesSelector (ccs->cc->label,
451 RtsFlags.ProfFlags.ccSelector) ) {
452 ccs->selected = 0;
453 return;
454 }
455 }
456 if (RtsFlags.ProfFlags.ccsSelector) {
457 CostCentreStack *c;
458 for (c = ccs; c != NULL; c = c->prevStack)
459 {
460 if ( strMatchesSelector (c->cc->label,
461 RtsFlags.ProfFlags.ccsSelector) ) {
462 break;
463 }
464 }
465 if (c == NULL) {
466 ccs->selected = 0;
467 return;
468 }
469 }
470
471 ccs->selected = 1;
472 return;
473 }
474
475 /* -----------------------------------------------------------------------------
476 Cost-centre stack manipulation
477 -------------------------------------------------------------------------- */
478
479 /* Append ccs2 to ccs1 (ignoring any CAF cost centre at the root of ccs2) */
480 CostCentreStack *
481 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
482 {
483 IF_DEBUG(prof,
484 if (ccs1 != ccs2) {
485 debugBelch("Appending ");
486 debugCCS(ccs1);
487 debugBelch(" to ");
488 debugCCS(ccs2);
489 debugBelch("\n");});
490
491 if (ccs1 == ccs2) {
492 return ccs1;
493 }
494
495 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf) {
496 // stop at a CAF element
497 return ccs1;
498 }
499
500 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
501 }
502
503 // Pick one:
504 // #define RECURSION_DROPS
505 #define RECURSION_TRUNCATES
506
507 CostCentreStack *
508 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
509 {
510 IF_DEBUG(prof,
511 traceBegin("pushing %s on ", cc->label);
512 debugCCS(ccs);
513 traceEnd(););
514
515 CostCentreStack *ret;
516
517 if (ccs == EMPTY_STACK) {
518 ACQUIRE_LOCK(&ccs_mutex);
519 ret = actualPush(ccs,cc);
520 }
521 else
522 {
523 if (ccs->cc == cc) {
524 return ccs;
525 } else {
526 // check if we've already memoized this stack
527 IndexTable *ixtable = ccs->indexTable;
528 CostCentreStack *temp_ccs = isInIndexTable(ixtable,cc);
529
530 if (temp_ccs != EMPTY_STACK) {
531 return temp_ccs;
532 } else {
533
534 // not in the IndexTable, now we take the lock:
535 ACQUIRE_LOCK(&ccs_mutex);
536
537 if (ccs->indexTable != ixtable)
538 {
539 // someone modified ccs->indexTable while
540 // we did not hold the lock, so we must
541 // check it again:
542 temp_ccs = isInIndexTable(ixtable,cc);
543 if (temp_ccs != EMPTY_STACK)
544 {
545 RELEASE_LOCK(&ccs_mutex);
546 return temp_ccs;
547 }
548 }
549 temp_ccs = checkLoop(ccs,cc);
550 if (temp_ccs != NULL) {
551 // This CC is already in the stack somewhere.
552 // This could be recursion, or just calling
553 // another function with the same CC.
554 // A number of policies are possible at this
555 // point, we implement two here:
556 // - truncate the stack to the previous instance
557 // of this CC
558 // - ignore this push, return the same stack.
559 //
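// For example (illustrative): pushing a cost centre f onto the
// stack <MAIN,f,g> yields <MAIN,f> under RECURSION_TRUNCATES
// (truncate back to the earlier f), but leaves <MAIN,f,g>
// unchanged under RECURSION_DROPS.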
560 CostCentreStack *new_ccs;
561 #if defined(RECURSION_TRUNCATES)
562 new_ccs = temp_ccs;
563 #else // defined(RECURSION_DROPS)
564 new_ccs = ccs;
565 #endif
566 ccs->indexTable = addToIndexTable (ccs->indexTable,
567 new_ccs, cc, true);
568 ret = new_ccs;
569 } else {
570 ret = actualPush (ccs,cc);
571 }
572 }
573 }
574 }
575
576 RELEASE_LOCK(&ccs_mutex);
577 return ret;
578 }
579
580 static CostCentreStack *
581 checkLoop (CostCentreStack *ccs, CostCentre *cc)
582 {
583 while (ccs != EMPTY_STACK) {
584 if (ccs->cc == cc)
585 return ccs;
586 ccs = ccs->prevStack;
587 }
588 return NULL;
589 }
590
591 static CostCentreStack *
592 actualPush (CostCentreStack *ccs, CostCentre *cc)
593 {
594 CostCentreStack *new_ccs;
595
596 // allocate space for a new CostCentreStack
597 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
598
599 return actualPush_(ccs, cc, new_ccs);
600 }
601
602 static CostCentreStack *
603 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
604 {
605 /* assign values to each member of the structure */
606 new_ccs->ccsID = CCS_ID++;
607 new_ccs->cc = cc;
608 new_ccs->prevStack = ccs;
609 new_ccs->root = ccs->root;
610 new_ccs->depth = ccs->depth + 1;
611
612 new_ccs->indexTable = EMPTY_TABLE;
613
614 /* Initialise the various _scc_ counters to zero
615 */
616 new_ccs->scc_count = 0;
617
618 /* Initialise the remaining statistics (time, allocation and the
619 * inherited counters) to zero
620 */
621 new_ccs->time_ticks = 0;
622 new_ccs->mem_alloc = 0;
623 new_ccs->inherited_ticks = 0;
624 new_ccs->inherited_alloc = 0;
625
626 // Set the selected field.
627 ccsSetSelected(new_ccs);
628
629 /* update the memoization table for the parent stack */
630 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
631 false/*not a back edge*/);
632
633 /* return a pointer to the new stack */
634 return new_ccs;
635 }
636
637
638 static CostCentreStack *
639 isInIndexTable(IndexTable *it, CostCentre *cc)
640 {
641 while (it!=EMPTY_TABLE)
642 {
643 if (it->cc == cc)
644 return it->ccs;
645 else
646 it = it->next;
647 }
648
649 /* otherwise we never found it so return EMPTY_TABLE */
650 return EMPTY_TABLE;
651 }
652
653
654 static IndexTable *
655 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
656 CostCentre *cc, bool back_edge)
657 {
658 IndexTable *new_it;
659
660 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
661
662 new_it->cc = cc;
663 new_it->ccs = new_ccs;
664 new_it->next = it;
665 new_it->back_edge = back_edge;
666 return new_it;
667 }
668
669 /* -----------------------------------------------------------------------------
670 Generating a time & allocation profiling report.
671 -------------------------------------------------------------------------- */
672
673 /* We omit certain system-related CCs and CCSs from the default
674 * reports, so as not to cause confusion.
675 */
676 bool
677 ignoreCC (CostCentre const *cc)
678 {
679 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
680 ( cc == CC_OVERHEAD
681 || cc == CC_DONT_CARE
682 || cc == CC_GC
683 || cc == CC_SYSTEM
684 || cc == CC_IDLE);
685 }
686
687 bool
688 ignoreCCS (CostCentreStack const *ccs)
689 {
690 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
691 ( ccs == CCS_OVERHEAD
692 || ccs == CCS_DONT_CARE
693 || ccs == CCS_GC
694 || ccs == CCS_SYSTEM
695 || ccs == CCS_IDLE);
696 }
697
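/* The report is produced by a series of passes over the CCS tree: count the
 * grand totals, aggregate per-CC costs, propagate inherited costs up the
 * tree, prune zero-cost subtrees, sort the remaining siblings, and finally
 * write either the plain-text or the JSON report.
 */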
698 void
699 reportCCSProfiling( void )
700 {
701 stopProfTimer();
702 if (RtsFlags.CcFlags.doCostCentres == 0) return;
703
704 ProfilerTotals totals = countTicks(CCS_MAIN);
705 aggregateCCCosts(CCS_MAIN);
706 inheritCosts(CCS_MAIN);
707 CostCentreStack *stack = pruneCCSTree(CCS_MAIN);
708 sortCCSTree(stack);
709
710 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_JSON) {
711 writeCCSReportJson(prof_file, stack, totals);
712 } else {
713 writeCCSReport(prof_file, stack, totals);
714 }
715 }
716
717 /* -----------------------------------------------------------------------------
718 * Accumulating total allocations and tick counts
719 -------------------------------------------------------------------------- */
720
721 /* Helper for countTicks() below */
722 static void
723 countTicks_(CostCentreStack const *ccs, ProfilerTotals *totals)
724 {
725 if (!ignoreCCS(ccs)) {
726 totals->total_alloc += ccs->mem_alloc;
727 totals->total_prof_ticks += ccs->time_ticks;
728 }
729 for (IndexTable *i = ccs->indexTable; i != NULL; i = i->next) {
730 if (!i->back_edge) {
731 countTicks_(i->ccs, totals);
732 }
733 }
734 }
735
736 /* Traverse the cost centre stack tree and accumulate
737 * total ticks/allocations.
738 */
739 static ProfilerTotals
740 countTicks(CostCentreStack const *ccs)
741 {
742 ProfilerTotals totals = {0,0};
743 countTicks_(ccs, &totals);
744 return totals;
745 }
746
747 /* Traverse the cost centre stack tree and inherit ticks & allocs.
748 */
749 static void
750 inheritCosts(CostCentreStack *ccs)
751 {
752 IndexTable *i;
753
754 if (ignoreCCS(ccs)) { return; }
755
756 ccs->inherited_ticks += ccs->time_ticks;
757 ccs->inherited_alloc += ccs->mem_alloc;
758
759 for (i = ccs->indexTable; i != NULL; i = i->next)
760 if (!i->back_edge) {
761 inheritCosts(i->ccs);
762 ccs->inherited_ticks += i->ccs->inherited_ticks;
763 ccs->inherited_alloc += i->ccs->inherited_alloc;
764 }
765
766 return;
767 }
768
769 static void
770 aggregateCCCosts( CostCentreStack *ccs )
771 {
772 IndexTable *i;
773
774 ccs->cc->mem_alloc += ccs->mem_alloc;
775 ccs->cc->time_ticks += ccs->time_ticks;
776
777 for (i = ccs->indexTable; i != 0; i = i->next) {
778 if (!i->back_edge) {
779 aggregateCCCosts(i->ccs);
780 }
781 }
782 }
783
784 //
785 // Prune CCSs with zero entries, zero ticks and zero allocation from
786 // the tree, unless COST_CENTRES_ALL is on.
787 //
788 static CostCentreStack *
789 pruneCCSTree (CostCentreStack *ccs)
790 {
791 CostCentreStack *ccs1;
792 IndexTable *i, **prev;
793
794 prev = &ccs->indexTable;
795 for (i = ccs->indexTable; i != 0; i = i->next) {
796 if (i->back_edge) { continue; }
797
798 ccs1 = pruneCCSTree(i->ccs);
799 if (ccs1 == NULL) {
800 *prev = i->next;
801 } else {
802 prev = &(i->next);
803 }
804 }
805
806 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
807 /* force printing of *all* cost centres if -P -P */ )
808
809 || ( ccs->indexTable != 0 )
810 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
811 ) {
812 return ccs;
813 } else {
814 return NULL;
815 }
816 }
817
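/* Insertion-sort helper for sortCCSTree below: siblings are ordered by
 * descending entry count (scc_count), with ties broken alphabetically by
 * cost-centre label.
 */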
818 static IndexTable*
819 insertIndexTableInSortedList(IndexTable* tbl, IndexTable* sortedList)
820 {
821 StgWord tbl_ticks = tbl->ccs->scc_count;
822 char* tbl_label = tbl->ccs->cc->label;
823
824 IndexTable *prev = NULL;
825 IndexTable *cursor = sortedList;
826
827 while (cursor != NULL) {
828 StgWord cursor_ticks = cursor->ccs->scc_count;
829 char* cursor_label = cursor->ccs->cc->label;
830
831 if (tbl_ticks > cursor_ticks ||
832 (tbl_ticks == cursor_ticks && strcmp(tbl_label, cursor_label) < 0)) {
833 if (prev == NULL) {
834 tbl->next = sortedList;
835 return tbl;
836 } else {
837 prev->next = tbl;
838 tbl->next = cursor;
839 return sortedList;
840 }
841 } else {
842 prev = cursor;
843 cursor = cursor->next;
844 }
845 }
846
847 prev->next = tbl;
848 return sortedList;
849 }
850
851 static void
852 sortCCSTree(CostCentreStack *ccs)
853 {
854 if (ccs->indexTable == NULL) return;
855
856 for (IndexTable *tbl = ccs->indexTable; tbl != NULL; tbl = tbl->next)
857 if (!tbl->back_edge)
858 sortCCSTree(tbl->ccs);
859
860 IndexTable *sortedList = ccs->indexTable;
861 IndexTable *nonSortedList = sortedList->next;
862 sortedList->next = NULL;
863
864 while (nonSortedList != NULL)
865 {
866 IndexTable *nonSortedTail = nonSortedList->next;
867 nonSortedList->next = NULL;
868 sortedList = insertIndexTableInSortedList(nonSortedList, sortedList);
869 nonSortedList = nonSortedTail;
870 }
871
872 ccs->indexTable = sortedList;
873 }
874
875 void
876 fprintCCS( FILE *f, CostCentreStack *ccs )
877 {
878 fprintf(f,"<");
879 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
880 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
881 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
882 fprintf(f,",");
883 }
884 }
885 fprintf(f,">");
886 }
887
888 // Returns: true if the call stack ends with a CAF
889 static bool fprintCallStack (CostCentreStack *ccs)
890 {
891 CostCentreStack *prev;
892
893 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
894 prev = ccs->prevStack;
895 while (prev && prev != CCS_MAIN) {
896 ccs = prev;
897 fprintf(stderr, ",\n called from %s.%s",
898 ccs->cc->module, ccs->cc->label);
899 prev = ccs->prevStack;
900 }
901 fprintf(stderr, "\n");
902
903 return (!strncmp(ccs->cc->label, "CAF", 3));
904 }
905
906 /* For calling from .cmm code, where we can't reliably refer to stderr */
907 void
908 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
909 {
910 bool is_caf;
911 StgPtr frame;
912 StgStack *stack;
913 CostCentreStack *prev_ccs;
914 uint32_t depth = 0;
915 const uint32_t MAX_DEPTH = 10; // don't print gigantic chains of stacks
916
917 {
918 const char *desc;
919 const StgInfoTable *info;
920 info = get_itbl(UNTAG_CONST_CLOSURE(exception));
921 switch (info->type) {
922 case CONSTR:
923 case CONSTR_1_0:
924 case CONSTR_0_1:
925 case CONSTR_2_0:
926 case CONSTR_1_1:
927 case CONSTR_0_2:
928 case CONSTR_NOCAF:
929 desc = GET_CON_DESC(itbl_to_con_itbl(info));
930 break;
931 default:
932 desc = closure_type_names[info->type];
933 break;
934 }
935 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
936 }
937
938 is_caf = fprintCallStack(ccs);
939
940 // traverse the stack down to the enclosing update frame to
941 // find out where this CCS was evaluated from...
942
943 stack = tso->stackobj;
944 frame = stack->sp;
945 prev_ccs = ccs;
946
947 for (; is_caf && depth < MAX_DEPTH; depth++)
948 {
949 switch (get_itbl((StgClosure*)frame)->type)
950 {
951 case UPDATE_FRAME:
952 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
953 frame += sizeofW(StgUpdateFrame);
954 if (ccs == CCS_MAIN) {
955 goto done;
956 }
957 if (ccs == prev_ccs) {
958 // ignore if this is the same as the previous stack,
959 // we're probably in library code and haven't
960 // accumulated any more interesting stack items
961 // since the last update frame.
962 break;
963 }
964 prev_ccs = ccs;
965 fprintf(stderr, " --> evaluated by: ");
966 is_caf = fprintCallStack(ccs);
967 break;
968 case UNDERFLOW_FRAME:
969 stack = ((StgUnderflowFrame*)frame)->next_chunk;
970 frame = stack->sp;
971 break;
972 case STOP_FRAME:
973 goto done;
974 default:
975 frame += stack_frame_sizeW((StgClosure*)frame);
976 break;
977 }
978 }
979 done:
980 return;
981 }
982
983 #if defined(DEBUG)
984 void
985 debugCCS( CostCentreStack *ccs )
986 {
987 debugBelch("<");
988 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
989 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
990 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
991 debugBelch(",");
992 }
993 }
994 debugBelch(">");
995 }
996 #endif /* DEBUG */
997
998 #endif /* PROFILING */