rts: Rename the nondescript initProfiling2 to refreshProfilingCCSs
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if defined(PROFILING)
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "ProfilerReport.h"
21 #include "ProfilerReportJson.h"
22 #include "Printer.h"
23 #include "Capability.h"
24
25 #include <fs_rts.h>
26 #include <string.h>
27
28 #if defined(DEBUG)
29 #include "Trace.h"
30 #endif
31
32 /*
33 * Profiling allocation arena.
34 */
35 #if defined(DEBUG)
36 Arena *prof_arena;
37 #else
38 static Arena *prof_arena;
39 #endif
40
41 /*
42 * Global variables used to assign unique IDs to cc's, ccs's, and
43 * closure_cats
44 */
45
46 static unsigned int CC_ID = 1;
47 static unsigned int CCS_ID = 1;
48
49 /* Globals for opening the profiling log file(s)
50 */
51 static char *prof_filename; /* prof report file name = <program>.prof */
52 FILE *prof_file;
53
54 // List of all cost centres. Used for reporting.
55 CostCentre *CC_LIST = NULL;
56 // All cost centre stacks temporarily appear here, to be able to make CCS_MAIN a
57 // parent of all cost centres stacks (done in initProfiling2()).
58 static CostCentreStack *CCS_LIST = NULL;
59
60 #if defined(THREADED_RTS)
61 static Mutex ccs_mutex;
62 #endif
63
64 /*
65 * Built-in cost centres and cost-centre stacks:
66 *
67 * MAIN is the root of the cost-centre stack tree. If there are
68 * no {-# SCC #-}s in the program, all costs will be attributed
69 * to MAIN.
70 *
71 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
72 * RTS operations apart from garbage collection are attributed
73 * to SYSTEM.
74 *
75 * GC is the storage manager / garbage collector.
76 *
77 * OVERHEAD gets all costs generated by the profiling system
78 * itself. These are costs that would not be incurred
79 * during non-profiled execution of the program.
80 *
81 * DONT_CARE is a placeholder cost-centre we assign to static
82 * constructors. It should *never* accumulate any costs.
83 *
84 * PINNED accumulates memory allocated to pinned objects, which
85 * cannot be profiled separately because we cannot reliably
86 * traverse pinned memory.
87 */
88
89 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "<built-in>", CC_NOT_CAF, );
90 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", "<built-in>", CC_NOT_CAF, );
91 CC_DECLARE(CC_GC, "GC", "GC", "<built-in>", CC_NOT_CAF, );
92 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "<built-in>", CC_NOT_CAF, );
93 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", "<built-in>", CC_NOT_CAF, );
94 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", "<built-in>", CC_NOT_CAF, );
95 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "<built-in>", CC_NOT_CAF, );
96
97 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
98 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
99 CCS_DECLARE(CCS_GC, CC_GC, );
100 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
101 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
102 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
103 CCS_DECLARE(CCS_IDLE, CC_IDLE, );
104
105 /*
106 * Static Functions
107 */
108
109 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
110 CostCentreStack *ccs2 );
111 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
112 CostCentreStack *new_ccs );
113 static void inheritCosts ( CostCentreStack *ccs );
114 static ProfilerTotals countTickss ( CostCentreStack const *ccs );
115 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
116 CostCentre *cc );
117 static void sortCCSTree ( CostCentreStack *ccs );
118 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
119 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
120 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
121 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
122 CostCentre *, bool );
123 static void ccsSetSelected ( CostCentreStack *ccs );
124 static void aggregateCCCosts( CostCentreStack *ccs );
125 static void registerCC ( CostCentre *cc );
126 static void registerCCS ( CostCentreStack *ccs );
127
128 static void initTimeProfiling ( void );
129 static void initProfilingLogFile ( void );
130
131 /* -----------------------------------------------------------------------------
132 Initialise the profiling environment
133 -------------------------------------------------------------------------- */
134
135 void initProfiling (void)
136 {
137 // initialise our arena
138 prof_arena = newArena();
139
140 /* for the benefit of allocate()... */
141 {
142 uint32_t n;
143 for (n=0; n < n_capabilities; n++) {
144 capabilities[n]->r.rCCCS = CCS_SYSTEM;
145 }
146 }
147
148 #if defined(THREADED_RTS)
149 initMutex(&ccs_mutex);
150 #endif
151
152 /* Set up the log file, and dump the header and cost centre
153 * information into it.
154 */
155 initProfilingLogFile();
156
157 /* Register all the cost centres / stacks in the program
158 * CC_MAIN gets link = 0, all others have non-zero link.
159 */
160 registerCC(CC_MAIN);
161 registerCC(CC_SYSTEM);
162 registerCC(CC_GC);
163 registerCC(CC_OVERHEAD);
164 registerCC(CC_DONT_CARE);
165 registerCC(CC_PINNED);
166 registerCC(CC_IDLE);
167
168 registerCCS(CCS_SYSTEM);
169 registerCCS(CCS_GC);
170 registerCCS(CCS_OVERHEAD);
171 registerCCS(CCS_DONT_CARE);
172 registerCCS(CCS_PINNED);
173 registerCCS(CCS_IDLE);
174 registerCCS(CCS_MAIN);
175
176 /* find all the registered cost centre stacks, and make them
177 * children of CCS_MAIN.
178 */
179 ASSERT(CCS_LIST == CCS_MAIN);
180 CCS_LIST = CCS_LIST->prevStack;
181 CCS_MAIN->prevStack = NULL;
182 CCS_MAIN->root = CCS_MAIN;
183 ccsSetSelected(CCS_MAIN);
184
185 refreshProfilingCCSs();
186
187 if (RtsFlags.CcFlags.doCostCentres) {
188 initTimeProfiling();
189 }
190 }
191
192 //
193 // Should be called after loading any new Haskell code.
194 //
195 void refreshProfilingCCSs (void)
196 {
197 // make CCS_MAIN the parent of all the pre-defined CCSs.
198 CostCentreStack *next;
199 for (CostCentreStack *ccs = CCS_LIST; ccs != NULL; ) {
200 next = ccs->prevStack;
201 ccs->prevStack = NULL;
202 actualPush_(CCS_MAIN,ccs->cc,ccs);
203 ccs->root = ccs;
204 ccs = next;
205 }
206 CCS_LIST = NULL;
207 }
208
209 void
210 freeProfiling (void)
211 {
212 arenaFree(prof_arena);
213 }
214
215 CostCentre *mkCostCentre (char *label, char *module, char *srcloc)
216 {
217 CostCentre *cc = stgMallocBytes (sizeof(CostCentre), "mkCostCentre");
218 cc->label = label;
219 cc->module = module;
220 cc->srcloc = srcloc;
221 cc->is_caf = 0;
222 cc->mem_alloc = 0;
223 cc->time_ticks = 0;
224 cc->link = NULL;
225 return cc;
226 }
227
228 static void
229 initProfilingLogFile(void)
230 {
231 // Figure out output file name stem.
232 char const *stem;
233 if (RtsFlags.CcFlags.outputFileNameStem) {
234 stem = RtsFlags.CcFlags.outputFileNameStem;
235 } else {
236 char *prog;
237
238 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
239 strcpy(prog, prog_name);
240 #if defined(mingw32_HOST_OS)
241 // on Windows, drop the .exe suffix if there is one
242 {
243 char *suff;
244 suff = strrchr(prog,'.');
245 if (suff != NULL && !strcmp(suff,".exe")) {
246 *suff = '\0';
247 }
248 }
249 #endif
250 stem = prog;
251 }
252
253 if (RtsFlags.CcFlags.doCostCentres == 0 && !doingRetainerProfiling())
254 {
255 /* No need for the <stem>.prof file */
256 prof_filename = NULL;
257 prof_file = NULL;
258 }
259 else
260 {
261 /* Initialise the log file name */
262 prof_filename = arenaAlloc(prof_arena, strlen(stem) + 6);
263 sprintf(prof_filename, "%s.prof", stem);
264
265 /* open the log file */
266 if ((prof_file = __rts_fopen(prof_filename, "w")) == NULL) {
267 debugBelch("Can't open profiling report file %s\n", prof_filename);
268 RtsFlags.CcFlags.doCostCentres = 0;
269 // Retainer profiling (`-hr` or `-hr<cc> -h<x>`) writes to
270 // both <program>.hp as <program>.prof.
271 if (doingRetainerProfiling()) {
272 RtsFlags.ProfFlags.doHeapProfile = 0;
273 }
274 }
275 }
276 }
277
278 void
279 initTimeProfiling(void)
280 {
281 /* Start ticking */
282 startProfTimer();
283 };
284
285 void
286 endProfiling ( void )
287 {
288 if (RtsFlags.CcFlags.doCostCentres) {
289 stopProfTimer();
290 }
291 }
292
293 /* -----------------------------------------------------------------------------
294 Registering CCs and CCSs
295
296 Registering a CC or CCS consists of
297 - assigning it a unique ID
298 - linking it onto the list of registered CCs/CCSs
299
300 Cost centres are registered at startup by a C constructor function
301 generated by the compiler (ProfInit.hs) in the _stub.c file for each module.
302 -------------------------------------------------------------------------- */
303
304 static void
305 registerCC(CostCentre *cc)
306 {
307 if (cc->link == NULL) {
308 cc->link = CC_LIST;
309 CC_LIST = cc;
310 cc->ccID = CC_ID++;
311 }
312 }
313
314 static void registerCCS(CostCentreStack *ccs)
315 {
316 if (ccs->prevStack == NULL) {
317 ccs->prevStack = CCS_LIST;
318 CCS_LIST = ccs;
319 ccs->ccsID = CCS_ID++;
320 }
321 }
322
323 void registerCcList(CostCentre **cc_list)
324 {
325 for (CostCentre **i = cc_list; *i != NULL; i++) {
326 registerCC(*i);
327 }
328 }
329
330 void registerCcsList(CostCentreStack **cc_list)
331 {
332 for (CostCentreStack **i = cc_list; *i != NULL; i++) {
333 registerCCS(*i);
334 }
335 }
336
337 /* -----------------------------------------------------------------------------
338 Set CCCS when entering a function.
339
340 The algorithm is as follows.
341
342 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
343
344 where
345
346 dropCommonPrefix A B
347 -- returns the suffix of B after removing any prefix common
348 -- to both A and B.
349
350 e.g.
351
352 <a,b,c> ++> <> = <a,b,c>
353 <a,b,c> ++> <d> = <a,b,c,d>
354 <a,b,c> ++> <a,b> = <a,b,c>
355 <a,b> ++> <a,b,c> = <a,b,c>
356 <a,b,c> ++> <a,b,d> = <a,b,c,d>
357
358 -------------------------------------------------------------------------- */
359
360 // implements c1 ++> c2, where c1 and c2 are equal depth
361 //
362 static CostCentreStack *
363 enterFunEqualStacks (CostCentreStack *ccs0,
364 CostCentreStack *ccsapp,
365 CostCentreStack *ccsfn)
366 {
367 ASSERT(ccsapp->depth == ccsfn->depth);
368 if (ccsapp == ccsfn) return ccs0;
369 return pushCostCentre(enterFunEqualStacks(ccs0,
370 ccsapp->prevStack,
371 ccsfn->prevStack),
372 ccsfn->cc);
373 }
374
375 // implements c1 ++> c2, where c2 is deeper than c1.
376 // Drop elements of c2 until we have equal stacks, call
377 // enterFunEqualStacks(), and then push on the elements that we
378 // dropped in reverse order.
379 //
380 static CostCentreStack *
381 enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
382 {
383 if (n == 0) {
384 ASSERT(ccsfn->depth == ccsapp->depth);
385 return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);;
386 } else {
387 ASSERT(ccsfn->depth > ccsapp->depth);
388 return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
389 ccsfn->cc);
390 }
391 }
392
393 void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
394 {
395 CostCentreStack *ccsapp;
396
397 // common case 1: both stacks are the same
398 if (ccsfn == reg->rCCCS) {
399 return;
400 }
401
402 // common case 2: the function stack is empty, or just CAF
403 if (ccsfn->cc->is_caf) {
404 return;
405 }
406
407 ccsapp = reg->rCCCS;
408 reg->rCCCS = CCS_OVERHEAD;
409
410 // common case 3: the stacks are completely different (e.g. one is a
411 // descendent of MAIN and the other of a CAF): we append the whole
412 // of the function stack to the current CCS.
413 if (ccsfn->root != ccsapp->root) {
414 reg->rCCCS = appendCCS(ccsapp,ccsfn);
415 return;
416 }
417
418 // uncommon case 4: ccsapp is deeper than ccsfn
419 if (ccsapp->depth > ccsfn->depth) {
420 uint32_t i, n;
421 CostCentreStack *tmp = ccsapp;
422 n = ccsapp->depth - ccsfn->depth;
423 for (i = 0; i < n; i++) {
424 tmp = tmp->prevStack;
425 }
426 reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
427 return;
428 }
429
430 // uncommon case 5: ccsfn is deeper than CCCS
431 if (ccsfn->depth > ccsapp->depth) {
432 reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
433 ccsfn->depth - ccsapp->depth);
434 return;
435 }
436
437 // uncommon case 6: stacks are equal depth, but different
438 reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
439 }
440
441 /* -----------------------------------------------------------------------------
442 Decide whether closures with this CCS should contribute to the heap
443 profile.
444 -------------------------------------------------------------------------- */
445
446 static void
447 ccsSetSelected (CostCentreStack *ccs)
448 {
449 if (RtsFlags.ProfFlags.modSelector) {
450 if (! strMatchesSelector (ccs->cc->module,
451 RtsFlags.ProfFlags.modSelector) ) {
452 ccs->selected = 0;
453 return;
454 }
455 }
456 if (RtsFlags.ProfFlags.ccSelector) {
457 if (! strMatchesSelector (ccs->cc->label,
458 RtsFlags.ProfFlags.ccSelector) ) {
459 ccs->selected = 0;
460 return;
461 }
462 }
463 if (RtsFlags.ProfFlags.ccsSelector) {
464 CostCentreStack *c;
465 for (c = ccs; c != NULL; c = c->prevStack)
466 {
467 if ( strMatchesSelector (c->cc->label,
468 RtsFlags.ProfFlags.ccsSelector) ) {
469 break;
470 }
471 }
472 if (c == NULL) {
473 ccs->selected = 0;
474 return;
475 }
476 }
477
478 ccs->selected = 1;
479 return;
480 }
481
482 /* -----------------------------------------------------------------------------
483 Cost-centre stack manipulation
484 -------------------------------------------------------------------------- */
485
486 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
487 CostCentreStack *
488 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
489 {
490 IF_DEBUG(prof,
491 if (ccs1 != ccs2) {
492 debugBelch("Appending ");
493 debugCCS(ccs1);
494 debugBelch(" to ");
495 debugCCS(ccs2);
496 debugBelch("\n");});
497
498 if (ccs1 == ccs2) {
499 return ccs1;
500 }
501
502 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf) {
503 // stop at a CAF element
504 return ccs1;
505 }
506
507 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
508 }
509
510 // Pick one:
511 // #define RECURSION_DROPS
512 #define RECURSION_TRUNCATES
513
514 CostCentreStack *
515 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
516 {
517 IF_DEBUG(prof,
518 traceBegin("pushing %s on ", cc->label);
519 debugCCS(ccs);
520 traceEnd(););
521
522 CostCentreStack *ret;
523
524 if (ccs == EMPTY_STACK) {
525 ACQUIRE_LOCK(&ccs_mutex);
526 ret = actualPush(ccs,cc);
527 }
528 else
529 {
530 if (ccs->cc == cc) {
531 return ccs;
532 } else {
533 // check if we've already memoized this stack
534 IndexTable *ixtable = ccs->indexTable;
535 CostCentreStack *temp_ccs = isInIndexTable(ixtable,cc);
536
537 if (temp_ccs != EMPTY_STACK) {
538 return temp_ccs;
539 } else {
540
541 // not in the IndexTable, now we take the lock:
542 ACQUIRE_LOCK(&ccs_mutex);
543
544 if (ccs->indexTable != ixtable)
545 {
546 // someone modified ccs->indexTable while
547 // we did not hold the lock, so we must
548 // check it again:
549 temp_ccs = isInIndexTable(ixtable,cc);
550 if (temp_ccs != EMPTY_STACK)
551 {
552 RELEASE_LOCK(&ccs_mutex);
553 return temp_ccs;
554 }
555 }
556 temp_ccs = checkLoop(ccs,cc);
557 if (temp_ccs != NULL) {
558 // This CC is already in the stack somewhere.
559 // This could be recursion, or just calling
560 // another function with the same CC.
561 // A number of policies are possible at this
562 // point, we implement two here:
563 // - truncate the stack to the previous instance
564 // of this CC
565 // - ignore this push, return the same stack.
566 //
567 CostCentreStack *new_ccs;
568 #if defined(RECURSION_TRUNCATES)
569 new_ccs = temp_ccs;
570 #else // defined(RECURSION_DROPS)
571 new_ccs = ccs;
572 #endif
573 ccs->indexTable = addToIndexTable (ccs->indexTable,
574 new_ccs, cc, true);
575 ret = new_ccs;
576 } else {
577 ret = actualPush (ccs,cc);
578 }
579 }
580 }
581 }
582
583 RELEASE_LOCK(&ccs_mutex);
584 return ret;
585 }
586
587 static CostCentreStack *
588 checkLoop (CostCentreStack *ccs, CostCentre *cc)
589 {
590 while (ccs != EMPTY_STACK) {
591 if (ccs->cc == cc)
592 return ccs;
593 ccs = ccs->prevStack;
594 }
595 return NULL;
596 }
597
598 static CostCentreStack *
599 actualPush (CostCentreStack *ccs, CostCentre *cc)
600 {
601 CostCentreStack *new_ccs;
602
603 // allocate space for a new CostCentreStack
604 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
605
606 return actualPush_(ccs, cc, new_ccs);
607 }
608
609 static CostCentreStack *
610 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
611 {
612 /* assign values to each member of the structure */
613 new_ccs->ccsID = CCS_ID++;
614 new_ccs->cc = cc;
615 new_ccs->prevStack = ccs;
616 new_ccs->root = ccs->root;
617 new_ccs->depth = ccs->depth + 1;
618
619 new_ccs->indexTable = EMPTY_TABLE;
620
621 /* Initialise the various _scc_ counters to zero
622 */
623 new_ccs->scc_count = 0;
624
625 /* Initialize all other stats here. There should be a quick way
626 * that's easily used elsewhere too
627 */
628 new_ccs->time_ticks = 0;
629 new_ccs->mem_alloc = 0;
630 new_ccs->inherited_ticks = 0;
631 new_ccs->inherited_alloc = 0;
632
633 // Set the selected field.
634 ccsSetSelected(new_ccs);
635
636 /* update the memoization table for the parent stack */
637 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
638 false/*not a back edge*/);
639
640 /* return a pointer to the new stack */
641 return new_ccs;
642 }
643
644
645 static CostCentreStack *
646 isInIndexTable(IndexTable *it, CostCentre *cc)
647 {
648 while (it!=EMPTY_TABLE)
649 {
650 if (it->cc == cc)
651 return it->ccs;
652 else
653 it = it->next;
654 }
655
656 /* otherwise we never found it so return EMPTY_TABLE */
657 return EMPTY_TABLE;
658 }
659
660
661 static IndexTable *
662 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
663 CostCentre *cc, bool back_edge)
664 {
665 IndexTable *new_it;
666
667 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
668
669 new_it->cc = cc;
670 new_it->ccs = new_ccs;
671 new_it->next = it;
672 new_it->back_edge = back_edge;
673 return new_it;
674 }
675
676 /* -----------------------------------------------------------------------------
677 Generating a time & allocation profiling report.
678 -------------------------------------------------------------------------- */
679
680 /* We omit certain system-related CCs and CCSs from the default
681 * reports, so as not to cause confusion.
682 */
683 bool
684 ignoreCC (CostCentre const *cc)
685 {
686 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
687 ( cc == CC_OVERHEAD
688 || cc == CC_DONT_CARE
689 || cc == CC_GC
690 || cc == CC_SYSTEM
691 || cc == CC_IDLE);
692 }
693
694 bool
695 ignoreCCS (CostCentreStack const *ccs)
696 {
697 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
698 ( ccs == CCS_OVERHEAD
699 || ccs == CCS_DONT_CARE
700 || ccs == CCS_GC
701 || ccs == CCS_SYSTEM
702 || ccs == CCS_IDLE);
703 }
704
705 void
706 reportCCSProfiling( void )
707 {
708 stopProfTimer();
709 if (RtsFlags.CcFlags.doCostCentres == 0) return;
710
711 ProfilerTotals totals = countTickss(CCS_MAIN);
712 aggregateCCCosts(CCS_MAIN);
713 inheritCosts(CCS_MAIN);
714 CostCentreStack *stack = pruneCCSTree(CCS_MAIN);
715 sortCCSTree(stack);
716
717 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_JSON) {
718 writeCCSReportJson(prof_file, stack, totals);
719 } else {
720 writeCCSReport(prof_file, stack, totals);
721 }
722 }
723
724 /* -----------------------------------------------------------------------------
725 * Accumulating total allocatinos and tick count
726 -------------------------------------------------------------------------- */
727
728 /* Helper */
729 static void
730 countTickss_(CostCentreStack const *ccs, ProfilerTotals *totals)
731 {
732 if (!ignoreCCS(ccs)) {
733 totals->total_alloc += ccs->mem_alloc;
734 totals->total_prof_ticks += ccs->time_ticks;
735 }
736 for (IndexTable *i = ccs->indexTable; i != NULL; i = i->next) {
737 if (!i->back_edge) {
738 countTickss_(i->ccs, totals);
739 }
740 }
741 }
742
743 /* Traverse the cost centre stack tree and accumulate
744 * total ticks/allocations.
745 */
746 static ProfilerTotals
747 countTickss(CostCentreStack const *ccs)
748 {
749 ProfilerTotals totals = {0,0};
750 countTickss_(ccs, &totals);
751 return totals;
752 }
753
754 /* Traverse the cost centre stack tree and inherit ticks & allocs.
755 */
756 static void
757 inheritCosts(CostCentreStack *ccs)
758 {
759 IndexTable *i;
760
761 if (ignoreCCS(ccs)) { return; }
762
763 ccs->inherited_ticks += ccs->time_ticks;
764 ccs->inherited_alloc += ccs->mem_alloc;
765
766 for (i = ccs->indexTable; i != NULL; i = i->next)
767 if (!i->back_edge) {
768 inheritCosts(i->ccs);
769 ccs->inherited_ticks += i->ccs->inherited_ticks;
770 ccs->inherited_alloc += i->ccs->inherited_alloc;
771 }
772
773 return;
774 }
775
776 static void
777 aggregateCCCosts( CostCentreStack *ccs )
778 {
779 IndexTable *i;
780
781 ccs->cc->mem_alloc += ccs->mem_alloc;
782 ccs->cc->time_ticks += ccs->time_ticks;
783
784 for (i = ccs->indexTable; i != 0; i = i->next) {
785 if (!i->back_edge) {
786 aggregateCCCosts(i->ccs);
787 }
788 }
789 }
790
791 //
792 // Prune CCSs with zero entries, zero ticks or zero allocation from
793 // the tree, unless COST_CENTRES_ALL is on.
794 //
795 static CostCentreStack *
796 pruneCCSTree (CostCentreStack *ccs)
797 {
798 CostCentreStack *ccs1;
799 IndexTable *i, **prev;
800
801 prev = &ccs->indexTable;
802 for (i = ccs->indexTable; i != 0; i = i->next) {
803 if (i->back_edge) { continue; }
804
805 ccs1 = pruneCCSTree(i->ccs);
806 if (ccs1 == NULL) {
807 *prev = i->next;
808 } else {
809 prev = &(i->next);
810 }
811 }
812
813 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
814 /* force printing of *all* cost centres if -P -P */ )
815
816 || ( ccs->indexTable != 0 )
817 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
818 ) {
819 return ccs;
820 } else {
821 return NULL;
822 }
823 }
824
825 static IndexTable*
826 insertIndexTableInSortedList(IndexTable* tbl, IndexTable* sortedList)
827 {
828 StgWord tbl_ticks = tbl->ccs->scc_count;
829 char* tbl_label = tbl->ccs->cc->label;
830
831 IndexTable *prev = NULL;
832 IndexTable *cursor = sortedList;
833
834 while (cursor != NULL) {
835 StgWord cursor_ticks = cursor->ccs->scc_count;
836 char* cursor_label = cursor->ccs->cc->label;
837
838 if (tbl_ticks > cursor_ticks ||
839 (tbl_ticks == cursor_ticks && strcmp(tbl_label, cursor_label) < 0)) {
840 if (prev == NULL) {
841 tbl->next = sortedList;
842 return tbl;
843 } else {
844 prev->next = tbl;
845 tbl->next = cursor;
846 return sortedList;
847 }
848 } else {
849 prev = cursor;
850 cursor = cursor->next;
851 }
852 }
853
854 prev->next = tbl;
855 return sortedList;
856 }
857
858 static void
859 sortCCSTree(CostCentreStack *ccs)
860 {
861 if (ccs->indexTable == NULL) return;
862
863 for (IndexTable *tbl = ccs->indexTable; tbl != NULL; tbl = tbl->next)
864 if (!tbl->back_edge)
865 sortCCSTree(tbl->ccs);
866
867 IndexTable *sortedList = ccs->indexTable;
868 IndexTable *nonSortedList = sortedList->next;
869 sortedList->next = NULL;
870
871 while (nonSortedList != NULL)
872 {
873 IndexTable *nonSortedTail = nonSortedList->next;
874 nonSortedList->next = NULL;
875 sortedList = insertIndexTableInSortedList(nonSortedList, sortedList);
876 nonSortedList = nonSortedTail;
877 }
878
879 ccs->indexTable = sortedList;
880 }
881
882 void
883 fprintCCS( FILE *f, CostCentreStack *ccs )
884 {
885 fprintf(f,"<");
886 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
887 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
888 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
889 fprintf(f,",");
890 }
891 }
892 fprintf(f,">");
893 }
894
895 // Returns: True if the call stack ended with CAF
896 static bool fprintCallStack (CostCentreStack *ccs)
897 {
898 CostCentreStack *prev;
899
900 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
901 prev = ccs->prevStack;
902 while (prev && prev != CCS_MAIN) {
903 ccs = prev;
904 fprintf(stderr, ",\n called from %s.%s",
905 ccs->cc->module, ccs->cc->label);
906 prev = ccs->prevStack;
907 }
908 fprintf(stderr, "\n");
909
910 return (!strncmp(ccs->cc->label, "CAF", 3));
911 }
912
913 /* For calling from .cmm code, where we can't reliably refer to stderr */
914 void
915 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
916 {
917 bool is_caf;
918 StgPtr frame;
919 StgStack *stack;
920 CostCentreStack *prev_ccs;
921 uint32_t depth = 0;
922 const uint32_t MAX_DEPTH = 10; // don't print gigantic chains of stacks
923
924 {
925 const char *desc;
926 const StgInfoTable *info;
927 info = get_itbl(UNTAG_CONST_CLOSURE(exception));
928 switch (info->type) {
929 case CONSTR:
930 case CONSTR_1_0:
931 case CONSTR_0_1:
932 case CONSTR_2_0:
933 case CONSTR_1_1:
934 case CONSTR_0_2:
935 case CONSTR_NOCAF:
936 desc = GET_CON_DESC(itbl_to_con_itbl(info));
937 break;
938 default:
939 desc = closure_type_names[info->type];
940 break;
941 }
942 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
943 }
944
945 is_caf = fprintCallStack(ccs);
946
947 // traverse the stack down to the enclosing update frame to
948 // find out where this CCS was evaluated from...
949
950 stack = tso->stackobj;
951 frame = stack->sp;
952 prev_ccs = ccs;
953
954 for (; is_caf && depth < MAX_DEPTH; depth++)
955 {
956 switch (get_itbl((StgClosure*)frame)->type)
957 {
958 case UPDATE_FRAME:
959 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
960 frame += sizeofW(StgUpdateFrame);
961 if (ccs == CCS_MAIN) {
962 goto done;
963 }
964 if (ccs == prev_ccs) {
965 // ignore if this is the same as the previous stack,
966 // we're probably in library code and haven't
967 // accumulated any more interesting stack items
968 // since the last update frame.
969 break;
970 }
971 prev_ccs = ccs;
972 fprintf(stderr, " --> evaluated by: ");
973 is_caf = fprintCallStack(ccs);
974 break;
975 case UNDERFLOW_FRAME:
976 stack = ((StgUnderflowFrame*)frame)->next_chunk;
977 frame = stack->sp;
978 break;
979 case STOP_FRAME:
980 goto done;
981 default:
982 frame += stack_frame_sizeW((StgClosure*)frame);
983 break;
984 }
985 }
986 done:
987 return;
988 }
989
990 #if defined(DEBUG)
991 void
992 debugCCS( CostCentreStack *ccs )
993 {
994 debugBelch("<");
995 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
996 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
997 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
998 debugBelch(",");
999 }
1000 }
1001 debugBelch(">");
1002 }
1003 #endif /* DEBUG */
1004
1005 #endif /* PROFILING */