Replace hashing function for string keys implementation with xxhash
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if defined(PROFILING)
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "ProfilerReport.h"
21 #include "ProfilerReportJson.h"
22 #include "Printer.h"
23 #include "Capability.h"
24
25 #include <string.h>
26
27 #if defined(DEBUG)
28 #include "Trace.h"
29 #endif
30
31 /*
32 * Profiling allocation arena.
33 */
34 static Arena *prof_arena;
35
36 /*
37 * Global variables used to assign unique IDs to cc's, ccs's, and
38 * closure_cats
39 */
40
41 unsigned int CC_ID = 1;
42 unsigned int CCS_ID = 1;
43
44 /* Globals for opening the profiling log file(s)
45 */
46 static char *prof_filename; /* prof report file name = <program>.prof */
47 FILE *prof_file;
48
49 static char *hp_filename; /* heap profile (hp2ps style) log file */
50 FILE *hp_file;
51
52 /* Linked lists to keep track of CCs and CCSs that haven't
53 * been declared in the log file yet
54 */
55 CostCentre *CC_LIST = NULL;
56 CostCentreStack *CCS_LIST = NULL;
57
58 #if defined(THREADED_RTS)
59 static Mutex ccs_mutex;
60 #endif
61
62 /*
63 * Built-in cost centres and cost-centre stacks:
64 *
65 * MAIN is the root of the cost-centre stack tree. If there are
66 * no {-# SCC #-}s in the program, all costs will be attributed
67 * to MAIN.
68 *
69 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
70 * RTS operations apart from garbage collection are attributed
71 * to SYSTEM.
72 *
73 * GC is the storage manager / garbage collector.
74 *
75 * OVERHEAD gets all costs generated by the profiling system
76 * itself. These are costs that would not be incurred
77 * during non-profiled execution of the program.
78 *
79 * DONT_CARE is a placeholder cost-centre we assign to static
80 * constructors. It should *never* accumulate any costs.
81 *
82 * PINNED accumulates memory allocated to pinned objects, which
83 * cannot be profiled separately because we cannot reliably
84 * traverse pinned memory.
85 */
86
87 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "<built-in>", CC_NOT_CAF, );
88 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", "<built-in>", CC_NOT_CAF, );
89 CC_DECLARE(CC_GC, "GC", "GC", "<built-in>", CC_NOT_CAF, );
90 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "<built-in>", CC_NOT_CAF, );
91 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", "<built-in>", CC_NOT_CAF, );
92 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", "<built-in>", CC_NOT_CAF, );
93 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "<built-in>", CC_NOT_CAF, );
94
95 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
96 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
97 CCS_DECLARE(CCS_GC, CC_GC, );
98 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
99 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
100 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
101 CCS_DECLARE(CCS_IDLE, CC_IDLE, );
102
103 /*
104 * Static Functions
105 */
106
107 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
108 CostCentreStack *ccs2 );
109 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
110 CostCentreStack *new_ccs );
111 static void inheritCosts ( CostCentreStack *ccs );
112 static ProfilerTotals countTickss ( CostCentreStack const *ccs );
113 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
114 CostCentre *cc );
115 static void sortCCSTree ( CostCentreStack *ccs );
116 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
117 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
118 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
119 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
120 CostCentre *, unsigned int );
121 static void ccsSetSelected ( CostCentreStack *ccs );
122 static void aggregateCCCosts( CostCentreStack *ccs );
123
124 static void initTimeProfiling ( void );
125 static void initProfilingLogFile ( void );
126
127 /* -----------------------------------------------------------------------------
128 Initialise the profiling environment
129 -------------------------------------------------------------------------- */
130
131 void initProfiling (void)
132 {
133 // initialise our arena
134 prof_arena = newArena();
135
136 /* for the benefit of allocate()... */
137 {
138 uint32_t n;
139 for (n=0; n < n_capabilities; n++) {
140 capabilities[n]->r.rCCCS = CCS_SYSTEM;
141 }
142 }
143
144 #if defined(THREADED_RTS)
145 initMutex(&ccs_mutex);
146 #endif
147
148 /* Set up the log file, and dump the header and cost centre
149 * information into it.
150 */
151 initProfilingLogFile();
152
153 /* Register all the cost centres / stacks in the program
154 * CC_MAIN gets link = 0, all others have non-zero link.
155 */
156 REGISTER_CC(CC_MAIN);
157 REGISTER_CC(CC_SYSTEM);
158 REGISTER_CC(CC_GC);
159 REGISTER_CC(CC_OVERHEAD);
160 REGISTER_CC(CC_DONT_CARE);
161 REGISTER_CC(CC_PINNED);
162 REGISTER_CC(CC_IDLE);
163
164 REGISTER_CCS(CCS_SYSTEM);
165 REGISTER_CCS(CCS_GC);
166 REGISTER_CCS(CCS_OVERHEAD);
167 REGISTER_CCS(CCS_DONT_CARE);
168 REGISTER_CCS(CCS_PINNED);
169 REGISTER_CCS(CCS_IDLE);
170 REGISTER_CCS(CCS_MAIN);
171
172 /* find all the registered cost centre stacks, and make them
173 * children of CCS_MAIN.
174 */
175 ASSERT(CCS_LIST == CCS_MAIN);
176 CCS_LIST = CCS_LIST->prevStack;
177 CCS_MAIN->prevStack = NULL;
178 CCS_MAIN->root = CCS_MAIN;
179 ccsSetSelected(CCS_MAIN);
180
181 initProfiling2();
182
183 if (RtsFlags.CcFlags.doCostCentres) {
184 initTimeProfiling();
185 }
186
187 if (RtsFlags.ProfFlags.doHeapProfile) {
188 initHeapProfiling();
189 }
190 }
191
192 //
193 // Should be called after loading any new Haskell code.
194 //
195 void initProfiling2 (void)
196 {
197 CostCentreStack *ccs, *next;
198
199 // make CCS_MAIN the parent of all the pre-defined CCSs.
200 for (ccs = CCS_LIST; ccs != NULL; ) {
201 next = ccs->prevStack;
202 ccs->prevStack = NULL;
203 actualPush_(CCS_MAIN,ccs->cc,ccs);
204 ccs->root = ccs;
205 ccs = next;
206 }
207 CCS_LIST = NULL;
208 }
209
210 void
211 freeProfiling (void)
212 {
213 arenaFree(prof_arena);
214 }
215
216 CostCentre *mkCostCentre (char *label, char *module, char *srcloc)
217 {
218 CostCentre *cc = stgMallocBytes (sizeof(CostCentre), "mkCostCentre");
219 cc->label = label;
220 cc->module = module;
221 cc->srcloc = srcloc;
222 cc->is_caf = 0;
223 cc->mem_alloc = 0;
224 cc->time_ticks = 0;
225 cc->link = NULL;
226 return cc;
227 }
228
229 static void
230 initProfilingLogFile(void)
231 {
232 // Figure out output file name stem.
233 char const *stem;
234 if (RtsFlags.CcFlags.outputFileNameStem) {
235 stem = RtsFlags.CcFlags.outputFileNameStem;
236 } else {
237 char *prog;
238
239 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
240 strcpy(prog, prog_name);
241 #if defined(mingw32_HOST_OS)
242 // on Windows, drop the .exe suffix if there is one
243 {
244 char *suff;
245 suff = strrchr(prog,'.');
246 if (suff != NULL && !strcmp(suff,".exe")) {
247 *suff = '\0';
248 }
249 }
250 #endif
251 stem = prog;
252 }
253
254 if (RtsFlags.CcFlags.doCostCentres == 0 && !doingRetainerProfiling())
255 {
256 /* No need for the <stem>.prof file */
257 prof_filename = NULL;
258 prof_file = NULL;
259 }
260 else
261 {
262 /* Initialise the log file name */
263 prof_filename = arenaAlloc(prof_arena, strlen(stem) + 6);
264 sprintf(prof_filename, "%s.prof", stem);
265
266 /* open the log file */
267 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
268 debugBelch("Can't open profiling report file %s\n", prof_filename);
269 RtsFlags.CcFlags.doCostCentres = 0;
270 // Retainer profiling (`-hr` or `-hr<cc> -h<x>`) writes to
271 // both <program>.hp as <program>.prof.
272 if (doingRetainerProfiling()) {
273 RtsFlags.ProfFlags.doHeapProfile = 0;
274 }
275 }
276 }
277
278 if (RtsFlags.ProfFlags.doHeapProfile) {
279 /* Initialise the log file name */
280 hp_filename = arenaAlloc(prof_arena, strlen(stem) + 6);
281 sprintf(hp_filename, "%s.hp", stem);
282
283 /* open the log file */
284 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
285 debugBelch("Can't open profiling report file %s\n",
286 hp_filename);
287 RtsFlags.ProfFlags.doHeapProfile = 0;
288 }
289 }
290 }
291
292 void
293 initTimeProfiling(void)
294 {
295 /* Start ticking */
296 startProfTimer();
297 };
298
299 void
300 endProfiling ( void )
301 {
302 if (RtsFlags.CcFlags.doCostCentres) {
303 stopProfTimer();
304 }
305 if (RtsFlags.ProfFlags.doHeapProfile) {
306 endHeapProfiling();
307 }
308 }
309
310
311 /*
312 These are used in the C stubs produced by the code generator
313 to register code.
314 */
315 void registerCcList(CostCentre **cc_list)
316 {
317 for (CostCentre **i = cc_list; *i != NULL; i++) {
318 REGISTER_CC(*i);
319 }
320 }
321
322 void registerCcsList(CostCentreStack **cc_list)
323 {
324 for (CostCentreStack **i = cc_list; *i != NULL; i++) {
325 REGISTER_CCS(*i);
326 }
327 }
328
329 /* -----------------------------------------------------------------------------
330 Set CCCS when entering a function.
331
332 The algorithm is as follows.
333
334 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
335
336 where
337
338 dropCommonPrefix A B
339 -- returns the suffix of B after removing any prefix common
340 -- to both A and B.
341
342 e.g.
343
344 <a,b,c> ++> <> = <a,b,c>
345 <a,b,c> ++> <d> = <a,b,c,d>
346 <a,b,c> ++> <a,b> = <a,b,c>
347 <a,b> ++> <a,b,c> = <a,b,c>
348 <a,b,c> ++> <a,b,d> = <a,b,c,d>
349
350 -------------------------------------------------------------------------- */
351
352 // implements c1 ++> c2, where c1 and c2 are equal depth
353 //
354 static CostCentreStack *
355 enterFunEqualStacks (CostCentreStack *ccs0,
356 CostCentreStack *ccsapp,
357 CostCentreStack *ccsfn)
358 {
359 ASSERT(ccsapp->depth == ccsfn->depth);
360 if (ccsapp == ccsfn) return ccs0;
361 return pushCostCentre(enterFunEqualStacks(ccs0,
362 ccsapp->prevStack,
363 ccsfn->prevStack),
364 ccsfn->cc);
365 }
366
367 // implements c1 ++> c2, where c2 is deeper than c1.
368 // Drop elements of c2 until we have equal stacks, call
369 // enterFunEqualStacks(), and then push on the elements that we
370 // dropped in reverse order.
371 //
372 static CostCentreStack *
373 enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
374 {
375 if (n == 0) {
376 ASSERT(ccsfn->depth == ccsapp->depth);
377 return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);;
378 } else {
379 ASSERT(ccsfn->depth > ccsapp->depth);
380 return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
381 ccsfn->cc);
382 }
383 }
384
385 void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
386 {
387 CostCentreStack *ccsapp;
388
389 // common case 1: both stacks are the same
390 if (ccsfn == reg->rCCCS) {
391 return;
392 }
393
394 // common case 2: the function stack is empty, or just CAF
395 if (ccsfn->cc->is_caf) {
396 return;
397 }
398
399 ccsapp = reg->rCCCS;
400 reg->rCCCS = CCS_OVERHEAD;
401
402 // common case 3: the stacks are completely different (e.g. one is a
403 // descendent of MAIN and the other of a CAF): we append the whole
404 // of the function stack to the current CCS.
405 if (ccsfn->root != ccsapp->root) {
406 reg->rCCCS = appendCCS(ccsapp,ccsfn);
407 return;
408 }
409
410 // uncommon case 4: ccsapp is deeper than ccsfn
411 if (ccsapp->depth > ccsfn->depth) {
412 uint32_t i, n;
413 CostCentreStack *tmp = ccsapp;
414 n = ccsapp->depth - ccsfn->depth;
415 for (i = 0; i < n; i++) {
416 tmp = tmp->prevStack;
417 }
418 reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
419 return;
420 }
421
422 // uncommon case 5: ccsfn is deeper than CCCS
423 if (ccsfn->depth > ccsapp->depth) {
424 reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
425 ccsfn->depth - ccsapp->depth);
426 return;
427 }
428
429 // uncommon case 6: stacks are equal depth, but different
430 reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
431 }
432
433 /* -----------------------------------------------------------------------------
434 Decide whether closures with this CCS should contribute to the heap
435 profile.
436 -------------------------------------------------------------------------- */
437
438 static void
439 ccsSetSelected (CostCentreStack *ccs)
440 {
441 if (RtsFlags.ProfFlags.modSelector) {
442 if (! strMatchesSelector (ccs->cc->module,
443 RtsFlags.ProfFlags.modSelector) ) {
444 ccs->selected = 0;
445 return;
446 }
447 }
448 if (RtsFlags.ProfFlags.ccSelector) {
449 if (! strMatchesSelector (ccs->cc->label,
450 RtsFlags.ProfFlags.ccSelector) ) {
451 ccs->selected = 0;
452 return;
453 }
454 }
455 if (RtsFlags.ProfFlags.ccsSelector) {
456 CostCentreStack *c;
457 for (c = ccs; c != NULL; c = c->prevStack)
458 {
459 if ( strMatchesSelector (c->cc->label,
460 RtsFlags.ProfFlags.ccsSelector) ) {
461 break;
462 }
463 }
464 if (c == NULL) {
465 ccs->selected = 0;
466 return;
467 }
468 }
469
470 ccs->selected = 1;
471 return;
472 }
473
474 /* -----------------------------------------------------------------------------
475 Cost-centre stack manipulation
476 -------------------------------------------------------------------------- */
477
478 #if defined(DEBUG)
479 CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
480 CostCentreStack *
481 pushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
482 #define pushCostCentre _pushCostCentre
483 {
484 IF_DEBUG(prof,
485 traceBegin("pushing %s on ", cc->label);
486 debugCCS(ccs);
487 traceEnd(););
488
489 return pushCostCentre(ccs,cc);
490 }
491 #endif
492
493 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
494
495 #if defined(DEBUG)
496 CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
497 CostCentreStack *
498 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
499 #define appendCCS _appendCCS
500 {
501 IF_DEBUG(prof,
502 if (ccs1 != ccs2) {
503 debugBelch("Appending ");
504 debugCCS(ccs1);
505 debugBelch(" to ");
506 debugCCS(ccs2);
507 debugBelch("\n");});
508 return appendCCS(ccs1,ccs2);
509 }
510 #endif
511
512 CostCentreStack *
513 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
514 {
515 if (ccs1 == ccs2) {
516 return ccs1;
517 }
518
519 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) {
520 // stop at a CAF element
521 return ccs1;
522 }
523
524 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
525 }
526
527 // Pick one:
528 // #define RECURSION_DROPS
529 #define RECURSION_TRUNCATES
530
531 CostCentreStack *
532 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
533 {
534 CostCentreStack *temp_ccs, *ret;
535 IndexTable *ixtable;
536
537 if (ccs == EMPTY_STACK) {
538 ACQUIRE_LOCK(&ccs_mutex);
539 ret = actualPush(ccs,cc);
540 }
541 else
542 {
543 if (ccs->cc == cc) {
544 return ccs;
545 } else {
546 // check if we've already memoized this stack
547 ixtable = ccs->indexTable;
548 temp_ccs = isInIndexTable(ixtable,cc);
549
550 if (temp_ccs != EMPTY_STACK) {
551 return temp_ccs;
552 } else {
553
554 // not in the IndexTable, now we take the lock:
555 ACQUIRE_LOCK(&ccs_mutex);
556
557 if (ccs->indexTable != ixtable)
558 {
559 // someone modified ccs->indexTable while
560 // we did not hold the lock, so we must
561 // check it again:
562 temp_ccs = isInIndexTable(ixtable,cc);
563 if (temp_ccs != EMPTY_STACK)
564 {
565 RELEASE_LOCK(&ccs_mutex);
566 return temp_ccs;
567 }
568 }
569 temp_ccs = checkLoop(ccs,cc);
570 if (temp_ccs != NULL) {
571 // This CC is already in the stack somewhere.
572 // This could be recursion, or just calling
573 // another function with the same CC.
574 // A number of policies are possible at this
575 // point, we implement two here:
576 // - truncate the stack to the previous instance
577 // of this CC
578 // - ignore this push, return the same stack.
579 //
580 CostCentreStack *new_ccs;
581 #if defined(RECURSION_TRUNCATES)
582 new_ccs = temp_ccs;
583 #else // defined(RECURSION_DROPS)
584 new_ccs = ccs;
585 #endif
586 ccs->indexTable = addToIndexTable (ccs->indexTable,
587 new_ccs, cc, 1);
588 ret = new_ccs;
589 } else {
590 ret = actualPush (ccs,cc);
591 }
592 }
593 }
594 }
595
596 RELEASE_LOCK(&ccs_mutex);
597 return ret;
598 }
599
600 static CostCentreStack *
601 checkLoop (CostCentreStack *ccs, CostCentre *cc)
602 {
603 while (ccs != EMPTY_STACK) {
604 if (ccs->cc == cc)
605 return ccs;
606 ccs = ccs->prevStack;
607 }
608 return NULL;
609 }
610
611 static CostCentreStack *
612 actualPush (CostCentreStack *ccs, CostCentre *cc)
613 {
614 CostCentreStack *new_ccs;
615
616 // allocate space for a new CostCentreStack
617 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
618
619 return actualPush_(ccs, cc, new_ccs);
620 }
621
622 static CostCentreStack *
623 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
624 {
625 /* assign values to each member of the structure */
626 new_ccs->ccsID = CCS_ID++;
627 new_ccs->cc = cc;
628 new_ccs->prevStack = ccs;
629 new_ccs->root = ccs->root;
630 new_ccs->depth = ccs->depth + 1;
631
632 new_ccs->indexTable = EMPTY_TABLE;
633
634 /* Initialise the various _scc_ counters to zero
635 */
636 new_ccs->scc_count = 0;
637
638 /* Initialize all other stats here. There should be a quick way
639 * that's easily used elsewhere too
640 */
641 new_ccs->time_ticks = 0;
642 new_ccs->mem_alloc = 0;
643 new_ccs->inherited_ticks = 0;
644 new_ccs->inherited_alloc = 0;
645
646 // Set the selected field.
647 ccsSetSelected(new_ccs);
648
649 /* update the memoization table for the parent stack */
650 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
651 0/*not a back edge*/);
652
653 /* return a pointer to the new stack */
654 return new_ccs;
655 }
656
657
658 static CostCentreStack *
659 isInIndexTable(IndexTable *it, CostCentre *cc)
660 {
661 while (it!=EMPTY_TABLE)
662 {
663 if (it->cc == cc)
664 return it->ccs;
665 else
666 it = it->next;
667 }
668
669 /* otherwise we never found it so return EMPTY_TABLE */
670 return EMPTY_TABLE;
671 }
672
673
674 static IndexTable *
675 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
676 CostCentre *cc, unsigned int back_edge)
677 {
678 IndexTable *new_it;
679
680 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
681
682 new_it->cc = cc;
683 new_it->ccs = new_ccs;
684 new_it->next = it;
685 new_it->back_edge = back_edge;
686 return new_it;
687 }
688
689 /* -----------------------------------------------------------------------------
690 Generating a time & allocation profiling report.
691 -------------------------------------------------------------------------- */
692
693 /* We omit certain system-related CCs and CCSs from the default
694 * reports, so as not to cause confusion.
695 */
696 bool
697 ignoreCC (CostCentre const *cc)
698 {
699 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
700 ( cc == CC_OVERHEAD
701 || cc == CC_DONT_CARE
702 || cc == CC_GC
703 || cc == CC_SYSTEM
704 || cc == CC_IDLE);
705 }
706
707 bool
708 ignoreCCS (CostCentreStack const *ccs)
709 {
710 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
711 ( ccs == CCS_OVERHEAD
712 || ccs == CCS_DONT_CARE
713 || ccs == CCS_GC
714 || ccs == CCS_SYSTEM
715 || ccs == CCS_IDLE);
716 }
717
718 void
719 reportCCSProfiling( void )
720 {
721 stopProfTimer();
722 if (RtsFlags.CcFlags.doCostCentres == 0) return;
723
724 ProfilerTotals totals = countTickss(CCS_MAIN);
725 aggregateCCCosts(CCS_MAIN);
726 inheritCosts(CCS_MAIN);
727 CostCentreStack *stack = pruneCCSTree(CCS_MAIN);
728 sortCCSTree(stack);
729
730 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_JSON) {
731 writeCCSReportJson(prof_file, stack, totals);
732 } else {
733 writeCCSReport(prof_file, stack, totals);
734 }
735 }
736
737 /* -----------------------------------------------------------------------------
738 * Accumulating total allocatinos and tick count
739 -------------------------------------------------------------------------- */
740
741 /* Helper */
742 static void
743 countTickss_(CostCentreStack const *ccs, ProfilerTotals *totals)
744 {
745 if (!ignoreCCS(ccs)) {
746 totals->total_alloc += ccs->mem_alloc;
747 totals->total_prof_ticks += ccs->time_ticks;
748 }
749 for (IndexTable *i = ccs->indexTable; i != NULL; i = i->next) {
750 if (!i->back_edge) {
751 countTickss_(i->ccs, totals);
752 }
753 }
754 }
755
756 /* Traverse the cost centre stack tree and accumulate
757 * total ticks/allocations.
758 */
759 static ProfilerTotals
760 countTickss(CostCentreStack const *ccs)
761 {
762 ProfilerTotals totals = {0,0};
763 countTickss_(ccs, &totals);
764 return totals;
765 }
766
767 /* Traverse the cost centre stack tree and inherit ticks & allocs.
768 */
769 static void
770 inheritCosts(CostCentreStack *ccs)
771 {
772 IndexTable *i;
773
774 if (ignoreCCS(ccs)) { return; }
775
776 ccs->inherited_ticks += ccs->time_ticks;
777 ccs->inherited_alloc += ccs->mem_alloc;
778
779 for (i = ccs->indexTable; i != NULL; i = i->next)
780 if (!i->back_edge) {
781 inheritCosts(i->ccs);
782 ccs->inherited_ticks += i->ccs->inherited_ticks;
783 ccs->inherited_alloc += i->ccs->inherited_alloc;
784 }
785
786 return;
787 }
788
789 static void
790 aggregateCCCosts( CostCentreStack *ccs )
791 {
792 IndexTable *i;
793
794 ccs->cc->mem_alloc += ccs->mem_alloc;
795 ccs->cc->time_ticks += ccs->time_ticks;
796
797 for (i = ccs->indexTable; i != 0; i = i->next) {
798 if (!i->back_edge) {
799 aggregateCCCosts(i->ccs);
800 }
801 }
802 }
803
804 //
805 // Prune CCSs with zero entries, zero ticks or zero allocation from
806 // the tree, unless COST_CENTRES_ALL is on.
807 //
808 static CostCentreStack *
809 pruneCCSTree (CostCentreStack *ccs)
810 {
811 CostCentreStack *ccs1;
812 IndexTable *i, **prev;
813
814 prev = &ccs->indexTable;
815 for (i = ccs->indexTable; i != 0; i = i->next) {
816 if (i->back_edge) { continue; }
817
818 ccs1 = pruneCCSTree(i->ccs);
819 if (ccs1 == NULL) {
820 *prev = i->next;
821 } else {
822 prev = &(i->next);
823 }
824 }
825
826 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
827 /* force printing of *all* cost centres if -P -P */ )
828
829 || ( ccs->indexTable != 0 )
830 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
831 ) {
832 return ccs;
833 } else {
834 return NULL;
835 }
836 }
837
838 static IndexTable*
839 insertIndexTableInSortedList(IndexTable* tbl, IndexTable* sortedList)
840 {
841 StgWord tbl_ticks = tbl->ccs->scc_count;
842 char* tbl_label = tbl->ccs->cc->label;
843
844 IndexTable *prev = NULL;
845 IndexTable *cursor = sortedList;
846
847 while (cursor != NULL) {
848 StgWord cursor_ticks = cursor->ccs->scc_count;
849 char* cursor_label = cursor->ccs->cc->label;
850
851 if (tbl_ticks > cursor_ticks ||
852 (tbl_ticks == cursor_ticks && strcmp(tbl_label, cursor_label) < 0)) {
853 if (prev == NULL) {
854 tbl->next = sortedList;
855 return tbl;
856 } else {
857 prev->next = tbl;
858 tbl->next = cursor;
859 return sortedList;
860 }
861 } else {
862 prev = cursor;
863 cursor = cursor->next;
864 }
865 }
866
867 prev->next = tbl;
868 return sortedList;
869 }
870
871 static void
872 sortCCSTree(CostCentreStack *ccs)
873 {
874 if (ccs->indexTable == NULL) return;
875
876 for (IndexTable *tbl = ccs->indexTable; tbl != NULL; tbl = tbl->next)
877 if (!tbl->back_edge)
878 sortCCSTree(tbl->ccs);
879
880 IndexTable *sortedList = ccs->indexTable;
881 IndexTable *nonSortedList = sortedList->next;
882 sortedList->next = NULL;
883
884 while (nonSortedList != NULL)
885 {
886 IndexTable *nonSortedTail = nonSortedList->next;
887 nonSortedList->next = NULL;
888 sortedList = insertIndexTableInSortedList(nonSortedList, sortedList);
889 nonSortedList = nonSortedTail;
890 }
891
892 ccs->indexTable = sortedList;
893 }
894
895 void
896 fprintCCS( FILE *f, CostCentreStack *ccs )
897 {
898 fprintf(f,"<");
899 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
900 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
901 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
902 fprintf(f,",");
903 }
904 }
905 fprintf(f,">");
906 }
907
908 // Returns: True if the call stack ended with CAF
909 static bool fprintCallStack (CostCentreStack *ccs)
910 {
911 CostCentreStack *prev;
912
913 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
914 prev = ccs->prevStack;
915 while (prev && prev != CCS_MAIN) {
916 ccs = prev;
917 fprintf(stderr, ",\n called from %s.%s",
918 ccs->cc->module, ccs->cc->label);
919 prev = ccs->prevStack;
920 }
921 fprintf(stderr, "\n");
922
923 return (!strncmp(ccs->cc->label, "CAF", 3));
924 }
925
926 /* For calling from .cmm code, where we can't reliably refer to stderr */
927 void
928 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
929 {
930 bool is_caf;
931 StgPtr frame;
932 StgStack *stack;
933 CostCentreStack *prev_ccs;
934 uint32_t depth = 0;
935 const uint32_t MAX_DEPTH = 10; // don't print gigantic chains of stacks
936
937 {
938 const char *desc;
939 const StgInfoTable *info;
940 info = get_itbl(UNTAG_CONST_CLOSURE(exception));
941 switch (info->type) {
942 case CONSTR:
943 case CONSTR_1_0:
944 case CONSTR_0_1:
945 case CONSTR_2_0:
946 case CONSTR_1_1:
947 case CONSTR_0_2:
948 case CONSTR_NOCAF:
949 desc = GET_CON_DESC(itbl_to_con_itbl(info));
950 break;
951 default:
952 desc = closure_type_names[info->type];
953 break;
954 }
955 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
956 }
957
958 is_caf = fprintCallStack(ccs);
959
960 // traverse the stack down to the enclosing update frame to
961 // find out where this CCS was evaluated from...
962
963 stack = tso->stackobj;
964 frame = stack->sp;
965 prev_ccs = ccs;
966
967 for (; is_caf && depth < MAX_DEPTH; depth++)
968 {
969 switch (get_itbl((StgClosure*)frame)->type)
970 {
971 case UPDATE_FRAME:
972 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
973 frame += sizeofW(StgUpdateFrame);
974 if (ccs == CCS_MAIN) {
975 goto done;
976 }
977 if (ccs == prev_ccs) {
978 // ignore if this is the same as the previous stack,
979 // we're probably in library code and haven't
980 // accumulated any more interesting stack items
981 // since the last update frame.
982 break;
983 }
984 prev_ccs = ccs;
985 fprintf(stderr, " --> evaluated by: ");
986 is_caf = fprintCallStack(ccs);
987 break;
988 case UNDERFLOW_FRAME:
989 stack = ((StgUnderflowFrame*)frame)->next_chunk;
990 frame = stack->sp;
991 break;
992 case STOP_FRAME:
993 goto done;
994 default:
995 frame += stack_frame_sizeW((StgClosure*)frame);
996 break;
997 }
998 }
999 done:
1000 return;
1001 }
1002
1003 #if defined(DEBUG)
1004 void
1005 debugCCS( CostCentreStack *ccs )
1006 {
1007 debugBelch("<");
1008 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1009 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
1010 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1011 debugBelch(",");
1012 }
1013 }
1014 debugBelch(">");
1015 }
1016 #endif /* DEBUG */
1017
1018 #endif /* PROFILING */