Building GHC with hadrian on FreeBSD
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if defined(PROFILING)
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "ProfilerReport.h"
21 #include "ProfilerReportJson.h"
22 #include "Printer.h"
23 #include "Capability.h"
24
25 #include <fs_rts.h>
26 #include <string.h>
27
28 #if defined(DEBUG)
29 #include "Trace.h"
30 #endif
31
32 /*
33 * Profiling allocation arena.
34 */
35 static Arena *prof_arena;
36
37 /*
38 * Global variables used to assign unique IDs to cc's, ccs's, and
39 * closure_cats
40 */
41
42 unsigned int CC_ID = 1;
43 unsigned int CCS_ID = 1;
44
45 /* Globals for opening the profiling log file(s)
46 */
47 static char *prof_filename; /* prof report file name = <program>.prof */
48 FILE *prof_file;
49
50 static char *hp_filename; /* heap profile (hp2ps style) log file */
51 FILE *hp_file;
52
53 /* Linked lists to keep track of CCs and CCSs that haven't
54 * been declared in the log file yet
55 */
56 CostCentre *CC_LIST = NULL;
57 CostCentreStack *CCS_LIST = NULL;
58
59 #if defined(THREADED_RTS)
60 static Mutex ccs_mutex;
61 #endif
62
63 /*
64 * Built-in cost centres and cost-centre stacks:
65 *
66 * MAIN is the root of the cost-centre stack tree. If there are
67 * no {-# SCC #-}s in the program, all costs will be attributed
68 * to MAIN.
69 *
70 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
71 * RTS operations apart from garbage collection are attributed
72 * to SYSTEM.
73 *
74 * GC is the storage manager / garbage collector.
75 *
76 * OVERHEAD gets all costs generated by the profiling system
77 * itself. These are costs that would not be incurred
78 * during non-profiled execution of the program.
79 *
80 * DONT_CARE is a placeholder cost-centre we assign to static
81 * constructors. It should *never* accumulate any costs.
82 *
83 * PINNED accumulates memory allocated to pinned objects, which
84 * cannot be profiled separately because we cannot reliably
85 * traverse pinned memory.
86 */
87
88 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "<built-in>", CC_NOT_CAF, );
89 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", "<built-in>", CC_NOT_CAF, );
90 CC_DECLARE(CC_GC, "GC", "GC", "<built-in>", CC_NOT_CAF, );
91 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "<built-in>", CC_NOT_CAF, );
92 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", "<built-in>", CC_NOT_CAF, );
93 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", "<built-in>", CC_NOT_CAF, );
94 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "<built-in>", CC_NOT_CAF, );
95
96 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
97 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
98 CCS_DECLARE(CCS_GC, CC_GC, );
99 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
100 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
101 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
102 CCS_DECLARE(CCS_IDLE, CC_IDLE, );
103
104 /*
105 * Static Functions
106 */
107
108 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
109 CostCentreStack *ccs2 );
110 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
111 CostCentreStack *new_ccs );
112 static void inheritCosts ( CostCentreStack *ccs );
113 static ProfilerTotals countTickss ( CostCentreStack const *ccs );
114 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
115 CostCentre *cc );
116 static void sortCCSTree ( CostCentreStack *ccs );
117 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
118 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
119 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
120 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
121 CostCentre *, unsigned int );
122 static void ccsSetSelected ( CostCentreStack *ccs );
123 static void aggregateCCCosts( CostCentreStack *ccs );
124
125 static void initTimeProfiling ( void );
126 static void initProfilingLogFile ( void );
127
128 /* -----------------------------------------------------------------------------
129 Initialise the profiling environment
130 -------------------------------------------------------------------------- */
131
132 void initProfiling (void)
133 {
134 // initialise our arena
135 prof_arena = newArena();
136
137 /* for the benefit of allocate()... */
138 {
139 uint32_t n;
140 for (n=0; n < n_capabilities; n++) {
141 capabilities[n]->r.rCCCS = CCS_SYSTEM;
142 }
143 }
144
145 #if defined(THREADED_RTS)
146 initMutex(&ccs_mutex);
147 #endif
148
149 /* Set up the log file, and dump the header and cost centre
150 * information into it.
151 */
152 initProfilingLogFile();
153
154 /* Register all the cost centres / stacks in the program
155 * CC_MAIN gets link = 0, all others have non-zero link.
156 */
157 REGISTER_CC(CC_MAIN);
158 REGISTER_CC(CC_SYSTEM);
159 REGISTER_CC(CC_GC);
160 REGISTER_CC(CC_OVERHEAD);
161 REGISTER_CC(CC_DONT_CARE);
162 REGISTER_CC(CC_PINNED);
163 REGISTER_CC(CC_IDLE);
164
165 REGISTER_CCS(CCS_SYSTEM);
166 REGISTER_CCS(CCS_GC);
167 REGISTER_CCS(CCS_OVERHEAD);
168 REGISTER_CCS(CCS_DONT_CARE);
169 REGISTER_CCS(CCS_PINNED);
170 REGISTER_CCS(CCS_IDLE);
171 REGISTER_CCS(CCS_MAIN);
172
173 /* find all the registered cost centre stacks, and make them
174 * children of CCS_MAIN.
175 */
176 ASSERT(CCS_LIST == CCS_MAIN);
177 CCS_LIST = CCS_LIST->prevStack;
178 CCS_MAIN->prevStack = NULL;
179 CCS_MAIN->root = CCS_MAIN;
180 ccsSetSelected(CCS_MAIN);
181
182 initProfiling2();
183
184 if (RtsFlags.CcFlags.doCostCentres) {
185 initTimeProfiling();
186 }
187
188 if (RtsFlags.ProfFlags.doHeapProfile) {
189 initHeapProfiling();
190 }
191 }
192
193 //
194 // Should be called after loading any new Haskell code.
195 //
196 void initProfiling2 (void)
197 {
198 CostCentreStack *ccs, *next;
199
200 // make CCS_MAIN the parent of all the pre-defined CCSs.
201 for (ccs = CCS_LIST; ccs != NULL; ) {
202 next = ccs->prevStack;
203 ccs->prevStack = NULL;
204 actualPush_(CCS_MAIN,ccs->cc,ccs);
205 ccs->root = ccs;
206 ccs = next;
207 }
208 CCS_LIST = NULL;
209 }
210
211 void
212 freeProfiling (void)
213 {
214 arenaFree(prof_arena);
215 }
216
217 CostCentre *mkCostCentre (char *label, char *module, char *srcloc)
218 {
219 CostCentre *cc = stgMallocBytes (sizeof(CostCentre), "mkCostCentre");
220 cc->label = label;
221 cc->module = module;
222 cc->srcloc = srcloc;
223 cc->is_caf = 0;
224 cc->mem_alloc = 0;
225 cc->time_ticks = 0;
226 cc->link = NULL;
227 return cc;
228 }
229
230 static void
231 initProfilingLogFile(void)
232 {
233 // Figure out output file name stem.
234 char const *stem;
235 if (RtsFlags.CcFlags.outputFileNameStem) {
236 stem = RtsFlags.CcFlags.outputFileNameStem;
237 } else {
238 char *prog;
239
240 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
241 strcpy(prog, prog_name);
242 #if defined(mingw32_HOST_OS)
243 // on Windows, drop the .exe suffix if there is one
244 {
245 char *suff;
246 suff = strrchr(prog,'.');
247 if (suff != NULL && !strcmp(suff,".exe")) {
248 *suff = '\0';
249 }
250 }
251 #endif
252 stem = prog;
253 }
254
255 if (RtsFlags.CcFlags.doCostCentres == 0 && !doingRetainerProfiling())
256 {
257 /* No need for the <stem>.prof file */
258 prof_filename = NULL;
259 prof_file = NULL;
260 }
261 else
262 {
263 /* Initialise the log file name */
264 prof_filename = arenaAlloc(prof_arena, strlen(stem) + 6);
265 sprintf(prof_filename, "%s.prof", stem);
266
267 /* open the log file */
268 if ((prof_file = __rts_fopen(prof_filename, "w")) == NULL) {
269 debugBelch("Can't open profiling report file %s\n", prof_filename);
270 RtsFlags.CcFlags.doCostCentres = 0;
271 // Retainer profiling (`-hr` or `-hr<cc> -h<x>`) writes to
272 // both <program>.hp as <program>.prof.
273 if (doingRetainerProfiling()) {
274 RtsFlags.ProfFlags.doHeapProfile = 0;
275 }
276 }
277 }
278
279 if (RtsFlags.ProfFlags.doHeapProfile) {
280 /* Initialise the log file name */
281 hp_filename = arenaAlloc(prof_arena, strlen(stem) + 6);
282 sprintf(hp_filename, "%s.hp", stem);
283
284 /* open the log file */
285 if ((hp_file = __rts_fopen(hp_filename, "w")) == NULL) {
286 debugBelch("Can't open profiling report file %s\n",
287 hp_filename);
288 RtsFlags.ProfFlags.doHeapProfile = 0;
289 }
290 }
291 }
292
293 void
294 initTimeProfiling(void)
295 {
296 /* Start ticking */
297 startProfTimer();
298 };
299
300 void
301 endProfiling ( void )
302 {
303 if (RtsFlags.CcFlags.doCostCentres) {
304 stopProfTimer();
305 }
306 if (RtsFlags.ProfFlags.doHeapProfile) {
307 endHeapProfiling();
308 }
309 }
310
311
312 /*
313 These are used in the C stubs produced by the code generator
314 to register code.
315 */
316 void registerCcList(CostCentre **cc_list)
317 {
318 for (CostCentre **i = cc_list; *i != NULL; i++) {
319 REGISTER_CC(*i);
320 }
321 }
322
323 void registerCcsList(CostCentreStack **cc_list)
324 {
325 for (CostCentreStack **i = cc_list; *i != NULL; i++) {
326 REGISTER_CCS(*i);
327 }
328 }
329
330 /* -----------------------------------------------------------------------------
331 Set CCCS when entering a function.
332
333 The algorithm is as follows.
334
335 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
336
337 where
338
339 dropCommonPrefix A B
340 -- returns the suffix of B after removing any prefix common
341 -- to both A and B.
342
343 e.g.
344
345 <a,b,c> ++> <> = <a,b,c>
346 <a,b,c> ++> <d> = <a,b,c,d>
347 <a,b,c> ++> <a,b> = <a,b,c>
348 <a,b> ++> <a,b,c> = <a,b,c>
349 <a,b,c> ++> <a,b,d> = <a,b,c,d>
350
351 -------------------------------------------------------------------------- */
352
353 // implements c1 ++> c2, where c1 and c2 are equal depth
354 //
355 static CostCentreStack *
356 enterFunEqualStacks (CostCentreStack *ccs0,
357 CostCentreStack *ccsapp,
358 CostCentreStack *ccsfn)
359 {
360 ASSERT(ccsapp->depth == ccsfn->depth);
361 if (ccsapp == ccsfn) return ccs0;
362 return pushCostCentre(enterFunEqualStacks(ccs0,
363 ccsapp->prevStack,
364 ccsfn->prevStack),
365 ccsfn->cc);
366 }
367
368 // implements c1 ++> c2, where c2 is deeper than c1.
369 // Drop elements of c2 until we have equal stacks, call
370 // enterFunEqualStacks(), and then push on the elements that we
371 // dropped in reverse order.
372 //
373 static CostCentreStack *
374 enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
375 {
376 if (n == 0) {
377 ASSERT(ccsfn->depth == ccsapp->depth);
378 return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);;
379 } else {
380 ASSERT(ccsfn->depth > ccsapp->depth);
381 return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
382 ccsfn->cc);
383 }
384 }
385
386 void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
387 {
388 CostCentreStack *ccsapp;
389
390 // common case 1: both stacks are the same
391 if (ccsfn == reg->rCCCS) {
392 return;
393 }
394
395 // common case 2: the function stack is empty, or just CAF
396 if (ccsfn->cc->is_caf) {
397 return;
398 }
399
400 ccsapp = reg->rCCCS;
401 reg->rCCCS = CCS_OVERHEAD;
402
403 // common case 3: the stacks are completely different (e.g. one is a
404 // descendent of MAIN and the other of a CAF): we append the whole
405 // of the function stack to the current CCS.
406 if (ccsfn->root != ccsapp->root) {
407 reg->rCCCS = appendCCS(ccsapp,ccsfn);
408 return;
409 }
410
411 // uncommon case 4: ccsapp is deeper than ccsfn
412 if (ccsapp->depth > ccsfn->depth) {
413 uint32_t i, n;
414 CostCentreStack *tmp = ccsapp;
415 n = ccsapp->depth - ccsfn->depth;
416 for (i = 0; i < n; i++) {
417 tmp = tmp->prevStack;
418 }
419 reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
420 return;
421 }
422
423 // uncommon case 5: ccsfn is deeper than CCCS
424 if (ccsfn->depth > ccsapp->depth) {
425 reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
426 ccsfn->depth - ccsapp->depth);
427 return;
428 }
429
430 // uncommon case 6: stacks are equal depth, but different
431 reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
432 }
433
434 /* -----------------------------------------------------------------------------
435 Decide whether closures with this CCS should contribute to the heap
436 profile.
437 -------------------------------------------------------------------------- */
438
439 static void
440 ccsSetSelected (CostCentreStack *ccs)
441 {
442 if (RtsFlags.ProfFlags.modSelector) {
443 if (! strMatchesSelector (ccs->cc->module,
444 RtsFlags.ProfFlags.modSelector) ) {
445 ccs->selected = 0;
446 return;
447 }
448 }
449 if (RtsFlags.ProfFlags.ccSelector) {
450 if (! strMatchesSelector (ccs->cc->label,
451 RtsFlags.ProfFlags.ccSelector) ) {
452 ccs->selected = 0;
453 return;
454 }
455 }
456 if (RtsFlags.ProfFlags.ccsSelector) {
457 CostCentreStack *c;
458 for (c = ccs; c != NULL; c = c->prevStack)
459 {
460 if ( strMatchesSelector (c->cc->label,
461 RtsFlags.ProfFlags.ccsSelector) ) {
462 break;
463 }
464 }
465 if (c == NULL) {
466 ccs->selected = 0;
467 return;
468 }
469 }
470
471 ccs->selected = 1;
472 return;
473 }
474
475 /* -----------------------------------------------------------------------------
476 Cost-centre stack manipulation
477 -------------------------------------------------------------------------- */
478
479 #if defined(DEBUG)
480 CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
481 CostCentreStack *
482 pushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
483 #define pushCostCentre _pushCostCentre
484 {
485 IF_DEBUG(prof,
486 traceBegin("pushing %s on ", cc->label);
487 debugCCS(ccs);
488 traceEnd(););
489
490 return pushCostCentre(ccs,cc);
491 }
492 #endif
493
494 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
495
496 #if defined(DEBUG)
497 CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
498 CostCentreStack *
499 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
500 #define appendCCS _appendCCS
501 {
502 IF_DEBUG(prof,
503 if (ccs1 != ccs2) {
504 debugBelch("Appending ");
505 debugCCS(ccs1);
506 debugBelch(" to ");
507 debugCCS(ccs2);
508 debugBelch("\n");});
509 return appendCCS(ccs1,ccs2);
510 }
511 #endif
512
513 CostCentreStack *
514 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
515 {
516 if (ccs1 == ccs2) {
517 return ccs1;
518 }
519
520 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) {
521 // stop at a CAF element
522 return ccs1;
523 }
524
525 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
526 }
527
528 // Pick one:
529 // #define RECURSION_DROPS
530 #define RECURSION_TRUNCATES
531
532 CostCentreStack *
533 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
534 {
535 CostCentreStack *temp_ccs, *ret;
536 IndexTable *ixtable;
537
538 if (ccs == EMPTY_STACK) {
539 ACQUIRE_LOCK(&ccs_mutex);
540 ret = actualPush(ccs,cc);
541 }
542 else
543 {
544 if (ccs->cc == cc) {
545 return ccs;
546 } else {
547 // check if we've already memoized this stack
548 ixtable = ccs->indexTable;
549 temp_ccs = isInIndexTable(ixtable,cc);
550
551 if (temp_ccs != EMPTY_STACK) {
552 return temp_ccs;
553 } else {
554
555 // not in the IndexTable, now we take the lock:
556 ACQUIRE_LOCK(&ccs_mutex);
557
558 if (ccs->indexTable != ixtable)
559 {
560 // someone modified ccs->indexTable while
561 // we did not hold the lock, so we must
562 // check it again:
563 temp_ccs = isInIndexTable(ixtable,cc);
564 if (temp_ccs != EMPTY_STACK)
565 {
566 RELEASE_LOCK(&ccs_mutex);
567 return temp_ccs;
568 }
569 }
570 temp_ccs = checkLoop(ccs,cc);
571 if (temp_ccs != NULL) {
572 // This CC is already in the stack somewhere.
573 // This could be recursion, or just calling
574 // another function with the same CC.
575 // A number of policies are possible at this
576 // point, we implement two here:
577 // - truncate the stack to the previous instance
578 // of this CC
579 // - ignore this push, return the same stack.
580 //
581 CostCentreStack *new_ccs;
582 #if defined(RECURSION_TRUNCATES)
583 new_ccs = temp_ccs;
584 #else // defined(RECURSION_DROPS)
585 new_ccs = ccs;
586 #endif
587 ccs->indexTable = addToIndexTable (ccs->indexTable,
588 new_ccs, cc, 1);
589 ret = new_ccs;
590 } else {
591 ret = actualPush (ccs,cc);
592 }
593 }
594 }
595 }
596
597 RELEASE_LOCK(&ccs_mutex);
598 return ret;
599 }
600
601 static CostCentreStack *
602 checkLoop (CostCentreStack *ccs, CostCentre *cc)
603 {
604 while (ccs != EMPTY_STACK) {
605 if (ccs->cc == cc)
606 return ccs;
607 ccs = ccs->prevStack;
608 }
609 return NULL;
610 }
611
612 static CostCentreStack *
613 actualPush (CostCentreStack *ccs, CostCentre *cc)
614 {
615 CostCentreStack *new_ccs;
616
617 // allocate space for a new CostCentreStack
618 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
619
620 return actualPush_(ccs, cc, new_ccs);
621 }
622
623 static CostCentreStack *
624 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
625 {
626 /* assign values to each member of the structure */
627 new_ccs->ccsID = CCS_ID++;
628 new_ccs->cc = cc;
629 new_ccs->prevStack = ccs;
630 new_ccs->root = ccs->root;
631 new_ccs->depth = ccs->depth + 1;
632
633 new_ccs->indexTable = EMPTY_TABLE;
634
635 /* Initialise the various _scc_ counters to zero
636 */
637 new_ccs->scc_count = 0;
638
639 /* Initialize all other stats here. There should be a quick way
640 * that's easily used elsewhere too
641 */
642 new_ccs->time_ticks = 0;
643 new_ccs->mem_alloc = 0;
644 new_ccs->inherited_ticks = 0;
645 new_ccs->inherited_alloc = 0;
646
647 // Set the selected field.
648 ccsSetSelected(new_ccs);
649
650 /* update the memoization table for the parent stack */
651 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
652 0/*not a back edge*/);
653
654 /* return a pointer to the new stack */
655 return new_ccs;
656 }
657
658
659 static CostCentreStack *
660 isInIndexTable(IndexTable *it, CostCentre *cc)
661 {
662 while (it!=EMPTY_TABLE)
663 {
664 if (it->cc == cc)
665 return it->ccs;
666 else
667 it = it->next;
668 }
669
670 /* otherwise we never found it so return EMPTY_TABLE */
671 return EMPTY_TABLE;
672 }
673
674
675 static IndexTable *
676 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
677 CostCentre *cc, unsigned int back_edge)
678 {
679 IndexTable *new_it;
680
681 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
682
683 new_it->cc = cc;
684 new_it->ccs = new_ccs;
685 new_it->next = it;
686 new_it->back_edge = back_edge;
687 return new_it;
688 }
689
690 /* -----------------------------------------------------------------------------
691 Generating a time & allocation profiling report.
692 -------------------------------------------------------------------------- */
693
694 /* We omit certain system-related CCs and CCSs from the default
695 * reports, so as not to cause confusion.
696 */
697 bool
698 ignoreCC (CostCentre const *cc)
699 {
700 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
701 ( cc == CC_OVERHEAD
702 || cc == CC_DONT_CARE
703 || cc == CC_GC
704 || cc == CC_SYSTEM
705 || cc == CC_IDLE);
706 }
707
708 bool
709 ignoreCCS (CostCentreStack const *ccs)
710 {
711 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
712 ( ccs == CCS_OVERHEAD
713 || ccs == CCS_DONT_CARE
714 || ccs == CCS_GC
715 || ccs == CCS_SYSTEM
716 || ccs == CCS_IDLE);
717 }
718
719 void
720 reportCCSProfiling( void )
721 {
722 stopProfTimer();
723 if (RtsFlags.CcFlags.doCostCentres == 0) return;
724
725 ProfilerTotals totals = countTickss(CCS_MAIN);
726 aggregateCCCosts(CCS_MAIN);
727 inheritCosts(CCS_MAIN);
728 CostCentreStack *stack = pruneCCSTree(CCS_MAIN);
729 sortCCSTree(stack);
730
731 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_JSON) {
732 writeCCSReportJson(prof_file, stack, totals);
733 } else {
734 writeCCSReport(prof_file, stack, totals);
735 }
736 }
737
738 /* -----------------------------------------------------------------------------
739 * Accumulating total allocatinos and tick count
740 -------------------------------------------------------------------------- */
741
742 /* Helper */
743 static void
744 countTickss_(CostCentreStack const *ccs, ProfilerTotals *totals)
745 {
746 if (!ignoreCCS(ccs)) {
747 totals->total_alloc += ccs->mem_alloc;
748 totals->total_prof_ticks += ccs->time_ticks;
749 }
750 for (IndexTable *i = ccs->indexTable; i != NULL; i = i->next) {
751 if (!i->back_edge) {
752 countTickss_(i->ccs, totals);
753 }
754 }
755 }
756
757 /* Traverse the cost centre stack tree and accumulate
758 * total ticks/allocations.
759 */
760 static ProfilerTotals
761 countTickss(CostCentreStack const *ccs)
762 {
763 ProfilerTotals totals = {0,0};
764 countTickss_(ccs, &totals);
765 return totals;
766 }
767
768 /* Traverse the cost centre stack tree and inherit ticks & allocs.
769 */
770 static void
771 inheritCosts(CostCentreStack *ccs)
772 {
773 IndexTable *i;
774
775 if (ignoreCCS(ccs)) { return; }
776
777 ccs->inherited_ticks += ccs->time_ticks;
778 ccs->inherited_alloc += ccs->mem_alloc;
779
780 for (i = ccs->indexTable; i != NULL; i = i->next)
781 if (!i->back_edge) {
782 inheritCosts(i->ccs);
783 ccs->inherited_ticks += i->ccs->inherited_ticks;
784 ccs->inherited_alloc += i->ccs->inherited_alloc;
785 }
786
787 return;
788 }
789
790 static void
791 aggregateCCCosts( CostCentreStack *ccs )
792 {
793 IndexTable *i;
794
795 ccs->cc->mem_alloc += ccs->mem_alloc;
796 ccs->cc->time_ticks += ccs->time_ticks;
797
798 for (i = ccs->indexTable; i != 0; i = i->next) {
799 if (!i->back_edge) {
800 aggregateCCCosts(i->ccs);
801 }
802 }
803 }
804
805 //
806 // Prune CCSs with zero entries, zero ticks or zero allocation from
807 // the tree, unless COST_CENTRES_ALL is on.
808 //
809 static CostCentreStack *
810 pruneCCSTree (CostCentreStack *ccs)
811 {
812 CostCentreStack *ccs1;
813 IndexTable *i, **prev;
814
815 prev = &ccs->indexTable;
816 for (i = ccs->indexTable; i != 0; i = i->next) {
817 if (i->back_edge) { continue; }
818
819 ccs1 = pruneCCSTree(i->ccs);
820 if (ccs1 == NULL) {
821 *prev = i->next;
822 } else {
823 prev = &(i->next);
824 }
825 }
826
827 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
828 /* force printing of *all* cost centres if -P -P */ )
829
830 || ( ccs->indexTable != 0 )
831 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
832 ) {
833 return ccs;
834 } else {
835 return NULL;
836 }
837 }
838
839 static IndexTable*
840 insertIndexTableInSortedList(IndexTable* tbl, IndexTable* sortedList)
841 {
842 StgWord tbl_ticks = tbl->ccs->scc_count;
843 char* tbl_label = tbl->ccs->cc->label;
844
845 IndexTable *prev = NULL;
846 IndexTable *cursor = sortedList;
847
848 while (cursor != NULL) {
849 StgWord cursor_ticks = cursor->ccs->scc_count;
850 char* cursor_label = cursor->ccs->cc->label;
851
852 if (tbl_ticks > cursor_ticks ||
853 (tbl_ticks == cursor_ticks && strcmp(tbl_label, cursor_label) < 0)) {
854 if (prev == NULL) {
855 tbl->next = sortedList;
856 return tbl;
857 } else {
858 prev->next = tbl;
859 tbl->next = cursor;
860 return sortedList;
861 }
862 } else {
863 prev = cursor;
864 cursor = cursor->next;
865 }
866 }
867
868 prev->next = tbl;
869 return sortedList;
870 }
871
872 static void
873 sortCCSTree(CostCentreStack *ccs)
874 {
875 if (ccs->indexTable == NULL) return;
876
877 for (IndexTable *tbl = ccs->indexTable; tbl != NULL; tbl = tbl->next)
878 if (!tbl->back_edge)
879 sortCCSTree(tbl->ccs);
880
881 IndexTable *sortedList = ccs->indexTable;
882 IndexTable *nonSortedList = sortedList->next;
883 sortedList->next = NULL;
884
885 while (nonSortedList != NULL)
886 {
887 IndexTable *nonSortedTail = nonSortedList->next;
888 nonSortedList->next = NULL;
889 sortedList = insertIndexTableInSortedList(nonSortedList, sortedList);
890 nonSortedList = nonSortedTail;
891 }
892
893 ccs->indexTable = sortedList;
894 }
895
896 void
897 fprintCCS( FILE *f, CostCentreStack *ccs )
898 {
899 fprintf(f,"<");
900 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
901 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
902 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
903 fprintf(f,",");
904 }
905 }
906 fprintf(f,">");
907 }
908
909 // Returns: True if the call stack ended with CAF
910 static bool fprintCallStack (CostCentreStack *ccs)
911 {
912 CostCentreStack *prev;
913
914 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
915 prev = ccs->prevStack;
916 while (prev && prev != CCS_MAIN) {
917 ccs = prev;
918 fprintf(stderr, ",\n called from %s.%s",
919 ccs->cc->module, ccs->cc->label);
920 prev = ccs->prevStack;
921 }
922 fprintf(stderr, "\n");
923
924 return (!strncmp(ccs->cc->label, "CAF", 3));
925 }
926
927 /* For calling from .cmm code, where we can't reliably refer to stderr */
928 void
929 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
930 {
931 bool is_caf;
932 StgPtr frame;
933 StgStack *stack;
934 CostCentreStack *prev_ccs;
935 uint32_t depth = 0;
936 const uint32_t MAX_DEPTH = 10; // don't print gigantic chains of stacks
937
938 {
939 const char *desc;
940 const StgInfoTable *info;
941 info = get_itbl(UNTAG_CONST_CLOSURE(exception));
942 switch (info->type) {
943 case CONSTR:
944 case CONSTR_1_0:
945 case CONSTR_0_1:
946 case CONSTR_2_0:
947 case CONSTR_1_1:
948 case CONSTR_0_2:
949 case CONSTR_NOCAF:
950 desc = GET_CON_DESC(itbl_to_con_itbl(info));
951 break;
952 default:
953 desc = closure_type_names[info->type];
954 break;
955 }
956 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
957 }
958
959 is_caf = fprintCallStack(ccs);
960
961 // traverse the stack down to the enclosing update frame to
962 // find out where this CCS was evaluated from...
963
964 stack = tso->stackobj;
965 frame = stack->sp;
966 prev_ccs = ccs;
967
968 for (; is_caf && depth < MAX_DEPTH; depth++)
969 {
970 switch (get_itbl((StgClosure*)frame)->type)
971 {
972 case UPDATE_FRAME:
973 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
974 frame += sizeofW(StgUpdateFrame);
975 if (ccs == CCS_MAIN) {
976 goto done;
977 }
978 if (ccs == prev_ccs) {
979 // ignore if this is the same as the previous stack,
980 // we're probably in library code and haven't
981 // accumulated any more interesting stack items
982 // since the last update frame.
983 break;
984 }
985 prev_ccs = ccs;
986 fprintf(stderr, " --> evaluated by: ");
987 is_caf = fprintCallStack(ccs);
988 break;
989 case UNDERFLOW_FRAME:
990 stack = ((StgUnderflowFrame*)frame)->next_chunk;
991 frame = stack->sp;
992 break;
993 case STOP_FRAME:
994 goto done;
995 default:
996 frame += stack_frame_sizeW((StgClosure*)frame);
997 break;
998 }
999 }
1000 done:
1001 return;
1002 }
1003
1004 #if defined(DEBUG)
1005 void
1006 debugCCS( CostCentreStack *ccs )
1007 {
1008 debugBelch("<");
1009 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1010 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
1011 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1012 debugBelch(",");
1013 }
1014 }
1015 debugBelch(">");
1016 }
1017 #endif /* DEBUG */
1018
1019 #endif /* PROFILING */