rts: Allow profile output path to be specified on RTS command line
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "ProfilerReport.h"
21 #include "Printer.h"
22 #include "Capability.h"
23
24 #include <string.h>
25
26 #ifdef DEBUG
27 #include "Trace.h"
28 #endif
29
30 /*
31 * Profiling allocation arena.
32 */
33 static Arena *prof_arena;
34
35 /*
36 * Global variables used to assign unique IDs to cc's, ccs's, and
37 * closure_cats
38 */
39
40 unsigned int CC_ID = 1;
41 unsigned int CCS_ID = 1;
42
43 /* Globals for opening the profiling log file(s)
44 */
45 static char *prof_filename; /* prof report file name = <program>.prof */
46 FILE *prof_file;
47
48 static char *hp_filename; /* heap profile (hp2ps style) log file */
49 FILE *hp_file;
50
51 /* Linked lists to keep track of CCs and CCSs that haven't
52 * been declared in the log file yet
53 */
54 CostCentre *CC_LIST = NULL;
55 CostCentreStack *CCS_LIST = NULL;
56
57 #ifdef THREADED_RTS
58 static Mutex ccs_mutex;
59 #endif
60
61 /*
62 * Built-in cost centres and cost-centre stacks:
63 *
64 * MAIN is the root of the cost-centre stack tree. If there are
65 * no {-# SCC #-}s in the program, all costs will be attributed
66 * to MAIN.
67 *
68 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
69 * RTS operations apart from garbage collection are attributed
70 * to SYSTEM.
71 *
72 * GC is the storage manager / garbage collector.
73 *
74 * OVERHEAD gets all costs generated by the profiling system
75 * itself. These are costs that would not be incurred
76 * during non-profiled execution of the program.
77 *
78 * DONT_CARE is a placeholder cost-centre we assign to static
79 * constructors. It should *never* accumulate any costs.
80 *
81 * PINNED accumulates memory allocated to pinned objects, which
82 * cannot be profiled separately because we cannot reliably
83 * traverse pinned memory.
84 */
85
86 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "<built-in>", CC_NOT_CAF, );
87 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", "<built-in>", CC_NOT_CAF, );
88 CC_DECLARE(CC_GC, "GC", "GC", "<built-in>", CC_NOT_CAF, );
89 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "<built-in>", CC_NOT_CAF, );
90 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", "<built-in>", CC_NOT_CAF, );
91 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", "<built-in>", CC_NOT_CAF, );
92 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "<built-in>", CC_NOT_CAF, );
93
94 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
95 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
96 CCS_DECLARE(CCS_GC, CC_GC, );
97 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
98 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
99 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
100 CCS_DECLARE(CCS_IDLE, CC_IDLE, );
101
102 /*
103 * Static Functions
104 */
105
106 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
107 CostCentreStack *ccs2 );
108 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
109 CostCentreStack *new_ccs );
110 static void inheritCosts ( CostCentreStack *ccs );
111 static ProfilerTotals countTickss ( CostCentreStack const *ccs );
112 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
113 CostCentre *cc );
114 static void sortCCSTree ( CostCentreStack *ccs );
115 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
116 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
117 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
118 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
119 CostCentre *, unsigned int );
120 static void ccsSetSelected ( CostCentreStack *ccs );
121 static void aggregateCCCosts( CostCentreStack *ccs );
122
123 static void initTimeProfiling ( void );
124 static void initProfilingLogFile ( void );
125
126 /* -----------------------------------------------------------------------------
127 Initialise the profiling environment
128 -------------------------------------------------------------------------- */
129
130 void initProfiling (void)
131 {
132 // initialise our arena
133 prof_arena = newArena();
134
135 /* for the benefit of allocate()... */
136 {
137 uint32_t n;
138 for (n=0; n < n_capabilities; n++) {
139 capabilities[n]->r.rCCCS = CCS_SYSTEM;
140 }
141 }
142
143 #ifdef THREADED_RTS
144 initMutex(&ccs_mutex);
145 #endif
146
147 /* Set up the log file, and dump the header and cost centre
148 * information into it.
149 */
150 initProfilingLogFile();
151
152 /* Register all the cost centres / stacks in the program
153 * CC_MAIN gets link = 0, all others have non-zero link.
154 */
155 REGISTER_CC(CC_MAIN);
156 REGISTER_CC(CC_SYSTEM);
157 REGISTER_CC(CC_GC);
158 REGISTER_CC(CC_OVERHEAD);
159 REGISTER_CC(CC_DONT_CARE);
160 REGISTER_CC(CC_PINNED);
161 REGISTER_CC(CC_IDLE);
162
163 REGISTER_CCS(CCS_SYSTEM);
164 REGISTER_CCS(CCS_GC);
165 REGISTER_CCS(CCS_OVERHEAD);
166 REGISTER_CCS(CCS_DONT_CARE);
167 REGISTER_CCS(CCS_PINNED);
168 REGISTER_CCS(CCS_IDLE);
169 REGISTER_CCS(CCS_MAIN);
170
171 /* find all the registered cost centre stacks, and make them
172 * children of CCS_MAIN.
173 */
174 ASSERT(CCS_LIST == CCS_MAIN);
175 CCS_LIST = CCS_LIST->prevStack;
176 CCS_MAIN->prevStack = NULL;
177 CCS_MAIN->root = CCS_MAIN;
178 ccsSetSelected(CCS_MAIN);
179
180 initProfiling2();
181
182 if (RtsFlags.CcFlags.doCostCentres) {
183 initTimeProfiling();
184 }
185
186 if (RtsFlags.ProfFlags.doHeapProfile) {
187 initHeapProfiling();
188 }
189 }
190
191 //
192 // Should be called after loading any new Haskell code.
193 //
194 void initProfiling2 (void)
195 {
196 CostCentreStack *ccs, *next;
197
198 // make CCS_MAIN the parent of all the pre-defined CCSs.
199 for (ccs = CCS_LIST; ccs != NULL; ) {
200 next = ccs->prevStack;
201 ccs->prevStack = NULL;
202 actualPush_(CCS_MAIN,ccs->cc,ccs);
203 ccs->root = ccs;
204 ccs = next;
205 }
206 CCS_LIST = NULL;
207 }
208
209 void
210 freeProfiling (void)
211 {
212 arenaFree(prof_arena);
213 }
214
215 CostCentre *mkCostCentre (char *label, char *module, char *srcloc)
216 {
217 CostCentre *cc = stgMallocBytes (sizeof(CostCentre), "mkCostCentre");
218 cc->label = label;
219 cc->module = module;
220 cc->srcloc = srcloc;
221 cc->is_caf = 0;
222 cc->mem_alloc = 0;
223 cc->time_ticks = 0;
224 cc->link = NULL;
225 return cc;
226 }
227
228 static void
229 initProfilingLogFile(void)
230 {
231 // Figure out output file name stem.
232 char const *stem;
233 if (RtsFlags.CcFlags.outputFileNameStem) {
234 stem = RtsFlags.CcFlags.outputFileNameStem;
235 } else {
236 char *prog;
237
238 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
239 strcpy(prog, prog_name);
240 #ifdef mingw32_HOST_OS
241 // on Windows, drop the .exe suffix if there is one
242 {
243 char *suff;
244 suff = strrchr(prog,'.');
245 if (suff != NULL && !strcmp(suff,".exe")) {
246 *suff = '\0';
247 }
248 }
249 #endif
250 stem = prog;
251 }
252
253 if (RtsFlags.CcFlags.doCostCentres == 0 && !doingRetainerProfiling())
254 {
255 /* No need for the <stem>.prof file */
256 prof_filename = NULL;
257 prof_file = NULL;
258 }
259 else
260 {
261 /* Initialise the log file name */
262 prof_filename = arenaAlloc(prof_arena, strlen(stem) + 6);
263 sprintf(prof_filename, "%s.prof", stem);
264
265 /* open the log file */
266 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
267 debugBelch("Can't open profiling report file %s\n", prof_filename);
268 RtsFlags.CcFlags.doCostCentres = 0;
269 // Retainer profiling (`-hr` or `-hr<cc> -h<x>`) writes to
270 // both <program>.hp as <program>.prof.
271 if (doingRetainerProfiling()) {
272 RtsFlags.ProfFlags.doHeapProfile = 0;
273 }
274 }
275 }
276
277 if (RtsFlags.ProfFlags.doHeapProfile) {
278 /* Initialise the log file name */
279 hp_filename = arenaAlloc(prof_arena, strlen(stem) + 6);
280 sprintf(hp_filename, "%s.hp", stem);
281
282 /* open the log file */
283 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
284 debugBelch("Can't open profiling report file %s\n",
285 hp_filename);
286 RtsFlags.ProfFlags.doHeapProfile = 0;
287 }
288 }
289 }
290
291 void
292 initTimeProfiling(void)
293 {
294 /* Start ticking */
295 startProfTimer();
296 };
297
298 void
299 endProfiling ( void )
300 {
301 if (RtsFlags.CcFlags.doCostCentres) {
302 stopProfTimer();
303 }
304 if (RtsFlags.ProfFlags.doHeapProfile) {
305 endHeapProfiling();
306 }
307 }
308
309 /* -----------------------------------------------------------------------------
310 Set CCCS when entering a function.
311
312 The algorithm is as follows.
313
314 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
315
316 where
317
318 dropCommonPrefix A B
319 -- returns the suffix of B after removing any prefix common
320 -- to both A and B.
321
322 e.g.
323
324 <a,b,c> ++> <> = <a,b,c>
325 <a,b,c> ++> <d> = <a,b,c,d>
326 <a,b,c> ++> <a,b> = <a,b,c>
327 <a,b> ++> <a,b,c> = <a,b,c>
328 <a,b,c> ++> <a,b,d> = <a,b,c,d>
329
330 -------------------------------------------------------------------------- */
331
332 // implements c1 ++> c2, where c1 and c2 are equal depth
333 //
334 static CostCentreStack *
335 enterFunEqualStacks (CostCentreStack *ccs0,
336 CostCentreStack *ccsapp,
337 CostCentreStack *ccsfn)
338 {
339 ASSERT(ccsapp->depth == ccsfn->depth);
340 if (ccsapp == ccsfn) return ccs0;
341 return pushCostCentre(enterFunEqualStacks(ccs0,
342 ccsapp->prevStack,
343 ccsfn->prevStack),
344 ccsfn->cc);
345 }
346
347 // implements c1 ++> c2, where c2 is deeper than c1.
348 // Drop elements of c2 until we have equal stacks, call
349 // enterFunEqualStacks(), and then push on the elements that we
350 // dropped in reverse order.
351 //
352 static CostCentreStack *
353 enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
354 {
355 if (n == 0) {
356 ASSERT(ccsfn->depth == ccsapp->depth);
357 return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);;
358 } else {
359 ASSERT(ccsfn->depth > ccsapp->depth);
360 return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
361 ccsfn->cc);
362 }
363 }
364
365 void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
366 {
367 CostCentreStack *ccsapp;
368
369 // common case 1: both stacks are the same
370 if (ccsfn == reg->rCCCS) {
371 return;
372 }
373
374 // common case 2: the function stack is empty, or just CAF
375 if (ccsfn->cc->is_caf) {
376 return;
377 }
378
379 ccsapp = reg->rCCCS;
380 reg->rCCCS = CCS_OVERHEAD;
381
382 // common case 3: the stacks are completely different (e.g. one is a
383 // descendent of MAIN and the other of a CAF): we append the whole
384 // of the function stack to the current CCS.
385 if (ccsfn->root != ccsapp->root) {
386 reg->rCCCS = appendCCS(ccsapp,ccsfn);
387 return;
388 }
389
390 // uncommon case 4: ccsapp is deeper than ccsfn
391 if (ccsapp->depth > ccsfn->depth) {
392 uint32_t i, n;
393 CostCentreStack *tmp = ccsapp;
394 n = ccsapp->depth - ccsfn->depth;
395 for (i = 0; i < n; i++) {
396 tmp = tmp->prevStack;
397 }
398 reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
399 return;
400 }
401
402 // uncommon case 5: ccsfn is deeper than CCCS
403 if (ccsfn->depth > ccsapp->depth) {
404 reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
405 ccsfn->depth - ccsapp->depth);
406 return;
407 }
408
409 // uncommon case 6: stacks are equal depth, but different
410 reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
411 }
412
413 /* -----------------------------------------------------------------------------
414 Decide whether closures with this CCS should contribute to the heap
415 profile.
416 -------------------------------------------------------------------------- */
417
418 static void
419 ccsSetSelected (CostCentreStack *ccs)
420 {
421 if (RtsFlags.ProfFlags.modSelector) {
422 if (! strMatchesSelector (ccs->cc->module,
423 RtsFlags.ProfFlags.modSelector) ) {
424 ccs->selected = 0;
425 return;
426 }
427 }
428 if (RtsFlags.ProfFlags.ccSelector) {
429 if (! strMatchesSelector (ccs->cc->label,
430 RtsFlags.ProfFlags.ccSelector) ) {
431 ccs->selected = 0;
432 return;
433 }
434 }
435 if (RtsFlags.ProfFlags.ccsSelector) {
436 CostCentreStack *c;
437 for (c = ccs; c != NULL; c = c->prevStack)
438 {
439 if ( strMatchesSelector (c->cc->label,
440 RtsFlags.ProfFlags.ccsSelector) ) {
441 break;
442 }
443 }
444 if (c == NULL) {
445 ccs->selected = 0;
446 return;
447 }
448 }
449
450 ccs->selected = 1;
451 return;
452 }
453
454 /* -----------------------------------------------------------------------------
455 Cost-centre stack manipulation
456 -------------------------------------------------------------------------- */
457
458 #ifdef DEBUG
459 CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
460 CostCentreStack *
461 pushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
462 #define pushCostCentre _pushCostCentre
463 {
464 IF_DEBUG(prof,
465 traceBegin("pushing %s on ", cc->label);
466 debugCCS(ccs);
467 traceEnd(););
468
469 return pushCostCentre(ccs,cc);
470 }
471 #endif
472
473 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
474
475 #ifdef DEBUG
476 CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
477 CostCentreStack *
478 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
479 #define appendCCS _appendCCS
480 {
481 IF_DEBUG(prof,
482 if (ccs1 != ccs2) {
483 debugBelch("Appending ");
484 debugCCS(ccs1);
485 debugBelch(" to ");
486 debugCCS(ccs2);
487 debugBelch("\n");});
488 return appendCCS(ccs1,ccs2);
489 }
490 #endif
491
492 CostCentreStack *
493 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
494 {
495 if (ccs1 == ccs2) {
496 return ccs1;
497 }
498
499 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) {
500 // stop at a CAF element
501 return ccs1;
502 }
503
504 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
505 }
506
507 // Pick one:
508 // #define RECURSION_DROPS
509 #define RECURSION_TRUNCATES
510
511 CostCentreStack *
512 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
513 {
514 CostCentreStack *temp_ccs, *ret;
515 IndexTable *ixtable;
516
517 if (ccs == EMPTY_STACK) {
518 ACQUIRE_LOCK(&ccs_mutex);
519 ret = actualPush(ccs,cc);
520 }
521 else
522 {
523 if (ccs->cc == cc) {
524 return ccs;
525 } else {
526 // check if we've already memoized this stack
527 ixtable = ccs->indexTable;
528 temp_ccs = isInIndexTable(ixtable,cc);
529
530 if (temp_ccs != EMPTY_STACK) {
531 return temp_ccs;
532 } else {
533
534 // not in the IndexTable, now we take the lock:
535 ACQUIRE_LOCK(&ccs_mutex);
536
537 if (ccs->indexTable != ixtable)
538 {
539 // someone modified ccs->indexTable while
540 // we did not hold the lock, so we must
541 // check it again:
542 temp_ccs = isInIndexTable(ixtable,cc);
543 if (temp_ccs != EMPTY_STACK)
544 {
545 RELEASE_LOCK(&ccs_mutex);
546 return temp_ccs;
547 }
548 }
549 temp_ccs = checkLoop(ccs,cc);
550 if (temp_ccs != NULL) {
551 // This CC is already in the stack somewhere.
552 // This could be recursion, or just calling
553 // another function with the same CC.
554 // A number of policies are possible at this
555 // point, we implement two here:
556 // - truncate the stack to the previous instance
557 // of this CC
558 // - ignore this push, return the same stack.
559 //
560 CostCentreStack *new_ccs;
561 #if defined(RECURSION_TRUNCATES)
562 new_ccs = temp_ccs;
563 #else // defined(RECURSION_DROPS)
564 new_ccs = ccs;
565 #endif
566 ccs->indexTable = addToIndexTable (ccs->indexTable,
567 new_ccs, cc, 1);
568 ret = new_ccs;
569 } else {
570 ret = actualPush (ccs,cc);
571 }
572 }
573 }
574 }
575
576 RELEASE_LOCK(&ccs_mutex);
577 return ret;
578 }
579
580 static CostCentreStack *
581 checkLoop (CostCentreStack *ccs, CostCentre *cc)
582 {
583 while (ccs != EMPTY_STACK) {
584 if (ccs->cc == cc)
585 return ccs;
586 ccs = ccs->prevStack;
587 }
588 return NULL;
589 }
590
591 static CostCentreStack *
592 actualPush (CostCentreStack *ccs, CostCentre *cc)
593 {
594 CostCentreStack *new_ccs;
595
596 // allocate space for a new CostCentreStack
597 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
598
599 return actualPush_(ccs, cc, new_ccs);
600 }
601
602 static CostCentreStack *
603 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
604 {
605 /* assign values to each member of the structure */
606 new_ccs->ccsID = CCS_ID++;
607 new_ccs->cc = cc;
608 new_ccs->prevStack = ccs;
609 new_ccs->root = ccs->root;
610 new_ccs->depth = ccs->depth + 1;
611
612 new_ccs->indexTable = EMPTY_TABLE;
613
614 /* Initialise the various _scc_ counters to zero
615 */
616 new_ccs->scc_count = 0;
617
618 /* Initialize all other stats here. There should be a quick way
619 * that's easily used elsewhere too
620 */
621 new_ccs->time_ticks = 0;
622 new_ccs->mem_alloc = 0;
623 new_ccs->inherited_ticks = 0;
624 new_ccs->inherited_alloc = 0;
625
626 // Set the selected field.
627 ccsSetSelected(new_ccs);
628
629 /* update the memoization table for the parent stack */
630 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
631 0/*not a back edge*/);
632
633 /* return a pointer to the new stack */
634 return new_ccs;
635 }
636
637
638 static CostCentreStack *
639 isInIndexTable(IndexTable *it, CostCentre *cc)
640 {
641 while (it!=EMPTY_TABLE)
642 {
643 if (it->cc == cc)
644 return it->ccs;
645 else
646 it = it->next;
647 }
648
649 /* otherwise we never found it so return EMPTY_TABLE */
650 return EMPTY_TABLE;
651 }
652
653
654 static IndexTable *
655 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
656 CostCentre *cc, unsigned int back_edge)
657 {
658 IndexTable *new_it;
659
660 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
661
662 new_it->cc = cc;
663 new_it->ccs = new_ccs;
664 new_it->next = it;
665 new_it->back_edge = back_edge;
666 return new_it;
667 }
668
669 /* -----------------------------------------------------------------------------
670 Generating a time & allocation profiling report.
671 -------------------------------------------------------------------------- */
672
673 /* We omit certain system-related CCs and CCSs from the default
674 * reports, so as not to cause confusion.
675 */
676 bool
677 ignoreCC (CostCentre const *cc)
678 {
679 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
680 ( cc == CC_OVERHEAD
681 || cc == CC_DONT_CARE
682 || cc == CC_GC
683 || cc == CC_SYSTEM
684 || cc == CC_IDLE);
685 }
686
687 bool
688 ignoreCCS (CostCentreStack const *ccs)
689 {
690 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
691 ( ccs == CCS_OVERHEAD
692 || ccs == CCS_DONT_CARE
693 || ccs == CCS_GC
694 || ccs == CCS_SYSTEM
695 || ccs == CCS_IDLE);
696 }
697
698 void
699 reportCCSProfiling( void )
700 {
701 stopProfTimer();
702 if (RtsFlags.CcFlags.doCostCentres == 0) return;
703
704 ProfilerTotals totals = countTickss(CCS_MAIN);
705 aggregateCCCosts(CCS_MAIN);
706 inheritCosts(CCS_MAIN);
707 CostCentreStack *stack = pruneCCSTree(CCS_MAIN);
708 sortCCSTree(stack);
709
710 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_JSON) {
711 writeCCSReportJson(prof_file, stack, totals);
712 } else {
713 writeCCSReport(prof_file, stack, totals);
714 }
715 }
716
717 /* -----------------------------------------------------------------------------
718 * Accumulating total allocatinos and tick count
719 -------------------------------------------------------------------------- */
720
721 /* Helper */
722 static void
723 countTickss_(CostCentreStack const *ccs, ProfilerTotals *totals)
724 {
725 if (!ignoreCCS(ccs)) {
726 totals->total_alloc += ccs->mem_alloc;
727 totals->total_prof_ticks += ccs->time_ticks;
728 }
729 for (IndexTable *i = ccs->indexTable; i != NULL; i = i->next) {
730 if (!i->back_edge) {
731 countTickss_(i->ccs, totals);
732 }
733 }
734 }
735
736 /* Traverse the cost centre stack tree and accumulate
737 * total ticks/allocations.
738 */
739 static ProfilerTotals
740 countTickss(CostCentreStack const *ccs)
741 {
742 ProfilerTotals totals = {0,0};
743 countTickss_(ccs, &totals);
744 return totals;
745 }
746
747 /* Traverse the cost centre stack tree and inherit ticks & allocs.
748 */
749 static void
750 inheritCosts(CostCentreStack *ccs)
751 {
752 IndexTable *i;
753
754 if (ignoreCCS(ccs)) { return; }
755
756 ccs->inherited_ticks += ccs->time_ticks;
757 ccs->inherited_alloc += ccs->mem_alloc;
758
759 for (i = ccs->indexTable; i != NULL; i = i->next)
760 if (!i->back_edge) {
761 inheritCosts(i->ccs);
762 ccs->inherited_ticks += i->ccs->inherited_ticks;
763 ccs->inherited_alloc += i->ccs->inherited_alloc;
764 }
765
766 return;
767 }
768
769 static void
770 aggregateCCCosts( CostCentreStack *ccs )
771 {
772 IndexTable *i;
773
774 ccs->cc->mem_alloc += ccs->mem_alloc;
775 ccs->cc->time_ticks += ccs->time_ticks;
776
777 for (i = ccs->indexTable; i != 0; i = i->next) {
778 if (!i->back_edge) {
779 aggregateCCCosts(i->ccs);
780 }
781 }
782 }
783
784 //
785 // Prune CCSs with zero entries, zero ticks or zero allocation from
786 // the tree, unless COST_CENTRES_ALL is on.
787 //
788 static CostCentreStack *
789 pruneCCSTree (CostCentreStack *ccs)
790 {
791 CostCentreStack *ccs1;
792 IndexTable *i, **prev;
793
794 prev = &ccs->indexTable;
795 for (i = ccs->indexTable; i != 0; i = i->next) {
796 if (i->back_edge) { continue; }
797
798 ccs1 = pruneCCSTree(i->ccs);
799 if (ccs1 == NULL) {
800 *prev = i->next;
801 } else {
802 prev = &(i->next);
803 }
804 }
805
806 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
807 /* force printing of *all* cost centres if -P -P */ )
808
809 || ( ccs->indexTable != 0 )
810 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
811 ) {
812 return ccs;
813 } else {
814 return NULL;
815 }
816 }
817
818 static IndexTable*
819 insertIndexTableInSortedList(IndexTable* tbl, IndexTable* sortedList)
820 {
821 StgWord tbl_ticks = tbl->ccs->scc_count;
822 char* tbl_label = tbl->ccs->cc->label;
823
824 IndexTable *prev = NULL;
825 IndexTable *cursor = sortedList;
826
827 while (cursor != NULL) {
828 StgWord cursor_ticks = cursor->ccs->scc_count;
829 char* cursor_label = cursor->ccs->cc->label;
830
831 if (tbl_ticks > cursor_ticks ||
832 (tbl_ticks == cursor_ticks && strcmp(tbl_label, cursor_label) < 0)) {
833 if (prev == NULL) {
834 tbl->next = sortedList;
835 return tbl;
836 } else {
837 prev->next = tbl;
838 tbl->next = cursor;
839 return sortedList;
840 }
841 } else {
842 prev = cursor;
843 cursor = cursor->next;
844 }
845 }
846
847 prev->next = tbl;
848 return sortedList;
849 }
850
851 static void
852 sortCCSTree(CostCentreStack *ccs)
853 {
854 if (ccs->indexTable == NULL) return;
855
856 for (IndexTable *tbl = ccs->indexTable; tbl != NULL; tbl = tbl->next)
857 if (!tbl->back_edge)
858 sortCCSTree(tbl->ccs);
859
860 IndexTable *sortedList = ccs->indexTable;
861 IndexTable *nonSortedList = sortedList->next;
862 sortedList->next = NULL;
863
864 while (nonSortedList != NULL)
865 {
866 IndexTable *nonSortedTail = nonSortedList->next;
867 nonSortedList->next = NULL;
868 sortedList = insertIndexTableInSortedList(nonSortedList, sortedList);
869 nonSortedList = nonSortedTail;
870 }
871
872 ccs->indexTable = sortedList;
873 }
874
875 void
876 fprintCCS( FILE *f, CostCentreStack *ccs )
877 {
878 fprintf(f,"<");
879 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
880 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
881 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
882 fprintf(f,",");
883 }
884 }
885 fprintf(f,">");
886 }
887
888 // Returns: True if the call stack ended with CAF
889 static bool fprintCallStack (CostCentreStack *ccs)
890 {
891 CostCentreStack *prev;
892
893 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
894 prev = ccs->prevStack;
895 while (prev && prev != CCS_MAIN) {
896 ccs = prev;
897 fprintf(stderr, ",\n called from %s.%s",
898 ccs->cc->module, ccs->cc->label);
899 prev = ccs->prevStack;
900 }
901 fprintf(stderr, "\n");
902
903 return (!strncmp(ccs->cc->label, "CAF", 3));
904 }
905
906 /* For calling from .cmm code, where we can't reliably refer to stderr */
907 void
908 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
909 {
910 bool is_caf;
911 StgPtr frame;
912 StgStack *stack;
913 CostCentreStack *prev_ccs;
914 uint32_t depth = 0;
915 const uint32_t MAX_DEPTH = 10; // don't print gigantic chains of stacks
916
917 {
918 const char *desc;
919 const StgInfoTable *info;
920 info = get_itbl(UNTAG_CONST_CLOSURE(exception));
921 switch (info->type) {
922 case CONSTR:
923 case CONSTR_1_0:
924 case CONSTR_0_1:
925 case CONSTR_2_0:
926 case CONSTR_1_1:
927 case CONSTR_0_2:
928 case CONSTR_NOCAF:
929 desc = GET_CON_DESC(itbl_to_con_itbl(info));
930 break;
931 default:
932 desc = closure_type_names[info->type];
933 break;
934 }
935 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
936 }
937
938 is_caf = fprintCallStack(ccs);
939
940 // traverse the stack down to the enclosing update frame to
941 // find out where this CCS was evaluated from...
942
943 stack = tso->stackobj;
944 frame = stack->sp;
945 prev_ccs = ccs;
946
947 for (; is_caf && depth < MAX_DEPTH; depth++)
948 {
949 switch (get_itbl((StgClosure*)frame)->type)
950 {
951 case UPDATE_FRAME:
952 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
953 frame += sizeofW(StgUpdateFrame);
954 if (ccs == CCS_MAIN) {
955 goto done;
956 }
957 if (ccs == prev_ccs) {
958 // ignore if this is the same as the previous stack,
959 // we're probably in library code and haven't
960 // accumulated any more interesting stack items
961 // since the last update frame.
962 break;
963 }
964 prev_ccs = ccs;
965 fprintf(stderr, " --> evaluated by: ");
966 is_caf = fprintCallStack(ccs);
967 break;
968 case UNDERFLOW_FRAME:
969 stack = ((StgUnderflowFrame*)frame)->next_chunk;
970 frame = stack->sp;
971 break;
972 case STOP_FRAME:
973 goto done;
974 default:
975 frame += stack_frame_sizeW((StgClosure*)frame);
976 break;
977 }
978 }
979 done:
980 return;
981 }
982
983 #ifdef DEBUG
984 void
985 debugCCS( CostCentreStack *ccs )
986 {
987 debugBelch("<");
988 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
989 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
990 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
991 debugBelch(",");
992 }
993 }
994 debugBelch(">");
995 }
996 #endif /* DEBUG */
997
998 #endif /* PROFILING */