Prefer #if defined to #ifdef
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if defined(PROFILING)
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "ProfilerReport.h"
21 #include "ProfilerReportJson.h"
22 #include "Printer.h"
23 #include "Capability.h"
24
25 #include <string.h>
26
27 #if defined(DEBUG)
28 #include "Trace.h"
29 #endif
30
31 /*
32 * Profiling allocation arena.
33 */
34 static Arena *prof_arena;
35
36 /*
37 * Global variables used to assign unique IDs to cc's, ccs's, and
38 * closure_cats
39 */
40
41 unsigned int CC_ID = 1;
42 unsigned int CCS_ID = 1;
43
44 /* Globals for opening the profiling log file(s)
45 */
46 static char *prof_filename; /* prof report file name = <program>.prof */
47 FILE *prof_file;
48
49 static char *hp_filename; /* heap profile (hp2ps style) log file */
50 FILE *hp_file;
51
52 /* Linked lists to keep track of CCs and CCSs that haven't
53 * been declared in the log file yet
54 */
55 CostCentre *CC_LIST = NULL;
56 CostCentreStack *CCS_LIST = NULL;
57
58 #if defined(THREADED_RTS)
59 static Mutex ccs_mutex;
60 #endif
61
62 /*
63 * Built-in cost centres and cost-centre stacks:
64 *
65 * MAIN is the root of the cost-centre stack tree. If there are
66 * no {-# SCC #-}s in the program, all costs will be attributed
67 * to MAIN.
68 *
69 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
70 * RTS operations apart from garbage collection are attributed
71 * to SYSTEM.
72 *
73 * GC is the storage manager / garbage collector.
74 *
75 * OVERHEAD gets all costs generated by the profiling system
76 * itself. These are costs that would not be incurred
77 * during non-profiled execution of the program.
78 *
79 * DONT_CARE is a placeholder cost-centre we assign to static
80 * constructors. It should *never* accumulate any costs.
81 *
82 * PINNED accumulates memory allocated to pinned objects, which
83 * cannot be profiled separately because we cannot reliably
84 * traverse pinned memory.
85 */
86
87 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "<built-in>", CC_NOT_CAF, );
88 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", "<built-in>", CC_NOT_CAF, );
89 CC_DECLARE(CC_GC, "GC", "GC", "<built-in>", CC_NOT_CAF, );
90 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "<built-in>", CC_NOT_CAF, );
91 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", "<built-in>", CC_NOT_CAF, );
92 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", "<built-in>", CC_NOT_CAF, );
93 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "<built-in>", CC_NOT_CAF, );
94
95 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
96 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
97 CCS_DECLARE(CCS_GC, CC_GC, );
98 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
99 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
100 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
101 CCS_DECLARE(CCS_IDLE, CC_IDLE, );
102
103 /*
104 * Static Functions
105 */
106
107 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
108 CostCentreStack *ccs2 );
109 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
110 CostCentreStack *new_ccs );
111 static void inheritCosts ( CostCentreStack *ccs );
112 static ProfilerTotals countTickss ( CostCentreStack const *ccs );
113 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
114 CostCentre *cc );
115 static void sortCCSTree ( CostCentreStack *ccs );
116 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
117 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
118 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
119 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
120 CostCentre *, unsigned int );
121 static void ccsSetSelected ( CostCentreStack *ccs );
122 static void aggregateCCCosts( CostCentreStack *ccs );
123
124 static void initTimeProfiling ( void );
125 static void initProfilingLogFile ( void );
126
127 /* -----------------------------------------------------------------------------
128 Initialise the profiling environment
129 -------------------------------------------------------------------------- */
130
131 void initProfiling (void)
132 {
133 // initialise our arena
134 prof_arena = newArena();
135
136 /* for the benefit of allocate()... */
137 {
138 uint32_t n;
139 for (n=0; n < n_capabilities; n++) {
140 capabilities[n]->r.rCCCS = CCS_SYSTEM;
141 }
142 }
143
144 #if defined(THREADED_RTS)
145 initMutex(&ccs_mutex);
146 #endif
147
148 /* Set up the log file, and dump the header and cost centre
149 * information into it.
150 */
151 initProfilingLogFile();
152
153 /* Register all the cost centres / stacks in the program
154 * CC_MAIN gets link = 0, all others have non-zero link.
155 */
156 REGISTER_CC(CC_MAIN);
157 REGISTER_CC(CC_SYSTEM);
158 REGISTER_CC(CC_GC);
159 REGISTER_CC(CC_OVERHEAD);
160 REGISTER_CC(CC_DONT_CARE);
161 REGISTER_CC(CC_PINNED);
162 REGISTER_CC(CC_IDLE);
163
164 REGISTER_CCS(CCS_SYSTEM);
165 REGISTER_CCS(CCS_GC);
166 REGISTER_CCS(CCS_OVERHEAD);
167 REGISTER_CCS(CCS_DONT_CARE);
168 REGISTER_CCS(CCS_PINNED);
169 REGISTER_CCS(CCS_IDLE);
170 REGISTER_CCS(CCS_MAIN);
171
172 /* find all the registered cost centre stacks, and make them
173 * children of CCS_MAIN.
174 */
175 ASSERT(CCS_LIST == CCS_MAIN);
176 CCS_LIST = CCS_LIST->prevStack;
177 CCS_MAIN->prevStack = NULL;
178 CCS_MAIN->root = CCS_MAIN;
179 ccsSetSelected(CCS_MAIN);
180
181 initProfiling2();
182
183 if (RtsFlags.CcFlags.doCostCentres) {
184 initTimeProfiling();
185 }
186
187 if (RtsFlags.ProfFlags.doHeapProfile) {
188 initHeapProfiling();
189 }
190 }
191
192 //
193 // Should be called after loading any new Haskell code.
194 //
195 void initProfiling2 (void)
196 {
197 CostCentreStack *ccs, *next;
198
199 // make CCS_MAIN the parent of all the pre-defined CCSs.
200 for (ccs = CCS_LIST; ccs != NULL; ) {
201 next = ccs->prevStack;
202 ccs->prevStack = NULL;
203 actualPush_(CCS_MAIN,ccs->cc,ccs);
204 ccs->root = ccs;
205 ccs = next;
206 }
207 CCS_LIST = NULL;
208 }
209
210 void
211 freeProfiling (void)
212 {
213 arenaFree(prof_arena);
214 }
215
216 CostCentre *mkCostCentre (char *label, char *module, char *srcloc)
217 {
218 CostCentre *cc = stgMallocBytes (sizeof(CostCentre), "mkCostCentre");
219 cc->label = label;
220 cc->module = module;
221 cc->srcloc = srcloc;
222 cc->is_caf = 0;
223 cc->mem_alloc = 0;
224 cc->time_ticks = 0;
225 cc->link = NULL;
226 return cc;
227 }
228
229 static void
230 initProfilingLogFile(void)
231 {
232 // Figure out output file name stem.
233 char const *stem;
234 if (RtsFlags.CcFlags.outputFileNameStem) {
235 stem = RtsFlags.CcFlags.outputFileNameStem;
236 } else {
237 char *prog;
238
239 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
240 strcpy(prog, prog_name);
241 #if defined(mingw32_HOST_OS)
242 // on Windows, drop the .exe suffix if there is one
243 {
244 char *suff;
245 suff = strrchr(prog,'.');
246 if (suff != NULL && !strcmp(suff,".exe")) {
247 *suff = '\0';
248 }
249 }
250 #endif
251 stem = prog;
252 }
253
254 if (RtsFlags.CcFlags.doCostCentres == 0 && !doingRetainerProfiling())
255 {
256 /* No need for the <stem>.prof file */
257 prof_filename = NULL;
258 prof_file = NULL;
259 }
260 else
261 {
262 /* Initialise the log file name */
263 prof_filename = arenaAlloc(prof_arena, strlen(stem) + 6);
264 sprintf(prof_filename, "%s.prof", stem);
265
266 /* open the log file */
267 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
268 debugBelch("Can't open profiling report file %s\n", prof_filename);
269 RtsFlags.CcFlags.doCostCentres = 0;
270 // Retainer profiling (`-hr` or `-hr<cc> -h<x>`) writes to
271 // both <program>.hp as <program>.prof.
272 if (doingRetainerProfiling()) {
273 RtsFlags.ProfFlags.doHeapProfile = 0;
274 }
275 }
276 }
277
278 if (RtsFlags.ProfFlags.doHeapProfile) {
279 /* Initialise the log file name */
280 hp_filename = arenaAlloc(prof_arena, strlen(stem) + 6);
281 sprintf(hp_filename, "%s.hp", stem);
282
283 /* open the log file */
284 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
285 debugBelch("Can't open profiling report file %s\n",
286 hp_filename);
287 RtsFlags.ProfFlags.doHeapProfile = 0;
288 }
289 }
290 }
291
292 void
293 initTimeProfiling(void)
294 {
295 /* Start ticking */
296 startProfTimer();
297 };
298
299 void
300 endProfiling ( void )
301 {
302 if (RtsFlags.CcFlags.doCostCentres) {
303 stopProfTimer();
304 }
305 if (RtsFlags.ProfFlags.doHeapProfile) {
306 endHeapProfiling();
307 }
308 }
309
310 /* -----------------------------------------------------------------------------
311 Set CCCS when entering a function.
312
313 The algorithm is as follows.
314
315 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
316
317 where
318
319 dropCommonPrefix A B
320 -- returns the suffix of B after removing any prefix common
321 -- to both A and B.
322
323 e.g.
324
325 <a,b,c> ++> <> = <a,b,c>
326 <a,b,c> ++> <d> = <a,b,c,d>
327 <a,b,c> ++> <a,b> = <a,b,c>
328 <a,b> ++> <a,b,c> = <a,b,c>
329 <a,b,c> ++> <a,b,d> = <a,b,c,d>
330
331 -------------------------------------------------------------------------- */
332
333 // implements c1 ++> c2, where c1 and c2 are equal depth
334 //
335 static CostCentreStack *
336 enterFunEqualStacks (CostCentreStack *ccs0,
337 CostCentreStack *ccsapp,
338 CostCentreStack *ccsfn)
339 {
340 ASSERT(ccsapp->depth == ccsfn->depth);
341 if (ccsapp == ccsfn) return ccs0;
342 return pushCostCentre(enterFunEqualStacks(ccs0,
343 ccsapp->prevStack,
344 ccsfn->prevStack),
345 ccsfn->cc);
346 }
347
348 // implements c1 ++> c2, where c2 is deeper than c1.
349 // Drop elements of c2 until we have equal stacks, call
350 // enterFunEqualStacks(), and then push on the elements that we
351 // dropped in reverse order.
352 //
353 static CostCentreStack *
354 enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
355 {
356 if (n == 0) {
357 ASSERT(ccsfn->depth == ccsapp->depth);
358 return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);;
359 } else {
360 ASSERT(ccsfn->depth > ccsapp->depth);
361 return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
362 ccsfn->cc);
363 }
364 }
365
366 void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
367 {
368 CostCentreStack *ccsapp;
369
370 // common case 1: both stacks are the same
371 if (ccsfn == reg->rCCCS) {
372 return;
373 }
374
375 // common case 2: the function stack is empty, or just CAF
376 if (ccsfn->cc->is_caf) {
377 return;
378 }
379
380 ccsapp = reg->rCCCS;
381 reg->rCCCS = CCS_OVERHEAD;
382
383 // common case 3: the stacks are completely different (e.g. one is a
384 // descendent of MAIN and the other of a CAF): we append the whole
385 // of the function stack to the current CCS.
386 if (ccsfn->root != ccsapp->root) {
387 reg->rCCCS = appendCCS(ccsapp,ccsfn);
388 return;
389 }
390
391 // uncommon case 4: ccsapp is deeper than ccsfn
392 if (ccsapp->depth > ccsfn->depth) {
393 uint32_t i, n;
394 CostCentreStack *tmp = ccsapp;
395 n = ccsapp->depth - ccsfn->depth;
396 for (i = 0; i < n; i++) {
397 tmp = tmp->prevStack;
398 }
399 reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
400 return;
401 }
402
403 // uncommon case 5: ccsfn is deeper than CCCS
404 if (ccsfn->depth > ccsapp->depth) {
405 reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
406 ccsfn->depth - ccsapp->depth);
407 return;
408 }
409
410 // uncommon case 6: stacks are equal depth, but different
411 reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
412 }
413
414 /* -----------------------------------------------------------------------------
415 Decide whether closures with this CCS should contribute to the heap
416 profile.
417 -------------------------------------------------------------------------- */
418
419 static void
420 ccsSetSelected (CostCentreStack *ccs)
421 {
422 if (RtsFlags.ProfFlags.modSelector) {
423 if (! strMatchesSelector (ccs->cc->module,
424 RtsFlags.ProfFlags.modSelector) ) {
425 ccs->selected = 0;
426 return;
427 }
428 }
429 if (RtsFlags.ProfFlags.ccSelector) {
430 if (! strMatchesSelector (ccs->cc->label,
431 RtsFlags.ProfFlags.ccSelector) ) {
432 ccs->selected = 0;
433 return;
434 }
435 }
436 if (RtsFlags.ProfFlags.ccsSelector) {
437 CostCentreStack *c;
438 for (c = ccs; c != NULL; c = c->prevStack)
439 {
440 if ( strMatchesSelector (c->cc->label,
441 RtsFlags.ProfFlags.ccsSelector) ) {
442 break;
443 }
444 }
445 if (c == NULL) {
446 ccs->selected = 0;
447 return;
448 }
449 }
450
451 ccs->selected = 1;
452 return;
453 }
454
455 /* -----------------------------------------------------------------------------
456 Cost-centre stack manipulation
457 -------------------------------------------------------------------------- */
458
459 #if defined(DEBUG)
460 CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
461 CostCentreStack *
462 pushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
463 #define pushCostCentre _pushCostCentre
464 {
465 IF_DEBUG(prof,
466 traceBegin("pushing %s on ", cc->label);
467 debugCCS(ccs);
468 traceEnd(););
469
470 return pushCostCentre(ccs,cc);
471 }
472 #endif
473
474 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
475
476 #if defined(DEBUG)
477 CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
478 CostCentreStack *
479 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
480 #define appendCCS _appendCCS
481 {
482 IF_DEBUG(prof,
483 if (ccs1 != ccs2) {
484 debugBelch("Appending ");
485 debugCCS(ccs1);
486 debugBelch(" to ");
487 debugCCS(ccs2);
488 debugBelch("\n");});
489 return appendCCS(ccs1,ccs2);
490 }
491 #endif
492
493 CostCentreStack *
494 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
495 {
496 if (ccs1 == ccs2) {
497 return ccs1;
498 }
499
500 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) {
501 // stop at a CAF element
502 return ccs1;
503 }
504
505 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
506 }
507
508 // Pick one:
509 // #define RECURSION_DROPS
510 #define RECURSION_TRUNCATES
511
512 CostCentreStack *
513 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
514 {
515 CostCentreStack *temp_ccs, *ret;
516 IndexTable *ixtable;
517
518 if (ccs == EMPTY_STACK) {
519 ACQUIRE_LOCK(&ccs_mutex);
520 ret = actualPush(ccs,cc);
521 }
522 else
523 {
524 if (ccs->cc == cc) {
525 return ccs;
526 } else {
527 // check if we've already memoized this stack
528 ixtable = ccs->indexTable;
529 temp_ccs = isInIndexTable(ixtable,cc);
530
531 if (temp_ccs != EMPTY_STACK) {
532 return temp_ccs;
533 } else {
534
535 // not in the IndexTable, now we take the lock:
536 ACQUIRE_LOCK(&ccs_mutex);
537
538 if (ccs->indexTable != ixtable)
539 {
540 // someone modified ccs->indexTable while
541 // we did not hold the lock, so we must
542 // check it again:
543 temp_ccs = isInIndexTable(ixtable,cc);
544 if (temp_ccs != EMPTY_STACK)
545 {
546 RELEASE_LOCK(&ccs_mutex);
547 return temp_ccs;
548 }
549 }
550 temp_ccs = checkLoop(ccs,cc);
551 if (temp_ccs != NULL) {
552 // This CC is already in the stack somewhere.
553 // This could be recursion, or just calling
554 // another function with the same CC.
555 // A number of policies are possible at this
556 // point, we implement two here:
557 // - truncate the stack to the previous instance
558 // of this CC
559 // - ignore this push, return the same stack.
560 //
561 CostCentreStack *new_ccs;
562 #if defined(RECURSION_TRUNCATES)
563 new_ccs = temp_ccs;
564 #else // defined(RECURSION_DROPS)
565 new_ccs = ccs;
566 #endif
567 ccs->indexTable = addToIndexTable (ccs->indexTable,
568 new_ccs, cc, 1);
569 ret = new_ccs;
570 } else {
571 ret = actualPush (ccs,cc);
572 }
573 }
574 }
575 }
576
577 RELEASE_LOCK(&ccs_mutex);
578 return ret;
579 }
580
581 static CostCentreStack *
582 checkLoop (CostCentreStack *ccs, CostCentre *cc)
583 {
584 while (ccs != EMPTY_STACK) {
585 if (ccs->cc == cc)
586 return ccs;
587 ccs = ccs->prevStack;
588 }
589 return NULL;
590 }
591
592 static CostCentreStack *
593 actualPush (CostCentreStack *ccs, CostCentre *cc)
594 {
595 CostCentreStack *new_ccs;
596
597 // allocate space for a new CostCentreStack
598 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
599
600 return actualPush_(ccs, cc, new_ccs);
601 }
602
603 static CostCentreStack *
604 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
605 {
606 /* assign values to each member of the structure */
607 new_ccs->ccsID = CCS_ID++;
608 new_ccs->cc = cc;
609 new_ccs->prevStack = ccs;
610 new_ccs->root = ccs->root;
611 new_ccs->depth = ccs->depth + 1;
612
613 new_ccs->indexTable = EMPTY_TABLE;
614
615 /* Initialise the various _scc_ counters to zero
616 */
617 new_ccs->scc_count = 0;
618
619 /* Initialize all other stats here. There should be a quick way
620 * that's easily used elsewhere too
621 */
622 new_ccs->time_ticks = 0;
623 new_ccs->mem_alloc = 0;
624 new_ccs->inherited_ticks = 0;
625 new_ccs->inherited_alloc = 0;
626
627 // Set the selected field.
628 ccsSetSelected(new_ccs);
629
630 /* update the memoization table for the parent stack */
631 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
632 0/*not a back edge*/);
633
634 /* return a pointer to the new stack */
635 return new_ccs;
636 }
637
638
639 static CostCentreStack *
640 isInIndexTable(IndexTable *it, CostCentre *cc)
641 {
642 while (it!=EMPTY_TABLE)
643 {
644 if (it->cc == cc)
645 return it->ccs;
646 else
647 it = it->next;
648 }
649
650 /* otherwise we never found it so return EMPTY_TABLE */
651 return EMPTY_TABLE;
652 }
653
654
655 static IndexTable *
656 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
657 CostCentre *cc, unsigned int back_edge)
658 {
659 IndexTable *new_it;
660
661 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
662
663 new_it->cc = cc;
664 new_it->ccs = new_ccs;
665 new_it->next = it;
666 new_it->back_edge = back_edge;
667 return new_it;
668 }
669
670 /* -----------------------------------------------------------------------------
671 Generating a time & allocation profiling report.
672 -------------------------------------------------------------------------- */
673
674 /* We omit certain system-related CCs and CCSs from the default
675 * reports, so as not to cause confusion.
676 */
677 bool
678 ignoreCC (CostCentre const *cc)
679 {
680 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
681 ( cc == CC_OVERHEAD
682 || cc == CC_DONT_CARE
683 || cc == CC_GC
684 || cc == CC_SYSTEM
685 || cc == CC_IDLE);
686 }
687
688 bool
689 ignoreCCS (CostCentreStack const *ccs)
690 {
691 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
692 ( ccs == CCS_OVERHEAD
693 || ccs == CCS_DONT_CARE
694 || ccs == CCS_GC
695 || ccs == CCS_SYSTEM
696 || ccs == CCS_IDLE);
697 }
698
699 void
700 reportCCSProfiling( void )
701 {
702 stopProfTimer();
703 if (RtsFlags.CcFlags.doCostCentres == 0) return;
704
705 ProfilerTotals totals = countTickss(CCS_MAIN);
706 aggregateCCCosts(CCS_MAIN);
707 inheritCosts(CCS_MAIN);
708 CostCentreStack *stack = pruneCCSTree(CCS_MAIN);
709 sortCCSTree(stack);
710
711 if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_JSON) {
712 writeCCSReportJson(prof_file, stack, totals);
713 } else {
714 writeCCSReport(prof_file, stack, totals);
715 }
716 }
717
718 /* -----------------------------------------------------------------------------
719 * Accumulating total allocatinos and tick count
720 -------------------------------------------------------------------------- */
721
722 /* Helper */
723 static void
724 countTickss_(CostCentreStack const *ccs, ProfilerTotals *totals)
725 {
726 if (!ignoreCCS(ccs)) {
727 totals->total_alloc += ccs->mem_alloc;
728 totals->total_prof_ticks += ccs->time_ticks;
729 }
730 for (IndexTable *i = ccs->indexTable; i != NULL; i = i->next) {
731 if (!i->back_edge) {
732 countTickss_(i->ccs, totals);
733 }
734 }
735 }
736
737 /* Traverse the cost centre stack tree and accumulate
738 * total ticks/allocations.
739 */
740 static ProfilerTotals
741 countTickss(CostCentreStack const *ccs)
742 {
743 ProfilerTotals totals = {0,0};
744 countTickss_(ccs, &totals);
745 return totals;
746 }
747
748 /* Traverse the cost centre stack tree and inherit ticks & allocs.
749 */
750 static void
751 inheritCosts(CostCentreStack *ccs)
752 {
753 IndexTable *i;
754
755 if (ignoreCCS(ccs)) { return; }
756
757 ccs->inherited_ticks += ccs->time_ticks;
758 ccs->inherited_alloc += ccs->mem_alloc;
759
760 for (i = ccs->indexTable; i != NULL; i = i->next)
761 if (!i->back_edge) {
762 inheritCosts(i->ccs);
763 ccs->inherited_ticks += i->ccs->inherited_ticks;
764 ccs->inherited_alloc += i->ccs->inherited_alloc;
765 }
766
767 return;
768 }
769
770 static void
771 aggregateCCCosts( CostCentreStack *ccs )
772 {
773 IndexTable *i;
774
775 ccs->cc->mem_alloc += ccs->mem_alloc;
776 ccs->cc->time_ticks += ccs->time_ticks;
777
778 for (i = ccs->indexTable; i != 0; i = i->next) {
779 if (!i->back_edge) {
780 aggregateCCCosts(i->ccs);
781 }
782 }
783 }
784
785 //
786 // Prune CCSs with zero entries, zero ticks or zero allocation from
787 // the tree, unless COST_CENTRES_ALL is on.
788 //
789 static CostCentreStack *
790 pruneCCSTree (CostCentreStack *ccs)
791 {
792 CostCentreStack *ccs1;
793 IndexTable *i, **prev;
794
795 prev = &ccs->indexTable;
796 for (i = ccs->indexTable; i != 0; i = i->next) {
797 if (i->back_edge) { continue; }
798
799 ccs1 = pruneCCSTree(i->ccs);
800 if (ccs1 == NULL) {
801 *prev = i->next;
802 } else {
803 prev = &(i->next);
804 }
805 }
806
807 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
808 /* force printing of *all* cost centres if -P -P */ )
809
810 || ( ccs->indexTable != 0 )
811 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
812 ) {
813 return ccs;
814 } else {
815 return NULL;
816 }
817 }
818
819 static IndexTable*
820 insertIndexTableInSortedList(IndexTable* tbl, IndexTable* sortedList)
821 {
822 StgWord tbl_ticks = tbl->ccs->scc_count;
823 char* tbl_label = tbl->ccs->cc->label;
824
825 IndexTable *prev = NULL;
826 IndexTable *cursor = sortedList;
827
828 while (cursor != NULL) {
829 StgWord cursor_ticks = cursor->ccs->scc_count;
830 char* cursor_label = cursor->ccs->cc->label;
831
832 if (tbl_ticks > cursor_ticks ||
833 (tbl_ticks == cursor_ticks && strcmp(tbl_label, cursor_label) < 0)) {
834 if (prev == NULL) {
835 tbl->next = sortedList;
836 return tbl;
837 } else {
838 prev->next = tbl;
839 tbl->next = cursor;
840 return sortedList;
841 }
842 } else {
843 prev = cursor;
844 cursor = cursor->next;
845 }
846 }
847
848 prev->next = tbl;
849 return sortedList;
850 }
851
852 static void
853 sortCCSTree(CostCentreStack *ccs)
854 {
855 if (ccs->indexTable == NULL) return;
856
857 for (IndexTable *tbl = ccs->indexTable; tbl != NULL; tbl = tbl->next)
858 if (!tbl->back_edge)
859 sortCCSTree(tbl->ccs);
860
861 IndexTable *sortedList = ccs->indexTable;
862 IndexTable *nonSortedList = sortedList->next;
863 sortedList->next = NULL;
864
865 while (nonSortedList != NULL)
866 {
867 IndexTable *nonSortedTail = nonSortedList->next;
868 nonSortedList->next = NULL;
869 sortedList = insertIndexTableInSortedList(nonSortedList, sortedList);
870 nonSortedList = nonSortedTail;
871 }
872
873 ccs->indexTable = sortedList;
874 }
875
876 void
877 fprintCCS( FILE *f, CostCentreStack *ccs )
878 {
879 fprintf(f,"<");
880 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
881 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
882 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
883 fprintf(f,",");
884 }
885 }
886 fprintf(f,">");
887 }
888
889 // Returns: True if the call stack ended with CAF
890 static bool fprintCallStack (CostCentreStack *ccs)
891 {
892 CostCentreStack *prev;
893
894 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
895 prev = ccs->prevStack;
896 while (prev && prev != CCS_MAIN) {
897 ccs = prev;
898 fprintf(stderr, ",\n called from %s.%s",
899 ccs->cc->module, ccs->cc->label);
900 prev = ccs->prevStack;
901 }
902 fprintf(stderr, "\n");
903
904 return (!strncmp(ccs->cc->label, "CAF", 3));
905 }
906
907 /* For calling from .cmm code, where we can't reliably refer to stderr */
908 void
909 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
910 {
911 bool is_caf;
912 StgPtr frame;
913 StgStack *stack;
914 CostCentreStack *prev_ccs;
915 uint32_t depth = 0;
916 const uint32_t MAX_DEPTH = 10; // don't print gigantic chains of stacks
917
918 {
919 const char *desc;
920 const StgInfoTable *info;
921 info = get_itbl(UNTAG_CONST_CLOSURE(exception));
922 switch (info->type) {
923 case CONSTR:
924 case CONSTR_1_0:
925 case CONSTR_0_1:
926 case CONSTR_2_0:
927 case CONSTR_1_1:
928 case CONSTR_0_2:
929 case CONSTR_NOCAF:
930 desc = GET_CON_DESC(itbl_to_con_itbl(info));
931 break;
932 default:
933 desc = closure_type_names[info->type];
934 break;
935 }
936 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
937 }
938
939 is_caf = fprintCallStack(ccs);
940
941 // traverse the stack down to the enclosing update frame to
942 // find out where this CCS was evaluated from...
943
944 stack = tso->stackobj;
945 frame = stack->sp;
946 prev_ccs = ccs;
947
948 for (; is_caf && depth < MAX_DEPTH; depth++)
949 {
950 switch (get_itbl((StgClosure*)frame)->type)
951 {
952 case UPDATE_FRAME:
953 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
954 frame += sizeofW(StgUpdateFrame);
955 if (ccs == CCS_MAIN) {
956 goto done;
957 }
958 if (ccs == prev_ccs) {
959 // ignore if this is the same as the previous stack,
960 // we're probably in library code and haven't
961 // accumulated any more interesting stack items
962 // since the last update frame.
963 break;
964 }
965 prev_ccs = ccs;
966 fprintf(stderr, " --> evaluated by: ");
967 is_caf = fprintCallStack(ccs);
968 break;
969 case UNDERFLOW_FRAME:
970 stack = ((StgUnderflowFrame*)frame)->next_chunk;
971 frame = stack->sp;
972 break;
973 case STOP_FRAME:
974 goto done;
975 default:
976 frame += stack_frame_sizeW((StgClosure*)frame);
977 break;
978 }
979 }
980 done:
981 return;
982 }
983
984 #if defined(DEBUG)
985 void
986 debugCCS( CostCentreStack *ccs )
987 {
988 debugBelch("<");
989 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
990 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
991 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
992 debugBelch(",");
993 }
994 }
995 debugBelch(">");
996 }
997 #endif /* DEBUG */
998
999 #endif /* PROFILING */