Show sources of cost centers in .prof
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "Printer.h"
21 #include "Capability.h"
22
23 #include <string.h>
24
25 #ifdef DEBUG
26 #include "Trace.h"
27 #endif
28
29 /*
30 * Profiling allocation arena.
31 */
32 static Arena *prof_arena;
33
34 /*
35 * Global variables used to assign unique IDs to cc's, ccs's, and
36 * closure_cats
37 */
38
39 unsigned int CC_ID = 1;
40 unsigned int CCS_ID = 1;
41
42 /* figures for the profiling report.
43 */
44 static StgWord64 total_alloc;
45 static W_ total_prof_ticks;
46
47 /* Globals for opening the profiling log file(s)
48 */
49 static char *prof_filename; /* prof report file name = <program>.prof */
50 FILE *prof_file;
51
52 static char *hp_filename; /* heap profile (hp2ps style) log file */
53 FILE *hp_file;
54
55 /* Linked lists to keep track of CCs and CCSs that haven't
56 * been declared in the log file yet
57 */
58 CostCentre *CC_LIST = NULL;
59 CostCentreStack *CCS_LIST = NULL;
60
61 #ifdef THREADED_RTS
62 static Mutex ccs_mutex;
63 #endif
64
65 /*
66 * Built-in cost centres and cost-centre stacks:
67 *
68 * MAIN is the root of the cost-centre stack tree. If there are
69 * no {-# SCC #-}s in the program, all costs will be attributed
70 * to MAIN.
71 *
72 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
73 * RTS operations apart from garbage collection are attributed
74 * to SYSTEM.
75 *
76 * GC is the storage manager / garbage collector.
77 *
78 * OVERHEAD gets all costs generated by the profiling system
79 * itself. These are costs that would not be incurred
80 * during non-profiled execution of the program.
81 *
82 * DONT_CARE is a placeholder cost-centre we assign to static
83 * constructors. It should *never* accumulate any costs.
84 *
85 * PINNED accumulates memory allocated to pinned objects, which
86 * cannot be profiled separately because we cannot reliably
87 * traverse pinned memory.
88 */
89
90 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "<built-in>", CC_NOT_CAF, );
91 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", "<built-in>", CC_NOT_CAF, );
92 CC_DECLARE(CC_GC, "GC", "GC", "<built-in>", CC_NOT_CAF, );
93 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "<built-in>", CC_NOT_CAF, );
94 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", "<built-in>", CC_NOT_CAF, );
95 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", "<built-in>", CC_NOT_CAF, );
96 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "<built-in>", CC_NOT_CAF, );
97
98 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
99 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
100 CCS_DECLARE(CCS_GC, CC_GC, );
101 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
102 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
103 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
104 CCS_DECLARE(CCS_IDLE, CC_IDLE, );
105
106 /*
107 * Static Functions
108 */
109
110 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
111 CostCentreStack *ccs2 );
112 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
113 CostCentreStack *new_ccs );
114 static rtsBool ignoreCCS ( CostCentreStack *ccs );
115 static void countTickss ( CostCentreStack *ccs );
116 static void inheritCosts ( CostCentreStack *ccs );
117 static uint32_t numDigits ( StgInt i );
118 static void findCCSMaxLens ( CostCentreStack *ccs,
119 uint32_t indent,
120 uint32_t *max_label_len,
121 uint32_t *max_module_len,
122 uint32_t *max_src_len,
123 uint32_t *max_id_len );
124 static void logCCS ( CostCentreStack *ccs,
125 uint32_t indent,
126 uint32_t max_label_len,
127 uint32_t max_module_len,
128 uint32_t max_src_len,
129 uint32_t max_id_len );
130 static void reportCCS ( CostCentreStack *ccs );
131 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
132 CostCentre *cc );
133 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
134 static void sortCCSTree ( CostCentreStack *ccs );
135 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
136 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
137 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
138 CostCentre *, unsigned int );
139 static void ccsSetSelected ( CostCentreStack *ccs );
140
141 static void initTimeProfiling ( void );
142 static void initProfilingLogFile ( void );
143
144 /* -----------------------------------------------------------------------------
145 Initialise the profiling environment
146 -------------------------------------------------------------------------- */
147
148 void initProfiling (void)
149 {
150 // initialise our arena
151 prof_arena = newArena();
152
153 /* for the benefit of allocate()... */
154 {
155 uint32_t n;
156 for (n=0; n < n_capabilities; n++) {
157 capabilities[n]->r.rCCCS = CCS_SYSTEM;
158 }
159 }
160
161 #ifdef THREADED_RTS
162 initMutex(&ccs_mutex);
163 #endif
164
165 /* Set up the log file, and dump the header and cost centre
166 * information into it.
167 */
168 initProfilingLogFile();
169
170 /* Register all the cost centres / stacks in the program
171 * CC_MAIN gets link = 0, all others have non-zero link.
172 */
173 REGISTER_CC(CC_MAIN);
174 REGISTER_CC(CC_SYSTEM);
175 REGISTER_CC(CC_GC);
176 REGISTER_CC(CC_OVERHEAD);
177 REGISTER_CC(CC_DONT_CARE);
178 REGISTER_CC(CC_PINNED);
179 REGISTER_CC(CC_IDLE);
180
181 REGISTER_CCS(CCS_SYSTEM);
182 REGISTER_CCS(CCS_GC);
183 REGISTER_CCS(CCS_OVERHEAD);
184 REGISTER_CCS(CCS_DONT_CARE);
185 REGISTER_CCS(CCS_PINNED);
186 REGISTER_CCS(CCS_IDLE);
187 REGISTER_CCS(CCS_MAIN);
188
189 /* find all the registered cost centre stacks, and make them
190 * children of CCS_MAIN.
191 */
192 ASSERT(CCS_LIST == CCS_MAIN);
193 CCS_LIST = CCS_LIST->prevStack;
194 CCS_MAIN->prevStack = NULL;
195 CCS_MAIN->root = CCS_MAIN;
196 ccsSetSelected(CCS_MAIN);
197
198 initProfiling2();
199
200 if (RtsFlags.CcFlags.doCostCentres) {
201 initTimeProfiling();
202 }
203
204 if (RtsFlags.ProfFlags.doHeapProfile) {
205 initHeapProfiling();
206 }
207 }
208
209 //
210 // Should be called after loading any new Haskell code.
211 //
212 void initProfiling2 (void)
213 {
214 CostCentreStack *ccs, *next;
215
216 // make CCS_MAIN the parent of all the pre-defined CCSs.
217 for (ccs = CCS_LIST; ccs != NULL; ) {
218 next = ccs->prevStack;
219 ccs->prevStack = NULL;
220 actualPush_(CCS_MAIN,ccs->cc,ccs);
221 ccs->root = ccs;
222 ccs = next;
223 }
224 CCS_LIST = NULL;
225 }
226
227 void
228 freeProfiling (void)
229 {
230 arenaFree(prof_arena);
231 }
232
233 CostCentre *mkCostCentre (char *label, char *module, char *srcloc)
234 {
235 CostCentre *cc = stgMallocBytes (sizeof(CostCentre), "mkCostCentre");
236 cc->label = label;
237 cc->module = module;
238 cc->srcloc = srcloc;
239 return cc;
240 }
241
242 static void
243 initProfilingLogFile(void)
244 {
245 char *prog;
246
247 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
248 strcpy(prog, prog_name);
249 #ifdef mingw32_HOST_OS
250 // on Windows, drop the .exe suffix if there is one
251 {
252 char *suff;
253 suff = strrchr(prog,'.');
254 if (suff != NULL && !strcmp(suff,".exe")) {
255 *suff = '\0';
256 }
257 }
258 #endif
259
260 if (RtsFlags.CcFlags.doCostCentres == 0 && !doingRetainerProfiling())
261 {
262 /* No need for the <prog>.prof file */
263 prof_filename = NULL;
264 prof_file = NULL;
265 }
266 else
267 {
268 /* Initialise the log file name */
269 prof_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
270 sprintf(prof_filename, "%s.prof", prog);
271
272 /* open the log file */
273 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
274 debugBelch("Can't open profiling report file %s\n", prof_filename);
275 RtsFlags.CcFlags.doCostCentres = 0;
276 // Retainer profiling (`-hr` or `-hr<cc> -h<x>`) writes to
277 // both <program>.hp as <program>.prof.
278 if (doingRetainerProfiling()) {
279 RtsFlags.ProfFlags.doHeapProfile = 0;
280 }
281 }
282 }
283
284 if (RtsFlags.ProfFlags.doHeapProfile) {
285 /* Initialise the log file name */
286 hp_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
287 sprintf(hp_filename, "%s.hp", prog);
288
289 /* open the log file */
290 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
291 debugBelch("Can't open profiling report file %s\n",
292 hp_filename);
293 RtsFlags.ProfFlags.doHeapProfile = 0;
294 }
295 }
296 }
297
298 void
299 initTimeProfiling(void)
300 {
301 /* Start ticking */
302 startProfTimer();
303 };
304
305 void
306 endProfiling ( void )
307 {
308 if (RtsFlags.CcFlags.doCostCentres) {
309 stopProfTimer();
310 }
311 if (RtsFlags.ProfFlags.doHeapProfile) {
312 endHeapProfiling();
313 }
314 }
315
316 /* -----------------------------------------------------------------------------
317 Set CCCS when entering a function.
318
319 The algorithm is as follows.
320
321 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
322
323 where
324
325 dropCommonPrefix A B
326 -- returns the suffix of B after removing any prefix common
327 -- to both A and B.
328
329 e.g.
330
331 <a,b,c> ++> <> = <a,b,c>
332 <a,b,c> ++> <d> = <a,b,c,d>
333 <a,b,c> ++> <a,b> = <a,b,c>
334 <a,b> ++> <a,b,c> = <a,b,c>
335 <a,b,c> ++> <a,b,d> = <a,b,c,d>
336
337 -------------------------------------------------------------------------- */
338
339 // implements c1 ++> c2, where c1 and c2 are equal depth
340 //
341 static CostCentreStack *
342 enterFunEqualStacks (CostCentreStack *ccs0,
343 CostCentreStack *ccsapp,
344 CostCentreStack *ccsfn)
345 {
346 ASSERT(ccsapp->depth == ccsfn->depth);
347 if (ccsapp == ccsfn) return ccs0;
348 return pushCostCentre(enterFunEqualStacks(ccs0,
349 ccsapp->prevStack,
350 ccsfn->prevStack),
351 ccsfn->cc);
352 }
353
354 // implements c1 ++> c2, where c2 is deeper than c1.
355 // Drop elements of c2 until we have equal stacks, call
356 // enterFunEqualStacks(), and then push on the elements that we
357 // dropped in reverse order.
358 //
359 static CostCentreStack *
360 enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
361 {
362 if (n == 0) {
363 ASSERT(ccsfn->depth == ccsapp->depth);
364 return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);;
365 } else {
366 ASSERT(ccsfn->depth > ccsapp->depth);
367 return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
368 ccsfn->cc);
369 }
370 }
371
372 void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
373 {
374 CostCentreStack *ccsapp;
375
376 // common case 1: both stacks are the same
377 if (ccsfn == reg->rCCCS) {
378 return;
379 }
380
381 // common case 2: the function stack is empty, or just CAF
382 if (ccsfn->prevStack == CCS_MAIN) {
383 return;
384 }
385
386 ccsapp = reg->rCCCS;
387 reg->rCCCS = CCS_OVERHEAD;
388
389 // common case 3: the stacks are completely different (e.g. one is a
390 // descendent of MAIN and the other of a CAF): we append the whole
391 // of the function stack to the current CCS.
392 if (ccsfn->root != ccsapp->root) {
393 reg->rCCCS = appendCCS(ccsapp,ccsfn);
394 return;
395 }
396
397 // uncommon case 4: ccsapp is deeper than ccsfn
398 if (ccsapp->depth > ccsfn->depth) {
399 uint32_t i, n;
400 CostCentreStack *tmp = ccsapp;
401 n = ccsapp->depth - ccsfn->depth;
402 for (i = 0; i < n; i++) {
403 tmp = tmp->prevStack;
404 }
405 reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
406 return;
407 }
408
409 // uncommon case 5: ccsfn is deeper than CCCS
410 if (ccsfn->depth > ccsapp->depth) {
411 reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
412 ccsfn->depth - ccsapp->depth);
413 return;
414 }
415
416 // uncommon case 6: stacks are equal depth, but different
417 reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
418 }
419
420 /* -----------------------------------------------------------------------------
421 Decide whether closures with this CCS should contribute to the heap
422 profile.
423 -------------------------------------------------------------------------- */
424
425 static void
426 ccsSetSelected (CostCentreStack *ccs)
427 {
428 if (RtsFlags.ProfFlags.modSelector) {
429 if (! strMatchesSelector (ccs->cc->module,
430 RtsFlags.ProfFlags.modSelector) ) {
431 ccs->selected = 0;
432 return;
433 }
434 }
435 if (RtsFlags.ProfFlags.ccSelector) {
436 if (! strMatchesSelector (ccs->cc->label,
437 RtsFlags.ProfFlags.ccSelector) ) {
438 ccs->selected = 0;
439 return;
440 }
441 }
442 if (RtsFlags.ProfFlags.ccsSelector) {
443 CostCentreStack *c;
444 for (c = ccs; c != NULL; c = c->prevStack)
445 {
446 if ( strMatchesSelector (c->cc->label,
447 RtsFlags.ProfFlags.ccsSelector) ) {
448 break;
449 }
450 }
451 if (c == NULL) {
452 ccs->selected = 0;
453 return;
454 }
455 }
456
457 ccs->selected = 1;
458 return;
459 }
460
461 /* -----------------------------------------------------------------------------
462 Cost-centre stack manipulation
463 -------------------------------------------------------------------------- */
464
465 #ifdef DEBUG
466 CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
467 CostCentreStack *
468 pushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
469 #define pushCostCentre _pushCostCentre
470 {
471 IF_DEBUG(prof,
472 traceBegin("pushing %s on ", cc->label);
473 debugCCS(ccs);
474 traceEnd(););
475
476 return pushCostCentre(ccs,cc);
477 }
478 #endif
479
480 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
481
482 #ifdef DEBUG
483 CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
484 CostCentreStack *
485 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
486 #define appendCCS _appendCCS
487 {
488 IF_DEBUG(prof,
489 if (ccs1 != ccs2) {
490 debugBelch("Appending ");
491 debugCCS(ccs1);
492 debugBelch(" to ");
493 debugCCS(ccs2);
494 debugBelch("\n");});
495 return appendCCS(ccs1,ccs2);
496 }
497 #endif
498
499 CostCentreStack *
500 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
501 {
502 if (ccs1 == ccs2) {
503 return ccs1;
504 }
505
506 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) {
507 // stop at a CAF element
508 return ccs1;
509 }
510
511 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
512 }
513
514 // Pick one:
515 // #define RECURSION_DROPS
516 #define RECURSION_TRUNCATES
517
518 CostCentreStack *
519 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
520 {
521 CostCentreStack *temp_ccs, *ret;
522 IndexTable *ixtable;
523
524 if (ccs == EMPTY_STACK) {
525 ACQUIRE_LOCK(&ccs_mutex);
526 ret = actualPush(ccs,cc);
527 }
528 else
529 {
530 if (ccs->cc == cc) {
531 return ccs;
532 } else {
533 // check if we've already memoized this stack
534 ixtable = ccs->indexTable;
535 temp_ccs = isInIndexTable(ixtable,cc);
536
537 if (temp_ccs != EMPTY_STACK) {
538 return temp_ccs;
539 } else {
540
541 // not in the IndexTable, now we take the lock:
542 ACQUIRE_LOCK(&ccs_mutex);
543
544 if (ccs->indexTable != ixtable)
545 {
546 // someone modified ccs->indexTable while
547 // we did not hold the lock, so we must
548 // check it again:
549 temp_ccs = isInIndexTable(ixtable,cc);
550 if (temp_ccs != EMPTY_STACK)
551 {
552 RELEASE_LOCK(&ccs_mutex);
553 return temp_ccs;
554 }
555 }
556 temp_ccs = checkLoop(ccs,cc);
557 if (temp_ccs != NULL) {
558 // This CC is already in the stack somewhere.
559 // This could be recursion, or just calling
560 // another function with the same CC.
561 // A number of policies are possible at this
562 // point, we implement two here:
563 // - truncate the stack to the previous instance
564 // of this CC
565 // - ignore this push, return the same stack.
566 //
567 CostCentreStack *new_ccs;
568 #if defined(RECURSION_TRUNCATES)
569 new_ccs = temp_ccs;
570 #else // defined(RECURSION_DROPS)
571 new_ccs = ccs;
572 #endif
573 ccs->indexTable = addToIndexTable (ccs->indexTable,
574 new_ccs, cc, 1);
575 ret = new_ccs;
576 } else {
577 ret = actualPush (ccs,cc);
578 }
579 }
580 }
581 }
582
583 RELEASE_LOCK(&ccs_mutex);
584 return ret;
585 }
586
587 static CostCentreStack *
588 checkLoop (CostCentreStack *ccs, CostCentre *cc)
589 {
590 while (ccs != EMPTY_STACK) {
591 if (ccs->cc == cc)
592 return ccs;
593 ccs = ccs->prevStack;
594 }
595 return NULL;
596 }
597
598 static CostCentreStack *
599 actualPush (CostCentreStack *ccs, CostCentre *cc)
600 {
601 CostCentreStack *new_ccs;
602
603 // allocate space for a new CostCentreStack
604 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
605
606 return actualPush_(ccs, cc, new_ccs);
607 }
608
609 static CostCentreStack *
610 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
611 {
612 /* assign values to each member of the structure */
613 new_ccs->ccsID = CCS_ID++;
614 new_ccs->cc = cc;
615 new_ccs->prevStack = ccs;
616 new_ccs->root = ccs->root;
617 new_ccs->depth = ccs->depth + 1;
618
619 new_ccs->indexTable = EMPTY_TABLE;
620
621 /* Initialise the various _scc_ counters to zero
622 */
623 new_ccs->scc_count = 0;
624
625 /* Initialize all other stats here. There should be a quick way
626 * that's easily used elsewhere too
627 */
628 new_ccs->time_ticks = 0;
629 new_ccs->mem_alloc = 0;
630 new_ccs->inherited_ticks = 0;
631 new_ccs->inherited_alloc = 0;
632
633 // Set the selected field.
634 ccsSetSelected(new_ccs);
635
636 /* update the memoization table for the parent stack */
637 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
638 0/*not a back edge*/);
639
640 /* return a pointer to the new stack */
641 return new_ccs;
642 }
643
644
645 static CostCentreStack *
646 isInIndexTable(IndexTable *it, CostCentre *cc)
647 {
648 while (it!=EMPTY_TABLE)
649 {
650 if (it->cc == cc)
651 return it->ccs;
652 else
653 it = it->next;
654 }
655
656 /* otherwise we never found it so return EMPTY_TABLE */
657 return EMPTY_TABLE;
658 }
659
660
661 static IndexTable *
662 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
663 CostCentre *cc, unsigned int back_edge)
664 {
665 IndexTable *new_it;
666
667 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
668
669 new_it->cc = cc;
670 new_it->ccs = new_ccs;
671 new_it->next = it;
672 new_it->back_edge = back_edge;
673 return new_it;
674 }
675
676 /* -----------------------------------------------------------------------------
677 Generating a time & allocation profiling report.
678 -------------------------------------------------------------------------- */
679
680 /* We omit certain system-related CCs and CCSs from the default
681 * reports, so as not to cause confusion.
682 */
683 static rtsBool
684 ignoreCC (CostCentre *cc)
685 {
686 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
687 ( cc == CC_OVERHEAD
688 || cc == CC_DONT_CARE
689 || cc == CC_GC
690 || cc == CC_SYSTEM
691 || cc == CC_IDLE)) {
692 return rtsTrue;
693 } else {
694 return rtsFalse;
695 }
696 }
697
698 static rtsBool
699 ignoreCCS (CostCentreStack *ccs)
700 {
701 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
702 ( ccs == CCS_OVERHEAD
703 || ccs == CCS_DONT_CARE
704 || ccs == CCS_GC
705 || ccs == CCS_SYSTEM
706 || ccs == CCS_IDLE)) {
707 return rtsTrue;
708 } else {
709 return rtsFalse;
710 }
711 }
712
713 /* -----------------------------------------------------------------------------
714 Generating the aggregated per-cost-centre time/alloc report.
715 -------------------------------------------------------------------------- */
716
717 static CostCentre *sorted_cc_list;
718
719 static void
720 aggregateCCCosts( CostCentreStack *ccs )
721 {
722 IndexTable *i;
723
724 ccs->cc->mem_alloc += ccs->mem_alloc;
725 ccs->cc->time_ticks += ccs->time_ticks;
726
727 for (i = ccs->indexTable; i != 0; i = i->next) {
728 if (!i->back_edge) {
729 aggregateCCCosts(i->ccs);
730 }
731 }
732 }
733
734 static void
735 insertCCInSortedList( CostCentre *new_cc )
736 {
737 CostCentre **prev, *cc;
738
739 prev = &sorted_cc_list;
740 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
741 if (new_cc->time_ticks > cc->time_ticks) {
742 new_cc->link = cc;
743 *prev = new_cc;
744 return;
745 } else {
746 prev = &(cc->link);
747 }
748 }
749 new_cc->link = NULL;
750 *prev = new_cc;
751 }
752
753 static uint32_t
754 strlen_utf8 (char *s)
755 {
756 uint32_t n = 0;
757 unsigned char c;
758
759 for (; *s != '\0'; s++) {
760 c = *s;
761 if (c < 0x80 || c > 0xBF) n++;
762 }
763 return n;
764 }
765
766 static void
767 reportPerCCCosts( void )
768 {
769 CostCentre *cc, *next;
770 uint32_t max_label_len, max_module_len, max_src_len;
771
772 aggregateCCCosts(CCS_MAIN);
773 sorted_cc_list = NULL;
774
775 max_label_len = 11; // no shorter than the "COST CENTRE" header
776 max_module_len = 6; // no shorter than the "MODULE" header
777 max_src_len = 3; // no shorter than the "SRC" header
778
779 for (cc = CC_LIST; cc != NULL; cc = next) {
780 next = cc->link;
781 if (cc->time_ticks > total_prof_ticks/100
782 || cc->mem_alloc > total_alloc/100
783 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
784 insertCCInSortedList(cc);
785
786 max_label_len = stg_max(strlen_utf8(cc->label), max_label_len);
787 max_module_len = stg_max(strlen_utf8(cc->module), max_module_len);
788 max_src_len = stg_max(strlen_utf8(cc->srcloc), max_src_len);
789 }
790 }
791
792 fprintf(prof_file, "%-*s %-*s %-*s",
793 max_label_len, "COST CENTRE", max_module_len, "MODULE", max_src_len, "SRC");
794 fprintf(prof_file, " %6s %6s", "%time", "%alloc");
795 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
796 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
797 }
798 fprintf(prof_file, "\n\n");
799
800 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
801 if (ignoreCC(cc)) {
802 continue;
803 }
804 fprintf(prof_file, "%s%*s %s%*s %s%*s",
805 cc->label,
806 max_label_len - strlen_utf8(cc->label), "",
807 cc->module,
808 max_module_len - strlen_utf8(cc->module), "",
809 cc->srcloc,
810 max_src_len - strlen_utf8(cc->srcloc), "");
811
812 fprintf(prof_file, " %6.1f %6.1f",
813 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
814 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
815 total_alloc * 100)
816 );
817
818 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
819 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
820 (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_));
821 }
822 fprintf(prof_file, "\n");
823 }
824
825 fprintf(prof_file,"\n\n");
826 }
827
828 /* -----------------------------------------------------------------------------
829 Generate the cost-centre-stack time/alloc report
830 -------------------------------------------------------------------------- */
831
832 static void
833 fprintHeader( uint32_t max_label_len, uint32_t max_module_len,
834 uint32_t max_src_len, uint32_t max_id_len )
835 {
836 fprintf(prof_file, "%-*s %-*s %-*s %-*s %11s %12s %12s\n",
837 max_label_len, "",
838 max_module_len, "",
839 max_src_len, "",
840 max_id_len, "",
841 "", "individual", "inherited");
842
843 fprintf(prof_file, "%-*s %-*s %-*s %-*s",
844 max_label_len, "COST CENTRE",
845 max_module_len, "MODULE",
846 max_src_len, "SRC",
847 max_id_len, "no.");
848
849 fprintf(prof_file, " %11s %5s %6s %5s %6s",
850 "entries", "%time", "%alloc", "%time", "%alloc");
851
852 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
853 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
854 }
855
856 fprintf(prof_file, "\n\n");
857 }
858
859 void
860 reportCCSProfiling( void )
861 {
862 uint32_t count;
863 char temp[128]; /* sigh: magic constant */
864
865 stopProfTimer();
866
867 total_prof_ticks = 0;
868 total_alloc = 0;
869 countTickss(CCS_MAIN);
870
871 if (RtsFlags.CcFlags.doCostCentres == 0) return;
872
873 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
874 time_str(), "Final");
875
876 fprintf(prof_file, "\n\t ");
877 fprintf(prof_file, " %s", prog_name);
878 fprintf(prof_file, " +RTS");
879 for (count = 0; rts_argv[count]; count++)
880 fprintf(prof_file, " %s", rts_argv[count]);
881 fprintf(prof_file, " -RTS");
882 for (count = 1; prog_argv[count]; count++)
883 fprintf(prof_file, " %s", prog_argv[count]);
884 fprintf(prof_file, "\n\n");
885
886 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d us, %d processor%s)\n",
887 ((double) total_prof_ticks *
888 (double) RtsFlags.MiscFlags.tickInterval) / (TIME_RESOLUTION * n_capabilities),
889 (unsigned long) total_prof_ticks,
890 (int) TimeToUS(RtsFlags.MiscFlags.tickInterval),
891 n_capabilities, n_capabilities > 1 ? "s" : "");
892
893 fprintf(prof_file, "\ttotal alloc = %11s bytes",
894 showStgWord64(total_alloc * sizeof(W_),
895 temp, rtsTrue/*commas*/));
896
897 fprintf(prof_file, " (excludes profiling overheads)\n\n");
898
899 reportPerCCCosts();
900
901 inheritCosts(CCS_MAIN);
902
903 CostCentreStack *stack = pruneCCSTree(CCS_MAIN);
904 sortCCSTree(stack);
905 reportCCS(stack);
906 }
907
908 static uint32_t
909 numDigits(StgInt i) {
910 uint32_t result;
911
912 result = 1;
913
914 if (i < 0) i = 0;
915
916 while (i > 9) {
917 i /= 10;
918 result++;
919 }
920
921 return result;
922 }
923
924 static void
925 findCCSMaxLens(CostCentreStack *ccs, uint32_t indent, uint32_t *max_label_len,
926 uint32_t *max_module_len, uint32_t *max_src_len, uint32_t *max_id_len) {
927 CostCentre *cc;
928 IndexTable *i;
929
930 cc = ccs->cc;
931
932 *max_label_len = stg_max(*max_label_len, indent + strlen_utf8(cc->label));
933 *max_module_len = stg_max(*max_module_len, strlen_utf8(cc->module));
934 *max_src_len = stg_max(*max_src_len, strlen_utf8(cc->srcloc));
935 *max_id_len = stg_max(*max_id_len, numDigits(ccs->ccsID));
936
937 for (i = ccs->indexTable; i != 0; i = i->next) {
938 if (!i->back_edge) {
939 findCCSMaxLens(i->ccs, indent+1,
940 max_label_len, max_module_len, max_src_len, max_id_len);
941 }
942 }
943 }
944
945 static void
946 logCCS(CostCentreStack *ccs, uint32_t indent,
947 uint32_t max_label_len, uint32_t max_module_len,
948 uint32_t max_src_len, uint32_t max_id_len)
949 {
950 CostCentre *cc;
951 IndexTable *i;
952
953 cc = ccs->cc;
954
955 /* Only print cost centres with non 0 data ! */
956
957 if (!ignoreCCS(ccs))
958 /* force printing of *all* cost centres if -Pa */
959 {
960
961 fprintf(prof_file, "%*s%s%*s %s%*s %s%*s",
962 indent, "",
963 cc->label,
964 max_label_len-indent - strlen_utf8(cc->label), "",
965 cc->module,
966 max_module_len - strlen_utf8(cc->module), "",
967 cc->srcloc,
968 max_src_len - strlen_utf8(cc->srcloc), "");
969
970 fprintf(prof_file,
971 " %*" FMT_Int "%11" FMT_Word64 " %5.1f %5.1f %5.1f %5.1f",
972 max_id_len, ccs->ccsID, ccs->scc_count,
973 total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
974 total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
975 total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
976 total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
977 );
978
979 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
980 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
981 (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
982 }
983 fprintf(prof_file, "\n");
984 }
985
986 for (i = ccs->indexTable; i != 0; i = i->next) {
987 if (!i->back_edge) {
988 logCCS(i->ccs, indent+1,
989 max_label_len, max_module_len, max_src_len, max_id_len);
990 }
991 }
992 }
993
994 static void
995 reportCCS(CostCentreStack *ccs)
996 {
997 uint32_t max_label_len, max_module_len, max_src_len, max_id_len;
998
999 max_label_len = 11; // no shorter than "COST CENTRE" header
1000 max_module_len = 6; // no shorter than "MODULE" header
1001 max_src_len = 3; // no shorter than "SRC" header
1002 max_id_len = 3; // no shorter than "no." header
1003
1004 findCCSMaxLens(ccs, 0,
1005 &max_label_len, &max_module_len, &max_src_len, &max_id_len);
1006
1007 fprintHeader(max_label_len, max_module_len, max_src_len, max_id_len);
1008 logCCS(ccs, 0, max_label_len, max_module_len, max_src_len, max_id_len);
1009 }
1010
1011
1012 /* Traverse the cost centre stack tree and accumulate
1013 * ticks/allocations.
1014 */
1015 static void
1016 countTickss(CostCentreStack *ccs)
1017 {
1018 IndexTable *i;
1019
1020 if (!ignoreCCS(ccs)) {
1021 total_alloc += ccs->mem_alloc;
1022 total_prof_ticks += ccs->time_ticks;
1023 }
1024 for (i = ccs->indexTable; i != NULL; i = i->next)
1025 if (!i->back_edge) {
1026 countTickss(i->ccs);
1027 }
1028 }
1029
1030 /* Traverse the cost centre stack tree and inherit ticks & allocs.
1031 */
1032 static void
1033 inheritCosts(CostCentreStack *ccs)
1034 {
1035 IndexTable *i;
1036
1037 if (ignoreCCS(ccs)) { return; }
1038
1039 ccs->inherited_ticks += ccs->time_ticks;
1040 ccs->inherited_alloc += ccs->mem_alloc;
1041
1042 for (i = ccs->indexTable; i != NULL; i = i->next)
1043 if (!i->back_edge) {
1044 inheritCosts(i->ccs);
1045 ccs->inherited_ticks += i->ccs->inherited_ticks;
1046 ccs->inherited_alloc += i->ccs->inherited_alloc;
1047 }
1048
1049 return;
1050 }
1051
1052 //
1053 // Prune CCSs with zero entries, zero ticks or zero allocation from
1054 // the tree, unless COST_CENTRES_ALL is on.
1055 //
1056 static CostCentreStack *
1057 pruneCCSTree (CostCentreStack *ccs)
1058 {
1059 CostCentreStack *ccs1;
1060 IndexTable *i, **prev;
1061
1062 prev = &ccs->indexTable;
1063 for (i = ccs->indexTable; i != 0; i = i->next) {
1064 if (i->back_edge) { continue; }
1065
1066 ccs1 = pruneCCSTree(i->ccs);
1067 if (ccs1 == NULL) {
1068 *prev = i->next;
1069 } else {
1070 prev = &(i->next);
1071 }
1072 }
1073
1074 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
1075 /* force printing of *all* cost centres if -P -P */ )
1076
1077 || ( ccs->indexTable != 0 )
1078 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
1079 ) {
1080 return ccs;
1081 } else {
1082 return NULL;
1083 }
1084 }
1085
1086 static IndexTable*
1087 insertIndexTableInSortedList(IndexTable* tbl, IndexTable* sortedList)
1088 {
1089 StgWord tbl_ticks = tbl->ccs->scc_count;
1090 char* tbl_label = tbl->ccs->cc->label;
1091
1092 IndexTable *prev = NULL;
1093 IndexTable *cursor = sortedList;
1094
1095 while (cursor != NULL) {
1096 StgWord cursor_ticks = cursor->ccs->scc_count;
1097 char* cursor_label = cursor->ccs->cc->label;
1098
1099 if (tbl_ticks > cursor_ticks ||
1100 (tbl_ticks == cursor_ticks && strcmp(tbl_label, cursor_label) < 0)) {
1101 if (prev == NULL) {
1102 tbl->next = sortedList;
1103 return tbl;
1104 } else {
1105 prev->next = tbl;
1106 tbl->next = cursor;
1107 return sortedList;
1108 }
1109 } else {
1110 prev = cursor;
1111 cursor = cursor->next;
1112 }
1113 }
1114
1115 prev->next = tbl;
1116 return sortedList;
1117 }
1118
1119 static void
1120 sortCCSTree(CostCentreStack *ccs)
1121 {
1122 if (ccs->indexTable == NULL) return;
1123
1124 for (IndexTable *tbl = ccs->indexTable; tbl != NULL; tbl = tbl->next)
1125 if (!tbl->back_edge)
1126 sortCCSTree(tbl->ccs);
1127
1128 IndexTable *sortedList = ccs->indexTable;
1129 IndexTable *nonSortedList = sortedList->next;
1130 sortedList->next = NULL;
1131
1132 while (nonSortedList != NULL)
1133 {
1134 IndexTable *nonSortedTail = nonSortedList->next;
1135 nonSortedList->next = NULL;
1136 sortedList = insertIndexTableInSortedList(nonSortedList, sortedList);
1137 nonSortedList = nonSortedTail;
1138 }
1139
1140 ccs->indexTable = sortedList;
1141 }
1142
1143 void
1144 fprintCCS( FILE *f, CostCentreStack *ccs )
1145 {
1146 fprintf(f,"<");
1147 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1148 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
1149 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1150 fprintf(f,",");
1151 }
1152 }
1153 fprintf(f,">");
1154 }
1155
1156 // Returns: True if the call stack ended with CAF
1157 static rtsBool fprintCallStack (CostCentreStack *ccs)
1158 {
1159 CostCentreStack *prev;
1160
1161 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
1162 prev = ccs->prevStack;
1163 while (prev && prev != CCS_MAIN) {
1164 ccs = prev;
1165 fprintf(stderr, ",\n called from %s.%s",
1166 ccs->cc->module, ccs->cc->label);
1167 prev = ccs->prevStack;
1168 }
1169 fprintf(stderr, "\n");
1170
1171 return (!strncmp(ccs->cc->label, "CAF", 3));
1172 }
1173
1174 /* For calling from .cmm code, where we can't reliably refer to stderr */
1175 void
1176 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
1177 {
1178 rtsBool is_caf;
1179 StgPtr frame;
1180 StgStack *stack;
1181 CostCentreStack *prev_ccs;
1182 uint32_t depth = 0;
1183 const uint32_t MAX_DEPTH = 10; // don't print gigantic chains of stacks
1184
1185 {
1186 const char *desc;
1187 const StgInfoTable *info;
1188 info = get_itbl(UNTAG_CONST_CLOSURE(exception));
1189 switch (info->type) {
1190 case CONSTR:
1191 case CONSTR_1_0:
1192 case CONSTR_0_1:
1193 case CONSTR_2_0:
1194 case CONSTR_1_1:
1195 case CONSTR_0_2:
1196 case CONSTR_STATIC:
1197 case CONSTR_NOCAF_STATIC:
1198 desc = GET_CON_DESC(itbl_to_con_itbl(info));
1199 break;
1200 default:
1201 desc = closure_type_names[info->type];
1202 break;
1203 }
1204 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
1205 }
1206
1207 is_caf = fprintCallStack(ccs);
1208
1209 // traverse the stack down to the enclosing update frame to
1210 // find out where this CCS was evaluated from...
1211
1212 stack = tso->stackobj;
1213 frame = stack->sp;
1214 prev_ccs = ccs;
1215
1216 for (; is_caf && depth < MAX_DEPTH; depth++)
1217 {
1218 switch (get_itbl((StgClosure*)frame)->type)
1219 {
1220 case UPDATE_FRAME:
1221 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
1222 frame += sizeofW(StgUpdateFrame);
1223 if (ccs == CCS_MAIN) {
1224 goto done;
1225 }
1226 if (ccs == prev_ccs) {
1227 // ignore if this is the same as the previous stack,
1228 // we're probably in library code and haven't
1229 // accumulated any more interesting stack items
1230 // since the last update frame.
1231 break;
1232 }
1233 prev_ccs = ccs;
1234 fprintf(stderr, " --> evaluated by: ");
1235 is_caf = fprintCallStack(ccs);
1236 break;
1237 case UNDERFLOW_FRAME:
1238 stack = ((StgUnderflowFrame*)frame)->next_chunk;
1239 frame = stack->sp;
1240 break;
1241 case STOP_FRAME:
1242 goto done;
1243 default:
1244 frame += stack_frame_sizeW((StgClosure*)frame);
1245 break;
1246 }
1247 }
1248 done:
1249 return;
1250 }
1251
1252 #ifdef DEBUG
1253 void
1254 debugCCS( CostCentreStack *ccs )
1255 {
1256 debugBelch("<");
1257 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1258 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
1259 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1260 debugBelch(",");
1261 }
1262 }
1263 debugBelch(">");
1264 }
1265 #endif /* DEBUG */
1266
1267 #endif /* PROFILING */