f6430ae41a54ac704bf7961fad0acb5ce0533034
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "Printer.h"
21 #include "Capability.h"
22
23 #include <string.h>
24
25 #ifdef DEBUG
26 #include "Trace.h"
27 #endif
28
29 /*
30 * Profiling allocation arena.
31 */
32 static Arena *prof_arena;
33
34 /*
35 * Global variables used to assign unique IDs to cc's, ccs's, and
36 * closure_cats
37 */
38
39 unsigned int CC_ID = 1;
40 unsigned int CCS_ID = 1;
41
42 /* figures for the profiling report.
43 */
44 static StgWord64 total_alloc;
45 static W_ total_prof_ticks;
46
47 /* Globals for opening the profiling log file(s)
48 */
49 static char *prof_filename; /* prof report file name = <program>.prof */
50 FILE *prof_file;
51
52 static char *hp_filename; /* heap profile (hp2ps style) log file */
53 FILE *hp_file;
54
55 /* Linked lists to keep track of CCs and CCSs that haven't
56 * been declared in the log file yet
57 */
58 CostCentre *CC_LIST = NULL;
59 CostCentreStack *CCS_LIST = NULL;
60
61 #ifdef THREADED_RTS
62 static Mutex ccs_mutex;
63 #endif
64
65 /*
66 * Built-in cost centres and cost-centre stacks:
67 *
68 * MAIN is the root of the cost-centre stack tree. If there are
69 * no {-# SCC #-}s in the program, all costs will be attributed
70 * to MAIN.
71 *
72 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
73 * RTS operations apart from garbage collection are attributed
74 * to SYSTEM.
75 *
76 * GC is the storage manager / garbage collector.
77 *
78 * OVERHEAD gets all costs generated by the profiling system
79 * itself. These are costs that would not be incurred
80 * during non-profiled execution of the program.
81 *
82 * DONT_CARE is a placeholder cost-centre we assign to static
83 * constructors. It should *never* accumulate any costs.
84 *
85 * PINNED accumulates memory allocated to pinned objects, which
86 * cannot be profiled separately because we cannot reliably
87 * traverse pinned memory.
88 */
89
90 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "<built-in>", CC_NOT_CAF, );
91 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", "<built-in>", CC_NOT_CAF, );
92 CC_DECLARE(CC_GC, "GC", "GC", "<built-in>", CC_NOT_CAF, );
93 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "<built-in>", CC_NOT_CAF, );
94 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", "<built-in>", CC_NOT_CAF, );
95 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", "<built-in>", CC_NOT_CAF, );
96 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "<built-in>", CC_NOT_CAF, );
97
98 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
99 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
100 CCS_DECLARE(CCS_GC, CC_GC, );
101 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
102 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
103 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
104 CCS_DECLARE(CCS_IDLE, CC_IDLE, );
105
106 /*
107 * Static Functions
108 */
109
110 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
111 CostCentreStack *ccs2 );
112 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
113 CostCentreStack *new_ccs );
114 static rtsBool ignoreCCS ( CostCentreStack *ccs );
115 static void countTickss ( CostCentreStack *ccs );
116 static void inheritCosts ( CostCentreStack *ccs );
117 static uint32_t numDigits ( StgInt i );
118 static void findCCSMaxLens ( CostCentreStack *ccs,
119 uint32_t indent,
120 uint32_t *max_label_len,
121 uint32_t *max_module_len,
122 uint32_t *max_id_len );
123 static void logCCS ( CostCentreStack *ccs,
124 uint32_t indent,
125 uint32_t max_label_len,
126 uint32_t max_module_len,
127 uint32_t max_id_len );
128 static void reportCCS ( CostCentreStack *ccs );
129 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
130 CostCentre *cc );
131 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
132 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
133 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
134 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
135 CostCentre *, unsigned int );
136 static void ccsSetSelected ( CostCentreStack *ccs );
137
138 static void initTimeProfiling ( void );
139 static void initProfilingLogFile ( void );
140
141 /* -----------------------------------------------------------------------------
142 Initialise the profiling environment
143 -------------------------------------------------------------------------- */
144
145 void initProfiling (void)
146 {
147 // initialise our arena
148 prof_arena = newArena();
149
150 /* for the benefit of allocate()... */
151 {
152 uint32_t n;
153 for (n=0; n < n_capabilities; n++) {
154 capabilities[n]->r.rCCCS = CCS_SYSTEM;
155 }
156 }
157
158 #ifdef THREADED_RTS
159 initMutex(&ccs_mutex);
160 #endif
161
162 /* Set up the log file, and dump the header and cost centre
163 * information into it.
164 */
165 initProfilingLogFile();
166
167 /* Register all the cost centres / stacks in the program
168 * CC_MAIN gets link = 0, all others have non-zero link.
169 */
170 REGISTER_CC(CC_MAIN);
171 REGISTER_CC(CC_SYSTEM);
172 REGISTER_CC(CC_GC);
173 REGISTER_CC(CC_OVERHEAD);
174 REGISTER_CC(CC_DONT_CARE);
175 REGISTER_CC(CC_PINNED);
176 REGISTER_CC(CC_IDLE);
177
178 REGISTER_CCS(CCS_SYSTEM);
179 REGISTER_CCS(CCS_GC);
180 REGISTER_CCS(CCS_OVERHEAD);
181 REGISTER_CCS(CCS_DONT_CARE);
182 REGISTER_CCS(CCS_PINNED);
183 REGISTER_CCS(CCS_IDLE);
184 REGISTER_CCS(CCS_MAIN);
185
186 /* find all the registered cost centre stacks, and make them
187 * children of CCS_MAIN.
188 */
189 ASSERT(CCS_LIST == CCS_MAIN);
190 CCS_LIST = CCS_LIST->prevStack;
191 CCS_MAIN->prevStack = NULL;
192 CCS_MAIN->root = CCS_MAIN;
193 ccsSetSelected(CCS_MAIN);
194
195 initProfiling2();
196
197 if (RtsFlags.CcFlags.doCostCentres) {
198 initTimeProfiling();
199 }
200
201 if (RtsFlags.ProfFlags.doHeapProfile) {
202 initHeapProfiling();
203 }
204 }
205
206 //
207 // Should be called after loading any new Haskell code.
208 //
209 void initProfiling2 (void)
210 {
211 CostCentreStack *ccs, *next;
212
213 // make CCS_MAIN the parent of all the pre-defined CCSs.
214 for (ccs = CCS_LIST; ccs != NULL; ) {
215 next = ccs->prevStack;
216 ccs->prevStack = NULL;
217 actualPush_(CCS_MAIN,ccs->cc,ccs);
218 ccs->root = ccs;
219 ccs = next;
220 }
221 CCS_LIST = NULL;
222 }
223
224 void
225 freeProfiling (void)
226 {
227 arenaFree(prof_arena);
228 }
229
230 CostCentre *mkCostCentre (char *label, char *module, char *srcloc)
231 {
232 CostCentre *cc = stgMallocBytes (sizeof(CostCentre), "mkCostCentre");
233 cc->label = label;
234 cc->module = module;
235 cc->srcloc = srcloc;
236 return cc;
237 }
238
239 static void
240 initProfilingLogFile(void)
241 {
242 char *prog;
243
244 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
245 strcpy(prog, prog_name);
246 #ifdef mingw32_HOST_OS
247 // on Windows, drop the .exe suffix if there is one
248 {
249 char *suff;
250 suff = strrchr(prog,'.');
251 if (suff != NULL && !strcmp(suff,".exe")) {
252 *suff = '\0';
253 }
254 }
255 #endif
256
257 if (RtsFlags.CcFlags.doCostCentres == 0 && !doingRetainerProfiling())
258 {
259 /* No need for the <prog>.prof file */
260 prof_filename = NULL;
261 prof_file = NULL;
262 }
263 else
264 {
265 /* Initialise the log file name */
266 prof_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
267 sprintf(prof_filename, "%s.prof", prog);
268
269 /* open the log file */
270 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
271 debugBelch("Can't open profiling report file %s\n", prof_filename);
272 RtsFlags.CcFlags.doCostCentres = 0;
273 // Retainer profiling (`-hr` or `-hr<cc> -h<x>`) writes to
274 // both <program>.hp as <program>.prof.
275 if (doingRetainerProfiling()) {
276 RtsFlags.ProfFlags.doHeapProfile = 0;
277 }
278 }
279 }
280
281 if (RtsFlags.ProfFlags.doHeapProfile) {
282 /* Initialise the log file name */
283 hp_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
284 sprintf(hp_filename, "%s.hp", prog);
285
286 /* open the log file */
287 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
288 debugBelch("Can't open profiling report file %s\n",
289 hp_filename);
290 RtsFlags.ProfFlags.doHeapProfile = 0;
291 }
292 }
293 }
294
295 void
296 initTimeProfiling(void)
297 {
298 /* Start ticking */
299 startProfTimer();
300 };
301
302 void
303 endProfiling ( void )
304 {
305 if (RtsFlags.CcFlags.doCostCentres) {
306 stopProfTimer();
307 }
308 if (RtsFlags.ProfFlags.doHeapProfile) {
309 endHeapProfiling();
310 }
311 }
312
313 /* -----------------------------------------------------------------------------
314 Set CCCS when entering a function.
315
316 The algorithm is as follows.
317
318 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
319
320 where
321
322 dropCommonPrefix A B
323 -- returns the suffix of B after removing any prefix common
324 -- to both A and B.
325
326 e.g.
327
328 <a,b,c> ++> <> = <a,b,c>
329 <a,b,c> ++> <d> = <a,b,c,d>
330 <a,b,c> ++> <a,b> = <a,b,c>
331 <a,b> ++> <a,b,c> = <a,b,c>
332 <a,b,c> ++> <a,b,d> = <a,b,c,d>
333
334 -------------------------------------------------------------------------- */
335
336 // implements c1 ++> c2, where c1 and c2 are equal depth
337 //
338 static CostCentreStack *
339 enterFunEqualStacks (CostCentreStack *ccs0,
340 CostCentreStack *ccsapp,
341 CostCentreStack *ccsfn)
342 {
343 ASSERT(ccsapp->depth == ccsfn->depth);
344 if (ccsapp == ccsfn) return ccs0;
345 return pushCostCentre(enterFunEqualStacks(ccs0,
346 ccsapp->prevStack,
347 ccsfn->prevStack),
348 ccsfn->cc);
349 }
350
351 // implements c1 ++> c2, where c2 is deeper than c1.
352 // Drop elements of c2 until we have equal stacks, call
353 // enterFunEqualStacks(), and then push on the elements that we
354 // dropped in reverse order.
355 //
356 static CostCentreStack *
357 enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
358 {
359 if (n == 0) {
360 ASSERT(ccsfn->depth == ccsapp->depth);
361 return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);;
362 } else {
363 ASSERT(ccsfn->depth > ccsapp->depth);
364 return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
365 ccsfn->cc);
366 }
367 }
368
369 void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
370 {
371 CostCentreStack *ccsapp;
372
373 // common case 1: both stacks are the same
374 if (ccsfn == reg->rCCCS) {
375 return;
376 }
377
378 // common case 2: the function stack is empty, or just CAF
379 if (ccsfn->prevStack == CCS_MAIN) {
380 return;
381 }
382
383 ccsapp = reg->rCCCS;
384 reg->rCCCS = CCS_OVERHEAD;
385
386 // common case 3: the stacks are completely different (e.g. one is a
387 // descendent of MAIN and the other of a CAF): we append the whole
388 // of the function stack to the current CCS.
389 if (ccsfn->root != ccsapp->root) {
390 reg->rCCCS = appendCCS(ccsapp,ccsfn);
391 return;
392 }
393
394 // uncommon case 4: ccsapp is deeper than ccsfn
395 if (ccsapp->depth > ccsfn->depth) {
396 uint32_t i, n;
397 CostCentreStack *tmp = ccsapp;
398 n = ccsapp->depth - ccsfn->depth;
399 for (i = 0; i < n; i++) {
400 tmp = tmp->prevStack;
401 }
402 reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
403 return;
404 }
405
406 // uncommon case 5: ccsfn is deeper than CCCS
407 if (ccsfn->depth > ccsapp->depth) {
408 reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
409 ccsfn->depth - ccsapp->depth);
410 return;
411 }
412
413 // uncommon case 6: stacks are equal depth, but different
414 reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
415 }
416
417 /* -----------------------------------------------------------------------------
418 Decide whether closures with this CCS should contribute to the heap
419 profile.
420 -------------------------------------------------------------------------- */
421
422 static void
423 ccsSetSelected (CostCentreStack *ccs)
424 {
425 if (RtsFlags.ProfFlags.modSelector) {
426 if (! strMatchesSelector (ccs->cc->module,
427 RtsFlags.ProfFlags.modSelector) ) {
428 ccs->selected = 0;
429 return;
430 }
431 }
432 if (RtsFlags.ProfFlags.ccSelector) {
433 if (! strMatchesSelector (ccs->cc->label,
434 RtsFlags.ProfFlags.ccSelector) ) {
435 ccs->selected = 0;
436 return;
437 }
438 }
439 if (RtsFlags.ProfFlags.ccsSelector) {
440 CostCentreStack *c;
441 for (c = ccs; c != NULL; c = c->prevStack)
442 {
443 if ( strMatchesSelector (c->cc->label,
444 RtsFlags.ProfFlags.ccsSelector) ) {
445 break;
446 }
447 }
448 if (c == NULL) {
449 ccs->selected = 0;
450 return;
451 }
452 }
453
454 ccs->selected = 1;
455 return;
456 }
457
458 /* -----------------------------------------------------------------------------
459 Cost-centre stack manipulation
460 -------------------------------------------------------------------------- */
461
462 #ifdef DEBUG
463 CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
464 CostCentreStack *
465 pushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
466 #define pushCostCentre _pushCostCentre
467 {
468 IF_DEBUG(prof,
469 traceBegin("pushing %s on ", cc->label);
470 debugCCS(ccs);
471 traceEnd(););
472
473 return pushCostCentre(ccs,cc);
474 }
475 #endif
476
477 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
478
479 #ifdef DEBUG
480 CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
481 CostCentreStack *
482 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
483 #define appendCCS _appendCCS
484 {
485 IF_DEBUG(prof,
486 if (ccs1 != ccs2) {
487 debugBelch("Appending ");
488 debugCCS(ccs1);
489 debugBelch(" to ");
490 debugCCS(ccs2);
491 debugBelch("\n");});
492 return appendCCS(ccs1,ccs2);
493 }
494 #endif
495
496 CostCentreStack *
497 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
498 {
499 if (ccs1 == ccs2) {
500 return ccs1;
501 }
502
503 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) {
504 // stop at a CAF element
505 return ccs1;
506 }
507
508 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
509 }
510
511 // Pick one:
512 // #define RECURSION_DROPS
513 #define RECURSION_TRUNCATES
514
515 CostCentreStack *
516 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
517 {
518 CostCentreStack *temp_ccs, *ret;
519 IndexTable *ixtable;
520
521 if (ccs == EMPTY_STACK) {
522 ACQUIRE_LOCK(&ccs_mutex);
523 ret = actualPush(ccs,cc);
524 }
525 else
526 {
527 if (ccs->cc == cc) {
528 return ccs;
529 } else {
530 // check if we've already memoized this stack
531 ixtable = ccs->indexTable;
532 temp_ccs = isInIndexTable(ixtable,cc);
533
534 if (temp_ccs != EMPTY_STACK) {
535 return temp_ccs;
536 } else {
537
538 // not in the IndexTable, now we take the lock:
539 ACQUIRE_LOCK(&ccs_mutex);
540
541 if (ccs->indexTable != ixtable)
542 {
543 // someone modified ccs->indexTable while
544 // we did not hold the lock, so we must
545 // check it again:
546 temp_ccs = isInIndexTable(ixtable,cc);
547 if (temp_ccs != EMPTY_STACK)
548 {
549 RELEASE_LOCK(&ccs_mutex);
550 return temp_ccs;
551 }
552 }
553 temp_ccs = checkLoop(ccs,cc);
554 if (temp_ccs != NULL) {
555 // This CC is already in the stack somewhere.
556 // This could be recursion, or just calling
557 // another function with the same CC.
558 // A number of policies are possible at this
559 // point, we implement two here:
560 // - truncate the stack to the previous instance
561 // of this CC
562 // - ignore this push, return the same stack.
563 //
564 CostCentreStack *new_ccs;
565 #if defined(RECURSION_TRUNCATES)
566 new_ccs = temp_ccs;
567 #else // defined(RECURSION_DROPS)
568 new_ccs = ccs;
569 #endif
570 ccs->indexTable = addToIndexTable (ccs->indexTable,
571 new_ccs, cc, 1);
572 ret = new_ccs;
573 } else {
574 ret = actualPush (ccs,cc);
575 }
576 }
577 }
578 }
579
580 RELEASE_LOCK(&ccs_mutex);
581 return ret;
582 }
583
584 static CostCentreStack *
585 checkLoop (CostCentreStack *ccs, CostCentre *cc)
586 {
587 while (ccs != EMPTY_STACK) {
588 if (ccs->cc == cc)
589 return ccs;
590 ccs = ccs->prevStack;
591 }
592 return NULL;
593 }
594
595 static CostCentreStack *
596 actualPush (CostCentreStack *ccs, CostCentre *cc)
597 {
598 CostCentreStack *new_ccs;
599
600 // allocate space for a new CostCentreStack
601 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
602
603 return actualPush_(ccs, cc, new_ccs);
604 }
605
606 static CostCentreStack *
607 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
608 {
609 /* assign values to each member of the structure */
610 new_ccs->ccsID = CCS_ID++;
611 new_ccs->cc = cc;
612 new_ccs->prevStack = ccs;
613 new_ccs->root = ccs->root;
614 new_ccs->depth = ccs->depth + 1;
615
616 new_ccs->indexTable = EMPTY_TABLE;
617
618 /* Initialise the various _scc_ counters to zero
619 */
620 new_ccs->scc_count = 0;
621
622 /* Initialize all other stats here. There should be a quick way
623 * that's easily used elsewhere too
624 */
625 new_ccs->time_ticks = 0;
626 new_ccs->mem_alloc = 0;
627 new_ccs->inherited_ticks = 0;
628 new_ccs->inherited_alloc = 0;
629
630 // Set the selected field.
631 ccsSetSelected(new_ccs);
632
633 /* update the memoization table for the parent stack */
634 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
635 0/*not a back edge*/);
636
637 /* return a pointer to the new stack */
638 return new_ccs;
639 }
640
641
642 static CostCentreStack *
643 isInIndexTable(IndexTable *it, CostCentre *cc)
644 {
645 while (it!=EMPTY_TABLE)
646 {
647 if (it->cc == cc)
648 return it->ccs;
649 else
650 it = it->next;
651 }
652
653 /* otherwise we never found it so return EMPTY_TABLE */
654 return EMPTY_TABLE;
655 }
656
657
658 static IndexTable *
659 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
660 CostCentre *cc, unsigned int back_edge)
661 {
662 IndexTable *new_it;
663
664 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
665
666 new_it->cc = cc;
667 new_it->ccs = new_ccs;
668 new_it->next = it;
669 new_it->back_edge = back_edge;
670 return new_it;
671 }
672
673 /* -----------------------------------------------------------------------------
674 Generating a time & allocation profiling report.
675 -------------------------------------------------------------------------- */
676
677 /* We omit certain system-related CCs and CCSs from the default
678 * reports, so as not to cause confusion.
679 */
680 static rtsBool
681 ignoreCC (CostCentre *cc)
682 {
683 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
684 ( cc == CC_OVERHEAD
685 || cc == CC_DONT_CARE
686 || cc == CC_GC
687 || cc == CC_SYSTEM
688 || cc == CC_IDLE)) {
689 return rtsTrue;
690 } else {
691 return rtsFalse;
692 }
693 }
694
695 static rtsBool
696 ignoreCCS (CostCentreStack *ccs)
697 {
698 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
699 ( ccs == CCS_OVERHEAD
700 || ccs == CCS_DONT_CARE
701 || ccs == CCS_GC
702 || ccs == CCS_SYSTEM
703 || ccs == CCS_IDLE)) {
704 return rtsTrue;
705 } else {
706 return rtsFalse;
707 }
708 }
709
710 /* -----------------------------------------------------------------------------
711 Generating the aggregated per-cost-centre time/alloc report.
712 -------------------------------------------------------------------------- */
713
714 static CostCentre *sorted_cc_list;
715
716 static void
717 aggregateCCCosts( CostCentreStack *ccs )
718 {
719 IndexTable *i;
720
721 ccs->cc->mem_alloc += ccs->mem_alloc;
722 ccs->cc->time_ticks += ccs->time_ticks;
723
724 for (i = ccs->indexTable; i != 0; i = i->next) {
725 if (!i->back_edge) {
726 aggregateCCCosts(i->ccs);
727 }
728 }
729 }
730
731 static void
732 insertCCInSortedList( CostCentre *new_cc )
733 {
734 CostCentre **prev, *cc;
735
736 prev = &sorted_cc_list;
737 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
738 if (new_cc->time_ticks > cc->time_ticks) {
739 new_cc->link = cc;
740 *prev = new_cc;
741 return;
742 } else {
743 prev = &(cc->link);
744 }
745 }
746 new_cc->link = NULL;
747 *prev = new_cc;
748 }
749
750 static uint32_t
751 strlen_utf8 (char *s)
752 {
753 uint32_t n = 0;
754 unsigned char c;
755
756 for (; *s != '\0'; s++) {
757 c = *s;
758 if (c < 0x80 || c > 0xBF) n++;
759 }
760 return n;
761 }
762
763 static void
764 reportPerCCCosts( void )
765 {
766 CostCentre *cc, *next;
767 uint32_t max_label_len, max_module_len;
768
769 aggregateCCCosts(CCS_MAIN);
770 sorted_cc_list = NULL;
771
772 max_label_len = 11; // no shorter than the "COST CENTRE" header
773 max_module_len = 6; // no shorter than the "MODULE" header
774
775 for (cc = CC_LIST; cc != NULL; cc = next) {
776 next = cc->link;
777 if (cc->time_ticks > total_prof_ticks/100
778 || cc->mem_alloc > total_alloc/100
779 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
780 insertCCInSortedList(cc);
781
782 max_label_len = stg_max(strlen_utf8(cc->label), max_label_len);
783 max_module_len = stg_max(strlen_utf8(cc->module), max_module_len);
784 }
785 }
786
787 fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
788 fprintf(prof_file, " %6s %6s", "%time", "%alloc");
789 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
790 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
791 }
792 fprintf(prof_file, "\n\n");
793
794 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
795 if (ignoreCC(cc)) {
796 continue;
797 }
798 fprintf(prof_file, "%s%*s %s%*s",
799 cc->label,
800 max_label_len - strlen_utf8(cc->label), "",
801 cc->module,
802 max_module_len - strlen_utf8(cc->module), "");
803
804 fprintf(prof_file, " %6.1f %6.1f",
805 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
806 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
807 total_alloc * 100)
808 );
809
810 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
811 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
812 (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_));
813 }
814 fprintf(prof_file, "\n");
815 }
816
817 fprintf(prof_file,"\n\n");
818 }
819
820 /* -----------------------------------------------------------------------------
821 Generate the cost-centre-stack time/alloc report
822 -------------------------------------------------------------------------- */
823
824 static void
825 fprintHeader( uint32_t max_label_len, uint32_t max_module_len,
826 uint32_t max_id_len )
827 {
828 fprintf(prof_file, "%-*s %-*s %-*s %11s %12s %12s\n",
829 max_label_len, "",
830 max_module_len, "",
831 max_id_len, "",
832 "", "individual", "inherited");
833
834 fprintf(prof_file, "%-*s %-*s %-*s",
835 max_label_len, "COST CENTRE",
836 max_module_len, "MODULE",
837 max_id_len, "no.");
838
839 fprintf(prof_file, " %11s %5s %6s %5s %6s",
840 "entries", "%time", "%alloc", "%time", "%alloc");
841
842 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
843 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
844 }
845
846 fprintf(prof_file, "\n\n");
847 }
848
849 void
850 reportCCSProfiling( void )
851 {
852 uint32_t count;
853 char temp[128]; /* sigh: magic constant */
854
855 stopProfTimer();
856
857 total_prof_ticks = 0;
858 total_alloc = 0;
859 countTickss(CCS_MAIN);
860
861 if (RtsFlags.CcFlags.doCostCentres == 0) return;
862
863 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
864 time_str(), "Final");
865
866 fprintf(prof_file, "\n\t ");
867 fprintf(prof_file, " %s", prog_name);
868 fprintf(prof_file, " +RTS");
869 for (count = 0; rts_argv[count]; count++)
870 fprintf(prof_file, " %s", rts_argv[count]);
871 fprintf(prof_file, " -RTS");
872 for (count = 1; prog_argv[count]; count++)
873 fprintf(prof_file, " %s", prog_argv[count]);
874 fprintf(prof_file, "\n\n");
875
876 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d us, %d processor%s)\n",
877 ((double) total_prof_ticks *
878 (double) RtsFlags.MiscFlags.tickInterval) / (TIME_RESOLUTION * n_capabilities),
879 (unsigned long) total_prof_ticks,
880 (int) TimeToUS(RtsFlags.MiscFlags.tickInterval),
881 n_capabilities, n_capabilities > 1 ? "s" : "");
882
883 fprintf(prof_file, "\ttotal alloc = %11s bytes",
884 showStgWord64(total_alloc * sizeof(W_),
885 temp, rtsTrue/*commas*/));
886
887 fprintf(prof_file, " (excludes profiling overheads)\n\n");
888
889 reportPerCCCosts();
890
891 inheritCosts(CCS_MAIN);
892
893 reportCCS(pruneCCSTree(CCS_MAIN));
894 }
895
896 static uint32_t
897 numDigits(StgInt i) {
898 uint32_t result;
899
900 result = 1;
901
902 if (i < 0) i = 0;
903
904 while (i > 9) {
905 i /= 10;
906 result++;
907 }
908
909 return result;
910 }
911
912 static void
913 findCCSMaxLens(CostCentreStack *ccs, uint32_t indent, uint32_t *max_label_len,
914 uint32_t *max_module_len, uint32_t *max_id_len) {
915 CostCentre *cc;
916 IndexTable *i;
917
918 cc = ccs->cc;
919
920 *max_label_len = stg_max(*max_label_len, indent + strlen_utf8(cc->label));
921 *max_module_len = stg_max(*max_module_len, strlen_utf8(cc->module));
922 *max_id_len = stg_max(*max_id_len, numDigits(ccs->ccsID));
923
924 for (i = ccs->indexTable; i != 0; i = i->next) {
925 if (!i->back_edge) {
926 findCCSMaxLens(i->ccs, indent+1,
927 max_label_len, max_module_len, max_id_len);
928 }
929 }
930 }
931
932 static void
933 logCCS(CostCentreStack *ccs, uint32_t indent,
934 uint32_t max_label_len, uint32_t max_module_len, uint32_t max_id_len)
935 {
936 CostCentre *cc;
937 IndexTable *i;
938
939 cc = ccs->cc;
940
941 /* Only print cost centres with non 0 data ! */
942
943 if (!ignoreCCS(ccs))
944 /* force printing of *all* cost centres if -Pa */
945 {
946
947 fprintf(prof_file, "%-*s%s%*s %s%*s",
948 indent, "",
949 cc->label,
950 max_label_len-indent - strlen_utf8(cc->label), "",
951 cc->module,
952 max_module_len - strlen_utf8(cc->module), "");
953
954 fprintf(prof_file,
955 " %*" FMT_Int "%11" FMT_Word64 " %5.1f %5.1f %5.1f %5.1f",
956 max_id_len, ccs->ccsID, ccs->scc_count,
957 total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
958 total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
959 total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
960 total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
961 );
962
963 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
964 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
965 (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
966 }
967 fprintf(prof_file, "\n");
968 }
969
970 for (i = ccs->indexTable; i != 0; i = i->next) {
971 if (!i->back_edge) {
972 logCCS(i->ccs, indent+1, max_label_len, max_module_len, max_id_len);
973 }
974 }
975 }
976
977 static void
978 reportCCS(CostCentreStack *ccs)
979 {
980 uint32_t max_label_len, max_module_len, max_id_len;
981
982 max_label_len = 11; // no shorter than "COST CENTRE" header
983 max_module_len = 6; // no shorter than "MODULE" header
984 max_id_len = 3; // no shorter than "no." header
985
986 findCCSMaxLens(ccs, 0, &max_label_len, &max_module_len, &max_id_len);
987
988 fprintHeader(max_label_len, max_module_len, max_id_len);
989 logCCS(ccs, 0, max_label_len, max_module_len, max_id_len);
990 }
991
992
993 /* Traverse the cost centre stack tree and accumulate
994 * ticks/allocations.
995 */
996 static void
997 countTickss(CostCentreStack *ccs)
998 {
999 IndexTable *i;
1000
1001 if (!ignoreCCS(ccs)) {
1002 total_alloc += ccs->mem_alloc;
1003 total_prof_ticks += ccs->time_ticks;
1004 }
1005 for (i = ccs->indexTable; i != NULL; i = i->next)
1006 if (!i->back_edge) {
1007 countTickss(i->ccs);
1008 }
1009 }
1010
1011 /* Traverse the cost centre stack tree and inherit ticks & allocs.
1012 */
1013 static void
1014 inheritCosts(CostCentreStack *ccs)
1015 {
1016 IndexTable *i;
1017
1018 if (ignoreCCS(ccs)) { return; }
1019
1020 ccs->inherited_ticks += ccs->time_ticks;
1021 ccs->inherited_alloc += ccs->mem_alloc;
1022
1023 for (i = ccs->indexTable; i != NULL; i = i->next)
1024 if (!i->back_edge) {
1025 inheritCosts(i->ccs);
1026 ccs->inherited_ticks += i->ccs->inherited_ticks;
1027 ccs->inherited_alloc += i->ccs->inherited_alloc;
1028 }
1029
1030 return;
1031 }
1032
1033 //
1034 // Prune CCSs with zero entries, zero ticks or zero allocation from
1035 // the tree, unless COST_CENTRES_ALL is on.
1036 //
1037 static CostCentreStack *
1038 pruneCCSTree (CostCentreStack *ccs)
1039 {
1040 CostCentreStack *ccs1;
1041 IndexTable *i, **prev;
1042
1043 prev = &ccs->indexTable;
1044 for (i = ccs->indexTable; i != 0; i = i->next) {
1045 if (i->back_edge) { continue; }
1046
1047 ccs1 = pruneCCSTree(i->ccs);
1048 if (ccs1 == NULL) {
1049 *prev = i->next;
1050 } else {
1051 prev = &(i->next);
1052 }
1053 }
1054
1055 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
1056 /* force printing of *all* cost centres if -P -P */ )
1057
1058 || ( ccs->indexTable != 0 )
1059 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
1060 ) {
1061 return ccs;
1062 } else {
1063 return NULL;
1064 }
1065 }
1066
1067 void
1068 fprintCCS( FILE *f, CostCentreStack *ccs )
1069 {
1070 fprintf(f,"<");
1071 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1072 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
1073 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1074 fprintf(f,",");
1075 }
1076 }
1077 fprintf(f,">");
1078 }
1079
1080 // Returns: True if the call stack ended with CAF
1081 static rtsBool fprintCallStack (CostCentreStack *ccs)
1082 {
1083 CostCentreStack *prev;
1084
1085 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
1086 prev = ccs->prevStack;
1087 while (prev && prev != CCS_MAIN) {
1088 ccs = prev;
1089 fprintf(stderr, ",\n called from %s.%s",
1090 ccs->cc->module, ccs->cc->label);
1091 prev = ccs->prevStack;
1092 }
1093 fprintf(stderr, "\n");
1094
1095 return (!strncmp(ccs->cc->label, "CAF", 3));
1096 }
1097
1098 /* For calling from .cmm code, where we can't reliably refer to stderr */
1099 void
1100 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
1101 {
1102 rtsBool is_caf;
1103 StgPtr frame;
1104 StgStack *stack;
1105 CostCentreStack *prev_ccs;
1106 uint32_t depth = 0;
1107 const uint32_t MAX_DEPTH = 10; // don't print gigantic chains of stacks
1108
1109 {
1110 char *desc;
1111 StgInfoTable *info;
1112 info = get_itbl(UNTAG_CLOSURE(exception));
1113 switch (info->type) {
1114 case CONSTR:
1115 case CONSTR_1_0:
1116 case CONSTR_0_1:
1117 case CONSTR_2_0:
1118 case CONSTR_1_1:
1119 case CONSTR_0_2:
1120 case CONSTR_STATIC:
1121 case CONSTR_NOCAF_STATIC:
1122 desc = GET_CON_DESC(itbl_to_con_itbl(info));
1123 break;
1124 default:
1125 desc = closure_type_names[info->type];
1126 break;
1127 }
1128 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
1129 }
1130
1131 is_caf = fprintCallStack(ccs);
1132
1133 // traverse the stack down to the enclosing update frame to
1134 // find out where this CCS was evaluated from...
1135
1136 stack = tso->stackobj;
1137 frame = stack->sp;
1138 prev_ccs = ccs;
1139
1140 for (; is_caf && depth < MAX_DEPTH; depth++)
1141 {
1142 switch (get_itbl((StgClosure*)frame)->type)
1143 {
1144 case UPDATE_FRAME:
1145 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
1146 frame += sizeofW(StgUpdateFrame);
1147 if (ccs == CCS_MAIN) {
1148 goto done;
1149 }
1150 if (ccs == prev_ccs) {
1151 // ignore if this is the same as the previous stack,
1152 // we're probably in library code and haven't
1153 // accumulated any more interesting stack items
1154 // since the last update frame.
1155 break;
1156 }
1157 prev_ccs = ccs;
1158 fprintf(stderr, " --> evaluated by: ");
1159 is_caf = fprintCallStack(ccs);
1160 break;
1161 case UNDERFLOW_FRAME:
1162 stack = ((StgUnderflowFrame*)frame)->next_chunk;
1163 frame = stack->sp;
1164 break;
1165 case STOP_FRAME:
1166 goto done;
1167 default:
1168 frame += stack_frame_sizeW((StgClosure*)frame);
1169 break;
1170 }
1171 }
1172 done:
1173 return;
1174 }
1175
1176 #ifdef DEBUG
1177 void
1178 debugCCS( CostCentreStack *ccs )
1179 {
1180 debugBelch("<");
1181 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1182 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
1183 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1184 debugBelch(",");
1185 }
1186 }
1187 debugBelch(">");
1188 }
1189 #endif /* DEBUG */
1190
1191 #endif /* PROFILING */