Fix AnnDotDot in module export
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "Printer.h"
21 #include "Capability.h"
22
23 #include <string.h>
24
25 #ifdef DEBUG
26 #include "Trace.h"
27 #endif
28
29 /*
30 * Profiling allocation arena.
31 */
32 Arena *prof_arena;
33
34 /*
35 * Global variables used to assign unique IDs to cc's, ccs's, and
36 * closure_cats
37 */
38
39 unsigned int CC_ID = 1;
40 unsigned int CCS_ID = 1;
41
42 /* figures for the profiling report.
43 */
44 static StgWord64 total_alloc;
45 static W_ total_prof_ticks;
46
47 /* Globals for opening the profiling log file(s)
48 */
49 static char *prof_filename; /* prof report file name = <program>.prof */
50 FILE *prof_file;
51
52 static char *hp_filename; /* heap profile (hp2ps style) log file */
53 FILE *hp_file;
54
55 /* Linked lists to keep track of CCs and CCSs that haven't
56 * been declared in the log file yet
57 */
58 CostCentre *CC_LIST = NULL;
59 CostCentreStack *CCS_LIST = NULL;
60
61 #ifdef THREADED_RTS
62 Mutex ccs_mutex;
63 #endif
64
65 /*
66 * Built-in cost centres and cost-centre stacks:
67 *
68 * MAIN is the root of the cost-centre stack tree. If there are
69 * no {-# SCC #-}s in the program, all costs will be attributed
70 * to MAIN.
71 *
72 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
73 * RTS operations apart from garbage collection are attributed
74 * to SYSTEM.
75 *
76 * GC is the storage manager / garbage collector.
77 *
78 * OVERHEAD gets all costs generated by the profiling system
79 * itself. These are costs that would not be incurred
80 * during non-profiled execution of the program.
81 *
82 * DONT_CARE is a placeholder cost-centre we assign to static
83 * constructors. It should *never* accumulate any costs.
84 *
85 * PINNED accumulates memory allocated to pinned objects, which
86 * cannot be profiled separately because we cannot reliably
87 * traverse pinned memory.
88 */
89
90 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "<built-in>", CC_NOT_CAF, );
91 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", "<built-in>", CC_NOT_CAF, );
92 CC_DECLARE(CC_GC, "GC", "GC", "<built-in>", CC_NOT_CAF, );
93 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "<built-in>", CC_NOT_CAF, );
94 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", "<built-in>", CC_NOT_CAF, );
95 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", "<built-in>", CC_NOT_CAF, );
96 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "<built-in>", CC_NOT_CAF, );
97
98 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
99 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
100 CCS_DECLARE(CCS_GC, CC_GC, );
101 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
102 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
103 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
104 CCS_DECLARE(CCS_IDLE, CC_IDLE, );
105
106 /*
107 * Static Functions
108 */
109
110 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
111 CostCentreStack *ccs2 );
112 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
113 CostCentreStack *new_ccs );
114 static rtsBool ignoreCCS ( CostCentreStack *ccs );
115 static void countTickss ( CostCentreStack *ccs );
116 static void inheritCosts ( CostCentreStack *ccs );
117 static nat numDigits ( StgInt i );
118 static void findCCSMaxLens ( CostCentreStack *ccs,
119 nat indent,
120 nat *max_label_len,
121 nat *max_module_len,
122 nat *max_id_len );
123 static void logCCS ( CostCentreStack *ccs,
124 nat indent,
125 nat max_label_len,
126 nat max_module_len,
127 nat max_id_len );
128 static void reportCCS ( CostCentreStack *ccs );
129 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
130 CostCentre *cc );
131 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
132 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
133 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
134 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
135 CostCentre *, unsigned int );
136 static void ccsSetSelected ( CostCentreStack *ccs );
137
138 static void initTimeProfiling ( void );
139 static void initProfilingLogFile ( void );
140
141 /* -----------------------------------------------------------------------------
142 Initialise the profiling environment
143 -------------------------------------------------------------------------- */
144
145 void initProfiling (void)
146 {
147 // initialise our arena
148 prof_arena = newArena();
149
150 /* for the benefit of allocate()... */
151 {
152 nat n;
153 for (n=0; n < n_capabilities; n++) {
154 capabilities[n]->r.rCCCS = CCS_SYSTEM;
155 }
156 }
157
158 #ifdef THREADED_RTS
159 initMutex(&ccs_mutex);
160 #endif
161
162 /* Set up the log file, and dump the header and cost centre
163 * information into it.
164 */
165 initProfilingLogFile();
166
167 /* Register all the cost centres / stacks in the program
168 * CC_MAIN gets link = 0, all others have non-zero link.
169 */
170 REGISTER_CC(CC_MAIN);
171 REGISTER_CC(CC_SYSTEM);
172 REGISTER_CC(CC_GC);
173 REGISTER_CC(CC_OVERHEAD);
174 REGISTER_CC(CC_DONT_CARE);
175 REGISTER_CC(CC_PINNED);
176 REGISTER_CC(CC_IDLE);
177
178 REGISTER_CCS(CCS_SYSTEM);
179 REGISTER_CCS(CCS_GC);
180 REGISTER_CCS(CCS_OVERHEAD);
181 REGISTER_CCS(CCS_DONT_CARE);
182 REGISTER_CCS(CCS_PINNED);
183 REGISTER_CCS(CCS_IDLE);
184 REGISTER_CCS(CCS_MAIN);
185
186 /* find all the registered cost centre stacks, and make them
187 * children of CCS_MAIN.
188 */
189 ASSERT(CCS_LIST == CCS_MAIN);
190 CCS_LIST = CCS_LIST->prevStack;
191 CCS_MAIN->prevStack = NULL;
192 CCS_MAIN->root = CCS_MAIN;
193 ccsSetSelected(CCS_MAIN);
194
195 initProfiling2();
196
197 if (RtsFlags.CcFlags.doCostCentres) {
198 initTimeProfiling();
199 }
200
201 if (RtsFlags.ProfFlags.doHeapProfile) {
202 initHeapProfiling();
203 }
204 }
205
206 //
207 // Should be called after loading any new Haskell code.
208 //
209 void initProfiling2 (void)
210 {
211 CostCentreStack *ccs, *next;
212
213 // make CCS_MAIN the parent of all the pre-defined CCSs.
214 for (ccs = CCS_LIST; ccs != NULL; ) {
215 next = ccs->prevStack;
216 ccs->prevStack = NULL;
217 actualPush_(CCS_MAIN,ccs->cc,ccs);
218 ccs->root = ccs;
219 ccs = next;
220 }
221 CCS_LIST = NULL;
222 }
223
224 void
225 freeProfiling (void)
226 {
227 arenaFree(prof_arena);
228 }
229
230 static void
231 initProfilingLogFile(void)
232 {
233 char *prog;
234
235 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
236 strcpy(prog, prog_name);
237 #ifdef mingw32_HOST_OS
238 // on Windows, drop the .exe suffix if there is one
239 {
240 char *suff;
241 suff = strrchr(prog,'.');
242 if (suff != NULL && !strcmp(suff,".exe")) {
243 *suff = '\0';
244 }
245 }
246 #endif
247
248 if (RtsFlags.CcFlags.doCostCentres == 0 &&
249 RtsFlags.ProfFlags.doHeapProfile != HEAP_BY_RETAINER &&
250 RtsFlags.ProfFlags.retainerSelector == NULL)
251 {
252 /* No need for the <prog>.prof file */
253 prof_filename = NULL;
254 prof_file = NULL;
255 }
256 else
257 {
258 /* Initialise the log file name */
259 prof_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
260 sprintf(prof_filename, "%s.prof", prog);
261
262 /* open the log file */
263 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
264 debugBelch("Can't open profiling report file %s\n", prof_filename);
265 RtsFlags.CcFlags.doCostCentres = 0;
266 // The following line was added by Sung; retainer/LDV profiling may need
267 // two output files, i.e., <program>.prof/hp.
268 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
269 RtsFlags.ProfFlags.doHeapProfile = 0;
270 return;
271 }
272 }
273
274 if (RtsFlags.ProfFlags.doHeapProfile) {
275 /* Initialise the log file name */
276 hp_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
277 sprintf(hp_filename, "%s.hp", prog);
278
279 /* open the log file */
280 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
281 debugBelch("Can't open profiling report file %s\n",
282 hp_filename);
283 RtsFlags.ProfFlags.doHeapProfile = 0;
284 return;
285 }
286 }
287 }
288
289 void
290 initTimeProfiling(void)
291 {
292 /* Start ticking */
293 startProfTimer();
294 };
295
296 void
297 endProfiling ( void )
298 {
299 if (RtsFlags.CcFlags.doCostCentres) {
300 stopProfTimer();
301 }
302 if (RtsFlags.ProfFlags.doHeapProfile) {
303 endHeapProfiling();
304 }
305 }
306
307 /* -----------------------------------------------------------------------------
308 Set CCCS when entering a function.
309
310 The algorithm is as follows.
311
312 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
313
314 where
315
316 dropCommonPrefix A B
317 -- returns the suffix of B after removing any prefix common
318 -- to both A and B.
319
320 e.g.
321
322 <a,b,c> ++> <> = <a,b,c>
323 <a,b,c> ++> <d> = <a,b,c,d>
324 <a,b,c> ++> <a,b> = <a,b,c>
325 <a,b> ++> <a,b,c> = <a,b,c>
326 <a,b,c> ++> <a,b,d> = <a,b,c,d>
327
328 -------------------------------------------------------------------------- */
329
330 // implements c1 ++> c2, where c1 and c2 are equal depth
331 //
332 static CostCentreStack *
333 enterFunEqualStacks (CostCentreStack *ccs0,
334 CostCentreStack *ccsapp,
335 CostCentreStack *ccsfn)
336 {
337 ASSERT(ccsapp->depth == ccsfn->depth);
338 if (ccsapp == ccsfn) return ccs0;
339 return pushCostCentre(enterFunEqualStacks(ccs0,
340 ccsapp->prevStack,
341 ccsfn->prevStack),
342 ccsfn->cc);
343 }
344
345 // implements c1 ++> c2, where c2 is deeper than c1.
346 // Drop elements of c2 until we have equal stacks, call
347 // enterFunEqualStacks(), and then push on the elements that we
348 // dropped in reverse order.
349 //
350 static CostCentreStack *
351 enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
352 {
353 if (n == 0) {
354 ASSERT(ccsfn->depth == ccsapp->depth);
355 return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);;
356 } else {
357 ASSERT(ccsfn->depth > ccsapp->depth);
358 return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
359 ccsfn->cc);
360 }
361 }
362
363 void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
364 {
365 CostCentreStack *ccsapp;
366
367 // common case 1: both stacks are the same
368 if (ccsfn == reg->rCCCS) {
369 return;
370 }
371
372 // common case 2: the function stack is empty, or just CAF
373 if (ccsfn->prevStack == CCS_MAIN) {
374 return;
375 }
376
377 ccsapp = reg->rCCCS;
378 reg->rCCCS = CCS_OVERHEAD;
379
380 // common case 3: the stacks are completely different (e.g. one is a
381 // descendent of MAIN and the other of a CAF): we append the whole
382 // of the function stack to the current CCS.
383 if (ccsfn->root != ccsapp->root) {
384 reg->rCCCS = appendCCS(ccsapp,ccsfn);
385 return;
386 }
387
388 // uncommon case 4: ccsapp is deeper than ccsfn
389 if (ccsapp->depth > ccsfn->depth) {
390 nat i, n;
391 CostCentreStack *tmp = ccsapp;
392 n = ccsapp->depth - ccsfn->depth;
393 for (i = 0; i < n; i++) {
394 tmp = tmp->prevStack;
395 }
396 reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
397 return;
398 }
399
400 // uncommon case 5: ccsfn is deeper than CCCS
401 if (ccsfn->depth > ccsapp->depth) {
402 reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
403 ccsfn->depth - ccsapp->depth);
404 return;
405 }
406
407 // uncommon case 6: stacks are equal depth, but different
408 reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
409 }
410
411 /* -----------------------------------------------------------------------------
412 Decide whether closures with this CCS should contribute to the heap
413 profile.
414 -------------------------------------------------------------------------- */
415
416 static void
417 ccsSetSelected (CostCentreStack *ccs)
418 {
419 if (RtsFlags.ProfFlags.modSelector) {
420 if (! strMatchesSelector (ccs->cc->module,
421 RtsFlags.ProfFlags.modSelector) ) {
422 ccs->selected = 0;
423 return;
424 }
425 }
426 if (RtsFlags.ProfFlags.ccSelector) {
427 if (! strMatchesSelector (ccs->cc->label,
428 RtsFlags.ProfFlags.ccSelector) ) {
429 ccs->selected = 0;
430 return;
431 }
432 }
433 if (RtsFlags.ProfFlags.ccsSelector) {
434 CostCentreStack *c;
435 for (c = ccs; c != NULL; c = c->prevStack)
436 {
437 if ( strMatchesSelector (c->cc->label,
438 RtsFlags.ProfFlags.ccsSelector) ) {
439 break;
440 }
441 }
442 if (c == NULL) {
443 ccs->selected = 0;
444 return;
445 }
446 }
447
448 ccs->selected = 1;
449 return;
450 }
451
452 /* -----------------------------------------------------------------------------
453 Cost-centre stack manipulation
454 -------------------------------------------------------------------------- */
455
456 #ifdef DEBUG
457 CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
458 CostCentreStack *
459 pushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
460 #define pushCostCentre _pushCostCentre
461 {
462 IF_DEBUG(prof,
463 traceBegin("pushing %s on ", cc->label);
464 debugCCS(ccs);
465 traceEnd(););
466
467 return pushCostCentre(ccs,cc);
468 }
469 #endif
470
471 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
472
473 #ifdef DEBUG
474 CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
475 CostCentreStack *
476 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
477 #define appendCCS _appendCCS
478 {
479 IF_DEBUG(prof,
480 if (ccs1 != ccs2) {
481 debugBelch("Appending ");
482 debugCCS(ccs1);
483 debugBelch(" to ");
484 debugCCS(ccs2);
485 debugBelch("\n");});
486 return appendCCS(ccs1,ccs2);
487 }
488 #endif
489
490 CostCentreStack *
491 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
492 {
493 if (ccs1 == ccs2) {
494 return ccs1;
495 }
496
497 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) {
498 // stop at a CAF element
499 return ccs1;
500 }
501
502 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
503 }
504
505 // Pick one:
506 // #define RECURSION_DROPS
507 #define RECURSION_TRUNCATES
508
509 CostCentreStack *
510 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
511 {
512 CostCentreStack *temp_ccs, *ret;
513 IndexTable *ixtable;
514
515 if (ccs == EMPTY_STACK) {
516 ACQUIRE_LOCK(&ccs_mutex);
517 ret = actualPush(ccs,cc);
518 }
519 else
520 {
521 if (ccs->cc == cc) {
522 return ccs;
523 } else {
524 // check if we've already memoized this stack
525 ixtable = ccs->indexTable;
526 temp_ccs = isInIndexTable(ixtable,cc);
527
528 if (temp_ccs != EMPTY_STACK) {
529 return temp_ccs;
530 } else {
531
532 // not in the IndexTable, now we take the lock:
533 ACQUIRE_LOCK(&ccs_mutex);
534
535 if (ccs->indexTable != ixtable)
536 {
537 // someone modified ccs->indexTable while
538 // we did not hold the lock, so we must
539 // check it again:
540 temp_ccs = isInIndexTable(ixtable,cc);
541 if (temp_ccs != EMPTY_STACK)
542 {
543 RELEASE_LOCK(&ccs_mutex);
544 return temp_ccs;
545 }
546 }
547 temp_ccs = checkLoop(ccs,cc);
548 if (temp_ccs != NULL) {
549 // This CC is already in the stack somewhere.
550 // This could be recursion, or just calling
551 // another function with the same CC.
552 // A number of policies are possible at this
553 // point, we implement two here:
554 // - truncate the stack to the previous instance
555 // of this CC
556 // - ignore this push, return the same stack.
557 //
558 CostCentreStack *new_ccs;
559 #if defined(RECURSION_TRUNCATES)
560 new_ccs = temp_ccs;
561 #else // defined(RECURSION_DROPS)
562 new_ccs = ccs;
563 #endif
564 ccs->indexTable = addToIndexTable (ccs->indexTable,
565 new_ccs, cc, 1);
566 ret = new_ccs;
567 } else {
568 ret = actualPush (ccs,cc);
569 }
570 }
571 }
572 }
573
574 RELEASE_LOCK(&ccs_mutex);
575 return ret;
576 }
577
578 static CostCentreStack *
579 checkLoop (CostCentreStack *ccs, CostCentre *cc)
580 {
581 while (ccs != EMPTY_STACK) {
582 if (ccs->cc == cc)
583 return ccs;
584 ccs = ccs->prevStack;
585 }
586 return NULL;
587 }
588
589 static CostCentreStack *
590 actualPush (CostCentreStack *ccs, CostCentre *cc)
591 {
592 CostCentreStack *new_ccs;
593
594 // allocate space for a new CostCentreStack
595 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
596
597 return actualPush_(ccs, cc, new_ccs);
598 }
599
600 static CostCentreStack *
601 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
602 {
603 /* assign values to each member of the structure */
604 new_ccs->ccsID = CCS_ID++;
605 new_ccs->cc = cc;
606 new_ccs->prevStack = ccs;
607 new_ccs->root = ccs->root;
608 new_ccs->depth = ccs->depth + 1;
609
610 new_ccs->indexTable = EMPTY_TABLE;
611
612 /* Initialise the various _scc_ counters to zero
613 */
614 new_ccs->scc_count = 0;
615
616 /* Initialize all other stats here. There should be a quick way
617 * that's easily used elsewhere too
618 */
619 new_ccs->time_ticks = 0;
620 new_ccs->mem_alloc = 0;
621 new_ccs->inherited_ticks = 0;
622 new_ccs->inherited_alloc = 0;
623
624 // Set the selected field.
625 ccsSetSelected(new_ccs);
626
627 /* update the memoization table for the parent stack */
628 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
629 0/*not a back edge*/);
630
631 /* return a pointer to the new stack */
632 return new_ccs;
633 }
634
635
636 static CostCentreStack *
637 isInIndexTable(IndexTable *it, CostCentre *cc)
638 {
639 while (it!=EMPTY_TABLE)
640 {
641 if (it->cc == cc)
642 return it->ccs;
643 else
644 it = it->next;
645 }
646
647 /* otherwise we never found it so return EMPTY_TABLE */
648 return EMPTY_TABLE;
649 }
650
651
652 static IndexTable *
653 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
654 CostCentre *cc, unsigned int back_edge)
655 {
656 IndexTable *new_it;
657
658 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
659
660 new_it->cc = cc;
661 new_it->ccs = new_ccs;
662 new_it->next = it;
663 new_it->back_edge = back_edge;
664 return new_it;
665 }
666
667 /* -----------------------------------------------------------------------------
668 Generating a time & allocation profiling report.
669 -------------------------------------------------------------------------- */
670
671 /* We omit certain system-related CCs and CCSs from the default
672 * reports, so as not to cause confusion.
673 */
674 static rtsBool
675 ignoreCC (CostCentre *cc)
676 {
677 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
678 ( cc == CC_OVERHEAD
679 || cc == CC_DONT_CARE
680 || cc == CC_GC
681 || cc == CC_SYSTEM
682 || cc == CC_IDLE)) {
683 return rtsTrue;
684 } else {
685 return rtsFalse;
686 }
687 }
688
689 static rtsBool
690 ignoreCCS (CostCentreStack *ccs)
691 {
692 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
693 ( ccs == CCS_OVERHEAD
694 || ccs == CCS_DONT_CARE
695 || ccs == CCS_GC
696 || ccs == CCS_SYSTEM
697 || ccs == CCS_IDLE)) {
698 return rtsTrue;
699 } else {
700 return rtsFalse;
701 }
702 }
703
704 /* -----------------------------------------------------------------------------
705 Generating the aggregated per-cost-centre time/alloc report.
706 -------------------------------------------------------------------------- */
707
708 static CostCentre *sorted_cc_list;
709
710 static void
711 aggregateCCCosts( CostCentreStack *ccs )
712 {
713 IndexTable *i;
714
715 ccs->cc->mem_alloc += ccs->mem_alloc;
716 ccs->cc->time_ticks += ccs->time_ticks;
717
718 for (i = ccs->indexTable; i != 0; i = i->next) {
719 if (!i->back_edge) {
720 aggregateCCCosts(i->ccs);
721 }
722 }
723 }
724
725 static void
726 insertCCInSortedList( CostCentre *new_cc )
727 {
728 CostCentre **prev, *cc;
729
730 prev = &sorted_cc_list;
731 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
732 if (new_cc->time_ticks > cc->time_ticks) {
733 new_cc->link = cc;
734 *prev = new_cc;
735 return;
736 } else {
737 prev = &(cc->link);
738 }
739 }
740 new_cc->link = NULL;
741 *prev = new_cc;
742 }
743
744 static nat
745 strlen_utf8 (char *s)
746 {
747 nat n = 0;
748 unsigned char c;
749
750 for (; *s != '\0'; s++) {
751 c = *s;
752 if (c < 0x80 || c > 0xBF) n++;
753 }
754 return n;
755 }
756
757 static void
758 reportPerCCCosts( void )
759 {
760 CostCentre *cc, *next;
761 nat max_label_len, max_module_len;
762
763 aggregateCCCosts(CCS_MAIN);
764 sorted_cc_list = NULL;
765
766 max_label_len = 11; // no shorter than the "COST CENTRE" header
767 max_module_len = 6; // no shorter than the "MODULE" header
768
769 for (cc = CC_LIST; cc != NULL; cc = next) {
770 next = cc->link;
771 if (cc->time_ticks > total_prof_ticks/100
772 || cc->mem_alloc > total_alloc/100
773 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
774 insertCCInSortedList(cc);
775
776 max_label_len = stg_max(strlen_utf8(cc->label), max_label_len);
777 max_module_len = stg_max(strlen_utf8(cc->module), max_module_len);
778 }
779 }
780
781 fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
782 fprintf(prof_file, " %6s %6s", "%time", "%alloc");
783 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
784 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
785 }
786 fprintf(prof_file, "\n\n");
787
788 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
789 if (ignoreCC(cc)) {
790 continue;
791 }
792 fprintf(prof_file, "%s%*s %s%*s",
793 cc->label,
794 max_label_len - strlen_utf8(cc->label), "",
795 cc->module,
796 max_module_len - strlen_utf8(cc->module), "");
797
798 fprintf(prof_file, " %6.1f %6.1f",
799 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
800 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
801 total_alloc * 100)
802 );
803
804 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
805 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
806 (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_));
807 }
808 fprintf(prof_file, "\n");
809 }
810
811 fprintf(prof_file,"\n\n");
812 }
813
814 /* -----------------------------------------------------------------------------
815 Generate the cost-centre-stack time/alloc report
816 -------------------------------------------------------------------------- */
817
818 static void
819 fprintHeader( nat max_label_len, nat max_module_len, nat max_id_len )
820 {
821 fprintf(prof_file, "%-*s %-*s %-*s %11s %12s %12s\n",
822 max_label_len, "",
823 max_module_len, "",
824 max_id_len, "",
825 "", "individual", "inherited");
826
827 fprintf(prof_file, "%-*s %-*s %-*s",
828 max_label_len, "COST CENTRE",
829 max_module_len, "MODULE",
830 max_id_len, "no.");
831
832 fprintf(prof_file, " %11s %5s %6s %5s %6s",
833 "entries", "%time", "%alloc", "%time", "%alloc");
834
835 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
836 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
837 }
838
839 fprintf(prof_file, "\n\n");
840 }
841
842 void
843 reportCCSProfiling( void )
844 {
845 nat count;
846 char temp[128]; /* sigh: magic constant */
847
848 stopProfTimer();
849
850 total_prof_ticks = 0;
851 total_alloc = 0;
852 countTickss(CCS_MAIN);
853
854 if (RtsFlags.CcFlags.doCostCentres == 0) return;
855
856 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
857 time_str(), "Final");
858
859 fprintf(prof_file, "\n\t ");
860 fprintf(prof_file, " %s", prog_name);
861 fprintf(prof_file, " +RTS");
862 for (count = 0; rts_argv[count]; count++)
863 fprintf(prof_file, " %s", rts_argv[count]);
864 fprintf(prof_file, " -RTS");
865 for (count = 1; prog_argv[count]; count++)
866 fprintf(prof_file, " %s", prog_argv[count]);
867 fprintf(prof_file, "\n\n");
868
869 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d us, %d processor%s)\n",
870 ((double) total_prof_ticks *
871 (double) RtsFlags.MiscFlags.tickInterval) / (TIME_RESOLUTION * n_capabilities),
872 (unsigned long) total_prof_ticks,
873 (int) TimeToUS(RtsFlags.MiscFlags.tickInterval),
874 n_capabilities, n_capabilities > 1 ? "s" : "");
875
876 fprintf(prof_file, "\ttotal alloc = %11s bytes",
877 showStgWord64(total_alloc * sizeof(W_),
878 temp, rtsTrue/*commas*/));
879
880 fprintf(prof_file, " (excludes profiling overheads)\n\n");
881
882 reportPerCCCosts();
883
884 inheritCosts(CCS_MAIN);
885
886 reportCCS(pruneCCSTree(CCS_MAIN));
887 }
888
889 static nat
890 numDigits(StgInt i) {
891 nat result;
892
893 result = 1;
894
895 if (i < 0) i = 0;
896
897 while (i > 9) {
898 i /= 10;
899 result++;
900 }
901
902 return result;
903 }
904
905 static void
906 findCCSMaxLens(CostCentreStack *ccs, nat indent,
907 nat *max_label_len, nat *max_module_len, nat *max_id_len) {
908 CostCentre *cc;
909 IndexTable *i;
910
911 cc = ccs->cc;
912
913 *max_label_len = stg_max(*max_label_len, indent + strlen_utf8(cc->label));
914 *max_module_len = stg_max(*max_module_len, strlen_utf8(cc->module));
915 *max_id_len = stg_max(*max_id_len, numDigits(ccs->ccsID));
916
917 for (i = ccs->indexTable; i != 0; i = i->next) {
918 if (!i->back_edge) {
919 findCCSMaxLens(i->ccs, indent+1,
920 max_label_len, max_module_len, max_id_len);
921 }
922 }
923 }
924
925 static void
926 logCCS(CostCentreStack *ccs, nat indent,
927 nat max_label_len, nat max_module_len, nat max_id_len)
928 {
929 CostCentre *cc;
930 IndexTable *i;
931
932 cc = ccs->cc;
933
934 /* Only print cost centres with non 0 data ! */
935
936 if (!ignoreCCS(ccs))
937 /* force printing of *all* cost centres if -Pa */
938 {
939
940 fprintf(prof_file, "%-*s%s%*s %s%*s",
941 indent, "",
942 cc->label,
943 max_label_len-indent - strlen_utf8(cc->label), "",
944 cc->module,
945 max_module_len - strlen_utf8(cc->module), "");
946
947 fprintf(prof_file,
948 " %*ld %11" FMT_Word64 " %5.1f %5.1f %5.1f %5.1f",
949 max_id_len, ccs->ccsID, ccs->scc_count,
950 total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
951 total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
952 total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
953 total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
954 );
955
956 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
957 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
958 (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
959 }
960 fprintf(prof_file, "\n");
961 }
962
963 for (i = ccs->indexTable; i != 0; i = i->next) {
964 if (!i->back_edge) {
965 logCCS(i->ccs, indent+1, max_label_len, max_module_len, max_id_len);
966 }
967 }
968 }
969
970 static void
971 reportCCS(CostCentreStack *ccs)
972 {
973 nat max_label_len, max_module_len, max_id_len;
974
975 max_label_len = 11; // no shorter than "COST CENTRE" header
976 max_module_len = 6; // no shorter than "MODULE" header
977 max_id_len = 3; // no shorter than "no." header
978
979 findCCSMaxLens(ccs, 0, &max_label_len, &max_module_len, &max_id_len);
980
981 fprintHeader(max_label_len, max_module_len, max_id_len);
982 logCCS(ccs, 0, max_label_len, max_module_len, max_id_len);
983 }
984
985
986 /* Traverse the cost centre stack tree and accumulate
987 * ticks/allocations.
988 */
989 static void
990 countTickss(CostCentreStack *ccs)
991 {
992 IndexTable *i;
993
994 if (!ignoreCCS(ccs)) {
995 total_alloc += ccs->mem_alloc;
996 total_prof_ticks += ccs->time_ticks;
997 }
998 for (i = ccs->indexTable; i != NULL; i = i->next)
999 if (!i->back_edge) {
1000 countTickss(i->ccs);
1001 }
1002 }
1003
1004 /* Traverse the cost centre stack tree and inherit ticks & allocs.
1005 */
1006 static void
1007 inheritCosts(CostCentreStack *ccs)
1008 {
1009 IndexTable *i;
1010
1011 if (ignoreCCS(ccs)) { return; }
1012
1013 ccs->inherited_ticks += ccs->time_ticks;
1014 ccs->inherited_alloc += ccs->mem_alloc;
1015
1016 for (i = ccs->indexTable; i != NULL; i = i->next)
1017 if (!i->back_edge) {
1018 inheritCosts(i->ccs);
1019 ccs->inherited_ticks += i->ccs->inherited_ticks;
1020 ccs->inherited_alloc += i->ccs->inherited_alloc;
1021 }
1022
1023 return;
1024 }
1025
1026 //
1027 // Prune CCSs with zero entries, zero ticks or zero allocation from
1028 // the tree, unless COST_CENTRES_ALL is on.
1029 //
1030 static CostCentreStack *
1031 pruneCCSTree (CostCentreStack *ccs)
1032 {
1033 CostCentreStack *ccs1;
1034 IndexTable *i, **prev;
1035
1036 prev = &ccs->indexTable;
1037 for (i = ccs->indexTable; i != 0; i = i->next) {
1038 if (i->back_edge) { continue; }
1039
1040 ccs1 = pruneCCSTree(i->ccs);
1041 if (ccs1 == NULL) {
1042 *prev = i->next;
1043 } else {
1044 prev = &(i->next);
1045 }
1046 }
1047
1048 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
1049 /* force printing of *all* cost centres if -P -P */ )
1050
1051 || ( ccs->indexTable != 0 )
1052 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
1053 ) {
1054 return ccs;
1055 } else {
1056 return NULL;
1057 }
1058 }
1059
1060 void
1061 fprintCCS( FILE *f, CostCentreStack *ccs )
1062 {
1063 fprintf(f,"<");
1064 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1065 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
1066 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1067 fprintf(f,",");
1068 }
1069 }
1070 fprintf(f,">");
1071 }
1072
1073 // Returns: True if the call stack ended with CAF
1074 static rtsBool fprintCallStack (CostCentreStack *ccs)
1075 {
1076 CostCentreStack *prev;
1077
1078 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
1079 prev = ccs->prevStack;
1080 while (prev && prev != CCS_MAIN) {
1081 ccs = prev;
1082 fprintf(stderr, ",\n called from %s.%s",
1083 ccs->cc->module, ccs->cc->label);
1084 prev = ccs->prevStack;
1085 }
1086 fprintf(stderr, "\n");
1087
1088 return (!strncmp(ccs->cc->label, "CAF", 3));
1089 }
1090
1091 /* For calling from .cmm code, where we can't reliably refer to stderr */
1092 void
1093 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
1094 {
1095 rtsBool is_caf;
1096 StgPtr frame;
1097 StgStack *stack;
1098 CostCentreStack *prev_ccs;
1099 nat depth = 0;
1100 const nat MAX_DEPTH = 10; // don't print gigantic chains of stacks
1101
1102 {
1103 char *desc;
1104 StgInfoTable *info;
1105 info = get_itbl(UNTAG_CLOSURE(exception));
1106 switch (info->type) {
1107 case CONSTR:
1108 case CONSTR_1_0:
1109 case CONSTR_0_1:
1110 case CONSTR_2_0:
1111 case CONSTR_1_1:
1112 case CONSTR_0_2:
1113 case CONSTR_STATIC:
1114 case CONSTR_NOCAF_STATIC:
1115 desc = GET_CON_DESC(itbl_to_con_itbl(info));
1116 break;
1117 default:
1118 desc = closure_type_names[info->type];
1119 break;
1120 }
1121 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
1122 }
1123
1124 is_caf = fprintCallStack(ccs);
1125
1126 // traverse the stack down to the enclosing update frame to
1127 // find out where this CCS was evaluated from...
1128
1129 stack = tso->stackobj;
1130 frame = stack->sp;
1131 prev_ccs = ccs;
1132
1133 for (; is_caf && depth < MAX_DEPTH; depth++)
1134 {
1135 switch (get_itbl((StgClosure*)frame)->type)
1136 {
1137 case UPDATE_FRAME:
1138 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
1139 frame += sizeofW(StgUpdateFrame);
1140 if (ccs == CCS_MAIN) {
1141 goto done;
1142 }
1143 if (ccs == prev_ccs) {
1144 // ignore if this is the same as the previous stack,
1145 // we're probably in library code and haven't
1146 // accumulated any more interesting stack items
1147 // since the last update frame.
1148 break;
1149 }
1150 prev_ccs = ccs;
1151 fprintf(stderr, " --> evaluated by: ");
1152 is_caf = fprintCallStack(ccs);
1153 break;
1154 case UNDERFLOW_FRAME:
1155 stack = ((StgUnderflowFrame*)frame)->next_chunk;
1156 frame = stack->sp;
1157 break;
1158 case STOP_FRAME:
1159 goto done;
1160 default:
1161 frame += stack_frame_sizeW((StgClosure*)frame);
1162 break;
1163 }
1164 }
1165 done:
1166 return;
1167 }
1168
1169 #ifdef DEBUG
1170 void
1171 debugCCS( CostCentreStack *ccs )
1172 {
1173 debugBelch("<");
1174 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1175 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
1176 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1177 debugBelch(",");
1178 }
1179 }
1180 debugBelch(">");
1181 }
1182 #endif /* DEBUG */
1183
1184 #endif /* PROFILING */