Make profiling work with multiple capabilities (+RTS -N)
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "Printer.h"
21 #include "Capability.h"
22
23 #include <string.h>
24
25 #ifdef DEBUG
26 #include "Trace.h"
27 #endif
28
29 /*
30 * Profiling allocation arena.
31 */
32 Arena *prof_arena;
33
34 /*
35 * Global variables used to assign unique IDs to cc's, ccs's, and
36 * closure_cats
37 */
38
39 unsigned int CC_ID = 1;
40 unsigned int CCS_ID = 1;
41
42 /* figures for the profiling report.
43 */
44 static StgWord64 total_alloc;
45 static lnat total_prof_ticks;
46
47 /* Globals for opening the profiling log file(s)
48 */
49 static char *prof_filename; /* prof report file name = <program>.prof */
50 FILE *prof_file;
51
52 static char *hp_filename; /* heap profile (hp2ps style) log file */
53 FILE *hp_file;
54
55 /* Linked lists to keep track of CCs and CCSs that haven't
56 * been declared in the log file yet
57 */
58 CostCentre *CC_LIST = NULL;
59 CostCentreStack *CCS_LIST = NULL;
60
61 #ifdef THREADED_RTS
62 Mutex ccs_mutex;
63 #endif
64
65 /*
66 * Built-in cost centres and cost-centre stacks:
67 *
68 * MAIN is the root of the cost-centre stack tree. If there are
69 * no _scc_s in the program, all costs will be attributed
70 * to MAIN.
71 *
72 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
73 * RTS operations apart from garbage collection are attributed
74 * to SYSTEM.
75 *
76 * GC is the storage manager / garbage collector.
77 *
78 * OVERHEAD gets all costs generated by the profiling system
79 * itself. These are costs that would not be incurred
80 * during non-profiled execution of the program.
81 *
82 * DONT_CARE is a placeholder cost-centre we assign to static
83 * constructors. It should *never* accumulate any costs.
84 *
85 * PINNED accumulates memory allocated to pinned objects, which
86 * cannot be profiled separately because we cannot reliably
87 * traverse pinned memory.
88 */
89
90 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_NOT_CAF, );
91 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", CC_NOT_CAF, );
92 CC_DECLARE(CC_GC, "GC", "GC", CC_NOT_CAF, );
93 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_NOT_CAF, );
94 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_NOT_CAF, );
95 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", CC_NOT_CAF, );
96 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", CC_NOT_CAF, );
97
98 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
99 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
100 CCS_DECLARE(CCS_GC, CC_GC, );
101 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
102 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
103 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
104 CCS_DECLARE(CCS_IDLE, CC_IDLE, );
105
106 /*
107 * Static Functions
108 */
109
110 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
111 CostCentreStack *ccs2 );
112 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
113 CostCentreStack *new_ccs );
114 static rtsBool ignoreCCS ( CostCentreStack *ccs );
115 static void countTickss ( CostCentreStack *ccs );
116 static void inheritCosts ( CostCentreStack *ccs );
117 static void findCCSMaxLens ( CostCentreStack *ccs,
118 nat indent,
119 nat *max_label_len,
120 nat *max_module_len );
121 static void logCCS ( CostCentreStack *ccs,
122 nat indent,
123 nat max_label_len,
124 nat max_module_len );
125 static void reportCCS ( CostCentreStack *ccs );
126 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
127 CostCentre *cc );
128 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
129 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
130 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
131 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
132 CostCentre *, unsigned int );
133 static void ccsSetSelected ( CostCentreStack *ccs );
134
135 static void initTimeProfiling ( void );
136 static void initProfilingLogFile ( void );
137
138 /* -----------------------------------------------------------------------------
139 Initialise the profiling environment
140 -------------------------------------------------------------------------- */
141
142 void
143 initProfiling1 (void)
144 {
145 // initialise our arena
146 prof_arena = newArena();
147
148 /* for the benefit of allocate()... */
149 {
150 nat n;
151 for (n=0; n < n_capabilities; n++) {
152 capabilities[n].r.rCCCS = CCS_SYSTEM;
153 }
154 }
155 }
156
157 void
158 freeProfiling (void)
159 {
160 arenaFree(prof_arena);
161 }
162
163 void
164 initProfiling2 (void)
165 {
166 CostCentreStack *ccs, *next;
167
168 /* Set up the log file, and dump the header and cost centre
169 * information into it.
170 */
171 initProfilingLogFile();
172
173 /* Register all the cost centres / stacks in the program
174 * CC_MAIN gets link = 0, all others have non-zero link.
175 */
176 REGISTER_CC(CC_MAIN);
177 REGISTER_CC(CC_SYSTEM);
178 REGISTER_CC(CC_GC);
179 REGISTER_CC(CC_OVERHEAD);
180 REGISTER_CC(CC_DONT_CARE);
181 REGISTER_CC(CC_PINNED);
182 REGISTER_CC(CC_IDLE);
183
184 REGISTER_CCS(CCS_SYSTEM);
185 REGISTER_CCS(CCS_GC);
186 REGISTER_CCS(CCS_OVERHEAD);
187 REGISTER_CCS(CCS_DONT_CARE);
188 REGISTER_CCS(CCS_PINNED);
189 REGISTER_CCS(CCS_IDLE);
190 REGISTER_CCS(CCS_MAIN);
191
192 /* find all the registered cost centre stacks, and make them
193 * children of CCS_MAIN.
194 */
195 ASSERT(CCS_LIST == CCS_MAIN);
196 CCS_LIST = CCS_LIST->prevStack;
197 CCS_MAIN->prevStack = NULL;
198 CCS_MAIN->root = CCS_MAIN;
199 ccsSetSelected(CCS_MAIN);
200
201 // make CCS_MAIN the parent of all the pre-defined CCSs.
202 for (ccs = CCS_LIST; ccs != NULL; ) {
203 next = ccs->prevStack;
204 ccs->prevStack = NULL;
205 actualPush_(CCS_MAIN,ccs->cc,ccs);
206 ccs->root = ccs;
207 ccs = next;
208 }
209
210 if (RtsFlags.CcFlags.doCostCentres) {
211 initTimeProfiling();
212 }
213
214 if (RtsFlags.ProfFlags.doHeapProfile) {
215 initHeapProfiling();
216 }
217 }
218
219
220 static void
221 initProfilingLogFile(void)
222 {
223 char *prog;
224
225 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
226 strcpy(prog, prog_name);
227 #ifdef mingw32_HOST_OS
228 // on Windows, drop the .exe suffix if there is one
229 {
230 char *suff;
231 suff = strrchr(prog,'.');
232 if (suff != NULL && !strcmp(suff,".exe")) {
233 *suff = '\0';
234 }
235 }
236 #endif
237
238 if (RtsFlags.CcFlags.doCostCentres == 0 &&
239 RtsFlags.ProfFlags.doHeapProfile != HEAP_BY_RETAINER)
240 {
241 /* No need for the <prog>.prof file */
242 prof_filename = NULL;
243 prof_file = NULL;
244 }
245 else
246 {
247 /* Initialise the log file name */
248 prof_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
249 sprintf(prof_filename, "%s.prof", prog);
250
251 /* open the log file */
252 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
253 debugBelch("Can't open profiling report file %s\n", prof_filename);
254 RtsFlags.CcFlags.doCostCentres = 0;
255 // The following line was added by Sung; retainer/LDV profiling may need
256 // two output files, i.e., <program>.prof/hp.
257 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
258 RtsFlags.ProfFlags.doHeapProfile = 0;
259 return;
260 }
261 }
262
263 if (RtsFlags.ProfFlags.doHeapProfile) {
264 /* Initialise the log file name */
265 hp_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
266 sprintf(hp_filename, "%s.hp", prog);
267
268 /* open the log file */
269 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
270 debugBelch("Can't open profiling report file %s\n",
271 hp_filename);
272 RtsFlags.ProfFlags.doHeapProfile = 0;
273 return;
274 }
275 }
276 }
277
278 void
279 initTimeProfiling(void)
280 {
281 /* Start ticking */
282 startProfTimer();
283 };
284
285 void
286 endProfiling ( void )
287 {
288 if (RtsFlags.CcFlags.doCostCentres) {
289 stopProfTimer();
290 }
291 if (RtsFlags.ProfFlags.doHeapProfile) {
292 endHeapProfiling();
293 }
294 }
295
296 /* -----------------------------------------------------------------------------
297 Set CCCS when entering a function.
298
299 The algorithm is as follows.
300
301 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
302
303 where
304
305 dropCommonPrefix A B
306 -- returns the suffix of B after removing any prefix common
307 -- to both A and B.
308
309 e.g.
310
311 <a,b,c> ++> <> = <a,b,c>
312 <a,b,c> ++> <d> = <a,b,c,d>
313 <a,b,c> ++> <a,b> = <a,b,c>
314 <a,b> ++> <a,b,c> = <a,b,c>
315 <a,b,c> ++> <a,b,d> = <a,b,c,d>
316
317 -------------------------------------------------------------------------- */
318
319 // implements c1 ++> c2, where c1 and c2 are equal depth
320 //
321 static CostCentreStack *
322 enterFunEqualStacks (CostCentreStack *ccs0,
323 CostCentreStack *ccsapp,
324 CostCentreStack *ccsfn)
325 {
326 ASSERT(ccsapp->depth == ccsfn->depth);
327 if (ccsapp == ccsfn) return ccs0;
328 return pushCostCentre(enterFunEqualStacks(ccs0,
329 ccsapp->prevStack,
330 ccsfn->prevStack),
331 ccsfn->cc);
332 }
333
334 // implements c1 ++> c2, where c2 is deeper than c1.
335 // Drop elements of c2 until we have equal stacks, call
336 // enterFunEqualStacks(), and then push on the elements that we
337 // dropped in reverse order.
338 //
339 static CostCentreStack *
340 enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
341 {
342 if (n == 0) {
343 ASSERT(ccsfn->depth == ccsapp->depth);
344 return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);;
345 } else {
346 ASSERT(ccsfn->depth > ccsapp->depth);
347 return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
348 ccsfn->cc);
349 }
350 }
351
352 void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
353 {
354 CostCentreStack *ccsapp;
355
356 // common case 1: both stacks are the same
357 if (ccsfn == reg->rCCCS) {
358 return;
359 }
360
361 // common case 2: the function stack is empty, or just CAF
362 if (ccsfn->prevStack == CCS_MAIN) {
363 return;
364 }
365
366 ccsapp = reg->rCCCS;
367 reg->rCCCS = CCS_OVERHEAD;
368
369 // common case 3: the stacks are completely different (e.g. one is a
370 // descendent of MAIN and the other of a CAF): we append the whole
371 // of the function stack to the current CCS.
372 if (ccsfn->root != ccsapp->root) {
373 reg->rCCCS = appendCCS(ccsapp,ccsfn);
374 return;
375 }
376
377 // uncommon case 4: ccsapp is deeper than ccsfn
378 if (ccsapp->depth > ccsfn->depth) {
379 nat i, n;
380 CostCentreStack *tmp = ccsapp;
381 n = ccsapp->depth - ccsfn->depth;
382 for (i = 0; i < n; i++) {
383 tmp = tmp->prevStack;
384 }
385 reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
386 return;
387 }
388
389 // uncommon case 5: ccsfn is deeper than CCCS
390 if (ccsfn->depth > ccsapp->depth) {
391 reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
392 ccsfn->depth - ccsapp->depth);
393 return;
394 }
395
396 // uncommon case 6: stacks are equal depth, but different
397 reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
398 }
399
400 /* -----------------------------------------------------------------------------
401 Decide whether closures with this CCS should contribute to the heap
402 profile.
403 -------------------------------------------------------------------------- */
404
405 static void
406 ccsSetSelected (CostCentreStack *ccs)
407 {
408 if (RtsFlags.ProfFlags.modSelector) {
409 if (! strMatchesSelector (ccs->cc->module,
410 RtsFlags.ProfFlags.modSelector) ) {
411 ccs->selected = 0;
412 return;
413 }
414 }
415 if (RtsFlags.ProfFlags.ccSelector) {
416 if (! strMatchesSelector (ccs->cc->label,
417 RtsFlags.ProfFlags.ccSelector) ) {
418 ccs->selected = 0;
419 return;
420 }
421 }
422 if (RtsFlags.ProfFlags.ccsSelector) {
423 CostCentreStack *c;
424 for (c = ccs; c != NULL; c = c->prevStack)
425 {
426 if ( strMatchesSelector (c->cc->label,
427 RtsFlags.ProfFlags.ccsSelector) ) {
428 break;
429 }
430 }
431 if (c == NULL) {
432 ccs->selected = 0;
433 return;
434 }
435 }
436
437 ccs->selected = 1;
438 return;
439 }
440
441 /* -----------------------------------------------------------------------------
442 Cost-centre stack manipulation
443 -------------------------------------------------------------------------- */
444
445 #ifdef DEBUG
446 CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
447 CostCentreStack *
448 pushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
449 #define pushCostCentre _pushCostCentre
450 {
451 IF_DEBUG(prof,
452 traceBegin("pushing %s on ", cc->label);
453 debugCCS(ccs);
454 traceEnd(););
455
456 return pushCostCentre(ccs,cc);
457 }
458 #endif
459
460 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
461
462 #ifdef DEBUG
463 CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
464 CostCentreStack *
465 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
466 #define appendCCS _appendCCS
467 {
468 IF_DEBUG(prof,
469 if (ccs1 != ccs2) {
470 debugBelch("Appending ");
471 debugCCS(ccs1);
472 debugBelch(" to ");
473 debugCCS(ccs2);
474 debugBelch("\n");});
475 return appendCCS(ccs1,ccs2);
476 }
477 #endif
478
479 CostCentreStack *
480 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
481 {
482 if (ccs1 == ccs2) {
483 return ccs1;
484 }
485
486 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) {
487 // stop at a CAF element
488 return ccs1;
489 }
490
491 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
492 }
493
494 // Pick one:
495 // #define RECURSION_DROPS
496 #define RECURSION_TRUNCATES
497
498 CostCentreStack *
499 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
500 {
501 CostCentreStack *temp_ccs, *ret;
502 IndexTable *ixtable;
503
504 if (ccs == EMPTY_STACK) {
505 ACQUIRE_LOCK(&ccs_mutex);
506 ret = actualPush(ccs,cc);
507 }
508 else
509 {
510 if (ccs->cc == cc) {
511 return ccs;
512 } else {
513 // check if we've already memoized this stack
514 ixtable = ccs->indexTable;
515 temp_ccs = isInIndexTable(ixtable,cc);
516
517 if (temp_ccs != EMPTY_STACK) {
518 return temp_ccs;
519 } else {
520
521 // not in the IndexTable, now we take the lock:
522 ACQUIRE_LOCK(&ccs_mutex);
523
524 if (ccs->indexTable != ixtable)
525 {
526 // someone modified ccs->indexTable while
527 // we did not hold the lock, so we must
528 // check it again:
529 temp_ccs = isInIndexTable(ixtable,cc);
530 if (temp_ccs != EMPTY_STACK)
531 {
532 RELEASE_LOCK(&ccs_mutex);
533 return temp_ccs;
534 }
535 }
536 temp_ccs = checkLoop(ccs,cc);
537 if (temp_ccs != NULL) {
538 // This CC is already in the stack somewhere.
539 // This could be recursion, or just calling
540 // another function with the same CC.
541 // A number of policies are possible at this
542 // point, we implement two here:
543 // - truncate the stack to the previous instance
544 // of this CC
545 // - ignore this push, return the same stack.
546 //
547 CostCentreStack *new_ccs;
548 #if defined(RECURSION_TRUNCATES)
549 new_ccs = temp_ccs;
550 #else // defined(RECURSION_DROPS)
551 new_ccs = ccs;
552 #endif
553 ccs->indexTable = addToIndexTable (ccs->indexTable,
554 new_ccs, cc, 1);
555 ret = new_ccs;
556 } else {
557 ret = actualPush (ccs,cc);
558 }
559 }
560 }
561 }
562
563 RELEASE_LOCK(&ccs_mutex);
564 return ret;
565 }
566
567 static CostCentreStack *
568 checkLoop (CostCentreStack *ccs, CostCentre *cc)
569 {
570 while (ccs != EMPTY_STACK) {
571 if (ccs->cc == cc)
572 return ccs;
573 ccs = ccs->prevStack;
574 }
575 return NULL;
576 }
577
578 static CostCentreStack *
579 actualPush (CostCentreStack *ccs, CostCentre *cc)
580 {
581 CostCentreStack *new_ccs;
582
583 // allocate space for a new CostCentreStack
584 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
585
586 return actualPush_(ccs, cc, new_ccs);
587 }
588
589 static CostCentreStack *
590 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
591 {
592 /* assign values to each member of the structure */
593 new_ccs->ccsID = CCS_ID++;
594 new_ccs->cc = cc;
595 new_ccs->prevStack = ccs;
596 new_ccs->root = ccs->root;
597 new_ccs->depth = ccs->depth + 1;
598
599 new_ccs->indexTable = EMPTY_TABLE;
600
601 /* Initialise the various _scc_ counters to zero
602 */
603 new_ccs->scc_count = 0;
604
605 /* Initialize all other stats here. There should be a quick way
606 * that's easily used elsewhere too
607 */
608 new_ccs->time_ticks = 0;
609 new_ccs->mem_alloc = 0;
610 new_ccs->inherited_ticks = 0;
611 new_ccs->inherited_alloc = 0;
612
613 // Set the selected field.
614 ccsSetSelected(new_ccs);
615
616 /* update the memoization table for the parent stack */
617 if (ccs != EMPTY_STACK) {
618 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
619 0/*not a back edge*/);
620 }
621
622 /* return a pointer to the new stack */
623 return new_ccs;
624 }
625
626
627 static CostCentreStack *
628 isInIndexTable(IndexTable *it, CostCentre *cc)
629 {
630 while (it!=EMPTY_TABLE)
631 {
632 if (it->cc == cc)
633 return it->ccs;
634 else
635 it = it->next;
636 }
637
638 /* otherwise we never found it so return EMPTY_TABLE */
639 return EMPTY_TABLE;
640 }
641
642
643 static IndexTable *
644 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
645 CostCentre *cc, unsigned int back_edge)
646 {
647 IndexTable *new_it;
648
649 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
650
651 new_it->cc = cc;
652 new_it->ccs = new_ccs;
653 new_it->next = it;
654 new_it->back_edge = back_edge;
655 return new_it;
656 }
657
658 /* -----------------------------------------------------------------------------
659 Generating a time & allocation profiling report.
660 -------------------------------------------------------------------------- */
661
662 /* We omit certain system-related CCs and CCSs from the default
663 * reports, so as not to cause confusion.
664 */
665 static rtsBool
666 ignoreCC (CostCentre *cc)
667 {
668 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
669 ( cc == CC_OVERHEAD
670 || cc == CC_DONT_CARE
671 || cc == CC_GC
672 || cc == CC_SYSTEM)) {
673 return rtsTrue;
674 } else {
675 return rtsFalse;
676 }
677 }
678
679 static rtsBool
680 ignoreCCS (CostCentreStack *ccs)
681 {
682 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
683 ( ccs == CCS_OVERHEAD
684 || ccs == CCS_DONT_CARE
685 || ccs == CCS_GC
686 || ccs == CCS_SYSTEM)) {
687 return rtsTrue;
688 } else {
689 return rtsFalse;
690 }
691 }
692
693 /* -----------------------------------------------------------------------------
694 Generating the aggregated per-cost-centre time/alloc report.
695 -------------------------------------------------------------------------- */
696
697 static CostCentre *sorted_cc_list;
698
699 static void
700 aggregateCCCosts( CostCentreStack *ccs )
701 {
702 IndexTable *i;
703
704 ccs->cc->mem_alloc += ccs->mem_alloc;
705 ccs->cc->time_ticks += ccs->time_ticks;
706
707 for (i = ccs->indexTable; i != 0; i = i->next) {
708 if (!i->back_edge) {
709 aggregateCCCosts(i->ccs);
710 }
711 }
712 }
713
714 static void
715 insertCCInSortedList( CostCentre *new_cc )
716 {
717 CostCentre **prev, *cc;
718
719 prev = &sorted_cc_list;
720 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
721 if (new_cc->time_ticks > cc->time_ticks) {
722 new_cc->link = cc;
723 *prev = new_cc;
724 return;
725 } else {
726 prev = &(cc->link);
727 }
728 }
729 new_cc->link = NULL;
730 *prev = new_cc;
731 }
732
733 static nat
734 strlen_utf8 (char *s)
735 {
736 nat n = 0;
737 unsigned char c;
738
739 for (; *s != '\0'; s++) {
740 c = *s;
741 if (c < 0x80 || c > 0xBF) n++;
742 }
743 return n;
744 }
745
746 static void
747 reportPerCCCosts( void )
748 {
749 CostCentre *cc, *next;
750 nat max_label_len, max_module_len;
751
752 aggregateCCCosts(CCS_MAIN);
753 sorted_cc_list = NULL;
754
755 max_label_len = 11; // no shorter than the "COST CENTRE" header
756 max_module_len = 7; // no shorter than the "MODULE" header
757
758 for (cc = CC_LIST; cc != NULL; cc = next) {
759 next = cc->link;
760 if (cc->time_ticks > total_prof_ticks/100
761 || cc->mem_alloc > total_alloc/100
762 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
763 insertCCInSortedList(cc);
764
765 max_label_len = stg_max(strlen_utf8(cc->label), max_label_len);
766 max_module_len = stg_max(strlen_utf8(cc->module), max_module_len);
767 }
768 }
769
770 fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
771 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
772 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
773 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
774 }
775 fprintf(prof_file, "\n\n");
776
777 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
778 if (ignoreCC(cc)) {
779 continue;
780 }
781 fprintf(prof_file, "%s%*s %s%*s",
782 cc->label,
783 max_label_len - strlen_utf8(cc->label), "",
784 cc->module,
785 max_module_len - strlen_utf8(cc->module), "");
786
787 fprintf(prof_file, "%6.1f %6.1f",
788 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
789 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
790 total_alloc * 100)
791 );
792
793 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
794 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
795 (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_));
796 }
797 fprintf(prof_file, "\n");
798 }
799
800 fprintf(prof_file,"\n\n");
801 }
802
803 /* -----------------------------------------------------------------------------
804 Generate the cost-centre-stack time/alloc report
805 -------------------------------------------------------------------------- */
806
807 static void
808 fprintHeader( nat max_label_len, nat max_module_len )
809 {
810 fprintf(prof_file, "%-*s %-*s%6s %11s %11s %11s\n", max_label_len, "", max_module_len, "", "", "", "individual", "inherited");
811
812 fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
813 fprintf(prof_file, "%6s %11s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc");
814
815 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
816 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
817 }
818
819 fprintf(prof_file, "\n\n");
820 }
821
822 void
823 reportCCSProfiling( void )
824 {
825 nat count;
826 char temp[128]; /* sigh: magic constant */
827
828 stopProfTimer();
829
830 total_prof_ticks = 0;
831 total_alloc = 0;
832 countTickss(CCS_MAIN);
833
834 if (RtsFlags.CcFlags.doCostCentres == 0) return;
835
836 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
837 time_str(), "Final");
838
839 fprintf(prof_file, "\n\t ");
840 fprintf(prof_file, " %s", prog_name);
841 fprintf(prof_file, " +RTS");
842 for (count = 0; rts_argv[count]; count++)
843 fprintf(prof_file, " %s", rts_argv[count]);
844 fprintf(prof_file, " -RTS");
845 for (count = 1; prog_argv[count]; count++)
846 fprintf(prof_file, " %s", prog_argv[count]);
847 fprintf(prof_file, "\n\n");
848
849 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d us, %d processor%s)\n",
850 ((double) total_prof_ticks *
851 (double) RtsFlags.MiscFlags.tickInterval) / (TIME_RESOLUTION * n_capabilities),
852 (unsigned long) total_prof_ticks,
853 (int) TimeToUS(RtsFlags.MiscFlags.tickInterval),
854 n_capabilities, n_capabilities > 1 ? "s" : "");
855
856 fprintf(prof_file, "\ttotal alloc = %11s bytes",
857 showStgWord64(total_alloc * sizeof(W_),
858 temp, rtsTrue/*commas*/));
859
860 fprintf(prof_file, " (excludes profiling overheads)\n\n");
861
862 reportPerCCCosts();
863
864 inheritCosts(CCS_MAIN);
865
866 reportCCS(pruneCCSTree(CCS_MAIN));
867 }
868
869 static void
870 findCCSMaxLens(CostCentreStack *ccs, nat indent, nat *max_label_len, nat *max_module_len) {
871 CostCentre *cc;
872 IndexTable *i;
873
874 cc = ccs->cc;
875
876 *max_label_len = stg_max(*max_label_len, indent + strlen_utf8(cc->label));
877 *max_module_len = stg_max(*max_module_len, strlen_utf8(cc->module));
878
879 for (i = ccs->indexTable; i != 0; i = i->next) {
880 if (!i->back_edge) {
881 findCCSMaxLens(i->ccs, indent+1, max_label_len, max_module_len);
882 }
883 }
884 }
885
886 static void
887 logCCS(CostCentreStack *ccs, nat indent, nat max_label_len, nat max_module_len)
888 {
889 CostCentre *cc;
890 IndexTable *i;
891
892 cc = ccs->cc;
893
894 /* Only print cost centres with non 0 data ! */
895
896 if (!ignoreCCS(ccs))
897 /* force printing of *all* cost centres if -Pa */
898 {
899
900 fprintf(prof_file, "%-*s%s%*s %s%*s",
901 indent, "",
902 cc->label,
903 max_label_len-indent - strlen_utf8(cc->label), "",
904 cc->module,
905 max_module_len - strlen_utf8(cc->module), "");
906
907 fprintf(prof_file, "%6ld %11" FMT_Word64 " %5.1f %5.1f %5.1f %5.1f",
908 ccs->ccsID, ccs->scc_count,
909 total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
910 total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
911 total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
912 total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
913 );
914
915 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
916 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
917 (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
918 }
919 fprintf(prof_file, "\n");
920 }
921
922 for (i = ccs->indexTable; i != 0; i = i->next) {
923 if (!i->back_edge) {
924 logCCS(i->ccs, indent+1, max_label_len, max_module_len);
925 }
926 }
927 }
928
929 static void
930 reportCCS(CostCentreStack *ccs)
931 {
932 nat max_label_len, max_module_len;
933
934 max_label_len = 11; // no shorter than "COST CENTRE" header
935 max_module_len = 7; // no shorter than "MODULE" header
936
937 findCCSMaxLens(ccs, 0, &max_label_len, &max_module_len);
938
939 fprintHeader(max_label_len, max_module_len);
940 logCCS(ccs, 0, max_label_len, max_module_len);
941 }
942
943
944 /* Traverse the cost centre stack tree and accumulate
945 * ticks/allocations.
946 */
947 static void
948 countTickss(CostCentreStack *ccs)
949 {
950 IndexTable *i;
951
952 if (!ignoreCCS(ccs)) {
953 total_alloc += ccs->mem_alloc;
954 total_prof_ticks += ccs->time_ticks;
955 }
956 for (i = ccs->indexTable; i != NULL; i = i->next)
957 if (!i->back_edge) {
958 countTickss(i->ccs);
959 }
960 }
961
962 /* Traverse the cost centre stack tree and inherit ticks & allocs.
963 */
964 static void
965 inheritCosts(CostCentreStack *ccs)
966 {
967 IndexTable *i;
968
969 if (ignoreCCS(ccs)) { return; }
970
971 ccs->inherited_ticks += ccs->time_ticks;
972 ccs->inherited_alloc += ccs->mem_alloc;
973
974 for (i = ccs->indexTable; i != NULL; i = i->next)
975 if (!i->back_edge) {
976 inheritCosts(i->ccs);
977 ccs->inherited_ticks += i->ccs->inherited_ticks;
978 ccs->inherited_alloc += i->ccs->inherited_alloc;
979 }
980
981 return;
982 }
983
984 //
985 // Prune CCSs with zero entries, zero ticks or zero allocation from
986 // the tree, unless COST_CENTRES_ALL is on.
987 //
988 static CostCentreStack *
989 pruneCCSTree (CostCentreStack *ccs)
990 {
991 CostCentreStack *ccs1;
992 IndexTable *i, **prev;
993
994 prev = &ccs->indexTable;
995 for (i = ccs->indexTable; i != 0; i = i->next) {
996 if (i->back_edge) { continue; }
997
998 ccs1 = pruneCCSTree(i->ccs);
999 if (ccs1 == NULL) {
1000 *prev = i->next;
1001 } else {
1002 prev = &(i->next);
1003 }
1004 }
1005
1006 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
1007 /* force printing of *all* cost centres if -P -P */ )
1008
1009 || ( ccs->indexTable != 0 )
1010 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
1011 ) {
1012 return ccs;
1013 } else {
1014 return NULL;
1015 }
1016 }
1017
1018 void
1019 fprintCCS( FILE *f, CostCentreStack *ccs )
1020 {
1021 fprintf(f,"<");
1022 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1023 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
1024 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1025 fprintf(f,",");
1026 }
1027 }
1028 fprintf(f,">");
1029 }
1030
1031 // Returns: True if the call stack ended with CAF
1032 static rtsBool fprintCallStack (CostCentreStack *ccs)
1033 {
1034 CostCentreStack *prev;
1035
1036 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
1037 prev = ccs->prevStack;
1038 while (prev && prev != CCS_MAIN) {
1039 ccs = prev;
1040 fprintf(stderr, ",\n called from %s.%s",
1041 ccs->cc->module, ccs->cc->label);
1042 prev = ccs->prevStack;
1043 }
1044 fprintf(stderr, "\n");
1045
1046 return (!strncmp(ccs->cc->label, "CAF", 3));
1047 }
1048
1049 /* For calling from .cmm code, where we can't reliably refer to stderr */
1050 void
1051 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
1052 {
1053 rtsBool is_caf;
1054 StgPtr frame;
1055 StgStack *stack;
1056 CostCentreStack *prev_ccs;
1057 nat depth = 0;
1058 const nat MAX_DEPTH = 10; // don't print gigantic chains of stacks
1059
1060 {
1061 char *desc;
1062 StgInfoTable *info;
1063 info = get_itbl(exception);
1064 switch (info->type) {
1065 case CONSTR:
1066 case CONSTR_1_0:
1067 case CONSTR_0_1:
1068 case CONSTR_2_0:
1069 case CONSTR_1_1:
1070 case CONSTR_0_2:
1071 case CONSTR_STATIC:
1072 case CONSTR_NOCAF_STATIC:
1073 desc = GET_CON_DESC(itbl_to_con_itbl(info));
1074 default:
1075 desc = closure_type_names[info->type];
1076 }
1077 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
1078 }
1079
1080 is_caf = fprintCallStack(ccs);
1081
1082 // traverse the stack down to the enclosing update frame to
1083 // find out where this CCS was evaluated from...
1084
1085 stack = tso->stackobj;
1086 frame = stack->sp;
1087 prev_ccs = ccs;
1088
1089 for (; is_caf && depth < MAX_DEPTH; depth++)
1090 {
1091 switch (get_itbl((StgClosure*)frame)->type)
1092 {
1093 case UPDATE_FRAME:
1094 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
1095 frame += sizeofW(StgUpdateFrame);
1096 if (ccs == CCS_MAIN) {
1097 goto done;
1098 }
1099 if (ccs == prev_ccs) {
1100 // ignore if this is the same as the previous stack,
1101 // we're probably in library code and haven't
1102 // accumulated any more interesting stack items
1103 // since the last update frame.
1104 break;
1105 }
1106 prev_ccs = ccs;
1107 fprintf(stderr, " --> evaluated by: ");
1108 is_caf = fprintCallStack(ccs);
1109 break;
1110 case UNDERFLOW_FRAME:
1111 stack = ((StgUnderflowFrame*)frame)->next_chunk;
1112 frame = stack->sp;
1113 break;
1114 case STOP_FRAME:
1115 goto done;
1116 default:
1117 frame += stack_frame_sizeW((StgClosure*)frame);
1118 break;
1119 }
1120 }
1121 done:
1122 return;
1123 }
1124
1125 #ifdef DEBUG
1126 void
1127 debugCCS( CostCentreStack *ccs )
1128 {
1129 debugBelch("<");
1130 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1131 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
1132 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1133 debugBelch(",");
1134 }
1135 }
1136 debugBelch(">");
1137 }
1138 #endif /* DEBUG */
1139
1140 #endif /* PROFILING */