rts/Profiling: Kill a few globals and add consts
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "Printer.h"
21 #include "Capability.h"
22
23 #include <string.h>
24
25 #ifdef DEBUG
26 #include "Trace.h"
27 #endif
28
29 /*
30 * Profiling allocation arena.
31 */
32 static Arena *prof_arena;
33
34 /*
35 * Global variables used to assign unique IDs to cc's, ccs's, and
36 * closure_cats
37 */
38
39 unsigned int CC_ID = 1;
40 unsigned int CCS_ID = 1;
41
42 /* Globals for opening the profiling log file(s)
43 */
44 static char *prof_filename; /* prof report file name = <program>.prof */
45 FILE *prof_file;
46
47 static char *hp_filename; /* heap profile (hp2ps style) log file */
48 FILE *hp_file;
49
50 /* Linked lists to keep track of CCs and CCSs that haven't
51 * been declared in the log file yet
52 */
53 CostCentre *CC_LIST = NULL;
54 CostCentreStack *CCS_LIST = NULL;
55
56 #ifdef THREADED_RTS
57 static Mutex ccs_mutex;
58 #endif
59
60 /*
61 * Built-in cost centres and cost-centre stacks:
62 *
63 * MAIN is the root of the cost-centre stack tree. If there are
64 * no {-# SCC #-}s in the program, all costs will be attributed
65 * to MAIN.
66 *
67 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
68 * RTS operations apart from garbage collection are attributed
69 * to SYSTEM.
70 *
71 * GC is the storage manager / garbage collector.
72 *
73 * OVERHEAD gets all costs generated by the profiling system
74 * itself. These are costs that would not be incurred
75 * during non-profiled execution of the program.
76 *
77 * DONT_CARE is a placeholder cost-centre we assign to static
78 * constructors. It should *never* accumulate any costs.
79 *
80 * PINNED accumulates memory allocated to pinned objects, which
81 * cannot be profiled separately because we cannot reliably
82 * traverse pinned memory.
83 */
84
85 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "<built-in>", CC_NOT_CAF, );
86 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", "<built-in>", CC_NOT_CAF, );
87 CC_DECLARE(CC_GC, "GC", "GC", "<built-in>", CC_NOT_CAF, );
88 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "<built-in>", CC_NOT_CAF, );
89 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", "<built-in>", CC_NOT_CAF, );
90 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", "<built-in>", CC_NOT_CAF, );
91 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "<built-in>", CC_NOT_CAF, );
92
93 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
94 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
95 CCS_DECLARE(CCS_GC, CC_GC, );
96 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
97 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
98 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
99 CCS_DECLARE(CCS_IDLE, CC_IDLE, );
100
101 /*
102 * Static Functions
103 */
104
105 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
106 CostCentreStack *ccs2 );
107 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
108 CostCentreStack *new_ccs );
109 static bool ignoreCCS ( CostCentreStack const *ccs );
110 static ProfilerTotals countTickss ( CostCentreStack const *ccs );
111 static void inheritCosts ( CostCentreStack *ccs );
112 static uint32_t numDigits ( StgInt i );
113 static void findCCSMaxLens ( CostCentreStack const *ccs,
114 uint32_t indent,
115 uint32_t *max_label_len,
116 uint32_t *max_module_len,
117 uint32_t *max_src_len,
118 uint32_t *max_id_len );
119 static void logCCS ( FILE *prof_file,
120 CostCentreStack const *ccs,
121 ProfilerTotals totals,
122 uint32_t indent,
123 uint32_t max_label_len,
124 uint32_t max_module_len,
125 uint32_t max_src_len,
126 uint32_t max_id_len );
127 static void reportPerCCCosts( FILE *prof_file, ProfilerTotals totals );
128 static void reportCCS ( FILE *prof_file,
129 CostCentreStack const *ccs,
130 ProfilerTotals totals );
131 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
132 CostCentre *cc );
133 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
134 static void sortCCSTree ( CostCentreStack *ccs );
135 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
136 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
137 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
138 CostCentre *, unsigned int );
139 static void ccsSetSelected ( CostCentreStack *ccs );
140
141 static void initTimeProfiling ( void );
142 static void initProfilingLogFile ( void );
143
144 /* -----------------------------------------------------------------------------
145 Initialise the profiling environment
146 -------------------------------------------------------------------------- */
147
148 void initProfiling (void)
149 {
150 // initialise our arena
151 prof_arena = newArena();
152
153 /* for the benefit of allocate()... */
154 {
155 uint32_t n;
156 for (n=0; n < n_capabilities; n++) {
157 capabilities[n]->r.rCCCS = CCS_SYSTEM;
158 }
159 }
160
161 #ifdef THREADED_RTS
162 initMutex(&ccs_mutex);
163 #endif
164
165 /* Set up the log file, and dump the header and cost centre
166 * information into it.
167 */
168 initProfilingLogFile();
169
170 /* Register all the cost centres / stacks in the program
171 * CC_MAIN gets link = 0, all others have non-zero link.
172 */
173 REGISTER_CC(CC_MAIN);
174 REGISTER_CC(CC_SYSTEM);
175 REGISTER_CC(CC_GC);
176 REGISTER_CC(CC_OVERHEAD);
177 REGISTER_CC(CC_DONT_CARE);
178 REGISTER_CC(CC_PINNED);
179 REGISTER_CC(CC_IDLE);
180
181 REGISTER_CCS(CCS_SYSTEM);
182 REGISTER_CCS(CCS_GC);
183 REGISTER_CCS(CCS_OVERHEAD);
184 REGISTER_CCS(CCS_DONT_CARE);
185 REGISTER_CCS(CCS_PINNED);
186 REGISTER_CCS(CCS_IDLE);
187 REGISTER_CCS(CCS_MAIN);
188
189 /* find all the registered cost centre stacks, and make them
190 * children of CCS_MAIN.
191 */
192 ASSERT(CCS_LIST == CCS_MAIN);
193 CCS_LIST = CCS_LIST->prevStack;
194 CCS_MAIN->prevStack = NULL;
195 CCS_MAIN->root = CCS_MAIN;
196 ccsSetSelected(CCS_MAIN);
197
198 initProfiling2();
199
200 if (RtsFlags.CcFlags.doCostCentres) {
201 initTimeProfiling();
202 }
203
204 if (RtsFlags.ProfFlags.doHeapProfile) {
205 initHeapProfiling();
206 }
207 }
208
209 //
210 // Should be called after loading any new Haskell code.
211 //
212 void initProfiling2 (void)
213 {
214 CostCentreStack *ccs, *next;
215
216 // make CCS_MAIN the parent of all the pre-defined CCSs.
217 for (ccs = CCS_LIST; ccs != NULL; ) {
218 next = ccs->prevStack;
219 ccs->prevStack = NULL;
220 actualPush_(CCS_MAIN,ccs->cc,ccs);
221 ccs->root = ccs;
222 ccs = next;
223 }
224 CCS_LIST = NULL;
225 }
226
227 void
228 freeProfiling (void)
229 {
230 arenaFree(prof_arena);
231 }
232
233 CostCentre *mkCostCentre (char *label, char *module, char *srcloc)
234 {
235 CostCentre *cc = stgMallocBytes (sizeof(CostCentre), "mkCostCentre");
236 cc->label = label;
237 cc->module = module;
238 cc->srcloc = srcloc;
239 cc->is_caf = 0;
240 cc->mem_alloc = 0;
241 cc->time_ticks = 0;
242 cc->link = NULL;
243 return cc;
244 }
245
246 static void
247 initProfilingLogFile(void)
248 {
249 char *prog;
250
251 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
252 strcpy(prog, prog_name);
253 #ifdef mingw32_HOST_OS
254 // on Windows, drop the .exe suffix if there is one
255 {
256 char *suff;
257 suff = strrchr(prog,'.');
258 if (suff != NULL && !strcmp(suff,".exe")) {
259 *suff = '\0';
260 }
261 }
262 #endif
263
264 if (RtsFlags.CcFlags.doCostCentres == 0 && !doingRetainerProfiling())
265 {
266 /* No need for the <prog>.prof file */
267 prof_filename = NULL;
268 prof_file = NULL;
269 }
270 else
271 {
272 /* Initialise the log file name */
273 prof_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
274 sprintf(prof_filename, "%s.prof", prog);
275
276 /* open the log file */
277 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
278 debugBelch("Can't open profiling report file %s\n", prof_filename);
279 RtsFlags.CcFlags.doCostCentres = 0;
280 // Retainer profiling (`-hr` or `-hr<cc> -h<x>`) writes to
281 // both <program>.hp as <program>.prof.
282 if (doingRetainerProfiling()) {
283 RtsFlags.ProfFlags.doHeapProfile = 0;
284 }
285 }
286 }
287
288 if (RtsFlags.ProfFlags.doHeapProfile) {
289 /* Initialise the log file name */
290 hp_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
291 sprintf(hp_filename, "%s.hp", prog);
292
293 /* open the log file */
294 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
295 debugBelch("Can't open profiling report file %s\n",
296 hp_filename);
297 RtsFlags.ProfFlags.doHeapProfile = 0;
298 }
299 }
300 }
301
302 void
303 initTimeProfiling(void)
304 {
305 /* Start ticking */
306 startProfTimer();
307 };
308
309 void
310 endProfiling ( void )
311 {
312 if (RtsFlags.CcFlags.doCostCentres) {
313 stopProfTimer();
314 }
315 if (RtsFlags.ProfFlags.doHeapProfile) {
316 endHeapProfiling();
317 }
318 }
319
320 /* -----------------------------------------------------------------------------
321 Set CCCS when entering a function.
322
323 The algorithm is as follows.
324
325 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
326
327 where
328
329 dropCommonPrefix A B
330 -- returns the suffix of B after removing any prefix common
331 -- to both A and B.
332
333 e.g.
334
335 <a,b,c> ++> <> = <a,b,c>
336 <a,b,c> ++> <d> = <a,b,c,d>
337 <a,b,c> ++> <a,b> = <a,b,c>
338 <a,b> ++> <a,b,c> = <a,b,c>
339 <a,b,c> ++> <a,b,d> = <a,b,c,d>
340
341 -------------------------------------------------------------------------- */
342
343 // implements c1 ++> c2, where c1 and c2 are equal depth
344 //
345 static CostCentreStack *
346 enterFunEqualStacks (CostCentreStack *ccs0,
347 CostCentreStack *ccsapp,
348 CostCentreStack *ccsfn)
349 {
350 ASSERT(ccsapp->depth == ccsfn->depth);
351 if (ccsapp == ccsfn) return ccs0;
352 return pushCostCentre(enterFunEqualStacks(ccs0,
353 ccsapp->prevStack,
354 ccsfn->prevStack),
355 ccsfn->cc);
356 }
357
358 // implements c1 ++> c2, where c2 is deeper than c1.
359 // Drop elements of c2 until we have equal stacks, call
360 // enterFunEqualStacks(), and then push on the elements that we
361 // dropped in reverse order.
362 //
363 static CostCentreStack *
364 enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
365 {
366 if (n == 0) {
367 ASSERT(ccsfn->depth == ccsapp->depth);
368 return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);;
369 } else {
370 ASSERT(ccsfn->depth > ccsapp->depth);
371 return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
372 ccsfn->cc);
373 }
374 }
375
376 void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
377 {
378 CostCentreStack *ccsapp;
379
380 // common case 1: both stacks are the same
381 if (ccsfn == reg->rCCCS) {
382 return;
383 }
384
385 // common case 2: the function stack is empty, or just CAF
386 if (ccsfn->cc->is_caf) {
387 return;
388 }
389
390 ccsapp = reg->rCCCS;
391 reg->rCCCS = CCS_OVERHEAD;
392
393 // common case 3: the stacks are completely different (e.g. one is a
394 // descendent of MAIN and the other of a CAF): we append the whole
395 // of the function stack to the current CCS.
396 if (ccsfn->root != ccsapp->root) {
397 reg->rCCCS = appendCCS(ccsapp,ccsfn);
398 return;
399 }
400
401 // uncommon case 4: ccsapp is deeper than ccsfn
402 if (ccsapp->depth > ccsfn->depth) {
403 uint32_t i, n;
404 CostCentreStack *tmp = ccsapp;
405 n = ccsapp->depth - ccsfn->depth;
406 for (i = 0; i < n; i++) {
407 tmp = tmp->prevStack;
408 }
409 reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
410 return;
411 }
412
413 // uncommon case 5: ccsfn is deeper than CCCS
414 if (ccsfn->depth > ccsapp->depth) {
415 reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
416 ccsfn->depth - ccsapp->depth);
417 return;
418 }
419
420 // uncommon case 6: stacks are equal depth, but different
421 reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
422 }
423
424 /* -----------------------------------------------------------------------------
425 Decide whether closures with this CCS should contribute to the heap
426 profile.
427 -------------------------------------------------------------------------- */
428
429 static void
430 ccsSetSelected (CostCentreStack *ccs)
431 {
432 if (RtsFlags.ProfFlags.modSelector) {
433 if (! strMatchesSelector (ccs->cc->module,
434 RtsFlags.ProfFlags.modSelector) ) {
435 ccs->selected = 0;
436 return;
437 }
438 }
439 if (RtsFlags.ProfFlags.ccSelector) {
440 if (! strMatchesSelector (ccs->cc->label,
441 RtsFlags.ProfFlags.ccSelector) ) {
442 ccs->selected = 0;
443 return;
444 }
445 }
446 if (RtsFlags.ProfFlags.ccsSelector) {
447 CostCentreStack *c;
448 for (c = ccs; c != NULL; c = c->prevStack)
449 {
450 if ( strMatchesSelector (c->cc->label,
451 RtsFlags.ProfFlags.ccsSelector) ) {
452 break;
453 }
454 }
455 if (c == NULL) {
456 ccs->selected = 0;
457 return;
458 }
459 }
460
461 ccs->selected = 1;
462 return;
463 }
464
465 /* -----------------------------------------------------------------------------
466 Cost-centre stack manipulation
467 -------------------------------------------------------------------------- */
468
469 #ifdef DEBUG
470 CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
471 CostCentreStack *
472 pushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
473 #define pushCostCentre _pushCostCentre
474 {
475 IF_DEBUG(prof,
476 traceBegin("pushing %s on ", cc->label);
477 debugCCS(ccs);
478 traceEnd(););
479
480 return pushCostCentre(ccs,cc);
481 }
482 #endif
483
484 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
485
486 #ifdef DEBUG
487 CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
488 CostCentreStack *
489 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
490 #define appendCCS _appendCCS
491 {
492 IF_DEBUG(prof,
493 if (ccs1 != ccs2) {
494 debugBelch("Appending ");
495 debugCCS(ccs1);
496 debugBelch(" to ");
497 debugCCS(ccs2);
498 debugBelch("\n");});
499 return appendCCS(ccs1,ccs2);
500 }
501 #endif
502
503 CostCentreStack *
504 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
505 {
506 if (ccs1 == ccs2) {
507 return ccs1;
508 }
509
510 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) {
511 // stop at a CAF element
512 return ccs1;
513 }
514
515 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
516 }
517
518 // Pick one:
519 // #define RECURSION_DROPS
520 #define RECURSION_TRUNCATES
521
522 CostCentreStack *
523 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
524 {
525 CostCentreStack *temp_ccs, *ret;
526 IndexTable *ixtable;
527
528 if (ccs == EMPTY_STACK) {
529 ACQUIRE_LOCK(&ccs_mutex);
530 ret = actualPush(ccs,cc);
531 }
532 else
533 {
534 if (ccs->cc == cc) {
535 return ccs;
536 } else {
537 // check if we've already memoized this stack
538 ixtable = ccs->indexTable;
539 temp_ccs = isInIndexTable(ixtable,cc);
540
541 if (temp_ccs != EMPTY_STACK) {
542 return temp_ccs;
543 } else {
544
545 // not in the IndexTable, now we take the lock:
546 ACQUIRE_LOCK(&ccs_mutex);
547
548 if (ccs->indexTable != ixtable)
549 {
550 // someone modified ccs->indexTable while
551 // we did not hold the lock, so we must
552 // check it again:
553 temp_ccs = isInIndexTable(ixtable,cc);
554 if (temp_ccs != EMPTY_STACK)
555 {
556 RELEASE_LOCK(&ccs_mutex);
557 return temp_ccs;
558 }
559 }
560 temp_ccs = checkLoop(ccs,cc);
561 if (temp_ccs != NULL) {
562 // This CC is already in the stack somewhere.
563 // This could be recursion, or just calling
564 // another function with the same CC.
565 // A number of policies are possible at this
566 // point, we implement two here:
567 // - truncate the stack to the previous instance
568 // of this CC
569 // - ignore this push, return the same stack.
570 //
571 CostCentreStack *new_ccs;
572 #if defined(RECURSION_TRUNCATES)
573 new_ccs = temp_ccs;
574 #else // defined(RECURSION_DROPS)
575 new_ccs = ccs;
576 #endif
577 ccs->indexTable = addToIndexTable (ccs->indexTable,
578 new_ccs, cc, 1);
579 ret = new_ccs;
580 } else {
581 ret = actualPush (ccs,cc);
582 }
583 }
584 }
585 }
586
587 RELEASE_LOCK(&ccs_mutex);
588 return ret;
589 }
590
591 static CostCentreStack *
592 checkLoop (CostCentreStack *ccs, CostCentre *cc)
593 {
594 while (ccs != EMPTY_STACK) {
595 if (ccs->cc == cc)
596 return ccs;
597 ccs = ccs->prevStack;
598 }
599 return NULL;
600 }
601
602 static CostCentreStack *
603 actualPush (CostCentreStack *ccs, CostCentre *cc)
604 {
605 CostCentreStack *new_ccs;
606
607 // allocate space for a new CostCentreStack
608 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
609
610 return actualPush_(ccs, cc, new_ccs);
611 }
612
613 static CostCentreStack *
614 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
615 {
616 /* assign values to each member of the structure */
617 new_ccs->ccsID = CCS_ID++;
618 new_ccs->cc = cc;
619 new_ccs->prevStack = ccs;
620 new_ccs->root = ccs->root;
621 new_ccs->depth = ccs->depth + 1;
622
623 new_ccs->indexTable = EMPTY_TABLE;
624
625 /* Initialise the various _scc_ counters to zero
626 */
627 new_ccs->scc_count = 0;
628
629 /* Initialize all other stats here. There should be a quick way
630 * that's easily used elsewhere too
631 */
632 new_ccs->time_ticks = 0;
633 new_ccs->mem_alloc = 0;
634 new_ccs->inherited_ticks = 0;
635 new_ccs->inherited_alloc = 0;
636
637 // Set the selected field.
638 ccsSetSelected(new_ccs);
639
640 /* update the memoization table for the parent stack */
641 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
642 0/*not a back edge*/);
643
644 /* return a pointer to the new stack */
645 return new_ccs;
646 }
647
648
649 static CostCentreStack *
650 isInIndexTable(IndexTable *it, CostCentre *cc)
651 {
652 while (it!=EMPTY_TABLE)
653 {
654 if (it->cc == cc)
655 return it->ccs;
656 else
657 it = it->next;
658 }
659
660 /* otherwise we never found it so return EMPTY_TABLE */
661 return EMPTY_TABLE;
662 }
663
664
665 static IndexTable *
666 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
667 CostCentre *cc, unsigned int back_edge)
668 {
669 IndexTable *new_it;
670
671 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
672
673 new_it->cc = cc;
674 new_it->ccs = new_ccs;
675 new_it->next = it;
676 new_it->back_edge = back_edge;
677 return new_it;
678 }
679
680 /* -----------------------------------------------------------------------------
681 Generating a time & allocation profiling report.
682 -------------------------------------------------------------------------- */
683
684 /* We omit certain system-related CCs and CCSs from the default
685 * reports, so as not to cause confusion.
686 */
687 static bool
688 ignoreCC (CostCentre *cc)
689 {
690 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
691 ( cc == CC_OVERHEAD
692 || cc == CC_DONT_CARE
693 || cc == CC_GC
694 || cc == CC_SYSTEM
695 || cc == CC_IDLE);
696 }
697
698 static bool
699 ignoreCCS (CostCentreStack const *ccs)
700 {
701 return RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
702 ( ccs == CCS_OVERHEAD
703 || ccs == CCS_DONT_CARE
704 || ccs == CCS_GC
705 || ccs == CCS_SYSTEM
706 || ccs == CCS_IDLE);
707 }
708
709 /* -----------------------------------------------------------------------------
710 Generating the aggregated per-cost-centre time/alloc report.
711 -------------------------------------------------------------------------- */
712
713 static CostCentre *sorted_cc_list;
714
715 static void
716 aggregateCCCosts( CostCentreStack *ccs )
717 {
718 IndexTable *i;
719
720 ccs->cc->mem_alloc += ccs->mem_alloc;
721 ccs->cc->time_ticks += ccs->time_ticks;
722
723 for (i = ccs->indexTable; i != 0; i = i->next) {
724 if (!i->back_edge) {
725 aggregateCCCosts(i->ccs);
726 }
727 }
728 }
729
730 static void
731 insertCCInSortedList( CostCentre *new_cc )
732 {
733 CostCentre **prev, *cc;
734
735 prev = &sorted_cc_list;
736 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
737 if (new_cc->time_ticks > cc->time_ticks) {
738 new_cc->link = cc;
739 *prev = new_cc;
740 return;
741 } else {
742 prev = &(cc->link);
743 }
744 }
745 new_cc->link = NULL;
746 *prev = new_cc;
747 }
748
749 static uint32_t
750 strlen_utf8 (char *s)
751 {
752 uint32_t n = 0;
753 unsigned char c;
754
755 for (; *s != '\0'; s++) {
756 c = *s;
757 if (c < 0x80 || c > 0xBF) n++;
758 }
759 return n;
760 }
761
762 static void
763 reportPerCCCosts( FILE *prof_file, ProfilerTotals totals )
764 {
765 CostCentre *cc, *next;
766 uint32_t max_label_len, max_module_len, max_src_len;
767
768 aggregateCCCosts(CCS_MAIN);
769 sorted_cc_list = NULL;
770
771 max_label_len = 11; // no shorter than the "COST CENTRE" header
772 max_module_len = 6; // no shorter than the "MODULE" header
773 max_src_len = 3; // no shorter than the "SRC" header
774
775 for (cc = CC_LIST; cc != NULL; cc = next) {
776 next = cc->link;
777 if (cc->time_ticks > totals.total_prof_ticks/100
778 || cc->mem_alloc > totals.total_alloc/100
779 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
780 insertCCInSortedList(cc);
781
782 max_label_len = stg_max(strlen_utf8(cc->label), max_label_len);
783 max_module_len = stg_max(strlen_utf8(cc->module), max_module_len);
784 max_src_len = stg_max(strlen_utf8(cc->srcloc), max_src_len);
785 }
786 }
787
788 fprintf(prof_file, "%-*s %-*s %-*s",
789 max_label_len, "COST CENTRE", max_module_len, "MODULE", max_src_len, "SRC");
790 fprintf(prof_file, " %6s %6s", "%time", "%alloc");
791 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
792 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
793 }
794 fprintf(prof_file, "\n\n");
795
796 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
797 if (ignoreCC(cc)) {
798 continue;
799 }
800 fprintf(prof_file, "%s%*s %s%*s %s%*s",
801 cc->label,
802 max_label_len - strlen_utf8(cc->label), "",
803 cc->module,
804 max_module_len - strlen_utf8(cc->module), "",
805 cc->srcloc,
806 max_src_len - strlen_utf8(cc->srcloc), "");
807
808 fprintf(prof_file, " %6.1f %6.1f",
809 totals.total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) totals.total_prof_ticks * 100),
810 totals.total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat) totals.total_alloc * 100)
811 );
812
813 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
814 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
815 (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_));
816 }
817 fprintf(prof_file, "\n");
818 }
819
820 fprintf(prof_file,"\n\n");
821 }
822
823 /* -----------------------------------------------------------------------------
824 Generate the cost-centre-stack time/alloc report
825 -------------------------------------------------------------------------- */
826
827 static void
828 fprintHeader( FILE *prof_file, uint32_t max_label_len, uint32_t max_module_len,
829 uint32_t max_src_len, uint32_t max_id_len )
830 {
831 fprintf(prof_file, "%-*s %-*s %-*s %-*s %11s %12s %12s\n",
832 max_label_len, "",
833 max_module_len, "",
834 max_src_len, "",
835 max_id_len, "",
836 "", "individual", "inherited");
837
838 fprintf(prof_file, "%-*s %-*s %-*s %-*s",
839 max_label_len, "COST CENTRE",
840 max_module_len, "MODULE",
841 max_src_len, "SRC",
842 max_id_len, "no.");
843
844 fprintf(prof_file, " %11s %5s %6s %5s %6s",
845 "entries", "%time", "%alloc", "%time", "%alloc");
846
847 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
848 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
849 }
850
851 fprintf(prof_file, "\n\n");
852 }
853
854 void
855 reportCCSProfiling( void )
856 {
857 uint32_t count;
858 char temp[128]; /* sigh: magic constant */
859
860 stopProfTimer();
861
862 ProfilerTotals totals = countTickss(CCS_MAIN);
863
864 if (RtsFlags.CcFlags.doCostCentres == 0) return;
865
866 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
867 time_str(), "Final");
868
869 fprintf(prof_file, "\n\t ");
870 fprintf(prof_file, " %s", prog_name);
871 fprintf(prof_file, " +RTS");
872 for (count = 0; rts_argv[count]; count++)
873 fprintf(prof_file, " %s", rts_argv[count]);
874 fprintf(prof_file, " -RTS");
875 for (count = 1; prog_argv[count]; count++)
876 fprintf(prof_file, " %s", prog_argv[count]);
877 fprintf(prof_file, "\n\n");
878
879 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d us, %d processor%s)\n",
880 ((double) totals.total_prof_ticks *
881 (double) RtsFlags.MiscFlags.tickInterval) / (TIME_RESOLUTION * n_capabilities),
882 (unsigned long) totals.total_prof_ticks,
883 (int) TimeToUS(RtsFlags.MiscFlags.tickInterval),
884 n_capabilities, n_capabilities > 1 ? "s" : "");
885
886 fprintf(prof_file, "\ttotal alloc = %11s bytes",
887 showStgWord64(totals.total_alloc * sizeof(W_),
888 temp, true/*commas*/));
889
890 fprintf(prof_file, " (excludes profiling overheads)\n\n");
891
892 reportPerCCCosts(prof_file, totals);
893
894 inheritCosts(CCS_MAIN);
895
896 CostCentreStack *stack = pruneCCSTree(CCS_MAIN);
897 sortCCSTree(stack);
898 reportCCS(prof_file, stack, totals);
899 }
900
901 static uint32_t
902 numDigits(StgInt i) {
903 uint32_t result;
904
905 result = 1;
906
907 if (i < 0) i = 0;
908
909 while (i > 9) {
910 i /= 10;
911 result++;
912 }
913
914 return result;
915 }
916
917 static void
918 findCCSMaxLens(CostCentreStack const *ccs, uint32_t indent,
919 uint32_t *max_label_len, uint32_t *max_module_len,
920 uint32_t *max_src_len, uint32_t *max_id_len) {
921 CostCentre *cc;
922 IndexTable *i;
923
924 cc = ccs->cc;
925
926 *max_label_len = stg_max(*max_label_len, indent + strlen_utf8(cc->label));
927 *max_module_len = stg_max(*max_module_len, strlen_utf8(cc->module));
928 *max_src_len = stg_max(*max_src_len, strlen_utf8(cc->srcloc));
929 *max_id_len = stg_max(*max_id_len, numDigits(ccs->ccsID));
930
931 for (i = ccs->indexTable; i != 0; i = i->next) {
932 if (!i->back_edge) {
933 findCCSMaxLens(i->ccs, indent+1,
934 max_label_len, max_module_len, max_src_len, max_id_len);
935 }
936 }
937 }
938
939 static void
940 logCCS(FILE *prof_file, CostCentreStack const *ccs, ProfilerTotals totals,
941 uint32_t indent,
942 uint32_t max_label_len, uint32_t max_module_len,
943 uint32_t max_src_len, uint32_t max_id_len)
944 {
945 CostCentre *cc;
946 IndexTable *i;
947
948 cc = ccs->cc;
949
950 /* Only print cost centres with non 0 data ! */
951
952 if (!ignoreCCS(ccs))
953 /* force printing of *all* cost centres if -Pa */
954 {
955
956 fprintf(prof_file, "%*s%s%*s %s%*s %s%*s",
957 indent, "",
958 cc->label,
959 max_label_len-indent - strlen_utf8(cc->label), "",
960 cc->module,
961 max_module_len - strlen_utf8(cc->module), "",
962 cc->srcloc,
963 max_src_len - strlen_utf8(cc->srcloc), "");
964
965 fprintf(prof_file,
966 " %*" FMT_Int "%11" FMT_Word64 " %5.1f %5.1f %5.1f %5.1f",
967 max_id_len, ccs->ccsID, ccs->scc_count,
968 totals.total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)totals.total_prof_ticks * 100.0),
969 totals.total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)totals.total_alloc * 100.0),
970 totals.total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)totals.total_prof_ticks * 100.0),
971 totals.total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)totals.total_alloc * 100.0)
972 );
973
974 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
975 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
976 (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
977 }
978 fprintf(prof_file, "\n");
979 }
980
981 for (i = ccs->indexTable; i != 0; i = i->next) {
982 if (!i->back_edge) {
983 logCCS(prof_file, i->ccs, totals, indent+1,
984 max_label_len, max_module_len, max_src_len, max_id_len);
985 }
986 }
987 }
988
989 static void
990 reportCCS(FILE *prof_file, CostCentreStack const *ccs, ProfilerTotals totals)
991 {
992 uint32_t max_label_len, max_module_len, max_src_len, max_id_len;
993
994 max_label_len = 11; // no shorter than "COST CENTRE" header
995 max_module_len = 6; // no shorter than "MODULE" header
996 max_src_len = 3; // no shorter than "SRC" header
997 max_id_len = 3; // no shorter than "no." header
998
999 findCCSMaxLens(ccs, 0,
1000 &max_label_len, &max_module_len, &max_src_len, &max_id_len);
1001
1002 fprintHeader(prof_file,
1003 max_label_len, max_module_len, max_src_len, max_id_len);
1004 logCCS(prof_file, ccs, totals, 0,
1005 max_label_len, max_module_len, max_src_len, max_id_len);
1006 }
1007
1008
1009 /* -----------------------------------------------------------------------------
1010 * Accumulating total allocatinos and tick count
1011 -------------------------------------------------------------------------- */
1012
1013 /* Helper */
1014 static void
1015 countTickss_(CostCentreStack const *ccs, ProfilerTotals *totals)
1016 {
1017 if (!ignoreCCS(ccs)) {
1018 totals->total_alloc += ccs->mem_alloc;
1019 totals->total_prof_ticks += ccs->time_ticks;
1020 }
1021 for (IndexTable *i = ccs->indexTable; i != NULL; i = i->next) {
1022 if (!i->back_edge) {
1023 countTickss_(i->ccs, totals);
1024 }
1025 }
1026 }
1027
1028 /* Traverse the cost centre stack tree and accumulate
1029 * total ticks/allocations.
1030 */
1031 static ProfilerTotals
1032 countTickss(CostCentreStack const *ccs)
1033 {
1034 ProfilerTotals totals = {0,0};
1035 countTickss_(ccs, &totals);
1036 return totals;
1037 }
1038
1039 /* Traverse the cost centre stack tree and inherit ticks & allocs.
1040 */
1041 static void
1042 inheritCosts(CostCentreStack *ccs)
1043 {
1044 IndexTable *i;
1045
1046 if (ignoreCCS(ccs)) { return; }
1047
1048 ccs->inherited_ticks += ccs->time_ticks;
1049 ccs->inherited_alloc += ccs->mem_alloc;
1050
1051 for (i = ccs->indexTable; i != NULL; i = i->next)
1052 if (!i->back_edge) {
1053 inheritCosts(i->ccs);
1054 ccs->inherited_ticks += i->ccs->inherited_ticks;
1055 ccs->inherited_alloc += i->ccs->inherited_alloc;
1056 }
1057
1058 return;
1059 }
1060
1061 //
1062 // Prune CCSs with zero entries, zero ticks or zero allocation from
1063 // the tree, unless COST_CENTRES_ALL is on.
1064 //
1065 static CostCentreStack *
1066 pruneCCSTree (CostCentreStack *ccs)
1067 {
1068 CostCentreStack *ccs1;
1069 IndexTable *i, **prev;
1070
1071 prev = &ccs->indexTable;
1072 for (i = ccs->indexTable; i != 0; i = i->next) {
1073 if (i->back_edge) { continue; }
1074
1075 ccs1 = pruneCCSTree(i->ccs);
1076 if (ccs1 == NULL) {
1077 *prev = i->next;
1078 } else {
1079 prev = &(i->next);
1080 }
1081 }
1082
1083 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
1084 /* force printing of *all* cost centres if -P -P */ )
1085
1086 || ( ccs->indexTable != 0 )
1087 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
1088 ) {
1089 return ccs;
1090 } else {
1091 return NULL;
1092 }
1093 }
1094
1095 static IndexTable*
1096 insertIndexTableInSortedList(IndexTable* tbl, IndexTable* sortedList)
1097 {
1098 StgWord tbl_ticks = tbl->ccs->scc_count;
1099 char* tbl_label = tbl->ccs->cc->label;
1100
1101 IndexTable *prev = NULL;
1102 IndexTable *cursor = sortedList;
1103
1104 while (cursor != NULL) {
1105 StgWord cursor_ticks = cursor->ccs->scc_count;
1106 char* cursor_label = cursor->ccs->cc->label;
1107
1108 if (tbl_ticks > cursor_ticks ||
1109 (tbl_ticks == cursor_ticks && strcmp(tbl_label, cursor_label) < 0)) {
1110 if (prev == NULL) {
1111 tbl->next = sortedList;
1112 return tbl;
1113 } else {
1114 prev->next = tbl;
1115 tbl->next = cursor;
1116 return sortedList;
1117 }
1118 } else {
1119 prev = cursor;
1120 cursor = cursor->next;
1121 }
1122 }
1123
1124 prev->next = tbl;
1125 return sortedList;
1126 }
1127
1128 static void
1129 sortCCSTree(CostCentreStack *ccs)
1130 {
1131 if (ccs->indexTable == NULL) return;
1132
1133 for (IndexTable *tbl = ccs->indexTable; tbl != NULL; tbl = tbl->next)
1134 if (!tbl->back_edge)
1135 sortCCSTree(tbl->ccs);
1136
1137 IndexTable *sortedList = ccs->indexTable;
1138 IndexTable *nonSortedList = sortedList->next;
1139 sortedList->next = NULL;
1140
1141 while (nonSortedList != NULL)
1142 {
1143 IndexTable *nonSortedTail = nonSortedList->next;
1144 nonSortedList->next = NULL;
1145 sortedList = insertIndexTableInSortedList(nonSortedList, sortedList);
1146 nonSortedList = nonSortedTail;
1147 }
1148
1149 ccs->indexTable = sortedList;
1150 }
1151
1152 void
1153 fprintCCS( FILE *f, CostCentreStack *ccs )
1154 {
1155 fprintf(f,"<");
1156 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1157 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
1158 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1159 fprintf(f,",");
1160 }
1161 }
1162 fprintf(f,">");
1163 }
1164
1165 // Returns: True if the call stack ended with CAF
1166 static bool fprintCallStack (CostCentreStack *ccs)
1167 {
1168 CostCentreStack *prev;
1169
1170 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
1171 prev = ccs->prevStack;
1172 while (prev && prev != CCS_MAIN) {
1173 ccs = prev;
1174 fprintf(stderr, ",\n called from %s.%s",
1175 ccs->cc->module, ccs->cc->label);
1176 prev = ccs->prevStack;
1177 }
1178 fprintf(stderr, "\n");
1179
1180 return (!strncmp(ccs->cc->label, "CAF", 3));
1181 }
1182
1183 /* For calling from .cmm code, where we can't reliably refer to stderr */
1184 void
1185 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
1186 {
1187 bool is_caf;
1188 StgPtr frame;
1189 StgStack *stack;
1190 CostCentreStack *prev_ccs;
1191 uint32_t depth = 0;
1192 const uint32_t MAX_DEPTH = 10; // don't print gigantic chains of stacks
1193
1194 {
1195 const char *desc;
1196 const StgInfoTable *info;
1197 info = get_itbl(UNTAG_CONST_CLOSURE(exception));
1198 switch (info->type) {
1199 case CONSTR:
1200 case CONSTR_1_0:
1201 case CONSTR_0_1:
1202 case CONSTR_2_0:
1203 case CONSTR_1_1:
1204 case CONSTR_0_2:
1205 case CONSTR_NOCAF:
1206 desc = GET_CON_DESC(itbl_to_con_itbl(info));
1207 break;
1208 default:
1209 desc = closure_type_names[info->type];
1210 break;
1211 }
1212 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
1213 }
1214
1215 is_caf = fprintCallStack(ccs);
1216
1217 // traverse the stack down to the enclosing update frame to
1218 // find out where this CCS was evaluated from...
1219
1220 stack = tso->stackobj;
1221 frame = stack->sp;
1222 prev_ccs = ccs;
1223
1224 for (; is_caf && depth < MAX_DEPTH; depth++)
1225 {
1226 switch (get_itbl((StgClosure*)frame)->type)
1227 {
1228 case UPDATE_FRAME:
1229 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
1230 frame += sizeofW(StgUpdateFrame);
1231 if (ccs == CCS_MAIN) {
1232 goto done;
1233 }
1234 if (ccs == prev_ccs) {
1235 // ignore if this is the same as the previous stack,
1236 // we're probably in library code and haven't
1237 // accumulated any more interesting stack items
1238 // since the last update frame.
1239 break;
1240 }
1241 prev_ccs = ccs;
1242 fprintf(stderr, " --> evaluated by: ");
1243 is_caf = fprintCallStack(ccs);
1244 break;
1245 case UNDERFLOW_FRAME:
1246 stack = ((StgUnderflowFrame*)frame)->next_chunk;
1247 frame = stack->sp;
1248 break;
1249 case STOP_FRAME:
1250 goto done;
1251 default:
1252 frame += stack_frame_sizeW((StgClosure*)frame);
1253 break;
1254 }
1255 }
1256 done:
1257 return;
1258 }
1259
1260 #ifdef DEBUG
1261 void
1262 debugCCS( CostCentreStack *ccs )
1263 {
1264 debugBelch("<");
1265 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1266 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
1267 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1268 debugBelch(",");
1269 }
1270 }
1271 debugBelch(">");
1272 }
1273 #endif /* DEBUG */
1274
1275 #endif /* PROFILING */