Eliminate atomic_inc_by and instead medofiy atomic_inc.
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "Printer.h"
21 #include "Capability.h"
22
23 #include <string.h>
24
25 #ifdef DEBUG
26 #include "Trace.h"
27 #endif
28
29 /*
30 * Profiling allocation arena.
31 */
32 Arena *prof_arena;
33
34 /*
35 * Global variables used to assign unique IDs to cc's, ccs's, and
36 * closure_cats
37 */
38
39 unsigned int CC_ID = 1;
40 unsigned int CCS_ID = 1;
41
42 /* figures for the profiling report.
43 */
44 static StgWord64 total_alloc;
45 static W_ total_prof_ticks;
46
47 /* Globals for opening the profiling log file(s)
48 */
49 static char *prof_filename; /* prof report file name = <program>.prof */
50 FILE *prof_file;
51
52 static char *hp_filename; /* heap profile (hp2ps style) log file */
53 FILE *hp_file;
54
55 /* Linked lists to keep track of CCs and CCSs that haven't
56 * been declared in the log file yet
57 */
58 CostCentre *CC_LIST = NULL;
59 CostCentreStack *CCS_LIST = NULL;
60
61 #ifdef THREADED_RTS
62 Mutex ccs_mutex;
63 #endif
64
65 /*
66 * Built-in cost centres and cost-centre stacks:
67 *
68 * MAIN is the root of the cost-centre stack tree. If there are
69 * no _scc_s in the program, all costs will be attributed
70 * to MAIN.
71 *
72 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
73 * RTS operations apart from garbage collection are attributed
74 * to SYSTEM.
75 *
76 * GC is the storage manager / garbage collector.
77 *
78 * OVERHEAD gets all costs generated by the profiling system
79 * itself. These are costs that would not be incurred
80 * during non-profiled execution of the program.
81 *
82 * DONT_CARE is a placeholder cost-centre we assign to static
83 * constructors. It should *never* accumulate any costs.
84 *
85 * PINNED accumulates memory allocated to pinned objects, which
86 * cannot be profiled separately because we cannot reliably
87 * traverse pinned memory.
88 */
89
90 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "<built-in>", CC_NOT_CAF, );
91 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", "<built-in>", CC_NOT_CAF, );
92 CC_DECLARE(CC_GC, "GC", "GC", "<built-in>", CC_NOT_CAF, );
93 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "<built-in>", CC_NOT_CAF, );
94 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", "<built-in>", CC_NOT_CAF, );
95 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", "<built-in>", CC_NOT_CAF, );
96 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "<built-in>", CC_NOT_CAF, );
97
98 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
99 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
100 CCS_DECLARE(CCS_GC, CC_GC, );
101 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
102 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
103 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
104 CCS_DECLARE(CCS_IDLE, CC_IDLE, );
105
106 /*
107 * Static Functions
108 */
109
110 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
111 CostCentreStack *ccs2 );
112 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
113 CostCentreStack *new_ccs );
114 static rtsBool ignoreCCS ( CostCentreStack *ccs );
115 static void countTickss ( CostCentreStack *ccs );
116 static void inheritCosts ( CostCentreStack *ccs );
117 static void findCCSMaxLens ( CostCentreStack *ccs,
118 nat indent,
119 nat *max_label_len,
120 nat *max_module_len );
121 static void logCCS ( CostCentreStack *ccs,
122 nat indent,
123 nat max_label_len,
124 nat max_module_len );
125 static void reportCCS ( CostCentreStack *ccs );
126 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
127 CostCentre *cc );
128 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
129 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
130 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
131 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
132 CostCentre *, unsigned int );
133 static void ccsSetSelected ( CostCentreStack *ccs );
134
135 static void initTimeProfiling ( void );
136 static void initProfilingLogFile ( void );
137
138 /* -----------------------------------------------------------------------------
139 Initialise the profiling environment
140 -------------------------------------------------------------------------- */
141
142 void
143 initProfiling1 (void)
144 {
145 // initialise our arena
146 prof_arena = newArena();
147
148 /* for the benefit of allocate()... */
149 {
150 nat n;
151 for (n=0; n < n_capabilities; n++) {
152 capabilities[n].r.rCCCS = CCS_SYSTEM;
153 }
154 }
155
156 #ifdef THREADED_RTS
157 initMutex(&ccs_mutex);
158 #endif
159 }
160
161 void
162 freeProfiling (void)
163 {
164 arenaFree(prof_arena);
165 }
166
167 void
168 initProfiling2 (void)
169 {
170 CostCentreStack *ccs, *next;
171
172 /* Set up the log file, and dump the header and cost centre
173 * information into it.
174 */
175 initProfilingLogFile();
176
177 /* Register all the cost centres / stacks in the program
178 * CC_MAIN gets link = 0, all others have non-zero link.
179 */
180 REGISTER_CC(CC_MAIN);
181 REGISTER_CC(CC_SYSTEM);
182 REGISTER_CC(CC_GC);
183 REGISTER_CC(CC_OVERHEAD);
184 REGISTER_CC(CC_DONT_CARE);
185 REGISTER_CC(CC_PINNED);
186 REGISTER_CC(CC_IDLE);
187
188 REGISTER_CCS(CCS_SYSTEM);
189 REGISTER_CCS(CCS_GC);
190 REGISTER_CCS(CCS_OVERHEAD);
191 REGISTER_CCS(CCS_DONT_CARE);
192 REGISTER_CCS(CCS_PINNED);
193 REGISTER_CCS(CCS_IDLE);
194 REGISTER_CCS(CCS_MAIN);
195
196 /* find all the registered cost centre stacks, and make them
197 * children of CCS_MAIN.
198 */
199 ASSERT(CCS_LIST == CCS_MAIN);
200 CCS_LIST = CCS_LIST->prevStack;
201 CCS_MAIN->prevStack = NULL;
202 CCS_MAIN->root = CCS_MAIN;
203 ccsSetSelected(CCS_MAIN);
204
205 // make CCS_MAIN the parent of all the pre-defined CCSs.
206 for (ccs = CCS_LIST; ccs != NULL; ) {
207 next = ccs->prevStack;
208 ccs->prevStack = NULL;
209 actualPush_(CCS_MAIN,ccs->cc,ccs);
210 ccs->root = ccs;
211 ccs = next;
212 }
213
214 if (RtsFlags.CcFlags.doCostCentres) {
215 initTimeProfiling();
216 }
217
218 if (RtsFlags.ProfFlags.doHeapProfile) {
219 initHeapProfiling();
220 }
221 }
222
223
224 static void
225 initProfilingLogFile(void)
226 {
227 char *prog;
228
229 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
230 strcpy(prog, prog_name);
231 #ifdef mingw32_HOST_OS
232 // on Windows, drop the .exe suffix if there is one
233 {
234 char *suff;
235 suff = strrchr(prog,'.');
236 if (suff != NULL && !strcmp(suff,".exe")) {
237 *suff = '\0';
238 }
239 }
240 #endif
241
242 if (RtsFlags.CcFlags.doCostCentres == 0 &&
243 RtsFlags.ProfFlags.doHeapProfile != HEAP_BY_RETAINER &&
244 RtsFlags.ProfFlags.retainerSelector == NULL)
245 {
246 /* No need for the <prog>.prof file */
247 prof_filename = NULL;
248 prof_file = NULL;
249 }
250 else
251 {
252 /* Initialise the log file name */
253 prof_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
254 sprintf(prof_filename, "%s.prof", prog);
255
256 /* open the log file */
257 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
258 debugBelch("Can't open profiling report file %s\n", prof_filename);
259 RtsFlags.CcFlags.doCostCentres = 0;
260 // The following line was added by Sung; retainer/LDV profiling may need
261 // two output files, i.e., <program>.prof/hp.
262 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
263 RtsFlags.ProfFlags.doHeapProfile = 0;
264 return;
265 }
266 }
267
268 if (RtsFlags.ProfFlags.doHeapProfile) {
269 /* Initialise the log file name */
270 hp_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
271 sprintf(hp_filename, "%s.hp", prog);
272
273 /* open the log file */
274 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
275 debugBelch("Can't open profiling report file %s\n",
276 hp_filename);
277 RtsFlags.ProfFlags.doHeapProfile = 0;
278 return;
279 }
280 }
281 }
282
283 void
284 initTimeProfiling(void)
285 {
286 /* Start ticking */
287 startProfTimer();
288 };
289
290 void
291 endProfiling ( void )
292 {
293 if (RtsFlags.CcFlags.doCostCentres) {
294 stopProfTimer();
295 }
296 if (RtsFlags.ProfFlags.doHeapProfile) {
297 endHeapProfiling();
298 }
299 }
300
301 /* -----------------------------------------------------------------------------
302 Set CCCS when entering a function.
303
304 The algorithm is as follows.
305
306 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
307
308 where
309
310 dropCommonPrefix A B
311 -- returns the suffix of B after removing any prefix common
312 -- to both A and B.
313
314 e.g.
315
316 <a,b,c> ++> <> = <a,b,c>
317 <a,b,c> ++> <d> = <a,b,c,d>
318 <a,b,c> ++> <a,b> = <a,b,c>
319 <a,b> ++> <a,b,c> = <a,b,c>
320 <a,b,c> ++> <a,b,d> = <a,b,c,d>
321
322 -------------------------------------------------------------------------- */
323
324 // implements c1 ++> c2, where c1 and c2 are equal depth
325 //
326 static CostCentreStack *
327 enterFunEqualStacks (CostCentreStack *ccs0,
328 CostCentreStack *ccsapp,
329 CostCentreStack *ccsfn)
330 {
331 ASSERT(ccsapp->depth == ccsfn->depth);
332 if (ccsapp == ccsfn) return ccs0;
333 return pushCostCentre(enterFunEqualStacks(ccs0,
334 ccsapp->prevStack,
335 ccsfn->prevStack),
336 ccsfn->cc);
337 }
338
339 // implements c1 ++> c2, where c2 is deeper than c1.
340 // Drop elements of c2 until we have equal stacks, call
341 // enterFunEqualStacks(), and then push on the elements that we
342 // dropped in reverse order.
343 //
344 static CostCentreStack *
345 enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
346 {
347 if (n == 0) {
348 ASSERT(ccsfn->depth == ccsapp->depth);
349 return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);;
350 } else {
351 ASSERT(ccsfn->depth > ccsapp->depth);
352 return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
353 ccsfn->cc);
354 }
355 }
356
357 void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
358 {
359 CostCentreStack *ccsapp;
360
361 // common case 1: both stacks are the same
362 if (ccsfn == reg->rCCCS) {
363 return;
364 }
365
366 // common case 2: the function stack is empty, or just CAF
367 if (ccsfn->prevStack == CCS_MAIN) {
368 return;
369 }
370
371 ccsapp = reg->rCCCS;
372 reg->rCCCS = CCS_OVERHEAD;
373
374 // common case 3: the stacks are completely different (e.g. one is a
375 // descendent of MAIN and the other of a CAF): we append the whole
376 // of the function stack to the current CCS.
377 if (ccsfn->root != ccsapp->root) {
378 reg->rCCCS = appendCCS(ccsapp,ccsfn);
379 return;
380 }
381
382 // uncommon case 4: ccsapp is deeper than ccsfn
383 if (ccsapp->depth > ccsfn->depth) {
384 nat i, n;
385 CostCentreStack *tmp = ccsapp;
386 n = ccsapp->depth - ccsfn->depth;
387 for (i = 0; i < n; i++) {
388 tmp = tmp->prevStack;
389 }
390 reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
391 return;
392 }
393
394 // uncommon case 5: ccsfn is deeper than CCCS
395 if (ccsfn->depth > ccsapp->depth) {
396 reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
397 ccsfn->depth - ccsapp->depth);
398 return;
399 }
400
401 // uncommon case 6: stacks are equal depth, but different
402 reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
403 }
404
405 /* -----------------------------------------------------------------------------
406 Decide whether closures with this CCS should contribute to the heap
407 profile.
408 -------------------------------------------------------------------------- */
409
410 static void
411 ccsSetSelected (CostCentreStack *ccs)
412 {
413 if (RtsFlags.ProfFlags.modSelector) {
414 if (! strMatchesSelector (ccs->cc->module,
415 RtsFlags.ProfFlags.modSelector) ) {
416 ccs->selected = 0;
417 return;
418 }
419 }
420 if (RtsFlags.ProfFlags.ccSelector) {
421 if (! strMatchesSelector (ccs->cc->label,
422 RtsFlags.ProfFlags.ccSelector) ) {
423 ccs->selected = 0;
424 return;
425 }
426 }
427 if (RtsFlags.ProfFlags.ccsSelector) {
428 CostCentreStack *c;
429 for (c = ccs; c != NULL; c = c->prevStack)
430 {
431 if ( strMatchesSelector (c->cc->label,
432 RtsFlags.ProfFlags.ccsSelector) ) {
433 break;
434 }
435 }
436 if (c == NULL) {
437 ccs->selected = 0;
438 return;
439 }
440 }
441
442 ccs->selected = 1;
443 return;
444 }
445
446 /* -----------------------------------------------------------------------------
447 Cost-centre stack manipulation
448 -------------------------------------------------------------------------- */
449
450 #ifdef DEBUG
451 CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
452 CostCentreStack *
453 pushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
454 #define pushCostCentre _pushCostCentre
455 {
456 IF_DEBUG(prof,
457 traceBegin("pushing %s on ", cc->label);
458 debugCCS(ccs);
459 traceEnd(););
460
461 return pushCostCentre(ccs,cc);
462 }
463 #endif
464
465 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
466
467 #ifdef DEBUG
468 CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
469 CostCentreStack *
470 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
471 #define appendCCS _appendCCS
472 {
473 IF_DEBUG(prof,
474 if (ccs1 != ccs2) {
475 debugBelch("Appending ");
476 debugCCS(ccs1);
477 debugBelch(" to ");
478 debugCCS(ccs2);
479 debugBelch("\n");});
480 return appendCCS(ccs1,ccs2);
481 }
482 #endif
483
484 CostCentreStack *
485 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
486 {
487 if (ccs1 == ccs2) {
488 return ccs1;
489 }
490
491 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) {
492 // stop at a CAF element
493 return ccs1;
494 }
495
496 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
497 }
498
499 // Pick one:
500 // #define RECURSION_DROPS
501 #define RECURSION_TRUNCATES
502
503 CostCentreStack *
504 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
505 {
506 CostCentreStack *temp_ccs, *ret;
507 IndexTable *ixtable;
508
509 if (ccs == EMPTY_STACK) {
510 ACQUIRE_LOCK(&ccs_mutex);
511 ret = actualPush(ccs,cc);
512 }
513 else
514 {
515 if (ccs->cc == cc) {
516 return ccs;
517 } else {
518 // check if we've already memoized this stack
519 ixtable = ccs->indexTable;
520 temp_ccs = isInIndexTable(ixtable,cc);
521
522 if (temp_ccs != EMPTY_STACK) {
523 return temp_ccs;
524 } else {
525
526 // not in the IndexTable, now we take the lock:
527 ACQUIRE_LOCK(&ccs_mutex);
528
529 if (ccs->indexTable != ixtable)
530 {
531 // someone modified ccs->indexTable while
532 // we did not hold the lock, so we must
533 // check it again:
534 temp_ccs = isInIndexTable(ixtable,cc);
535 if (temp_ccs != EMPTY_STACK)
536 {
537 RELEASE_LOCK(&ccs_mutex);
538 return temp_ccs;
539 }
540 }
541 temp_ccs = checkLoop(ccs,cc);
542 if (temp_ccs != NULL) {
543 // This CC is already in the stack somewhere.
544 // This could be recursion, or just calling
545 // another function with the same CC.
546 // A number of policies are possible at this
547 // point, we implement two here:
548 // - truncate the stack to the previous instance
549 // of this CC
550 // - ignore this push, return the same stack.
551 //
552 CostCentreStack *new_ccs;
553 #if defined(RECURSION_TRUNCATES)
554 new_ccs = temp_ccs;
555 #else // defined(RECURSION_DROPS)
556 new_ccs = ccs;
557 #endif
558 ccs->indexTable = addToIndexTable (ccs->indexTable,
559 new_ccs, cc, 1);
560 ret = new_ccs;
561 } else {
562 ret = actualPush (ccs,cc);
563 }
564 }
565 }
566 }
567
568 RELEASE_LOCK(&ccs_mutex);
569 return ret;
570 }
571
572 static CostCentreStack *
573 checkLoop (CostCentreStack *ccs, CostCentre *cc)
574 {
575 while (ccs != EMPTY_STACK) {
576 if (ccs->cc == cc)
577 return ccs;
578 ccs = ccs->prevStack;
579 }
580 return NULL;
581 }
582
583 static CostCentreStack *
584 actualPush (CostCentreStack *ccs, CostCentre *cc)
585 {
586 CostCentreStack *new_ccs;
587
588 // allocate space for a new CostCentreStack
589 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
590
591 return actualPush_(ccs, cc, new_ccs);
592 }
593
594 static CostCentreStack *
595 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
596 {
597 /* assign values to each member of the structure */
598 new_ccs->ccsID = CCS_ID++;
599 new_ccs->cc = cc;
600 new_ccs->prevStack = ccs;
601 new_ccs->root = ccs->root;
602 new_ccs->depth = ccs->depth + 1;
603
604 new_ccs->indexTable = EMPTY_TABLE;
605
606 /* Initialise the various _scc_ counters to zero
607 */
608 new_ccs->scc_count = 0;
609
610 /* Initialize all other stats here. There should be a quick way
611 * that's easily used elsewhere too
612 */
613 new_ccs->time_ticks = 0;
614 new_ccs->mem_alloc = 0;
615 new_ccs->inherited_ticks = 0;
616 new_ccs->inherited_alloc = 0;
617
618 // Set the selected field.
619 ccsSetSelected(new_ccs);
620
621 /* update the memoization table for the parent stack */
622 if (ccs != EMPTY_STACK) {
623 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
624 0/*not a back edge*/);
625 }
626
627 /* return a pointer to the new stack */
628 return new_ccs;
629 }
630
631
632 static CostCentreStack *
633 isInIndexTable(IndexTable *it, CostCentre *cc)
634 {
635 while (it!=EMPTY_TABLE)
636 {
637 if (it->cc == cc)
638 return it->ccs;
639 else
640 it = it->next;
641 }
642
643 /* otherwise we never found it so return EMPTY_TABLE */
644 return EMPTY_TABLE;
645 }
646
647
648 static IndexTable *
649 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
650 CostCentre *cc, unsigned int back_edge)
651 {
652 IndexTable *new_it;
653
654 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
655
656 new_it->cc = cc;
657 new_it->ccs = new_ccs;
658 new_it->next = it;
659 new_it->back_edge = back_edge;
660 return new_it;
661 }
662
663 /* -----------------------------------------------------------------------------
664 Generating a time & allocation profiling report.
665 -------------------------------------------------------------------------- */
666
667 /* We omit certain system-related CCs and CCSs from the default
668 * reports, so as not to cause confusion.
669 */
670 static rtsBool
671 ignoreCC (CostCentre *cc)
672 {
673 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
674 ( cc == CC_OVERHEAD
675 || cc == CC_DONT_CARE
676 || cc == CC_GC
677 || cc == CC_SYSTEM
678 || cc == CC_IDLE)) {
679 return rtsTrue;
680 } else {
681 return rtsFalse;
682 }
683 }
684
685 static rtsBool
686 ignoreCCS (CostCentreStack *ccs)
687 {
688 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
689 ( ccs == CCS_OVERHEAD
690 || ccs == CCS_DONT_CARE
691 || ccs == CCS_GC
692 || ccs == CCS_SYSTEM
693 || ccs == CCS_IDLE)) {
694 return rtsTrue;
695 } else {
696 return rtsFalse;
697 }
698 }
699
700 /* -----------------------------------------------------------------------------
701 Generating the aggregated per-cost-centre time/alloc report.
702 -------------------------------------------------------------------------- */
703
704 static CostCentre *sorted_cc_list;
705
706 static void
707 aggregateCCCosts( CostCentreStack *ccs )
708 {
709 IndexTable *i;
710
711 ccs->cc->mem_alloc += ccs->mem_alloc;
712 ccs->cc->time_ticks += ccs->time_ticks;
713
714 for (i = ccs->indexTable; i != 0; i = i->next) {
715 if (!i->back_edge) {
716 aggregateCCCosts(i->ccs);
717 }
718 }
719 }
720
721 static void
722 insertCCInSortedList( CostCentre *new_cc )
723 {
724 CostCentre **prev, *cc;
725
726 prev = &sorted_cc_list;
727 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
728 if (new_cc->time_ticks > cc->time_ticks) {
729 new_cc->link = cc;
730 *prev = new_cc;
731 return;
732 } else {
733 prev = &(cc->link);
734 }
735 }
736 new_cc->link = NULL;
737 *prev = new_cc;
738 }
739
740 static nat
741 strlen_utf8 (char *s)
742 {
743 nat n = 0;
744 unsigned char c;
745
746 for (; *s != '\0'; s++) {
747 c = *s;
748 if (c < 0x80 || c > 0xBF) n++;
749 }
750 return n;
751 }
752
753 static void
754 reportPerCCCosts( void )
755 {
756 CostCentre *cc, *next;
757 nat max_label_len, max_module_len;
758
759 aggregateCCCosts(CCS_MAIN);
760 sorted_cc_list = NULL;
761
762 max_label_len = 11; // no shorter than the "COST CENTRE" header
763 max_module_len = 7; // no shorter than the "MODULE" header
764
765 for (cc = CC_LIST; cc != NULL; cc = next) {
766 next = cc->link;
767 if (cc->time_ticks > total_prof_ticks/100
768 || cc->mem_alloc > total_alloc/100
769 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
770 insertCCInSortedList(cc);
771
772 max_label_len = stg_max(strlen_utf8(cc->label), max_label_len);
773 max_module_len = stg_max(strlen_utf8(cc->module), max_module_len);
774 }
775 }
776
777 fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
778 fprintf(prof_file, "%6s %6s", "%time", "%alloc");
779 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
780 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
781 }
782 fprintf(prof_file, "\n\n");
783
784 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
785 if (ignoreCC(cc)) {
786 continue;
787 }
788 fprintf(prof_file, "%s%*s %s%*s",
789 cc->label,
790 max_label_len - strlen_utf8(cc->label), "",
791 cc->module,
792 max_module_len - strlen_utf8(cc->module), "");
793
794 fprintf(prof_file, "%6.1f %6.1f",
795 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
796 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
797 total_alloc * 100)
798 );
799
800 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
801 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
802 (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_));
803 }
804 fprintf(prof_file, "\n");
805 }
806
807 fprintf(prof_file,"\n\n");
808 }
809
810 /* -----------------------------------------------------------------------------
811 Generate the cost-centre-stack time/alloc report
812 -------------------------------------------------------------------------- */
813
814 static void
815 fprintHeader( nat max_label_len, nat max_module_len )
816 {
817 fprintf(prof_file, "%-*s %-*s%6s %11s %11s %11s\n", max_label_len, "", max_module_len, "", "", "", "individual", "inherited");
818
819 fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
820 fprintf(prof_file, "%6s %11s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc");
821
822 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
823 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
824 }
825
826 fprintf(prof_file, "\n\n");
827 }
828
829 void
830 reportCCSProfiling( void )
831 {
832 nat count;
833 char temp[128]; /* sigh: magic constant */
834
835 stopProfTimer();
836
837 total_prof_ticks = 0;
838 total_alloc = 0;
839 countTickss(CCS_MAIN);
840
841 if (RtsFlags.CcFlags.doCostCentres == 0) return;
842
843 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
844 time_str(), "Final");
845
846 fprintf(prof_file, "\n\t ");
847 fprintf(prof_file, " %s", prog_name);
848 fprintf(prof_file, " +RTS");
849 for (count = 0; rts_argv[count]; count++)
850 fprintf(prof_file, " %s", rts_argv[count]);
851 fprintf(prof_file, " -RTS");
852 for (count = 1; prog_argv[count]; count++)
853 fprintf(prof_file, " %s", prog_argv[count]);
854 fprintf(prof_file, "\n\n");
855
856 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d us, %d processor%s)\n",
857 ((double) total_prof_ticks *
858 (double) RtsFlags.MiscFlags.tickInterval) / (TIME_RESOLUTION * n_capabilities),
859 (unsigned long) total_prof_ticks,
860 (int) TimeToUS(RtsFlags.MiscFlags.tickInterval),
861 n_capabilities, n_capabilities > 1 ? "s" : "");
862
863 fprintf(prof_file, "\ttotal alloc = %11s bytes",
864 showStgWord64(total_alloc * sizeof(W_),
865 temp, rtsTrue/*commas*/));
866
867 fprintf(prof_file, " (excludes profiling overheads)\n\n");
868
869 reportPerCCCosts();
870
871 inheritCosts(CCS_MAIN);
872
873 reportCCS(pruneCCSTree(CCS_MAIN));
874 }
875
876 static void
877 findCCSMaxLens(CostCentreStack *ccs, nat indent, nat *max_label_len, nat *max_module_len) {
878 CostCentre *cc;
879 IndexTable *i;
880
881 cc = ccs->cc;
882
883 *max_label_len = stg_max(*max_label_len, indent + strlen_utf8(cc->label));
884 *max_module_len = stg_max(*max_module_len, strlen_utf8(cc->module));
885
886 for (i = ccs->indexTable; i != 0; i = i->next) {
887 if (!i->back_edge) {
888 findCCSMaxLens(i->ccs, indent+1, max_label_len, max_module_len);
889 }
890 }
891 }
892
893 static void
894 logCCS(CostCentreStack *ccs, nat indent, nat max_label_len, nat max_module_len)
895 {
896 CostCentre *cc;
897 IndexTable *i;
898
899 cc = ccs->cc;
900
901 /* Only print cost centres with non 0 data ! */
902
903 if (!ignoreCCS(ccs))
904 /* force printing of *all* cost centres if -Pa */
905 {
906
907 fprintf(prof_file, "%-*s%s%*s %s%*s",
908 indent, "",
909 cc->label,
910 max_label_len-indent - strlen_utf8(cc->label), "",
911 cc->module,
912 max_module_len - strlen_utf8(cc->module), "");
913
914 fprintf(prof_file, "%6ld %11" FMT_Word64 " %5.1f %5.1f %5.1f %5.1f",
915 ccs->ccsID, ccs->scc_count,
916 total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
917 total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
918 total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
919 total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
920 );
921
922 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
923 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
924 (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
925 }
926 fprintf(prof_file, "\n");
927 }
928
929 for (i = ccs->indexTable; i != 0; i = i->next) {
930 if (!i->back_edge) {
931 logCCS(i->ccs, indent+1, max_label_len, max_module_len);
932 }
933 }
934 }
935
936 static void
937 reportCCS(CostCentreStack *ccs)
938 {
939 nat max_label_len, max_module_len;
940
941 max_label_len = 11; // no shorter than "COST CENTRE" header
942 max_module_len = 7; // no shorter than "MODULE" header
943
944 findCCSMaxLens(ccs, 0, &max_label_len, &max_module_len);
945
946 fprintHeader(max_label_len, max_module_len);
947 logCCS(ccs, 0, max_label_len, max_module_len);
948 }
949
950
951 /* Traverse the cost centre stack tree and accumulate
952 * ticks/allocations.
953 */
954 static void
955 countTickss(CostCentreStack *ccs)
956 {
957 IndexTable *i;
958
959 if (!ignoreCCS(ccs)) {
960 total_alloc += ccs->mem_alloc;
961 total_prof_ticks += ccs->time_ticks;
962 }
963 for (i = ccs->indexTable; i != NULL; i = i->next)
964 if (!i->back_edge) {
965 countTickss(i->ccs);
966 }
967 }
968
969 /* Traverse the cost centre stack tree and inherit ticks & allocs.
970 */
971 static void
972 inheritCosts(CostCentreStack *ccs)
973 {
974 IndexTable *i;
975
976 if (ignoreCCS(ccs)) { return; }
977
978 ccs->inherited_ticks += ccs->time_ticks;
979 ccs->inherited_alloc += ccs->mem_alloc;
980
981 for (i = ccs->indexTable; i != NULL; i = i->next)
982 if (!i->back_edge) {
983 inheritCosts(i->ccs);
984 ccs->inherited_ticks += i->ccs->inherited_ticks;
985 ccs->inherited_alloc += i->ccs->inherited_alloc;
986 }
987
988 return;
989 }
990
991 //
992 // Prune CCSs with zero entries, zero ticks or zero allocation from
993 // the tree, unless COST_CENTRES_ALL is on.
994 //
995 static CostCentreStack *
996 pruneCCSTree (CostCentreStack *ccs)
997 {
998 CostCentreStack *ccs1;
999 IndexTable *i, **prev;
1000
1001 prev = &ccs->indexTable;
1002 for (i = ccs->indexTable; i != 0; i = i->next) {
1003 if (i->back_edge) { continue; }
1004
1005 ccs1 = pruneCCSTree(i->ccs);
1006 if (ccs1 == NULL) {
1007 *prev = i->next;
1008 } else {
1009 prev = &(i->next);
1010 }
1011 }
1012
1013 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
1014 /* force printing of *all* cost centres if -P -P */ )
1015
1016 || ( ccs->indexTable != 0 )
1017 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
1018 ) {
1019 return ccs;
1020 } else {
1021 return NULL;
1022 }
1023 }
1024
1025 void
1026 fprintCCS( FILE *f, CostCentreStack *ccs )
1027 {
1028 fprintf(f,"<");
1029 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1030 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
1031 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1032 fprintf(f,",");
1033 }
1034 }
1035 fprintf(f,">");
1036 }
1037
1038 // Returns: True if the call stack ended with CAF
1039 static rtsBool fprintCallStack (CostCentreStack *ccs)
1040 {
1041 CostCentreStack *prev;
1042
1043 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
1044 prev = ccs->prevStack;
1045 while (prev && prev != CCS_MAIN) {
1046 ccs = prev;
1047 fprintf(stderr, ",\n called from %s.%s",
1048 ccs->cc->module, ccs->cc->label);
1049 prev = ccs->prevStack;
1050 }
1051 fprintf(stderr, "\n");
1052
1053 return (!strncmp(ccs->cc->label, "CAF", 3));
1054 }
1055
1056 /* For calling from .cmm code, where we can't reliably refer to stderr */
1057 void
1058 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
1059 {
1060 rtsBool is_caf;
1061 StgPtr frame;
1062 StgStack *stack;
1063 CostCentreStack *prev_ccs;
1064 nat depth = 0;
1065 const nat MAX_DEPTH = 10; // don't print gigantic chains of stacks
1066
1067 {
1068 char *desc;
1069 StgInfoTable *info;
1070 info = get_itbl(UNTAG_CLOSURE(exception));
1071 switch (info->type) {
1072 case CONSTR:
1073 case CONSTR_1_0:
1074 case CONSTR_0_1:
1075 case CONSTR_2_0:
1076 case CONSTR_1_1:
1077 case CONSTR_0_2:
1078 case CONSTR_STATIC:
1079 case CONSTR_NOCAF_STATIC:
1080 desc = GET_CON_DESC(itbl_to_con_itbl(info));
1081 break;
1082 default:
1083 desc = closure_type_names[info->type];
1084 break;
1085 }
1086 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
1087 }
1088
1089 is_caf = fprintCallStack(ccs);
1090
1091 // traverse the stack down to the enclosing update frame to
1092 // find out where this CCS was evaluated from...
1093
1094 stack = tso->stackobj;
1095 frame = stack->sp;
1096 prev_ccs = ccs;
1097
1098 for (; is_caf && depth < MAX_DEPTH; depth++)
1099 {
1100 switch (get_itbl((StgClosure*)frame)->type)
1101 {
1102 case UPDATE_FRAME:
1103 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
1104 frame += sizeofW(StgUpdateFrame);
1105 if (ccs == CCS_MAIN) {
1106 goto done;
1107 }
1108 if (ccs == prev_ccs) {
1109 // ignore if this is the same as the previous stack,
1110 // we're probably in library code and haven't
1111 // accumulated any more interesting stack items
1112 // since the last update frame.
1113 break;
1114 }
1115 prev_ccs = ccs;
1116 fprintf(stderr, " --> evaluated by: ");
1117 is_caf = fprintCallStack(ccs);
1118 break;
1119 case UNDERFLOW_FRAME:
1120 stack = ((StgUnderflowFrame*)frame)->next_chunk;
1121 frame = stack->sp;
1122 break;
1123 case STOP_FRAME:
1124 goto done;
1125 default:
1126 frame += stack_frame_sizeW((StgClosure*)frame);
1127 break;
1128 }
1129 }
1130 done:
1131 return;
1132 }
1133
1134 #ifdef DEBUG
1135 void
1136 debugCCS( CostCentreStack *ccs )
1137 {
1138 debugBelch("<");
1139 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1140 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
1141 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1142 debugBelch(",");
1143 }
1144 }
1145 debugBelch(">");
1146 }
1147 #endif /* DEBUG */
1148
1149 #endif /* PROFILING */