EventLog: Factor out ensureRoomFor*Event
[ghc.git] / rts / Profiling.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2000
4 *
5 * Support for profiling
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifdef PROFILING
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
17 #include "ProfHeap.h"
18 #include "Arena.h"
19 #include "RetainerProfile.h"
20 #include "Printer.h"
21 #include "Capability.h"
22
23 #include <string.h>
24
25 #ifdef DEBUG
26 #include "Trace.h"
27 #endif
28
29 /*
30 * Profiling allocation arena.
31 */
32 Arena *prof_arena;
33
34 /*
35 * Global variables used to assign unique IDs to cc's, ccs's, and
36 * closure_cats
37 */
38
39 unsigned int CC_ID = 1;
40 unsigned int CCS_ID = 1;
41
42 /* figures for the profiling report.
43 */
44 static StgWord64 total_alloc;
45 static W_ total_prof_ticks;
46
47 /* Globals for opening the profiling log file(s)
48 */
49 static char *prof_filename; /* prof report file name = <program>.prof */
50 FILE *prof_file;
51
52 static char *hp_filename; /* heap profile (hp2ps style) log file */
53 FILE *hp_file;
54
55 /* Linked lists to keep track of CCs and CCSs that haven't
56 * been declared in the log file yet
57 */
58 CostCentre *CC_LIST = NULL;
59 CostCentreStack *CCS_LIST = NULL;
60
61 #ifdef THREADED_RTS
62 Mutex ccs_mutex;
63 #endif
64
65 /*
66 * Built-in cost centres and cost-centre stacks:
67 *
68 * MAIN is the root of the cost-centre stack tree. If there are
69 * no {-# SCC #-}s in the program, all costs will be attributed
70 * to MAIN.
71 *
72 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
73 * RTS operations apart from garbage collection are attributed
74 * to SYSTEM.
75 *
76 * GC is the storage manager / garbage collector.
77 *
78 * OVERHEAD gets all costs generated by the profiling system
79 * itself. These are costs that would not be incurred
80 * during non-profiled execution of the program.
81 *
82 * DONT_CARE is a placeholder cost-centre we assign to static
83 * constructors. It should *never* accumulate any costs.
84 *
85 * PINNED accumulates memory allocated to pinned objects, which
86 * cannot be profiled separately because we cannot reliably
87 * traverse pinned memory.
88 */
89
90 CC_DECLARE(CC_MAIN, "MAIN", "MAIN", "<built-in>", CC_NOT_CAF, );
91 CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", "<built-in>", CC_NOT_CAF, );
92 CC_DECLARE(CC_GC, "GC", "GC", "<built-in>", CC_NOT_CAF, );
93 CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", "<built-in>", CC_NOT_CAF, );
94 CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", "<built-in>", CC_NOT_CAF, );
95 CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", "<built-in>", CC_NOT_CAF, );
96 CC_DECLARE(CC_IDLE, "IDLE", "IDLE", "<built-in>", CC_NOT_CAF, );
97
98 CCS_DECLARE(CCS_MAIN, CC_MAIN, );
99 CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
100 CCS_DECLARE(CCS_GC, CC_GC, );
101 CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
102 CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
103 CCS_DECLARE(CCS_PINNED, CC_PINNED, );
104 CCS_DECLARE(CCS_IDLE, CC_IDLE, );
105
106 /*
107 * Static Functions
108 */
109
110 static CostCentreStack * appendCCS ( CostCentreStack *ccs1,
111 CostCentreStack *ccs2 );
112 static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc,
113 CostCentreStack *new_ccs );
114 static rtsBool ignoreCCS ( CostCentreStack *ccs );
115 static void countTickss ( CostCentreStack *ccs );
116 static void inheritCosts ( CostCentreStack *ccs );
117 static nat numDigits ( StgInt i );
118 static void findCCSMaxLens ( CostCentreStack *ccs,
119 nat indent,
120 nat *max_label_len,
121 nat *max_module_len,
122 nat *max_id_len );
123 static void logCCS ( CostCentreStack *ccs,
124 nat indent,
125 nat max_label_len,
126 nat max_module_len,
127 nat max_id_len );
128 static void reportCCS ( CostCentreStack *ccs );
129 static CostCentreStack * checkLoop ( CostCentreStack *ccs,
130 CostCentre *cc );
131 static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
132 static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * );
133 static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * );
134 static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *,
135 CostCentre *, unsigned int );
136 static void ccsSetSelected ( CostCentreStack *ccs );
137
138 static void initTimeProfiling ( void );
139 static void initProfilingLogFile ( void );
140
141 /* -----------------------------------------------------------------------------
142 Initialise the profiling environment
143 -------------------------------------------------------------------------- */
144
145 void
146 initProfiling1 (void)
147 {
148 // initialise our arena
149 prof_arena = newArena();
150
151 /* for the benefit of allocate()... */
152 {
153 nat n;
154 for (n=0; n < n_capabilities; n++) {
155 capabilities[n]->r.rCCCS = CCS_SYSTEM;
156 }
157 }
158
159 #ifdef THREADED_RTS
160 initMutex(&ccs_mutex);
161 #endif
162 }
163
164 void
165 freeProfiling (void)
166 {
167 arenaFree(prof_arena);
168 }
169
170 void
171 initProfiling2 (void)
172 {
173 CostCentreStack *ccs, *next;
174
175 /* Set up the log file, and dump the header and cost centre
176 * information into it.
177 */
178 initProfilingLogFile();
179
180 /* Register all the cost centres / stacks in the program
181 * CC_MAIN gets link = 0, all others have non-zero link.
182 */
183 REGISTER_CC(CC_MAIN);
184 REGISTER_CC(CC_SYSTEM);
185 REGISTER_CC(CC_GC);
186 REGISTER_CC(CC_OVERHEAD);
187 REGISTER_CC(CC_DONT_CARE);
188 REGISTER_CC(CC_PINNED);
189 REGISTER_CC(CC_IDLE);
190
191 REGISTER_CCS(CCS_SYSTEM);
192 REGISTER_CCS(CCS_GC);
193 REGISTER_CCS(CCS_OVERHEAD);
194 REGISTER_CCS(CCS_DONT_CARE);
195 REGISTER_CCS(CCS_PINNED);
196 REGISTER_CCS(CCS_IDLE);
197 REGISTER_CCS(CCS_MAIN);
198
199 /* find all the registered cost centre stacks, and make them
200 * children of CCS_MAIN.
201 */
202 ASSERT(CCS_LIST == CCS_MAIN);
203 CCS_LIST = CCS_LIST->prevStack;
204 CCS_MAIN->prevStack = NULL;
205 CCS_MAIN->root = CCS_MAIN;
206 ccsSetSelected(CCS_MAIN);
207
208 // make CCS_MAIN the parent of all the pre-defined CCSs.
209 for (ccs = CCS_LIST; ccs != NULL; ) {
210 next = ccs->prevStack;
211 ccs->prevStack = NULL;
212 actualPush_(CCS_MAIN,ccs->cc,ccs);
213 ccs->root = ccs;
214 ccs = next;
215 }
216
217 if (RtsFlags.CcFlags.doCostCentres) {
218 initTimeProfiling();
219 }
220
221 if (RtsFlags.ProfFlags.doHeapProfile) {
222 initHeapProfiling();
223 }
224 }
225
226
227 static void
228 initProfilingLogFile(void)
229 {
230 char *prog;
231
232 prog = arenaAlloc(prof_arena, strlen(prog_name) + 1);
233 strcpy(prog, prog_name);
234 #ifdef mingw32_HOST_OS
235 // on Windows, drop the .exe suffix if there is one
236 {
237 char *suff;
238 suff = strrchr(prog,'.');
239 if (suff != NULL && !strcmp(suff,".exe")) {
240 *suff = '\0';
241 }
242 }
243 #endif
244
245 if (RtsFlags.CcFlags.doCostCentres == 0 &&
246 RtsFlags.ProfFlags.doHeapProfile != HEAP_BY_RETAINER &&
247 RtsFlags.ProfFlags.retainerSelector == NULL)
248 {
249 /* No need for the <prog>.prof file */
250 prof_filename = NULL;
251 prof_file = NULL;
252 }
253 else
254 {
255 /* Initialise the log file name */
256 prof_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
257 sprintf(prof_filename, "%s.prof", prog);
258
259 /* open the log file */
260 if ((prof_file = fopen(prof_filename, "w")) == NULL) {
261 debugBelch("Can't open profiling report file %s\n", prof_filename);
262 RtsFlags.CcFlags.doCostCentres = 0;
263 // The following line was added by Sung; retainer/LDV profiling may need
264 // two output files, i.e., <program>.prof/hp.
265 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
266 RtsFlags.ProfFlags.doHeapProfile = 0;
267 return;
268 }
269 }
270
271 if (RtsFlags.ProfFlags.doHeapProfile) {
272 /* Initialise the log file name */
273 hp_filename = arenaAlloc(prof_arena, strlen(prog) + 6);
274 sprintf(hp_filename, "%s.hp", prog);
275
276 /* open the log file */
277 if ((hp_file = fopen(hp_filename, "w")) == NULL) {
278 debugBelch("Can't open profiling report file %s\n",
279 hp_filename);
280 RtsFlags.ProfFlags.doHeapProfile = 0;
281 return;
282 }
283 }
284 }
285
286 void
287 initTimeProfiling(void)
288 {
289 /* Start ticking */
290 startProfTimer();
291 };
292
293 void
294 endProfiling ( void )
295 {
296 if (RtsFlags.CcFlags.doCostCentres) {
297 stopProfTimer();
298 }
299 if (RtsFlags.ProfFlags.doHeapProfile) {
300 endHeapProfiling();
301 }
302 }
303
304 /* -----------------------------------------------------------------------------
305 Set CCCS when entering a function.
306
307 The algorithm is as follows.
308
309 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
310
311 where
312
313 dropCommonPrefix A B
314 -- returns the suffix of B after removing any prefix common
315 -- to both A and B.
316
317 e.g.
318
319 <a,b,c> ++> <> = <a,b,c>
320 <a,b,c> ++> <d> = <a,b,c,d>
321 <a,b,c> ++> <a,b> = <a,b,c>
322 <a,b> ++> <a,b,c> = <a,b,c>
323 <a,b,c> ++> <a,b,d> = <a,b,c,d>
324
325 -------------------------------------------------------------------------- */
326
327 // implements c1 ++> c2, where c1 and c2 are equal depth
328 //
329 static CostCentreStack *
330 enterFunEqualStacks (CostCentreStack *ccs0,
331 CostCentreStack *ccsapp,
332 CostCentreStack *ccsfn)
333 {
334 ASSERT(ccsapp->depth == ccsfn->depth);
335 if (ccsapp == ccsfn) return ccs0;
336 return pushCostCentre(enterFunEqualStacks(ccs0,
337 ccsapp->prevStack,
338 ccsfn->prevStack),
339 ccsfn->cc);
340 }
341
342 // implements c1 ++> c2, where c2 is deeper than c1.
343 // Drop elements of c2 until we have equal stacks, call
344 // enterFunEqualStacks(), and then push on the elements that we
345 // dropped in reverse order.
346 //
347 static CostCentreStack *
348 enterFunCurShorter (CostCentreStack *ccsapp, CostCentreStack *ccsfn, StgWord n)
349 {
350 if (n == 0) {
351 ASSERT(ccsfn->depth == ccsapp->depth);
352 return enterFunEqualStacks(ccsapp,ccsapp,ccsfn);;
353 } else {
354 ASSERT(ccsfn->depth > ccsapp->depth);
355 return pushCostCentre(enterFunCurShorter(ccsapp, ccsfn->prevStack, n-1),
356 ccsfn->cc);
357 }
358 }
359
360 void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
361 {
362 CostCentreStack *ccsapp;
363
364 // common case 1: both stacks are the same
365 if (ccsfn == reg->rCCCS) {
366 return;
367 }
368
369 // common case 2: the function stack is empty, or just CAF
370 if (ccsfn->prevStack == CCS_MAIN) {
371 return;
372 }
373
374 ccsapp = reg->rCCCS;
375 reg->rCCCS = CCS_OVERHEAD;
376
377 // common case 3: the stacks are completely different (e.g. one is a
378 // descendent of MAIN and the other of a CAF): we append the whole
379 // of the function stack to the current CCS.
380 if (ccsfn->root != ccsapp->root) {
381 reg->rCCCS = appendCCS(ccsapp,ccsfn);
382 return;
383 }
384
385 // uncommon case 4: ccsapp is deeper than ccsfn
386 if (ccsapp->depth > ccsfn->depth) {
387 nat i, n;
388 CostCentreStack *tmp = ccsapp;
389 n = ccsapp->depth - ccsfn->depth;
390 for (i = 0; i < n; i++) {
391 tmp = tmp->prevStack;
392 }
393 reg->rCCCS = enterFunEqualStacks(ccsapp,tmp,ccsfn);
394 return;
395 }
396
397 // uncommon case 5: ccsfn is deeper than CCCS
398 if (ccsfn->depth > ccsapp->depth) {
399 reg->rCCCS = enterFunCurShorter(ccsapp, ccsfn,
400 ccsfn->depth - ccsapp->depth);
401 return;
402 }
403
404 // uncommon case 6: stacks are equal depth, but different
405 reg->rCCCS = enterFunEqualStacks(ccsapp,ccsapp,ccsfn);
406 }
407
408 /* -----------------------------------------------------------------------------
409 Decide whether closures with this CCS should contribute to the heap
410 profile.
411 -------------------------------------------------------------------------- */
412
413 static void
414 ccsSetSelected (CostCentreStack *ccs)
415 {
416 if (RtsFlags.ProfFlags.modSelector) {
417 if (! strMatchesSelector (ccs->cc->module,
418 RtsFlags.ProfFlags.modSelector) ) {
419 ccs->selected = 0;
420 return;
421 }
422 }
423 if (RtsFlags.ProfFlags.ccSelector) {
424 if (! strMatchesSelector (ccs->cc->label,
425 RtsFlags.ProfFlags.ccSelector) ) {
426 ccs->selected = 0;
427 return;
428 }
429 }
430 if (RtsFlags.ProfFlags.ccsSelector) {
431 CostCentreStack *c;
432 for (c = ccs; c != NULL; c = c->prevStack)
433 {
434 if ( strMatchesSelector (c->cc->label,
435 RtsFlags.ProfFlags.ccsSelector) ) {
436 break;
437 }
438 }
439 if (c == NULL) {
440 ccs->selected = 0;
441 return;
442 }
443 }
444
445 ccs->selected = 1;
446 return;
447 }
448
449 /* -----------------------------------------------------------------------------
450 Cost-centre stack manipulation
451 -------------------------------------------------------------------------- */
452
453 #ifdef DEBUG
454 CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
455 CostCentreStack *
456 pushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
457 #define pushCostCentre _pushCostCentre
458 {
459 IF_DEBUG(prof,
460 traceBegin("pushing %s on ", cc->label);
461 debugCCS(ccs);
462 traceEnd(););
463
464 return pushCostCentre(ccs,cc);
465 }
466 #endif
467
468 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
469
470 #ifdef DEBUG
471 CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
472 CostCentreStack *
473 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
474 #define appendCCS _appendCCS
475 {
476 IF_DEBUG(prof,
477 if (ccs1 != ccs2) {
478 debugBelch("Appending ");
479 debugCCS(ccs1);
480 debugBelch(" to ");
481 debugCCS(ccs2);
482 debugBelch("\n");});
483 return appendCCS(ccs1,ccs2);
484 }
485 #endif
486
487 CostCentreStack *
488 appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
489 {
490 if (ccs1 == ccs2) {
491 return ccs1;
492 }
493
494 if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) {
495 // stop at a CAF element
496 return ccs1;
497 }
498
499 return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc);
500 }
501
502 // Pick one:
503 // #define RECURSION_DROPS
504 #define RECURSION_TRUNCATES
505
506 CostCentreStack *
507 pushCostCentre (CostCentreStack *ccs, CostCentre *cc)
508 {
509 CostCentreStack *temp_ccs, *ret;
510 IndexTable *ixtable;
511
512 if (ccs == EMPTY_STACK) {
513 ACQUIRE_LOCK(&ccs_mutex);
514 ret = actualPush(ccs,cc);
515 }
516 else
517 {
518 if (ccs->cc == cc) {
519 return ccs;
520 } else {
521 // check if we've already memoized this stack
522 ixtable = ccs->indexTable;
523 temp_ccs = isInIndexTable(ixtable,cc);
524
525 if (temp_ccs != EMPTY_STACK) {
526 return temp_ccs;
527 } else {
528
529 // not in the IndexTable, now we take the lock:
530 ACQUIRE_LOCK(&ccs_mutex);
531
532 if (ccs->indexTable != ixtable)
533 {
534 // someone modified ccs->indexTable while
535 // we did not hold the lock, so we must
536 // check it again:
537 temp_ccs = isInIndexTable(ixtable,cc);
538 if (temp_ccs != EMPTY_STACK)
539 {
540 RELEASE_LOCK(&ccs_mutex);
541 return temp_ccs;
542 }
543 }
544 temp_ccs = checkLoop(ccs,cc);
545 if (temp_ccs != NULL) {
546 // This CC is already in the stack somewhere.
547 // This could be recursion, or just calling
548 // another function with the same CC.
549 // A number of policies are possible at this
550 // point, we implement two here:
551 // - truncate the stack to the previous instance
552 // of this CC
553 // - ignore this push, return the same stack.
554 //
555 CostCentreStack *new_ccs;
556 #if defined(RECURSION_TRUNCATES)
557 new_ccs = temp_ccs;
558 #else // defined(RECURSION_DROPS)
559 new_ccs = ccs;
560 #endif
561 ccs->indexTable = addToIndexTable (ccs->indexTable,
562 new_ccs, cc, 1);
563 ret = new_ccs;
564 } else {
565 ret = actualPush (ccs,cc);
566 }
567 }
568 }
569 }
570
571 RELEASE_LOCK(&ccs_mutex);
572 return ret;
573 }
574
575 static CostCentreStack *
576 checkLoop (CostCentreStack *ccs, CostCentre *cc)
577 {
578 while (ccs != EMPTY_STACK) {
579 if (ccs->cc == cc)
580 return ccs;
581 ccs = ccs->prevStack;
582 }
583 return NULL;
584 }
585
586 static CostCentreStack *
587 actualPush (CostCentreStack *ccs, CostCentre *cc)
588 {
589 CostCentreStack *new_ccs;
590
591 // allocate space for a new CostCentreStack
592 new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
593
594 return actualPush_(ccs, cc, new_ccs);
595 }
596
597 static CostCentreStack *
598 actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs)
599 {
600 /* assign values to each member of the structure */
601 new_ccs->ccsID = CCS_ID++;
602 new_ccs->cc = cc;
603 new_ccs->prevStack = ccs;
604 new_ccs->root = ccs->root;
605 new_ccs->depth = ccs->depth + 1;
606
607 new_ccs->indexTable = EMPTY_TABLE;
608
609 /* Initialise the various _scc_ counters to zero
610 */
611 new_ccs->scc_count = 0;
612
613 /* Initialize all other stats here. There should be a quick way
614 * that's easily used elsewhere too
615 */
616 new_ccs->time_ticks = 0;
617 new_ccs->mem_alloc = 0;
618 new_ccs->inherited_ticks = 0;
619 new_ccs->inherited_alloc = 0;
620
621 // Set the selected field.
622 ccsSetSelected(new_ccs);
623
624 /* update the memoization table for the parent stack */
625 ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc,
626 0/*not a back edge*/);
627
628 /* return a pointer to the new stack */
629 return new_ccs;
630 }
631
632
633 static CostCentreStack *
634 isInIndexTable(IndexTable *it, CostCentre *cc)
635 {
636 while (it!=EMPTY_TABLE)
637 {
638 if (it->cc == cc)
639 return it->ccs;
640 else
641 it = it->next;
642 }
643
644 /* otherwise we never found it so return EMPTY_TABLE */
645 return EMPTY_TABLE;
646 }
647
648
649 static IndexTable *
650 addToIndexTable (IndexTable *it, CostCentreStack *new_ccs,
651 CostCentre *cc, unsigned int back_edge)
652 {
653 IndexTable *new_it;
654
655 new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
656
657 new_it->cc = cc;
658 new_it->ccs = new_ccs;
659 new_it->next = it;
660 new_it->back_edge = back_edge;
661 return new_it;
662 }
663
664 /* -----------------------------------------------------------------------------
665 Generating a time & allocation profiling report.
666 -------------------------------------------------------------------------- */
667
668 /* We omit certain system-related CCs and CCSs from the default
669 * reports, so as not to cause confusion.
670 */
671 static rtsBool
672 ignoreCC (CostCentre *cc)
673 {
674 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
675 ( cc == CC_OVERHEAD
676 || cc == CC_DONT_CARE
677 || cc == CC_GC
678 || cc == CC_SYSTEM
679 || cc == CC_IDLE)) {
680 return rtsTrue;
681 } else {
682 return rtsFalse;
683 }
684 }
685
686 static rtsBool
687 ignoreCCS (CostCentreStack *ccs)
688 {
689 if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL &&
690 ( ccs == CCS_OVERHEAD
691 || ccs == CCS_DONT_CARE
692 || ccs == CCS_GC
693 || ccs == CCS_SYSTEM
694 || ccs == CCS_IDLE)) {
695 return rtsTrue;
696 } else {
697 return rtsFalse;
698 }
699 }
700
701 /* -----------------------------------------------------------------------------
702 Generating the aggregated per-cost-centre time/alloc report.
703 -------------------------------------------------------------------------- */
704
705 static CostCentre *sorted_cc_list;
706
707 static void
708 aggregateCCCosts( CostCentreStack *ccs )
709 {
710 IndexTable *i;
711
712 ccs->cc->mem_alloc += ccs->mem_alloc;
713 ccs->cc->time_ticks += ccs->time_ticks;
714
715 for (i = ccs->indexTable; i != 0; i = i->next) {
716 if (!i->back_edge) {
717 aggregateCCCosts(i->ccs);
718 }
719 }
720 }
721
722 static void
723 insertCCInSortedList( CostCentre *new_cc )
724 {
725 CostCentre **prev, *cc;
726
727 prev = &sorted_cc_list;
728 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
729 if (new_cc->time_ticks > cc->time_ticks) {
730 new_cc->link = cc;
731 *prev = new_cc;
732 return;
733 } else {
734 prev = &(cc->link);
735 }
736 }
737 new_cc->link = NULL;
738 *prev = new_cc;
739 }
740
741 static nat
742 strlen_utf8 (char *s)
743 {
744 nat n = 0;
745 unsigned char c;
746
747 for (; *s != '\0'; s++) {
748 c = *s;
749 if (c < 0x80 || c > 0xBF) n++;
750 }
751 return n;
752 }
753
754 static void
755 reportPerCCCosts( void )
756 {
757 CostCentre *cc, *next;
758 nat max_label_len, max_module_len;
759
760 aggregateCCCosts(CCS_MAIN);
761 sorted_cc_list = NULL;
762
763 max_label_len = 11; // no shorter than the "COST CENTRE" header
764 max_module_len = 6; // no shorter than the "MODULE" header
765
766 for (cc = CC_LIST; cc != NULL; cc = next) {
767 next = cc->link;
768 if (cc->time_ticks > total_prof_ticks/100
769 || cc->mem_alloc > total_alloc/100
770 || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
771 insertCCInSortedList(cc);
772
773 max_label_len = stg_max(strlen_utf8(cc->label), max_label_len);
774 max_module_len = stg_max(strlen_utf8(cc->module), max_module_len);
775 }
776 }
777
778 fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE");
779 fprintf(prof_file, " %6s %6s", "%time", "%alloc");
780 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
781 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
782 }
783 fprintf(prof_file, "\n\n");
784
785 for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
786 if (ignoreCC(cc)) {
787 continue;
788 }
789 fprintf(prof_file, "%s%*s %s%*s",
790 cc->label,
791 max_label_len - strlen_utf8(cc->label), "",
792 cc->module,
793 max_module_len - strlen_utf8(cc->module), "");
794
795 fprintf(prof_file, " %6.1f %6.1f",
796 total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
797 total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
798 total_alloc * 100)
799 );
800
801 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
802 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
803 (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_));
804 }
805 fprintf(prof_file, "\n");
806 }
807
808 fprintf(prof_file,"\n\n");
809 }
810
811 /* -----------------------------------------------------------------------------
812 Generate the cost-centre-stack time/alloc report
813 -------------------------------------------------------------------------- */
814
815 static void
816 fprintHeader( nat max_label_len, nat max_module_len, nat max_id_len )
817 {
818 fprintf(prof_file, "%-*s %-*s %-*s %11s %12s %12s\n",
819 max_label_len, "",
820 max_module_len, "",
821 max_id_len, "",
822 "", "individual", "inherited");
823
824 fprintf(prof_file, "%-*s %-*s %-*s",
825 max_label_len, "COST CENTRE",
826 max_module_len, "MODULE",
827 max_id_len, "no.");
828
829 fprintf(prof_file, " %11s %5s %6s %5s %6s",
830 "entries", "%time", "%alloc", "%time", "%alloc");
831
832 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
833 fprintf(prof_file, " %5s %9s", "ticks", "bytes");
834 }
835
836 fprintf(prof_file, "\n\n");
837 }
838
839 void
840 reportCCSProfiling( void )
841 {
842 nat count;
843 char temp[128]; /* sigh: magic constant */
844
845 stopProfTimer();
846
847 total_prof_ticks = 0;
848 total_alloc = 0;
849 countTickss(CCS_MAIN);
850
851 if (RtsFlags.CcFlags.doCostCentres == 0) return;
852
853 fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
854 time_str(), "Final");
855
856 fprintf(prof_file, "\n\t ");
857 fprintf(prof_file, " %s", prog_name);
858 fprintf(prof_file, " +RTS");
859 for (count = 0; rts_argv[count]; count++)
860 fprintf(prof_file, " %s", rts_argv[count]);
861 fprintf(prof_file, " -RTS");
862 for (count = 1; prog_argv[count]; count++)
863 fprintf(prof_file, " %s", prog_argv[count]);
864 fprintf(prof_file, "\n\n");
865
866 fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d us, %d processor%s)\n",
867 ((double) total_prof_ticks *
868 (double) RtsFlags.MiscFlags.tickInterval) / (TIME_RESOLUTION * n_capabilities),
869 (unsigned long) total_prof_ticks,
870 (int) TimeToUS(RtsFlags.MiscFlags.tickInterval),
871 n_capabilities, n_capabilities > 1 ? "s" : "");
872
873 fprintf(prof_file, "\ttotal alloc = %11s bytes",
874 showStgWord64(total_alloc * sizeof(W_),
875 temp, rtsTrue/*commas*/));
876
877 fprintf(prof_file, " (excludes profiling overheads)\n\n");
878
879 reportPerCCCosts();
880
881 inheritCosts(CCS_MAIN);
882
883 reportCCS(pruneCCSTree(CCS_MAIN));
884 }
885
886 static nat
887 numDigits(StgInt i) {
888 nat result;
889
890 result = 1;
891
892 if (i < 0) i = 0;
893
894 while (i > 9) {
895 i /= 10;
896 result++;
897 }
898
899 return result;
900 }
901
902 static void
903 findCCSMaxLens(CostCentreStack *ccs, nat indent,
904 nat *max_label_len, nat *max_module_len, nat *max_id_len) {
905 CostCentre *cc;
906 IndexTable *i;
907
908 cc = ccs->cc;
909
910 *max_label_len = stg_max(*max_label_len, indent + strlen_utf8(cc->label));
911 *max_module_len = stg_max(*max_module_len, strlen_utf8(cc->module));
912 *max_id_len = stg_max(*max_id_len, numDigits(ccs->ccsID));
913
914 for (i = ccs->indexTable; i != 0; i = i->next) {
915 if (!i->back_edge) {
916 findCCSMaxLens(i->ccs, indent+1,
917 max_label_len, max_module_len, max_id_len);
918 }
919 }
920 }
921
922 static void
923 logCCS(CostCentreStack *ccs, nat indent,
924 nat max_label_len, nat max_module_len, nat max_id_len)
925 {
926 CostCentre *cc;
927 IndexTable *i;
928
929 cc = ccs->cc;
930
931 /* Only print cost centres with non 0 data ! */
932
933 if (!ignoreCCS(ccs))
934 /* force printing of *all* cost centres if -Pa */
935 {
936
937 fprintf(prof_file, "%-*s%s%*s %s%*s",
938 indent, "",
939 cc->label,
940 max_label_len-indent - strlen_utf8(cc->label), "",
941 cc->module,
942 max_module_len - strlen_utf8(cc->module), "");
943
944 fprintf(prof_file,
945 " %*ld %11" FMT_Word64 " %5.1f %5.1f %5.1f %5.1f",
946 max_id_len, ccs->ccsID, ccs->scc_count,
947 total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
948 total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
949 total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
950 total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
951 );
952
953 if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
954 fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64,
955 (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
956 }
957 fprintf(prof_file, "\n");
958 }
959
960 for (i = ccs->indexTable; i != 0; i = i->next) {
961 if (!i->back_edge) {
962 logCCS(i->ccs, indent+1, max_label_len, max_module_len, max_id_len);
963 }
964 }
965 }
966
967 static void
968 reportCCS(CostCentreStack *ccs)
969 {
970 nat max_label_len, max_module_len, max_id_len;
971
972 max_label_len = 11; // no shorter than "COST CENTRE" header
973 max_module_len = 6; // no shorter than "MODULE" header
974 max_id_len = 3; // no shorter than "no." header
975
976 findCCSMaxLens(ccs, 0, &max_label_len, &max_module_len, &max_id_len);
977
978 fprintHeader(max_label_len, max_module_len, max_id_len);
979 logCCS(ccs, 0, max_label_len, max_module_len, max_id_len);
980 }
981
982
983 /* Traverse the cost centre stack tree and accumulate
984 * ticks/allocations.
985 */
986 static void
987 countTickss(CostCentreStack *ccs)
988 {
989 IndexTable *i;
990
991 if (!ignoreCCS(ccs)) {
992 total_alloc += ccs->mem_alloc;
993 total_prof_ticks += ccs->time_ticks;
994 }
995 for (i = ccs->indexTable; i != NULL; i = i->next)
996 if (!i->back_edge) {
997 countTickss(i->ccs);
998 }
999 }
1000
1001 /* Traverse the cost centre stack tree and inherit ticks & allocs.
1002 */
1003 static void
1004 inheritCosts(CostCentreStack *ccs)
1005 {
1006 IndexTable *i;
1007
1008 if (ignoreCCS(ccs)) { return; }
1009
1010 ccs->inherited_ticks += ccs->time_ticks;
1011 ccs->inherited_alloc += ccs->mem_alloc;
1012
1013 for (i = ccs->indexTable; i != NULL; i = i->next)
1014 if (!i->back_edge) {
1015 inheritCosts(i->ccs);
1016 ccs->inherited_ticks += i->ccs->inherited_ticks;
1017 ccs->inherited_alloc += i->ccs->inherited_alloc;
1018 }
1019
1020 return;
1021 }
1022
1023 //
1024 // Prune CCSs with zero entries, zero ticks or zero allocation from
1025 // the tree, unless COST_CENTRES_ALL is on.
1026 //
1027 static CostCentreStack *
1028 pruneCCSTree (CostCentreStack *ccs)
1029 {
1030 CostCentreStack *ccs1;
1031 IndexTable *i, **prev;
1032
1033 prev = &ccs->indexTable;
1034 for (i = ccs->indexTable; i != 0; i = i->next) {
1035 if (i->back_edge) { continue; }
1036
1037 ccs1 = pruneCCSTree(i->ccs);
1038 if (ccs1 == NULL) {
1039 *prev = i->next;
1040 } else {
1041 prev = &(i->next);
1042 }
1043 }
1044
1045 if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
1046 /* force printing of *all* cost centres if -P -P */ )
1047
1048 || ( ccs->indexTable != 0 )
1049 || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
1050 ) {
1051 return ccs;
1052 } else {
1053 return NULL;
1054 }
1055 }
1056
1057 void
1058 fprintCCS( FILE *f, CostCentreStack *ccs )
1059 {
1060 fprintf(f,"<");
1061 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1062 fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
1063 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1064 fprintf(f,",");
1065 }
1066 }
1067 fprintf(f,">");
1068 }
1069
1070 // Returns: True if the call stack ended with CAF
1071 static rtsBool fprintCallStack (CostCentreStack *ccs)
1072 {
1073 CostCentreStack *prev;
1074
1075 fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label);
1076 prev = ccs->prevStack;
1077 while (prev && prev != CCS_MAIN) {
1078 ccs = prev;
1079 fprintf(stderr, ",\n called from %s.%s",
1080 ccs->cc->module, ccs->cc->label);
1081 prev = ccs->prevStack;
1082 }
1083 fprintf(stderr, "\n");
1084
1085 return (!strncmp(ccs->cc->label, "CAF", 3));
1086 }
1087
1088 /* For calling from .cmm code, where we can't reliably refer to stderr */
1089 void
1090 fprintCCS_stderr (CostCentreStack *ccs, StgClosure *exception, StgTSO *tso)
1091 {
1092 rtsBool is_caf;
1093 StgPtr frame;
1094 StgStack *stack;
1095 CostCentreStack *prev_ccs;
1096 nat depth = 0;
1097 const nat MAX_DEPTH = 10; // don't print gigantic chains of stacks
1098
1099 {
1100 char *desc;
1101 StgInfoTable *info;
1102 info = get_itbl(UNTAG_CLOSURE(exception));
1103 switch (info->type) {
1104 case CONSTR:
1105 case CONSTR_1_0:
1106 case CONSTR_0_1:
1107 case CONSTR_2_0:
1108 case CONSTR_1_1:
1109 case CONSTR_0_2:
1110 case CONSTR_STATIC:
1111 case CONSTR_NOCAF_STATIC:
1112 desc = GET_CON_DESC(itbl_to_con_itbl(info));
1113 break;
1114 default:
1115 desc = closure_type_names[info->type];
1116 break;
1117 }
1118 fprintf(stderr, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc);
1119 }
1120
1121 is_caf = fprintCallStack(ccs);
1122
1123 // traverse the stack down to the enclosing update frame to
1124 // find out where this CCS was evaluated from...
1125
1126 stack = tso->stackobj;
1127 frame = stack->sp;
1128 prev_ccs = ccs;
1129
1130 for (; is_caf && depth < MAX_DEPTH; depth++)
1131 {
1132 switch (get_itbl((StgClosure*)frame)->type)
1133 {
1134 case UPDATE_FRAME:
1135 ccs = ((StgUpdateFrame*)frame)->header.prof.ccs;
1136 frame += sizeofW(StgUpdateFrame);
1137 if (ccs == CCS_MAIN) {
1138 goto done;
1139 }
1140 if (ccs == prev_ccs) {
1141 // ignore if this is the same as the previous stack,
1142 // we're probably in library code and haven't
1143 // accumulated any more interesting stack items
1144 // since the last update frame.
1145 break;
1146 }
1147 prev_ccs = ccs;
1148 fprintf(stderr, " --> evaluated by: ");
1149 is_caf = fprintCallStack(ccs);
1150 break;
1151 case UNDERFLOW_FRAME:
1152 stack = ((StgUnderflowFrame*)frame)->next_chunk;
1153 frame = stack->sp;
1154 break;
1155 case STOP_FRAME:
1156 goto done;
1157 default:
1158 frame += stack_frame_sizeW((StgClosure*)frame);
1159 break;
1160 }
1161 }
1162 done:
1163 return;
1164 }
1165
1166 #ifdef DEBUG
1167 void
1168 debugCCS( CostCentreStack *ccs )
1169 {
1170 debugBelch("<");
1171 for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
1172 debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
1173 if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
1174 debugBelch(",");
1175 }
1176 }
1177 debugBelch(">");
1178 }
1179 #endif /* DEBUG */
1180
1181 #endif /* PROFILING */