1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 1998-2000
5 * Support for profiling
7 * ---------------------------------------------------------------------------*/
11 #include "PosixSource.h"
15 #include "Profiling.h"
16 #include "Proftimer.h"
19 #include "RetainerProfile.h"
21 #include "Capability.h"
30 * Profiling allocation arena.
35 * Global variables used to assign unique IDs to cc's, ccs's, and
39 unsigned int CC_ID
= 1;
40 unsigned int CCS_ID
= 1;
42 /* figures for the profiling report.
44 static StgWord64 total_alloc
;
45 static W_ total_prof_ticks
;
47 /* Globals for opening the profiling log file(s)
49 static char *prof_filename
; /* prof report file name = <program>.prof */
52 static char *hp_filename
; /* heap profile (hp2ps style) log file */
55 /* Linked lists to keep track of CCs and CCSs that haven't
56 * been declared in the log file yet
58 CostCentre
*CC_LIST
= NULL
;
59 CostCentreStack
*CCS_LIST
= NULL
;
66 * Built-in cost centres and cost-centre stacks:
68 * MAIN is the root of the cost-centre stack tree. If there are
69 * no {-# SCC #-}s in the program, all costs will be attributed
72 * SYSTEM is the RTS in general (scheduler, etc.). All costs for
73 * RTS operations apart from garbage collection are attributed
76 * GC is the storage manager / garbage collector.
78 * OVERHEAD gets all costs generated by the profiling system
79 * itself. These are costs that would not be incurred
80 * during non-profiled execution of the program.
82 * DONT_CARE is a placeholder cost-centre we assign to static
83 * constructors. It should *never* accumulate any costs.
85 * PINNED accumulates memory allocated to pinned objects, which
86 * cannot be profiled separately because we cannot reliably
87 * traverse pinned memory.
90 CC_DECLARE(CC_MAIN
, "MAIN", "MAIN", "<built-in>", CC_NOT_CAF
, );
91 CC_DECLARE(CC_SYSTEM
, "SYSTEM", "SYSTEM", "<built-in>", CC_NOT_CAF
, );
92 CC_DECLARE(CC_GC
, "GC", "GC", "<built-in>", CC_NOT_CAF
, );
93 CC_DECLARE(CC_OVERHEAD
, "OVERHEAD_of", "PROFILING", "<built-in>", CC_NOT_CAF
, );
94 CC_DECLARE(CC_DONT_CARE
, "DONT_CARE", "MAIN", "<built-in>", CC_NOT_CAF
, );
95 CC_DECLARE(CC_PINNED
, "PINNED", "SYSTEM", "<built-in>", CC_NOT_CAF
, );
96 CC_DECLARE(CC_IDLE
, "IDLE", "IDLE", "<built-in>", CC_NOT_CAF
, );
98 CCS_DECLARE(CCS_MAIN
, CC_MAIN
, );
99 CCS_DECLARE(CCS_SYSTEM
, CC_SYSTEM
, );
100 CCS_DECLARE(CCS_GC
, CC_GC
, );
101 CCS_DECLARE(CCS_OVERHEAD
, CC_OVERHEAD
, );
102 CCS_DECLARE(CCS_DONT_CARE
, CC_DONT_CARE
, );
103 CCS_DECLARE(CCS_PINNED
, CC_PINNED
, );
104 CCS_DECLARE(CCS_IDLE
, CC_IDLE
, );
110 static CostCentreStack
* appendCCS ( CostCentreStack
*ccs1
,
111 CostCentreStack
*ccs2
);
112 static CostCentreStack
* actualPush_ ( CostCentreStack
*ccs
, CostCentre
*cc
,
113 CostCentreStack
*new_ccs
);
114 static rtsBool
ignoreCCS ( CostCentreStack
*ccs
);
115 static void countTickss ( CostCentreStack
*ccs
);
116 static void inheritCosts ( CostCentreStack
*ccs
);
117 static nat
numDigits ( StgInt i
);
118 static void findCCSMaxLens ( CostCentreStack
*ccs
,
123 static void logCCS ( CostCentreStack
*ccs
,
128 static void reportCCS ( CostCentreStack
*ccs
);
129 static CostCentreStack
* checkLoop ( CostCentreStack
*ccs
,
131 static CostCentreStack
* pruneCCSTree ( CostCentreStack
*ccs
);
132 static CostCentreStack
* actualPush ( CostCentreStack
*, CostCentre
* );
133 static CostCentreStack
* isInIndexTable ( IndexTable
*, CostCentre
* );
134 static IndexTable
* addToIndexTable ( IndexTable
*, CostCentreStack
*,
135 CostCentre
*, unsigned int );
136 static void ccsSetSelected ( CostCentreStack
*ccs
);
138 static void initTimeProfiling ( void );
139 static void initProfilingLogFile ( void );
141 /* -----------------------------------------------------------------------------
142 Initialise the profiling environment
143 -------------------------------------------------------------------------- */
145 void initProfiling (void)
147 // initialise our arena
148 prof_arena
= newArena();
150 /* for the benefit of allocate()... */
153 for (n
=0; n
< n_capabilities
; n
++) {
154 capabilities
[n
]->r
.rCCCS
= CCS_SYSTEM
;
159 initMutex(&ccs_mutex
);
162 /* Set up the log file, and dump the header and cost centre
163 * information into it.
165 initProfilingLogFile();
167 /* Register all the cost centres / stacks in the program
168 * CC_MAIN gets link = 0, all others have non-zero link.
170 REGISTER_CC(CC_MAIN
);
171 REGISTER_CC(CC_SYSTEM
);
173 REGISTER_CC(CC_OVERHEAD
);
174 REGISTER_CC(CC_DONT_CARE
);
175 REGISTER_CC(CC_PINNED
);
176 REGISTER_CC(CC_IDLE
);
178 REGISTER_CCS(CCS_SYSTEM
);
179 REGISTER_CCS(CCS_GC
);
180 REGISTER_CCS(CCS_OVERHEAD
);
181 REGISTER_CCS(CCS_DONT_CARE
);
182 REGISTER_CCS(CCS_PINNED
);
183 REGISTER_CCS(CCS_IDLE
);
184 REGISTER_CCS(CCS_MAIN
);
186 /* find all the registered cost centre stacks, and make them
187 * children of CCS_MAIN.
189 ASSERT(CCS_LIST
== CCS_MAIN
);
190 CCS_LIST
= CCS_LIST
->prevStack
;
191 CCS_MAIN
->prevStack
= NULL
;
192 CCS_MAIN
->root
= CCS_MAIN
;
193 ccsSetSelected(CCS_MAIN
);
197 if (RtsFlags
.CcFlags
.doCostCentres
) {
201 if (RtsFlags
.ProfFlags
.doHeapProfile
) {
207 // Should be called after loading any new Haskell code.
209 void initProfiling2 (void)
211 CostCentreStack
*ccs
, *next
;
213 // make CCS_MAIN the parent of all the pre-defined CCSs.
214 for (ccs
= CCS_LIST
; ccs
!= NULL
; ) {
215 next
= ccs
->prevStack
;
216 ccs
->prevStack
= NULL
;
217 actualPush_(CCS_MAIN
,ccs
->cc
,ccs
);
227 arenaFree(prof_arena
);
231 initProfilingLogFile(void)
235 prog
= arenaAlloc(prof_arena
, strlen(prog_name
) + 1);
236 strcpy(prog
, prog_name
);
237 #ifdef mingw32_HOST_OS
238 // on Windows, drop the .exe suffix if there is one
241 suff
= strrchr(prog
,'.');
242 if (suff
!= NULL
&& !strcmp(suff
,".exe")) {
248 if (RtsFlags
.CcFlags
.doCostCentres
== 0 &&
249 RtsFlags
.ProfFlags
.doHeapProfile
!= HEAP_BY_RETAINER
&&
250 RtsFlags
.ProfFlags
.retainerSelector
== NULL
)
252 /* No need for the <prog>.prof file */
253 prof_filename
= NULL
;
258 /* Initialise the log file name */
259 prof_filename
= arenaAlloc(prof_arena
, strlen(prog
) + 6);
260 sprintf(prof_filename
, "%s.prof", prog
);
262 /* open the log file */
263 if ((prof_file
= fopen(prof_filename
, "w")) == NULL
) {
264 debugBelch("Can't open profiling report file %s\n", prof_filename
);
265 RtsFlags
.CcFlags
.doCostCentres
= 0;
266 // The following line was added by Sung; retainer/LDV profiling may need
267 // two output files, i.e., <program>.prof/hp.
268 if (RtsFlags
.ProfFlags
.doHeapProfile
== HEAP_BY_RETAINER
)
269 RtsFlags
.ProfFlags
.doHeapProfile
= 0;
274 if (RtsFlags
.ProfFlags
.doHeapProfile
) {
275 /* Initialise the log file name */
276 hp_filename
= arenaAlloc(prof_arena
, strlen(prog
) + 6);
277 sprintf(hp_filename
, "%s.hp", prog
);
279 /* open the log file */
280 if ((hp_file
= fopen(hp_filename
, "w")) == NULL
) {
281 debugBelch("Can't open profiling report file %s\n",
283 RtsFlags
.ProfFlags
.doHeapProfile
= 0;
290 initTimeProfiling(void)
297 endProfiling ( void )
299 if (RtsFlags
.CcFlags
.doCostCentres
) {
302 if (RtsFlags
.ProfFlags
.doHeapProfile
) {
307 /* -----------------------------------------------------------------------------
308 Set CCCS when entering a function.
310 The algorithm is as follows.
312 ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn
317 -- returns the suffix of B after removing any prefix common
322 <a,b,c> ++> <> = <a,b,c>
323 <a,b,c> ++> <d> = <a,b,c,d>
324 <a,b,c> ++> <a,b> = <a,b,c>
325 <a,b> ++> <a,b,c> = <a,b,c>
326 <a,b,c> ++> <a,b,d> = <a,b,c,d>
328 -------------------------------------------------------------------------- */
330 // implements c1 ++> c2, where c1 and c2 are equal depth
332 static CostCentreStack
*
333 enterFunEqualStacks (CostCentreStack
*ccs0
,
334 CostCentreStack
*ccsapp
,
335 CostCentreStack
*ccsfn
)
337 ASSERT(ccsapp
->depth
== ccsfn
->depth
);
338 if (ccsapp
== ccsfn
) return ccs0
;
339 return pushCostCentre(enterFunEqualStacks(ccs0
,
345 // implements c1 ++> c2, where c2 is deeper than c1.
346 // Drop elements of c2 until we have equal stacks, call
347 // enterFunEqualStacks(), and then push on the elements that we
348 // dropped in reverse order.
350 static CostCentreStack
*
351 enterFunCurShorter (CostCentreStack
*ccsapp
, CostCentreStack
*ccsfn
, StgWord n
)
354 ASSERT(ccsfn
->depth
== ccsapp
->depth
);
355 return enterFunEqualStacks(ccsapp
,ccsapp
,ccsfn
);;
357 ASSERT(ccsfn
->depth
> ccsapp
->depth
);
358 return pushCostCentre(enterFunCurShorter(ccsapp
, ccsfn
->prevStack
, n
-1),
363 void enterFunCCS (StgRegTable
*reg
, CostCentreStack
*ccsfn
)
365 CostCentreStack
*ccsapp
;
367 // common case 1: both stacks are the same
368 if (ccsfn
== reg
->rCCCS
) {
372 // common case 2: the function stack is empty, or just CAF
373 if (ccsfn
->prevStack
== CCS_MAIN
) {
378 reg
->rCCCS
= CCS_OVERHEAD
;
380 // common case 3: the stacks are completely different (e.g. one is a
381 // descendent of MAIN and the other of a CAF): we append the whole
382 // of the function stack to the current CCS.
383 if (ccsfn
->root
!= ccsapp
->root
) {
384 reg
->rCCCS
= appendCCS(ccsapp
,ccsfn
);
388 // uncommon case 4: ccsapp is deeper than ccsfn
389 if (ccsapp
->depth
> ccsfn
->depth
) {
391 CostCentreStack
*tmp
= ccsapp
;
392 n
= ccsapp
->depth
- ccsfn
->depth
;
393 for (i
= 0; i
< n
; i
++) {
394 tmp
= tmp
->prevStack
;
396 reg
->rCCCS
= enterFunEqualStacks(ccsapp
,tmp
,ccsfn
);
400 // uncommon case 5: ccsfn is deeper than CCCS
401 if (ccsfn
->depth
> ccsapp
->depth
) {
402 reg
->rCCCS
= enterFunCurShorter(ccsapp
, ccsfn
,
403 ccsfn
->depth
- ccsapp
->depth
);
407 // uncommon case 6: stacks are equal depth, but different
408 reg
->rCCCS
= enterFunEqualStacks(ccsapp
,ccsapp
,ccsfn
);
411 /* -----------------------------------------------------------------------------
412 Decide whether closures with this CCS should contribute to the heap
414 -------------------------------------------------------------------------- */
417 ccsSetSelected (CostCentreStack
*ccs
)
419 if (RtsFlags
.ProfFlags
.modSelector
) {
420 if (! strMatchesSelector (ccs
->cc
->module
,
421 RtsFlags
.ProfFlags
.modSelector
) ) {
426 if (RtsFlags
.ProfFlags
.ccSelector
) {
427 if (! strMatchesSelector (ccs
->cc
->label
,
428 RtsFlags
.ProfFlags
.ccSelector
) ) {
433 if (RtsFlags
.ProfFlags
.ccsSelector
) {
435 for (c
= ccs
; c
!= NULL
; c
= c
->prevStack
)
437 if ( strMatchesSelector (c
->cc
->label
,
438 RtsFlags
.ProfFlags
.ccsSelector
) ) {
452 /* -----------------------------------------------------------------------------
453 Cost-centre stack manipulation
454 -------------------------------------------------------------------------- */
457 CostCentreStack
* _pushCostCentre ( CostCentreStack
*ccs
, CostCentre
*cc
);
459 pushCostCentre ( CostCentreStack
*ccs
, CostCentre
*cc
)
460 #define pushCostCentre _pushCostCentre
463 traceBegin("pushing %s on ", cc
->label
);
467 return pushCostCentre(ccs
,cc
);
471 /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */
474 CostCentreStack
*_appendCCS ( CostCentreStack
*ccs1
, CostCentreStack
*ccs2
);
476 appendCCS ( CostCentreStack
*ccs1
, CostCentreStack
*ccs2
)
477 #define appendCCS _appendCCS
481 debugBelch("Appending ");
486 return appendCCS(ccs1
,ccs2
);
491 appendCCS ( CostCentreStack
*ccs1
, CostCentreStack
*ccs2
)
497 if (ccs2
== CCS_MAIN
|| ccs2
->cc
->is_caf
== CC_IS_CAF
) {
498 // stop at a CAF element
502 return pushCostCentre(appendCCS(ccs1
, ccs2
->prevStack
), ccs2
->cc
);
506 // #define RECURSION_DROPS
507 #define RECURSION_TRUNCATES
510 pushCostCentre (CostCentreStack
*ccs
, CostCentre
*cc
)
512 CostCentreStack
*temp_ccs
, *ret
;
515 if (ccs
== EMPTY_STACK
) {
516 ACQUIRE_LOCK(&ccs_mutex
);
517 ret
= actualPush(ccs
,cc
);
524 // check if we've already memoized this stack
525 ixtable
= ccs
->indexTable
;
526 temp_ccs
= isInIndexTable(ixtable
,cc
);
528 if (temp_ccs
!= EMPTY_STACK
) {
532 // not in the IndexTable, now we take the lock:
533 ACQUIRE_LOCK(&ccs_mutex
);
535 if (ccs
->indexTable
!= ixtable
)
537 // someone modified ccs->indexTable while
538 // we did not hold the lock, so we must
540 temp_ccs
= isInIndexTable(ixtable
,cc
);
541 if (temp_ccs
!= EMPTY_STACK
)
543 RELEASE_LOCK(&ccs_mutex
);
547 temp_ccs
= checkLoop(ccs
,cc
);
548 if (temp_ccs
!= NULL
) {
549 // This CC is already in the stack somewhere.
550 // This could be recursion, or just calling
551 // another function with the same CC.
552 // A number of policies are possible at this
553 // point, we implement two here:
554 // - truncate the stack to the previous instance
556 // - ignore this push, return the same stack.
558 CostCentreStack
*new_ccs
;
559 #if defined(RECURSION_TRUNCATES)
561 #else // defined(RECURSION_DROPS)
564 ccs
->indexTable
= addToIndexTable (ccs
->indexTable
,
568 ret
= actualPush (ccs
,cc
);
574 RELEASE_LOCK(&ccs_mutex
);
578 static CostCentreStack
*
579 checkLoop (CostCentreStack
*ccs
, CostCentre
*cc
)
581 while (ccs
!= EMPTY_STACK
) {
584 ccs
= ccs
->prevStack
;
589 static CostCentreStack
*
590 actualPush (CostCentreStack
*ccs
, CostCentre
*cc
)
592 CostCentreStack
*new_ccs
;
594 // allocate space for a new CostCentreStack
595 new_ccs
= (CostCentreStack
*) arenaAlloc(prof_arena
, sizeof(CostCentreStack
));
597 return actualPush_(ccs
, cc
, new_ccs
);
600 static CostCentreStack
*
601 actualPush_ (CostCentreStack
*ccs
, CostCentre
*cc
, CostCentreStack
*new_ccs
)
603 /* assign values to each member of the structure */
604 new_ccs
->ccsID
= CCS_ID
++;
606 new_ccs
->prevStack
= ccs
;
607 new_ccs
->root
= ccs
->root
;
608 new_ccs
->depth
= ccs
->depth
+ 1;
610 new_ccs
->indexTable
= EMPTY_TABLE
;
612 /* Initialise the various _scc_ counters to zero
614 new_ccs
->scc_count
= 0;
616 /* Initialize all other stats here. There should be a quick way
617 * that's easily used elsewhere too
619 new_ccs
->time_ticks
= 0;
620 new_ccs
->mem_alloc
= 0;
621 new_ccs
->inherited_ticks
= 0;
622 new_ccs
->inherited_alloc
= 0;
624 // Set the selected field.
625 ccsSetSelected(new_ccs
);
627 /* update the memoization table for the parent stack */
628 ccs
->indexTable
= addToIndexTable(ccs
->indexTable
, new_ccs
, cc
,
629 0/*not a back edge*/);
631 /* return a pointer to the new stack */
636 static CostCentreStack
*
637 isInIndexTable(IndexTable
*it
, CostCentre
*cc
)
639 while (it
!=EMPTY_TABLE
)
647 /* otherwise we never found it so return EMPTY_TABLE */
653 addToIndexTable (IndexTable
*it
, CostCentreStack
*new_ccs
,
654 CostCentre
*cc
, unsigned int back_edge
)
658 new_it
= arenaAlloc(prof_arena
, sizeof(IndexTable
));
661 new_it
->ccs
= new_ccs
;
663 new_it
->back_edge
= back_edge
;
667 /* -----------------------------------------------------------------------------
668 Generating a time & allocation profiling report.
669 -------------------------------------------------------------------------- */
671 /* We omit certain system-related CCs and CCSs from the default
672 * reports, so as not to cause confusion.
675 ignoreCC (CostCentre
*cc
)
677 if (RtsFlags
.CcFlags
.doCostCentres
< COST_CENTRES_ALL
&&
679 || cc
== CC_DONT_CARE
690 ignoreCCS (CostCentreStack
*ccs
)
692 if (RtsFlags
.CcFlags
.doCostCentres
< COST_CENTRES_ALL
&&
693 ( ccs
== CCS_OVERHEAD
694 || ccs
== CCS_DONT_CARE
697 || ccs
== CCS_IDLE
)) {
704 /* -----------------------------------------------------------------------------
705 Generating the aggregated per-cost-centre time/alloc report.
706 -------------------------------------------------------------------------- */
708 static CostCentre
*sorted_cc_list
;
711 aggregateCCCosts( CostCentreStack
*ccs
)
715 ccs
->cc
->mem_alloc
+= ccs
->mem_alloc
;
716 ccs
->cc
->time_ticks
+= ccs
->time_ticks
;
718 for (i
= ccs
->indexTable
; i
!= 0; i
= i
->next
) {
720 aggregateCCCosts(i
->ccs
);
726 insertCCInSortedList( CostCentre
*new_cc
)
728 CostCentre
**prev
, *cc
;
730 prev
= &sorted_cc_list
;
731 for (cc
= sorted_cc_list
; cc
!= NULL
; cc
= cc
->link
) {
732 if (new_cc
->time_ticks
> cc
->time_ticks
) {
745 strlen_utf8 (char *s
)
750 for (; *s
!= '\0'; s
++) {
752 if (c
< 0x80 || c
> 0xBF) n
++;
758 reportPerCCCosts( void )
760 CostCentre
*cc
, *next
;
761 nat max_label_len
, max_module_len
;
763 aggregateCCCosts(CCS_MAIN
);
764 sorted_cc_list
= NULL
;
766 max_label_len
= 11; // no shorter than the "COST CENTRE" header
767 max_module_len
= 6; // no shorter than the "MODULE" header
769 for (cc
= CC_LIST
; cc
!= NULL
; cc
= next
) {
771 if (cc
->time_ticks
> total_prof_ticks
/100
772 || cc
->mem_alloc
> total_alloc
/100
773 || RtsFlags
.CcFlags
.doCostCentres
>= COST_CENTRES_ALL
) {
774 insertCCInSortedList(cc
);
776 max_label_len
= stg_max(strlen_utf8(cc
->label
), max_label_len
);
777 max_module_len
= stg_max(strlen_utf8(cc
->module
), max_module_len
);
781 fprintf(prof_file
, "%-*s %-*s", max_label_len
, "COST CENTRE", max_module_len
, "MODULE");
782 fprintf(prof_file
, " %6s %6s", "%time", "%alloc");
783 if (RtsFlags
.CcFlags
.doCostCentres
>= COST_CENTRES_VERBOSE
) {
784 fprintf(prof_file
, " %5s %9s", "ticks", "bytes");
786 fprintf(prof_file
, "\n\n");
788 for (cc
= sorted_cc_list
; cc
!= NULL
; cc
= cc
->link
) {
792 fprintf(prof_file
, "%s%*s %s%*s",
794 max_label_len
- strlen_utf8(cc
->label
), "",
796 max_module_len
- strlen_utf8(cc
->module
), "");
798 fprintf(prof_file
, " %6.1f %6.1f",
799 total_prof_ticks
== 0 ?
0.0 : (cc
->time_ticks
/ (StgFloat
) total_prof_ticks
* 100),
800 total_alloc
== 0 ?
0.0 : (cc
->mem_alloc
/ (StgFloat
)
804 if (RtsFlags
.CcFlags
.doCostCentres
>= COST_CENTRES_VERBOSE
) {
805 fprintf(prof_file
, " %5" FMT_Word64
" %9" FMT_Word64
,
806 (StgWord64
)(cc
->time_ticks
), cc
->mem_alloc
*sizeof(W_
));
808 fprintf(prof_file
, "\n");
811 fprintf(prof_file
,"\n\n");
814 /* -----------------------------------------------------------------------------
815 Generate the cost-centre-stack time/alloc report
816 -------------------------------------------------------------------------- */
819 fprintHeader( nat max_label_len
, nat max_module_len
, nat max_id_len
)
821 fprintf(prof_file
, "%-*s %-*s %-*s %11s %12s %12s\n",
825 "", "individual", "inherited");
827 fprintf(prof_file
, "%-*s %-*s %-*s",
828 max_label_len
, "COST CENTRE",
829 max_module_len
, "MODULE",
832 fprintf(prof_file
, " %11s %5s %6s %5s %6s",
833 "entries", "%time", "%alloc", "%time", "%alloc");
835 if (RtsFlags
.CcFlags
.doCostCentres
>= COST_CENTRES_VERBOSE
) {
836 fprintf(prof_file
, " %5s %9s", "ticks", "bytes");
839 fprintf(prof_file
, "\n\n");
843 reportCCSProfiling( void )
846 char temp
[128]; /* sigh: magic constant */
850 total_prof_ticks
= 0;
852 countTickss(CCS_MAIN
);
854 if (RtsFlags
.CcFlags
.doCostCentres
== 0) return;
856 fprintf(prof_file
, "\t%s Time and Allocation Profiling Report (%s)\n",
857 time_str(), "Final");
859 fprintf(prof_file
, "\n\t ");
860 fprintf(prof_file
, " %s", prog_name
);
861 fprintf(prof_file
, " +RTS");
862 for (count
= 0; rts_argv
[count
]; count
++)
863 fprintf(prof_file
, " %s", rts_argv
[count
]);
864 fprintf(prof_file
, " -RTS");
865 for (count
= 1; prog_argv
[count
]; count
++)
866 fprintf(prof_file
, " %s", prog_argv
[count
]);
867 fprintf(prof_file
, "\n\n");
869 fprintf(prof_file
, "\ttotal time = %11.2f secs (%lu ticks @ %d us, %d processor%s)\n",
870 ((double) total_prof_ticks
*
871 (double) RtsFlags
.MiscFlags
.tickInterval
) / (TIME_RESOLUTION
* n_capabilities
),
872 (unsigned long) total_prof_ticks
,
873 (int) TimeToUS(RtsFlags
.MiscFlags
.tickInterval
),
874 n_capabilities
, n_capabilities
> 1 ?
"s" : "");
876 fprintf(prof_file
, "\ttotal alloc = %11s bytes",
877 showStgWord64(total_alloc
* sizeof(W_
),
878 temp
, rtsTrue
/*commas*/));
880 fprintf(prof_file
, " (excludes profiling overheads)\n\n");
884 inheritCosts(CCS_MAIN
);
886 reportCCS(pruneCCSTree(CCS_MAIN
));
890 numDigits(StgInt i
) {
906 findCCSMaxLens(CostCentreStack
*ccs
, nat indent
,
907 nat
*max_label_len
, nat
*max_module_len
, nat
*max_id_len
) {
913 *max_label_len
= stg_max(*max_label_len
, indent
+ strlen_utf8(cc
->label
));
914 *max_module_len
= stg_max(*max_module_len
, strlen_utf8(cc
->module
));
915 *max_id_len
= stg_max(*max_id_len
, numDigits(ccs
->ccsID
));
917 for (i
= ccs
->indexTable
; i
!= 0; i
= i
->next
) {
919 findCCSMaxLens(i
->ccs
, indent
+1,
920 max_label_len
, max_module_len
, max_id_len
);
926 logCCS(CostCentreStack
*ccs
, nat indent
,
927 nat max_label_len
, nat max_module_len
, nat max_id_len
)
934 /* Only print cost centres with non 0 data ! */
937 /* force printing of *all* cost centres if -Pa */
940 fprintf(prof_file
, "%-*s%s%*s %s%*s",
943 max_label_len
-indent
- strlen_utf8(cc
->label
), "",
945 max_module_len
- strlen_utf8(cc
->module
), "");
948 " %*ld %11" FMT_Word64
" %5.1f %5.1f %5.1f %5.1f",
949 max_id_len
, ccs
->ccsID
, ccs
->scc_count
,
950 total_prof_ticks
== 0 ?
0.0 : ((double)ccs
->time_ticks
/ (double)total_prof_ticks
* 100.0),
951 total_alloc
== 0 ?
0.0 : ((double)ccs
->mem_alloc
/ (double)total_alloc
* 100.0),
952 total_prof_ticks
== 0 ?
0.0 : ((double)ccs
->inherited_ticks
/ (double)total_prof_ticks
* 100.0),
953 total_alloc
== 0 ?
0.0 : ((double)ccs
->inherited_alloc
/ (double)total_alloc
* 100.0)
956 if (RtsFlags
.CcFlags
.doCostCentres
>= COST_CENTRES_VERBOSE
) {
957 fprintf(prof_file
, " %5" FMT_Word64
" %9" FMT_Word64
,
958 (StgWord64
)(ccs
->time_ticks
), ccs
->mem_alloc
*sizeof(W_
));
960 fprintf(prof_file
, "\n");
963 for (i
= ccs
->indexTable
; i
!= 0; i
= i
->next
) {
965 logCCS(i
->ccs
, indent
+1, max_label_len
, max_module_len
, max_id_len
);
971 reportCCS(CostCentreStack
*ccs
)
973 nat max_label_len
, max_module_len
, max_id_len
;
975 max_label_len
= 11; // no shorter than "COST CENTRE" header
976 max_module_len
= 6; // no shorter than "MODULE" header
977 max_id_len
= 3; // no shorter than "no." header
979 findCCSMaxLens(ccs
, 0, &max_label_len
, &max_module_len
, &max_id_len
);
981 fprintHeader(max_label_len
, max_module_len
, max_id_len
);
982 logCCS(ccs
, 0, max_label_len
, max_module_len
, max_id_len
);
986 /* Traverse the cost centre stack tree and accumulate
990 countTickss(CostCentreStack
*ccs
)
994 if (!ignoreCCS(ccs
)) {
995 total_alloc
+= ccs
->mem_alloc
;
996 total_prof_ticks
+= ccs
->time_ticks
;
998 for (i
= ccs
->indexTable
; i
!= NULL
; i
= i
->next
)
1000 countTickss(i
->ccs
);
1004 /* Traverse the cost centre stack tree and inherit ticks & allocs.
1007 inheritCosts(CostCentreStack
*ccs
)
1011 if (ignoreCCS(ccs
)) { return; }
1013 ccs
->inherited_ticks
+= ccs
->time_ticks
;
1014 ccs
->inherited_alloc
+= ccs
->mem_alloc
;
1016 for (i
= ccs
->indexTable
; i
!= NULL
; i
= i
->next
)
1017 if (!i
->back_edge
) {
1018 inheritCosts(i
->ccs
);
1019 ccs
->inherited_ticks
+= i
->ccs
->inherited_ticks
;
1020 ccs
->inherited_alloc
+= i
->ccs
->inherited_alloc
;
1027 // Prune CCSs with zero entries, zero ticks or zero allocation from
1028 // the tree, unless COST_CENTRES_ALL is on.
1030 static CostCentreStack
*
1031 pruneCCSTree (CostCentreStack
*ccs
)
1033 CostCentreStack
*ccs1
;
1034 IndexTable
*i
, **prev
;
1036 prev
= &ccs
->indexTable
;
1037 for (i
= ccs
->indexTable
; i
!= 0; i
= i
->next
) {
1038 if (i
->back_edge
) { continue; }
1040 ccs1
= pruneCCSTree(i
->ccs
);
1048 if ( (RtsFlags
.CcFlags
.doCostCentres
>= COST_CENTRES_ALL
1049 /* force printing of *all* cost centres if -P -P */ )
1051 || ( ccs
->indexTable
!= 0 )
1052 || ( ccs
->scc_count
|| ccs
->time_ticks
|| ccs
->mem_alloc
)
1061 fprintCCS( FILE *f
, CostCentreStack
*ccs
)
1064 for (; ccs
&& ccs
!= CCS_MAIN
; ccs
= ccs
->prevStack
) {
1065 fprintf(f
,"%s.%s", ccs
->cc
->module
, ccs
->cc
->label
);
1066 if (ccs
->prevStack
&& ccs
->prevStack
!= CCS_MAIN
) {
1073 // Returns: True if the call stack ended with CAF
1074 static rtsBool
fprintCallStack (CostCentreStack
*ccs
)
1076 CostCentreStack
*prev
;
1078 fprintf(stderr
,"%s.%s", ccs
->cc
->module
, ccs
->cc
->label
);
1079 prev
= ccs
->prevStack
;
1080 while (prev
&& prev
!= CCS_MAIN
) {
1082 fprintf(stderr
, ",\n called from %s.%s",
1083 ccs
->cc
->module
, ccs
->cc
->label
);
1084 prev
= ccs
->prevStack
;
1086 fprintf(stderr
, "\n");
1088 return (!strncmp(ccs
->cc
->label
, "CAF", 3));
1091 /* For calling from .cmm code, where we can't reliably refer to stderr */
1093 fprintCCS_stderr (CostCentreStack
*ccs
, StgClosure
*exception
, StgTSO
*tso
)
1098 CostCentreStack
*prev_ccs
;
1100 const nat MAX_DEPTH
= 10; // don't print gigantic chains of stacks
1105 info
= get_itbl(UNTAG_CLOSURE(exception
));
1106 switch (info
->type
) {
1114 case CONSTR_NOCAF_STATIC
:
1115 desc
= GET_CON_DESC(itbl_to_con_itbl(info
));
1118 desc
= closure_type_names
[info
->type
];
1121 fprintf(stderr
, "*** Exception (reporting due to +RTS -xc): (%s), stack trace: \n ", desc
);
1124 is_caf
= fprintCallStack(ccs
);
1126 // traverse the stack down to the enclosing update frame to
1127 // find out where this CCS was evaluated from...
1129 stack
= tso
->stackobj
;
1133 for (; is_caf
&& depth
< MAX_DEPTH
; depth
++)
1135 switch (get_itbl((StgClosure
*)frame
)->type
)
1138 ccs
= ((StgUpdateFrame
*)frame
)->header
.prof
.ccs
;
1139 frame
+= sizeofW(StgUpdateFrame
);
1140 if (ccs
== CCS_MAIN
) {
1143 if (ccs
== prev_ccs
) {
1144 // ignore if this is the same as the previous stack,
1145 // we're probably in library code and haven't
1146 // accumulated any more interesting stack items
1147 // since the last update frame.
1151 fprintf(stderr
, " --> evaluated by: ");
1152 is_caf
= fprintCallStack(ccs
);
1154 case UNDERFLOW_FRAME
:
1155 stack
= ((StgUnderflowFrame
*)frame
)->next_chunk
;
1161 frame
+= stack_frame_sizeW((StgClosure
*)frame
);
1171 debugCCS( CostCentreStack
*ccs
)
1174 for (; ccs
&& ccs
!= CCS_MAIN
; ccs
= ccs
->prevStack
) {
1175 debugBelch("%s.%s", ccs
->cc
->module
, ccs
->cc
->label
);
1176 if (ccs
->prevStack
&& ccs
->prevStack
!= CCS_MAIN
) {
1184 #endif /* PROFILING */