One more PAPI measurement, dropped precise cycle counting and replaced it with instru...
[ghc.git] / rts / Papi.c
1 /* -----------------------------------------------------------------------------
2 * (c) The GHC Team 2006
3 *
4 * Initialization and use of the PAPI performance monitoring library
5 *
6 *
7 * To add events, or counters specific to your processor, modify
8 *
9 * init_countable_events
10 * papi_report
11 *
12 * ---------------------------------------------------------------------------*/
13
14
15 #ifdef USE_PAPI /* ugly */
16
17 #include "Papi.h"
18 #include "Rts.h"
19 #include "RtsUtils.h"
20 #include "Stats.h"
21 #include "RtsFlags.h"
22
23
/*
 * One entry of the table of events to count.
 *
 * event_code  numeric PAPI event code handed to PAPI_add_event().
 * event_name  spelling of the event, used in diagnostics.  It is only ever
 *             set from a string literal (see PAPI_ADD_EVENT), hence const.
 */
struct _papi_events {
    int event_code;
    const char *event_name;
};
28
/*
 * Append EVENT to the papi_events table: its value becomes the event code
 * and its spelling (via stringification) becomes the event name.
 *
 * The table has room for MAX_PAPI_EVENTS entries; this is checked with
 * ASSERT only.
 *
 * Wrapped in do { } while (0) so that the macro behaves as one statement,
 * e.g. inside an unbraced if/else.
 */
#define PAPI_ADD_EVENT(EVENT)                                   \
    do {                                                        \
        ASSERT(n_papi_events<MAX_PAPI_EVENTS);                  \
        papi_events[n_papi_events].event_code = (EVENT);        \
        papi_events[n_papi_events].event_name = #EVENT;         \
        n_papi_events++;                                        \
    } while (0)
36
/*
 * Report the value of a counter.
 *
 * Looks EVENT up in EVENTSET with papi_counter(), formats it into the
 * file-level scratch buffer `temp` and prints it through statsPrintf(),
 * labelled with the spelling of EVENT.
 *
 * Wrapped in do { } while (0) so that the macro behaves as one statement,
 * e.g. inside an unbraced if/else.
 */
#define PAPI_REPORT(EVENTSET,EVENT)                                                 \
    do {                                                                            \
        ullong_format_string(papi_counter(EVENTSET,EVENT),temp,rtsTrue/*commas*/);  \
        statsPrintf(" (" #EVENT ") : %s\n",temp);                                   \
    } while (0)
43
/*
 * Report the value of a counter as a percentage of another counter.
 *
 * Both EVENT and EVENTTOT are looked up in EVENTSET with papi_counter().
 * When the total is zero (nothing was counted, or papi_counter() could not
 * find the event and returned 0) the percentage is reported as 0.0 instead
 * of dividing by zero.
 *
 * Wrapped in do { } while (0) so that the macro behaves as one statement.
 */
#define PAPI_REPORT_PCT(EVENTSET,EVENT,EVENTTOT)                               \
    do {                                                                       \
        double papi_pct_part_  = (double)papi_counter(EVENTSET,EVENT);         \
        double papi_pct_total_ = (double)papi_counter(EVENTSET,EVENTTOT);      \
        statsPrintf(" (" #EVENT ") %% of (" #EVENTTOT ") : %.1f%%\n",          \
                    papi_pct_total_ != 0.0                                     \
                        ? papi_pct_part_*100.0/papi_pct_total_                 \
                        : 0.0);                                                \
    } while (0)
48
/*
 * Native event codes.  Beware: these are Opteron specific.
 *
 * The numbers were obtained with the papi_avail and papi_native_avail
 * utilities, which is certainly not the official PAPI way of doing things.
 */
#define FR_BR                       0x40000040
#define FR_BR_MIS                   0x40000041
#define FR_BR_MISCOMPARE            0x40000048
#define DC_ACCESS                   0x40000019
#define DC_MISS                     0x4000001a
#define FR_DISPATCH_STALLS_BR       0x40000055
#define FR_DISPATCH_STALLS_FULL_LS  0x4000005b
#define DC_L2_REFILL_MOES           0x40001e1b
#define DC_SYS_REFILL_MOES          0x40001e1c
64
/* Number of counted events: an alias for the running count n_papi_events. */
#define N_PAPI_EVENTS   n_papi_events

/* Size of the scratch string buffer.  This is bad, it should be in a header. */
#define BIG_STRING_LEN  512
70
71 /* While PAPI reporting is going on this flag is on */
72 int papi_is_reporting;
73
74 /* Event sets and counter arrays for GC and mutator */
75
76 int MutatorEvents = PAPI_NULL;
77 int GCEvents = PAPI_NULL;
78
79 int papi_error;
80
81 /* Arbitrary, to avoid using malloc */
82 #define MAX_PAPI_EVENTS 10
83
84 int n_papi_events = 0;
85
86
87 /* Events counted during GC and Mutator execution */
88 /* There's a trailing comma, do all C compilers accept that? */
89 static struct _papi_events papi_events[MAX_PAPI_EVENTS];
90 long_long MutatorCounters[MAX_PAPI_EVENTS];
91 long_long GCCounters[MAX_PAPI_EVENTS];
92
93 long_long start_mutator_cycles;
94 long_long start_gc_cycles;
95 long_long mutator_cycles;
96 long_long gc_cycles;
97
98
99
100 /* If you want to add events to count, extend the
101 * init_countable_events and the papi_report function.
102 * Be aware that your processor can count a limited number
103 * of events simultaneously, you can turn on multiplexing
104 * to increase that number, though.
105 */
106 static void
107 init_countable_events(void)
108 {
109 PAPI_ADD_EVENT(PAPI_TOT_INS);
110 if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) {
111 PAPI_ADD_EVENT(FR_BR);
112 PAPI_ADD_EVENT(FR_BR_MIS);
113 /* Docs are wrong? Opteron does not count indirect branch misses exclusively */
114 PAPI_ADD_EVENT(FR_BR_MISCOMPARE);
115 }
116 if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) {
117 PAPI_ADD_EVENT(FR_DISPATCH_STALLS_BR);
118 PAPI_ADD_EVENT(FR_DISPATCH_STALLS_FULL_LS);
119 }
120 if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) {
121 PAPI_ADD_EVENT(PAPI_L1_DCA);
122 PAPI_ADD_EVENT(PAPI_L1_DCM);
123 }
124 if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) {
125 PAPI_ADD_EVENT(PAPI_L2_DCA);
126 PAPI_ADD_EVENT(PAPI_L2_DCM);
127 }
128 if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) {
129 PAPI_ADD_EVENT(DC_L2_REFILL_MOES);
130 PAPI_ADD_EVENT(DC_SYS_REFILL_MOES);
131 PAPI_ADD_EVENT(FR_BR_MIS);
132 }
133 };
134
135
136 static char temp[BIG_STRING_LEN];
137
138 void
139 papi_mut_cycles()
140 {
141 ullong_format_string(mutator_cycles,temp,rtsTrue/*commas*/);
142 statsPrintf(" (MUT_CYCLES) : %s\n",temp);
143 }
144
145 void
146 papi_gc_cycles()
147 {
148 ullong_format_string(gc_cycles,temp,rtsTrue/*commas*/);
149 statsPrintf(" (GC_CYCLES) : %s\n",temp);
150 }
151
152 /* This function reports counters for GC and mutator */
153 void
154 papi_report(long_long PapiCounters[])
155 {
156
157 /* I need to improve formatting aesthetics */
158 PAPI_REPORT(PapiCounters,PAPI_TOT_INS);
159
160 if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) {
161 PAPI_REPORT(PapiCounters,FR_BR);
162 PAPI_REPORT(PapiCounters,FR_BR_MIS);
163 PAPI_REPORT_PCT(PapiCounters,FR_BR_MIS,FR_BR);
164 PAPI_REPORT_PCT(PapiCounters,FR_BR_MISCOMPARE,FR_BR);
165 }
166
167 if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) {
168 PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_BR);
169 //PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_BR,PAPI_TOT_CYC);
170 PAPI_REPORT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS);
171 //PAPI_REPORT_PCT(PapiCounters,FR_DISPATCH_STALLS_FULL_LS,PAPI_TOT_CYC);
172 }
173
174 if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) {
175 PAPI_REPORT(PapiCounters,PAPI_L1_DCA);
176 PAPI_REPORT(PapiCounters,PAPI_L1_DCM);
177 PAPI_REPORT_PCT(PapiCounters,PAPI_L1_DCM,PAPI_L1_DCA);
178 }
179
180 if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) {
181 PAPI_REPORT(PapiCounters,PAPI_L2_DCA);
182 PAPI_REPORT(PapiCounters,PAPI_L2_DCM);
183 PAPI_REPORT_PCT(PapiCounters,PAPI_L2_DCM,PAPI_L2_DCA);
184 }
185
186 if(RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) {
187 PAPI_REPORT(PapiCounters,DC_L2_REFILL_MOES);
188 PAPI_REPORT(PapiCounters,DC_SYS_REFILL_MOES);
189 PAPI_REPORT(PapiCounters,FR_BR_MIS);
190 }
191
192 }
193
194
195
196 void
197 papi_init_eventsets(void)
198 {
199
200 init_countable_events();
201
202 /* One event set for the mutator and another for the GC */
203 PAPI_CHECK( PAPI_create_eventset(&MutatorEvents));
204 PAPI_CHECK( PAPI_create_eventset(&GCEvents));
205
206 /* Both sets contain the same events */
207 papi_add_events(MutatorEvents);
208 papi_add_events(GCEvents);
209
210 }
211
212 /* Extract the value corresponding to an event */
213 long_long
214 papi_counter(long_long values[],int event)
215 {
216 int i;
217 for(i=0;i<N_PAPI_EVENTS;i++) {
218 if(papi_events[i].event_code==event) {
219 return values[i];
220 }
221 }
222 /* Passed a wrong event? */
223 debugBelch("Event %d is not part of event set\n",event);
224 return 0;
225 }
226
227 /* Add the events of papi_events into an event set */
228 void
229 papi_add_events(int EventSet)
230 {
231 int i;
232 for(i=0;i<N_PAPI_EVENTS;i++) {
233 if((papi_error=PAPI_add_event(EventSet,
234 papi_events[i].event_code))
235 != PAPI_OK)
236 debugBelch("Failed adding %s to event set with error code %d\n",
237 papi_events[i].event_name,papi_error);
238 }
239 }
240
/*
 * Cycle source behind the mutator/GC cycle totals.
 *
 * We should be using elapsed cycles, to be consistent with the time metric
 * chosen in Stats.c (elapsed time).  The result is an approximation to the
 * cycles that the program spends.  Note that the counters, in contrast,
 * are virtual and user space.
 *
 * NOTE(review): the macro maps to PAPI_get_virt_cyc, which does not look
 * like an elapsed-cycle source -- confirm which one is intended.
 */
#define PAPI_cycles PAPI_get_virt_cyc
247
248 void
249 papi_start_mutator_count(void)
250 {
251 PAPI_CHECK( PAPI_start(MutatorEvents));
252 start_mutator_cycles = PAPI_cycles();
253 }
254
255 void
256 papi_stop_mutator_count(void)
257 {
258 mutator_cycles += PAPI_cycles() - start_mutator_cycles;
259 PAPI_CHECK( PAPI_accum(MutatorEvents,MutatorCounters));
260 PAPI_CHECK( PAPI_stop(MutatorEvents,NULL));
261 }
262
263 void
264 papi_start_gc_count(void)
265 {
266 PAPI_CHECK( PAPI_start(GCEvents));
267 start_gc_cycles = PAPI_cycles();
268 }
269
270 void
271 papi_stop_gc_count(void)
272 {
273 gc_cycles += PAPI_cycles() - start_gc_cycles;
274 PAPI_CHECK( PAPI_accum(GCEvents,GCCounters));
275 PAPI_CHECK( PAPI_stop(GCEvents,NULL));
276 }
277
278
279 #endif /* USE_PAPI */