Make tidyProgram discard speculative specialisation rules
[ghc.git] / rts / Papi.c
1 /* -----------------------------------------------------------------------------
2 * (c) The GHC Team 2006
3 *
4 * Initialization and use of the PAPI performance monitoring library
5 *
6 *
7 * To add events, or counters specific to your processor, modify
8 *
9 * init_countable_events
10 * papi_report
11 *
12 * ---------------------------------------------------------------------------*/
13
14
15 #ifdef USE_PAPI /* ugly */
16
17 #include <papi.h>
18 /* The POSIX symbols get defined in a header included from papi.h.
19 * Undefine them here to allow redefinition in PosixSource.h */
20 #undef _POSIX_SOURCE
21 #undef _POSIX_C_SOURCE
22 #undef _XOPEN_SOURCE
23
24 #include "PosixSource.h"
25 #include "Rts.h"
26
27 #include "RtsUtils.h"
28 #include "Stats.h"
29 #include "Papi.h"
30
31 // used to protect the aggregated counters
32 #ifdef THREADED_RTS
33 static Mutex papi_counter_mutex;
34 #endif
35
/* One entry of the table of events to count: the PAPI event code together
 * with the name printed for it in the statistics report. */
struct _papi_events {
    int event_code;             /* code handed to PAPI_add_event() */
    const char * event_name;    /* label used by papi_report() */
};
40
/* Beware: these native event codes are Opteron specific.
 * The numbers were obtained with the papi_avail and papi_native_avail
 * utilities, which is certainly not the official PAPI way of doing things.
 */
enum {
    FR_BR                               = 0x40000040,
    FR_BR_MIS                           = 0x40000041,
    FR_BR_MISCOMPARE                    = 0x40000048,
    DC_ACCESS                           = 0x40000019,
    DC_MISS                             = 0x4000001a,
    FR_DISPATCH_STALLS                  = 0x40000054,
    FR_DISPATCH_STALLS_BR               = 0x40000055,
    FR_DISPATCH_STALLS_FULL_REORDER     = 0x40000058,
    FR_DISPATCH_STALLS_FULL_RESERVATION = 0x40000059,
    FR_DISPATCH_STALLS_FULL_LS          = 0x4000005b,
    DC_L2_REFILL_MOES                   = 0x40001e1b,
    DC_SYS_REFILL_MOES                  = 0x40001e1c
};
59
60 /* This is bad, it should be in a header */
61 #define BIG_STRING_LEN 512
62
63
/* Evaluate a PAPI call exactly once, store its return code in the global
 * papi_error, and emit a debug message when the code is not PAPI_OK.
 *
 * The expansion is a complete if/else statement, so an `else` written by
 * the caller after the macro cannot accidentally bind to the macro's own
 * `if`.  It is deliberately not wrapped in do { } while (0): some call
 * sites in this file invoke the macro without a trailing semicolon, and
 * this form compiles both with and without one.
 */
#define PAPI_CHECK(CALL) \
    if ((papi_error = (CALL)) == PAPI_OK) { \
    } else { \
        debugBelch("PAPI function failed in module %s at line %d " \
                   "with error code %d\n", \
                   __FILE__, __LINE__, papi_error); \
    }
70
/* While PAPI reporting is going on this flag is on */
int papi_is_reporting;

/* Event sets and counter arrays for GC and mutator */

/* Event-set handles; they hold PAPI_NULL until papi_init() passes them to
 * papi_init_eventset(). */
int MutatorEvents = PAPI_NULL;
int GCEvents = PAPI_NULL;

/* Return code of the most recent PAPI call made through PAPI_CHECK or
 * papi_add_events(). */
int papi_error;

/* Arbitrary, to avoid using malloc */
#define MAX_PAPI_EVENTS 10
/* Name buffers for user-specified native events; filled in by
 * init_countable_events() via PAPI_event_code_to_name(). */
static char papiNativeEventNames[MAX_PAPI_EVENTS][PAPI_MAX_STR_LEN];

/* Number of valid entries in papi_events. */
static nat n_papi_events = 0;


/* Events counted during GC and Mutator execution */
/* NOTE(review): an earlier remark here asked about a trailing comma in an
 * initializer; the table has no initializer, so it no longer applies. */
static struct _papi_events papi_events[MAX_PAPI_EVENTS];
/* Accumulated counter values, indexed like papi_events. */
long_long MutatorCounters[MAX_PAPI_EVENTS];
long_long GC0Counters[MAX_PAPI_EVENTS];
long_long GC1Counters[MAX_PAPI_EVENTS];

/* Cycle bookkeeping: the start_* values are sampled when counting starts,
 * the others accumulate the elapsed cycles when counting stops. */
long_long start_mutator_cycles;
long_long mutator_cycles = 0;
long_long start_gc_cycles;
long_long gc0_cycles = 0;
long_long gc1_cycles = 0;



static long_long papi_counter(long_long values[],int event);
static void papi_add_events(int EventSet);

/* Upper bound on simultaneously counted events; replaced by the value of
 * PAPI_num_counters() in init_countable_events(). */
static nat max_hardware_counters = 2;
107
108 /* If you want to add events to count, extend the
109 * init_countable_events and the papi_report function.
110 * Be aware that your processor can count a limited number
111 * of events simultaneously, you can turn on multiplexing
112 * to increase that number, though.
113 */
114 static void papi_add_event(const char *name, int code)
115 {
116 if (n_papi_events >= max_hardware_counters) {
117 errorBelch("too many PAPI events for this CPU (max: %d)",
118 max_hardware_counters);
119 stg_exit(EXIT_FAILURE);
120 }
121 papi_events[n_papi_events].event_code = code;
122 papi_events[n_papi_events].event_name = name;
123 n_papi_events++;
124 }
125
126 static void
127 init_countable_events(void)
128 {
129 max_hardware_counters = PAPI_num_counters();
130
131 #define PAPI_ADD_EVENT(EVENT) papi_add_event(#EVENT,EVENT)
132
133 if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) {
134 PAPI_ADD_EVENT(FR_BR);
135 PAPI_ADD_EVENT(FR_BR_MIS);
136 // Docs are wrong? Opteron does not count indirect branch
137 // misses exclusively
138 PAPI_ADD_EVENT(FR_BR_MISCOMPARE);
139 } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_STALLS) {
140 PAPI_ADD_EVENT(FR_DISPATCH_STALLS);
141 PAPI_ADD_EVENT(FR_DISPATCH_STALLS_BR);
142 PAPI_ADD_EVENT(FR_DISPATCH_STALLS_FULL_LS);
143 } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) {
144 PAPI_ADD_EVENT(PAPI_L1_DCA);
145 PAPI_ADD_EVENT(PAPI_L1_DCM);
146 } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) {
147 PAPI_ADD_EVENT(PAPI_L2_DCA);
148 PAPI_ADD_EVENT(PAPI_L2_DCM);
149 } else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CB_EVENTS) {
150 PAPI_ADD_EVENT(DC_L2_REFILL_MOES);
151 PAPI_ADD_EVENT(DC_SYS_REFILL_MOES);
152 PAPI_ADD_EVENT(FR_BR_MIS);
153 } else if (RtsFlags.PapiFlags.eventType==PAPI_USER_EVENTS) {
154 nat i;
155 char *name;
156 char *asciiEventCode;
157 int code;
158 for (i = 0; i < RtsFlags.PapiFlags.numUserEvents; i++) {
159 if(RtsFlags.PapiFlags.userEventsKind[i] == PAPI_PRESET_EVENT_KIND) {
160 name = RtsFlags.PapiFlags.userEvents[i];
161 PAPI_CHECK(PAPI_event_name_to_code(name, &code))
162 }
163 else { // PAPI_NATIVE_EVENT_KIND
164 asciiEventCode = RtsFlags.PapiFlags.userEvents[i];
165 name = papiNativeEventNames[i];
166 code = strtol(asciiEventCode, NULL, 16 /* hex number expected */);
167 PAPI_CHECK(PAPI_event_code_to_name(code, name))
168 }
169 papi_add_event(name, code);
170 }
171 } else {
172 // PAPI_ADD_EVENT(PAPI_L1_DCA); // L1 data cache accesses
173 // PAPI_ADD_EVENT(PAPI_L1_ICR); // L1 instruction cache reads
174 // PAPI_ADD_EVENT(PAPI_L1_ICM); // L1 instruction cache misses
175 // PAPI_ADD_EVENT(PAPI_L1_STM); // L1 store misses
176 // PAPI_ADD_EVENT(PAPI_L1_DCM); // L1 data cache misses
177 // PAPI_ADD_EVENT(PAPI_L1_LDM); // L1 load misses
178 // PAPI_ADD_EVENT(PAPI_L2_TCM); // L2 cache misses
179 // PAPI_ADD_EVENT(PAPI_L2_STM); // L2 store misses
180 // PAPI_ADD_EVENT(PAPI_L2_DCW); // L2 data cache writes
181 // PAPI_ADD_EVENT(PAPI_L2_DCR); // L2 data cache reads
182 // PAPI_ADD_EVENT(PAPI_L2_TCW); // L2 cache writes
183 // PAPI_ADD_EVENT(PAPI_L2_TCR); // L2 cache reads
184 // PAPI_ADD_EVENT(PAPI_CA_CLN); // exclusive access to clean cache line
185 // PAPI_ADD_EVENT(PAPI_TLB_DM); // TLB misses
186 PAPI_ADD_EVENT(PAPI_TOT_INS); // Total instructions
187 PAPI_ADD_EVENT(PAPI_TOT_CYC); // Total instructions
188 // PAPI_ADD_EVENT(PAPI_CA_SHR); // exclusive access to shared cache line
189 // PAPI_ADD_EVENT(PAPI_RES_STL); // Cycles stalled on any resource
190
191 }
192
193 // We might also consider:
194 // PAPI_BR_MSP Conditional branch instructions mispredicted
195 // PAPI_RES_STL Cycles stalled on any resource
196 };
197
198
199 static void
200 papi_report_event(const char *name, StgWord64 value)
201 {
202 static char temp[BIG_STRING_LEN];
203 showStgWord64(value,temp,rtsTrue/*commas*/);
204 statsPrintf(" %15s %15s\n", name, temp);
205 }
206
207 /* This function reports counters for GC and mutator */
208 static void
209 papi_report(long_long counters[])
210 {
211 nat i;
212
213 /* Report the value of a counter as a percentage of another counter */
214 #define PAPI_REPORT_PCT(EVENTSET,EVENT,EVENTTOT) \
215 statsPrintf(" " #EVENT " %% of " #EVENTTOT " : %.1f%%\n", \
216 papi_counter(EVENTSET,EVENT)*100.0/papi_counter(EVENTSET,EVENTTOT))
217
218 for (i = 0; i < n_papi_events; i++)
219 {
220 papi_report_event(papi_events[i].event_name, counters[i]);
221 }
222
223 if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_BRANCH) {
224 PAPI_REPORT_PCT(counters,FR_BR_MIS,FR_BR);
225 PAPI_REPORT_PCT(counters,FR_BR_MISCOMPARE,FR_BR);
226 }
227
228 else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L1) {
229 PAPI_REPORT_PCT(counters,PAPI_L1_DCM,PAPI_L1_DCA);
230 }
231
232 else if (RtsFlags.PapiFlags.eventType==PAPI_FLAG_CACHE_L2) {
233 PAPI_REPORT_PCT(counters,PAPI_L2_DCM,PAPI_L2_DCA);
234 }
235 }
236
237 void
238 papi_stats_report (void)
239 {
240 statsPrintf(" Mutator CPU counters\n");
241 papi_report_event("CYCLES", mutator_cycles);
242 papi_report(MutatorCounters);
243
244 statsPrintf("\n GC(0) CPU counters\n");
245 papi_report_event("CYCLES", gc0_cycles);
246 papi_report(GC0Counters);
247
248 statsPrintf("\n GC(1) CPU counters\n");
249 papi_report_event("CYCLES", gc1_cycles);
250 papi_report(GC1Counters);
251 }
252
/* Create a PAPI event set and add every registered event to it.
 *
 * event_set: receives the handle written by PAPI_create_eventset().
 *
 * The calling thread is registered with PAPI first.  Failures of either
 * PAPI call are reported through PAPI_CHECK (debug message, error code
 * left in papi_error); the function carries on regardless.
 */
void
papi_init_eventset (int *event_set)
{
    /* Previously the result of PAPI_register_thread() was dropped. */
    PAPI_CHECK( PAPI_register_thread());
    PAPI_CHECK( PAPI_create_eventset(event_set));
    papi_add_events(*event_set);
}
260
261 void
262 papi_init (void)
263 {
264 /* Initialise the performance tracking library */
265 int ver;
266 if ((ver = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) {
267 if (ver > 0) {
268 errorBelch("PAPI_library_init: wrong version: %x", ver);
269 stg_exit(EXIT_FAILURE);
270 } else {
271 sysErrorBelch("PAPI_library_init");
272 stg_exit(EXIT_FAILURE);
273 }
274 }
275
276 #ifdef THREADED_RTS
277 {
278 int err;
279 if ((err = PAPI_thread_init(osThreadId)) < 0) {
280 barf("PAPI_thread_init: %d",err);
281 }
282
283 initMutex(&papi_counter_mutex);
284 }
285 #endif
286
287 init_countable_events();
288
289 papi_init_eventset(&MutatorEvents);
290 papi_init_eventset(&GCEvents);
291 }
292
293 /* Extract the value corresponding to an event */
294 static long_long
295 papi_counter(long_long values[],int event)
296 {
297 nat i;
298 for(i=0;i<n_papi_events;i++) {
299 if(papi_events[i].event_code==event) {
300 return values[i];
301 }
302 }
303 /* Passed a wrong event? */
304 debugBelch("Event %d is not part of event set\n",event);
305 return 0;
306 }
307
308 /* Add the events of papi_events into an event set */
309 static void
310 papi_add_events(int EventSet)
311 {
312 nat i;
313 for(i=0;i<n_papi_events;i++) {
314 if((papi_error=PAPI_add_event(EventSet,
315 papi_events[i].event_code))
316 != PAPI_OK)
317 debugBelch("Failed adding %s to event set with error code %d\n",
318 papi_events[i].event_name,papi_error);
319 }
320 }
321
322 /* We should be using elapsed cycles
323 * to be consistent with time metric chosen in Stats.c (Elapsed time).
324 * This is an approximation to the cycles that the program spends.
325 * Note that the counters, in contrast, are virtual and user space.
326 */
327 #define PAPI_cycles PAPI_get_virt_cyc
328
/* Start counting on the mutator event set and record the current cycle
 * count (PAPI_cycles, i.e. PAPI_get_virt_cyc) as the mutator start mark.
 * Both steps happen while papi_counter_mutex is held. */
void
papi_start_mutator_count(void)
{
    ACQUIRE_LOCK(&papi_counter_mutex);
    PAPI_CHECK( PAPI_start(MutatorEvents));
    /* Sampled after PAPI_start; papi_stop_mutator_count() subtracts it. */
    start_mutator_cycles = PAPI_cycles();
    RELEASE_LOCK(&papi_counter_mutex);
}
337
/* Stop counting on the mutator event set.
 * Adds the cycles elapsed since papi_start_mutator_count() to
 * mutator_cycles, accumulates the event counters into MutatorCounters,
 * and then stops the event set. */
void
papi_stop_mutator_count(void)
{
    ACQUIRE_LOCK(&papi_counter_mutex);
    /* The cycle delta is taken before the counters are read and stopped. */
    mutator_cycles += PAPI_cycles() - start_mutator_cycles;
    PAPI_CHECK( PAPI_accum(MutatorEvents,MutatorCounters));
    /* NULL: the values were already collected by PAPI_accum above. */
    PAPI_CHECK( PAPI_stop(MutatorEvents,NULL));
    RELEASE_LOCK(&papi_counter_mutex);
}
347
/* Start counting on the GC event set and record the current cycle count
 * as the GC start mark.  The same start mark is used by both
 * papi_stop_gc0_count() and papi_stop_gc1_count(). */
void
papi_start_gc_count(void)
{
    ACQUIRE_LOCK(&papi_counter_mutex);
    PAPI_CHECK( PAPI_start(GCEvents));
    start_gc_cycles = PAPI_cycles();
    RELEASE_LOCK(&papi_counter_mutex);
}
356
/* Stop counting on the GC event set and attribute the results to GC(0):
 * the event counters are accumulated into GC0Counters and the cycles
 * elapsed since papi_start_gc_count() are added to gc0_cycles. */
void
papi_stop_gc0_count(void)
{
    ACQUIRE_LOCK(&papi_counter_mutex);
    PAPI_CHECK( PAPI_accum(GCEvents,GC0Counters));
    PAPI_CHECK( PAPI_stop(GCEvents,NULL));
    /* Unlike the mutator variant, the cycle delta is taken after the stop. */
    gc0_cycles += PAPI_cycles() - start_gc_cycles;
    RELEASE_LOCK(&papi_counter_mutex);
}
366
367
/* Stop counting on the GC event set and attribute the results to GC(1):
 * the event counters are accumulated into GC1Counters and the cycles
 * elapsed since papi_start_gc_count() are added to gc1_cycles. */
void
papi_stop_gc1_count(void)
{
    ACQUIRE_LOCK(&papi_counter_mutex);
    PAPI_CHECK( PAPI_accum(GCEvents,GC1Counters));
    PAPI_CHECK( PAPI_stop(GCEvents,NULL));
    /* The cycle delta is taken after the event set has been stopped. */
    gc1_cycles += PAPI_cycles() - start_gc_cycles;
    RELEASE_LOCK(&papi_counter_mutex);
}
377
378
/* Start counting on a caller-supplied event set.  No cycle start mark is
 * recorded here.
 *
 * event_set: PAPI event-set handle to start. */
void
papi_thread_start_gc1_count(int event_set)
{
    ACQUIRE_LOCK(&papi_counter_mutex);
    PAPI_CHECK( PAPI_start(event_set));
    RELEASE_LOCK(&papi_counter_mutex);
}
386
/* Stop counting on a caller-supplied event set, accumulating its event
 * counters into the shared GC1Counters array while papi_counter_mutex is
 * held.  gc1_cycles is not updated here.
 *
 * event_set: PAPI event-set handle to read and stop. */
void
papi_thread_stop_gc1_count(int event_set)
{
    ACQUIRE_LOCK(&papi_counter_mutex);
    PAPI_CHECK( PAPI_accum(event_set,GC1Counters));
    PAPI_CHECK( PAPI_stop(event_set,NULL));
    RELEASE_LOCK(&papi_counter_mutex);
}
395
396 #endif /* USE_PAPI */
397
398 // Local Variables:
399 // mode: C
400 // fill-column: 80
401 // indent-tabs-mode: nil
402 // c-basic-offset: 4
403 // buffer-file-coding-system: utf-8-unix
404 // End: