includes/stg/SMP.h
/* ----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2005-2009
 *
 * Macros for multi-CPU support
 *
 * Do not #include this file directly: #include "Rts.h" instead.
 *
 * To understand the structure of the RTS headers, see the wiki:
 *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes
 *
 * -------------------------------------------------------------------------- */

#ifndef SMP_H
#define SMP_H

#if defined(THREADED_RTS)

/* ----------------------------------------------------------------------------
   Atomic operations
   ------------------------------------------------------------------------- */

#if !IN_STG_CODE || IN_STGCRUN
// We only want the barriers, e.g. write_barrier(), declared in .hc
// files.  Defining the other inline functions here causes type
// mismatch errors from gcc, because the generated C code is assuming
// that there are no prototypes in scope.

/*
 * The atomic exchange operation: xchg(p,w) exchanges the value
 * pointed to by p with the value w, returning the old value.
 *
 * Used for locking closures during updates (see lockClosure() below)
 * and the MVar primops.
 */
EXTERN_INLINE StgWord xchg(StgPtr p, StgWord w);
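
/*
 * Illustrative sketch (not part of this header): a minimal spin-lock
 * built on xchg().  The names spin_lock/spin_unlock are hypothetical;
 * the RTS uses xchg() in a similar way when locking closures.
 *
 *   void spin_lock (StgVolatilePtr lock) {
 *       // atomically write 1; if the old value was 0 we own the lock
 *       while (xchg((StgPtr)lock, 1) != 0) {
 *           busy_wait_nop();   // spin until the holder writes 0 back
 *       }
 *   }
 *
 *   void spin_unlock (StgVolatilePtr lock) {
 *       write_barrier();       // make protected stores visible first
 *       *lock = 0;
 *   }
 */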

/*
 * Compare-and-swap.  Atomically does this:
 *
 * cas(p,o,n) {
 *    r = *p;
 *    if (r == o) { *p = n };
 *    return r;
 * }
 */
EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n);
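
/*
 * Illustrative sketch (not part of this header): cas() is typically
 * used either in a retry loop or to claim ownership of a word exactly
 * once.  The names below (claim_slot, slot, my_id) are hypothetical.
 *
 *   // returns non-zero iff we were the one to claim the slot
 *   int claim_slot (StgVolatilePtr slot, StgWord my_id) {
 *       return cas(slot, 0, my_id) == 0;
 *   }
 */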

/*
 * Atomic increment
 *
 * atomic_inc(p) {
 *    return ++(*p);
 * }
 */
EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p);

/*
 * Atomic decrement
 *
 * atomic_dec(p) {
 *    return --(*p);
 * }
 */
EXTERN_INLINE StgWord atomic_dec(StgVolatilePtr p);

/*
 * Busy-wait nop: this is a hint to the CPU that we are currently in a
 * busy-wait loop waiting for another CPU to change something.  On a
 * hyperthreaded CPU it should yield to another thread, for example.
 */
EXTERN_INLINE void busy_wait_nop(void);
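
/*
 * Illustrative sketch (not part of this header): a typical busy-wait
 * loop pairs busy_wait_nop() with VOLATILE_LOAD (defined below) so the
 * compiler re-reads the flag on every iteration and the CPU knows we
 * are spinning.  The names wait_for_flag/flag are hypothetical.
 *
 *   void wait_for_flag (StgWord *flag) {
 *       while (VOLATILE_LOAD(flag) == 0) {
 *           busy_wait_nop();
 *       }
 *       load_load_barrier();   // don't read protected data too early
 *   }
 */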

#endif // !IN_STG_CODE

/*
 * Various kinds of memory barrier.
 *  write_barrier: prevents future stores occurring before preceding stores.
 *  store_load_barrier: prevents future loads occurring before preceding stores.
 *  load_load_barrier: prevents future loads occurring before earlier loads.
 *
 * Reference for these: "The JSR-133 Cookbook for Compiler Writers"
 * http://gee.cs.oswego.edu/dl/jmm/cookbook.html
 *
 * To check whether you got these right, try the test in
 *   testsuite/tests/rts/testwsdeque.c
 * This tests the work-stealing deque implementation, which relies on
 * properly working store_load and load_load memory barriers.
 */
EXTERN_INLINE void write_barrier(void);
EXTERN_INLINE void store_load_barrier(void);
EXTERN_INLINE void load_load_barrier(void);
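
/*
 * Illustrative sketch (not part of this header): the classic
 * message-passing pattern these barriers are used for.  The names
 * (payload, ready, use) are hypothetical shared state.
 *
 *   // Writer: publish the payload, then set the flag.
 *   payload = 42;
 *   write_barrier();          // payload must be visible before ready
 *   ready = 1;
 *
 *   // Reader: wait for the flag, then read the payload.
 *   while (VOLATILE_LOAD(&ready) == 0) { busy_wait_nop(); }
 *   load_load_barrier();      // don't read payload before ready
 *   use(payload);
 */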

/* ----------------------------------------------------------------------------
   Implementations
   ------------------------------------------------------------------------- */

#if !IN_STG_CODE || IN_STGCRUN

EXTERN_INLINE StgWord
xchg(StgPtr p, StgWord w)
{
    StgWord result;
#if i386_HOST_ARCH || x86_64_HOST_ARCH
    result = w;
    __asm__ __volatile__ (
        // NB: the xchg instruction is implicitly locked, so we do not
        // need a lock prefix here.
          "xchg %1,%0"
          :"+r" (result), "+m" (*p)
          : /* no input-only operands */
        );
#elif powerpc_HOST_ARCH
    __asm__ __volatile__ (
        "1:     lwarx     %0, 0, %2\n"
        "       stwcx.    %1, 0, %2\n"
        "       bne-      1b"
        :"=&r" (result)
        :"r" (w), "r" (p)
    );
#elif sparc_HOST_ARCH
    result = w;
    __asm__ __volatile__ (
        "swap %1,%0"
        : "+r" (result), "+m" (*p)
        : /* no input-only operands */
      );
#elif !defined(WITHSMP)
    result = *p;
    *p = w;
#else
#error xchg() unimplemented on this architecture
#endif
    return result;
}

/*
 * CMPXCHG - the single-word atomic compare-and-exchange instruction.  Used
 * in the STM implementation.
 */
EXTERN_INLINE StgWord
cas(StgVolatilePtr p, StgWord o, StgWord n)
{
#if i386_HOST_ARCH || x86_64_HOST_ARCH
    __asm__ __volatile__ (
          "lock\ncmpxchg %3,%1"
          :"=a"(o), "=m" (*(volatile unsigned int *)p)
          :"0" (o), "r" (n));
    return o;
#elif powerpc_HOST_ARCH
    StgWord result;
    __asm__ __volatile__ (
        "1:     lwarx     %0, 0, %3\n"
        "       cmpw      %0, %1\n"
        "       bne       2f\n"
        "       stwcx.    %2, 0, %3\n"
        "       bne-      1b\n"
        "2:"
        :"=&r" (result)
        :"r" (o), "r" (n), "r" (p)
        :"cc", "memory"
    );
    return result;
#elif sparc_HOST_ARCH
    __asm__ __volatile__ (
        "cas [%1], %2, %0"
        : "+r" (n)
        : "r" (p), "r" (o)
        : "memory"
    );
    return n;
#elif !defined(WITHSMP)
    StgWord result;
    result = *p;
    if (result == o) {
        *p = n;
    }
    return result;
#else
#error cas() unimplemented on this architecture
#endif
}

EXTERN_INLINE StgWord
atomic_inc(StgVolatilePtr p)
{
#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
    StgWord r;
    r = 1;
    __asm__ __volatile__ (
        "lock\nxadd %0,%1":
            "+r" (r), "+m" (*p):
    );
    return r+1;
#else
    StgWord old, new;
    do {
        old = *p;
        new = old + 1;
    } while (cas(p, old, new) != old);
    return new;
#endif
}

EXTERN_INLINE StgWord
atomic_dec(StgVolatilePtr p)
{
#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
    StgWord r;
    r = (StgWord)-1;
    __asm__ __volatile__ (
        "lock\nxadd %0,%1":
            "+r" (r), "+m" (*p):
    );
    return r-1;
#else
    StgWord old, new;
    do {
        old = *p;
        new = old - 1;
    } while (cas(p, old, new) != old);
    return new;
#endif
}

EXTERN_INLINE void
busy_wait_nop(void)
{
#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
    __asm__ __volatile__ ("rep; nop");
    //
#else
    // nothing
#endif
}

#endif // !IN_STG_CODE

/*
 * We need to tell both the compiler AND the CPU about the barriers.
 * It's no good preventing the CPU from reordering the operations if
 * the compiler has already done so - hence the "memory" restriction
 * on each of the barriers below.
 */
EXTERN_INLINE void
write_barrier(void) {
#if i386_HOST_ARCH || x86_64_HOST_ARCH
    __asm__ __volatile__ ("" : : : "memory");
#elif powerpc_HOST_ARCH
    __asm__ __volatile__ ("lwsync" : : : "memory");
#elif sparc_HOST_ARCH
    /* Sparc in TSO mode does not require store/store barriers. */
    __asm__ __volatile__ ("" : : : "memory");
#elif !defined(WITHSMP)
    return;
#else
#error memory barriers unimplemented on this architecture
#endif
}

EXTERN_INLINE void
store_load_barrier(void) {
#if i386_HOST_ARCH
    __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory");
#elif x86_64_HOST_ARCH
    __asm__ __volatile__ ("lock; addq $0,0(%%rsp)" : : : "memory");
#elif powerpc_HOST_ARCH
    __asm__ __volatile__ ("sync" : : : "memory");
#elif sparc_HOST_ARCH
    __asm__ __volatile__ ("membar #StoreLoad" : : : "memory");
#elif !defined(WITHSMP)
    return;
#else
#error memory barriers unimplemented on this architecture
#endif
}

EXTERN_INLINE void
load_load_barrier(void) {
#if i386_HOST_ARCH
    __asm__ __volatile__ ("" : : : "memory");
#elif x86_64_HOST_ARCH
    __asm__ __volatile__ ("" : : : "memory");
#elif powerpc_HOST_ARCH
    __asm__ __volatile__ ("lwsync" : : : "memory");
#elif sparc_HOST_ARCH
    /* Sparc in TSO mode does not require load/load barriers. */
    __asm__ __volatile__ ("" : : : "memory");
#elif !defined(WITHSMP)
    return;
#else
#error memory barriers unimplemented on this architecture
#endif
}

// Load a pointer from a memory location that might be being modified
// concurrently.  This prevents the compiler from optimising away
// multiple loads of the memory location, as it might otherwise do in
// a busy wait loop for example.
#define VOLATILE_LOAD(p) (*((StgVolatilePtr)(p)))
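
/*
 * Illustrative sketch (not part of this header): without VOLATILE_LOAD
 * the compiler may hoist the load out of the loop and spin forever on a
 * stale value.  The name "flag" is hypothetical.
 *
 *   while (*flag == 0) { }                  // load may be hoisted: unsafe
 *   while (VOLATILE_LOAD(flag) == 0) { }    // re-reads memory each time
 */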

/* ---------------------------------------------------------------------- */
#else /* !THREADED_RTS */

#define write_barrier()      /* nothing */
#define store_load_barrier() /* nothing */
#define load_load_barrier()  /* nothing */

#if !IN_STG_CODE || IN_STGCRUN
INLINE_HEADER StgWord
xchg(StgPtr p, StgWord w)
{
    StgWord old = *p;
    *p = w;
    return old;
}

EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n);
EXTERN_INLINE StgWord
cas(StgVolatilePtr p, StgWord o, StgWord n)
{
    StgWord result;
    result = *p;
    if (result == o) {
        *p = n;
    }
    return result;
}

INLINE_HEADER StgWord
atomic_inc(StgVolatilePtr p)
{
    return ++(*p);
}

INLINE_HEADER StgWord
atomic_dec(StgVolatilePtr p)
{
    return --(*p);
}
#endif

#define VOLATILE_LOAD(p) ((StgWord)*((StgWord*)(p)))

#endif /* !THREADED_RTS */

#endif /* SMP_H */