Update some files for new testsuite tests location
[ghc.git] / rts / WSDeque.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2009
4 *
5 * Work-stealing Deque data structure
6 *
7 * The implementation uses Double-Ended Queues with lock-free access
8 * (thereby often called "deque") as described in
9 *
10 * D.Chase and Y.Lev, Dynamic Circular Work-Stealing Deque.
11 * SPAA'05, July 2005, Las Vegas, USA.
12 * ACM 1-58113-986-1/05/0007
13 *
14 * Author: Jost Berthold MSRC 07-09/2008
15 *
16 * The DeQue is held as a circular array with known length. Positions
17 * of top (read-end) and bottom (write-end) always increase, and the
18 * array is accessed with indices modulo array-size. While this bears
19 * the risk of overflow, we assume that (with 64 bit indices), a
20 * program must run very long to reach that point.
21 *
22 * The write end of the queue (position bottom) can only be used with
23 * mutual exclusion, i.e. by exactly one caller at a time. At this
24 * end, new items can be enqueued using pushBottom()/newSpark(), and
25 * removed using popBottom()/reclaimSpark() (the latter implying a cas
26 * synchronisation with potential concurrent readers for the case of
27 * just one element).
28 *
29 * Multiple readers can steal from the read end (position top), and
30 * are synchronised without a lock, based on a cas of the top
31 * position. One reader wins, the others return NULL for a failure.
32 *
33 * Both popWSDeque and stealWSDeque also return NULL when the queue is empty.
34 *
35 * Testing: see testsuite/tests/rts/testwsdeque.c. If
36 * there's anything wrong with the deque implementation, this test
37 * will probably catch it.
38 *
39 * ---------------------------------------------------------------------------*/
40
41 #include "PosixSource.h"
42 #include "Rts.h"
43
44 #include "RtsUtils.h"
45 #include "WSDeque.h"
46
47 #define CASTOP(addr,old,new) ((old) == cas(((StgPtr)addr),(old),(new)))
48
49 /* -----------------------------------------------------------------------------
50 * newWSDeque
51 * -------------------------------------------------------------------------- */
52
53 /* internal helpers ... */
54
55 static StgWord
56 roundUp2(StgWord val)
57 {
58 StgWord rounded = 1;
59
60 /* StgWord is unsigned anyway, only catch 0 */
61 if (val == 0) {
62 barf("DeQue,roundUp2: invalid size 0 requested");
63 }
64 /* at least 1 bit set, shift up to its place */
65 do {
66 rounded = rounded << 1;
67 } while (0 != (val = val>>1));
68 return rounded;
69 }
70
71 WSDeque *
72 newWSDeque (nat size)
73 {
74 StgWord realsize;
75 WSDeque *q;
76
77 realsize = roundUp2(size); /* to compute modulo as a bitwise & */
78
79 q = (WSDeque*) stgMallocBytes(sizeof(WSDeque), /* admin fields */
80 "newWSDeque");
81 q->elements = stgMallocBytes(realsize * sizeof(StgClosurePtr), /* dataspace */
82 "newWSDeque:data space");
83 q->top=0;
84 q->bottom=0;
85 q->topBound=0; /* read by writer, updated each time top is read */
86
87 q->size = realsize; /* power of 2 */
88 q->moduloSize = realsize - 1; /* n % size == n & moduloSize */
89
90 ASSERT_WSDEQUE_INVARIANTS(q);
91 return q;
92 }
93
94 /* -----------------------------------------------------------------------------
95 * freeWSDeque
96 * -------------------------------------------------------------------------- */
97
98 void
99 freeWSDeque (WSDeque *q)
100 {
101 stgFree(q->elements);
102 stgFree(q);
103 }
104
105 /* -----------------------------------------------------------------------------
106 *
107 * popWSDeque: remove an element from the write end of the queue.
108 * Returns the removed spark, and NULL if a race is lost or the pool
109 * empty.
110 *
111 * If only one spark is left in the pool, we synchronise with
112 * concurrently stealing threads by using cas to modify the top field.
113 * This routine should NEVER be called by a task which does not own
114 * this deque.
115 *
116 * -------------------------------------------------------------------------- */
117
118 void *
119 popWSDeque (WSDeque *q)
120 {
121 /* also a bit tricky, has to avoid concurrent steal() calls by
122 accessing top with cas, when there is only one element left */
123 StgWord t, b;
124 long currSize;
125 void * removed;
126
127 ASSERT_WSDEQUE_INVARIANTS(q);
128
129 b = q->bottom;
130
131 // "decrement b as a test, see what happens"
132 b--;
133 q->bottom = b;
134
135 // very important that the following read of q->top does not occur
136 // before the earlier write to q->bottom.
137 store_load_barrier();
138
139 t = q->top; /* using topBound would give an *upper* bound, we
140 need a lower bound. We use the real top here, but
141 can update the topBound value */
142 q->topBound = t;
143 currSize = (long)b - (long)t;
144 if (currSize < 0) { /* was empty before decrementing b, set b
145 consistently and abort */
146 q->bottom = t;
147 return NULL;
148 }
149
150 // read the element at b
151 removed = q->elements[b & q->moduloSize];
152
153 if (currSize > 0) { /* no danger, still elements in buffer after b-- */
154 // debugBelch("popWSDeque: t=%ld b=%ld = %ld\n", t, b, removed);
155 return removed;
156 }
157 /* otherwise, has someone meanwhile stolen the same (last) element?
158 Check and increment top value to know */
159 if ( !(CASTOP(&(q->top),t,t+1)) ) {
160 removed = NULL; /* no success, but continue adjusting bottom */
161 }
162 q->bottom = t+1; /* anyway, empty now. Adjust bottom consistently. */
163 q->topBound = t+1; /* ...and cached top value as well */
164
165 ASSERT_WSDEQUE_INVARIANTS(q);
166 ASSERT(q->bottom >= q->top);
167
168 // debugBelch("popWSDeque: t=%ld b=%ld = %ld\n", t, b, removed);
169
170 return removed;
171 }
172
173 /* -----------------------------------------------------------------------------
174 * stealWSDeque
175 * -------------------------------------------------------------------------- */
176
177 void *
178 stealWSDeque_ (WSDeque *q)
179 {
180 void * stolen;
181 StgWord b,t;
182
183 // Can't do this on someone else's spark pool:
184 // ASSERT_WSDEQUE_INVARIANTS(q);
185
186 // NB. these loads must be ordered, otherwise there is a race
187 // between steal and pop.
188 t = q->top;
189 load_load_barrier();
190 b = q->bottom;
191
192 // NB. b and t are unsigned; we need a signed value for the test
193 // below, because it is possible that t > b during a
194 // concurrent popWSQueue() operation.
195 if ((long)b - (long)t <= 0 ) {
196 return NULL; /* already looks empty, abort */
197 }
198
199 /* now access array, see pushBottom() */
200 stolen = q->elements[t & q->moduloSize];
201
202 /* now decide whether we have won */
203 if ( !(CASTOP(&(q->top),t,t+1)) ) {
204 /* lost the race, someon else has changed top in the meantime */
205 return NULL;
206 } /* else: OK, top has been incremented by the cas call */
207
208 // debugBelch("stealWSDeque_: t=%d b=%d\n", t, b);
209
210 // Can't do this on someone else's spark pool:
211 // ASSERT_WSDEQUE_INVARIANTS(q);
212
213 return stolen;
214 }
215
216 void *
217 stealWSDeque (WSDeque *q)
218 {
219 void *stolen;
220
221 do {
222 stolen = stealWSDeque_(q);
223 } while (stolen == NULL && !looksEmptyWSDeque(q));
224
225 return stolen;
226 }
227
228 /* -----------------------------------------------------------------------------
229 * pushWSQueue
230 * -------------------------------------------------------------------------- */
231
232 #define DISCARD_NEW
233
234 /* enqueue an element. Should always succeed by resizing the array
235 (not implemented yet, silently fails in that case). */
236 rtsBool
237 pushWSDeque (WSDeque* q, void * elem)
238 {
239 StgWord t;
240 StgWord b;
241 StgWord sz = q->moduloSize;
242
243 ASSERT_WSDEQUE_INVARIANTS(q);
244
245 /* we try to avoid reading q->top (accessed by all) and use
246 q->topBound (accessed only by writer) instead.
247 This is why we do not just call empty(q) here.
248 */
249 b = q->bottom;
250 t = q->topBound;
251 if ( (StgInt)b - (StgInt)t >= (StgInt)sz ) {
252 /* NB. 1. sz == q->size - 1, thus ">="
253 2. signed comparison, it is possible that t > b
254 */
255 /* could be full, check the real top value in this case */
256 t = q->top;
257 q->topBound = t;
258 if (b - t >= sz) { /* really no space left :-( */
259 /* reallocate the array, copying the values. Concurrent steal()s
260 will in the meantime use the old one and modify only top.
261 This means: we cannot safely free the old space! Can keep it
262 on a free list internally here...
263
264 Potential bug in combination with steal(): if array is
265 replaced, it is unclear which one concurrent steal operations
266 use. Must read the array base address in advance in steal().
267 */
268 #if defined(DISCARD_NEW)
269 ASSERT_WSDEQUE_INVARIANTS(q);
270 return rtsFalse; // we didn't push anything
271 #else
272 /* could make room by incrementing the top position here. In
273 * this case, should use CASTOP. If this fails, someone else has
274 * removed something, and new room will be available.
275 */
276 ASSERT_WSDEQUE_INVARIANTS(q);
277 #endif
278 }
279 }
280
281 q->elements[b & sz] = elem;
282 q->bottom = b + 1;
283
284 ASSERT_WSDEQUE_INVARIANTS(q);
285 return rtsTrue;
286 }