rts/Updates.h
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2004
 *
 * Performing updates.
 *
 * ---------------------------------------------------------------------------*/

#ifndef UPDATES_H
#define UPDATES_H

/* -----------------------------------------------------------------------------
   Updates

   We have two layers of update macros.  The top layer, UPD_IND() and
   friends, performs all the work of an update.  In detail:

     - if the closure being updated is a blocking queue, then all the
       threads waiting on the blocking queue are updated.

     - then the lower-level updateWithIndirection() macro is invoked
       to actually replace the closure with an indirection (see below).

   -------------------------------------------------------------------------- */
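
/* For example, the entry code for an update frame uses the top layer
 * roughly like this (a minimal, hypothetical C-- sketch; the real code
 * lives in Updates.cmm, and the use of R1 for the result and of
 * StgUpdateFrame_updatee(Sp) for the updatee are assumptions here):
 *
 *     W_ updatee;
 *     updatee = StgUpdateFrame_updatee(Sp);
 *     UPD_IND(updatee, R1);            // overwrite with an indirection
 *     Sp = Sp + SIZEOF_StgUpdateFrame; // pop the update frame
 *     jump %ENTRY_CODE(Sp(0));
 */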

#ifdef TICKY_TICKY
# define UPD_IND(updclosure, heapptr) \
   UPD_PERM_IND(updclosure,heapptr)
# define UPD_SPEC_IND(updclosure, ind_info, heapptr, and_then) \
   UPD_PERM_IND(updclosure,heapptr); and_then
#else
# define SEMI ;
# define UPD_IND(updclosure, heapptr) \
   UPD_REAL_IND(updclosure,INFO_PTR(stg_IND_info),heapptr,SEMI)
# define UPD_SPEC_IND(updclosure, ind_info, heapptr, and_then) \
   UPD_REAL_IND(updclosure,ind_info,heapptr,and_then)
#endif

/* These macros have to work in both C and C--, so here's the
 * impedance matching (an expansion example follows the definitions
 * below):
 */
#ifdef CMINUSMINUS
#define BLOCK_BEGIN
#define BLOCK_END
#define DECLARE_IPTR(info)  W_ info
#define FCALL               foreign "C"
#define INFO_PTR(info)      info
#define ARG_PTR             "ptr"
#else
#define BLOCK_BEGIN         {
#define BLOCK_END           }
#define DECLARE_IPTR(info)  const StgInfoTable *(info)
#define FCALL               /* nothing */
#define INFO_PTR(info)      &info
#define StgBlockingQueue_blocking_queue(closure) \
    (((StgBlockingQueue *)closure)->blocking_queue)
#define ARG_PTR             /* nothing */
#endif
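
/* For instance, INFO_PTR(stg_IND_info) expands to plain stg_IND_info in
 * C-- (where an info-table label is already an address) but to
 * &stg_IND_info in C, so a construct like
 *
 *     SET_INFO(p1, INFO_PTR(stg_IND_info));
 *
 * works in both languages (this is exactly how UPD_IND feeds the
 * ind_info argument of UPD_REAL_IND below).  Similarly, FCALL marks a
 * call as foreign "C" only on the C-- side.
 */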

/* UPD_IND actually does a PERM_IND if TICKY_TICKY is on;
   if you *really* need an IND use UPD_REAL_IND
 */
#define UPD_REAL_IND(updclosure, ind_info, heapptr, and_then) \
        BLOCK_BEGIN \
        DECLARE_IPTR(info); \
        info = GET_INFO(updclosure); \
        updateWithIndirection(ind_info, \
                              updclosure, \
                              heapptr, \
                              and_then); \
        BLOCK_END

#if defined(PROFILING) || defined(TICKY_TICKY)
#define UPD_PERM_IND(updclosure, heapptr) \
        BLOCK_BEGIN \
        updateWithPermIndirection(updclosure, \
                                  heapptr); \
        BLOCK_END
#endif

#if defined(RTS_SUPPORTS_THREADS)

# ifdef TICKY_TICKY
#  define UPD_IND_NOLOCK(updclosure, heapptr) \
        BLOCK_BEGIN \
        updateWithPermIndirection(updclosure, \
                                  heapptr); \
        BLOCK_END
# else
#  define UPD_IND_NOLOCK(updclosure, heapptr) \
        BLOCK_BEGIN \
        updateWithIndirection(INFO_PTR(stg_IND_info), \
                              updclosure, \
                              heapptr,); \
        BLOCK_END
# endif

#else
#define UPD_IND_NOLOCK(updclosure,heapptr) UPD_IND(updclosure,heapptr)
#endif

/* -----------------------------------------------------------------------------
   Awaken any threads waiting on a blocking queue (BLACKHOLE_BQ).
   -------------------------------------------------------------------------- */

#if defined(PAR)

/*
  In a parallel setup several types of closures might have a blocking queue:
    BLACKHOLE_BQ ... same as in the default concurrent setup; it will be
                     reawakened via calling UPD_IND on that closure after
                     having finished the computation of the graph
    FETCH_ME_BQ  ... a global indirection (FETCH_ME) may be entered by a
                     local TSO, turning it into a FETCH_ME_BQ; it will be
                     reawakened via calling processResume
    RBH          ... a revertible black hole may be entered by another
                     local TSO, putting it onto its blocking queue; since
                     RBHs only exist while the corresponding closure is in
                     transit, they will be reawakened via calling
                     convertToFetchMe (upon processing an ACK message)

  In a parallel setup a blocking queue may contain 3 types of closures:
    TSO           ... as in the default concurrent setup
    BLOCKED_FETCH ... indicating that a TSO on another PE is waiting for
                      the result of the current computation
    CONSTR        ... an RBHSave closure (which contains data ripped out of
                      the closure to make room for a blocking queue; since
                      it only contains data we use the existing type of
                      a CONSTR closure); this closure is the end of a
                      blocking queue for an RBH closure; it only exists in
                      this kind of blocking queue and must be at the end
                      of the queue
*/
extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
#define DO_AWAKEN_BQ(bqe, node)  STGCALL2(awakenBlockedQueue, bqe, node);

#define AWAKEN_BQ(info,closure) \
        if (info == &stg_BLACKHOLE_BQ_info || \
            info == &stg_FETCH_ME_BQ_info || \
            get_itbl(closure)->type == RBH) { \
            DO_AWAKEN_BQ(((StgBlockingQueue *)closure)->blocking_queue, closure); \
        }

#elif defined(GRAN)

extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
#define DO_AWAKEN_BQ(bq, node)  STGCALL2(awakenBlockedQueue, bq, node);

/* In GranSim we don't have FETCH_ME or FETCH_ME_BQ closures, so they are
   not checked.  The rest of the code is the same as for GUM.
*/
#define AWAKEN_BQ(info,closure) \
        if (info == &stg_BLACKHOLE_BQ_info || \
            get_itbl(closure)->type == RBH) { \
            DO_AWAKEN_BQ(((StgBlockingQueue *)closure)->blocking_queue, closure); \
        }

#endif /* GRAN || PAR */


/* -----------------------------------------------------------------------------
   Updates: lower-level macros which update a closure with an
   indirection to another closure.

   There are several variants of this code: one for C-- and one for C,
   with extra behaviour under PROFILING, TICKY_TICKY and DEBUG.
   -------------------------------------------------------------------------- */

/* LDV profiling:
 * We call LDV_recordDead_FILL_SLOP_DYNAMIC(p1) regardless of the generation in
 * which p1 resides.
 *
 * Note:
 * Strictly speaking, we do *not* need to call LDV_RECORD_CREATE() for IND and
 * IND_OLDGEN closures, because they are inherently used.  But omitting the
 * call would break the invariant that every closure keeps its creation time
 * in the profiling field, so we call LDV_RECORD_CREATE() anyway.
 */

/* In the DEBUG case, we also zero out the slop of the old closure,
 * so that the sanity checker can tell where the next closure is.
 *
 * Two important invariants: we should never try to update a closure
 * to point to itself, and the closure being updated should not
 * already have been updated (the mutable list will get messed up
 * otherwise).
 *
 * NB. We do *not* do this in THREADED_RTS mode, because when we have the
 * possibility of multiple threads entering the same closure, zeroing
 * the slop in one of the threads would have a disastrous effect on
 * the other (seen in the wild!).
 */
#ifdef CMINUSMINUS

#define FILL_SLOP(p)                                                    \
    W_ inf;                                                             \
    W_ sz;                                                              \
    W_ i;                                                               \
    inf = %GET_STD_INFO(p);                                             \
    if (%INFO_TYPE(inf) != HALF_W_(BLACKHOLE)                           \
        && %INFO_TYPE(inf) != HALF_W_(CAF_BLACKHOLE)) {                 \
      if (%INFO_TYPE(inf) == HALF_W_(THUNK_SELECTOR)) {                 \
        sz = BYTES_TO_WDS(SIZEOF_StgSelector_NoThunkHdr);               \
      } else {                                                          \
        if (%INFO_TYPE(inf) == HALF_W_(AP_STACK)) {                     \
          sz = StgAP_STACK_size(p) + BYTES_TO_WDS(SIZEOF_StgAP_STACK_NoThunkHdr); \
        } else {                                                        \
          if (%INFO_TYPE(inf) == HALF_W_(AP)) {                         \
            sz = TO_W_(StgAP_n_args(p)) + BYTES_TO_WDS(SIZEOF_StgAP_NoThunkHdr); \
          } else {                                                      \
            sz = TO_W_(%INFO_PTRS(inf)) + TO_W_(%INFO_NPTRS(inf));      \
          }                                                             \
        }                                                               \
      }                                                                 \
      i = 0;                                                            \
    for:                                                                \
      if (i < sz) {                                                     \
        StgThunk_payload(p,i) = 0;                                      \
        i = i + 1;                                                      \
        goto for;                                                       \
      }                                                                 \
    }

#else /* !CMINUSMINUS */

INLINE_HEADER void
FILL_SLOP(StgClosure *p)
{
    StgInfoTable *inf = get_itbl(p);
    nat i, sz;

    switch (inf->type) {
    case BLACKHOLE:
    case CAF_BLACKHOLE:
    case THUNK_SELECTOR:
        sz = sizeofW(StgSelector) - sizeofW(StgThunkHeader);
        break;
    case AP:
        sz = ((StgAP *)p)->n_args + sizeofW(StgAP) - sizeofW(StgThunkHeader);
        break;
    case AP_STACK:
        sz = ((StgAP_STACK *)p)->size + sizeofW(StgAP_STACK) - sizeofW(StgThunkHeader);
        break;
    default:
        sz = inf->layout.payload.ptrs + inf->layout.payload.nptrs;
        break;
    }
    for (i = 0; i < sz; i++) {
        ((StgThunk *)p)->payload[i] = 0;
    }
}

#endif /* CMINUSMINUS */

#if !defined(DEBUG) || defined(THREADED_RTS)
#define DEBUG_FILL_SLOP(p) /* do nothing */
#else
#define DEBUG_FILL_SLOP(p) FILL_SLOP(p)
#endif

/* We have two versions of this macro (sadly), one for use in C-- code,
 * and the other for C.
 *
 * The and_then argument is a performance hack, so that we can paste in
 * the continuation code directly.  It helps shave a couple of
 * instructions off the common case in the update code, which is
 * worthwhile (the update code is often part of the inner loop).
 * (Except that gcc now appears to common up this code again and
 * invert the optimisation.  Grrrr --SDM.)
 */
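
/* For example, a caller can paste its continuation directly into the
 * update via UPD_SPEC_IND (a minimal, hypothetical C-- sketch; the
 * particular info table and jump target are assumptions here):
 *
 *     UPD_SPEC_IND(updatee, stg_IND_info, R1, jump %ENTRY_CODE(Sp(0)));
 *
 * The indirection is planted and control drops straight into the
 * continuation, with no extra jump in the common case.
 */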
#ifdef CMINUSMINUS
#define generation(n) (W_[generations] + n*SIZEOF_generation)
#define updateWithIndirection(ind_info, p1, p2, and_then)       \
    W_ bd;                                                      \
                                                                \
    DEBUG_FILL_SLOP(p1);                                        \
    LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);                      \
    StgInd_indirectee(p1) = p2;                                 \
    prim %write_barrier() [];                                   \
    bd = Bdescr(p1);                                            \
    if (bdescr_gen_no(bd) != 0 :: CInt) {                       \
      recordMutableCap(p1, TO_W_(bdescr_gen_no(bd)), R1);       \
      SET_INFO(p1, stg_IND_OLDGEN_info);                        \
      LDV_RECORD_CREATE(p1);                                    \
      TICK_UPD_OLD_IND();                                       \
      and_then;                                                 \
    } else {                                                    \
      SET_INFO(p1, ind_info);                                   \
      LDV_RECORD_CREATE(p1);                                    \
      TICK_UPD_NEW_IND();                                       \
      and_then;                                                 \
    }
#else
#define updateWithIndirection(ind_info, p1, p2, and_then)       \
  {                                                             \
    bdescr *bd;                                                 \
                                                                \
    /* cas(p1, 0, &stg_WHITEHOLE_info); */                      \
    ASSERT( (P_)p1 != (P_)p2 && !closure_IND(p1) );             \
    DEBUG_FILL_SLOP(p1);                                        \
    LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);                      \
    ((StgInd *)p1)->indirectee = p2;                            \
    write_barrier();                                            \
    bd = Bdescr((P_)p1);                                        \
    if (bd->gen_no != 0) {                                      \
      recordMutableGenLock(p1, &generations[bd->gen_no]);       \
      SET_INFO(p1, &stg_IND_OLDGEN_info);                       \
      TICK_UPD_OLD_IND();                                       \
      and_then;                                                 \
    } else {                                                    \
      SET_INFO(p1, ind_info);                                   \
      LDV_RECORD_CREATE(p1);                                    \
      TICK_UPD_NEW_IND();                                       \
      and_then;                                                 \
    }                                                           \
  }
#endif

/* The permanent indirection version isn't performance-critical.  We
 * therefore use an inline C function instead of the C-- macro.
 */
#ifndef CMINUSMINUS
INLINE_HEADER void
updateWithPermIndirection(StgClosure *p1,
                          StgClosure *p2)
{
    bdescr *bd;

    ASSERT( p1 != p2 && !closure_IND(p1) );

    /*
     * @LDV profiling
     * Destroy the old closure.
     * NB: LDV_* stuff cannot mix with ticky-ticky.
     */
    LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);

    bd = Bdescr((P_)p1);
    if (bd->gen_no != 0) {
        recordMutableGenLock(p1, &generations[bd->gen_no]);
        ((StgInd *)p1)->indirectee = p2;
        SET_INFO(p1, &stg_IND_OLDGEN_PERM_info);
        /*
         * @LDV profiling
         * We have just created a new closure.
         */
        LDV_RECORD_CREATE(p1);
        TICK_UPD_OLD_PERM_IND();
    } else {
        ((StgInd *)p1)->indirectee = p2;
        SET_INFO(p1, &stg_IND_PERM_info);
        /*
         * @LDV profiling
         * We have just created a new closure.
         */
        LDV_RECORD_CREATE(p1);
        TICK_UPD_NEW_PERM_IND(p1);
    }
}
#endif

#endif /* UPDATES_H */