e0a68f68d8af0af324e3480d90835f766fe282eb
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 {-# LANGUAGE CPP #-}
2
3 ----------------------------------------------------------------------------
4 --
5 -- Stg to C--: primitive operations
6 --
7 -- (c) The University of Glasgow 2004-2006
8 --
9 -----------------------------------------------------------------------------
10
11 module StgCmmPrim (
12 cgOpApp,
13 cgPrimOp, -- internal(ish), used by cgCase to get code for a
14 -- comparison without also turning it into a Bool.
15 shouldInlinePrimOp
16 ) where
17
18 #include "HsVersions.h"
19
20 import StgCmmLayout
21 import StgCmmForeign
22 import StgCmmEnv
23 import StgCmmMonad
24 import StgCmmUtils
25 import StgCmmTicky
26 import StgCmmHeap
27 import StgCmmProf ( costCentreFrom, curCCS )
28
29 import DynFlags
30 import Platform
31 import BasicTypes
32 import BlockId
33 import MkGraph
34 import StgSyn
35 import Cmm
36 import CmmInfo
37 import Type ( Type, tyConAppTyCon )
38 import TyCon
39 import CLabel
40 import CmmUtils
41 import PrimOp
42 import SMRep
43 import FastString
44 import Outputable
45 import Util
46
47 import Prelude hiding ((<*>))
48
49 import Data.Bits ((.&.), bit)
50 import Control.Monad (liftM, when)
51
52 ------------------------------------------------------------------------
53 -- Primitive operations and foreign calls
54 ------------------------------------------------------------------------
55
56 {- Note [Foreign call results]
57 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
58 A foreign call always returns an unboxed tuple of results, one
59 of which is the state token. This seems to happen even for pure
60 calls.
61
62 Even if we returned a single result for pure calls, it'd still be
63 right to wrap it in a singleton unboxed tuple, because the result
64 might be a Haskell closure pointer, we don't want to evaluate it. -}
65
66 ----------------------------------
67 cgOpApp :: StgOp -- The op
68 -> [StgArg] -- Arguments
69 -> Type -- Result type (always an unboxed tuple)
70 -> FCode ReturnKind
71
72 -- Foreign calls
73 cgOpApp (StgFCallOp fcall _) stg_args res_ty
74 = cgForeignCall fcall stg_args res_ty
75 -- Note [Foreign call results]
76
77 -- tagToEnum# is special: we need to pull the constructor
78 -- out of the table, and perform an appropriate return.
79
80 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
81 = ASSERT(isEnumerationTyCon tycon)
82 do { dflags <- getDynFlags
83 ; args' <- getNonVoidArgAmodes [arg]
84 ; let amode = case args' of [amode] -> amode
85 _ -> panic "TagToEnumOp had void arg"
86 ; emitReturn [tagToClosure dflags tycon amode] }
87 where
88 -- If you're reading this code in the attempt to figure
89 -- out why the compiler panic'ed here, it is probably because
90 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
91 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
92 -- That won't work.
93 tycon = tyConAppTyCon res_ty
94
95 cgOpApp (StgPrimOp primop) args res_ty = do
96 dflags <- getDynFlags
97 cmm_args <- getNonVoidArgAmodes args
98 case shouldInlinePrimOp dflags primop cmm_args of
99 Nothing -> do -- out-of-line
100 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
101 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
102
103 Just f -- inline
104 | ReturnsPrim VoidRep <- result_info
105 -> do f []
106 emitReturn []
107
108 | ReturnsPrim rep <- result_info
109 -> do dflags <- getDynFlags
110 res <- newTemp (primRepCmmType dflags rep)
111 f [res]
112 emitReturn [CmmReg (CmmLocal res)]
113
114 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
115 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
116 f regs
117 emitReturn (map (CmmReg . CmmLocal) regs)
118
119 | otherwise -> panic "cgPrimop"
120 where
121 result_info = getPrimOpResultInfo primop
122
123 cgOpApp (StgPrimCallOp primcall) args _res_ty
124 = do { cmm_args <- getNonVoidArgAmodes args
125 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
126 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
127
128 -- | Interpret the argument as an unsigned value, assuming the value
129 -- is given in two-complement form in the given width.
130 --
131 -- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
132 --
133 -- This function is used to work around the fact that many array
134 -- primops take Int# arguments, but we interpret them as unsigned
135 -- quantities in the code gen. This means that we have to be careful
136 -- every time we work on e.g. a CmmInt literal that corresponds to the
137 -- array size, as it might contain a negative Integer value if the
138 -- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
139 -- literal.
140 asUnsigned :: Width -> Integer -> Integer
141 asUnsigned w n = n .&. (bit (widthInBits w) - 1)
142
143 -- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
144 -- ByteOff (or some other fixed width signed type) to represent
145 -- array sizes or indices. This means that these will overflow for
146 -- large enough sizes.
147
148 -- | Decide whether an out-of-line primop should be replaced by an
149 -- inline implementation. This might happen e.g. if there's enough
150 -- static information, such as statically know arguments, to emit a
151 -- more efficient implementation inline.
152 --
153 -- Returns 'Nothing' if this primop should use its out-of-line
154 -- implementation (defined elsewhere) and 'Just' together with a code
155 -- generating function that takes the output regs as arguments
156 -- otherwise.
157 shouldInlinePrimOp :: DynFlags
158 -> PrimOp -- ^ The primop
159 -> [CmmExpr] -- ^ The primop arguments
160 -> Maybe ([LocalReg] -> FCode ())
161
162 shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
163 | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
164 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
165
166 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
167 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
168 Just $ \ [res] ->
169 doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
170 [ (mkIntExpr dflags (fromInteger n),
171 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
172 , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
173 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
174 ]
175 (fromInteger n) init
176
177 shouldInlinePrimOp _ CopyArrayOp
178 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
179 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
180
181 shouldInlinePrimOp _ CopyMutableArrayOp
182 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
183 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
184
185 shouldInlinePrimOp _ CopyArrayArrayOp
186 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
187 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
188
189 shouldInlinePrimOp _ CopyMutableArrayArrayOp
190 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
191 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
192
193 shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
194 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
195 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
196
197 shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
198 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
199 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
200
201 shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
202 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
203 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
204
205 shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
206 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
207 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
208
209 shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
210 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
211 Just $ \ [res] ->
212 doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
213 [ (mkIntExpr dflags (fromInteger n),
214 fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
215 ]
216 (fromInteger n) init
217
218 shouldInlinePrimOp _ CopySmallArrayOp
219 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
220 Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
221
222 shouldInlinePrimOp _ CopySmallMutableArrayOp
223 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
224 Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
225
226 shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
227 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
228 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
229
230 shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
231 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
232 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
233
234 shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
235 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
236 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
237
238 shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
239 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
240 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
241
242 shouldInlinePrimOp dflags primop args
243 | primOpOutOfLine primop = Nothing
244 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
245
246 -- TODO: Several primops, such as 'copyArray#', only have an inline
247 -- implementation (below) but could possibly have both an inline
248 -- implementation and an out-of-line implementation, just like
249 -- 'newArray#'. This would lower the amount of code generated,
250 -- hopefully without a performance impact (needs to be measured).
251
252 ---------------------------------------------------
253 cgPrimOp :: [LocalReg] -- where to put the results
254 -> PrimOp -- the op
255 -> [StgArg] -- arguments
256 -> FCode ()
257
258 cgPrimOp results op args
259 = do dflags <- getDynFlags
260 arg_exprs <- getNonVoidArgAmodes args
261 emitPrimOp dflags results op arg_exprs
262
263
264 ------------------------------------------------------------------------
265 -- Emitting code for a primop
266 ------------------------------------------------------------------------
267
268 emitPrimOp :: DynFlags
269 -> [LocalReg] -- where to put the results
270 -> PrimOp -- the op
271 -> [CmmExpr] -- arguments
272 -> FCode ()
273
274 -- First we handle various awkward cases specially. The remaining
275 -- easy cases are then handled by translateOp, defined below.
276
277 emitPrimOp _ [res] ParOp [arg]
278 =
279 -- for now, just implement this in a C function
280 -- later, we might want to inline it.
281 emitCCall
282 [(res,NoHint)]
283 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
284 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
285
286 emitPrimOp dflags [res] SparkOp [arg]
287 = do
288 -- returns the value of arg in res. We're going to therefore
289 -- refer to arg twice (once to pass to newSpark(), and once to
290 -- assign to res), so put it in a temporary.
291 tmp <- assignTemp arg
292 tmp2 <- newTemp (bWord dflags)
293 emitCCall
294 [(tmp2,NoHint)]
295 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
296 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
297 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
298
299 emitPrimOp dflags [res] GetCCSOfOp [arg]
300 = emitAssign (CmmLocal res) val
301 where
302 val
303 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
304 | otherwise = CmmLit (zeroCLit dflags)
305
306 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
307 = emitAssign (CmmLocal res) curCCS
308
309 emitPrimOp dflags [res] ReadMutVarOp [mutv]
310 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
311
312 emitPrimOp dflags res@[] WriteMutVarOp [mutv,var]
313 = do -- Without this write barrier, other CPUs may see this pointer before
314 -- the writes for the closure it points to have occurred.
315 emitPrimCall res MO_WriteBarrier []
316 emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
317 emitCCall
318 [{-no results-}]
319 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
320 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
321
322 -- #define sizzeofByteArrayzh(r,a) \
323 -- r = ((StgArrBytes *)(a))->bytes
324 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
325 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
326
327 -- #define sizzeofMutableByteArrayzh(r,a) \
328 -- r = ((StgArrBytes *)(a))->bytes
329 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
330 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
331
332 -- #define getSizzeofMutableByteArrayzh(r,a) \
333 -- r = ((StgArrBytes *)(a))->bytes
334 emitPrimOp dflags [res] GetSizeofMutableByteArrayOp [arg]
335 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
336
337
338 -- #define touchzh(o) /* nothing */
339 emitPrimOp _ res@[] TouchOp args@[_arg]
340 = do emitPrimCall res MO_Touch args
341
342 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
343 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
344 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
345
346 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
347 emitPrimOp dflags [res] StableNameToIntOp [arg]
348 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
349
350 -- #define eqStableNamezh(r,sn1,sn2) \
351 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
352 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
353 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
354 cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags),
355 cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags)
356 ])
357
358 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
359 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
360
361 -- #define addrToHValuezh(r,a) r=(P_)a
362 emitPrimOp _ [res] AddrToAnyOp [arg]
363 = emitAssign (CmmLocal res) arg
364
365 -- #define hvalueToAddrzh(r, a) r=(W_)a
366 emitPrimOp _ [res] AnyToAddrOp [arg]
367 = emitAssign (CmmLocal res) arg
368
369 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
370 -- Note: argument may be tagged!
371 emitPrimOp dflags [res] DataToTagOp [arg]
372 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
373
374 {- Freezing arrays-of-ptrs requires changing an info table, for the
375 benefit of the generational collector. It needs to scavenge mutable
376 objects, even if they are in old space. When they become immutable,
377 they can be removed from this scavenge list. -}
378
379 -- #define unsafeFreezzeArrayzh(r,a)
380 -- {
381 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
382 -- r = a;
383 -- }
384 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
385 = emit $ catAGraphs
386 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
387 mkAssign (CmmLocal res) arg ]
388 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
389 = emit $ catAGraphs
390 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
391 mkAssign (CmmLocal res) arg ]
392 emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
393 = emit $ catAGraphs
394 [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)),
395 mkAssign (CmmLocal res) arg ]
396
397 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
398 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
399 = emitAssign (CmmLocal res) arg
400
401 -- Reading/writing pointer arrays
402
403 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
404 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
405 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
406
407 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
408 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
409 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
410 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
411 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
412 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
413 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
414 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
415 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
416 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
417
418 emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
419 emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
420 emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
421
422 -- Getting the size of pointer arrays
423
424 emitPrimOp dflags [res] SizeofArrayOp [arg]
425 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg
426 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgMutArrPtrs_ptrs dflags))
427 (bWord dflags))
428 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
429 = emitPrimOp dflags [res] SizeofArrayOp [arg]
430 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
431 = emitPrimOp dflags [res] SizeofArrayOp [arg]
432 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
433 = emitPrimOp dflags [res] SizeofArrayOp [arg]
434
435 emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
436 emit $ mkAssign (CmmLocal res)
437 (cmmLoadIndexW dflags arg
438 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgSmallMutArrPtrs_ptrs dflags))
439 (bWord dflags))
440 emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
441 emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
442
443 -- IndexXXXoffAddr
444
445 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
446 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
447 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
448 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
449 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
450 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
451 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
452 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
453 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
454 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
455 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
456 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
457 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
458 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
459 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
460 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
461
462 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
463
464 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
465 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
466 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
467 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
468 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
469 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
470 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
471 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
472 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
473 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
474 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
475 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
476 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
477 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
478 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
479 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
480
481 -- IndexXXXArray
482
483 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
484 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
485 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
486 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
487 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
488 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
489 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
490 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
491 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
492 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
493 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
494 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
495 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
496 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
497 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
498 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
499
500 -- ReadXXXArray, identical to IndexXXXArray.
501
502 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
503 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
504 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
505 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
506 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
507 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
508 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
509 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
510 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
511 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
512 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
513 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
514 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
515 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
516 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
517 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
518
519 -- WriteXXXoffAddr
520
521 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
522 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
523 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
524 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
525 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
526 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
527 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
528 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
529 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
530 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
531 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
532 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
533 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
534 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
535 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
536 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
537
538 -- WriteXXXArray
539
540 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
541 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
542 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
543 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
544 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
545 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
546 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
547 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
548 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
549 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
550 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
551 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
552 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
553 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
554 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
555 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
556
557 -- Copying and setting byte arrays
558 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
559 doCopyByteArrayOp src src_off dst dst_off n
560 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
561 doCopyMutableByteArrayOp src src_off dst dst_off n
562 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
563 doCopyByteArrayToAddrOp src src_off dst n
564 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
565 doCopyMutableByteArrayToAddrOp src src_off dst n
566 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
567 doCopyAddrToByteArrayOp src dst dst_off n
568 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
569 doSetByteArrayOp ba off len c
570
571 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
572 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
573 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
574 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
575
576 -- Population count
577 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
578 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
579 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
580 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
581 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
582
583 -- count leading zeros
584 emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
585 emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
586 emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
587 emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
588 emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
589
590 -- count trailing zeros
591 emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
592 emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
593 emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
594 emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
595 emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
596
597 -- Unsigned int to floating point conversions
598 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
599 (MO_UF_Conv W32) [w]
600 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
601 (MO_UF_Conv W64) [w]
602
603 -- SIMD primops
604 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
605 checkVecCompatibility dflags vcat n w
606 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
607 where
608 zeros :: CmmExpr
609 zeros = CmmLit $ CmmVec (replicate n zero)
610
611 zero :: CmmLit
612 zero = case vcat of
613 IntVec -> CmmInt 0 w
614 WordVec -> CmmInt 0 w
615 FloatVec -> CmmFloat 0 w
616
617 ty :: CmmType
618 ty = vecVmmType vcat n w
619
620 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
621 checkVecCompatibility dflags vcat n w
622 when (length es /= n) $
623 panic "emitPrimOp: VecPackOp has wrong number of arguments"
624 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
625 where
626 zeros :: CmmExpr
627 zeros = CmmLit $ CmmVec (replicate n zero)
628
629 zero :: CmmLit
630 zero = case vcat of
631 IntVec -> CmmInt 0 w
632 WordVec -> CmmInt 0 w
633 FloatVec -> CmmFloat 0 w
634
635 ty :: CmmType
636 ty = vecVmmType vcat n w
637
638 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
639 checkVecCompatibility dflags vcat n w
640 when (length res /= n) $
641 panic "emitPrimOp: VecUnpackOp has wrong number of results"
642 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
643 where
644 ty :: CmmType
645 ty = vecVmmType vcat n w
646
647 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
648 checkVecCompatibility dflags vcat n w
649 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
650 where
651 ty :: CmmType
652 ty = vecVmmType vcat n w
653
654 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
655 checkVecCompatibility dflags vcat n w
656 doIndexByteArrayOp Nothing ty res args
657 where
658 ty :: CmmType
659 ty = vecVmmType vcat n w
660
661 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
662 checkVecCompatibility dflags vcat n w
663 doIndexByteArrayOp Nothing ty res args
664 where
665 ty :: CmmType
666 ty = vecVmmType vcat n w
667
668 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
669 checkVecCompatibility dflags vcat n w
670 doWriteByteArrayOp Nothing ty res args
671 where
672 ty :: CmmType
673 ty = vecVmmType vcat n w
674
675 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
676 checkVecCompatibility dflags vcat n w
677 doIndexOffAddrOp Nothing ty res args
678 where
679 ty :: CmmType
680 ty = vecVmmType vcat n w
681
682 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
683 checkVecCompatibility dflags vcat n w
684 doIndexOffAddrOp Nothing ty res args
685 where
686 ty :: CmmType
687 ty = vecVmmType vcat n w
688
689 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
690 checkVecCompatibility dflags vcat n w
691 doWriteOffAddrOp Nothing ty res args
692 where
693 ty :: CmmType
694 ty = vecVmmType vcat n w
695
696 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
697 checkVecCompatibility dflags vcat n w
698 doIndexByteArrayOpAs Nothing vecty ty res args
699 where
700 vecty :: CmmType
701 vecty = vecVmmType vcat n w
702
703 ty :: CmmType
704 ty = vecCmmCat vcat w
705
706 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
707 checkVecCompatibility dflags vcat n w
708 doIndexByteArrayOpAs Nothing vecty ty res args
709 where
710 vecty :: CmmType
711 vecty = vecVmmType vcat n w
712
713 ty :: CmmType
714 ty = vecCmmCat vcat w
715
716 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
717 checkVecCompatibility dflags vcat n w
718 doWriteByteArrayOp Nothing ty res args
719 where
720 ty :: CmmType
721 ty = vecCmmCat vcat w
722
723 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
724 checkVecCompatibility dflags vcat n w
725 doIndexOffAddrOpAs Nothing vecty ty res args
726 where
727 vecty :: CmmType
728 vecty = vecVmmType vcat n w
729
730 ty :: CmmType
731 ty = vecCmmCat vcat w
732
733 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
734 checkVecCompatibility dflags vcat n w
735 doIndexOffAddrOpAs Nothing vecty ty res args
736 where
737 vecty :: CmmType
738 vecty = vecVmmType vcat n w
739
740 ty :: CmmType
741 ty = vecCmmCat vcat w
742
743 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
744 checkVecCompatibility dflags vcat n w
745 doWriteOffAddrOp Nothing ty res args
746 where
747 ty :: CmmType
748 ty = vecCmmCat vcat w
749
750 -- Prefetch
751 emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
752 emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
753 emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
754 emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
755
756 emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
757 emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
758 emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
759 emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
760
761 emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
762 emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
763 emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
764 emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
765
766 emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
767 emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
768 emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
769 emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
770
771 -- Atomic read-modify-write
772 emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
773 doAtomicRMW res AMO_Add mba ix (bWord dflags) n
774 emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
775 doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
776 emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
777 doAtomicRMW res AMO_And mba ix (bWord dflags) n
778 emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
779 doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
780 emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
781 doAtomicRMW res AMO_Or mba ix (bWord dflags) n
782 emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
783 doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
784 emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
785 doAtomicReadByteArray res mba ix (bWord dflags)
786 emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
787 doAtomicWriteByteArray mba ix (bWord dflags) val
788 emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
789 doCasByteArray res mba ix (bWord dflags) old new
790
791 -- The rest just translate straightforwardly
792 emitPrimOp dflags [res] op [arg]
793 | nopOp op
794 = emitAssign (CmmLocal res) arg
795
796 | Just (mop,rep) <- narrowOp op
797 = emitAssign (CmmLocal res) $
798 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
799
800 emitPrimOp dflags r@[res] op args
801 | Just prim <- callishOp op
802 = do emitPrimCall r prim args
803
804 | Just mop <- translateOp dflags op
805 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
806 emit stmt
807
808 emitPrimOp dflags results op args
809 = case callishPrimOpSupported dflags op of
810 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
811 Right gen -> gen results args
812
813 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
814
815 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
816 callishPrimOpSupported dflags op
817 = case op of
818 IntQuotRemOp | ncg && (x86ish
819 || ppc) -> Left (MO_S_QuotRem (wordWidth dflags))
820 | otherwise -> Right (genericIntQuotRemOp dflags)
821
822 WordQuotRemOp | ncg && (x86ish
823 || ppc) -> Left (MO_U_QuotRem (wordWidth dflags))
824 | otherwise -> Right (genericWordQuotRemOp dflags)
825
826 WordQuotRem2Op | (ncg && (x86ish
827 || ppc))
828 || llvm -> Left (MO_U_QuotRem2 (wordWidth dflags))
829 | otherwise -> Right (genericWordQuotRem2Op dflags)
830
831 WordAdd2Op | (ncg && (x86ish
832 || ppc))
833 || llvm -> Left (MO_Add2 (wordWidth dflags))
834 | otherwise -> Right genericWordAdd2Op
835
836 WordSubCOp | (ncg && (x86ish
837 || ppc))
838 || llvm -> Left (MO_SubWordC (wordWidth dflags))
839 | otherwise -> Right genericWordSubCOp
840
841 IntAddCOp | (ncg && (x86ish
842 || ppc))
843 || llvm -> Left (MO_AddIntC (wordWidth dflags))
844 | otherwise -> Right genericIntAddCOp
845
846 IntSubCOp | (ncg && (x86ish
847 || ppc))
848 || llvm -> Left (MO_SubIntC (wordWidth dflags))
849 | otherwise -> Right genericIntSubCOp
850
851 WordMul2Op | ncg && (x86ish
852 || ppc)
853 || llvm -> Left (MO_U_Mul2 (wordWidth dflags))
854 | otherwise -> Right genericWordMul2Op
855 FloatFabsOp | (ncg && x86ish
856 || ppc)
857 || llvm -> Left MO_F32_Fabs
858 | otherwise -> Right $ genericFabsOp W32
859 DoubleFabsOp | (ncg && x86ish
860 || ppc)
861 || llvm -> Left MO_F64_Fabs
862 | otherwise -> Right $ genericFabsOp W64
863
864 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
865 where
866 ncg = case hscTarget dflags of
867 HscAsm -> True
868 _ -> False
869 llvm = case hscTarget dflags of
870 HscLlvm -> True
871 _ -> False
872 x86ish = case platformArch (targetPlatform dflags) of
873 ArchX86 -> True
874 ArchX86_64 -> True
875 _ -> False
876 ppc = case platformArch (targetPlatform dflags) of
877 ArchPPC -> True
878 ArchPPC_64 _ -> True
879 _ -> False
880
881 genericIntQuotRemOp :: DynFlags -> GenericOp
882 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
883 = emit $ mkAssign (CmmLocal res_q)
884 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
885 mkAssign (CmmLocal res_r)
886 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
887 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
888
889 genericWordQuotRemOp :: DynFlags -> GenericOp
890 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
891 = emit $ mkAssign (CmmLocal res_q)
892 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
893 mkAssign (CmmLocal res_r)
894 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
895 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
896
897 genericWordQuotRem2Op :: DynFlags -> GenericOp
898 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
899 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
900 where ty = cmmExprType dflags arg_x_high
901 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
902 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
903 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
904 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
905 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
906 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
907 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
908 zero = lit 0
909 one = lit 1
910 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
911 lit i = CmmLit (CmmInt i (wordWidth dflags))
912
913 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
914 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
915 mkAssign (CmmLocal res_r) high)
916 f i acc high low =
917 do roverflowedBit <- newTemp ty
918 rhigh' <- newTemp ty
919 rhigh'' <- newTemp ty
920 rlow' <- newTemp ty
921 risge <- newTemp ty
922 racc' <- newTemp ty
923 let high' = CmmReg (CmmLocal rhigh')
924 isge = CmmReg (CmmLocal risge)
925 overflowedBit = CmmReg (CmmLocal roverflowedBit)
926 let this = catAGraphs
927 [mkAssign (CmmLocal roverflowedBit)
928 (shr high negone),
929 mkAssign (CmmLocal rhigh')
930 (or (shl high one) (shr low negone)),
931 mkAssign (CmmLocal rlow')
932 (shl low one),
933 mkAssign (CmmLocal risge)
934 (or (overflowedBit `ne` zero)
935 (high' `ge` arg_y)),
936 mkAssign (CmmLocal rhigh'')
937 (high' `minus` (arg_y `times` isge)),
938 mkAssign (CmmLocal racc')
939 (or (shl acc one) isge)]
940 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
941 (CmmReg (CmmLocal rhigh''))
942 (CmmReg (CmmLocal rlow'))
943 return (this <*> rest)
944 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
945
946 genericWordAdd2Op :: GenericOp
947 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
948 = do dflags <- getDynFlags
949 r1 <- newTemp (cmmExprType dflags arg_x)
950 r2 <- newTemp (cmmExprType dflags arg_x)
951 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
952 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
953 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
954 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
955 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
956 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
957 (wordWidth dflags))
958 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
959 emit $ catAGraphs
960 [mkAssign (CmmLocal r1)
961 (add (bottomHalf arg_x) (bottomHalf arg_y)),
962 mkAssign (CmmLocal r2)
963 (add (topHalf (CmmReg (CmmLocal r1)))
964 (add (topHalf arg_x) (topHalf arg_y))),
965 mkAssign (CmmLocal res_h)
966 (topHalf (CmmReg (CmmLocal r2))),
967 mkAssign (CmmLocal res_l)
968 (or (toTopHalf (CmmReg (CmmLocal r2)))
969 (bottomHalf (CmmReg (CmmLocal r1))))]
970 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
971
972 genericWordSubCOp :: GenericOp
973 genericWordSubCOp [res_r, res_c] [aa, bb] = do
974 dflags <- getDynFlags
975 emit $ catAGraphs
976 [ -- Put the result into 'res_r'.
977 mkAssign (CmmLocal res_r) $
978 CmmMachOp (mo_wordSub dflags) [aa, bb]
979 -- Set 'res_c' to 1 if 'bb > aa' and to 0 otherwise.
980 , mkAssign (CmmLocal res_c) $
981 CmmMachOp (mo_wordUGt dflags) [bb, aa]
982 ]
983 genericWordSubCOp _ _ = panic "genericWordSubCOp"
984
985 genericIntAddCOp :: GenericOp
986 genericIntAddCOp [res_r, res_c] [aa, bb]
987 {-
988 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
989 C, and without needing any comparisons. This may not be the
990 fastest way to do it - if you have better code, please send it! --SDM
991
992 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
993
994 We currently don't make use of the r value if c is != 0 (i.e.
995 overflow), we just convert to big integers and try again. This
996 could be improved by making r and c the correct values for
997 plugging into a new J#.
998
999 { r = ((I_)(a)) + ((I_)(b)); \
1000 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
1001 >> (BITS_IN (I_) - 1); \
1002 }
1003 Wading through the mass of bracketry, it seems to reduce to:
1004 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
1005
1006 -}
1007 = do dflags <- getDynFlags
1008 emit $ catAGraphs [
1009 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
1010 mkAssign (CmmLocal res_c) $
1011 CmmMachOp (mo_wordUShr dflags) [
1012 CmmMachOp (mo_wordAnd dflags) [
1013 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
1014 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
1015 ],
1016 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1017 ]
1018 ]
1019 genericIntAddCOp _ _ = panic "genericIntAddCOp"
1020
1021 genericIntSubCOp :: GenericOp
1022 genericIntSubCOp [res_r, res_c] [aa, bb]
1023 {- Similarly:
1024 #define subIntCzh(r,c,a,b) \
1025 { r = ((I_)(a)) - ((I_)(b)); \
1026 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
1027 >> (BITS_IN (I_) - 1); \
1028 }
1029
1030 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
1031 -}
1032 = do dflags <- getDynFlags
1033 emit $ catAGraphs [
1034 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
1035 mkAssign (CmmLocal res_c) $
1036 CmmMachOp (mo_wordUShr dflags) [
1037 CmmMachOp (mo_wordAnd dflags) [
1038 CmmMachOp (mo_wordXor dflags) [aa,bb],
1039 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
1040 ],
1041 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1042 ]
1043 ]
1044 genericIntSubCOp _ _ = panic "genericIntSubCOp"
1045
1046 genericWordMul2Op :: GenericOp
1047 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
1048 = do dflags <- getDynFlags
1049 let t = cmmExprType dflags arg_x
1050 xlyl <- liftM CmmLocal $ newTemp t
1051 xlyh <- liftM CmmLocal $ newTemp t
1052 xhyl <- liftM CmmLocal $ newTemp t
1053 r <- liftM CmmLocal $ newTemp t
1054 -- This generic implementation is very simple and slow. We might
1055 -- well be able to do better, but for now this at least works.
1056 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
1057 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
1058 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
1059 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
1060 sum = foldl1 add
1061 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
1062 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
1063 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
1064 (wordWidth dflags))
1065 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
1066 emit $ catAGraphs
1067 [mkAssign xlyl
1068 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
1069 mkAssign xlyh
1070 (mul (bottomHalf arg_x) (topHalf arg_y)),
1071 mkAssign xhyl
1072 (mul (topHalf arg_x) (bottomHalf arg_y)),
1073 mkAssign r
1074 (sum [topHalf (CmmReg xlyl),
1075 bottomHalf (CmmReg xhyl),
1076 bottomHalf (CmmReg xlyh)]),
1077 mkAssign (CmmLocal res_l)
1078 (or (bottomHalf (CmmReg xlyl))
1079 (toTopHalf (CmmReg r))),
1080 mkAssign (CmmLocal res_h)
1081 (sum [mul (topHalf arg_x) (topHalf arg_y),
1082 topHalf (CmmReg xhyl),
1083 topHalf (CmmReg xlyh),
1084 topHalf (CmmReg r)])]
1085 genericWordMul2Op _ _ = panic "genericWordMul2Op"
1086
1087 -- This replicates what we had in libraries/base/GHC/Float.hs:
1088 --
1089 -- abs x | x == 0 = 0 -- handles (-0.0)
1090 -- | x > 0 = x
1091 -- | otherwise = negateFloat x
1092 genericFabsOp :: Width -> GenericOp
1093 genericFabsOp w [res_r] [aa]
1094 = do dflags <- getDynFlags
1095 let zero = CmmLit (CmmFloat 0 w)
1096
1097 eq x y = CmmMachOp (MO_F_Eq w) [x, y]
1098 gt x y = CmmMachOp (MO_F_Gt w) [x, y]
1099
1100 neg x = CmmMachOp (MO_F_Neg w) [x]
1101
1102 g1 = catAGraphs [mkAssign (CmmLocal res_r) zero]
1103 g2 = catAGraphs [mkAssign (CmmLocal res_r) aa]
1104
1105 res_t <- CmmLocal <$> newTemp (cmmExprType dflags aa)
1106 let g3 = catAGraphs [mkAssign res_t aa,
1107 mkAssign (CmmLocal res_r) (neg (CmmReg res_t))]
1108
1109 g4 <- mkCmmIfThenElse (gt aa zero) g2 g3
1110
1111 emit =<< mkCmmIfThenElse (eq aa zero) g1 g4
1112
1113 genericFabsOp _ _ _ = panic "genericFabsOp"
1114
1115 -- These PrimOps are NOPs in Cmm
1116
1117 nopOp :: PrimOp -> Bool
1118 nopOp Int2WordOp = True
1119 nopOp Word2IntOp = True
1120 nopOp Int2AddrOp = True
1121 nopOp Addr2IntOp = True
1122 nopOp ChrOp = True -- Int# and Char# are rep'd the same
1123 nopOp OrdOp = True
1124 nopOp _ = False
1125
1126 -- These PrimOps turn into double casts
1127
1128 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
1129 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
1130 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
1131 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
1132 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
1133 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
1134 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
1135 narrowOp _ = Nothing
1136
1137 -- Native word signless ops
1138
1139 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
1140 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
1141 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
1142 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
1143 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
1144 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
1145 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
1146
1147 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
1148 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
1149 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
1150 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
1151 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
1152 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
1153
1154 translateOp dflags AndOp = Just (mo_wordAnd dflags)
1155 translateOp dflags OrOp = Just (mo_wordOr dflags)
1156 translateOp dflags XorOp = Just (mo_wordXor dflags)
1157 translateOp dflags NotOp = Just (mo_wordNot dflags)
1158 translateOp dflags SllOp = Just (mo_wordShl dflags)
1159 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
1160
1161 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
1162
1163 -- Native word signed ops
1164
1165 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
1166 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
1167 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
1168 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
1169 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
1170
1171
1172 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
1173 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
1174 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
1175 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
1176
1177 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
1178 translateOp dflags OrIOp = Just (mo_wordOr dflags)
1179 translateOp dflags XorIOp = Just (mo_wordXor dflags)
1180 translateOp dflags NotIOp = Just (mo_wordNot dflags)
1181 translateOp dflags ISllOp = Just (mo_wordShl dflags)
1182 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
1183 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
1184
1185 -- Native word unsigned ops
1186
1187 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
1188 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
1189 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
1190 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
1191
1192 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
1193 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
1194 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
1195
1196 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
1197 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
1198 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
1199 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
1200
1201 -- Char# ops
1202
1203 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
1204 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
1205 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
1206 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
1207 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
1208 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
1209
1210 -- Double ops
1211
1212 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
1213 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
1214 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
1215 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
1216 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
1217 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
1218
1219 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
1220 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
1221 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
1222 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
1223 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
1224
1225 -- Float ops
1226
1227 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
1228 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
1229 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
1230 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1231 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1232 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1233
1234 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1235 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1236 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1237 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1238 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1239
1240 -- Vector ops
1241
1242 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1243 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1244 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1245 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1246 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1247
1248 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1249 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1250 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1251 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1252 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1253 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1254
1255 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1256 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1257 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1258 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1259 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1260
1261 -- Conversions
1262
1263 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1264 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1265
1266 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1267 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1268
1269 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1270 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1271
1272 -- Word comparisons masquerading as more exotic things.
1273
1274 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1275 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1276 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1277 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1278 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1279 translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
1280 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1281 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1282
1283 translateOp _ _ = Nothing
1284
1285 -- These primops are implemented by CallishMachOps, because they sometimes
1286 -- turn into foreign calls depending on the backend.
1287
1288 callishOp :: PrimOp -> Maybe CallishMachOp
1289 callishOp DoublePowerOp = Just MO_F64_Pwr
1290 callishOp DoubleSinOp = Just MO_F64_Sin
1291 callishOp DoubleCosOp = Just MO_F64_Cos
1292 callishOp DoubleTanOp = Just MO_F64_Tan
1293 callishOp DoubleSinhOp = Just MO_F64_Sinh
1294 callishOp DoubleCoshOp = Just MO_F64_Cosh
1295 callishOp DoubleTanhOp = Just MO_F64_Tanh
1296 callishOp DoubleAsinOp = Just MO_F64_Asin
1297 callishOp DoubleAcosOp = Just MO_F64_Acos
1298 callishOp DoubleAtanOp = Just MO_F64_Atan
1299 callishOp DoubleLogOp = Just MO_F64_Log
1300 callishOp DoubleExpOp = Just MO_F64_Exp
1301 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1302
1303 callishOp FloatPowerOp = Just MO_F32_Pwr
1304 callishOp FloatSinOp = Just MO_F32_Sin
1305 callishOp FloatCosOp = Just MO_F32_Cos
1306 callishOp FloatTanOp = Just MO_F32_Tan
1307 callishOp FloatSinhOp = Just MO_F32_Sinh
1308 callishOp FloatCoshOp = Just MO_F32_Cosh
1309 callishOp FloatTanhOp = Just MO_F32_Tanh
1310 callishOp FloatAsinOp = Just MO_F32_Asin
1311 callishOp FloatAcosOp = Just MO_F32_Acos
1312 callishOp FloatAtanOp = Just MO_F32_Atan
1313 callishOp FloatLogOp = Just MO_F32_Log
1314 callishOp FloatExpOp = Just MO_F32_Exp
1315 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1316
1317 callishOp _ = Nothing
1318
1319 ------------------------------------------------------------------------------
1320 -- Helpers for translating various minor variants of array indexing.
1321
1322 doIndexOffAddrOp :: Maybe MachOp
1323 -> CmmType
1324 -> [LocalReg]
1325 -> [CmmExpr]
1326 -> FCode ()
1327 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1328 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1329 doIndexOffAddrOp _ _ _ _
1330 = panic "StgCmmPrim: doIndexOffAddrOp"
1331
1332 doIndexOffAddrOpAs :: Maybe MachOp
1333 -> CmmType
1334 -> CmmType
1335 -> [LocalReg]
1336 -> [CmmExpr]
1337 -> FCode ()
1338 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1339 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1340 doIndexOffAddrOpAs _ _ _ _ _
1341 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1342
1343 doIndexByteArrayOp :: Maybe MachOp
1344 -> CmmType
1345 -> [LocalReg]
1346 -> [CmmExpr]
1347 -> FCode ()
1348 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1349 = do dflags <- getDynFlags
1350 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1351 doIndexByteArrayOp _ _ _ _
1352 = panic "StgCmmPrim: doIndexByteArrayOp"
1353
1354 doIndexByteArrayOpAs :: Maybe MachOp
1355 -> CmmType
1356 -> CmmType
1357 -> [LocalReg]
1358 -> [CmmExpr]
1359 -> FCode ()
1360 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1361 = do dflags <- getDynFlags
1362 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1363 doIndexByteArrayOpAs _ _ _ _ _
1364 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1365
1366 doReadPtrArrayOp :: LocalReg
1367 -> CmmExpr
1368 -> CmmExpr
1369 -> FCode ()
1370 doReadPtrArrayOp res addr idx
1371 = do dflags <- getDynFlags
1372 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1373
1374 doWriteOffAddrOp :: Maybe MachOp
1375 -> CmmType
1376 -> [LocalReg]
1377 -> [CmmExpr]
1378 -> FCode ()
1379 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1380 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1381 doWriteOffAddrOp _ _ _ _
1382 = panic "StgCmmPrim: doWriteOffAddrOp"
1383
1384 doWriteByteArrayOp :: Maybe MachOp
1385 -> CmmType
1386 -> [LocalReg]
1387 -> [CmmExpr]
1388 -> FCode ()
1389 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1390 = do dflags <- getDynFlags
1391 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1392 doWriteByteArrayOp _ _ _ _
1393 = panic "StgCmmPrim: doWriteByteArrayOp"
1394
1395 doWritePtrArrayOp :: CmmExpr
1396 -> CmmExpr
1397 -> CmmExpr
1398 -> FCode ()
1399 doWritePtrArrayOp addr idx val
1400 = do dflags <- getDynFlags
1401 let ty = cmmExprType dflags val
1402 -- This write barrier is to ensure that the heap writes to the object
1403 -- referred to by val have happened before we write val into the array.
1404 -- See #12469 for details.
1405 emitPrimCall [] MO_WriteBarrier []
1406 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1407 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1408 -- the write barrier. We must write a byte into the mark table:
1409 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1410 emit $ mkStore (
1411 cmmOffsetExpr dflags
1412 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1413 (loadArrPtrsSize dflags addr))
1414 (CmmMachOp (mo_wordUShr dflags) [idx,
1415 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1416 ) (CmmLit (CmmInt 1 W8))
1417
1418 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1419 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1420 where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1421
1422 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1423 -> Maybe MachOp -- Optional result cast
1424 -> CmmType -- Type of element we are accessing
1425 -> LocalReg -- Destination
1426 -> CmmExpr -- Base address
1427 -> CmmType -- Type of element by which we are indexing
1428 -> CmmExpr -- Index
1429 -> FCode ()
1430 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1431 = do dflags <- getDynFlags
1432 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1433 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1434 = do dflags <- getDynFlags
1435 emitAssign (CmmLocal res) (CmmMachOp cast [
1436 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1437
1438 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1439 -> Maybe MachOp -- Optional value cast
1440 -> CmmExpr -- Base address
1441 -> CmmType -- Type of element by which we are indexing
1442 -> CmmExpr -- Index
1443 -> CmmExpr -- Value to write
1444 -> FCode ()
1445 mkBasicIndexedWrite off Nothing base idx_ty idx val
1446 = do dflags <- getDynFlags
1447 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1448 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1449 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1450
1451 -- ----------------------------------------------------------------------------
1452 -- Misc utils
1453
1454 cmmIndexOffExpr :: DynFlags
1455 -> ByteOff -- Initial offset in bytes
1456 -> Width -- Width of element by which we are indexing
1457 -> CmmExpr -- Base address
1458 -> CmmExpr -- Index
1459 -> CmmExpr
1460 cmmIndexOffExpr dflags off width base idx
1461 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1462
1463 cmmLoadIndexOffExpr :: DynFlags
1464 -> ByteOff -- Initial offset in bytes
1465 -> CmmType -- Type of element we are accessing
1466 -> CmmExpr -- Base address
1467 -> CmmType -- Type of element by which we are indexing
1468 -> CmmExpr -- Index
1469 -> CmmExpr
1470 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1471 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1472
1473 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1474 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1475
1476 ------------------------------------------------------------------------------
1477 -- Helpers for translating vector primops.
1478
1479 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1480 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1481
1482 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1483 vecCmmCat IntVec = cmmBits
1484 vecCmmCat WordVec = cmmBits
1485 vecCmmCat FloatVec = cmmFloat
1486
1487 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1488 vecElemInjectCast _ FloatVec _ = Nothing
1489 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1490 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1491 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1492 vecElemInjectCast _ IntVec W64 = Nothing
1493 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1494 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1495 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1496 vecElemInjectCast _ WordVec W64 = Nothing
1497 vecElemInjectCast _ _ _ = Nothing
1498
1499 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1500 vecElemProjectCast _ FloatVec _ = Nothing
1501 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1502 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1503 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1504 vecElemProjectCast _ IntVec W64 = Nothing
1505 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1506 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1507 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1508 vecElemProjectCast _ WordVec W64 = Nothing
1509 vecElemProjectCast _ _ _ = Nothing
1510
1511 -- Check to make sure that we can generate code for the specified vector type
1512 -- given the current set of dynamic flags.
1513 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1514 checkVecCompatibility dflags vcat l w = do
1515 when (hscTarget dflags /= HscLlvm) $ do
1516 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1517 ,"Please use -fllvm."]
1518 check vecWidth vcat l w
1519 where
1520 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1521 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1522 sorry $ "128-bit wide single-precision floating point " ++
1523 "SIMD vector instructions require at least -msse."
1524 check W128 _ _ _ | not (isSse2Enabled dflags) =
1525 sorry $ "128-bit wide integer and double precision " ++
1526 "SIMD vector instructions require at least -msse2."
1527 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1528 sorry $ "256-bit wide floating point " ++
1529 "SIMD vector instructions require at least -mavx."
1530 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1531 sorry $ "256-bit wide integer " ++
1532 "SIMD vector instructions require at least -mavx2."
1533 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1534 sorry $ "512-bit wide " ++
1535 "SIMD vector instructions require -mavx512f."
1536 check _ _ _ _ = return ()
1537
1538 vecWidth = typeWidth (vecVmmType vcat l w)
1539
1540 ------------------------------------------------------------------------------
1541 -- Helpers for translating vector packing and unpacking.
1542
1543 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1544 -> CmmType -- Type of vector
1545 -> CmmExpr -- Initial vector
1546 -> [CmmExpr] -- Elements
1547 -> CmmFormal -- Destination for result
1548 -> FCode ()
1549 doVecPackOp maybe_pre_write_cast ty z es res = do
1550 dst <- newTemp ty
1551 emitAssign (CmmLocal dst) z
1552 vecPack dst es 0
1553 where
1554 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1555 vecPack src [] _ =
1556 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1557
1558 vecPack src (e : es) i = do
1559 dst <- newTemp ty
1560 if isFloatType (vecElemType ty)
1561 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1562 [CmmReg (CmmLocal src), cast e, iLit])
1563 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1564 [CmmReg (CmmLocal src), cast e, iLit])
1565 vecPack dst es (i + 1)
1566 where
1567 -- vector indices are always 32-bits
1568 iLit = CmmLit (CmmInt (toInteger i) W32)
1569
1570 cast :: CmmExpr -> CmmExpr
1571 cast val = case maybe_pre_write_cast of
1572 Nothing -> val
1573 Just cast -> CmmMachOp cast [val]
1574
1575 len :: Length
1576 len = vecLength ty
1577
1578 wid :: Width
1579 wid = typeWidth (vecElemType ty)
1580
1581 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1582 -> CmmType -- Type of vector
1583 -> CmmExpr -- Vector
1584 -> [CmmFormal] -- Element results
1585 -> FCode ()
1586 doVecUnpackOp maybe_post_read_cast ty e res =
1587 vecUnpack res 0
1588 where
1589 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1590 vecUnpack [] _ =
1591 return ()
1592
1593 vecUnpack (r : rs) i = do
1594 if isFloatType (vecElemType ty)
1595 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1596 [e, iLit]))
1597 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1598 [e, iLit]))
1599 vecUnpack rs (i + 1)
1600 where
1601 -- vector indices are always 32-bits
1602 iLit = CmmLit (CmmInt (toInteger i) W32)
1603
1604 cast :: CmmExpr -> CmmExpr
1605 cast val = case maybe_post_read_cast of
1606 Nothing -> val
1607 Just cast -> CmmMachOp cast [val]
1608
1609 len :: Length
1610 len = vecLength ty
1611
1612 wid :: Width
1613 wid = typeWidth (vecElemType ty)
1614
1615 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1616 -> CmmType -- Vector type
1617 -> CmmExpr -- Source vector
1618 -> CmmExpr -- Element
1619 -> CmmExpr -- Index at which to insert element
1620 -> CmmFormal -- Destination for result
1621 -> FCode ()
1622 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1623 dflags <- getDynFlags
1624 -- vector indices are always 32-bits
1625 let idx' :: CmmExpr
1626 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1627 if isFloatType (vecElemType ty)
1628 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1629 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1630 where
1631 cast :: CmmExpr -> CmmExpr
1632 cast val = case maybe_pre_write_cast of
1633 Nothing -> val
1634 Just cast -> CmmMachOp cast [val]
1635
1636 len :: Length
1637 len = vecLength ty
1638
1639 wid :: Width
1640 wid = typeWidth (vecElemType ty)
1641
1642 ------------------------------------------------------------------------------
1643 -- Helpers for translating prefetching.
1644
1645
1646 -- | Translate byte array prefetch operations into proper primcalls.
1647 doPrefetchByteArrayOp :: Int
1648 -> [CmmExpr]
1649 -> FCode ()
1650 doPrefetchByteArrayOp locality [addr,idx]
1651 = do dflags <- getDynFlags
1652 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1653 doPrefetchByteArrayOp _ _
1654 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1655
1656 -- | Translate mutable byte array prefetch operations into proper primcalls.
1657 doPrefetchMutableByteArrayOp :: Int
1658 -> [CmmExpr]
1659 -> FCode ()
1660 doPrefetchMutableByteArrayOp locality [addr,idx]
1661 = do dflags <- getDynFlags
1662 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1663 doPrefetchMutableByteArrayOp _ _
1664 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1665
1666 -- | Translate address prefetch operations into proper primcalls.
1667 doPrefetchAddrOp ::Int
1668 -> [CmmExpr]
1669 -> FCode ()
1670 doPrefetchAddrOp locality [addr,idx]
1671 = mkBasicPrefetch locality 0 addr idx
1672 doPrefetchAddrOp _ _
1673 = panic "StgCmmPrim: doPrefetchAddrOp"
1674
1675 -- | Translate value prefetch operations into proper primcalls.
1676 doPrefetchValueOp :: Int
1677 -> [CmmExpr]
1678 -> FCode ()
1679 doPrefetchValueOp locality [addr]
1680 = do dflags <- getDynFlags
1681 mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
1682 doPrefetchValueOp _ _
1683 = panic "StgCmmPrim: doPrefetchValueOp"
1684
1685 -- | helper to generate prefetch primcalls
1686 mkBasicPrefetch :: Int -- Locality level 0-3
1687 -> ByteOff -- Initial offset in bytes
1688 -> CmmExpr -- Base address
1689 -> CmmExpr -- Index
1690 -> FCode ()
1691 mkBasicPrefetch locality off base idx
1692 = do dflags <- getDynFlags
1693 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1694 return ()
1695
1696 -- ----------------------------------------------------------------------------
1697 -- Allocating byte arrays
1698
1699 -- | Takes a register to return the newly allocated array in and the
1700 -- size of the new array in bytes. Allocates a new
1701 -- 'MutableByteArray#'.
1702 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1703 doNewByteArrayOp res_r n = do
1704 dflags <- getDynFlags
1705
1706 let info_ptr = mkLblExpr mkArrWords_infoLabel
1707 rep = arrWordsRep dflags n
1708
1709 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1710 (mkIntExpr dflags (nonHdrSize dflags rep))
1711 (zeroExpr dflags)
1712
1713 let hdr_size = fixedHdrSize dflags
1714
1715 base <- allocHeapClosure rep info_ptr curCCS
1716 [ (mkIntExpr dflags n,
1717 hdr_size + oFFSET_StgArrBytes_bytes dflags)
1718 ]
1719
1720 emit $ mkAssign (CmmLocal res_r) base
1721
1722 -- ----------------------------------------------------------------------------
1723 -- Copying byte arrays
1724
1725 -- | Takes a source 'ByteArray#', an offset in the source array, a
1726 -- destination 'MutableByteArray#', an offset into the destination
1727 -- array, and the number of bytes to copy. Copies the given number of
1728 -- bytes from the source array to the destination array.
1729 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1730 -> FCode ()
1731 doCopyByteArrayOp = emitCopyByteArray copy
1732 where
1733 -- Copy data (we assume the arrays aren't overlapping since
1734 -- they're of different types)
1735 copy _src _dst dst_p src_p bytes =
1736 emitMemcpyCall dst_p src_p bytes 1
1737
1738 -- | Takes a source 'MutableByteArray#', an offset in the source
1739 -- array, a destination 'MutableByteArray#', an offset into the
1740 -- destination array, and the number of bytes to copy. Copies the
1741 -- given number of bytes from the source array to the destination
1742 -- array.
1743 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1744 -> FCode ()
1745 doCopyMutableByteArrayOp = emitCopyByteArray copy
1746 where
1747 -- The only time the memory might overlap is when the two arrays
1748 -- we were provided are the same array!
1749 -- TODO: Optimize branch for common case of no aliasing.
1750 copy src dst dst_p src_p bytes = do
1751 dflags <- getDynFlags
1752 [moveCall, cpyCall] <- forkAlts [
1753 getCode $ emitMemmoveCall dst_p src_p bytes 1,
1754 getCode $ emitMemcpyCall dst_p src_p bytes 1
1755 ]
1756 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1757
1758 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1759 -> FCode ())
1760 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1761 -> FCode ()
1762 emitCopyByteArray copy src src_off dst dst_off n = do
1763 dflags <- getDynFlags
1764 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1765 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1766 copy src dst dst_p src_p n
1767
1768 -- | Takes a source 'ByteArray#', an offset in the source array, a
1769 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1770 -- number of bytes from the source array to the destination memory region.
1771 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1772 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1773 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1774 dflags <- getDynFlags
1775 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1776 emitMemcpyCall dst_p src_p bytes 1
1777
1778 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1779 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1780 -- number of bytes from the source array to the destination memory region.
1781 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1782 -> FCode ()
1783 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1784
1785 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1786 -- the destination array, and the number of bytes to copy. Copies the given
1787 -- number of bytes from the source memory region to the destination array.
1788 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1789 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1790 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1791 dflags <- getDynFlags
1792 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1793 emitMemcpyCall dst_p src_p bytes 1
1794
1795
1796 -- ----------------------------------------------------------------------------
1797 -- Setting byte arrays
1798
1799 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1800 -- and a byte, and sets each of the selected bytes in the array to the
1801 -- character.
1802 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1803 -> FCode ()
1804 doSetByteArrayOp ba off len c
1805 = do dflags <- getDynFlags
1806 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1807 emitMemsetCall p c len 1
1808
1809 -- ----------------------------------------------------------------------------
1810 -- Allocating arrays
1811
1812 -- | Allocate a new array.
1813 doNewArrayOp :: CmmFormal -- ^ return register
1814 -> SMRep -- ^ representation of the array
1815 -> CLabel -- ^ info pointer
1816 -> [(CmmExpr, ByteOff)] -- ^ header payload
1817 -> WordOff -- ^ array size
1818 -> CmmExpr -- ^ initial element
1819 -> FCode ()
1820 doNewArrayOp res_r rep info payload n init = do
1821 dflags <- getDynFlags
1822
1823 let info_ptr = mkLblExpr info
1824
1825 tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
1826 (mkIntExpr dflags (nonHdrSize dflags rep))
1827 (zeroExpr dflags)
1828
1829 base <- allocHeapClosure rep info_ptr curCCS payload
1830
1831 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1832 emit $ mkAssign arr base
1833
1834 -- Initialise all elements of the the array
1835 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep)
1836 for <- newBlockId
1837 emitLabel for
1838 let loopBody =
1839 [ mkStore (CmmReg (CmmLocal p)) init
1840 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1841 , mkBranch for ]
1842 emit =<< mkCmmIfThen
1843 (cmmULtWord dflags (CmmReg (CmmLocal p))
1844 (cmmOffsetW dflags (CmmReg arr)
1845 (hdrSizeW dflags rep + n)))
1846 (catAGraphs loopBody)
1847
1848 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1849
1850 -- ----------------------------------------------------------------------------
1851 -- Copying pointer arrays
1852
1853 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1854 -- nowhere else in the code generator. This is mostly because these
1855 -- "primitive" ops result in a surprisingly large amount of code. It
1856 -- will likely be worthwhile to optimize what is emitted here, so that
1857 -- our optimization passes don't waste time repeatedly optimizing the
1858 -- same bits of code.
1859
1860 -- More closely imitates 'assignTemp' from the old code generator, which
1861 -- returns a CmmExpr rather than a LocalReg.
1862 assignTempE :: CmmExpr -> FCode CmmExpr
1863 assignTempE e = do
1864 t <- assignTemp e
1865 return (CmmReg (CmmLocal t))
1866
1867 -- | Takes a source 'Array#', an offset in the source array, a
1868 -- destination 'MutableArray#', an offset into the destination array,
1869 -- and the number of elements to copy. Copies the given number of
1870 -- elements from the source array to the destination array.
1871 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1872 -> FCode ()
1873 doCopyArrayOp = emitCopyArray copy
1874 where
1875 -- Copy data (we assume the arrays aren't overlapping since
1876 -- they're of different types)
1877 copy _src _dst dst_p src_p bytes =
1878 do dflags <- getDynFlags
1879 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1880 (wORD_SIZE dflags)
1881
1882
1883 -- | Takes a source 'MutableArray#', an offset in the source array, a
1884 -- destination 'MutableArray#', an offset into the destination array,
1885 -- and the number of elements to copy. Copies the given number of
1886 -- elements from the source array to the destination array.
1887 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1888 -> FCode ()
1889 doCopyMutableArrayOp = emitCopyArray copy
1890 where
1891 -- The only time the memory might overlap is when the two arrays
1892 -- we were provided are the same array!
1893 -- TODO: Optimize branch for common case of no aliasing.
1894 copy src dst dst_p src_p bytes = do
1895 dflags <- getDynFlags
1896 [moveCall, cpyCall] <- forkAlts [
1897 getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1898 (wORD_SIZE dflags),
1899 getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1900 (wORD_SIZE dflags)
1901 ]
1902 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1903
1904 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1905 -> FCode ()) -- ^ copy function
1906 -> CmmExpr -- ^ source array
1907 -> CmmExpr -- ^ offset in source array
1908 -> CmmExpr -- ^ destination array
1909 -> CmmExpr -- ^ offset in destination array
1910 -> WordOff -- ^ number of elements to copy
1911 -> FCode ()
1912 emitCopyArray copy src0 src_off dst0 dst_off0 n = do
1913 dflags <- getDynFlags
1914 when (n /= 0) $ do
1915 -- Passed as arguments (be careful)
1916 src <- assignTempE src0
1917 dst <- assignTempE dst0
1918 dst_off <- assignTempE dst_off0
1919
1920 -- Set the dirty bit in the header.
1921 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1922
1923 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
1924 (arrPtrsHdrSize dflags)
1925 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1926 src_p <- assignTempE $ cmmOffsetExprW dflags
1927 (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1928 let bytes = wordsToBytes dflags n
1929
1930 copy src dst dst_p src_p bytes
1931
1932 -- The base address of the destination card table
1933 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
1934 (loadArrPtrsSize dflags dst)
1935
1936 emitSetCards dst_off dst_cards_p n
1937
1938 doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1939 -> FCode ()
1940 doCopySmallArrayOp = emitCopySmallArray copy
1941 where
1942 -- Copy data (we assume the arrays aren't overlapping since
1943 -- they're of different types)
1944 copy _src _dst dst_p src_p bytes =
1945 do dflags <- getDynFlags
1946 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1947 (wORD_SIZE dflags)
1948
1949
1950 doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1951 -> FCode ()
1952 doCopySmallMutableArrayOp = emitCopySmallArray copy
1953 where
1954 -- The only time the memory might overlap is when the two arrays
1955 -- we were provided are the same array!
1956 -- TODO: Optimize branch for common case of no aliasing.
1957 copy src dst dst_p src_p bytes = do
1958 dflags <- getDynFlags
1959 [moveCall, cpyCall] <- forkAlts
1960 [ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1961 (wORD_SIZE dflags)
1962 , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1963 (wORD_SIZE dflags)
1964 ]
1965 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1966
1967 emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1968 -> FCode ()) -- ^ copy function
1969 -> CmmExpr -- ^ source array
1970 -> CmmExpr -- ^ offset in source array
1971 -> CmmExpr -- ^ destination array
1972 -> CmmExpr -- ^ offset in destination array
1973 -> WordOff -- ^ number of elements to copy
1974 -> FCode ()
1975 emitCopySmallArray copy src0 src_off dst0 dst_off n = do
1976 dflags <- getDynFlags
1977
1978 -- Passed as arguments (be careful)
1979 src <- assignTempE src0
1980 dst <- assignTempE dst0
1981
1982 -- Set the dirty bit in the header.
1983 emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
1984
1985 dst_p <- assignTempE $ cmmOffsetExprW dflags
1986 (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
1987 src_p <- assignTempE $ cmmOffsetExprW dflags
1988 (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
1989 let bytes = wordsToBytes dflags n
1990
1991 copy src dst dst_p src_p bytes
1992
1993 -- | Takes an info table label, a register to return the newly
1994 -- allocated array in, a source array, an offset in the source array,
1995 -- and the number of elements to copy. Allocates a new array and
1996 -- initializes it from the source array.
1997 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1998 -> FCode ()
1999 emitCloneArray info_p res_r src src_off n = do
2000 dflags <- getDynFlags
2001
2002 let info_ptr = mkLblExpr info_p
2003 rep = arrPtrsRep dflags n
2004
2005 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
2006 (mkIntExpr dflags (nonHdrSize dflags rep))
2007 (zeroExpr dflags)
2008
2009 let hdr_size = fixedHdrSize dflags
2010
2011 base <- allocHeapClosure rep info_ptr curCCS
2012 [ (mkIntExpr dflags n,
2013 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
2014 , (mkIntExpr dflags (nonHdrSizeW rep),
2015 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
2016 ]
2017
2018 arr <- CmmLocal `fmap` newTemp (bWord dflags)
2019 emit $ mkAssign arr base
2020
2021 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
2022 (arrPtrsHdrSize dflags)
2023 src_p <- assignTempE $ cmmOffsetExprW dflags src
2024 (cmmAddWord dflags
2025 (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
2026
2027 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
2028 (wORD_SIZE dflags)
2029
2030 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2031
2032 -- | Takes an info table label, a register to return the newly
2033 -- allocated array in, a source array, an offset in the source array,
2034 -- and the number of elements to copy. Allocates a new array and
2035 -- initializes it from the source array.
2036 emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
2037 -> FCode ()
2038 emitCloneSmallArray info_p res_r src src_off n = do
2039 dflags <- getDynFlags
2040
2041 let info_ptr = mkLblExpr info_p
2042 rep = smallArrPtrsRep n
2043
2044 tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
2045 (mkIntExpr dflags (nonHdrSize dflags rep))
2046 (zeroExpr dflags)
2047
2048 let hdr_size = fixedHdrSize dflags
2049
2050 base <- allocHeapClosure rep info_ptr curCCS
2051 [ (mkIntExpr dflags n,
2052 hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
2053 ]
2054
2055 arr <- CmmLocal `fmap` newTemp (bWord dflags)
2056 emit $ mkAssign arr base
2057
2058 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
2059 (smallArrPtrsHdrSize dflags)
2060 src_p <- assignTempE $ cmmOffsetExprW dflags src
2061 (cmmAddWord dflags
2062 (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
2063
2064 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
2065 (wORD_SIZE dflags)
2066
2067 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2068
2069 -- | Takes and offset in the destination array, the base address of
2070 -- the card table, and the number of elements affected (*not* the
2071 -- number of cards). The number of elements may not be zero.
2072 -- Marks the relevant cards as dirty.
2073 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
2074 emitSetCards dst_start dst_cards_start n = do
2075 dflags <- getDynFlags
2076 start_card <- assignTempE $ cardCmm dflags dst_start
2077 let end_card = cardCmm dflags
2078 (cmmSubWord dflags
2079 (cmmAddWord dflags dst_start (mkIntExpr dflags n))
2080 (mkIntExpr dflags 1))
2081 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
2082 (mkIntExpr dflags 1)
2083 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
2084 1 -- no alignment (1 byte)
2085
2086 -- Convert an element index to a card index
2087 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
2088 cardCmm dflags i =
2089 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
2090
2091 ------------------------------------------------------------------------------
2092 -- SmallArray PrimOp implementations
2093
2094 doReadSmallPtrArrayOp :: LocalReg
2095 -> CmmExpr
2096 -> CmmExpr
2097 -> FCode ()
2098 doReadSmallPtrArrayOp res addr idx = do
2099 dflags <- getDynFlags
2100 mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
2101 (gcWord dflags) idx
2102
2103 doWriteSmallPtrArrayOp :: CmmExpr
2104 -> CmmExpr
2105 -> CmmExpr
2106 -> FCode ()
2107 doWriteSmallPtrArrayOp addr idx val = do
2108 dflags <- getDynFlags
2109 let ty = cmmExprType dflags val
2110 mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
2111 emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2112
2113 ------------------------------------------------------------------------------
2114 -- Atomic read-modify-write
2115
2116 -- | Emit an atomic modification to a byte array element. The result
2117 -- reg contains that previous value of the element. Implies a full
2118 -- memory barrier.
2119 doAtomicRMW :: LocalReg -- ^ Result reg
2120 -> AtomicMachOp -- ^ Atomic op (e.g. add)
2121 -> CmmExpr -- ^ MutableByteArray#
2122 -> CmmExpr -- ^ Index
2123 -> CmmType -- ^ Type of element by which we are indexing
2124 -> CmmExpr -- ^ Op argument (e.g. amount to add)
2125 -> FCode ()
2126 doAtomicRMW res amop mba idx idx_ty n = do
2127 dflags <- getDynFlags
2128 let width = typeWidth idx_ty
2129 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2130 width mba idx
2131 emitPrimCall
2132 [ res ]
2133 (MO_AtomicRMW width amop)
2134 [ addr, n ]
2135
2136 -- | Emit an atomic read to a byte array that acts as a memory barrier.
2137 doAtomicReadByteArray
2138 :: LocalReg -- ^ Result reg
2139 -> CmmExpr -- ^ MutableByteArray#
2140 -> CmmExpr -- ^ Index
2141 -> CmmType -- ^ Type of element by which we are indexing
2142 -> FCode ()
2143 doAtomicReadByteArray res mba idx idx_ty = do
2144 dflags <- getDynFlags
2145 let width = typeWidth idx_ty
2146 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2147 width mba idx
2148 emitPrimCall
2149 [ res ]
2150 (MO_AtomicRead width)
2151 [ addr ]
2152
2153 -- | Emit an atomic write to a byte array that acts as a memory barrier.
2154 doAtomicWriteByteArray
2155 :: CmmExpr -- ^ MutableByteArray#
2156 -> CmmExpr -- ^ Index
2157 -> CmmType -- ^ Type of element by which we are indexing
2158 -> CmmExpr -- ^ Value to write
2159 -> FCode ()
2160 doAtomicWriteByteArray mba idx idx_ty val = do
2161 dflags <- getDynFlags
2162 let width = typeWidth idx_ty
2163 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2164 width mba idx
2165 emitPrimCall
2166 [ {- no results -} ]
2167 (MO_AtomicWrite width)
2168 [ addr, val ]
2169
2170 doCasByteArray
2171 :: LocalReg -- ^ Result reg
2172 -> CmmExpr -- ^ MutableByteArray#
2173 -> CmmExpr -- ^ Index
2174 -> CmmType -- ^ Type of element by which we are indexing
2175 -> CmmExpr -- ^ Old value
2176 -> CmmExpr -- ^ New value
2177 -> FCode ()
2178 doCasByteArray res mba idx idx_ty old new = do
2179 dflags <- getDynFlags
2180 let width = (typeWidth idx_ty)
2181 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2182 width mba idx
2183 emitPrimCall
2184 [ res ]
2185 (MO_Cmpxchg width)
2186 [ addr, old, new ]
2187
2188 ------------------------------------------------------------------------------
2189 -- Helpers for emitting function calls
2190
2191 -- | Emit a call to @memcpy@.
2192 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2193 emitMemcpyCall dst src n align = do
2194 emitPrimCall
2195 [ {-no results-} ]
2196 (MO_Memcpy align)
2197 [ dst, src, n ]
2198
2199 -- | Emit a call to @memmove@.
2200 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2201 emitMemmoveCall dst src n align = do
2202 emitPrimCall
2203 [ {- no results -} ]
2204 (MO_Memmove align)
2205 [ dst, src, n ]
2206
2207 -- | Emit a call to @memset@. The second argument must fit inside an
2208 -- unsigned char.
2209 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2210 emitMemsetCall dst c n align = do
2211 emitPrimCall
2212 [ {- no results -} ]
2213 (MO_Memset align)
2214 [ dst, c, n ]
2215
2216 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2217 emitBSwapCall res x width = do
2218 emitPrimCall
2219 [ res ]
2220 (MO_BSwap width)
2221 [ x ]
2222
2223 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2224 emitPopCntCall res x width = do
2225 emitPrimCall
2226 [ res ]
2227 (MO_PopCnt width)
2228 [ x ]
2229
2230 emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2231 emitClzCall res x width = do
2232 emitPrimCall
2233 [ res ]
2234 (MO_Clz width)
2235 [ x ]
2236
2237 emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2238 emitCtzCall res x width = do
2239 emitPrimCall
2240 [ res ]
2241 (MO_Ctz width)
2242 [ x ]