s/StgArrWords/StgArrBytes/
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 {-# LANGUAGE CPP #-}
2
3 ----------------------------------------------------------------------------
4 --
5 -- Stg to C--: primitive operations
6 --
7 -- (c) The University of Glasgow 2004-2006
8 --
9 -----------------------------------------------------------------------------
10
11 module StgCmmPrim (
12 cgOpApp,
13 cgPrimOp, -- internal(ish), used by cgCase to get code for a
14 -- comparison without also turning it into a Bool.
15 shouldInlinePrimOp
16 ) where
17
18 #include "HsVersions.h"
19
20 import StgCmmLayout
21 import StgCmmForeign
22 import StgCmmEnv
23 import StgCmmMonad
24 import StgCmmUtils
25 import StgCmmTicky
26 import StgCmmHeap
27 import StgCmmProf ( costCentreFrom, curCCS )
28
29 import DynFlags
30 import Platform
31 import BasicTypes
32 import MkGraph
33 import StgSyn
34 import Cmm
35 import CmmInfo
36 import Type ( Type, tyConAppTyCon )
37 import TyCon
38 import CLabel
39 import CmmUtils
40 import PrimOp
41 import SMRep
42 import FastString
43 import Outputable
44 import Util
45
46 #if __GLASGOW_HASKELL__ >= 709
47 import Prelude hiding ((<*>))
48 #endif
49
50 import Data.Bits ((.&.), bit)
51 import Control.Monad (liftM, when)
52
53 ------------------------------------------------------------------------
54 -- Primitive operations and foreign calls
55 ------------------------------------------------------------------------
56
57 {- Note [Foreign call results]
58 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
59 A foreign call always returns an unboxed tuple of results, one
60 of which is the state token. This seems to happen even for pure
61 calls.
62
63 Even if we returned a single result for pure calls, it'd still be
64 right to wrap it in a singleton unboxed tuple, because the result
65 might be a Haskell closure pointer, we don't want to evaluate it. -}
66
67 ----------------------------------
68 cgOpApp :: StgOp -- The op
69 -> [StgArg] -- Arguments
70 -> Type -- Result type (always an unboxed tuple)
71 -> FCode ReturnKind
72
73 -- Foreign calls
74 cgOpApp (StgFCallOp fcall _) stg_args res_ty
75 = cgForeignCall fcall stg_args res_ty
76 -- Note [Foreign call results]
77
78 -- tagToEnum# is special: we need to pull the constructor
79 -- out of the table, and perform an appropriate return.
80
81 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
82 = ASSERT(isEnumerationTyCon tycon)
83 do { dflags <- getDynFlags
84 ; args' <- getNonVoidArgAmodes [arg]
85 ; let amode = case args' of [amode] -> amode
86 _ -> panic "TagToEnumOp had void arg"
87 ; emitReturn [tagToClosure dflags tycon amode] }
88 where
89 -- If you're reading this code in the attempt to figure
90 -- out why the compiler panic'ed here, it is probably because
91 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
92 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
93 -- That won't work.
94 tycon = tyConAppTyCon res_ty
95
96 cgOpApp (StgPrimOp primop) args res_ty = do
97 dflags <- getDynFlags
98 cmm_args <- getNonVoidArgAmodes args
99 case shouldInlinePrimOp dflags primop cmm_args of
100 Nothing -> do -- out-of-line
101 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
102 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
103
104 Just f -- inline
105 | ReturnsPrim VoidRep <- result_info
106 -> do f []
107 emitReturn []
108
109 | ReturnsPrim rep <- result_info
110 -> do dflags <- getDynFlags
111 res <- newTemp (primRepCmmType dflags rep)
112 f [res]
113 emitReturn [CmmReg (CmmLocal res)]
114
115 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
116 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
117 f regs
118 emitReturn (map (CmmReg . CmmLocal) regs)
119
120 | otherwise -> panic "cgPrimop"
121 where
122 result_info = getPrimOpResultInfo primop
123
124 cgOpApp (StgPrimCallOp primcall) args _res_ty
125 = do { cmm_args <- getNonVoidArgAmodes args
126 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
127 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
128
129 -- | Interpret the argument as an unsigned value, assuming the value
130 -- is given in two-complement form in the given width.
131 --
132 -- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
133 --
134 -- This function is used to work around the fact that many array
135 -- primops take Int# arguments, but we interpret them as unsigned
136 -- quantities in the code gen. This means that we have to be careful
137 -- every time we work on e.g. a CmmInt literal that corresponds to the
138 -- array size, as it might contain a negative Integer value if the
139 -- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
140 -- literal.
141 asUnsigned :: Width -> Integer -> Integer
142 asUnsigned w n = n .&. (bit (widthInBits w) - 1)
143
144 -- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
145 -- ByteOff (or some other fixed width signed type) to represent
146 -- array sizes or indices. This means that these will overflow for
147 -- large enough sizes.
148
149 -- | Decide whether an out-of-line primop should be replaced by an
150 -- inline implementation. This might happen e.g. if there's enough
151 -- static information, such as statically know arguments, to emit a
152 -- more efficient implementation inline.
153 --
154 -- Returns 'Nothing' if this primop should use its out-of-line
155 -- implementation (defined elsewhere) and 'Just' together with a code
156 -- generating function that takes the output regs as arguments
157 -- otherwise.
158 shouldInlinePrimOp :: DynFlags
159 -> PrimOp -- ^ The primop
160 -> [CmmExpr] -- ^ The primop arguments
161 -> Maybe ([LocalReg] -> FCode ())
162
163 shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
164 | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
165 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
166
167 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
168 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
169 Just $ \ [res] ->
170 doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
171 [ (mkIntExpr dflags (fromInteger n),
172 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
173 , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
174 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
175 ]
176 (fromInteger n) init
177
178 shouldInlinePrimOp _ CopyArrayOp
179 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
180 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
181
182 shouldInlinePrimOp _ CopyMutableArrayOp
183 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
184 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
185
186 shouldInlinePrimOp _ CopyArrayArrayOp
187 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
188 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
189
190 shouldInlinePrimOp _ CopyMutableArrayArrayOp
191 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
192 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
193
194 shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
195 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
196 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
197
198 shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
199 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
200 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
201
202 shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
203 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
204 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
205
206 shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
207 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
208 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
209
210 shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
211 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
212 Just $ \ [res] ->
213 doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
214 [ (mkIntExpr dflags (fromInteger n),
215 fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
216 ]
217 (fromInteger n) init
218
219 shouldInlinePrimOp _ CopySmallArrayOp
220 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
221 Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
222
223 shouldInlinePrimOp _ CopySmallMutableArrayOp
224 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
225 Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
226
227 shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
228 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
229 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
230
231 shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
232 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
233 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
234
235 shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
236 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
237 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
238
239 shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
240 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
241 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
242
243 shouldInlinePrimOp dflags primop args
244 | primOpOutOfLine primop = Nothing
245 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
246
247 -- TODO: Several primops, such as 'copyArray#', only have an inline
248 -- implementation (below) but could possibly have both an inline
249 -- implementation and an out-of-line implementation, just like
250 -- 'newArray#'. This would lower the amount of code generated,
251 -- hopefully without a performance impact (needs to be measured).
252
253 ---------------------------------------------------
254 cgPrimOp :: [LocalReg] -- where to put the results
255 -> PrimOp -- the op
256 -> [StgArg] -- arguments
257 -> FCode ()
258
259 cgPrimOp results op args
260 = do dflags <- getDynFlags
261 arg_exprs <- getNonVoidArgAmodes args
262 emitPrimOp dflags results op arg_exprs
263
264
265 ------------------------------------------------------------------------
266 -- Emitting code for a primop
267 ------------------------------------------------------------------------
268
269 emitPrimOp :: DynFlags
270 -> [LocalReg] -- where to put the results
271 -> PrimOp -- the op
272 -> [CmmExpr] -- arguments
273 -> FCode ()
274
275 -- First we handle various awkward cases specially. The remaining
276 -- easy cases are then handled by translateOp, defined below.
277
278 emitPrimOp _ [res] ParOp [arg]
279 =
280 -- for now, just implement this in a C function
281 -- later, we might want to inline it.
282 emitCCall
283 [(res,NoHint)]
284 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
285 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
286
287 emitPrimOp dflags [res] SparkOp [arg]
288 = do
289 -- returns the value of arg in res. We're going to therefore
290 -- refer to arg twice (once to pass to newSpark(), and once to
291 -- assign to res), so put it in a temporary.
292 tmp <- assignTemp arg
293 tmp2 <- newTemp (bWord dflags)
294 emitCCall
295 [(tmp2,NoHint)]
296 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
297 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
298 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
299
300 emitPrimOp dflags [res] GetCCSOfOp [arg]
301 = emitAssign (CmmLocal res) val
302 where
303 val
304 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
305 | otherwise = CmmLit (zeroCLit dflags)
306
307 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
308 = emitAssign (CmmLocal res) curCCS
309
310 emitPrimOp dflags [res] ReadMutVarOp [mutv]
311 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
312
313 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
314 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
315 emitCCall
316 [{-no results-}]
317 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
318 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
319
320 -- #define sizzeofByteArrayzh(r,a) \
321 -- r = ((StgArrBytes *)(a))->bytes
322 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
323 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
324
325 -- #define sizzeofMutableByteArrayzh(r,a) \
326 -- r = ((StgArrBytes *)(a))->bytes
327 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
328 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
329
330 -- #define getSizzeofMutableByteArrayzh(r,a) \
331 -- r = ((StgArrBytes *)(a))->bytes
332 emitPrimOp dflags [res] GetSizeofMutableByteArrayOp [arg]
333 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
334
335
336 -- #define touchzh(o) /* nothing */
337 emitPrimOp _ res@[] TouchOp args@[_arg]
338 = do emitPrimCall res MO_Touch args
339
340 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
341 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
342 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
343
344 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
345 emitPrimOp dflags [res] StableNameToIntOp [arg]
346 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
347
348 -- #define eqStableNamezh(r,sn1,sn2) \
349 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
350 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
351 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
352 cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags),
353 cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags)
354 ])
355
356
357 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
358 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
359
360 -- #define addrToHValuezh(r,a) r=(P_)a
361 emitPrimOp _ [res] AddrToAnyOp [arg]
362 = emitAssign (CmmLocal res) arg
363
364 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
365 -- Note: argument may be tagged!
366 emitPrimOp dflags [res] DataToTagOp [arg]
367 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
368
369 {- Freezing arrays-of-ptrs requires changing an info table, for the
370 benefit of the generational collector. It needs to scavenge mutable
371 objects, even if they are in old space. When they become immutable,
372 they can be removed from this scavenge list. -}
373
374 -- #define unsafeFreezzeArrayzh(r,a)
375 -- {
376 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
377 -- r = a;
378 -- }
379 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
380 = emit $ catAGraphs
381 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
382 mkAssign (CmmLocal res) arg ]
383 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
384 = emit $ catAGraphs
385 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
386 mkAssign (CmmLocal res) arg ]
387 emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
388 = emit $ catAGraphs
389 [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)),
390 mkAssign (CmmLocal res) arg ]
391
392 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
393 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
394 = emitAssign (CmmLocal res) arg
395
396 -- Reading/writing pointer arrays
397
398 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
399 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
400 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
401
402 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
403 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
404 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
405 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
406 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
407 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
408 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
409 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
410 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
411 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
412
413 emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
414 emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
415 emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
416
417 -- Getting the size of pointer arrays
418
419 emitPrimOp dflags [res] SizeofArrayOp [arg]
420 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg
421 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgMutArrPtrs_ptrs dflags))
422 (bWord dflags))
423 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
424 = emitPrimOp dflags [res] SizeofArrayOp [arg]
425 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
426 = emitPrimOp dflags [res] SizeofArrayOp [arg]
427 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
428 = emitPrimOp dflags [res] SizeofArrayOp [arg]
429
430 emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
431 emit $ mkAssign (CmmLocal res)
432 (cmmLoadIndexW dflags arg
433 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgSmallMutArrPtrs_ptrs dflags))
434 (bWord dflags))
435 emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
436 emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
437
438 -- IndexXXXoffAddr
439
440 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
441 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
442 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
443 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
444 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
445 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
446 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
447 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
448 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
449 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
450 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
451 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
452 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
453 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
454 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
455 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
456
457 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
458
459 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
460 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
461 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
462 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
463 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
464 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
465 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
466 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
467 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
468 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
469 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
470 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
471 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
472 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
473 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
474 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
475
476 -- IndexXXXArray
477
478 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
479 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
480 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
481 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
482 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
483 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
484 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
485 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
486 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
487 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
488 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
489 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
490 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
491 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
492 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
493 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
494
495 -- ReadXXXArray, identical to IndexXXXArray.
496
497 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
498 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
499 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
500 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
501 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
502 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
503 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
504 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
505 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
506 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
507 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
508 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
509 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
510 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
511 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
512 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
513
514 -- WriteXXXoffAddr
515
516 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
517 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
518 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
519 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
520 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
521 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
522 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
523 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
524 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
525 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
526 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
527 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
528 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
529 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
530 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
531 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
532
533 -- WriteXXXArray
534
535 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
536 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
537 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
538 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
539 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
540 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
541 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
542 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
543 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
544 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
545 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
546 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
547 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
548 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
549 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
550 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
551
552 -- Copying and setting byte arrays
553 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
554 doCopyByteArrayOp src src_off dst dst_off n
555 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
556 doCopyMutableByteArrayOp src src_off dst dst_off n
557 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
558 doCopyByteArrayToAddrOp src src_off dst n
559 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
560 doCopyMutableByteArrayToAddrOp src src_off dst n
561 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
562 doCopyAddrToByteArrayOp src dst dst_off n
563 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
564 doSetByteArrayOp ba off len c
565
566 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
567 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
568 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
569 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
570
571 -- Population count
572 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
573 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
574 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
575 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
576 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
577
578 -- count leading zeros
579 emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
580 emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
581 emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
582 emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
583 emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
584
585 -- count trailing zeros
586 emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
587 emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
588 emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
589 emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
590 emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
591
592 -- Unsigned int to floating point conversions
593 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
594 (MO_UF_Conv W32) [w]
595 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
596 (MO_UF_Conv W64) [w]
597
598 -- SIMD primops
599 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
600 checkVecCompatibility dflags vcat n w
601 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
602 where
603 zeros :: CmmExpr
604 zeros = CmmLit $ CmmVec (replicate n zero)
605
606 zero :: CmmLit
607 zero = case vcat of
608 IntVec -> CmmInt 0 w
609 WordVec -> CmmInt 0 w
610 FloatVec -> CmmFloat 0 w
611
612 ty :: CmmType
613 ty = vecVmmType vcat n w
614
615 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
616 checkVecCompatibility dflags vcat n w
617 when (length es /= n) $
618 panic "emitPrimOp: VecPackOp has wrong number of arguments"
619 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
620 where
621 zeros :: CmmExpr
622 zeros = CmmLit $ CmmVec (replicate n zero)
623
624 zero :: CmmLit
625 zero = case vcat of
626 IntVec -> CmmInt 0 w
627 WordVec -> CmmInt 0 w
628 FloatVec -> CmmFloat 0 w
629
630 ty :: CmmType
631 ty = vecVmmType vcat n w
632
633 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
634 checkVecCompatibility dflags vcat n w
635 when (length res /= n) $
636 panic "emitPrimOp: VecUnpackOp has wrong number of results"
637 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
638 where
639 ty :: CmmType
640 ty = vecVmmType vcat n w
641
642 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
643 checkVecCompatibility dflags vcat n w
644 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
645 where
646 ty :: CmmType
647 ty = vecVmmType vcat n w
648
649 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
650 checkVecCompatibility dflags vcat n w
651 doIndexByteArrayOp Nothing ty res args
652 where
653 ty :: CmmType
654 ty = vecVmmType vcat n w
655
656 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
657 checkVecCompatibility dflags vcat n w
658 doIndexByteArrayOp Nothing ty res args
659 where
660 ty :: CmmType
661 ty = vecVmmType vcat n w
662
663 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
664 checkVecCompatibility dflags vcat n w
665 doWriteByteArrayOp Nothing ty res args
666 where
667 ty :: CmmType
668 ty = vecVmmType vcat n w
669
670 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
671 checkVecCompatibility dflags vcat n w
672 doIndexOffAddrOp Nothing ty res args
673 where
674 ty :: CmmType
675 ty = vecVmmType vcat n w
676
677 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
678 checkVecCompatibility dflags vcat n w
679 doIndexOffAddrOp Nothing ty res args
680 where
681 ty :: CmmType
682 ty = vecVmmType vcat n w
683
684 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
685 checkVecCompatibility dflags vcat n w
686 doWriteOffAddrOp Nothing ty res args
687 where
688 ty :: CmmType
689 ty = vecVmmType vcat n w
690
691 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
692 checkVecCompatibility dflags vcat n w
693 doIndexByteArrayOpAs Nothing vecty ty res args
694 where
695 vecty :: CmmType
696 vecty = vecVmmType vcat n w
697
698 ty :: CmmType
699 ty = vecCmmCat vcat w
700
701 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
702 checkVecCompatibility dflags vcat n w
703 doIndexByteArrayOpAs Nothing vecty ty res args
704 where
705 vecty :: CmmType
706 vecty = vecVmmType vcat n w
707
708 ty :: CmmType
709 ty = vecCmmCat vcat w
710
711 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
712 checkVecCompatibility dflags vcat n w
713 doWriteByteArrayOp Nothing ty res args
714 where
715 ty :: CmmType
716 ty = vecCmmCat vcat w
717
718 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
719 checkVecCompatibility dflags vcat n w
720 doIndexOffAddrOpAs Nothing vecty ty res args
721 where
722 vecty :: CmmType
723 vecty = vecVmmType vcat n w
724
725 ty :: CmmType
726 ty = vecCmmCat vcat w
727
728 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
729 checkVecCompatibility dflags vcat n w
730 doIndexOffAddrOpAs Nothing vecty ty res args
731 where
732 vecty :: CmmType
733 vecty = vecVmmType vcat n w
734
735 ty :: CmmType
736 ty = vecCmmCat vcat w
737
738 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
739 checkVecCompatibility dflags vcat n w
740 doWriteOffAddrOp Nothing ty res args
741 where
742 ty :: CmmType
743 ty = vecCmmCat vcat w
744
745 -- Prefetch
746 emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
747 emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
748 emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
749 emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
750
751 emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
752 emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
753 emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
754 emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
755
756 emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
757 emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
758 emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
759 emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
760
761 emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
762 emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
763 emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
764 emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
765
766 -- Atomic read-modify-write
767 emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
768 doAtomicRMW res AMO_Add mba ix (bWord dflags) n
769 emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
770 doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
771 emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
772 doAtomicRMW res AMO_And mba ix (bWord dflags) n
773 emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
774 doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
775 emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
776 doAtomicRMW res AMO_Or mba ix (bWord dflags) n
777 emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
778 doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
779 emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
780 doAtomicReadByteArray res mba ix (bWord dflags)
781 emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
782 doAtomicWriteByteArray mba ix (bWord dflags) val
783 emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
784 doCasByteArray res mba ix (bWord dflags) old new
785
786 -- The rest just translate straightforwardly
787 emitPrimOp dflags [res] op [arg]
788 | nopOp op
789 = emitAssign (CmmLocal res) arg
790
791 | Just (mop,rep) <- narrowOp op
792 = emitAssign (CmmLocal res) $
793 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
794
795 emitPrimOp dflags r@[res] op args
796 | Just prim <- callishOp op
797 = do emitPrimCall r prim args
798
799 | Just mop <- translateOp dflags op
800 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
801 emit stmt
802
803 emitPrimOp dflags results op args
804 = case callishPrimOpSupported dflags op of
805 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
806 Right gen -> gen results args
807
808 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
809
810 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
811 callishPrimOpSupported dflags op
812 = case op of
813 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
814 | otherwise -> Right (genericIntQuotRemOp dflags)
815
816 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
817 | otherwise -> Right (genericWordQuotRemOp dflags)
818
819 WordQuotRem2Op | (ncg && x86ish)
820 || llvm -> Left (MO_U_QuotRem2 (wordWidth dflags))
821 | otherwise -> Right (genericWordQuotRem2Op dflags)
822
823 WordAdd2Op | (ncg && x86ish)
824 || llvm -> Left (MO_Add2 (wordWidth dflags))
825 | otherwise -> Right genericWordAdd2Op
826
827 IntAddCOp | (ncg && x86ish)
828 || llvm -> Left (MO_AddIntC (wordWidth dflags))
829 | otherwise -> Right genericIntAddCOp
830
831 IntSubCOp | (ncg && x86ish)
832 || llvm -> Left (MO_SubIntC (wordWidth dflags))
833 | otherwise -> Right genericIntSubCOp
834
835 WordMul2Op | ncg && x86ish
836 || llvm -> Left (MO_U_Mul2 (wordWidth dflags))
837 | otherwise -> Right genericWordMul2Op
838
839 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
840 where
841 ncg = case hscTarget dflags of
842 HscAsm -> True
843 _ -> False
844 llvm = case hscTarget dflags of
845 HscLlvm -> True
846 _ -> False
847 x86ish = case platformArch (targetPlatform dflags) of
848 ArchX86 -> True
849 ArchX86_64 -> True
850 _ -> False
851
852 genericIntQuotRemOp :: DynFlags -> GenericOp
853 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
854 = emit $ mkAssign (CmmLocal res_q)
855 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
856 mkAssign (CmmLocal res_r)
857 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
858 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
859
860 genericWordQuotRemOp :: DynFlags -> GenericOp
861 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
862 = emit $ mkAssign (CmmLocal res_q)
863 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
864 mkAssign (CmmLocal res_r)
865 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
866 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
867
868 genericWordQuotRem2Op :: DynFlags -> GenericOp
869 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
870 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
871 where ty = cmmExprType dflags arg_x_high
872 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
873 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
874 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
875 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
876 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
877 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
878 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
879 zero = lit 0
880 one = lit 1
881 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
882 lit i = CmmLit (CmmInt i (wordWidth dflags))
883
884 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
885 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
886 mkAssign (CmmLocal res_r) high)
887 f i acc high low =
888 do roverflowedBit <- newTemp ty
889 rhigh' <- newTemp ty
890 rhigh'' <- newTemp ty
891 rlow' <- newTemp ty
892 risge <- newTemp ty
893 racc' <- newTemp ty
894 let high' = CmmReg (CmmLocal rhigh')
895 isge = CmmReg (CmmLocal risge)
896 overflowedBit = CmmReg (CmmLocal roverflowedBit)
897 let this = catAGraphs
898 [mkAssign (CmmLocal roverflowedBit)
899 (shr high negone),
900 mkAssign (CmmLocal rhigh')
901 (or (shl high one) (shr low negone)),
902 mkAssign (CmmLocal rlow')
903 (shl low one),
904 mkAssign (CmmLocal risge)
905 (or (overflowedBit `ne` zero)
906 (high' `ge` arg_y)),
907 mkAssign (CmmLocal rhigh'')
908 (high' `minus` (arg_y `times` isge)),
909 mkAssign (CmmLocal racc')
910 (or (shl acc one) isge)]
911 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
912 (CmmReg (CmmLocal rhigh''))
913 (CmmReg (CmmLocal rlow'))
914 return (this <*> rest)
915 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
916
917 genericWordAdd2Op :: GenericOp
918 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
919 = do dflags <- getDynFlags
920 r1 <- newTemp (cmmExprType dflags arg_x)
921 r2 <- newTemp (cmmExprType dflags arg_x)
922 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
923 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
924 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
925 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
926 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
927 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
928 (wordWidth dflags))
929 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
930 emit $ catAGraphs
931 [mkAssign (CmmLocal r1)
932 (add (bottomHalf arg_x) (bottomHalf arg_y)),
933 mkAssign (CmmLocal r2)
934 (add (topHalf (CmmReg (CmmLocal r1)))
935 (add (topHalf arg_x) (topHalf arg_y))),
936 mkAssign (CmmLocal res_h)
937 (topHalf (CmmReg (CmmLocal r2))),
938 mkAssign (CmmLocal res_l)
939 (or (toTopHalf (CmmReg (CmmLocal r2)))
940 (bottomHalf (CmmReg (CmmLocal r1))))]
941 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
942
943 genericIntAddCOp :: GenericOp
944 genericIntAddCOp [res_r, res_c] [aa, bb]
945 {-
946 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
947 C, and without needing any comparisons. This may not be the
948 fastest way to do it - if you have better code, please send it! --SDM
949
950 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
951
952 We currently don't make use of the r value if c is != 0 (i.e.
953 overflow), we just convert to big integers and try again. This
954 could be improved by making r and c the correct values for
955 plugging into a new J#.
956
957 { r = ((I_)(a)) + ((I_)(b)); \
958 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
959 >> (BITS_IN (I_) - 1); \
960 }
961 Wading through the mass of bracketry, it seems to reduce to:
962 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
963
964 -}
965 = do dflags <- getDynFlags
966 emit $ catAGraphs [
967 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
968 mkAssign (CmmLocal res_c) $
969 CmmMachOp (mo_wordUShr dflags) [
970 CmmMachOp (mo_wordAnd dflags) [
971 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
972 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
973 ],
974 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
975 ]
976 ]
977 genericIntAddCOp _ _ = panic "genericIntAddCOp"
978
979 genericIntSubCOp :: GenericOp
980 genericIntSubCOp [res_r, res_c] [aa, bb]
981 {- Similarly:
982 #define subIntCzh(r,c,a,b) \
983 { r = ((I_)(a)) - ((I_)(b)); \
984 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
985 >> (BITS_IN (I_) - 1); \
986 }
987
988 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
989 -}
990 = do dflags <- getDynFlags
991 emit $ catAGraphs [
992 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
993 mkAssign (CmmLocal res_c) $
994 CmmMachOp (mo_wordUShr dflags) [
995 CmmMachOp (mo_wordAnd dflags) [
996 CmmMachOp (mo_wordXor dflags) [aa,bb],
997 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
998 ],
999 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1000 ]
1001 ]
1002 genericIntSubCOp _ _ = panic "genericIntSubCOp"
1003
1004 genericWordMul2Op :: GenericOp
1005 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
1006 = do dflags <- getDynFlags
1007 let t = cmmExprType dflags arg_x
1008 xlyl <- liftM CmmLocal $ newTemp t
1009 xlyh <- liftM CmmLocal $ newTemp t
1010 xhyl <- liftM CmmLocal $ newTemp t
1011 r <- liftM CmmLocal $ newTemp t
1012 -- This generic implementation is very simple and slow. We might
1013 -- well be able to do better, but for now this at least works.
1014 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
1015 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
1016 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
1017 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
1018 sum = foldl1 add
1019 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
1020 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
1021 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
1022 (wordWidth dflags))
1023 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
1024 emit $ catAGraphs
1025 [mkAssign xlyl
1026 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
1027 mkAssign xlyh
1028 (mul (bottomHalf arg_x) (topHalf arg_y)),
1029 mkAssign xhyl
1030 (mul (topHalf arg_x) (bottomHalf arg_y)),
1031 mkAssign r
1032 (sum [topHalf (CmmReg xlyl),
1033 bottomHalf (CmmReg xhyl),
1034 bottomHalf (CmmReg xlyh)]),
1035 mkAssign (CmmLocal res_l)
1036 (or (bottomHalf (CmmReg xlyl))
1037 (toTopHalf (CmmReg r))),
1038 mkAssign (CmmLocal res_h)
1039 (sum [mul (topHalf arg_x) (topHalf arg_y),
1040 topHalf (CmmReg xhyl),
1041 topHalf (CmmReg xlyh),
1042 topHalf (CmmReg r)])]
1043 genericWordMul2Op _ _ = panic "genericWordMul2Op"
1044
1045 -- These PrimOps are NOPs in Cmm
1046
1047 nopOp :: PrimOp -> Bool
1048 nopOp Int2WordOp = True
1049 nopOp Word2IntOp = True
1050 nopOp Int2AddrOp = True
1051 nopOp Addr2IntOp = True
1052 nopOp ChrOp = True -- Int# and Char# are rep'd the same
1053 nopOp OrdOp = True
1054 nopOp _ = False
1055
1056 -- These PrimOps turn into double casts
1057
1058 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
1059 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
1060 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
1061 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
1062 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
1063 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
1064 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
1065 narrowOp _ = Nothing
1066
1067 -- Native word signless ops
1068
1069 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
1070 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
1071 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
1072 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
1073 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
1074 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
1075 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
1076
1077 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
1078 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
1079 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
1080 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
1081 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
1082 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
1083
1084 translateOp dflags AndOp = Just (mo_wordAnd dflags)
1085 translateOp dflags OrOp = Just (mo_wordOr dflags)
1086 translateOp dflags XorOp = Just (mo_wordXor dflags)
1087 translateOp dflags NotOp = Just (mo_wordNot dflags)
1088 translateOp dflags SllOp = Just (mo_wordShl dflags)
1089 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
1090
1091 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
1092
1093 -- Native word signed ops
1094
1095 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
1096 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
1097 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
1098 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
1099 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
1100
1101
1102 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
1103 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
1104 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
1105 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
1106
1107 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
1108 translateOp dflags OrIOp = Just (mo_wordOr dflags)
1109 translateOp dflags XorIOp = Just (mo_wordXor dflags)
1110 translateOp dflags NotIOp = Just (mo_wordNot dflags)
1111 translateOp dflags ISllOp = Just (mo_wordShl dflags)
1112 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
1113 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
1114
1115 -- Native word unsigned ops
1116
1117 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
1118 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
1119 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
1120 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
1121
1122 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
1123 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
1124 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
1125
1126 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
1127 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
1128 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
1129 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
1130
1131 -- Char# ops
1132
1133 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
1134 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
1135 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
1136 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
1137 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
1138 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
1139
1140 -- Double ops
1141
1142 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
1143 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
1144 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
1145 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
1146 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
1147 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
1148
1149 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
1150 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
1151 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
1152 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
1153 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
1154
1155 -- Float ops
1156
1157 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
1158 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
1159 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
1160 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1161 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1162 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1163
1164 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1165 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1166 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1167 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1168 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1169
1170 -- Vector ops
1171
1172 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1173 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1174 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1175 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1176 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1177
1178 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1179 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1180 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1181 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1182 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1183 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1184
1185 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1186 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1187 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1188 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1189 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1190
1191 -- Conversions
1192
1193 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1194 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1195
1196 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1197 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1198
1199 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1200 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1201
1202 -- Word comparisons masquerading as more exotic things.
1203
1204 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1205 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1206 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1207 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1208 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1209 translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
1210 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1211 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1212
1213 translateOp _ _ = Nothing
1214
1215 -- These primops are implemented by CallishMachOps, because they sometimes
1216 -- turn into foreign calls depending on the backend.
1217
1218 callishOp :: PrimOp -> Maybe CallishMachOp
1219 callishOp DoublePowerOp = Just MO_F64_Pwr
1220 callishOp DoubleSinOp = Just MO_F64_Sin
1221 callishOp DoubleCosOp = Just MO_F64_Cos
1222 callishOp DoubleTanOp = Just MO_F64_Tan
1223 callishOp DoubleSinhOp = Just MO_F64_Sinh
1224 callishOp DoubleCoshOp = Just MO_F64_Cosh
1225 callishOp DoubleTanhOp = Just MO_F64_Tanh
1226 callishOp DoubleAsinOp = Just MO_F64_Asin
1227 callishOp DoubleAcosOp = Just MO_F64_Acos
1228 callishOp DoubleAtanOp = Just MO_F64_Atan
1229 callishOp DoubleLogOp = Just MO_F64_Log
1230 callishOp DoubleExpOp = Just MO_F64_Exp
1231 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1232
1233 callishOp FloatPowerOp = Just MO_F32_Pwr
1234 callishOp FloatSinOp = Just MO_F32_Sin
1235 callishOp FloatCosOp = Just MO_F32_Cos
1236 callishOp FloatTanOp = Just MO_F32_Tan
1237 callishOp FloatSinhOp = Just MO_F32_Sinh
1238 callishOp FloatCoshOp = Just MO_F32_Cosh
1239 callishOp FloatTanhOp = Just MO_F32_Tanh
1240 callishOp FloatAsinOp = Just MO_F32_Asin
1241 callishOp FloatAcosOp = Just MO_F32_Acos
1242 callishOp FloatAtanOp = Just MO_F32_Atan
1243 callishOp FloatLogOp = Just MO_F32_Log
1244 callishOp FloatExpOp = Just MO_F32_Exp
1245 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1246
1247 callishOp _ = Nothing
1248
1249 ------------------------------------------------------------------------------
1250 -- Helpers for translating various minor variants of array indexing.
1251
1252 doIndexOffAddrOp :: Maybe MachOp
1253 -> CmmType
1254 -> [LocalReg]
1255 -> [CmmExpr]
1256 -> FCode ()
1257 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1258 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1259 doIndexOffAddrOp _ _ _ _
1260 = panic "StgCmmPrim: doIndexOffAddrOp"
1261
1262 doIndexOffAddrOpAs :: Maybe MachOp
1263 -> CmmType
1264 -> CmmType
1265 -> [LocalReg]
1266 -> [CmmExpr]
1267 -> FCode ()
1268 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1269 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1270 doIndexOffAddrOpAs _ _ _ _ _
1271 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1272
1273 doIndexByteArrayOp :: Maybe MachOp
1274 -> CmmType
1275 -> [LocalReg]
1276 -> [CmmExpr]
1277 -> FCode ()
1278 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1279 = do dflags <- getDynFlags
1280 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1281 doIndexByteArrayOp _ _ _ _
1282 = panic "StgCmmPrim: doIndexByteArrayOp"
1283
1284 doIndexByteArrayOpAs :: Maybe MachOp
1285 -> CmmType
1286 -> CmmType
1287 -> [LocalReg]
1288 -> [CmmExpr]
1289 -> FCode ()
1290 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1291 = do dflags <- getDynFlags
1292 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1293 doIndexByteArrayOpAs _ _ _ _ _
1294 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1295
1296 doReadPtrArrayOp :: LocalReg
1297 -> CmmExpr
1298 -> CmmExpr
1299 -> FCode ()
1300 doReadPtrArrayOp res addr idx
1301 = do dflags <- getDynFlags
1302 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1303
1304 doWriteOffAddrOp :: Maybe MachOp
1305 -> CmmType
1306 -> [LocalReg]
1307 -> [CmmExpr]
1308 -> FCode ()
1309 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1310 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1311 doWriteOffAddrOp _ _ _ _
1312 = panic "StgCmmPrim: doWriteOffAddrOp"
1313
1314 doWriteByteArrayOp :: Maybe MachOp
1315 -> CmmType
1316 -> [LocalReg]
1317 -> [CmmExpr]
1318 -> FCode ()
1319 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1320 = do dflags <- getDynFlags
1321 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1322 doWriteByteArrayOp _ _ _ _
1323 = panic "StgCmmPrim: doWriteByteArrayOp"
1324
1325 doWritePtrArrayOp :: CmmExpr
1326 -> CmmExpr
1327 -> CmmExpr
1328 -> FCode ()
1329 doWritePtrArrayOp addr idx val
1330 = do dflags <- getDynFlags
1331 let ty = cmmExprType dflags val
1332 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1333 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1334 -- the write barrier. We must write a byte into the mark table:
1335 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1336 emit $ mkStore (
1337 cmmOffsetExpr dflags
1338 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1339 (loadArrPtrsSize dflags addr))
1340 (CmmMachOp (mo_wordUShr dflags) [idx,
1341 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1342 ) (CmmLit (CmmInt 1 W8))
1343
1344 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1345 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1346 where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1347
1348 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1349 -> Maybe MachOp -- Optional result cast
1350 -> CmmType -- Type of element we are accessing
1351 -> LocalReg -- Destination
1352 -> CmmExpr -- Base address
1353 -> CmmType -- Type of element by which we are indexing
1354 -> CmmExpr -- Index
1355 -> FCode ()
1356 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1357 = do dflags <- getDynFlags
1358 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1359 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1360 = do dflags <- getDynFlags
1361 emitAssign (CmmLocal res) (CmmMachOp cast [
1362 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1363
1364 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1365 -> Maybe MachOp -- Optional value cast
1366 -> CmmExpr -- Base address
1367 -> CmmType -- Type of element by which we are indexing
1368 -> CmmExpr -- Index
1369 -> CmmExpr -- Value to write
1370 -> FCode ()
1371 mkBasicIndexedWrite off Nothing base idx_ty idx val
1372 = do dflags <- getDynFlags
1373 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1374 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1375 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1376
1377 -- ----------------------------------------------------------------------------
1378 -- Misc utils
1379
1380 cmmIndexOffExpr :: DynFlags
1381 -> ByteOff -- Initial offset in bytes
1382 -> Width -- Width of element by which we are indexing
1383 -> CmmExpr -- Base address
1384 -> CmmExpr -- Index
1385 -> CmmExpr
1386 cmmIndexOffExpr dflags off width base idx
1387 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1388
1389 cmmLoadIndexOffExpr :: DynFlags
1390 -> ByteOff -- Initial offset in bytes
1391 -> CmmType -- Type of element we are accessing
1392 -> CmmExpr -- Base address
1393 -> CmmType -- Type of element by which we are indexing
1394 -> CmmExpr -- Index
1395 -> CmmExpr
1396 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1397 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1398
1399 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1400 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1401
1402 ------------------------------------------------------------------------------
1403 -- Helpers for translating vector primops.
1404
1405 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1406 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1407
1408 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1409 vecCmmCat IntVec = cmmBits
1410 vecCmmCat WordVec = cmmBits
1411 vecCmmCat FloatVec = cmmFloat
1412
1413 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1414 vecElemInjectCast _ FloatVec _ = Nothing
1415 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1416 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1417 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1418 vecElemInjectCast _ IntVec W64 = Nothing
1419 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1420 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1421 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1422 vecElemInjectCast _ WordVec W64 = Nothing
1423 vecElemInjectCast _ _ _ = Nothing
1424
1425 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1426 vecElemProjectCast _ FloatVec _ = Nothing
1427 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1428 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1429 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1430 vecElemProjectCast _ IntVec W64 = Nothing
1431 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1432 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1433 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1434 vecElemProjectCast _ WordVec W64 = Nothing
1435 vecElemProjectCast _ _ _ = Nothing
1436
1437 -- Check to make sure that we can generate code for the specified vector type
1438 -- given the current set of dynamic flags.
1439 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1440 checkVecCompatibility dflags vcat l w = do
1441 when (hscTarget dflags /= HscLlvm) $ do
1442 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1443 ,"Please use -fllvm."]
1444 check vecWidth vcat l w
1445 where
1446 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1447 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1448 sorry $ "128-bit wide single-precision floating point " ++
1449 "SIMD vector instructions require at least -msse."
1450 check W128 _ _ _ | not (isSse2Enabled dflags) =
1451 sorry $ "128-bit wide integer and double precision " ++
1452 "SIMD vector instructions require at least -msse2."
1453 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1454 sorry $ "256-bit wide floating point " ++
1455 "SIMD vector instructions require at least -mavx."
1456 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1457 sorry $ "256-bit wide integer " ++
1458 "SIMD vector instructions require at least -mavx2."
1459 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1460 sorry $ "512-bit wide " ++
1461 "SIMD vector instructions require -mavx512f."
1462 check _ _ _ _ = return ()
1463
1464 vecWidth = typeWidth (vecVmmType vcat l w)
1465
1466 ------------------------------------------------------------------------------
1467 -- Helpers for translating vector packing and unpacking.
1468
1469 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1470 -> CmmType -- Type of vector
1471 -> CmmExpr -- Initial vector
1472 -> [CmmExpr] -- Elements
1473 -> CmmFormal -- Destination for result
1474 -> FCode ()
1475 doVecPackOp maybe_pre_write_cast ty z es res = do
1476 dst <- newTemp ty
1477 emitAssign (CmmLocal dst) z
1478 vecPack dst es 0
1479 where
1480 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1481 vecPack src [] _ =
1482 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1483
1484 vecPack src (e : es) i = do
1485 dst <- newTemp ty
1486 if isFloatType (vecElemType ty)
1487 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1488 [CmmReg (CmmLocal src), cast e, iLit])
1489 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1490 [CmmReg (CmmLocal src), cast e, iLit])
1491 vecPack dst es (i + 1)
1492 where
1493 -- vector indices are always 32-bits
1494 iLit = CmmLit (CmmInt (toInteger i) W32)
1495
1496 cast :: CmmExpr -> CmmExpr
1497 cast val = case maybe_pre_write_cast of
1498 Nothing -> val
1499 Just cast -> CmmMachOp cast [val]
1500
1501 len :: Length
1502 len = vecLength ty
1503
1504 wid :: Width
1505 wid = typeWidth (vecElemType ty)
1506
1507 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1508 -> CmmType -- Type of vector
1509 -> CmmExpr -- Vector
1510 -> [CmmFormal] -- Element results
1511 -> FCode ()
1512 doVecUnpackOp maybe_post_read_cast ty e res =
1513 vecUnpack res 0
1514 where
1515 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1516 vecUnpack [] _ =
1517 return ()
1518
1519 vecUnpack (r : rs) i = do
1520 if isFloatType (vecElemType ty)
1521 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1522 [e, iLit]))
1523 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1524 [e, iLit]))
1525 vecUnpack rs (i + 1)
1526 where
1527 -- vector indices are always 32-bits
1528 iLit = CmmLit (CmmInt (toInteger i) W32)
1529
1530 cast :: CmmExpr -> CmmExpr
1531 cast val = case maybe_post_read_cast of
1532 Nothing -> val
1533 Just cast -> CmmMachOp cast [val]
1534
1535 len :: Length
1536 len = vecLength ty
1537
1538 wid :: Width
1539 wid = typeWidth (vecElemType ty)
1540
1541 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1542 -> CmmType -- Vector type
1543 -> CmmExpr -- Source vector
1544 -> CmmExpr -- Element
1545 -> CmmExpr -- Index at which to insert element
1546 -> CmmFormal -- Destination for result
1547 -> FCode ()
1548 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1549 dflags <- getDynFlags
1550 -- vector indices are always 32-bits
1551 let idx' :: CmmExpr
1552 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1553 if isFloatType (vecElemType ty)
1554 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1555 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1556 where
1557 cast :: CmmExpr -> CmmExpr
1558 cast val = case maybe_pre_write_cast of
1559 Nothing -> val
1560 Just cast -> CmmMachOp cast [val]
1561
1562 len :: Length
1563 len = vecLength ty
1564
1565 wid :: Width
1566 wid = typeWidth (vecElemType ty)
1567
1568 ------------------------------------------------------------------------------
1569 -- Helpers for translating prefetching.
1570
1571
1572 -- | Translate byte array prefetch operations into proper primcalls.
1573 doPrefetchByteArrayOp :: Int
1574 -> [CmmExpr]
1575 -> FCode ()
1576 doPrefetchByteArrayOp locality [addr,idx]
1577 = do dflags <- getDynFlags
1578 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1579 doPrefetchByteArrayOp _ _
1580 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1581
1582 -- | Translate mutable byte array prefetch operations into proper primcalls.
1583 doPrefetchMutableByteArrayOp :: Int
1584 -> [CmmExpr]
1585 -> FCode ()
1586 doPrefetchMutableByteArrayOp locality [addr,idx]
1587 = do dflags <- getDynFlags
1588 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1589 doPrefetchMutableByteArrayOp _ _
1590 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1591
1592 -- | Translate address prefetch operations into proper primcalls.
1593 doPrefetchAddrOp ::Int
1594 -> [CmmExpr]
1595 -> FCode ()
1596 doPrefetchAddrOp locality [addr,idx]
1597 = mkBasicPrefetch locality 0 addr idx
1598 doPrefetchAddrOp _ _
1599 = panic "StgCmmPrim: doPrefetchAddrOp"
1600
1601 -- | Translate value prefetch operations into proper primcalls.
1602 doPrefetchValueOp :: Int
1603 -> [CmmExpr]
1604 -> FCode ()
1605 doPrefetchValueOp locality [addr]
1606 = do dflags <- getDynFlags
1607 mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
1608 doPrefetchValueOp _ _
1609 = panic "StgCmmPrim: doPrefetchValueOp"
1610
1611 -- | helper to generate prefetch primcalls
1612 mkBasicPrefetch :: Int -- Locality level 0-3
1613 -> ByteOff -- Initial offset in bytes
1614 -> CmmExpr -- Base address
1615 -> CmmExpr -- Index
1616 -> FCode ()
1617 mkBasicPrefetch locality off base idx
1618 = do dflags <- getDynFlags
1619 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1620 return ()
1621
1622 -- ----------------------------------------------------------------------------
1623 -- Allocating byte arrays
1624
1625 -- | Takes a register to return the newly allocated array in and the
1626 -- size of the new array in bytes. Allocates a new
1627 -- 'MutableByteArray#'.
1628 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1629 doNewByteArrayOp res_r n = do
1630 dflags <- getDynFlags
1631
1632 let info_ptr = mkLblExpr mkArrWords_infoLabel
1633 rep = arrWordsRep dflags n
1634
1635 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1636 (mkIntExpr dflags (nonHdrSize dflags rep))
1637 (zeroExpr dflags)
1638
1639 let hdr_size = fixedHdrSize dflags
1640
1641 base <- allocHeapClosure rep info_ptr curCCS
1642 [ (mkIntExpr dflags n,
1643 hdr_size + oFFSET_StgArrBytes_bytes dflags)
1644 ]
1645
1646 emit $ mkAssign (CmmLocal res_r) base
1647
1648 -- ----------------------------------------------------------------------------
1649 -- Copying byte arrays
1650
1651 -- | Takes a source 'ByteArray#', an offset in the source array, a
1652 -- destination 'MutableByteArray#', an offset into the destination
1653 -- array, and the number of bytes to copy. Copies the given number of
1654 -- bytes from the source array to the destination array.
1655 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1656 -> FCode ()
1657 doCopyByteArrayOp = emitCopyByteArray copy
1658 where
1659 -- Copy data (we assume the arrays aren't overlapping since
1660 -- they're of different types)
1661 copy _src _dst dst_p src_p bytes =
1662 emitMemcpyCall dst_p src_p bytes 1
1663
1664 -- | Takes a source 'MutableByteArray#', an offset in the source
1665 -- array, a destination 'MutableByteArray#', an offset into the
1666 -- destination array, and the number of bytes to copy. Copies the
1667 -- given number of bytes from the source array to the destination
1668 -- array.
1669 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1670 -> FCode ()
1671 doCopyMutableByteArrayOp = emitCopyByteArray copy
1672 where
1673 -- The only time the memory might overlap is when the two arrays
1674 -- we were provided are the same array!
1675 -- TODO: Optimize branch for common case of no aliasing.
1676 copy src dst dst_p src_p bytes = do
1677 dflags <- getDynFlags
1678 [moveCall, cpyCall] <- forkAlts [
1679 getCode $ emitMemmoveCall dst_p src_p bytes 1,
1680 getCode $ emitMemcpyCall dst_p src_p bytes 1
1681 ]
1682 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1683
1684 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1685 -> FCode ())
1686 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1687 -> FCode ()
1688 emitCopyByteArray copy src src_off dst dst_off n = do
1689 dflags <- getDynFlags
1690 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1691 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1692 copy src dst dst_p src_p n
1693
1694 -- | Takes a source 'ByteArray#', an offset in the source array, a
1695 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1696 -- number of bytes from the source array to the destination memory region.
1697 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1698 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1699 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1700 dflags <- getDynFlags
1701 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1702 emitMemcpyCall dst_p src_p bytes 1
1703
1704 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1705 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1706 -- number of bytes from the source array to the destination memory region.
1707 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1708 -> FCode ()
1709 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1710
1711 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1712 -- the destination array, and the number of bytes to copy. Copies the given
1713 -- number of bytes from the source memory region to the destination array.
1714 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1715 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1716 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1717 dflags <- getDynFlags
1718 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1719 emitMemcpyCall dst_p src_p bytes 1
1720
1721
1722 -- ----------------------------------------------------------------------------
1723 -- Setting byte arrays
1724
1725 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1726 -- and a byte, and sets each of the selected bytes in the array to the
1727 -- character.
1728 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1729 -> FCode ()
1730 doSetByteArrayOp ba off len c
1731 = do dflags <- getDynFlags
1732 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1733 emitMemsetCall p c len 1
1734
1735 -- ----------------------------------------------------------------------------
1736 -- Allocating arrays
1737
1738 -- | Allocate a new array.
1739 doNewArrayOp :: CmmFormal -- ^ return register
1740 -> SMRep -- ^ representation of the array
1741 -> CLabel -- ^ info pointer
1742 -> [(CmmExpr, ByteOff)] -- ^ header payload
1743 -> WordOff -- ^ array size
1744 -> CmmExpr -- ^ initial element
1745 -> FCode ()
1746 doNewArrayOp res_r rep info payload n init = do
1747 dflags <- getDynFlags
1748
1749 let info_ptr = mkLblExpr info
1750
1751 tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
1752 (mkIntExpr dflags (nonHdrSize dflags rep))
1753 (zeroExpr dflags)
1754
1755 base <- allocHeapClosure rep info_ptr curCCS payload
1756
1757 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1758 emit $ mkAssign arr base
1759
1760 -- Initialise all elements of the the array
1761 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep)
1762 for <- newLabelC
1763 emitLabel for
1764 let loopBody =
1765 [ mkStore (CmmReg (CmmLocal p)) init
1766 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1767 , mkBranch for ]
1768 emit =<< mkCmmIfThen
1769 (cmmULtWord dflags (CmmReg (CmmLocal p))
1770 (cmmOffsetW dflags (CmmReg arr)
1771 (hdrSizeW dflags rep + n)))
1772 (catAGraphs loopBody)
1773
1774 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1775
1776 -- ----------------------------------------------------------------------------
1777 -- Copying pointer arrays
1778
1779 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1780 -- nowhere else in the code generator. This is mostly because these
1781 -- "primitive" ops result in a surprisingly large amount of code. It
1782 -- will likely be worthwhile to optimize what is emitted here, so that
1783 -- our optimization passes don't waste time repeatedly optimizing the
1784 -- same bits of code.
1785
1786 -- More closely imitates 'assignTemp' from the old code generator, which
1787 -- returns a CmmExpr rather than a LocalReg.
1788 assignTempE :: CmmExpr -> FCode CmmExpr
1789 assignTempE e = do
1790 t <- assignTemp e
1791 return (CmmReg (CmmLocal t))
1792
1793 -- | Takes a source 'Array#', an offset in the source array, a
1794 -- destination 'MutableArray#', an offset into the destination array,
1795 -- and the number of elements to copy. Copies the given number of
1796 -- elements from the source array to the destination array.
1797 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1798 -> FCode ()
1799 doCopyArrayOp = emitCopyArray copy
1800 where
1801 -- Copy data (we assume the arrays aren't overlapping since
1802 -- they're of different types)
1803 copy _src _dst dst_p src_p bytes =
1804 do dflags <- getDynFlags
1805 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1806 (wORD_SIZE dflags)
1807
1808
1809 -- | Takes a source 'MutableArray#', an offset in the source array, a
1810 -- destination 'MutableArray#', an offset into the destination array,
1811 -- and the number of elements to copy. Copies the given number of
1812 -- elements from the source array to the destination array.
1813 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1814 -> FCode ()
1815 doCopyMutableArrayOp = emitCopyArray copy
1816 where
1817 -- The only time the memory might overlap is when the two arrays
1818 -- we were provided are the same array!
1819 -- TODO: Optimize branch for common case of no aliasing.
1820 copy src dst dst_p src_p bytes = do
1821 dflags <- getDynFlags
1822 [moveCall, cpyCall] <- forkAlts [
1823 getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1824 (wORD_SIZE dflags),
1825 getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1826 (wORD_SIZE dflags)
1827 ]
1828 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1829
1830 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1831 -> FCode ()) -- ^ copy function
1832 -> CmmExpr -- ^ source array
1833 -> CmmExpr -- ^ offset in source array
1834 -> CmmExpr -- ^ destination array
1835 -> CmmExpr -- ^ offset in destination array
1836 -> WordOff -- ^ number of elements to copy
1837 -> FCode ()
1838 emitCopyArray copy src0 src_off dst0 dst_off0 n = do
1839 dflags <- getDynFlags
1840 when (n /= 0) $ do
1841 -- Passed as arguments (be careful)
1842 src <- assignTempE src0
1843 dst <- assignTempE dst0
1844 dst_off <- assignTempE dst_off0
1845
1846 -- Set the dirty bit in the header.
1847 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1848
1849 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
1850 (arrPtrsHdrSize dflags)
1851 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1852 src_p <- assignTempE $ cmmOffsetExprW dflags
1853 (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1854 let bytes = wordsToBytes dflags n
1855
1856 copy src dst dst_p src_p bytes
1857
1858 -- The base address of the destination card table
1859 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
1860 (loadArrPtrsSize dflags dst)
1861
1862 emitSetCards dst_off dst_cards_p n
1863
1864 doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1865 -> FCode ()
1866 doCopySmallArrayOp = emitCopySmallArray copy
1867 where
1868 -- Copy data (we assume the arrays aren't overlapping since
1869 -- they're of different types)
1870 copy _src _dst dst_p src_p bytes =
1871 do dflags <- getDynFlags
1872 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1873 (wORD_SIZE dflags)
1874
1875
1876 doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1877 -> FCode ()
1878 doCopySmallMutableArrayOp = emitCopySmallArray copy
1879 where
1880 -- The only time the memory might overlap is when the two arrays
1881 -- we were provided are the same array!
1882 -- TODO: Optimize branch for common case of no aliasing.
1883 copy src dst dst_p src_p bytes = do
1884 dflags <- getDynFlags
1885 [moveCall, cpyCall] <- forkAlts
1886 [ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1887 (wORD_SIZE dflags)
1888 , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1889 (wORD_SIZE dflags)
1890 ]
1891 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1892
1893 emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1894 -> FCode ()) -- ^ copy function
1895 -> CmmExpr -- ^ source array
1896 -> CmmExpr -- ^ offset in source array
1897 -> CmmExpr -- ^ destination array
1898 -> CmmExpr -- ^ offset in destination array
1899 -> WordOff -- ^ number of elements to copy
1900 -> FCode ()
1901 emitCopySmallArray copy src0 src_off dst0 dst_off n = do
1902 dflags <- getDynFlags
1903
1904 -- Passed as arguments (be careful)
1905 src <- assignTempE src0
1906 dst <- assignTempE dst0
1907
1908 -- Set the dirty bit in the header.
1909 emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
1910
1911 dst_p <- assignTempE $ cmmOffsetExprW dflags
1912 (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
1913 src_p <- assignTempE $ cmmOffsetExprW dflags
1914 (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
1915 let bytes = wordsToBytes dflags n
1916
1917 copy src dst dst_p src_p bytes
1918
1919 -- | Takes an info table label, a register to return the newly
1920 -- allocated array in, a source array, an offset in the source array,
1921 -- and the number of elements to copy. Allocates a new array and
1922 -- initializes it from the source array.
1923 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1924 -> FCode ()
1925 emitCloneArray info_p res_r src src_off n = do
1926 dflags <- getDynFlags
1927
1928 let info_ptr = mkLblExpr info_p
1929 rep = arrPtrsRep dflags n
1930
1931 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
1932 (mkIntExpr dflags (nonHdrSize dflags rep))
1933 (zeroExpr dflags)
1934
1935 let hdr_size = fixedHdrSize dflags
1936
1937 base <- allocHeapClosure rep info_ptr curCCS
1938 [ (mkIntExpr dflags n,
1939 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
1940 , (mkIntExpr dflags (nonHdrSizeW rep),
1941 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
1942 ]
1943
1944 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1945 emit $ mkAssign arr base
1946
1947 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
1948 (arrPtrsHdrSize dflags)
1949 src_p <- assignTempE $ cmmOffsetExprW dflags src
1950 (cmmAddWord dflags
1951 (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
1952
1953 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
1954 (wORD_SIZE dflags)
1955
1956 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1957
1958 -- | Takes an info table label, a register to return the newly
1959 -- allocated array in, a source array, an offset in the source array,
1960 -- and the number of elements to copy. Allocates a new array and
1961 -- initializes it from the source array.
1962 emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1963 -> FCode ()
1964 emitCloneSmallArray info_p res_r src src_off n = do
1965 dflags <- getDynFlags
1966
1967 let info_ptr = mkLblExpr info_p
1968 rep = smallArrPtrsRep n
1969
1970 tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
1971 (mkIntExpr dflags (nonHdrSize dflags rep))
1972 (zeroExpr dflags)
1973
1974 let hdr_size = fixedHdrSize dflags
1975
1976 base <- allocHeapClosure rep info_ptr curCCS
1977 [ (mkIntExpr dflags n,
1978 hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
1979 ]
1980
1981 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1982 emit $ mkAssign arr base
1983
1984 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
1985 (smallArrPtrsHdrSize dflags)
1986 src_p <- assignTempE $ cmmOffsetExprW dflags src
1987 (cmmAddWord dflags
1988 (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
1989
1990 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
1991 (wORD_SIZE dflags)
1992
1993 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1994
1995 -- | Takes and offset in the destination array, the base address of
1996 -- the card table, and the number of elements affected (*not* the
1997 -- number of cards). The number of elements may not be zero.
1998 -- Marks the relevant cards as dirty.
1999 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
2000 emitSetCards dst_start dst_cards_start n = do
2001 dflags <- getDynFlags
2002 start_card <- assignTempE $ cardCmm dflags dst_start
2003 let end_card = cardCmm dflags
2004 (cmmSubWord dflags
2005 (cmmAddWord dflags dst_start (mkIntExpr dflags n))
2006 (mkIntExpr dflags 1))
2007 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
2008 (mkIntExpr dflags 1)
2009 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
2010 1 -- no alignment (1 byte)
2011
2012 -- Convert an element index to a card index
2013 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
2014 cardCmm dflags i =
2015 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
2016
2017 ------------------------------------------------------------------------------
2018 -- SmallArray PrimOp implementations
2019
2020 doReadSmallPtrArrayOp :: LocalReg
2021 -> CmmExpr
2022 -> CmmExpr
2023 -> FCode ()
2024 doReadSmallPtrArrayOp res addr idx = do
2025 dflags <- getDynFlags
2026 mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
2027 (gcWord dflags) idx
2028
2029 doWriteSmallPtrArrayOp :: CmmExpr
2030 -> CmmExpr
2031 -> CmmExpr
2032 -> FCode ()
2033 doWriteSmallPtrArrayOp addr idx val = do
2034 dflags <- getDynFlags
2035 let ty = cmmExprType dflags val
2036 mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
2037 emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2038
2039 ------------------------------------------------------------------------------
2040 -- Atomic read-modify-write
2041
2042 -- | Emit an atomic modification to a byte array element. The result
2043 -- reg contains that previous value of the element. Implies a full
2044 -- memory barrier.
2045 doAtomicRMW :: LocalReg -- ^ Result reg
2046 -> AtomicMachOp -- ^ Atomic op (e.g. add)
2047 -> CmmExpr -- ^ MutableByteArray#
2048 -> CmmExpr -- ^ Index
2049 -> CmmType -- ^ Type of element by which we are indexing
2050 -> CmmExpr -- ^ Op argument (e.g. amount to add)
2051 -> FCode ()
2052 doAtomicRMW res amop mba idx idx_ty n = do
2053 dflags <- getDynFlags
2054 let width = typeWidth idx_ty
2055 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2056 width mba idx
2057 emitPrimCall
2058 [ res ]
2059 (MO_AtomicRMW width amop)
2060 [ addr, n ]
2061
2062 -- | Emit an atomic read to a byte array that acts as a memory barrier.
2063 doAtomicReadByteArray
2064 :: LocalReg -- ^ Result reg
2065 -> CmmExpr -- ^ MutableByteArray#
2066 -> CmmExpr -- ^ Index
2067 -> CmmType -- ^ Type of element by which we are indexing
2068 -> FCode ()
2069 doAtomicReadByteArray res mba idx idx_ty = do
2070 dflags <- getDynFlags
2071 let width = typeWidth idx_ty
2072 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2073 width mba idx
2074 emitPrimCall
2075 [ res ]
2076 (MO_AtomicRead width)
2077 [ addr ]
2078
2079 -- | Emit an atomic write to a byte array that acts as a memory barrier.
2080 doAtomicWriteByteArray
2081 :: CmmExpr -- ^ MutableByteArray#
2082 -> CmmExpr -- ^ Index
2083 -> CmmType -- ^ Type of element by which we are indexing
2084 -> CmmExpr -- ^ Value to write
2085 -> FCode ()
2086 doAtomicWriteByteArray mba idx idx_ty val = do
2087 dflags <- getDynFlags
2088 let width = typeWidth idx_ty
2089 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2090 width mba idx
2091 emitPrimCall
2092 [ {- no results -} ]
2093 (MO_AtomicWrite width)
2094 [ addr, val ]
2095
2096 doCasByteArray
2097 :: LocalReg -- ^ Result reg
2098 -> CmmExpr -- ^ MutableByteArray#
2099 -> CmmExpr -- ^ Index
2100 -> CmmType -- ^ Type of element by which we are indexing
2101 -> CmmExpr -- ^ Old value
2102 -> CmmExpr -- ^ New value
2103 -> FCode ()
2104 doCasByteArray res mba idx idx_ty old new = do
2105 dflags <- getDynFlags
2106 let width = (typeWidth idx_ty)
2107 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2108 width mba idx
2109 emitPrimCall
2110 [ res ]
2111 (MO_Cmpxchg width)
2112 [ addr, old, new ]
2113
2114 ------------------------------------------------------------------------------
2115 -- Helpers for emitting function calls
2116
2117 -- | Emit a call to @memcpy@.
2118 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2119 emitMemcpyCall dst src n align = do
2120 emitPrimCall
2121 [ {-no results-} ]
2122 (MO_Memcpy align)
2123 [ dst, src, n ]
2124
2125 -- | Emit a call to @memmove@.
2126 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2127 emitMemmoveCall dst src n align = do
2128 emitPrimCall
2129 [ {- no results -} ]
2130 (MO_Memmove align)
2131 [ dst, src, n ]
2132
2133 -- | Emit a call to @memset@. The second argument must fit inside an
2134 -- unsigned char.
2135 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2136 emitMemsetCall dst c n align = do
2137 emitPrimCall
2138 [ {- no results -} ]
2139 (MO_Memset align)
2140 [ dst, c, n ]
2141
2142 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2143 emitBSwapCall res x width = do
2144 emitPrimCall
2145 [ res ]
2146 (MO_BSwap width)
2147 [ x ]
2148
2149 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2150 emitPopCntCall res x width = do
2151 emitPrimCall
2152 [ res ]
2153 (MO_PopCnt width)
2154 [ x ]
2155
2156 emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2157 emitClzCall res x width = do
2158 emitPrimCall
2159 [ res ]
2160 (MO_Clz width)
2161 [ x ]
2162
2163 emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2164 emitCtzCall res x width = do
2165 emitPrimCall
2166 [ res ]
2167 (MO_Ctz width)
2168 [ x ]