Support MO_{Add,Sub}IntC and MO_Add2 in the LLVM backend
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 {-# LANGUAGE CPP #-}
2
3 ----------------------------------------------------------------------------
4 --
5 -- Stg to C--: primitive operations
6 --
7 -- (c) The University of Glasgow 2004-2006
8 --
9 -----------------------------------------------------------------------------
10
11 module StgCmmPrim (
12 cgOpApp,
13 cgPrimOp, -- internal(ish), used by cgCase to get code for a
14 -- comparison without also turning it into a Bool.
15 shouldInlinePrimOp
16 ) where
17
18 #include "HsVersions.h"
19
20 import StgCmmLayout
21 import StgCmmForeign
22 import StgCmmEnv
23 import StgCmmMonad
24 import StgCmmUtils
25 import StgCmmTicky
26 import StgCmmHeap
27 import StgCmmProf ( costCentreFrom, curCCS )
28
29 import DynFlags
30 import Platform
31 import BasicTypes
32 import MkGraph
33 import StgSyn
34 import Cmm
35 import CmmInfo
36 import Type ( Type, tyConAppTyCon )
37 import TyCon
38 import CLabel
39 import CmmUtils
40 import PrimOp
41 import SMRep
42 import FastString
43 import Outputable
44 import Util
45
46 #if __GLASGOW_HASKELL__ >= 709
47 import Prelude hiding ((<*>))
48 #endif
49
50 import Data.Bits ((.&.), bit)
51 import Control.Monad (liftM, when)
52
53 ------------------------------------------------------------------------
54 -- Primitive operations and foreign calls
55 ------------------------------------------------------------------------
56
57 {- Note [Foreign call results]
58 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
59 A foreign call always returns an unboxed tuple of results, one
60 of which is the state token. This seems to happen even for pure
61 calls.
62
63 Even if we returned a single result for pure calls, it'd still be
64 right to wrap it in a singleton unboxed tuple, because the result
65 might be a Haskell closure pointer, we don't want to evaluate it. -}
66
67 ----------------------------------
68 cgOpApp :: StgOp -- The op
69 -> [StgArg] -- Arguments
70 -> Type -- Result type (always an unboxed tuple)
71 -> FCode ReturnKind
72
73 -- Foreign calls
74 cgOpApp (StgFCallOp fcall _) stg_args res_ty
75 = cgForeignCall fcall stg_args res_ty
76 -- Note [Foreign call results]
77
78 -- tagToEnum# is special: we need to pull the constructor
79 -- out of the table, and perform an appropriate return.
80
81 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
82 = ASSERT(isEnumerationTyCon tycon)
83 do { dflags <- getDynFlags
84 ; args' <- getNonVoidArgAmodes [arg]
85 ; let amode = case args' of [amode] -> amode
86 _ -> panic "TagToEnumOp had void arg"
87 ; emitReturn [tagToClosure dflags tycon amode] }
88 where
89 -- If you're reading this code in the attempt to figure
90 -- out why the compiler panic'ed here, it is probably because
91 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
92 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
93 -- That won't work.
94 tycon = tyConAppTyCon res_ty
95
96 cgOpApp (StgPrimOp primop) args res_ty = do
97 dflags <- getDynFlags
98 cmm_args <- getNonVoidArgAmodes args
99 case shouldInlinePrimOp dflags primop cmm_args of
100 Nothing -> do -- out-of-line
101 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
102 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
103
104 Just f -- inline
105 | ReturnsPrim VoidRep <- result_info
106 -> do f []
107 emitReturn []
108
109 | ReturnsPrim rep <- result_info
110 -> do dflags <- getDynFlags
111 res <- newTemp (primRepCmmType dflags rep)
112 f [res]
113 emitReturn [CmmReg (CmmLocal res)]
114
115 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
116 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
117 f regs
118 emitReturn (map (CmmReg . CmmLocal) regs)
119
120 | otherwise -> panic "cgPrimop"
121 where
122 result_info = getPrimOpResultInfo primop
123
124 cgOpApp (StgPrimCallOp primcall) args _res_ty
125 = do { cmm_args <- getNonVoidArgAmodes args
126 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
127 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
128
129 -- | Interpret the argument as an unsigned value, assuming the value
130 -- is given in two-complement form in the given width.
131 --
132 -- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
133 --
134 -- This function is used to work around the fact that many array
135 -- primops take Int# arguments, but we interpret them as unsigned
136 -- quantities in the code gen. This means that we have to be careful
137 -- every time we work on e.g. a CmmInt literal that corresponds to the
138 -- array size, as it might contain a negative Integer value if the
139 -- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
140 -- literal.
141 asUnsigned :: Width -> Integer -> Integer
142 asUnsigned w n = n .&. (bit (widthInBits w) - 1)
143
144 -- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
145 -- ByteOff (or some other fixed width signed type) to represent
146 -- array sizes or indices. This means that these will overflow for
147 -- large enough sizes.
148
149 -- | Decide whether an out-of-line primop should be replaced by an
150 -- inline implementation. This might happen e.g. if there's enough
151 -- static information, such as statically know arguments, to emit a
152 -- more efficient implementation inline.
153 --
154 -- Returns 'Nothing' if this primop should use its out-of-line
155 -- implementation (defined elsewhere) and 'Just' together with a code
156 -- generating function that takes the output regs as arguments
157 -- otherwise.
158 shouldInlinePrimOp :: DynFlags
159 -> PrimOp -- ^ The primop
160 -> [CmmExpr] -- ^ The primop arguments
161 -> Maybe ([LocalReg] -> FCode ())
162
163 shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
164 | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
165 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
166
167 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
168 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
169 Just $ \ [res] ->
170 doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
171 [ (mkIntExpr dflags (fromInteger n),
172 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
173 , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
174 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
175 ]
176 (fromInteger n) init
177
178 shouldInlinePrimOp _ CopyArrayOp
179 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
180 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
181
182 shouldInlinePrimOp _ CopyMutableArrayOp
183 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
184 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
185
186 shouldInlinePrimOp _ CopyArrayArrayOp
187 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
188 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
189
190 shouldInlinePrimOp _ CopyMutableArrayArrayOp
191 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
192 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
193
194 shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
195 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
196 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
197
198 shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
199 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
200 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
201
202 shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
203 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
204 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
205
206 shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
207 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
208 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
209
210 shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
211 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
212 Just $ \ [res] ->
213 doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
214 [ (mkIntExpr dflags (fromInteger n),
215 fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
216 ]
217 (fromInteger n) init
218
219 shouldInlinePrimOp _ CopySmallArrayOp
220 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
221 Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
222
223 shouldInlinePrimOp _ CopySmallMutableArrayOp
224 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
225 Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
226
227 shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
228 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
229 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
230
231 shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
232 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
233 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
234
235 shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
236 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
237 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
238
239 shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
240 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
241 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
242
243 shouldInlinePrimOp dflags primop args
244 | primOpOutOfLine primop = Nothing
245 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
246
247 -- TODO: Several primops, such as 'copyArray#', only have an inline
248 -- implementation (below) but could possibly have both an inline
249 -- implementation and an out-of-line implementation, just like
250 -- 'newArray#'. This would lower the amount of code generated,
251 -- hopefully without a performance impact (needs to be measured).
252
253 ---------------------------------------------------
254 cgPrimOp :: [LocalReg] -- where to put the results
255 -> PrimOp -- the op
256 -> [StgArg] -- arguments
257 -> FCode ()
258
259 cgPrimOp results op args
260 = do dflags <- getDynFlags
261 arg_exprs <- getNonVoidArgAmodes args
262 emitPrimOp dflags results op arg_exprs
263
264
265 ------------------------------------------------------------------------
266 -- Emitting code for a primop
267 ------------------------------------------------------------------------
268
269 emitPrimOp :: DynFlags
270 -> [LocalReg] -- where to put the results
271 -> PrimOp -- the op
272 -> [CmmExpr] -- arguments
273 -> FCode ()
274
275 -- First we handle various awkward cases specially. The remaining
276 -- easy cases are then handled by translateOp, defined below.
277
278 emitPrimOp _ [res] ParOp [arg]
279 =
280 -- for now, just implement this in a C function
281 -- later, we might want to inline it.
282 emitCCall
283 [(res,NoHint)]
284 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
285 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
286
287 emitPrimOp dflags [res] SparkOp [arg]
288 = do
289 -- returns the value of arg in res. We're going to therefore
290 -- refer to arg twice (once to pass to newSpark(), and once to
291 -- assign to res), so put it in a temporary.
292 tmp <- assignTemp arg
293 tmp2 <- newTemp (bWord dflags)
294 emitCCall
295 [(tmp2,NoHint)]
296 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
297 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
298 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
299
300 emitPrimOp dflags [res] GetCCSOfOp [arg]
301 = emitAssign (CmmLocal res) val
302 where
303 val
304 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
305 | otherwise = CmmLit (zeroCLit dflags)
306
307 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
308 = emitAssign (CmmLocal res) curCCS
309
310 emitPrimOp dflags [res] ReadMutVarOp [mutv]
311 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
312
313 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
314 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
315 emitCCall
316 [{-no results-}]
317 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
318 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
319
320 -- #define sizzeofByteArrayzh(r,a) \
321 -- r = ((StgArrWords *)(a))->bytes
322 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
323 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
324
325 -- #define sizzeofMutableByteArrayzh(r,a) \
326 -- r = ((StgArrWords *)(a))->bytes
327 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
328 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
329
330
331 -- #define touchzh(o) /* nothing */
332 emitPrimOp _ res@[] TouchOp args@[_arg]
333 = do emitPrimCall res MO_Touch args
334
335 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
336 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
337 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
338
339 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
340 emitPrimOp dflags [res] StableNameToIntOp [arg]
341 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
342
343 -- #define eqStableNamezh(r,sn1,sn2) \
344 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
345 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
346 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
347 cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags),
348 cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags)
349 ])
350
351
352 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
353 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
354
355 -- #define addrToHValuezh(r,a) r=(P_)a
356 emitPrimOp _ [res] AddrToAnyOp [arg]
357 = emitAssign (CmmLocal res) arg
358
359 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
360 -- Note: argument may be tagged!
361 emitPrimOp dflags [res] DataToTagOp [arg]
362 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
363
364 {- Freezing arrays-of-ptrs requires changing an info table, for the
365 benefit of the generational collector. It needs to scavenge mutable
366 objects, even if they are in old space. When they become immutable,
367 they can be removed from this scavenge list. -}
368
369 -- #define unsafeFreezzeArrayzh(r,a)
370 -- {
371 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
372 -- r = a;
373 -- }
374 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
375 = emit $ catAGraphs
376 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
377 mkAssign (CmmLocal res) arg ]
378 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
379 = emit $ catAGraphs
380 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
381 mkAssign (CmmLocal res) arg ]
382 emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
383 = emit $ catAGraphs
384 [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)),
385 mkAssign (CmmLocal res) arg ]
386
387 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
388 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
389 = emitAssign (CmmLocal res) arg
390
391 -- Reading/writing pointer arrays
392
393 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
394 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
395 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
396
397 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
398 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
399 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
400 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
401 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
402 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
403 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
404 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
405 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
406 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
407
408 emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
409 emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
410 emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
411
412 -- Getting the size of pointer arrays
413
414 emitPrimOp dflags [res] SizeofArrayOp [arg]
415 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags))
416 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
417 = emitPrimOp dflags [res] SizeofArrayOp [arg]
418 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
419 = emitPrimOp dflags [res] SizeofArrayOp [arg]
420 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
421 = emitPrimOp dflags [res] SizeofArrayOp [arg]
422
423 emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
424 emit $ mkAssign (CmmLocal res)
425 (cmmLoadIndexW dflags arg
426 (fixedHdrSizeW dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags) (bWord dflags))
427 emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
428 emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
429
430 -- IndexXXXoffAddr
431
432 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
433 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
434 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
435 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
436 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
437 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
438 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
439 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
440 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
441 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
442 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
443 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
444 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
445 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
446 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
447 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
448
449 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
450
451 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
452 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
453 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
454 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
455 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
456 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
457 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
458 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
459 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
460 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
461 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
462 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
463 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
464 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
465 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
466 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
467
468 -- IndexXXXArray
469
470 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
471 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
472 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
473 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
474 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
475 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
476 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
477 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
478 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
479 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
480 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
481 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
482 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
483 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
484 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
485 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
486
487 -- ReadXXXArray, identical to IndexXXXArray.
488
489 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
490 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
491 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
492 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
493 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
494 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
495 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
496 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
497 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
498 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
499 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
500 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
501 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
502 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
503 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
504 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
505
506 -- WriteXXXoffAddr
507
508 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
509 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
510 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
511 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
512 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
513 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
514 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
515 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
516 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
517 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
518 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
519 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
520 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
521 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
522 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
523 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
524
525 -- WriteXXXArray
526
527 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
528 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
529 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
530 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
531 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
532 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
533 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
534 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
535 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
536 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
537 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
538 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
539 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
540 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
541 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
542 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
543
544 -- Copying and setting byte arrays
545 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
546 doCopyByteArrayOp src src_off dst dst_off n
547 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
548 doCopyMutableByteArrayOp src src_off dst dst_off n
549 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
550 doCopyByteArrayToAddrOp src src_off dst n
551 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
552 doCopyMutableByteArrayToAddrOp src src_off dst n
553 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
554 doCopyAddrToByteArrayOp src dst dst_off n
555 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
556 doSetByteArrayOp ba off len c
557
558 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
559 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
560 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
561 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
562
563 -- Population count
564 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
565 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
566 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
567 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
568 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
569
570 -- count leading zeros
571 emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
572 emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
573 emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
574 emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
575 emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
576
577 -- count trailing zeros
578 emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
579 emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
580 emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
581 emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
582 emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
583
584 -- Unsigned int to floating point conversions
585 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
586 (MO_UF_Conv W32) [w]
587 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
588 (MO_UF_Conv W64) [w]
589
590 -- SIMD primops
591 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
592 checkVecCompatibility dflags vcat n w
593 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
594 where
595 zeros :: CmmExpr
596 zeros = CmmLit $ CmmVec (replicate n zero)
597
598 zero :: CmmLit
599 zero = case vcat of
600 IntVec -> CmmInt 0 w
601 WordVec -> CmmInt 0 w
602 FloatVec -> CmmFloat 0 w
603
604 ty :: CmmType
605 ty = vecVmmType vcat n w
606
607 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
608 checkVecCompatibility dflags vcat n w
609 when (length es /= n) $
610 panic "emitPrimOp: VecPackOp has wrong number of arguments"
611 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
612 where
613 zeros :: CmmExpr
614 zeros = CmmLit $ CmmVec (replicate n zero)
615
616 zero :: CmmLit
617 zero = case vcat of
618 IntVec -> CmmInt 0 w
619 WordVec -> CmmInt 0 w
620 FloatVec -> CmmFloat 0 w
621
622 ty :: CmmType
623 ty = vecVmmType vcat n w
624
625 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
626 checkVecCompatibility dflags vcat n w
627 when (length res /= n) $
628 panic "emitPrimOp: VecUnpackOp has wrong number of results"
629 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
630 where
631 ty :: CmmType
632 ty = vecVmmType vcat n w
633
634 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
635 checkVecCompatibility dflags vcat n w
636 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
637 where
638 ty :: CmmType
639 ty = vecVmmType vcat n w
640
641 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
642 checkVecCompatibility dflags vcat n w
643 doIndexByteArrayOp Nothing ty res args
644 where
645 ty :: CmmType
646 ty = vecVmmType vcat n w
647
648 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
649 checkVecCompatibility dflags vcat n w
650 doIndexByteArrayOp Nothing ty res args
651 where
652 ty :: CmmType
653 ty = vecVmmType vcat n w
654
655 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
656 checkVecCompatibility dflags vcat n w
657 doWriteByteArrayOp Nothing ty res args
658 where
659 ty :: CmmType
660 ty = vecVmmType vcat n w
661
662 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
663 checkVecCompatibility dflags vcat n w
664 doIndexOffAddrOp Nothing ty res args
665 where
666 ty :: CmmType
667 ty = vecVmmType vcat n w
668
669 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
670 checkVecCompatibility dflags vcat n w
671 doIndexOffAddrOp Nothing ty res args
672 where
673 ty :: CmmType
674 ty = vecVmmType vcat n w
675
676 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
677 checkVecCompatibility dflags vcat n w
678 doWriteOffAddrOp Nothing ty res args
679 where
680 ty :: CmmType
681 ty = vecVmmType vcat n w
682
683 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
684 checkVecCompatibility dflags vcat n w
685 doIndexByteArrayOpAs Nothing vecty ty res args
686 where
687 vecty :: CmmType
688 vecty = vecVmmType vcat n w
689
690 ty :: CmmType
691 ty = vecCmmCat vcat w
692
693 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
694 checkVecCompatibility dflags vcat n w
695 doIndexByteArrayOpAs Nothing vecty ty res args
696 where
697 vecty :: CmmType
698 vecty = vecVmmType vcat n w
699
700 ty :: CmmType
701 ty = vecCmmCat vcat w
702
703 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
704 checkVecCompatibility dflags vcat n w
705 doWriteByteArrayOp Nothing ty res args
706 where
707 ty :: CmmType
708 ty = vecCmmCat vcat w
709
710 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
711 checkVecCompatibility dflags vcat n w
712 doIndexOffAddrOpAs Nothing vecty ty res args
713 where
714 vecty :: CmmType
715 vecty = vecVmmType vcat n w
716
717 ty :: CmmType
718 ty = vecCmmCat vcat w
719
720 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
721 checkVecCompatibility dflags vcat n w
722 doIndexOffAddrOpAs Nothing vecty ty res args
723 where
724 vecty :: CmmType
725 vecty = vecVmmType vcat n w
726
727 ty :: CmmType
728 ty = vecCmmCat vcat w
729
730 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
731 checkVecCompatibility dflags vcat n w
732 doWriteOffAddrOp Nothing ty res args
733 where
734 ty :: CmmType
735 ty = vecCmmCat vcat w
736
737 -- Prefetch
738 emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
739 emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
740 emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
741 emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
742
743 emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
744 emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
745 emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
746 emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
747
748 emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
749 emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
750 emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
751 emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
752
753 emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
754 emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
755 emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
756 emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
757
758 -- Atomic read-modify-write
759 emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
760 doAtomicRMW res AMO_Add mba ix (bWord dflags) n
761 emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
762 doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
763 emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
764 doAtomicRMW res AMO_And mba ix (bWord dflags) n
765 emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
766 doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
767 emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
768 doAtomicRMW res AMO_Or mba ix (bWord dflags) n
769 emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
770 doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
771 emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
772 doAtomicReadByteArray res mba ix (bWord dflags)
773 emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
774 doAtomicWriteByteArray mba ix (bWord dflags) val
775 emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
776 doCasByteArray res mba ix (bWord dflags) old new
777
778 -- The rest just translate straightforwardly
779 emitPrimOp dflags [res] op [arg]
780 | nopOp op
781 = emitAssign (CmmLocal res) arg
782
783 | Just (mop,rep) <- narrowOp op
784 = emitAssign (CmmLocal res) $
785 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
786
787 emitPrimOp dflags r@[res] op args
788 | Just prim <- callishOp op
789 = do emitPrimCall r prim args
790
791 | Just mop <- translateOp dflags op
792 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
793 emit stmt
794
795 emitPrimOp dflags results op args
796 = case callishPrimOpSupported dflags op of
797 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
798 Right gen -> gen results args
799
800 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
801
802 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
803 callishPrimOpSupported dflags op
804 = case op of
805 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
806 | otherwise -> Right (genericIntQuotRemOp dflags)
807
808 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
809 | otherwise -> Right (genericWordQuotRemOp dflags)
810
811 WordQuotRem2Op | ncg && x86ish -> Left (MO_U_QuotRem2 (wordWidth dflags))
812 | otherwise -> Right (genericWordQuotRem2Op dflags)
813
814 WordAdd2Op | (ncg && x86ish)
815 || llvm -> Left (MO_Add2 (wordWidth dflags))
816 | otherwise -> Right genericWordAdd2Op
817
818 IntAddCOp | (ncg && x86ish)
819 || llvm -> Left (MO_AddIntC (wordWidth dflags))
820 | otherwise -> Right genericIntAddCOp
821
822 IntSubCOp | (ncg && x86ish)
823 || llvm -> Left (MO_SubIntC (wordWidth dflags))
824 | otherwise -> Right genericIntSubCOp
825
826 WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags))
827 | otherwise -> Right genericWordMul2Op
828
829 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
830 where
831 ncg = case hscTarget dflags of
832 HscAsm -> True
833 _ -> False
834 llvm = case hscTarget dflags of
835 HscLlvm -> True
836 _ -> False
837 x86ish = case platformArch (targetPlatform dflags) of
838 ArchX86 -> True
839 ArchX86_64 -> True
840 _ -> False
841
842 genericIntQuotRemOp :: DynFlags -> GenericOp
843 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
844 = emit $ mkAssign (CmmLocal res_q)
845 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
846 mkAssign (CmmLocal res_r)
847 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
848 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
849
850 genericWordQuotRemOp :: DynFlags -> GenericOp
851 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
852 = emit $ mkAssign (CmmLocal res_q)
853 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
854 mkAssign (CmmLocal res_r)
855 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
856 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
857
858 genericWordQuotRem2Op :: DynFlags -> GenericOp
859 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
860 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
861 where ty = cmmExprType dflags arg_x_high
862 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
863 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
864 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
865 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
866 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
867 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
868 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
869 zero = lit 0
870 one = lit 1
871 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
872 lit i = CmmLit (CmmInt i (wordWidth dflags))
873
874 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
875 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
876 mkAssign (CmmLocal res_r) high)
877 f i acc high low =
878 do roverflowedBit <- newTemp ty
879 rhigh' <- newTemp ty
880 rhigh'' <- newTemp ty
881 rlow' <- newTemp ty
882 risge <- newTemp ty
883 racc' <- newTemp ty
884 let high' = CmmReg (CmmLocal rhigh')
885 isge = CmmReg (CmmLocal risge)
886 overflowedBit = CmmReg (CmmLocal roverflowedBit)
887 let this = catAGraphs
888 [mkAssign (CmmLocal roverflowedBit)
889 (shr high negone),
890 mkAssign (CmmLocal rhigh')
891 (or (shl high one) (shr low negone)),
892 mkAssign (CmmLocal rlow')
893 (shl low one),
894 mkAssign (CmmLocal risge)
895 (or (overflowedBit `ne` zero)
896 (high' `ge` arg_y)),
897 mkAssign (CmmLocal rhigh'')
898 (high' `minus` (arg_y `times` isge)),
899 mkAssign (CmmLocal racc')
900 (or (shl acc one) isge)]
901 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
902 (CmmReg (CmmLocal rhigh''))
903 (CmmReg (CmmLocal rlow'))
904 return (this <*> rest)
905 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
906
907 genericWordAdd2Op :: GenericOp
908 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
909 = do dflags <- getDynFlags
910 r1 <- newTemp (cmmExprType dflags arg_x)
911 r2 <- newTemp (cmmExprType dflags arg_x)
912 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
913 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
914 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
915 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
916 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
917 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
918 (wordWidth dflags))
919 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
920 emit $ catAGraphs
921 [mkAssign (CmmLocal r1)
922 (add (bottomHalf arg_x) (bottomHalf arg_y)),
923 mkAssign (CmmLocal r2)
924 (add (topHalf (CmmReg (CmmLocal r1)))
925 (add (topHalf arg_x) (topHalf arg_y))),
926 mkAssign (CmmLocal res_h)
927 (topHalf (CmmReg (CmmLocal r2))),
928 mkAssign (CmmLocal res_l)
929 (or (toTopHalf (CmmReg (CmmLocal r2)))
930 (bottomHalf (CmmReg (CmmLocal r1))))]
931 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
932
933 genericIntAddCOp :: GenericOp
934 genericIntAddCOp [res_r, res_c] [aa, bb]
935 {-
936 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
937 C, and without needing any comparisons. This may not be the
938 fastest way to do it - if you have better code, please send it! --SDM
939
940 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
941
942 We currently don't make use of the r value if c is != 0 (i.e.
943 overflow), we just convert to big integers and try again. This
944 could be improved by making r and c the correct values for
945 plugging into a new J#.
946
947 { r = ((I_)(a)) + ((I_)(b)); \
948 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
949 >> (BITS_IN (I_) - 1); \
950 }
951 Wading through the mass of bracketry, it seems to reduce to:
952 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
953
954 -}
955 = do dflags <- getDynFlags
956 emit $ catAGraphs [
957 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
958 mkAssign (CmmLocal res_c) $
959 CmmMachOp (mo_wordUShr dflags) [
960 CmmMachOp (mo_wordAnd dflags) [
961 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
962 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
963 ],
964 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
965 ]
966 ]
967 genericIntAddCOp _ _ = panic "genericIntAddCOp"
968
969 genericIntSubCOp :: GenericOp
970 genericIntSubCOp [res_r, res_c] [aa, bb]
971 {- Similarly:
972 #define subIntCzh(r,c,a,b) \
973 { r = ((I_)(a)) - ((I_)(b)); \
974 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
975 >> (BITS_IN (I_) - 1); \
976 }
977
978 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
979 -}
980 = do dflags <- getDynFlags
981 emit $ catAGraphs [
982 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
983 mkAssign (CmmLocal res_c) $
984 CmmMachOp (mo_wordUShr dflags) [
985 CmmMachOp (mo_wordAnd dflags) [
986 CmmMachOp (mo_wordXor dflags) [aa,bb],
987 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
988 ],
989 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
990 ]
991 ]
992 genericIntSubCOp _ _ = panic "genericIntSubCOp"
993
994 genericWordMul2Op :: GenericOp
995 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
996 = do dflags <- getDynFlags
997 let t = cmmExprType dflags arg_x
998 xlyl <- liftM CmmLocal $ newTemp t
999 xlyh <- liftM CmmLocal $ newTemp t
1000 xhyl <- liftM CmmLocal $ newTemp t
1001 r <- liftM CmmLocal $ newTemp t
1002 -- This generic implementation is very simple and slow. We might
1003 -- well be able to do better, but for now this at least works.
1004 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
1005 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
1006 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
1007 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
1008 sum = foldl1 add
1009 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
1010 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
1011 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
1012 (wordWidth dflags))
1013 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
1014 emit $ catAGraphs
1015 [mkAssign xlyl
1016 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
1017 mkAssign xlyh
1018 (mul (bottomHalf arg_x) (topHalf arg_y)),
1019 mkAssign xhyl
1020 (mul (topHalf arg_x) (bottomHalf arg_y)),
1021 mkAssign r
1022 (sum [topHalf (CmmReg xlyl),
1023 bottomHalf (CmmReg xhyl),
1024 bottomHalf (CmmReg xlyh)]),
1025 mkAssign (CmmLocal res_l)
1026 (or (bottomHalf (CmmReg xlyl))
1027 (toTopHalf (CmmReg r))),
1028 mkAssign (CmmLocal res_h)
1029 (sum [mul (topHalf arg_x) (topHalf arg_y),
1030 topHalf (CmmReg xhyl),
1031 topHalf (CmmReg xlyh),
1032 topHalf (CmmReg r)])]
1033 genericWordMul2Op _ _ = panic "genericWordMul2Op"
1034
1035 -- These PrimOps are NOPs in Cmm
1036
1037 nopOp :: PrimOp -> Bool
1038 nopOp Int2WordOp = True
1039 nopOp Word2IntOp = True
1040 nopOp Int2AddrOp = True
1041 nopOp Addr2IntOp = True
1042 nopOp ChrOp = True -- Int# and Char# are rep'd the same
1043 nopOp OrdOp = True
1044 nopOp _ = False
1045
1046 -- These PrimOps turn into double casts
1047
1048 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
1049 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
1050 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
1051 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
1052 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
1053 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
1054 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
1055 narrowOp _ = Nothing
1056
1057 -- Native word signless ops
1058
1059 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
1060 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
1061 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
1062 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
1063 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
1064 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
1065 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
1066
1067 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
1068 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
1069 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
1070 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
1071 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
1072 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
1073
1074 translateOp dflags AndOp = Just (mo_wordAnd dflags)
1075 translateOp dflags OrOp = Just (mo_wordOr dflags)
1076 translateOp dflags XorOp = Just (mo_wordXor dflags)
1077 translateOp dflags NotOp = Just (mo_wordNot dflags)
1078 translateOp dflags SllOp = Just (mo_wordShl dflags)
1079 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
1080
1081 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
1082
1083 -- Native word signed ops
1084
1085 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
1086 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
1087 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
1088 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
1089 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
1090
1091
1092 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
1093 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
1094 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
1095 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
1096
1097 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
1098 translateOp dflags OrIOp = Just (mo_wordOr dflags)
1099 translateOp dflags XorIOp = Just (mo_wordXor dflags)
1100 translateOp dflags NotIOp = Just (mo_wordNot dflags)
1101 translateOp dflags ISllOp = Just (mo_wordShl dflags)
1102 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
1103 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
1104
1105 -- Native word unsigned ops
1106
1107 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
1108 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
1109 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
1110 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
1111
1112 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
1113 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
1114 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
1115
1116 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
1117 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
1118 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
1119 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
1120
1121 -- Char# ops
1122
1123 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
1124 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
1125 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
1126 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
1127 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
1128 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
1129
1130 -- Double ops
1131
1132 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
1133 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
1134 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
1135 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
1136 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
1137 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
1138
1139 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
1140 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
1141 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
1142 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
1143 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
1144
1145 -- Float ops
1146
1147 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
1148 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
1149 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
1150 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1151 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1152 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1153
1154 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1155 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1156 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1157 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1158 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1159
1160 -- Vector ops
1161
1162 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1163 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1164 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1165 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1166 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1167
1168 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1169 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1170 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1171 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1172 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1173 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1174
1175 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1176 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1177 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1178 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1179 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1180
1181 -- Conversions
1182
1183 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1184 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1185
1186 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1187 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1188
1189 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1190 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1191
1192 -- Word comparisons masquerading as more exotic things.
1193
1194 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1195 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1196 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1197 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1198 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1199 translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
1200 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1201 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1202
1203 translateOp _ _ = Nothing
1204
1205 -- These primops are implemented by CallishMachOps, because they sometimes
1206 -- turn into foreign calls depending on the backend.
1207
1208 callishOp :: PrimOp -> Maybe CallishMachOp
1209 callishOp DoublePowerOp = Just MO_F64_Pwr
1210 callishOp DoubleSinOp = Just MO_F64_Sin
1211 callishOp DoubleCosOp = Just MO_F64_Cos
1212 callishOp DoubleTanOp = Just MO_F64_Tan
1213 callishOp DoubleSinhOp = Just MO_F64_Sinh
1214 callishOp DoubleCoshOp = Just MO_F64_Cosh
1215 callishOp DoubleTanhOp = Just MO_F64_Tanh
1216 callishOp DoubleAsinOp = Just MO_F64_Asin
1217 callishOp DoubleAcosOp = Just MO_F64_Acos
1218 callishOp DoubleAtanOp = Just MO_F64_Atan
1219 callishOp DoubleLogOp = Just MO_F64_Log
1220 callishOp DoubleExpOp = Just MO_F64_Exp
1221 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1222
1223 callishOp FloatPowerOp = Just MO_F32_Pwr
1224 callishOp FloatSinOp = Just MO_F32_Sin
1225 callishOp FloatCosOp = Just MO_F32_Cos
1226 callishOp FloatTanOp = Just MO_F32_Tan
1227 callishOp FloatSinhOp = Just MO_F32_Sinh
1228 callishOp FloatCoshOp = Just MO_F32_Cosh
1229 callishOp FloatTanhOp = Just MO_F32_Tanh
1230 callishOp FloatAsinOp = Just MO_F32_Asin
1231 callishOp FloatAcosOp = Just MO_F32_Acos
1232 callishOp FloatAtanOp = Just MO_F32_Atan
1233 callishOp FloatLogOp = Just MO_F32_Log
1234 callishOp FloatExpOp = Just MO_F32_Exp
1235 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1236
1237 callishOp _ = Nothing
1238
1239 ------------------------------------------------------------------------------
1240 -- Helpers for translating various minor variants of array indexing.
1241
1242 doIndexOffAddrOp :: Maybe MachOp
1243 -> CmmType
1244 -> [LocalReg]
1245 -> [CmmExpr]
1246 -> FCode ()
1247 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1248 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1249 doIndexOffAddrOp _ _ _ _
1250 = panic "StgCmmPrim: doIndexOffAddrOp"
1251
1252 doIndexOffAddrOpAs :: Maybe MachOp
1253 -> CmmType
1254 -> CmmType
1255 -> [LocalReg]
1256 -> [CmmExpr]
1257 -> FCode ()
1258 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1259 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1260 doIndexOffAddrOpAs _ _ _ _ _
1261 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1262
1263 doIndexByteArrayOp :: Maybe MachOp
1264 -> CmmType
1265 -> [LocalReg]
1266 -> [CmmExpr]
1267 -> FCode ()
1268 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1269 = do dflags <- getDynFlags
1270 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1271 doIndexByteArrayOp _ _ _ _
1272 = panic "StgCmmPrim: doIndexByteArrayOp"
1273
1274 doIndexByteArrayOpAs :: Maybe MachOp
1275 -> CmmType
1276 -> CmmType
1277 -> [LocalReg]
1278 -> [CmmExpr]
1279 -> FCode ()
1280 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1281 = do dflags <- getDynFlags
1282 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1283 doIndexByteArrayOpAs _ _ _ _ _
1284 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1285
1286 doReadPtrArrayOp :: LocalReg
1287 -> CmmExpr
1288 -> CmmExpr
1289 -> FCode ()
1290 doReadPtrArrayOp res addr idx
1291 = do dflags <- getDynFlags
1292 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1293
1294 doWriteOffAddrOp :: Maybe MachOp
1295 -> CmmType
1296 -> [LocalReg]
1297 -> [CmmExpr]
1298 -> FCode ()
1299 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1300 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1301 doWriteOffAddrOp _ _ _ _
1302 = panic "StgCmmPrim: doWriteOffAddrOp"
1303
1304 doWriteByteArrayOp :: Maybe MachOp
1305 -> CmmType
1306 -> [LocalReg]
1307 -> [CmmExpr]
1308 -> FCode ()
1309 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1310 = do dflags <- getDynFlags
1311 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1312 doWriteByteArrayOp _ _ _ _
1313 = panic "StgCmmPrim: doWriteByteArrayOp"
1314
1315 doWritePtrArrayOp :: CmmExpr
1316 -> CmmExpr
1317 -> CmmExpr
1318 -> FCode ()
1319 doWritePtrArrayOp addr idx val
1320 = do dflags <- getDynFlags
1321 let ty = cmmExprType dflags val
1322 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1323 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1324 -- the write barrier. We must write a byte into the mark table:
1325 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1326 emit $ mkStore (
1327 cmmOffsetExpr dflags
1328 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1329 (loadArrPtrsSize dflags addr))
1330 (CmmMachOp (mo_wordUShr dflags) [idx,
1331 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1332 ) (CmmLit (CmmInt 1 W8))
1333
1334 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1335 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1336 where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1337
1338 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1339 -> Maybe MachOp -- Optional result cast
1340 -> CmmType -- Type of element we are accessing
1341 -> LocalReg -- Destination
1342 -> CmmExpr -- Base address
1343 -> CmmType -- Type of element by which we are indexing
1344 -> CmmExpr -- Index
1345 -> FCode ()
1346 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1347 = do dflags <- getDynFlags
1348 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1349 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1350 = do dflags <- getDynFlags
1351 emitAssign (CmmLocal res) (CmmMachOp cast [
1352 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1353
1354 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1355 -> Maybe MachOp -- Optional value cast
1356 -> CmmExpr -- Base address
1357 -> CmmType -- Type of element by which we are indexing
1358 -> CmmExpr -- Index
1359 -> CmmExpr -- Value to write
1360 -> FCode ()
1361 mkBasicIndexedWrite off Nothing base idx_ty idx val
1362 = do dflags <- getDynFlags
1363 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1364 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1365 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1366
1367 -- ----------------------------------------------------------------------------
1368 -- Misc utils
1369
1370 cmmIndexOffExpr :: DynFlags
1371 -> ByteOff -- Initial offset in bytes
1372 -> Width -- Width of element by which we are indexing
1373 -> CmmExpr -- Base address
1374 -> CmmExpr -- Index
1375 -> CmmExpr
1376 cmmIndexOffExpr dflags off width base idx
1377 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1378
1379 cmmLoadIndexOffExpr :: DynFlags
1380 -> ByteOff -- Initial offset in bytes
1381 -> CmmType -- Type of element we are accessing
1382 -> CmmExpr -- Base address
1383 -> CmmType -- Type of element by which we are indexing
1384 -> CmmExpr -- Index
1385 -> CmmExpr
1386 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1387 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1388
1389 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1390 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1391
1392 ------------------------------------------------------------------------------
1393 -- Helpers for translating vector primops.
1394
1395 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1396 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1397
1398 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1399 vecCmmCat IntVec = cmmBits
1400 vecCmmCat WordVec = cmmBits
1401 vecCmmCat FloatVec = cmmFloat
1402
1403 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1404 vecElemInjectCast _ FloatVec _ = Nothing
1405 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1406 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1407 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1408 vecElemInjectCast _ IntVec W64 = Nothing
1409 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1410 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1411 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1412 vecElemInjectCast _ WordVec W64 = Nothing
1413 vecElemInjectCast _ _ _ = Nothing
1414
1415 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1416 vecElemProjectCast _ FloatVec _ = Nothing
1417 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1418 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1419 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1420 vecElemProjectCast _ IntVec W64 = Nothing
1421 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1422 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1423 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1424 vecElemProjectCast _ WordVec W64 = Nothing
1425 vecElemProjectCast _ _ _ = Nothing
1426
1427 -- Check to make sure that we can generate code for the specified vector type
1428 -- given the current set of dynamic flags.
1429 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1430 checkVecCompatibility dflags vcat l w = do
1431 when (hscTarget dflags /= HscLlvm) $ do
1432 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1433 ,"Please use -fllvm."]
1434 check vecWidth vcat l w
1435 where
1436 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1437 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1438 sorry $ "128-bit wide single-precision floating point " ++
1439 "SIMD vector instructions require at least -msse."
1440 check W128 _ _ _ | not (isSse2Enabled dflags) =
1441 sorry $ "128-bit wide integer and double precision " ++
1442 "SIMD vector instructions require at least -msse2."
1443 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1444 sorry $ "256-bit wide floating point " ++
1445 "SIMD vector instructions require at least -mavx."
1446 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1447 sorry $ "256-bit wide integer " ++
1448 "SIMD vector instructions require at least -mavx2."
1449 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1450 sorry $ "512-bit wide " ++
1451 "SIMD vector instructions require -mavx512f."
1452 check _ _ _ _ = return ()
1453
1454 vecWidth = typeWidth (vecVmmType vcat l w)
1455
1456 ------------------------------------------------------------------------------
1457 -- Helpers for translating vector packing and unpacking.
1458
1459 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1460 -> CmmType -- Type of vector
1461 -> CmmExpr -- Initial vector
1462 -> [CmmExpr] -- Elements
1463 -> CmmFormal -- Destination for result
1464 -> FCode ()
1465 doVecPackOp maybe_pre_write_cast ty z es res = do
1466 dst <- newTemp ty
1467 emitAssign (CmmLocal dst) z
1468 vecPack dst es 0
1469 where
1470 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1471 vecPack src [] _ =
1472 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1473
1474 vecPack src (e : es) i = do
1475 dst <- newTemp ty
1476 if isFloatType (vecElemType ty)
1477 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1478 [CmmReg (CmmLocal src), cast e, iLit])
1479 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1480 [CmmReg (CmmLocal src), cast e, iLit])
1481 vecPack dst es (i + 1)
1482 where
1483 -- vector indices are always 32-bits
1484 iLit = CmmLit (CmmInt (toInteger i) W32)
1485
1486 cast :: CmmExpr -> CmmExpr
1487 cast val = case maybe_pre_write_cast of
1488 Nothing -> val
1489 Just cast -> CmmMachOp cast [val]
1490
1491 len :: Length
1492 len = vecLength ty
1493
1494 wid :: Width
1495 wid = typeWidth (vecElemType ty)
1496
1497 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1498 -> CmmType -- Type of vector
1499 -> CmmExpr -- Vector
1500 -> [CmmFormal] -- Element results
1501 -> FCode ()
1502 doVecUnpackOp maybe_post_read_cast ty e res =
1503 vecUnpack res 0
1504 where
1505 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1506 vecUnpack [] _ =
1507 return ()
1508
1509 vecUnpack (r : rs) i = do
1510 if isFloatType (vecElemType ty)
1511 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1512 [e, iLit]))
1513 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1514 [e, iLit]))
1515 vecUnpack rs (i + 1)
1516 where
1517 -- vector indices are always 32-bits
1518 iLit = CmmLit (CmmInt (toInteger i) W32)
1519
1520 cast :: CmmExpr -> CmmExpr
1521 cast val = case maybe_post_read_cast of
1522 Nothing -> val
1523 Just cast -> CmmMachOp cast [val]
1524
1525 len :: Length
1526 len = vecLength ty
1527
1528 wid :: Width
1529 wid = typeWidth (vecElemType ty)
1530
1531 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1532 -> CmmType -- Vector type
1533 -> CmmExpr -- Source vector
1534 -> CmmExpr -- Element
1535 -> CmmExpr -- Index at which to insert element
1536 -> CmmFormal -- Destination for result
1537 -> FCode ()
1538 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1539 dflags <- getDynFlags
1540 -- vector indices are always 32-bits
1541 let idx' :: CmmExpr
1542 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1543 if isFloatType (vecElemType ty)
1544 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1545 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1546 where
1547 cast :: CmmExpr -> CmmExpr
1548 cast val = case maybe_pre_write_cast of
1549 Nothing -> val
1550 Just cast -> CmmMachOp cast [val]
1551
1552 len :: Length
1553 len = vecLength ty
1554
1555 wid :: Width
1556 wid = typeWidth (vecElemType ty)
1557
1558 ------------------------------------------------------------------------------
1559 -- Helpers for translating prefetching.
1560
1561
1562 -- | Translate byte array prefetch operations into proper primcalls.
1563 doPrefetchByteArrayOp :: Int
1564 -> [CmmExpr]
1565 -> FCode ()
1566 doPrefetchByteArrayOp locality [addr,idx]
1567 = do dflags <- getDynFlags
1568 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1569 doPrefetchByteArrayOp _ _
1570 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1571
1572 -- | Translate mutable byte array prefetch operations into proper primcalls.
1573 doPrefetchMutableByteArrayOp :: Int
1574 -> [CmmExpr]
1575 -> FCode ()
1576 doPrefetchMutableByteArrayOp locality [addr,idx]
1577 = do dflags <- getDynFlags
1578 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1579 doPrefetchMutableByteArrayOp _ _
1580 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1581
1582 -- | Translate address prefetch operations into proper primcalls.
1583 doPrefetchAddrOp ::Int
1584 -> [CmmExpr]
1585 -> FCode ()
1586 doPrefetchAddrOp locality [addr,idx]
1587 = mkBasicPrefetch locality 0 addr idx
1588 doPrefetchAddrOp _ _
1589 = panic "StgCmmPrim: doPrefetchAddrOp"
1590
1591 -- | Translate value prefetch operations into proper primcalls.
1592 doPrefetchValueOp :: Int
1593 -> [CmmExpr]
1594 -> FCode ()
1595 doPrefetchValueOp locality [addr]
1596 = do dflags <- getDynFlags
1597 mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
1598 doPrefetchValueOp _ _
1599 = panic "StgCmmPrim: doPrefetchValueOp"
1600
1601 -- | helper to generate prefetch primcalls
1602 mkBasicPrefetch :: Int -- Locality level 0-3
1603 -> ByteOff -- Initial offset in bytes
1604 -> CmmExpr -- Base address
1605 -> CmmExpr -- Index
1606 -> FCode ()
1607 mkBasicPrefetch locality off base idx
1608 = do dflags <- getDynFlags
1609 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1610 return ()
1611
1612 -- ----------------------------------------------------------------------------
1613 -- Allocating byte arrays
1614
1615 -- | Takes a register to return the newly allocated array in and the
1616 -- size of the new array in bytes. Allocates a new
1617 -- 'MutableByteArray#'.
1618 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1619 doNewByteArrayOp res_r n = do
1620 dflags <- getDynFlags
1621
1622 let info_ptr = mkLblExpr mkArrWords_infoLabel
1623 rep = arrWordsRep dflags n
1624
1625 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1626 (mkIntExpr dflags (nonHdrSize dflags rep))
1627 (zeroExpr dflags)
1628
1629 let hdr_size = fixedHdrSize dflags
1630
1631 base <- allocHeapClosure rep info_ptr curCCS
1632 [ (mkIntExpr dflags n,
1633 hdr_size + oFFSET_StgArrWords_bytes dflags)
1634 ]
1635
1636 emit $ mkAssign (CmmLocal res_r) base
1637
1638 -- ----------------------------------------------------------------------------
1639 -- Copying byte arrays
1640
1641 -- | Takes a source 'ByteArray#', an offset in the source array, a
1642 -- destination 'MutableByteArray#', an offset into the destination
1643 -- array, and the number of bytes to copy. Copies the given number of
1644 -- bytes from the source array to the destination array.
1645 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1646 -> FCode ()
1647 doCopyByteArrayOp = emitCopyByteArray copy
1648 where
1649 -- Copy data (we assume the arrays aren't overlapping since
1650 -- they're of different types)
1651 copy _src _dst dst_p src_p bytes =
1652 emitMemcpyCall dst_p src_p bytes 1
1653
1654 -- | Takes a source 'MutableByteArray#', an offset in the source
1655 -- array, a destination 'MutableByteArray#', an offset into the
1656 -- destination array, and the number of bytes to copy. Copies the
1657 -- given number of bytes from the source array to the destination
1658 -- array.
1659 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1660 -> FCode ()
1661 doCopyMutableByteArrayOp = emitCopyByteArray copy
1662 where
1663 -- The only time the memory might overlap is when the two arrays
1664 -- we were provided are the same array!
1665 -- TODO: Optimize branch for common case of no aliasing.
1666 copy src dst dst_p src_p bytes = do
1667 dflags <- getDynFlags
1668 [moveCall, cpyCall] <- forkAlts [
1669 getCode $ emitMemmoveCall dst_p src_p bytes 1,
1670 getCode $ emitMemcpyCall dst_p src_p bytes 1
1671 ]
1672 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1673
1674 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1675 -> FCode ())
1676 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1677 -> FCode ()
1678 emitCopyByteArray copy src src_off dst dst_off n = do
1679 dflags <- getDynFlags
1680 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1681 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1682 copy src dst dst_p src_p n
1683
1684 -- | Takes a source 'ByteArray#', an offset in the source array, a
1685 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1686 -- number of bytes from the source array to the destination memory region.
1687 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1688 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1689 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1690 dflags <- getDynFlags
1691 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1692 emitMemcpyCall dst_p src_p bytes 1
1693
1694 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1695 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1696 -- number of bytes from the source array to the destination memory region.
1697 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1698 -> FCode ()
1699 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1700
1701 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1702 -- the destination array, and the number of bytes to copy. Copies the given
1703 -- number of bytes from the source memory region to the destination array.
1704 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1705 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1706 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1707 dflags <- getDynFlags
1708 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1709 emitMemcpyCall dst_p src_p bytes 1
1710
1711
1712 -- ----------------------------------------------------------------------------
1713 -- Setting byte arrays
1714
1715 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1716 -- and a byte, and sets each of the selected bytes in the array to the
1717 -- character.
1718 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1719 -> FCode ()
1720 doSetByteArrayOp ba off len c
1721 = do dflags <- getDynFlags
1722 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1723 emitMemsetCall p c len 1
1724
1725 -- ----------------------------------------------------------------------------
1726 -- Allocating arrays
1727
1728 -- | Allocate a new array.
1729 doNewArrayOp :: CmmFormal -- ^ return register
1730 -> SMRep -- ^ representation of the array
1731 -> CLabel -- ^ info pointer
1732 -> [(CmmExpr, ByteOff)] -- ^ header payload
1733 -> WordOff -- ^ array size
1734 -> CmmExpr -- ^ initial element
1735 -> FCode ()
1736 doNewArrayOp res_r rep info payload n init = do
1737 dflags <- getDynFlags
1738
1739 let info_ptr = mkLblExpr info
1740
1741 tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
1742 (mkIntExpr dflags (nonHdrSize dflags rep))
1743 (zeroExpr dflags)
1744
1745 base <- allocHeapClosure rep info_ptr curCCS payload
1746
1747 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1748 emit $ mkAssign arr base
1749
1750 -- Initialise all elements of the the array
1751 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep)
1752 for <- newLabelC
1753 emitLabel for
1754 let loopBody =
1755 [ mkStore (CmmReg (CmmLocal p)) init
1756 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1757 , mkBranch for ]
1758 emit =<< mkCmmIfThen
1759 (cmmULtWord dflags (CmmReg (CmmLocal p))
1760 (cmmOffsetW dflags (CmmReg arr)
1761 (hdrSizeW dflags rep + n)))
1762 (catAGraphs loopBody)
1763
1764 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1765
1766 -- ----------------------------------------------------------------------------
1767 -- Copying pointer arrays
1768
1769 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1770 -- nowhere else in the code generator. This is mostly because these
1771 -- "primitive" ops result in a surprisingly large amount of code. It
1772 -- will likely be worthwhile to optimize what is emitted here, so that
1773 -- our optimization passes don't waste time repeatedly optimizing the
1774 -- same bits of code.
1775
1776 -- More closely imitates 'assignTemp' from the old code generator, which
1777 -- returns a CmmExpr rather than a LocalReg.
1778 assignTempE :: CmmExpr -> FCode CmmExpr
1779 assignTempE e = do
1780 t <- assignTemp e
1781 return (CmmReg (CmmLocal t))
1782
1783 -- | Takes a source 'Array#', an offset in the source array, a
1784 -- destination 'MutableArray#', an offset into the destination array,
1785 -- and the number of elements to copy. Copies the given number of
1786 -- elements from the source array to the destination array.
1787 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1788 -> FCode ()
1789 doCopyArrayOp = emitCopyArray copy
1790 where
1791 -- Copy data (we assume the arrays aren't overlapping since
1792 -- they're of different types)
1793 copy _src _dst dst_p src_p bytes =
1794 do dflags <- getDynFlags
1795 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1796 (wORD_SIZE dflags)
1797
1798
1799 -- | Takes a source 'MutableArray#', an offset in the source array, a
1800 -- destination 'MutableArray#', an offset into the destination array,
1801 -- and the number of elements to copy. Copies the given number of
1802 -- elements from the source array to the destination array.
1803 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1804 -> FCode ()
1805 doCopyMutableArrayOp = emitCopyArray copy
1806 where
1807 -- The only time the memory might overlap is when the two arrays
1808 -- we were provided are the same array!
1809 -- TODO: Optimize branch for common case of no aliasing.
1810 copy src dst dst_p src_p bytes = do
1811 dflags <- getDynFlags
1812 [moveCall, cpyCall] <- forkAlts [
1813 getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1814 (wORD_SIZE dflags),
1815 getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1816 (wORD_SIZE dflags)
1817 ]
1818 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1819
1820 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1821 -> FCode ()) -- ^ copy function
1822 -> CmmExpr -- ^ source array
1823 -> CmmExpr -- ^ offset in source array
1824 -> CmmExpr -- ^ destination array
1825 -> CmmExpr -- ^ offset in destination array
1826 -> WordOff -- ^ number of elements to copy
1827 -> FCode ()
1828 emitCopyArray copy src0 src_off dst0 dst_off0 n = do
1829 dflags <- getDynFlags
1830 when (n /= 0) $ do
1831 -- Passed as arguments (be careful)
1832 src <- assignTempE src0
1833 dst <- assignTempE dst0
1834 dst_off <- assignTempE dst_off0
1835
1836 -- Set the dirty bit in the header.
1837 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1838
1839 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
1840 (arrPtrsHdrSize dflags)
1841 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1842 src_p <- assignTempE $ cmmOffsetExprW dflags
1843 (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1844 let bytes = wordsToBytes dflags n
1845
1846 copy src dst dst_p src_p bytes
1847
1848 -- The base address of the destination card table
1849 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
1850 (loadArrPtrsSize dflags dst)
1851
1852 emitSetCards dst_off dst_cards_p n
1853
1854 doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1855 -> FCode ()
1856 doCopySmallArrayOp = emitCopySmallArray copy
1857 where
1858 -- Copy data (we assume the arrays aren't overlapping since
1859 -- they're of different types)
1860 copy _src _dst dst_p src_p bytes =
1861 do dflags <- getDynFlags
1862 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1863 (wORD_SIZE dflags)
1864
1865
1866 doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1867 -> FCode ()
1868 doCopySmallMutableArrayOp = emitCopySmallArray copy
1869 where
1870 -- The only time the memory might overlap is when the two arrays
1871 -- we were provided are the same array!
1872 -- TODO: Optimize branch for common case of no aliasing.
1873 copy src dst dst_p src_p bytes = do
1874 dflags <- getDynFlags
1875 [moveCall, cpyCall] <- forkAlts
1876 [ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1877 (wORD_SIZE dflags)
1878 , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1879 (wORD_SIZE dflags)
1880 ]
1881 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1882
1883 emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1884 -> FCode ()) -- ^ copy function
1885 -> CmmExpr -- ^ source array
1886 -> CmmExpr -- ^ offset in source array
1887 -> CmmExpr -- ^ destination array
1888 -> CmmExpr -- ^ offset in destination array
1889 -> WordOff -- ^ number of elements to copy
1890 -> FCode ()
1891 emitCopySmallArray copy src0 src_off dst0 dst_off n = do
1892 dflags <- getDynFlags
1893
1894 -- Passed as arguments (be careful)
1895 src <- assignTempE src0
1896 dst <- assignTempE dst0
1897
1898 -- Set the dirty bit in the header.
1899 emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
1900
1901 dst_p <- assignTempE $ cmmOffsetExprW dflags
1902 (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
1903 src_p <- assignTempE $ cmmOffsetExprW dflags
1904 (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
1905 let bytes = wordsToBytes dflags n
1906
1907 copy src dst dst_p src_p bytes
1908
1909 -- | Takes an info table label, a register to return the newly
1910 -- allocated array in, a source array, an offset in the source array,
1911 -- and the number of elements to copy. Allocates a new array and
1912 -- initializes it from the source array.
1913 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1914 -> FCode ()
1915 emitCloneArray info_p res_r src src_off n = do
1916 dflags <- getDynFlags
1917
1918 let info_ptr = mkLblExpr info_p
1919 rep = arrPtrsRep dflags n
1920
1921 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
1922 (mkIntExpr dflags (nonHdrSize dflags rep))
1923 (zeroExpr dflags)
1924
1925 let hdr_size = fixedHdrSize dflags
1926
1927 base <- allocHeapClosure rep info_ptr curCCS
1928 [ (mkIntExpr dflags n,
1929 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
1930 , (mkIntExpr dflags (nonHdrSizeW rep),
1931 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
1932 ]
1933
1934 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1935 emit $ mkAssign arr base
1936
1937 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
1938 (arrPtrsHdrSize dflags)
1939 src_p <- assignTempE $ cmmOffsetExprW dflags src
1940 (cmmAddWord dflags
1941 (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
1942
1943 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
1944 (wORD_SIZE dflags)
1945
1946 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1947
1948 -- | Takes an info table label, a register to return the newly
1949 -- allocated array in, a source array, an offset in the source array,
1950 -- and the number of elements to copy. Allocates a new array and
1951 -- initializes it from the source array.
1952 emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1953 -> FCode ()
1954 emitCloneSmallArray info_p res_r src src_off n = do
1955 dflags <- getDynFlags
1956
1957 let info_ptr = mkLblExpr info_p
1958 rep = smallArrPtrsRep n
1959
1960 tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
1961 (mkIntExpr dflags (nonHdrSize dflags rep))
1962 (zeroExpr dflags)
1963
1964 let hdr_size = fixedHdrSize dflags
1965
1966 base <- allocHeapClosure rep info_ptr curCCS
1967 [ (mkIntExpr dflags n,
1968 hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
1969 ]
1970
1971 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1972 emit $ mkAssign arr base
1973
1974 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
1975 (smallArrPtrsHdrSize dflags)
1976 src_p <- assignTempE $ cmmOffsetExprW dflags src
1977 (cmmAddWord dflags
1978 (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
1979
1980 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
1981 (wORD_SIZE dflags)
1982
1983 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1984
1985 -- | Takes and offset in the destination array, the base address of
1986 -- the card table, and the number of elements affected (*not* the
1987 -- number of cards). The number of elements may not be zero.
1988 -- Marks the relevant cards as dirty.
1989 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
1990 emitSetCards dst_start dst_cards_start n = do
1991 dflags <- getDynFlags
1992 start_card <- assignTempE $ cardCmm dflags dst_start
1993 let end_card = cardCmm dflags
1994 (cmmSubWord dflags
1995 (cmmAddWord dflags dst_start (mkIntExpr dflags n))
1996 (mkIntExpr dflags 1))
1997 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
1998 (mkIntExpr dflags 1)
1999 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
2000 1 -- no alignment (1 byte)
2001
2002 -- Convert an element index to a card index
2003 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
2004 cardCmm dflags i =
2005 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
2006
2007 ------------------------------------------------------------------------------
2008 -- SmallArray PrimOp implementations
2009
2010 doReadSmallPtrArrayOp :: LocalReg
2011 -> CmmExpr
2012 -> CmmExpr
2013 -> FCode ()
2014 doReadSmallPtrArrayOp res addr idx = do
2015 dflags <- getDynFlags
2016 mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
2017 (gcWord dflags) idx
2018
2019 doWriteSmallPtrArrayOp :: CmmExpr
2020 -> CmmExpr
2021 -> CmmExpr
2022 -> FCode ()
2023 doWriteSmallPtrArrayOp addr idx val = do
2024 dflags <- getDynFlags
2025 let ty = cmmExprType dflags val
2026 mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
2027 emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2028
2029 ------------------------------------------------------------------------------
2030 -- Atomic read-modify-write
2031
2032 -- | Emit an atomic modification to a byte array element. The result
2033 -- reg contains that previous value of the element. Implies a full
2034 -- memory barrier.
2035 doAtomicRMW :: LocalReg -- ^ Result reg
2036 -> AtomicMachOp -- ^ Atomic op (e.g. add)
2037 -> CmmExpr -- ^ MutableByteArray#
2038 -> CmmExpr -- ^ Index
2039 -> CmmType -- ^ Type of element by which we are indexing
2040 -> CmmExpr -- ^ Op argument (e.g. amount to add)
2041 -> FCode ()
2042 doAtomicRMW res amop mba idx idx_ty n = do
2043 dflags <- getDynFlags
2044 let width = typeWidth idx_ty
2045 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2046 width mba idx
2047 emitPrimCall
2048 [ res ]
2049 (MO_AtomicRMW width amop)
2050 [ addr, n ]
2051
2052 -- | Emit an atomic read to a byte array that acts as a memory barrier.
2053 doAtomicReadByteArray
2054 :: LocalReg -- ^ Result reg
2055 -> CmmExpr -- ^ MutableByteArray#
2056 -> CmmExpr -- ^ Index
2057 -> CmmType -- ^ Type of element by which we are indexing
2058 -> FCode ()
2059 doAtomicReadByteArray res mba idx idx_ty = do
2060 dflags <- getDynFlags
2061 let width = typeWidth idx_ty
2062 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2063 width mba idx
2064 emitPrimCall
2065 [ res ]
2066 (MO_AtomicRead width)
2067 [ addr ]
2068
2069 -- | Emit an atomic write to a byte array that acts as a memory barrier.
2070 doAtomicWriteByteArray
2071 :: CmmExpr -- ^ MutableByteArray#
2072 -> CmmExpr -- ^ Index
2073 -> CmmType -- ^ Type of element by which we are indexing
2074 -> CmmExpr -- ^ Value to write
2075 -> FCode ()
2076 doAtomicWriteByteArray mba idx idx_ty val = do
2077 dflags <- getDynFlags
2078 let width = typeWidth idx_ty
2079 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2080 width mba idx
2081 emitPrimCall
2082 [ {- no results -} ]
2083 (MO_AtomicWrite width)
2084 [ addr, val ]
2085
2086 doCasByteArray
2087 :: LocalReg -- ^ Result reg
2088 -> CmmExpr -- ^ MutableByteArray#
2089 -> CmmExpr -- ^ Index
2090 -> CmmType -- ^ Type of element by which we are indexing
2091 -> CmmExpr -- ^ Old value
2092 -> CmmExpr -- ^ New value
2093 -> FCode ()
2094 doCasByteArray res mba idx idx_ty old new = do
2095 dflags <- getDynFlags
2096 let width = (typeWidth idx_ty)
2097 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2098 width mba idx
2099 emitPrimCall
2100 [ res ]
2101 (MO_Cmpxchg width)
2102 [ addr, old, new ]
2103
2104 ------------------------------------------------------------------------------
2105 -- Helpers for emitting function calls
2106
2107 -- | Emit a call to @memcpy@.
2108 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2109 emitMemcpyCall dst src n align = do
2110 emitPrimCall
2111 [ {-no results-} ]
2112 (MO_Memcpy align)
2113 [ dst, src, n ]
2114
2115 -- | Emit a call to @memmove@.
2116 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2117 emitMemmoveCall dst src n align = do
2118 emitPrimCall
2119 [ {- no results -} ]
2120 (MO_Memmove align)
2121 [ dst, src, n ]
2122
2123 -- | Emit a call to @memset@. The second argument must fit inside an
2124 -- unsigned char.
2125 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2126 emitMemsetCall dst c n align = do
2127 emitPrimCall
2128 [ {- no results -} ]
2129 (MO_Memset align)
2130 [ dst, c, n ]
2131
2132 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2133 emitBSwapCall res x width = do
2134 emitPrimCall
2135 [ res ]
2136 (MO_BSwap width)
2137 [ x ]
2138
2139 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2140 emitPopCntCall res x width = do
2141 emitPrimCall
2142 [ res ]
2143 (MO_PopCnt width)
2144 [ x ]
2145
2146 emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2147 emitClzCall res x width = do
2148 emitPrimCall
2149 [ res ]
2150 (MO_Clz width)
2151 [ x ]
2152
2153 emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2154 emitCtzCall res x width = do
2155 emitPrimCall
2156 [ res ]
2157 (MO_Ctz width)
2158 [ x ]