d3c09c584eae9174babf81d342f0289fe391beb9
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 {-# LANGUAGE CPP #-}
2
3 ----------------------------------------------------------------------------
4 --
5 -- Stg to C--: primitive operations
6 --
7 -- (c) The University of Glasgow 2004-2006
8 --
9 -----------------------------------------------------------------------------
10
11 module StgCmmPrim (
12 cgOpApp,
13 cgPrimOp, -- internal(ish), used by cgCase to get code for a
14 -- comparison without also turning it into a Bool.
15 shouldInlinePrimOp
16 ) where
17
18 #include "HsVersions.h"
19
20 import StgCmmLayout
21 import StgCmmForeign
22 import StgCmmEnv
23 import StgCmmMonad
24 import StgCmmUtils
25 import StgCmmTicky
26 import StgCmmHeap
27 import StgCmmProf ( costCentreFrom, curCCS )
28
29 import DynFlags
30 import Platform
31 import BasicTypes
32 import MkGraph
33 import StgSyn
34 import Cmm
35 import CmmInfo
36 import Type ( Type, tyConAppTyCon )
37 import TyCon
38 import CLabel
39 import CmmUtils
40 import PrimOp
41 import SMRep
42 import FastString
43 import Outputable
44 import Util
45
46 import Prelude hiding ((<*>))
47
48 import Data.Bits ((.&.), bit)
49 import Control.Monad (liftM, when)
50
51 ------------------------------------------------------------------------
52 -- Primitive operations and foreign calls
53 ------------------------------------------------------------------------
54
55 {- Note [Foreign call results]
56 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
57 A foreign call always returns an unboxed tuple of results, one
58 of which is the state token. This seems to happen even for pure
59 calls.
60
61 Even if we returned a single result for pure calls, it'd still be
62 right to wrap it in a singleton unboxed tuple, because the result
63 might be a Haskell closure pointer, we don't want to evaluate it. -}
64
65 ----------------------------------
66 cgOpApp :: StgOp -- The op
67 -> [StgArg] -- Arguments
68 -> Type -- Result type (always an unboxed tuple)
69 -> FCode ReturnKind
70
71 -- Foreign calls
72 cgOpApp (StgFCallOp fcall _) stg_args res_ty
73 = cgForeignCall fcall stg_args res_ty
74 -- Note [Foreign call results]
75
76 -- tagToEnum# is special: we need to pull the constructor
77 -- out of the table, and perform an appropriate return.
78
79 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
80 = ASSERT(isEnumerationTyCon tycon)
81 do { dflags <- getDynFlags
82 ; args' <- getNonVoidArgAmodes [arg]
83 ; let amode = case args' of [amode] -> amode
84 _ -> panic "TagToEnumOp had void arg"
85 ; emitReturn [tagToClosure dflags tycon amode] }
86 where
87 -- If you're reading this code in the attempt to figure
88 -- out why the compiler panic'ed here, it is probably because
89 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
90 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
91 -- That won't work.
92 tycon = tyConAppTyCon res_ty
93
94 cgOpApp (StgPrimOp primop) args res_ty = do
95 dflags <- getDynFlags
96 cmm_args <- getNonVoidArgAmodes args
97 case shouldInlinePrimOp dflags primop cmm_args of
98 Nothing -> do -- out-of-line
99 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
100 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
101
102 Just f -- inline
103 | ReturnsPrim VoidRep <- result_info
104 -> do f []
105 emitReturn []
106
107 | ReturnsPrim rep <- result_info
108 -> do dflags <- getDynFlags
109 res <- newTemp (primRepCmmType dflags rep)
110 f [res]
111 emitReturn [CmmReg (CmmLocal res)]
112
113 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
114 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
115 f regs
116 emitReturn (map (CmmReg . CmmLocal) regs)
117
118 | otherwise -> panic "cgPrimop"
119 where
120 result_info = getPrimOpResultInfo primop
121
122 cgOpApp (StgPrimCallOp primcall) args _res_ty
123 = do { cmm_args <- getNonVoidArgAmodes args
124 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
125 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
126
127 -- | Interpret the argument as an unsigned value, assuming the value
128 -- is given in two-complement form in the given width.
129 --
130 -- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
131 --
132 -- This function is used to work around the fact that many array
133 -- primops take Int# arguments, but we interpret them as unsigned
134 -- quantities in the code gen. This means that we have to be careful
135 -- every time we work on e.g. a CmmInt literal that corresponds to the
136 -- array size, as it might contain a negative Integer value if the
137 -- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
138 -- literal.
139 asUnsigned :: Width -> Integer -> Integer
140 asUnsigned w n = n .&. (bit (widthInBits w) - 1)
141
142 -- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
143 -- ByteOff (or some other fixed width signed type) to represent
144 -- array sizes or indices. This means that these will overflow for
145 -- large enough sizes.
146
147 -- | Decide whether an out-of-line primop should be replaced by an
148 -- inline implementation. This might happen e.g. if there's enough
149 -- static information, such as statically know arguments, to emit a
150 -- more efficient implementation inline.
151 --
152 -- Returns 'Nothing' if this primop should use its out-of-line
153 -- implementation (defined elsewhere) and 'Just' together with a code
154 -- generating function that takes the output regs as arguments
155 -- otherwise.
156 shouldInlinePrimOp :: DynFlags
157 -> PrimOp -- ^ The primop
158 -> [CmmExpr] -- ^ The primop arguments
159 -> Maybe ([LocalReg] -> FCode ())
160
161 shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
162 | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
163 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
164
165 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
166 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
167 Just $ \ [res] ->
168 doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
169 [ (mkIntExpr dflags (fromInteger n),
170 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
171 , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
172 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
173 ]
174 (fromInteger n) init
175
176 shouldInlinePrimOp _ CopyArrayOp
177 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
178 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
179
180 shouldInlinePrimOp _ CopyMutableArrayOp
181 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
182 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
183
184 shouldInlinePrimOp _ CopyArrayArrayOp
185 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
186 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
187
188 shouldInlinePrimOp _ CopyMutableArrayArrayOp
189 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
190 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
191
192 shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
193 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
194 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
195
196 shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
197 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
198 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
199
200 shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
201 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
202 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
203
204 shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
205 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
206 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
207
208 shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
209 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
210 Just $ \ [res] ->
211 doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
212 [ (mkIntExpr dflags (fromInteger n),
213 fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
214 ]
215 (fromInteger n) init
216
217 shouldInlinePrimOp _ CopySmallArrayOp
218 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
219 Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
220
221 shouldInlinePrimOp _ CopySmallMutableArrayOp
222 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
223 Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
224
225 shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
226 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
227 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
228
229 shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
230 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
231 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
232
233 shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
234 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
235 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
236
237 shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
238 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
239 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
240
241 shouldInlinePrimOp dflags primop args
242 | primOpOutOfLine primop = Nothing
243 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
244
245 -- TODO: Several primops, such as 'copyArray#', only have an inline
246 -- implementation (below) but could possibly have both an inline
247 -- implementation and an out-of-line implementation, just like
248 -- 'newArray#'. This would lower the amount of code generated,
249 -- hopefully without a performance impact (needs to be measured).
250
251 ---------------------------------------------------
252 cgPrimOp :: [LocalReg] -- where to put the results
253 -> PrimOp -- the op
254 -> [StgArg] -- arguments
255 -> FCode ()
256
257 cgPrimOp results op args
258 = do dflags <- getDynFlags
259 arg_exprs <- getNonVoidArgAmodes args
260 emitPrimOp dflags results op arg_exprs
261
262
263 ------------------------------------------------------------------------
264 -- Emitting code for a primop
265 ------------------------------------------------------------------------
266
267 emitPrimOp :: DynFlags
268 -> [LocalReg] -- where to put the results
269 -> PrimOp -- the op
270 -> [CmmExpr] -- arguments
271 -> FCode ()
272
273 -- First we handle various awkward cases specially. The remaining
274 -- easy cases are then handled by translateOp, defined below.
275
276 emitPrimOp _ [res] ParOp [arg]
277 =
278 -- for now, just implement this in a C function
279 -- later, we might want to inline it.
280 emitCCall
281 [(res,NoHint)]
282 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
283 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
284
285 emitPrimOp dflags [res] SparkOp [arg]
286 = do
287 -- returns the value of arg in res. We're going to therefore
288 -- refer to arg twice (once to pass to newSpark(), and once to
289 -- assign to res), so put it in a temporary.
290 tmp <- assignTemp arg
291 tmp2 <- newTemp (bWord dflags)
292 emitCCall
293 [(tmp2,NoHint)]
294 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
295 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
296 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
297
298 emitPrimOp dflags [res] GetCCSOfOp [arg]
299 = emitAssign (CmmLocal res) val
300 where
301 val
302 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
303 | otherwise = CmmLit (zeroCLit dflags)
304
305 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
306 = emitAssign (CmmLocal res) curCCS
307
308 emitPrimOp dflags [res] ReadMutVarOp [mutv]
309 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
310
311 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
312 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
313 emitCCall
314 [{-no results-}]
315 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
316 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
317
318 -- #define sizzeofByteArrayzh(r,a) \
319 -- r = ((StgArrBytes *)(a))->bytes
320 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
321 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
322
323 -- #define sizzeofMutableByteArrayzh(r,a) \
324 -- r = ((StgArrBytes *)(a))->bytes
325 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
326 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
327
328 -- #define getSizzeofMutableByteArrayzh(r,a) \
329 -- r = ((StgArrBytes *)(a))->bytes
330 emitPrimOp dflags [res] GetSizeofMutableByteArrayOp [arg]
331 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
332
333
334 -- #define touchzh(o) /* nothing */
335 emitPrimOp _ res@[] TouchOp args@[_arg]
336 = do emitPrimCall res MO_Touch args
337
338 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
339 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
340 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
341
342 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
343 emitPrimOp dflags [res] StableNameToIntOp [arg]
344 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
345
346 -- #define eqStableNamezh(r,sn1,sn2) \
347 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
348 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
349 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
350 cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags),
351 cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags)
352 ])
353
354 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
355 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
356
357 -- #define addrToHValuezh(r,a) r=(P_)a
358 emitPrimOp _ [res] AddrToAnyOp [arg]
359 = emitAssign (CmmLocal res) arg
360
361 -- #define hvalueToAddrzh(r, a) r=(W_)a
362 emitPrimOp _ [res] AnyToAddrOp [arg]
363 = emitAssign (CmmLocal res) arg
364
365 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
366 -- Note: argument may be tagged!
367 emitPrimOp dflags [res] DataToTagOp [arg]
368 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
369
370 {- Freezing arrays-of-ptrs requires changing an info table, for the
371 benefit of the generational collector. It needs to scavenge mutable
372 objects, even if they are in old space. When they become immutable,
373 they can be removed from this scavenge list. -}
374
375 -- #define unsafeFreezzeArrayzh(r,a)
376 -- {
377 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
378 -- r = a;
379 -- }
380 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
381 = emit $ catAGraphs
382 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
383 mkAssign (CmmLocal res) arg ]
384 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
385 = emit $ catAGraphs
386 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
387 mkAssign (CmmLocal res) arg ]
388 emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
389 = emit $ catAGraphs
390 [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)),
391 mkAssign (CmmLocal res) arg ]
392
393 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
394 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
395 = emitAssign (CmmLocal res) arg
396
397 -- Reading/writing pointer arrays
398
399 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
400 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
401 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
402
403 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
404 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
405 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
406 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
407 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
408 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
409 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
410 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
411 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
412 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
413
414 emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
415 emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
416 emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
417
418 -- Getting the size of pointer arrays
419
420 emitPrimOp dflags [res] SizeofArrayOp [arg]
421 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg
422 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgMutArrPtrs_ptrs dflags))
423 (bWord dflags))
424 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
425 = emitPrimOp dflags [res] SizeofArrayOp [arg]
426 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
427 = emitPrimOp dflags [res] SizeofArrayOp [arg]
428 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
429 = emitPrimOp dflags [res] SizeofArrayOp [arg]
430
431 emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
432 emit $ mkAssign (CmmLocal res)
433 (cmmLoadIndexW dflags arg
434 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgSmallMutArrPtrs_ptrs dflags))
435 (bWord dflags))
436 emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
437 emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
438
439 -- IndexXXXoffAddr
440
441 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
442 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
443 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
444 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
445 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
446 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
447 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
448 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
449 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
450 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
451 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
452 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
453 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
454 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
455 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
456 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
457
458 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
459
460 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
461 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
462 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
463 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
464 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
465 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
466 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
467 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
468 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
469 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
470 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
471 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
472 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
473 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
474 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
475 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
476
477 -- IndexXXXArray
478
479 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
480 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
481 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
482 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
483 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
484 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
485 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
486 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
487 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
488 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
489 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
490 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
491 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
492 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
493 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
494 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
495
496 -- ReadXXXArray, identical to IndexXXXArray.
497
498 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
499 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
500 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
501 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
502 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
503 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
504 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
505 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
506 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
507 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
508 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
509 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
510 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
511 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
512 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
513 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
514
515 -- WriteXXXoffAddr
516
517 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
518 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
519 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
520 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
521 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
522 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
523 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
524 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
525 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
526 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
527 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
528 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
529 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
530 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
531 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
532 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
533
534 -- WriteXXXArray
535
536 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
537 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
538 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
539 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
540 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
541 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
542 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
543 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
544 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
545 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
546 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
547 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
548 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
549 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
550 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
551 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
552
553 -- Copying and setting byte arrays
554 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
555 doCopyByteArrayOp src src_off dst dst_off n
556 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
557 doCopyMutableByteArrayOp src src_off dst dst_off n
558 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
559 doCopyByteArrayToAddrOp src src_off dst n
560 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
561 doCopyMutableByteArrayToAddrOp src src_off dst n
562 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
563 doCopyAddrToByteArrayOp src dst dst_off n
564 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
565 doSetByteArrayOp ba off len c
566
567 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
568 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
569 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
570 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
571
572 -- Population count
573 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
574 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
575 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
576 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
577 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
578
579 -- count leading zeros
580 emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
581 emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
582 emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
583 emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
584 emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
585
586 -- count trailing zeros
587 emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
588 emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
589 emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
590 emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
591 emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
592
593 -- Unsigned int to floating point conversions
594 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
595 (MO_UF_Conv W32) [w]
596 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
597 (MO_UF_Conv W64) [w]
598
599 -- SIMD primops
600 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
601 checkVecCompatibility dflags vcat n w
602 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
603 where
604 zeros :: CmmExpr
605 zeros = CmmLit $ CmmVec (replicate n zero)
606
607 zero :: CmmLit
608 zero = case vcat of
609 IntVec -> CmmInt 0 w
610 WordVec -> CmmInt 0 w
611 FloatVec -> CmmFloat 0 w
612
613 ty :: CmmType
614 ty = vecVmmType vcat n w
615
616 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
617 checkVecCompatibility dflags vcat n w
618 when (length es /= n) $
619 panic "emitPrimOp: VecPackOp has wrong number of arguments"
620 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
621 where
622 zeros :: CmmExpr
623 zeros = CmmLit $ CmmVec (replicate n zero)
624
625 zero :: CmmLit
626 zero = case vcat of
627 IntVec -> CmmInt 0 w
628 WordVec -> CmmInt 0 w
629 FloatVec -> CmmFloat 0 w
630
631 ty :: CmmType
632 ty = vecVmmType vcat n w
633
634 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
635 checkVecCompatibility dflags vcat n w
636 when (length res /= n) $
637 panic "emitPrimOp: VecUnpackOp has wrong number of results"
638 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
639 where
640 ty :: CmmType
641 ty = vecVmmType vcat n w
642
643 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
644 checkVecCompatibility dflags vcat n w
645 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
646 where
647 ty :: CmmType
648 ty = vecVmmType vcat n w
649
650 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
651 checkVecCompatibility dflags vcat n w
652 doIndexByteArrayOp Nothing ty res args
653 where
654 ty :: CmmType
655 ty = vecVmmType vcat n w
656
657 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
658 checkVecCompatibility dflags vcat n w
659 doIndexByteArrayOp Nothing ty res args
660 where
661 ty :: CmmType
662 ty = vecVmmType vcat n w
663
664 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
665 checkVecCompatibility dflags vcat n w
666 doWriteByteArrayOp Nothing ty res args
667 where
668 ty :: CmmType
669 ty = vecVmmType vcat n w
670
671 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
672 checkVecCompatibility dflags vcat n w
673 doIndexOffAddrOp Nothing ty res args
674 where
675 ty :: CmmType
676 ty = vecVmmType vcat n w
677
678 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
679 checkVecCompatibility dflags vcat n w
680 doIndexOffAddrOp Nothing ty res args
681 where
682 ty :: CmmType
683 ty = vecVmmType vcat n w
684
685 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
686 checkVecCompatibility dflags vcat n w
687 doWriteOffAddrOp Nothing ty res args
688 where
689 ty :: CmmType
690 ty = vecVmmType vcat n w
691
692 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
693 checkVecCompatibility dflags vcat n w
694 doIndexByteArrayOpAs Nothing vecty ty res args
695 where
696 vecty :: CmmType
697 vecty = vecVmmType vcat n w
698
699 ty :: CmmType
700 ty = vecCmmCat vcat w
701
702 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
703 checkVecCompatibility dflags vcat n w
704 doIndexByteArrayOpAs Nothing vecty ty res args
705 where
706 vecty :: CmmType
707 vecty = vecVmmType vcat n w
708
709 ty :: CmmType
710 ty = vecCmmCat vcat w
711
712 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
713 checkVecCompatibility dflags vcat n w
714 doWriteByteArrayOp Nothing ty res args
715 where
716 ty :: CmmType
717 ty = vecCmmCat vcat w
718
719 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
720 checkVecCompatibility dflags vcat n w
721 doIndexOffAddrOpAs Nothing vecty ty res args
722 where
723 vecty :: CmmType
724 vecty = vecVmmType vcat n w
725
726 ty :: CmmType
727 ty = vecCmmCat vcat w
728
729 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
730 checkVecCompatibility dflags vcat n w
731 doIndexOffAddrOpAs Nothing vecty ty res args
732 where
733 vecty :: CmmType
734 vecty = vecVmmType vcat n w
735
736 ty :: CmmType
737 ty = vecCmmCat vcat w
738
739 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
740 checkVecCompatibility dflags vcat n w
741 doWriteOffAddrOp Nothing ty res args
742 where
743 ty :: CmmType
744 ty = vecCmmCat vcat w
745
746 -- Prefetch
747 emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
748 emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
749 emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
750 emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
751
752 emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
753 emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
754 emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
755 emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
756
757 emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
758 emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
759 emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
760 emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
761
762 emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
763 emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
764 emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
765 emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
766
767 -- Atomic read-modify-write
768 emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
769 doAtomicRMW res AMO_Add mba ix (bWord dflags) n
770 emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
771 doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
772 emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
773 doAtomicRMW res AMO_And mba ix (bWord dflags) n
774 emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
775 doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
776 emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
777 doAtomicRMW res AMO_Or mba ix (bWord dflags) n
778 emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
779 doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
780 emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
781 doAtomicReadByteArray res mba ix (bWord dflags)
782 emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
783 doAtomicWriteByteArray mba ix (bWord dflags) val
784 emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
785 doCasByteArray res mba ix (bWord dflags) old new
786
787 -- The rest just translate straightforwardly
788 emitPrimOp dflags [res] op [arg]
789 | nopOp op
790 = emitAssign (CmmLocal res) arg
791
792 | Just (mop,rep) <- narrowOp op
793 = emitAssign (CmmLocal res) $
794 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
795
796 emitPrimOp dflags r@[res] op args
797 | Just prim <- callishOp op
798 = do emitPrimCall r prim args
799
800 | Just mop <- translateOp dflags op
801 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
802 emit stmt
803
804 emitPrimOp dflags results op args
805 = case callishPrimOpSupported dflags op of
806 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
807 Right gen -> gen results args
808
809 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
810
811 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
812 callishPrimOpSupported dflags op
813 = case op of
814 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
815 | otherwise -> Right (genericIntQuotRemOp dflags)
816
817 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
818 | otherwise -> Right (genericWordQuotRemOp dflags)
819
820 WordQuotRem2Op | (ncg && x86ish)
821 || llvm -> Left (MO_U_QuotRem2 (wordWidth dflags))
822 | otherwise -> Right (genericWordQuotRem2Op dflags)
823
824 WordAdd2Op | (ncg && x86ish)
825 || llvm -> Left (MO_Add2 (wordWidth dflags))
826 | otherwise -> Right genericWordAdd2Op
827
828 WordSubCOp | (ncg && x86ish)
829 || llvm -> Left (MO_SubWordC (wordWidth dflags))
830 | otherwise -> Right genericWordSubCOp
831
832 IntAddCOp | (ncg && x86ish)
833 || llvm -> Left (MO_AddIntC (wordWidth dflags))
834 | otherwise -> Right genericIntAddCOp
835
836 IntSubCOp | (ncg && x86ish)
837 || llvm -> Left (MO_SubIntC (wordWidth dflags))
838 | otherwise -> Right genericIntSubCOp
839
840 WordMul2Op | ncg && x86ish
841 || llvm -> Left (MO_U_Mul2 (wordWidth dflags))
842 | otherwise -> Right genericWordMul2Op
843
844 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
845 where
846 ncg = case hscTarget dflags of
847 HscAsm -> True
848 _ -> False
849 llvm = case hscTarget dflags of
850 HscLlvm -> True
851 _ -> False
852 x86ish = case platformArch (targetPlatform dflags) of
853 ArchX86 -> True
854 ArchX86_64 -> True
855 _ -> False
856
857 genericIntQuotRemOp :: DynFlags -> GenericOp
858 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
859 = emit $ mkAssign (CmmLocal res_q)
860 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
861 mkAssign (CmmLocal res_r)
862 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
863 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
864
865 genericWordQuotRemOp :: DynFlags -> GenericOp
866 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
867 = emit $ mkAssign (CmmLocal res_q)
868 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
869 mkAssign (CmmLocal res_r)
870 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
871 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
872
873 genericWordQuotRem2Op :: DynFlags -> GenericOp
874 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
875 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
876 where ty = cmmExprType dflags arg_x_high
877 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
878 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
879 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
880 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
881 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
882 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
883 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
884 zero = lit 0
885 one = lit 1
886 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
887 lit i = CmmLit (CmmInt i (wordWidth dflags))
888
889 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
890 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
891 mkAssign (CmmLocal res_r) high)
892 f i acc high low =
893 do roverflowedBit <- newTemp ty
894 rhigh' <- newTemp ty
895 rhigh'' <- newTemp ty
896 rlow' <- newTemp ty
897 risge <- newTemp ty
898 racc' <- newTemp ty
899 let high' = CmmReg (CmmLocal rhigh')
900 isge = CmmReg (CmmLocal risge)
901 overflowedBit = CmmReg (CmmLocal roverflowedBit)
902 let this = catAGraphs
903 [mkAssign (CmmLocal roverflowedBit)
904 (shr high negone),
905 mkAssign (CmmLocal rhigh')
906 (or (shl high one) (shr low negone)),
907 mkAssign (CmmLocal rlow')
908 (shl low one),
909 mkAssign (CmmLocal risge)
910 (or (overflowedBit `ne` zero)
911 (high' `ge` arg_y)),
912 mkAssign (CmmLocal rhigh'')
913 (high' `minus` (arg_y `times` isge)),
914 mkAssign (CmmLocal racc')
915 (or (shl acc one) isge)]
916 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
917 (CmmReg (CmmLocal rhigh''))
918 (CmmReg (CmmLocal rlow'))
919 return (this <*> rest)
920 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
921
922 genericWordAdd2Op :: GenericOp
923 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
924 = do dflags <- getDynFlags
925 r1 <- newTemp (cmmExprType dflags arg_x)
926 r2 <- newTemp (cmmExprType dflags arg_x)
927 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
928 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
929 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
930 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
931 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
932 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
933 (wordWidth dflags))
934 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
935 emit $ catAGraphs
936 [mkAssign (CmmLocal r1)
937 (add (bottomHalf arg_x) (bottomHalf arg_y)),
938 mkAssign (CmmLocal r2)
939 (add (topHalf (CmmReg (CmmLocal r1)))
940 (add (topHalf arg_x) (topHalf arg_y))),
941 mkAssign (CmmLocal res_h)
942 (topHalf (CmmReg (CmmLocal r2))),
943 mkAssign (CmmLocal res_l)
944 (or (toTopHalf (CmmReg (CmmLocal r2)))
945 (bottomHalf (CmmReg (CmmLocal r1))))]
946 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
947
948 genericWordSubCOp :: GenericOp
949 genericWordSubCOp [res_r, res_c] [aa, bb] = do
950 dflags <- getDynFlags
951 emit $ catAGraphs
952 [ -- Put the result into 'res_r'.
953 mkAssign (CmmLocal res_r) $
954 CmmMachOp (mo_wordSub dflags) [aa, bb]
955 -- Set 'res_c' to 1 if 'bb > aa' and to 0 otherwise.
956 , mkAssign (CmmLocal res_c) $
957 CmmMachOp (mo_wordUGt dflags) [bb, aa]
958 ]
959 genericWordSubCOp _ _ = panic "genericWordSubCOp"
960
961 genericIntAddCOp :: GenericOp
962 genericIntAddCOp [res_r, res_c] [aa, bb]
963 {-
964 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
965 C, and without needing any comparisons. This may not be the
966 fastest way to do it - if you have better code, please send it! --SDM
967
968 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
969
970 We currently don't make use of the r value if c is != 0 (i.e.
971 overflow), we just convert to big integers and try again. This
972 could be improved by making r and c the correct values for
973 plugging into a new J#.
974
975 { r = ((I_)(a)) + ((I_)(b)); \
976 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
977 >> (BITS_IN (I_) - 1); \
978 }
979 Wading through the mass of bracketry, it seems to reduce to:
980 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
981
982 -}
983 = do dflags <- getDynFlags
984 emit $ catAGraphs [
985 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
986 mkAssign (CmmLocal res_c) $
987 CmmMachOp (mo_wordUShr dflags) [
988 CmmMachOp (mo_wordAnd dflags) [
989 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
990 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
991 ],
992 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
993 ]
994 ]
995 genericIntAddCOp _ _ = panic "genericIntAddCOp"
996
997 genericIntSubCOp :: GenericOp
998 genericIntSubCOp [res_r, res_c] [aa, bb]
999 {- Similarly:
1000 #define subIntCzh(r,c,a,b) \
1001 { r = ((I_)(a)) - ((I_)(b)); \
1002 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
1003 >> (BITS_IN (I_) - 1); \
1004 }
1005
1006 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
1007 -}
1008 = do dflags <- getDynFlags
1009 emit $ catAGraphs [
1010 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
1011 mkAssign (CmmLocal res_c) $
1012 CmmMachOp (mo_wordUShr dflags) [
1013 CmmMachOp (mo_wordAnd dflags) [
1014 CmmMachOp (mo_wordXor dflags) [aa,bb],
1015 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
1016 ],
1017 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1018 ]
1019 ]
1020 genericIntSubCOp _ _ = panic "genericIntSubCOp"
1021
1022 genericWordMul2Op :: GenericOp
1023 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
1024 = do dflags <- getDynFlags
1025 let t = cmmExprType dflags arg_x
1026 xlyl <- liftM CmmLocal $ newTemp t
1027 xlyh <- liftM CmmLocal $ newTemp t
1028 xhyl <- liftM CmmLocal $ newTemp t
1029 r <- liftM CmmLocal $ newTemp t
1030 -- This generic implementation is very simple and slow. We might
1031 -- well be able to do better, but for now this at least works.
1032 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
1033 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
1034 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
1035 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
1036 sum = foldl1 add
1037 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
1038 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
1039 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
1040 (wordWidth dflags))
1041 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
1042 emit $ catAGraphs
1043 [mkAssign xlyl
1044 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
1045 mkAssign xlyh
1046 (mul (bottomHalf arg_x) (topHalf arg_y)),
1047 mkAssign xhyl
1048 (mul (topHalf arg_x) (bottomHalf arg_y)),
1049 mkAssign r
1050 (sum [topHalf (CmmReg xlyl),
1051 bottomHalf (CmmReg xhyl),
1052 bottomHalf (CmmReg xlyh)]),
1053 mkAssign (CmmLocal res_l)
1054 (or (bottomHalf (CmmReg xlyl))
1055 (toTopHalf (CmmReg r))),
1056 mkAssign (CmmLocal res_h)
1057 (sum [mul (topHalf arg_x) (topHalf arg_y),
1058 topHalf (CmmReg xhyl),
1059 topHalf (CmmReg xlyh),
1060 topHalf (CmmReg r)])]
1061 genericWordMul2Op _ _ = panic "genericWordMul2Op"
1062
1063 -- These PrimOps are NOPs in Cmm
1064
1065 nopOp :: PrimOp -> Bool
1066 nopOp Int2WordOp = True
1067 nopOp Word2IntOp = True
1068 nopOp Int2AddrOp = True
1069 nopOp Addr2IntOp = True
1070 nopOp ChrOp = True -- Int# and Char# are rep'd the same
1071 nopOp OrdOp = True
1072 nopOp _ = False
1073
1074 -- These PrimOps turn into double casts
1075
1076 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
1077 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
1078 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
1079 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
1080 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
1081 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
1082 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
1083 narrowOp _ = Nothing
1084
1085 -- Native word signless ops
1086
1087 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
1088 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
1089 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
1090 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
1091 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
1092 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
1093 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
1094
1095 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
1096 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
1097 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
1098 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
1099 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
1100 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
1101
1102 translateOp dflags AndOp = Just (mo_wordAnd dflags)
1103 translateOp dflags OrOp = Just (mo_wordOr dflags)
1104 translateOp dflags XorOp = Just (mo_wordXor dflags)
1105 translateOp dflags NotOp = Just (mo_wordNot dflags)
1106 translateOp dflags SllOp = Just (mo_wordShl dflags)
1107 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
1108
1109 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
1110
1111 -- Native word signed ops
1112
1113 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
1114 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
1115 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
1116 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
1117 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
1118
1119
1120 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
1121 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
1122 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
1123 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
1124
1125 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
1126 translateOp dflags OrIOp = Just (mo_wordOr dflags)
1127 translateOp dflags XorIOp = Just (mo_wordXor dflags)
1128 translateOp dflags NotIOp = Just (mo_wordNot dflags)
1129 translateOp dflags ISllOp = Just (mo_wordShl dflags)
1130 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
1131 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
1132
1133 -- Native word unsigned ops
1134
1135 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
1136 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
1137 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
1138 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
1139
1140 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
1141 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
1142 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
1143
1144 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
1145 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
1146 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
1147 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
1148
1149 -- Char# ops
1150
1151 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
1152 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
1153 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
1154 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
1155 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
1156 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
1157
1158 -- Double ops
1159
1160 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
1161 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
1162 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
1163 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
1164 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
1165 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
1166
1167 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
1168 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
1169 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
1170 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
1171 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
1172
1173 -- Float ops
1174
1175 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
1176 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
1177 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
1178 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1179 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1180 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1181
1182 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1183 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1184 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1185 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1186 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1187
1188 -- Vector ops
1189
1190 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1191 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1192 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1193 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1194 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1195
1196 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1197 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1198 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1199 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1200 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1201 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1202
1203 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1204 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1205 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1206 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1207 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1208
1209 -- Conversions
1210
1211 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1212 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1213
1214 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1215 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1216
1217 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1218 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1219
1220 -- Word comparisons masquerading as more exotic things.
1221
1222 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1223 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1224 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1225 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1226 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1227 translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
1228 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1229 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1230
1231 translateOp _ _ = Nothing
1232
1233 -- These primops are implemented by CallishMachOps, because they sometimes
1234 -- turn into foreign calls depending on the backend.
1235
1236 callishOp :: PrimOp -> Maybe CallishMachOp
1237 callishOp DoublePowerOp = Just MO_F64_Pwr
1238 callishOp DoubleSinOp = Just MO_F64_Sin
1239 callishOp DoubleCosOp = Just MO_F64_Cos
1240 callishOp DoubleTanOp = Just MO_F64_Tan
1241 callishOp DoubleSinhOp = Just MO_F64_Sinh
1242 callishOp DoubleCoshOp = Just MO_F64_Cosh
1243 callishOp DoubleTanhOp = Just MO_F64_Tanh
1244 callishOp DoubleAsinOp = Just MO_F64_Asin
1245 callishOp DoubleAcosOp = Just MO_F64_Acos
1246 callishOp DoubleAtanOp = Just MO_F64_Atan
1247 callishOp DoubleLogOp = Just MO_F64_Log
1248 callishOp DoubleExpOp = Just MO_F64_Exp
1249 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1250
1251 callishOp FloatPowerOp = Just MO_F32_Pwr
1252 callishOp FloatSinOp = Just MO_F32_Sin
1253 callishOp FloatCosOp = Just MO_F32_Cos
1254 callishOp FloatTanOp = Just MO_F32_Tan
1255 callishOp FloatSinhOp = Just MO_F32_Sinh
1256 callishOp FloatCoshOp = Just MO_F32_Cosh
1257 callishOp FloatTanhOp = Just MO_F32_Tanh
1258 callishOp FloatAsinOp = Just MO_F32_Asin
1259 callishOp FloatAcosOp = Just MO_F32_Acos
1260 callishOp FloatAtanOp = Just MO_F32_Atan
1261 callishOp FloatLogOp = Just MO_F32_Log
1262 callishOp FloatExpOp = Just MO_F32_Exp
1263 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1264
1265 callishOp _ = Nothing
1266
1267 ------------------------------------------------------------------------------
1268 -- Helpers for translating various minor variants of array indexing.
1269
1270 doIndexOffAddrOp :: Maybe MachOp
1271 -> CmmType
1272 -> [LocalReg]
1273 -> [CmmExpr]
1274 -> FCode ()
1275 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1276 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1277 doIndexOffAddrOp _ _ _ _
1278 = panic "StgCmmPrim: doIndexOffAddrOp"
1279
1280 doIndexOffAddrOpAs :: Maybe MachOp
1281 -> CmmType
1282 -> CmmType
1283 -> [LocalReg]
1284 -> [CmmExpr]
1285 -> FCode ()
1286 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1287 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1288 doIndexOffAddrOpAs _ _ _ _ _
1289 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1290
1291 doIndexByteArrayOp :: Maybe MachOp
1292 -> CmmType
1293 -> [LocalReg]
1294 -> [CmmExpr]
1295 -> FCode ()
1296 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1297 = do dflags <- getDynFlags
1298 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1299 doIndexByteArrayOp _ _ _ _
1300 = panic "StgCmmPrim: doIndexByteArrayOp"
1301
1302 doIndexByteArrayOpAs :: Maybe MachOp
1303 -> CmmType
1304 -> CmmType
1305 -> [LocalReg]
1306 -> [CmmExpr]
1307 -> FCode ()
1308 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1309 = do dflags <- getDynFlags
1310 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1311 doIndexByteArrayOpAs _ _ _ _ _
1312 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1313
1314 doReadPtrArrayOp :: LocalReg
1315 -> CmmExpr
1316 -> CmmExpr
1317 -> FCode ()
1318 doReadPtrArrayOp res addr idx
1319 = do dflags <- getDynFlags
1320 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1321
1322 doWriteOffAddrOp :: Maybe MachOp
1323 -> CmmType
1324 -> [LocalReg]
1325 -> [CmmExpr]
1326 -> FCode ()
1327 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1328 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1329 doWriteOffAddrOp _ _ _ _
1330 = panic "StgCmmPrim: doWriteOffAddrOp"
1331
1332 doWriteByteArrayOp :: Maybe MachOp
1333 -> CmmType
1334 -> [LocalReg]
1335 -> [CmmExpr]
1336 -> FCode ()
1337 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1338 = do dflags <- getDynFlags
1339 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1340 doWriteByteArrayOp _ _ _ _
1341 = panic "StgCmmPrim: doWriteByteArrayOp"
1342
1343 doWritePtrArrayOp :: CmmExpr
1344 -> CmmExpr
1345 -> CmmExpr
1346 -> FCode ()
1347 doWritePtrArrayOp addr idx val
1348 = do dflags <- getDynFlags
1349 let ty = cmmExprType dflags val
1350 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1351 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1352 -- the write barrier. We must write a byte into the mark table:
1353 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1354 emit $ mkStore (
1355 cmmOffsetExpr dflags
1356 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1357 (loadArrPtrsSize dflags addr))
1358 (CmmMachOp (mo_wordUShr dflags) [idx,
1359 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1360 ) (CmmLit (CmmInt 1 W8))
1361
1362 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1363 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1364 where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1365
1366 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1367 -> Maybe MachOp -- Optional result cast
1368 -> CmmType -- Type of element we are accessing
1369 -> LocalReg -- Destination
1370 -> CmmExpr -- Base address
1371 -> CmmType -- Type of element by which we are indexing
1372 -> CmmExpr -- Index
1373 -> FCode ()
1374 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1375 = do dflags <- getDynFlags
1376 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1377 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1378 = do dflags <- getDynFlags
1379 emitAssign (CmmLocal res) (CmmMachOp cast [
1380 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1381
1382 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1383 -> Maybe MachOp -- Optional value cast
1384 -> CmmExpr -- Base address
1385 -> CmmType -- Type of element by which we are indexing
1386 -> CmmExpr -- Index
1387 -> CmmExpr -- Value to write
1388 -> FCode ()
1389 mkBasicIndexedWrite off Nothing base idx_ty idx val
1390 = do dflags <- getDynFlags
1391 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1392 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1393 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1394
1395 -- ----------------------------------------------------------------------------
1396 -- Misc utils
1397
1398 cmmIndexOffExpr :: DynFlags
1399 -> ByteOff -- Initial offset in bytes
1400 -> Width -- Width of element by which we are indexing
1401 -> CmmExpr -- Base address
1402 -> CmmExpr -- Index
1403 -> CmmExpr
1404 cmmIndexOffExpr dflags off width base idx
1405 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1406
1407 cmmLoadIndexOffExpr :: DynFlags
1408 -> ByteOff -- Initial offset in bytes
1409 -> CmmType -- Type of element we are accessing
1410 -> CmmExpr -- Base address
1411 -> CmmType -- Type of element by which we are indexing
1412 -> CmmExpr -- Index
1413 -> CmmExpr
1414 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1415 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1416
1417 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1418 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1419
1420 ------------------------------------------------------------------------------
1421 -- Helpers for translating vector primops.
1422
1423 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1424 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1425
1426 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1427 vecCmmCat IntVec = cmmBits
1428 vecCmmCat WordVec = cmmBits
1429 vecCmmCat FloatVec = cmmFloat
1430
1431 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1432 vecElemInjectCast _ FloatVec _ = Nothing
1433 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1434 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1435 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1436 vecElemInjectCast _ IntVec W64 = Nothing
1437 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1438 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1439 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1440 vecElemInjectCast _ WordVec W64 = Nothing
1441 vecElemInjectCast _ _ _ = Nothing
1442
1443 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1444 vecElemProjectCast _ FloatVec _ = Nothing
1445 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1446 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1447 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1448 vecElemProjectCast _ IntVec W64 = Nothing
1449 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1450 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1451 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1452 vecElemProjectCast _ WordVec W64 = Nothing
1453 vecElemProjectCast _ _ _ = Nothing
1454
1455 -- Check to make sure that we can generate code for the specified vector type
1456 -- given the current set of dynamic flags.
1457 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1458 checkVecCompatibility dflags vcat l w = do
1459 when (hscTarget dflags /= HscLlvm) $ do
1460 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1461 ,"Please use -fllvm."]
1462 check vecWidth vcat l w
1463 where
1464 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1465 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1466 sorry $ "128-bit wide single-precision floating point " ++
1467 "SIMD vector instructions require at least -msse."
1468 check W128 _ _ _ | not (isSse2Enabled dflags) =
1469 sorry $ "128-bit wide integer and double precision " ++
1470 "SIMD vector instructions require at least -msse2."
1471 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1472 sorry $ "256-bit wide floating point " ++
1473 "SIMD vector instructions require at least -mavx."
1474 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1475 sorry $ "256-bit wide integer " ++
1476 "SIMD vector instructions require at least -mavx2."
1477 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1478 sorry $ "512-bit wide " ++
1479 "SIMD vector instructions require -mavx512f."
1480 check _ _ _ _ = return ()
1481
1482 vecWidth = typeWidth (vecVmmType vcat l w)
1483
1484 ------------------------------------------------------------------------------
1485 -- Helpers for translating vector packing and unpacking.
1486
1487 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1488 -> CmmType -- Type of vector
1489 -> CmmExpr -- Initial vector
1490 -> [CmmExpr] -- Elements
1491 -> CmmFormal -- Destination for result
1492 -> FCode ()
1493 doVecPackOp maybe_pre_write_cast ty z es res = do
1494 dst <- newTemp ty
1495 emitAssign (CmmLocal dst) z
1496 vecPack dst es 0
1497 where
1498 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1499 vecPack src [] _ =
1500 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1501
1502 vecPack src (e : es) i = do
1503 dst <- newTemp ty
1504 if isFloatType (vecElemType ty)
1505 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1506 [CmmReg (CmmLocal src), cast e, iLit])
1507 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1508 [CmmReg (CmmLocal src), cast e, iLit])
1509 vecPack dst es (i + 1)
1510 where
1511 -- vector indices are always 32-bits
1512 iLit = CmmLit (CmmInt (toInteger i) W32)
1513
1514 cast :: CmmExpr -> CmmExpr
1515 cast val = case maybe_pre_write_cast of
1516 Nothing -> val
1517 Just cast -> CmmMachOp cast [val]
1518
1519 len :: Length
1520 len = vecLength ty
1521
1522 wid :: Width
1523 wid = typeWidth (vecElemType ty)
1524
1525 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1526 -> CmmType -- Type of vector
1527 -> CmmExpr -- Vector
1528 -> [CmmFormal] -- Element results
1529 -> FCode ()
1530 doVecUnpackOp maybe_post_read_cast ty e res =
1531 vecUnpack res 0
1532 where
1533 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1534 vecUnpack [] _ =
1535 return ()
1536
1537 vecUnpack (r : rs) i = do
1538 if isFloatType (vecElemType ty)
1539 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1540 [e, iLit]))
1541 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1542 [e, iLit]))
1543 vecUnpack rs (i + 1)
1544 where
1545 -- vector indices are always 32-bits
1546 iLit = CmmLit (CmmInt (toInteger i) W32)
1547
1548 cast :: CmmExpr -> CmmExpr
1549 cast val = case maybe_post_read_cast of
1550 Nothing -> val
1551 Just cast -> CmmMachOp cast [val]
1552
1553 len :: Length
1554 len = vecLength ty
1555
1556 wid :: Width
1557 wid = typeWidth (vecElemType ty)
1558
1559 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1560 -> CmmType -- Vector type
1561 -> CmmExpr -- Source vector
1562 -> CmmExpr -- Element
1563 -> CmmExpr -- Index at which to insert element
1564 -> CmmFormal -- Destination for result
1565 -> FCode ()
1566 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1567 dflags <- getDynFlags
1568 -- vector indices are always 32-bits
1569 let idx' :: CmmExpr
1570 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1571 if isFloatType (vecElemType ty)
1572 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1573 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1574 where
1575 cast :: CmmExpr -> CmmExpr
1576 cast val = case maybe_pre_write_cast of
1577 Nothing -> val
1578 Just cast -> CmmMachOp cast [val]
1579
1580 len :: Length
1581 len = vecLength ty
1582
1583 wid :: Width
1584 wid = typeWidth (vecElemType ty)
1585
1586 ------------------------------------------------------------------------------
1587 -- Helpers for translating prefetching.
1588
1589
1590 -- | Translate byte array prefetch operations into proper primcalls.
1591 doPrefetchByteArrayOp :: Int
1592 -> [CmmExpr]
1593 -> FCode ()
1594 doPrefetchByteArrayOp locality [addr,idx]
1595 = do dflags <- getDynFlags
1596 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1597 doPrefetchByteArrayOp _ _
1598 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1599
1600 -- | Translate mutable byte array prefetch operations into proper primcalls.
1601 doPrefetchMutableByteArrayOp :: Int
1602 -> [CmmExpr]
1603 -> FCode ()
1604 doPrefetchMutableByteArrayOp locality [addr,idx]
1605 = do dflags <- getDynFlags
1606 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1607 doPrefetchMutableByteArrayOp _ _
1608 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1609
1610 -- | Translate address prefetch operations into proper primcalls.
1611 doPrefetchAddrOp ::Int
1612 -> [CmmExpr]
1613 -> FCode ()
1614 doPrefetchAddrOp locality [addr,idx]
1615 = mkBasicPrefetch locality 0 addr idx
1616 doPrefetchAddrOp _ _
1617 = panic "StgCmmPrim: doPrefetchAddrOp"
1618
1619 -- | Translate value prefetch operations into proper primcalls.
1620 doPrefetchValueOp :: Int
1621 -> [CmmExpr]
1622 -> FCode ()
1623 doPrefetchValueOp locality [addr]
1624 = do dflags <- getDynFlags
1625 mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
1626 doPrefetchValueOp _ _
1627 = panic "StgCmmPrim: doPrefetchValueOp"
1628
1629 -- | helper to generate prefetch primcalls
1630 mkBasicPrefetch :: Int -- Locality level 0-3
1631 -> ByteOff -- Initial offset in bytes
1632 -> CmmExpr -- Base address
1633 -> CmmExpr -- Index
1634 -> FCode ()
1635 mkBasicPrefetch locality off base idx
1636 = do dflags <- getDynFlags
1637 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1638 return ()
1639
1640 -- ----------------------------------------------------------------------------
1641 -- Allocating byte arrays
1642
1643 -- | Takes a register to return the newly allocated array in and the
1644 -- size of the new array in bytes. Allocates a new
1645 -- 'MutableByteArray#'.
1646 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1647 doNewByteArrayOp res_r n = do
1648 dflags <- getDynFlags
1649
1650 let info_ptr = mkLblExpr mkArrWords_infoLabel
1651 rep = arrWordsRep dflags n
1652
1653 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1654 (mkIntExpr dflags (nonHdrSize dflags rep))
1655 (zeroExpr dflags)
1656
1657 let hdr_size = fixedHdrSize dflags
1658
1659 base <- allocHeapClosure rep info_ptr curCCS
1660 [ (mkIntExpr dflags n,
1661 hdr_size + oFFSET_StgArrBytes_bytes dflags)
1662 ]
1663
1664 emit $ mkAssign (CmmLocal res_r) base
1665
1666 -- ----------------------------------------------------------------------------
1667 -- Copying byte arrays
1668
1669 -- | Takes a source 'ByteArray#', an offset in the source array, a
1670 -- destination 'MutableByteArray#', an offset into the destination
1671 -- array, and the number of bytes to copy. Copies the given number of
1672 -- bytes from the source array to the destination array.
1673 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1674 -> FCode ()
1675 doCopyByteArrayOp = emitCopyByteArray copy
1676 where
1677 -- Copy data (we assume the arrays aren't overlapping since
1678 -- they're of different types)
1679 copy _src _dst dst_p src_p bytes =
1680 emitMemcpyCall dst_p src_p bytes 1
1681
1682 -- | Takes a source 'MutableByteArray#', an offset in the source
1683 -- array, a destination 'MutableByteArray#', an offset into the
1684 -- destination array, and the number of bytes to copy. Copies the
1685 -- given number of bytes from the source array to the destination
1686 -- array.
1687 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1688 -> FCode ()
1689 doCopyMutableByteArrayOp = emitCopyByteArray copy
1690 where
1691 -- The only time the memory might overlap is when the two arrays
1692 -- we were provided are the same array!
1693 -- TODO: Optimize branch for common case of no aliasing.
1694 copy src dst dst_p src_p bytes = do
1695 dflags <- getDynFlags
1696 [moveCall, cpyCall] <- forkAlts [
1697 getCode $ emitMemmoveCall dst_p src_p bytes 1,
1698 getCode $ emitMemcpyCall dst_p src_p bytes 1
1699 ]
1700 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1701
1702 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1703 -> FCode ())
1704 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1705 -> FCode ()
1706 emitCopyByteArray copy src src_off dst dst_off n = do
1707 dflags <- getDynFlags
1708 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1709 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1710 copy src dst dst_p src_p n
1711
1712 -- | Takes a source 'ByteArray#', an offset in the source array, a
1713 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1714 -- number of bytes from the source array to the destination memory region.
1715 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1716 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1717 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1718 dflags <- getDynFlags
1719 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1720 emitMemcpyCall dst_p src_p bytes 1
1721
1722 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1723 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1724 -- number of bytes from the source array to the destination memory region.
1725 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1726 -> FCode ()
1727 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1728
1729 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1730 -- the destination array, and the number of bytes to copy. Copies the given
1731 -- number of bytes from the source memory region to the destination array.
1732 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1733 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1734 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1735 dflags <- getDynFlags
1736 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1737 emitMemcpyCall dst_p src_p bytes 1
1738
1739
1740 -- ----------------------------------------------------------------------------
1741 -- Setting byte arrays
1742
1743 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1744 -- and a byte, and sets each of the selected bytes in the array to the
1745 -- character.
1746 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1747 -> FCode ()
1748 doSetByteArrayOp ba off len c
1749 = do dflags <- getDynFlags
1750 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1751 emitMemsetCall p c len 1
1752
1753 -- ----------------------------------------------------------------------------
1754 -- Allocating arrays
1755
1756 -- | Allocate a new array.
1757 doNewArrayOp :: CmmFormal -- ^ return register
1758 -> SMRep -- ^ representation of the array
1759 -> CLabel -- ^ info pointer
1760 -> [(CmmExpr, ByteOff)] -- ^ header payload
1761 -> WordOff -- ^ array size
1762 -> CmmExpr -- ^ initial element
1763 -> FCode ()
1764 doNewArrayOp res_r rep info payload n init = do
1765 dflags <- getDynFlags
1766
1767 let info_ptr = mkLblExpr info
1768
1769 tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
1770 (mkIntExpr dflags (nonHdrSize dflags rep))
1771 (zeroExpr dflags)
1772
1773 base <- allocHeapClosure rep info_ptr curCCS payload
1774
1775 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1776 emit $ mkAssign arr base
1777
1778 -- Initialise all elements of the the array
1779 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep)
1780 for <- newLabelC
1781 emitLabel for
1782 let loopBody =
1783 [ mkStore (CmmReg (CmmLocal p)) init
1784 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1785 , mkBranch for ]
1786 emit =<< mkCmmIfThen
1787 (cmmULtWord dflags (CmmReg (CmmLocal p))
1788 (cmmOffsetW dflags (CmmReg arr)
1789 (hdrSizeW dflags rep + n)))
1790 (catAGraphs loopBody)
1791
1792 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1793
1794 -- ----------------------------------------------------------------------------
1795 -- Copying pointer arrays
1796
1797 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1798 -- nowhere else in the code generator. This is mostly because these
1799 -- "primitive" ops result in a surprisingly large amount of code. It
1800 -- will likely be worthwhile to optimize what is emitted here, so that
1801 -- our optimization passes don't waste time repeatedly optimizing the
1802 -- same bits of code.
1803
1804 -- More closely imitates 'assignTemp' from the old code generator, which
1805 -- returns a CmmExpr rather than a LocalReg.
1806 assignTempE :: CmmExpr -> FCode CmmExpr
1807 assignTempE e = do
1808 t <- assignTemp e
1809 return (CmmReg (CmmLocal t))
1810
1811 -- | Takes a source 'Array#', an offset in the source array, a
1812 -- destination 'MutableArray#', an offset into the destination array,
1813 -- and the number of elements to copy. Copies the given number of
1814 -- elements from the source array to the destination array.
1815 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1816 -> FCode ()
1817 doCopyArrayOp = emitCopyArray copy
1818 where
1819 -- Copy data (we assume the arrays aren't overlapping since
1820 -- they're of different types)
1821 copy _src _dst dst_p src_p bytes =
1822 do dflags <- getDynFlags
1823 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1824 (wORD_SIZE dflags)
1825
1826
1827 -- | Takes a source 'MutableArray#', an offset in the source array, a
1828 -- destination 'MutableArray#', an offset into the destination array,
1829 -- and the number of elements to copy. Copies the given number of
1830 -- elements from the source array to the destination array.
1831 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1832 -> FCode ()
1833 doCopyMutableArrayOp = emitCopyArray copy
1834 where
1835 -- The only time the memory might overlap is when the two arrays
1836 -- we were provided are the same array!
1837 -- TODO: Optimize branch for common case of no aliasing.
1838 copy src dst dst_p src_p bytes = do
1839 dflags <- getDynFlags
1840 [moveCall, cpyCall] <- forkAlts [
1841 getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1842 (wORD_SIZE dflags),
1843 getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1844 (wORD_SIZE dflags)
1845 ]
1846 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1847
1848 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1849 -> FCode ()) -- ^ copy function
1850 -> CmmExpr -- ^ source array
1851 -> CmmExpr -- ^ offset in source array
1852 -> CmmExpr -- ^ destination array
1853 -> CmmExpr -- ^ offset in destination array
1854 -> WordOff -- ^ number of elements to copy
1855 -> FCode ()
1856 emitCopyArray copy src0 src_off dst0 dst_off0 n = do
1857 dflags <- getDynFlags
1858 when (n /= 0) $ do
1859 -- Passed as arguments (be careful)
1860 src <- assignTempE src0
1861 dst <- assignTempE dst0
1862 dst_off <- assignTempE dst_off0
1863
1864 -- Set the dirty bit in the header.
1865 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1866
1867 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
1868 (arrPtrsHdrSize dflags)
1869 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1870 src_p <- assignTempE $ cmmOffsetExprW dflags
1871 (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1872 let bytes = wordsToBytes dflags n
1873
1874 copy src dst dst_p src_p bytes
1875
1876 -- The base address of the destination card table
1877 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
1878 (loadArrPtrsSize dflags dst)
1879
1880 emitSetCards dst_off dst_cards_p n
1881
1882 doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1883 -> FCode ()
1884 doCopySmallArrayOp = emitCopySmallArray copy
1885 where
1886 -- Copy data (we assume the arrays aren't overlapping since
1887 -- they're of different types)
1888 copy _src _dst dst_p src_p bytes =
1889 do dflags <- getDynFlags
1890 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1891 (wORD_SIZE dflags)
1892
1893
1894 doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1895 -> FCode ()
1896 doCopySmallMutableArrayOp = emitCopySmallArray copy
1897 where
1898 -- The only time the memory might overlap is when the two arrays
1899 -- we were provided are the same array!
1900 -- TODO: Optimize branch for common case of no aliasing.
1901 copy src dst dst_p src_p bytes = do
1902 dflags <- getDynFlags
1903 [moveCall, cpyCall] <- forkAlts
1904 [ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1905 (wORD_SIZE dflags)
1906 , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1907 (wORD_SIZE dflags)
1908 ]
1909 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1910
1911 emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1912 -> FCode ()) -- ^ copy function
1913 -> CmmExpr -- ^ source array
1914 -> CmmExpr -- ^ offset in source array
1915 -> CmmExpr -- ^ destination array
1916 -> CmmExpr -- ^ offset in destination array
1917 -> WordOff -- ^ number of elements to copy
1918 -> FCode ()
1919 emitCopySmallArray copy src0 src_off dst0 dst_off n = do
1920 dflags <- getDynFlags
1921
1922 -- Passed as arguments (be careful)
1923 src <- assignTempE src0
1924 dst <- assignTempE dst0
1925
1926 -- Set the dirty bit in the header.
1927 emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
1928
1929 dst_p <- assignTempE $ cmmOffsetExprW dflags
1930 (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
1931 src_p <- assignTempE $ cmmOffsetExprW dflags
1932 (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
1933 let bytes = wordsToBytes dflags n
1934
1935 copy src dst dst_p src_p bytes
1936
1937 -- | Takes an info table label, a register to return the newly
1938 -- allocated array in, a source array, an offset in the source array,
1939 -- and the number of elements to copy. Allocates a new array and
1940 -- initializes it from the source array.
1941 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1942 -> FCode ()
1943 emitCloneArray info_p res_r src src_off n = do
1944 dflags <- getDynFlags
1945
1946 let info_ptr = mkLblExpr info_p
1947 rep = arrPtrsRep dflags n
1948
1949 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
1950 (mkIntExpr dflags (nonHdrSize dflags rep))
1951 (zeroExpr dflags)
1952
1953 let hdr_size = fixedHdrSize dflags
1954
1955 base <- allocHeapClosure rep info_ptr curCCS
1956 [ (mkIntExpr dflags n,
1957 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
1958 , (mkIntExpr dflags (nonHdrSizeW rep),
1959 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
1960 ]
1961
1962 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1963 emit $ mkAssign arr base
1964
1965 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
1966 (arrPtrsHdrSize dflags)
1967 src_p <- assignTempE $ cmmOffsetExprW dflags src
1968 (cmmAddWord dflags
1969 (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
1970
1971 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
1972 (wORD_SIZE dflags)
1973
1974 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1975
1976 -- | Takes an info table label, a register to return the newly
1977 -- allocated array in, a source array, an offset in the source array,
1978 -- and the number of elements to copy. Allocates a new array and
1979 -- initializes it from the source array.
1980 emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1981 -> FCode ()
1982 emitCloneSmallArray info_p res_r src src_off n = do
1983 dflags <- getDynFlags
1984
1985 let info_ptr = mkLblExpr info_p
1986 rep = smallArrPtrsRep n
1987
1988 tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
1989 (mkIntExpr dflags (nonHdrSize dflags rep))
1990 (zeroExpr dflags)
1991
1992 let hdr_size = fixedHdrSize dflags
1993
1994 base <- allocHeapClosure rep info_ptr curCCS
1995 [ (mkIntExpr dflags n,
1996 hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
1997 ]
1998
1999 arr <- CmmLocal `fmap` newTemp (bWord dflags)
2000 emit $ mkAssign arr base
2001
2002 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
2003 (smallArrPtrsHdrSize dflags)
2004 src_p <- assignTempE $ cmmOffsetExprW dflags src
2005 (cmmAddWord dflags
2006 (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
2007
2008 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
2009 (wORD_SIZE dflags)
2010
2011 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2012
2013 -- | Takes and offset in the destination array, the base address of
2014 -- the card table, and the number of elements affected (*not* the
2015 -- number of cards). The number of elements may not be zero.
2016 -- Marks the relevant cards as dirty.
2017 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
2018 emitSetCards dst_start dst_cards_start n = do
2019 dflags <- getDynFlags
2020 start_card <- assignTempE $ cardCmm dflags dst_start
2021 let end_card = cardCmm dflags
2022 (cmmSubWord dflags
2023 (cmmAddWord dflags dst_start (mkIntExpr dflags n))
2024 (mkIntExpr dflags 1))
2025 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
2026 (mkIntExpr dflags 1)
2027 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
2028 1 -- no alignment (1 byte)
2029
2030 -- Convert an element index to a card index
2031 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
2032 cardCmm dflags i =
2033 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
2034
2035 ------------------------------------------------------------------------------
2036 -- SmallArray PrimOp implementations
2037
2038 doReadSmallPtrArrayOp :: LocalReg
2039 -> CmmExpr
2040 -> CmmExpr
2041 -> FCode ()
2042 doReadSmallPtrArrayOp res addr idx = do
2043 dflags <- getDynFlags
2044 mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
2045 (gcWord dflags) idx
2046
2047 doWriteSmallPtrArrayOp :: CmmExpr
2048 -> CmmExpr
2049 -> CmmExpr
2050 -> FCode ()
2051 doWriteSmallPtrArrayOp addr idx val = do
2052 dflags <- getDynFlags
2053 let ty = cmmExprType dflags val
2054 mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
2055 emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2056
2057 ------------------------------------------------------------------------------
2058 -- Atomic read-modify-write
2059
2060 -- | Emit an atomic modification to a byte array element. The result
2061 -- reg contains that previous value of the element. Implies a full
2062 -- memory barrier.
2063 doAtomicRMW :: LocalReg -- ^ Result reg
2064 -> AtomicMachOp -- ^ Atomic op (e.g. add)
2065 -> CmmExpr -- ^ MutableByteArray#
2066 -> CmmExpr -- ^ Index
2067 -> CmmType -- ^ Type of element by which we are indexing
2068 -> CmmExpr -- ^ Op argument (e.g. amount to add)
2069 -> FCode ()
2070 doAtomicRMW res amop mba idx idx_ty n = do
2071 dflags <- getDynFlags
2072 let width = typeWidth idx_ty
2073 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2074 width mba idx
2075 emitPrimCall
2076 [ res ]
2077 (MO_AtomicRMW width amop)
2078 [ addr, n ]
2079
2080 -- | Emit an atomic read to a byte array that acts as a memory barrier.
2081 doAtomicReadByteArray
2082 :: LocalReg -- ^ Result reg
2083 -> CmmExpr -- ^ MutableByteArray#
2084 -> CmmExpr -- ^ Index
2085 -> CmmType -- ^ Type of element by which we are indexing
2086 -> FCode ()
2087 doAtomicReadByteArray res mba idx idx_ty = do
2088 dflags <- getDynFlags
2089 let width = typeWidth idx_ty
2090 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2091 width mba idx
2092 emitPrimCall
2093 [ res ]
2094 (MO_AtomicRead width)
2095 [ addr ]
2096
2097 -- | Emit an atomic write to a byte array that acts as a memory barrier.
2098 doAtomicWriteByteArray
2099 :: CmmExpr -- ^ MutableByteArray#
2100 -> CmmExpr -- ^ Index
2101 -> CmmType -- ^ Type of element by which we are indexing
2102 -> CmmExpr -- ^ Value to write
2103 -> FCode ()
2104 doAtomicWriteByteArray mba idx idx_ty val = do
2105 dflags <- getDynFlags
2106 let width = typeWidth idx_ty
2107 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2108 width mba idx
2109 emitPrimCall
2110 [ {- no results -} ]
2111 (MO_AtomicWrite width)
2112 [ addr, val ]
2113
2114 doCasByteArray
2115 :: LocalReg -- ^ Result reg
2116 -> CmmExpr -- ^ MutableByteArray#
2117 -> CmmExpr -- ^ Index
2118 -> CmmType -- ^ Type of element by which we are indexing
2119 -> CmmExpr -- ^ Old value
2120 -> CmmExpr -- ^ New value
2121 -> FCode ()
2122 doCasByteArray res mba idx idx_ty old new = do
2123 dflags <- getDynFlags
2124 let width = (typeWidth idx_ty)
2125 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2126 width mba idx
2127 emitPrimCall
2128 [ res ]
2129 (MO_Cmpxchg width)
2130 [ addr, old, new ]
2131
2132 ------------------------------------------------------------------------------
2133 -- Helpers for emitting function calls
2134
2135 -- | Emit a call to @memcpy@.
2136 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2137 emitMemcpyCall dst src n align = do
2138 emitPrimCall
2139 [ {-no results-} ]
2140 (MO_Memcpy align)
2141 [ dst, src, n ]
2142
2143 -- | Emit a call to @memmove@.
2144 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2145 emitMemmoveCall dst src n align = do
2146 emitPrimCall
2147 [ {- no results -} ]
2148 (MO_Memmove align)
2149 [ dst, src, n ]
2150
2151 -- | Emit a call to @memset@. The second argument must fit inside an
2152 -- unsigned char.
2153 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2154 emitMemsetCall dst c n align = do
2155 emitPrimCall
2156 [ {- no results -} ]
2157 (MO_Memset align)
2158 [ dst, c, n ]
2159
2160 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2161 emitBSwapCall res x width = do
2162 emitPrimCall
2163 [ res ]
2164 (MO_BSwap width)
2165 [ x ]
2166
2167 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2168 emitPopCntCall res x width = do
2169 emitPrimCall
2170 [ res ]
2171 (MO_PopCnt width)
2172 [ x ]
2173
2174 emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2175 emitClzCall res x width = do
2176 emitPrimCall
2177 [ res ]
2178 (MO_Clz width)
2179 [ x ]
2180
2181 emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2182 emitCtzCall res x width = do
2183 emitPrimCall
2184 [ res ]
2185 (MO_Ctz width)
2186 [ x ]