Make the "matchable-given" check happen first
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 {-# LANGUAGE CPP #-}
2
3 ----------------------------------------------------------------------------
4 --
5 -- Stg to C--: primitive operations
6 --
7 -- (c) The University of Glasgow 2004-2006
8 --
9 -----------------------------------------------------------------------------
10
11 module StgCmmPrim (
12 cgOpApp,
13 cgPrimOp, -- internal(ish), used by cgCase to get code for a
14 -- comparison without also turning it into a Bool.
15 shouldInlinePrimOp
16 ) where
17
18 #include "HsVersions.h"
19
20 import StgCmmLayout
21 import StgCmmForeign
22 import StgCmmEnv
23 import StgCmmMonad
24 import StgCmmUtils
25 import StgCmmTicky
26 import StgCmmHeap
27 import StgCmmProf ( costCentreFrom, curCCS )
28
29 import DynFlags
30 import Platform
31 import BasicTypes
32 import MkGraph
33 import StgSyn
34 import Cmm
35 import CmmInfo
36 import Type ( Type, tyConAppTyCon )
37 import TyCon
38 import CLabel
39 import CmmUtils
40 import PrimOp
41 import SMRep
42 import FastString
43 import Outputable
44 import Util
45
46 #if __GLASGOW_HASKELL__ >= 709
47 import Prelude hiding ((<*>))
48 #endif
49
50 import Data.Bits ((.&.), bit)
51 import Control.Monad (liftM, when)
52
53 ------------------------------------------------------------------------
54 -- Primitive operations and foreign calls
55 ------------------------------------------------------------------------
56
57 {- Note [Foreign call results]
58 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
59 A foreign call always returns an unboxed tuple of results, one
60 of which is the state token. This seems to happen even for pure
61 calls.
62
63 Even if we returned a single result for pure calls, it'd still be
64 right to wrap it in a singleton unboxed tuple, because the result
65 might be a Haskell closure pointer, we don't want to evaluate it. -}
66
67 ----------------------------------
68 cgOpApp :: StgOp -- The op
69 -> [StgArg] -- Arguments
70 -> Type -- Result type (always an unboxed tuple)
71 -> FCode ReturnKind
72
73 -- Foreign calls
74 cgOpApp (StgFCallOp fcall _) stg_args res_ty
75 = cgForeignCall fcall stg_args res_ty
76 -- Note [Foreign call results]
77
78 -- tagToEnum# is special: we need to pull the constructor
79 -- out of the table, and perform an appropriate return.
80
81 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
82 = ASSERT(isEnumerationTyCon tycon)
83 do { dflags <- getDynFlags
84 ; args' <- getNonVoidArgAmodes [arg]
85 ; let amode = case args' of [amode] -> amode
86 _ -> panic "TagToEnumOp had void arg"
87 ; emitReturn [tagToClosure dflags tycon amode] }
88 where
89 -- If you're reading this code in the attempt to figure
90 -- out why the compiler panic'ed here, it is probably because
91 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
92 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
93 -- That won't work.
94 tycon = tyConAppTyCon res_ty
95
96 cgOpApp (StgPrimOp primop) args res_ty = do
97 dflags <- getDynFlags
98 cmm_args <- getNonVoidArgAmodes args
99 case shouldInlinePrimOp dflags primop cmm_args of
100 Nothing -> do -- out-of-line
101 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
102 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
103
104 Just f -- inline
105 | ReturnsPrim VoidRep <- result_info
106 -> do f []
107 emitReturn []
108
109 | ReturnsPrim rep <- result_info
110 -> do dflags <- getDynFlags
111 res <- newTemp (primRepCmmType dflags rep)
112 f [res]
113 emitReturn [CmmReg (CmmLocal res)]
114
115 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
116 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
117 f regs
118 emitReturn (map (CmmReg . CmmLocal) regs)
119
120 | otherwise -> panic "cgPrimop"
121 where
122 result_info = getPrimOpResultInfo primop
123
124 cgOpApp (StgPrimCallOp primcall) args _res_ty
125 = do { cmm_args <- getNonVoidArgAmodes args
126 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
127 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
128
129 -- | Interpret the argument as an unsigned value, assuming the value
130 -- is given in two-complement form in the given width.
131 --
132 -- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
133 --
134 -- This function is used to work around the fact that many array
135 -- primops take Int# arguments, but we interpret them as unsigned
136 -- quantities in the code gen. This means that we have to be careful
137 -- every time we work on e.g. a CmmInt literal that corresponds to the
138 -- array size, as it might contain a negative Integer value if the
139 -- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
140 -- literal.
141 asUnsigned :: Width -> Integer -> Integer
142 asUnsigned w n = n .&. (bit (widthInBits w) - 1)
143
144 -- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
145 -- ByteOff (or some other fixed width signed type) to represent
146 -- array sizes or indices. This means that these will overflow for
147 -- large enough sizes.
148
149 -- | Decide whether an out-of-line primop should be replaced by an
150 -- inline implementation. This might happen e.g. if there's enough
151 -- static information, such as statically know arguments, to emit a
152 -- more efficient implementation inline.
153 --
154 -- Returns 'Nothing' if this primop should use its out-of-line
155 -- implementation (defined elsewhere) and 'Just' together with a code
156 -- generating function that takes the output regs as arguments
157 -- otherwise.
158 shouldInlinePrimOp :: DynFlags
159 -> PrimOp -- ^ The primop
160 -> [CmmExpr] -- ^ The primop arguments
161 -> Maybe ([LocalReg] -> FCode ())
162
163 shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
164 | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
165 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
166
167 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
168 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
169 Just $ \ [res] ->
170 doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
171 [ (mkIntExpr dflags (fromInteger n),
172 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
173 , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
174 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
175 ]
176 (fromInteger n) init
177
178 shouldInlinePrimOp _ CopyArrayOp
179 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
180 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
181
182 shouldInlinePrimOp _ CopyMutableArrayOp
183 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
184 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
185
186 shouldInlinePrimOp _ CopyArrayArrayOp
187 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
188 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
189
190 shouldInlinePrimOp _ CopyMutableArrayArrayOp
191 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
192 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
193
194 shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
195 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
196 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
197
198 shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
199 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
200 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
201
202 shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
203 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
204 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
205
206 shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
207 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
208 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
209
210 shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
211 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
212 Just $ \ [res] ->
213 doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
214 [ (mkIntExpr dflags (fromInteger n),
215 fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
216 ]
217 (fromInteger n) init
218
219 shouldInlinePrimOp _ CopySmallArrayOp
220 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
221 Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
222
223 shouldInlinePrimOp _ CopySmallMutableArrayOp
224 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
225 Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
226
227 shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
228 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
229 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
230
231 shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
232 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
233 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
234
235 shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
236 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
237 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
238
239 shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
240 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
241 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
242
243 shouldInlinePrimOp dflags primop args
244 | primOpOutOfLine primop = Nothing
245 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
246
247 -- TODO: Several primops, such as 'copyArray#', only have an inline
248 -- implementation (below) but could possibly have both an inline
249 -- implementation and an out-of-line implementation, just like
250 -- 'newArray#'. This would lower the amount of code generated,
251 -- hopefully without a performance impact (needs to be measured).
252
253 ---------------------------------------------------
254 cgPrimOp :: [LocalReg] -- where to put the results
255 -> PrimOp -- the op
256 -> [StgArg] -- arguments
257 -> FCode ()
258
259 cgPrimOp results op args
260 = do dflags <- getDynFlags
261 arg_exprs <- getNonVoidArgAmodes args
262 emitPrimOp dflags results op arg_exprs
263
264
265 ------------------------------------------------------------------------
266 -- Emitting code for a primop
267 ------------------------------------------------------------------------
268
269 emitPrimOp :: DynFlags
270 -> [LocalReg] -- where to put the results
271 -> PrimOp -- the op
272 -> [CmmExpr] -- arguments
273 -> FCode ()
274
275 -- First we handle various awkward cases specially. The remaining
276 -- easy cases are then handled by translateOp, defined below.
277
278 emitPrimOp _ [res] ParOp [arg]
279 =
280 -- for now, just implement this in a C function
281 -- later, we might want to inline it.
282 emitCCall
283 [(res,NoHint)]
284 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
285 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
286
287 emitPrimOp dflags [res] SparkOp [arg]
288 = do
289 -- returns the value of arg in res. We're going to therefore
290 -- refer to arg twice (once to pass to newSpark(), and once to
291 -- assign to res), so put it in a temporary.
292 tmp <- assignTemp arg
293 tmp2 <- newTemp (bWord dflags)
294 emitCCall
295 [(tmp2,NoHint)]
296 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
297 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
298 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
299
300 emitPrimOp dflags [res] GetCCSOfOp [arg]
301 = emitAssign (CmmLocal res) val
302 where
303 val
304 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
305 | otherwise = CmmLit (zeroCLit dflags)
306
307 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
308 = emitAssign (CmmLocal res) curCCS
309
310 emitPrimOp dflags [res] ReadMutVarOp [mutv]
311 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
312
313 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
314 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
315 emitCCall
316 [{-no results-}]
317 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
318 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
319
320 -- #define sizzeofByteArrayzh(r,a) \
321 -- r = ((StgArrWords *)(a))->bytes
322 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
323 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
324
325 -- #define sizzeofMutableByteArrayzh(r,a) \
326 -- r = ((StgArrWords *)(a))->bytes
327 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
328 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
329
330
331 -- #define touchzh(o) /* nothing */
332 emitPrimOp _ res@[] TouchOp args@[_arg]
333 = do emitPrimCall res MO_Touch args
334
335 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
336 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
337 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
338
339 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
340 emitPrimOp dflags [res] StableNameToIntOp [arg]
341 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
342
343 -- #define eqStableNamezh(r,sn1,sn2) \
344 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
345 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
346 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
347 cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags),
348 cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags)
349 ])
350
351
352 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
353 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
354
355 -- #define addrToHValuezh(r,a) r=(P_)a
356 emitPrimOp _ [res] AddrToAnyOp [arg]
357 = emitAssign (CmmLocal res) arg
358
359 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
360 -- Note: argument may be tagged!
361 emitPrimOp dflags [res] DataToTagOp [arg]
362 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
363
364 {- Freezing arrays-of-ptrs requires changing an info table, for the
365 benefit of the generational collector. It needs to scavenge mutable
366 objects, even if they are in old space. When they become immutable,
367 they can be removed from this scavenge list. -}
368
369 -- #define unsafeFreezzeArrayzh(r,a)
370 -- {
371 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
372 -- r = a;
373 -- }
374 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
375 = emit $ catAGraphs
376 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
377 mkAssign (CmmLocal res) arg ]
378 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
379 = emit $ catAGraphs
380 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
381 mkAssign (CmmLocal res) arg ]
382 emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
383 = emit $ catAGraphs
384 [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)),
385 mkAssign (CmmLocal res) arg ]
386
387 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
388 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
389 = emitAssign (CmmLocal res) arg
390
391 -- Reading/writing pointer arrays
392
393 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
394 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
395 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
396
397 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
398 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
399 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
400 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
401 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
402 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
403 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
404 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
405 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
406 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
407
408 emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
409 emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
410 emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
411
412 -- Getting the size of pointer arrays
413
414 emitPrimOp dflags [res] SizeofArrayOp [arg]
415 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags))
416 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
417 = emitPrimOp dflags [res] SizeofArrayOp [arg]
418 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
419 = emitPrimOp dflags [res] SizeofArrayOp [arg]
420 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
421 = emitPrimOp dflags [res] SizeofArrayOp [arg]
422
423 emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
424 emit $ mkAssign (CmmLocal res)
425 (cmmLoadIndexW dflags arg
426 (fixedHdrSizeW dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags) (bWord dflags))
427 emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
428 emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
429
430 -- IndexXXXoffAddr
431
432 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
433 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
434 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
435 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
436 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
437 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
438 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
439 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
440 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
441 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
442 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
443 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
444 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
445 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
446 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
447 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
448
449 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
450
451 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
452 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
453 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
454 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
455 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
456 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
457 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
458 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
459 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
460 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
461 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
462 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
463 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
464 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
465 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
466 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
467
468 -- IndexXXXArray
469
470 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
471 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
472 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
473 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
474 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
475 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
476 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
477 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
478 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
479 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
480 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
481 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
482 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
483 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
484 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
485 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
486
487 -- ReadXXXArray, identical to IndexXXXArray.
488
489 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
490 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
491 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
492 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
493 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
494 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
495 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
496 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
497 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
498 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
499 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
500 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
501 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
502 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
503 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
504 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
505
506 -- WriteXXXoffAddr
507
508 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
509 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
510 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
511 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
512 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
513 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
514 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
515 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
516 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
517 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
518 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
519 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
520 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
521 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
522 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
523 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
524
525 -- WriteXXXArray
526
527 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
528 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
529 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
530 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
531 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
532 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
533 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
534 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
535 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
536 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
537 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
538 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
539 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
540 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
541 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
542 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
543
544 -- Copying and setting byte arrays
545 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
546 doCopyByteArrayOp src src_off dst dst_off n
547 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
548 doCopyMutableByteArrayOp src src_off dst dst_off n
549 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
550 doCopyByteArrayToAddrOp src src_off dst n
551 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
552 doCopyMutableByteArrayToAddrOp src src_off dst n
553 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
554 doCopyAddrToByteArrayOp src dst dst_off n
555 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
556 doSetByteArrayOp ba off len c
557
558 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
559 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
560 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
561 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
562
563 -- Population count
564 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
565 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
566 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
567 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
568 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
569
570 -- count leading zeros
571 emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
572 emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
573 emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
574 emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
575 emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
576
577 -- count trailing zeros
578 emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
579 emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
580 emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
581 emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
582 emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
583
584 -- Unsigned int to floating point conversions
585 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
586 (MO_UF_Conv W32) [w]
587 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
588 (MO_UF_Conv W64) [w]
589
590 -- SIMD primops
591 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
592 checkVecCompatibility dflags vcat n w
593 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
594 where
595 zeros :: CmmExpr
596 zeros = CmmLit $ CmmVec (replicate n zero)
597
598 zero :: CmmLit
599 zero = case vcat of
600 IntVec -> CmmInt 0 w
601 WordVec -> CmmInt 0 w
602 FloatVec -> CmmFloat 0 w
603
604 ty :: CmmType
605 ty = vecVmmType vcat n w
606
607 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
608 checkVecCompatibility dflags vcat n w
609 when (length es /= n) $
610 panic "emitPrimOp: VecPackOp has wrong number of arguments"
611 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
612 where
613 zeros :: CmmExpr
614 zeros = CmmLit $ CmmVec (replicate n zero)
615
616 zero :: CmmLit
617 zero = case vcat of
618 IntVec -> CmmInt 0 w
619 WordVec -> CmmInt 0 w
620 FloatVec -> CmmFloat 0 w
621
622 ty :: CmmType
623 ty = vecVmmType vcat n w
624
625 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
626 checkVecCompatibility dflags vcat n w
627 when (length res /= n) $
628 panic "emitPrimOp: VecUnpackOp has wrong number of results"
629 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
630 where
631 ty :: CmmType
632 ty = vecVmmType vcat n w
633
634 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
635 checkVecCompatibility dflags vcat n w
636 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
637 where
638 ty :: CmmType
639 ty = vecVmmType vcat n w
640
641 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
642 checkVecCompatibility dflags vcat n w
643 doIndexByteArrayOp Nothing ty res args
644 where
645 ty :: CmmType
646 ty = vecVmmType vcat n w
647
648 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
649 checkVecCompatibility dflags vcat n w
650 doIndexByteArrayOp Nothing ty res args
651 where
652 ty :: CmmType
653 ty = vecVmmType vcat n w
654
655 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
656 checkVecCompatibility dflags vcat n w
657 doWriteByteArrayOp Nothing ty res args
658 where
659 ty :: CmmType
660 ty = vecVmmType vcat n w
661
662 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
663 checkVecCompatibility dflags vcat n w
664 doIndexOffAddrOp Nothing ty res args
665 where
666 ty :: CmmType
667 ty = vecVmmType vcat n w
668
669 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
670 checkVecCompatibility dflags vcat n w
671 doIndexOffAddrOp Nothing ty res args
672 where
673 ty :: CmmType
674 ty = vecVmmType vcat n w
675
676 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
677 checkVecCompatibility dflags vcat n w
678 doWriteOffAddrOp Nothing ty res args
679 where
680 ty :: CmmType
681 ty = vecVmmType vcat n w
682
683 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
684 checkVecCompatibility dflags vcat n w
685 doIndexByteArrayOpAs Nothing vecty ty res args
686 where
687 vecty :: CmmType
688 vecty = vecVmmType vcat n w
689
690 ty :: CmmType
691 ty = vecCmmCat vcat w
692
693 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
694 checkVecCompatibility dflags vcat n w
695 doIndexByteArrayOpAs Nothing vecty ty res args
696 where
697 vecty :: CmmType
698 vecty = vecVmmType vcat n w
699
700 ty :: CmmType
701 ty = vecCmmCat vcat w
702
703 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
704 checkVecCompatibility dflags vcat n w
705 doWriteByteArrayOp Nothing ty res args
706 where
707 ty :: CmmType
708 ty = vecCmmCat vcat w
709
710 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
711 checkVecCompatibility dflags vcat n w
712 doIndexOffAddrOpAs Nothing vecty ty res args
713 where
714 vecty :: CmmType
715 vecty = vecVmmType vcat n w
716
717 ty :: CmmType
718 ty = vecCmmCat vcat w
719
720 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
721 checkVecCompatibility dflags vcat n w
722 doIndexOffAddrOpAs Nothing vecty ty res args
723 where
724 vecty :: CmmType
725 vecty = vecVmmType vcat n w
726
727 ty :: CmmType
728 ty = vecCmmCat vcat w
729
730 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
731 checkVecCompatibility dflags vcat n w
732 doWriteOffAddrOp Nothing ty res args
733 where
734 ty :: CmmType
735 ty = vecCmmCat vcat w
736
737 -- Prefetch
738 emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
739 emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
740 emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
741 emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
742
743 emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
744 emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
745 emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
746 emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
747
748 emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
749 emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
750 emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
751 emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
752
753 emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
754 emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
755 emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
756 emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
757
758 -- Atomic read-modify-write
759 emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
760 doAtomicRMW res AMO_Add mba ix (bWord dflags) n
761 emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
762 doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
763 emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
764 doAtomicRMW res AMO_And mba ix (bWord dflags) n
765 emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
766 doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
767 emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
768 doAtomicRMW res AMO_Or mba ix (bWord dflags) n
769 emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
770 doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
771 emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
772 doAtomicReadByteArray res mba ix (bWord dflags)
773 emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
774 doAtomicWriteByteArray mba ix (bWord dflags) val
775 emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
776 doCasByteArray res mba ix (bWord dflags) old new
777
778 -- The rest just translate straightforwardly
779 emitPrimOp dflags [res] op [arg]
780 | nopOp op
781 = emitAssign (CmmLocal res) arg
782
783 | Just (mop,rep) <- narrowOp op
784 = emitAssign (CmmLocal res) $
785 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
786
787 emitPrimOp dflags r@[res] op args
788 | Just prim <- callishOp op
789 = do emitPrimCall r prim args
790
791 | Just mop <- translateOp dflags op
792 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
793 emit stmt
794
795 emitPrimOp dflags results op args
796 = case callishPrimOpSupported dflags op of
797 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
798 Right gen -> gen results args
799
800 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
801
802 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
803 callishPrimOpSupported dflags op
804 = case op of
805 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
806 | otherwise -> Right (genericIntQuotRemOp dflags)
807
808 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
809 | otherwise -> Right (genericWordQuotRemOp dflags)
810
811 WordQuotRem2Op | ncg && x86ish -> Left (MO_U_QuotRem2 (wordWidth dflags))
812 | otherwise -> Right (genericWordQuotRem2Op dflags)
813
814 WordAdd2Op | ncg && x86ish -> Left (MO_Add2 (wordWidth dflags))
815 | otherwise -> Right genericWordAdd2Op
816
817 IntAddCOp | ncg && x86ish -> Left (MO_AddIntC (wordWidth dflags))
818 | otherwise -> Right genericIntAddCOp
819
820 IntSubCOp | ncg && x86ish -> Left (MO_SubIntC (wordWidth dflags))
821 | otherwise -> Right genericIntSubCOp
822
823 WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags))
824 | otherwise -> Right genericWordMul2Op
825
826 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
827 where
828 ncg = case hscTarget dflags of
829 HscAsm -> True
830 _ -> False
831
832 x86ish = case platformArch (targetPlatform dflags) of
833 ArchX86 -> True
834 ArchX86_64 -> True
835 _ -> False
836
837 genericIntQuotRemOp :: DynFlags -> GenericOp
838 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
839 = emit $ mkAssign (CmmLocal res_q)
840 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
841 mkAssign (CmmLocal res_r)
842 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
843 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
844
845 genericWordQuotRemOp :: DynFlags -> GenericOp
846 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
847 = emit $ mkAssign (CmmLocal res_q)
848 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
849 mkAssign (CmmLocal res_r)
850 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
851 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
852
853 genericWordQuotRem2Op :: DynFlags -> GenericOp
854 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
855 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
856 where ty = cmmExprType dflags arg_x_high
857 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
858 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
859 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
860 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
861 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
862 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
863 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
864 zero = lit 0
865 one = lit 1
866 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
867 lit i = CmmLit (CmmInt i (wordWidth dflags))
868
869 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
870 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
871 mkAssign (CmmLocal res_r) high)
872 f i acc high low =
873 do roverflowedBit <- newTemp ty
874 rhigh' <- newTemp ty
875 rhigh'' <- newTemp ty
876 rlow' <- newTemp ty
877 risge <- newTemp ty
878 racc' <- newTemp ty
879 let high' = CmmReg (CmmLocal rhigh')
880 isge = CmmReg (CmmLocal risge)
881 overflowedBit = CmmReg (CmmLocal roverflowedBit)
882 let this = catAGraphs
883 [mkAssign (CmmLocal roverflowedBit)
884 (shr high negone),
885 mkAssign (CmmLocal rhigh')
886 (or (shl high one) (shr low negone)),
887 mkAssign (CmmLocal rlow')
888 (shl low one),
889 mkAssign (CmmLocal risge)
890 (or (overflowedBit `ne` zero)
891 (high' `ge` arg_y)),
892 mkAssign (CmmLocal rhigh'')
893 (high' `minus` (arg_y `times` isge)),
894 mkAssign (CmmLocal racc')
895 (or (shl acc one) isge)]
896 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
897 (CmmReg (CmmLocal rhigh''))
898 (CmmReg (CmmLocal rlow'))
899 return (this <*> rest)
900 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
901
902 genericWordAdd2Op :: GenericOp
903 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
904 = do dflags <- getDynFlags
905 r1 <- newTemp (cmmExprType dflags arg_x)
906 r2 <- newTemp (cmmExprType dflags arg_x)
907 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
908 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
909 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
910 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
911 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
912 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
913 (wordWidth dflags))
914 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
915 emit $ catAGraphs
916 [mkAssign (CmmLocal r1)
917 (add (bottomHalf arg_x) (bottomHalf arg_y)),
918 mkAssign (CmmLocal r2)
919 (add (topHalf (CmmReg (CmmLocal r1)))
920 (add (topHalf arg_x) (topHalf arg_y))),
921 mkAssign (CmmLocal res_h)
922 (topHalf (CmmReg (CmmLocal r2))),
923 mkAssign (CmmLocal res_l)
924 (or (toTopHalf (CmmReg (CmmLocal r2)))
925 (bottomHalf (CmmReg (CmmLocal r1))))]
926 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
927
928 genericIntAddCOp :: GenericOp
929 genericIntAddCOp [res_r, res_c] [aa, bb]
930 {-
931 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
932 C, and without needing any comparisons. This may not be the
933 fastest way to do it - if you have better code, please send it! --SDM
934
935 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
936
937 We currently don't make use of the r value if c is != 0 (i.e.
938 overflow), we just convert to big integers and try again. This
939 could be improved by making r and c the correct values for
940 plugging into a new J#.
941
942 { r = ((I_)(a)) + ((I_)(b)); \
943 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
944 >> (BITS_IN (I_) - 1); \
945 }
946 Wading through the mass of bracketry, it seems to reduce to:
947 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
948
949 -}
950 = do dflags <- getDynFlags
951 emit $ catAGraphs [
952 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
953 mkAssign (CmmLocal res_c) $
954 CmmMachOp (mo_wordUShr dflags) [
955 CmmMachOp (mo_wordAnd dflags) [
956 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
957 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
958 ],
959 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
960 ]
961 ]
962 genericIntAddCOp _ _ = panic "genericIntAddCOp"
963
964 genericIntSubCOp :: GenericOp
965 genericIntSubCOp [res_r, res_c] [aa, bb]
966 {- Similarly:
967 #define subIntCzh(r,c,a,b) \
968 { r = ((I_)(a)) - ((I_)(b)); \
969 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
970 >> (BITS_IN (I_) - 1); \
971 }
972
973 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
974 -}
975 = do dflags <- getDynFlags
976 emit $ catAGraphs [
977 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
978 mkAssign (CmmLocal res_c) $
979 CmmMachOp (mo_wordUShr dflags) [
980 CmmMachOp (mo_wordAnd dflags) [
981 CmmMachOp (mo_wordXor dflags) [aa,bb],
982 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
983 ],
984 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
985 ]
986 ]
987 genericIntSubCOp _ _ = panic "genericIntSubCOp"
988
989 genericWordMul2Op :: GenericOp
990 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
991 = do dflags <- getDynFlags
992 let t = cmmExprType dflags arg_x
993 xlyl <- liftM CmmLocal $ newTemp t
994 xlyh <- liftM CmmLocal $ newTemp t
995 xhyl <- liftM CmmLocal $ newTemp t
996 r <- liftM CmmLocal $ newTemp t
997 -- This generic implementation is very simple and slow. We might
998 -- well be able to do better, but for now this at least works.
999 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
1000 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
1001 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
1002 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
1003 sum = foldl1 add
1004 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
1005 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
1006 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
1007 (wordWidth dflags))
1008 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
1009 emit $ catAGraphs
1010 [mkAssign xlyl
1011 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
1012 mkAssign xlyh
1013 (mul (bottomHalf arg_x) (topHalf arg_y)),
1014 mkAssign xhyl
1015 (mul (topHalf arg_x) (bottomHalf arg_y)),
1016 mkAssign r
1017 (sum [topHalf (CmmReg xlyl),
1018 bottomHalf (CmmReg xhyl),
1019 bottomHalf (CmmReg xlyh)]),
1020 mkAssign (CmmLocal res_l)
1021 (or (bottomHalf (CmmReg xlyl))
1022 (toTopHalf (CmmReg r))),
1023 mkAssign (CmmLocal res_h)
1024 (sum [mul (topHalf arg_x) (topHalf arg_y),
1025 topHalf (CmmReg xhyl),
1026 topHalf (CmmReg xlyh),
1027 topHalf (CmmReg r)])]
1028 genericWordMul2Op _ _ = panic "genericWordMul2Op"
1029
1030 -- These PrimOps are NOPs in Cmm
1031
1032 nopOp :: PrimOp -> Bool
1033 nopOp Int2WordOp = True
1034 nopOp Word2IntOp = True
1035 nopOp Int2AddrOp = True
1036 nopOp Addr2IntOp = True
1037 nopOp ChrOp = True -- Int# and Char# are rep'd the same
1038 nopOp OrdOp = True
1039 nopOp _ = False
1040
1041 -- These PrimOps turn into double casts
1042
1043 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
1044 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
1045 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
1046 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
1047 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
1048 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
1049 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
1050 narrowOp _ = Nothing
1051
1052 -- Native word signless ops
1053
1054 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
1055 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
1056 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
1057 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
1058 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
1059 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
1060 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
1061
1062 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
1063 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
1064 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
1065 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
1066 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
1067 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
1068
1069 translateOp dflags AndOp = Just (mo_wordAnd dflags)
1070 translateOp dflags OrOp = Just (mo_wordOr dflags)
1071 translateOp dflags XorOp = Just (mo_wordXor dflags)
1072 translateOp dflags NotOp = Just (mo_wordNot dflags)
1073 translateOp dflags SllOp = Just (mo_wordShl dflags)
1074 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
1075
1076 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
1077
1078 -- Native word signed ops
1079
1080 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
1081 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
1082 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
1083 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
1084 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
1085
1086
1087 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
1088 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
1089 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
1090 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
1091
1092 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
1093 translateOp dflags OrIOp = Just (mo_wordOr dflags)
1094 translateOp dflags XorIOp = Just (mo_wordXor dflags)
1095 translateOp dflags NotIOp = Just (mo_wordNot dflags)
1096 translateOp dflags ISllOp = Just (mo_wordShl dflags)
1097 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
1098 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
1099
1100 -- Native word unsigned ops
1101
1102 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
1103 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
1104 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
1105 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
1106
1107 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
1108 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
1109 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
1110
1111 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
1112 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
1113 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
1114 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
1115
1116 -- Char# ops
1117
1118 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
1119 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
1120 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
1121 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
1122 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
1123 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
1124
1125 -- Double ops
1126
1127 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
1128 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
1129 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
1130 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
1131 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
1132 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
1133
1134 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
1135 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
1136 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
1137 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
1138 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
1139
1140 -- Float ops
1141
1142 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
1143 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
1144 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
1145 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1146 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1147 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1148
1149 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1150 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1151 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1152 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1153 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1154
1155 -- Vector ops
1156
1157 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1158 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1159 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1160 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1161 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1162
1163 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1164 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1165 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1166 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1167 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1168 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1169
1170 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1171 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1172 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1173 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1174 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1175
1176 -- Conversions
1177
1178 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1179 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1180
1181 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1182 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1183
1184 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1185 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1186
1187 -- Word comparisons masquerading as more exotic things.
1188
1189 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1190 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1191 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1192 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1193 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1194 translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
1195 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1196 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1197
1198 translateOp _ _ = Nothing
1199
1200 -- These primops are implemented by CallishMachOps, because they sometimes
1201 -- turn into foreign calls depending on the backend.
1202
1203 callishOp :: PrimOp -> Maybe CallishMachOp
1204 callishOp DoublePowerOp = Just MO_F64_Pwr
1205 callishOp DoubleSinOp = Just MO_F64_Sin
1206 callishOp DoubleCosOp = Just MO_F64_Cos
1207 callishOp DoubleTanOp = Just MO_F64_Tan
1208 callishOp DoubleSinhOp = Just MO_F64_Sinh
1209 callishOp DoubleCoshOp = Just MO_F64_Cosh
1210 callishOp DoubleTanhOp = Just MO_F64_Tanh
1211 callishOp DoubleAsinOp = Just MO_F64_Asin
1212 callishOp DoubleAcosOp = Just MO_F64_Acos
1213 callishOp DoubleAtanOp = Just MO_F64_Atan
1214 callishOp DoubleLogOp = Just MO_F64_Log
1215 callishOp DoubleExpOp = Just MO_F64_Exp
1216 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1217
1218 callishOp FloatPowerOp = Just MO_F32_Pwr
1219 callishOp FloatSinOp = Just MO_F32_Sin
1220 callishOp FloatCosOp = Just MO_F32_Cos
1221 callishOp FloatTanOp = Just MO_F32_Tan
1222 callishOp FloatSinhOp = Just MO_F32_Sinh
1223 callishOp FloatCoshOp = Just MO_F32_Cosh
1224 callishOp FloatTanhOp = Just MO_F32_Tanh
1225 callishOp FloatAsinOp = Just MO_F32_Asin
1226 callishOp FloatAcosOp = Just MO_F32_Acos
1227 callishOp FloatAtanOp = Just MO_F32_Atan
1228 callishOp FloatLogOp = Just MO_F32_Log
1229 callishOp FloatExpOp = Just MO_F32_Exp
1230 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1231
1232 callishOp _ = Nothing
1233
1234 ------------------------------------------------------------------------------
1235 -- Helpers for translating various minor variants of array indexing.
1236
1237 doIndexOffAddrOp :: Maybe MachOp
1238 -> CmmType
1239 -> [LocalReg]
1240 -> [CmmExpr]
1241 -> FCode ()
1242 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1243 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1244 doIndexOffAddrOp _ _ _ _
1245 = panic "StgCmmPrim: doIndexOffAddrOp"
1246
1247 doIndexOffAddrOpAs :: Maybe MachOp
1248 -> CmmType
1249 -> CmmType
1250 -> [LocalReg]
1251 -> [CmmExpr]
1252 -> FCode ()
1253 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1254 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1255 doIndexOffAddrOpAs _ _ _ _ _
1256 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1257
1258 doIndexByteArrayOp :: Maybe MachOp
1259 -> CmmType
1260 -> [LocalReg]
1261 -> [CmmExpr]
1262 -> FCode ()
1263 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1264 = do dflags <- getDynFlags
1265 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1266 doIndexByteArrayOp _ _ _ _
1267 = panic "StgCmmPrim: doIndexByteArrayOp"
1268
1269 doIndexByteArrayOpAs :: Maybe MachOp
1270 -> CmmType
1271 -> CmmType
1272 -> [LocalReg]
1273 -> [CmmExpr]
1274 -> FCode ()
1275 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1276 = do dflags <- getDynFlags
1277 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1278 doIndexByteArrayOpAs _ _ _ _ _
1279 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1280
1281 doReadPtrArrayOp :: LocalReg
1282 -> CmmExpr
1283 -> CmmExpr
1284 -> FCode ()
1285 doReadPtrArrayOp res addr idx
1286 = do dflags <- getDynFlags
1287 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1288
1289 doWriteOffAddrOp :: Maybe MachOp
1290 -> CmmType
1291 -> [LocalReg]
1292 -> [CmmExpr]
1293 -> FCode ()
1294 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1295 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1296 doWriteOffAddrOp _ _ _ _
1297 = panic "StgCmmPrim: doWriteOffAddrOp"
1298
1299 doWriteByteArrayOp :: Maybe MachOp
1300 -> CmmType
1301 -> [LocalReg]
1302 -> [CmmExpr]
1303 -> FCode ()
1304 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1305 = do dflags <- getDynFlags
1306 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1307 doWriteByteArrayOp _ _ _ _
1308 = panic "StgCmmPrim: doWriteByteArrayOp"
1309
1310 doWritePtrArrayOp :: CmmExpr
1311 -> CmmExpr
1312 -> CmmExpr
1313 -> FCode ()
1314 doWritePtrArrayOp addr idx val
1315 = do dflags <- getDynFlags
1316 let ty = cmmExprType dflags val
1317 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1318 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1319 -- the write barrier. We must write a byte into the mark table:
1320 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1321 emit $ mkStore (
1322 cmmOffsetExpr dflags
1323 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1324 (loadArrPtrsSize dflags addr))
1325 (CmmMachOp (mo_wordUShr dflags) [idx,
1326 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1327 ) (CmmLit (CmmInt 1 W8))
1328
1329 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1330 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1331 where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1332
1333 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1334 -> Maybe MachOp -- Optional result cast
1335 -> CmmType -- Type of element we are accessing
1336 -> LocalReg -- Destination
1337 -> CmmExpr -- Base address
1338 -> CmmType -- Type of element by which we are indexing
1339 -> CmmExpr -- Index
1340 -> FCode ()
1341 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1342 = do dflags <- getDynFlags
1343 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1344 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1345 = do dflags <- getDynFlags
1346 emitAssign (CmmLocal res) (CmmMachOp cast [
1347 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1348
1349 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1350 -> Maybe MachOp -- Optional value cast
1351 -> CmmExpr -- Base address
1352 -> CmmType -- Type of element by which we are indexing
1353 -> CmmExpr -- Index
1354 -> CmmExpr -- Value to write
1355 -> FCode ()
1356 mkBasicIndexedWrite off Nothing base idx_ty idx val
1357 = do dflags <- getDynFlags
1358 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1359 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1360 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1361
1362 -- ----------------------------------------------------------------------------
1363 -- Misc utils
1364
1365 cmmIndexOffExpr :: DynFlags
1366 -> ByteOff -- Initial offset in bytes
1367 -> Width -- Width of element by which we are indexing
1368 -> CmmExpr -- Base address
1369 -> CmmExpr -- Index
1370 -> CmmExpr
1371 cmmIndexOffExpr dflags off width base idx
1372 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1373
1374 cmmLoadIndexOffExpr :: DynFlags
1375 -> ByteOff -- Initial offset in bytes
1376 -> CmmType -- Type of element we are accessing
1377 -> CmmExpr -- Base address
1378 -> CmmType -- Type of element by which we are indexing
1379 -> CmmExpr -- Index
1380 -> CmmExpr
1381 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1382 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1383
1384 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1385 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1386
1387 ------------------------------------------------------------------------------
1388 -- Helpers for translating vector primops.
1389
1390 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1391 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1392
1393 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1394 vecCmmCat IntVec = cmmBits
1395 vecCmmCat WordVec = cmmBits
1396 vecCmmCat FloatVec = cmmFloat
1397
1398 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1399 vecElemInjectCast _ FloatVec _ = Nothing
1400 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1401 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1402 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1403 vecElemInjectCast _ IntVec W64 = Nothing
1404 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1405 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1406 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1407 vecElemInjectCast _ WordVec W64 = Nothing
1408 vecElemInjectCast _ _ _ = Nothing
1409
1410 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1411 vecElemProjectCast _ FloatVec _ = Nothing
1412 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1413 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1414 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1415 vecElemProjectCast _ IntVec W64 = Nothing
1416 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1417 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1418 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1419 vecElemProjectCast _ WordVec W64 = Nothing
1420 vecElemProjectCast _ _ _ = Nothing
1421
1422 -- Check to make sure that we can generate code for the specified vector type
1423 -- given the current set of dynamic flags.
1424 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1425 checkVecCompatibility dflags vcat l w = do
1426 when (hscTarget dflags /= HscLlvm) $ do
1427 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1428 ,"Please use -fllvm."]
1429 check vecWidth vcat l w
1430 where
1431 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1432 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1433 sorry $ "128-bit wide single-precision floating point " ++
1434 "SIMD vector instructions require at least -msse."
1435 check W128 _ _ _ | not (isSse2Enabled dflags) =
1436 sorry $ "128-bit wide integer and double precision " ++
1437 "SIMD vector instructions require at least -msse2."
1438 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1439 sorry $ "256-bit wide floating point " ++
1440 "SIMD vector instructions require at least -mavx."
1441 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1442 sorry $ "256-bit wide integer " ++
1443 "SIMD vector instructions require at least -mavx2."
1444 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1445 sorry $ "512-bit wide " ++
1446 "SIMD vector instructions require -mavx512f."
1447 check _ _ _ _ = return ()
1448
1449 vecWidth = typeWidth (vecVmmType vcat l w)
1450
1451 ------------------------------------------------------------------------------
1452 -- Helpers for translating vector packing and unpacking.
1453
1454 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1455 -> CmmType -- Type of vector
1456 -> CmmExpr -- Initial vector
1457 -> [CmmExpr] -- Elements
1458 -> CmmFormal -- Destination for result
1459 -> FCode ()
1460 doVecPackOp maybe_pre_write_cast ty z es res = do
1461 dst <- newTemp ty
1462 emitAssign (CmmLocal dst) z
1463 vecPack dst es 0
1464 where
1465 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1466 vecPack src [] _ =
1467 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1468
1469 vecPack src (e : es) i = do
1470 dst <- newTemp ty
1471 if isFloatType (vecElemType ty)
1472 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1473 [CmmReg (CmmLocal src), cast e, iLit])
1474 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1475 [CmmReg (CmmLocal src), cast e, iLit])
1476 vecPack dst es (i + 1)
1477 where
1478 -- vector indices are always 32-bits
1479 iLit = CmmLit (CmmInt (toInteger i) W32)
1480
1481 cast :: CmmExpr -> CmmExpr
1482 cast val = case maybe_pre_write_cast of
1483 Nothing -> val
1484 Just cast -> CmmMachOp cast [val]
1485
1486 len :: Length
1487 len = vecLength ty
1488
1489 wid :: Width
1490 wid = typeWidth (vecElemType ty)
1491
1492 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1493 -> CmmType -- Type of vector
1494 -> CmmExpr -- Vector
1495 -> [CmmFormal] -- Element results
1496 -> FCode ()
1497 doVecUnpackOp maybe_post_read_cast ty e res =
1498 vecUnpack res 0
1499 where
1500 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1501 vecUnpack [] _ =
1502 return ()
1503
1504 vecUnpack (r : rs) i = do
1505 if isFloatType (vecElemType ty)
1506 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1507 [e, iLit]))
1508 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1509 [e, iLit]))
1510 vecUnpack rs (i + 1)
1511 where
1512 -- vector indices are always 32-bits
1513 iLit = CmmLit (CmmInt (toInteger i) W32)
1514
1515 cast :: CmmExpr -> CmmExpr
1516 cast val = case maybe_post_read_cast of
1517 Nothing -> val
1518 Just cast -> CmmMachOp cast [val]
1519
1520 len :: Length
1521 len = vecLength ty
1522
1523 wid :: Width
1524 wid = typeWidth (vecElemType ty)
1525
1526 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1527 -> CmmType -- Vector type
1528 -> CmmExpr -- Source vector
1529 -> CmmExpr -- Element
1530 -> CmmExpr -- Index at which to insert element
1531 -> CmmFormal -- Destination for result
1532 -> FCode ()
1533 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1534 dflags <- getDynFlags
1535 -- vector indices are always 32-bits
1536 let idx' :: CmmExpr
1537 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1538 if isFloatType (vecElemType ty)
1539 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1540 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1541 where
1542 cast :: CmmExpr -> CmmExpr
1543 cast val = case maybe_pre_write_cast of
1544 Nothing -> val
1545 Just cast -> CmmMachOp cast [val]
1546
1547 len :: Length
1548 len = vecLength ty
1549
1550 wid :: Width
1551 wid = typeWidth (vecElemType ty)
1552
1553 ------------------------------------------------------------------------------
1554 -- Helpers for translating prefetching.
1555
1556
1557 -- | Translate byte array prefetch operations into proper primcalls.
1558 doPrefetchByteArrayOp :: Int
1559 -> [CmmExpr]
1560 -> FCode ()
1561 doPrefetchByteArrayOp locality [addr,idx]
1562 = do dflags <- getDynFlags
1563 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1564 doPrefetchByteArrayOp _ _
1565 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1566
1567 -- | Translate mutable byte array prefetch operations into proper primcalls.
1568 doPrefetchMutableByteArrayOp :: Int
1569 -> [CmmExpr]
1570 -> FCode ()
1571 doPrefetchMutableByteArrayOp locality [addr,idx]
1572 = do dflags <- getDynFlags
1573 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1574 doPrefetchMutableByteArrayOp _ _
1575 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1576
1577 -- | Translate address prefetch operations into proper primcalls.
1578 doPrefetchAddrOp ::Int
1579 -> [CmmExpr]
1580 -> FCode ()
1581 doPrefetchAddrOp locality [addr,idx]
1582 = mkBasicPrefetch locality 0 addr idx
1583 doPrefetchAddrOp _ _
1584 = panic "StgCmmPrim: doPrefetchAddrOp"
1585
1586 -- | Translate value prefetch operations into proper primcalls.
1587 doPrefetchValueOp :: Int
1588 -> [CmmExpr]
1589 -> FCode ()
1590 doPrefetchValueOp locality [addr]
1591 = do dflags <- getDynFlags
1592 mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
1593 doPrefetchValueOp _ _
1594 = panic "StgCmmPrim: doPrefetchValueOp"
1595
1596 -- | helper to generate prefetch primcalls
1597 mkBasicPrefetch :: Int -- Locality level 0-3
1598 -> ByteOff -- Initial offset in bytes
1599 -> CmmExpr -- Base address
1600 -> CmmExpr -- Index
1601 -> FCode ()
1602 mkBasicPrefetch locality off base idx
1603 = do dflags <- getDynFlags
1604 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1605 return ()
1606
1607 -- ----------------------------------------------------------------------------
1608 -- Allocating byte arrays
1609
1610 -- | Takes a register to return the newly allocated array in and the
1611 -- size of the new array in bytes. Allocates a new
1612 -- 'MutableByteArray#'.
1613 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1614 doNewByteArrayOp res_r n = do
1615 dflags <- getDynFlags
1616
1617 let info_ptr = mkLblExpr mkArrWords_infoLabel
1618 rep = arrWordsRep dflags n
1619
1620 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1621 (mkIntExpr dflags (nonHdrSize dflags rep))
1622 (zeroExpr dflags)
1623
1624 let hdr_size = fixedHdrSize dflags
1625
1626 base <- allocHeapClosure rep info_ptr curCCS
1627 [ (mkIntExpr dflags n,
1628 hdr_size + oFFSET_StgArrWords_bytes dflags)
1629 ]
1630
1631 emit $ mkAssign (CmmLocal res_r) base
1632
1633 -- ----------------------------------------------------------------------------
1634 -- Copying byte arrays
1635
1636 -- | Takes a source 'ByteArray#', an offset in the source array, a
1637 -- destination 'MutableByteArray#', an offset into the destination
1638 -- array, and the number of bytes to copy. Copies the given number of
1639 -- bytes from the source array to the destination array.
1640 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1641 -> FCode ()
1642 doCopyByteArrayOp = emitCopyByteArray copy
1643 where
1644 -- Copy data (we assume the arrays aren't overlapping since
1645 -- they're of different types)
1646 copy _src _dst dst_p src_p bytes =
1647 do dflags <- getDynFlags
1648 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1649
1650 -- | Takes a source 'MutableByteArray#', an offset in the source
1651 -- array, a destination 'MutableByteArray#', an offset into the
1652 -- destination array, and the number of bytes to copy. Copies the
1653 -- given number of bytes from the source array to the destination
1654 -- array.
1655 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1656 -> FCode ()
1657 doCopyMutableByteArrayOp = emitCopyByteArray copy
1658 where
1659 -- The only time the memory might overlap is when the two arrays
1660 -- we were provided are the same array!
1661 -- TODO: Optimize branch for common case of no aliasing.
1662 copy src dst dst_p src_p bytes = do
1663 dflags <- getDynFlags
1664 [moveCall, cpyCall] <- forkAlts [
1665 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags 1),
1666 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1667 ]
1668 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1669
1670 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1671 -> FCode ())
1672 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1673 -> FCode ()
1674 emitCopyByteArray copy src src_off dst dst_off n = do
1675 dflags <- getDynFlags
1676 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1677 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1678 copy src dst dst_p src_p n
1679
1680 -- | Takes a source 'ByteArray#', an offset in the source array, a
1681 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1682 -- number of bytes from the source array to the destination memory region.
1683 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1684 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1685 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1686 dflags <- getDynFlags
1687 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1688 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1689
1690 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1691 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1692 -- number of bytes from the source array to the destination memory region.
1693 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1694 -> FCode ()
1695 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1696
1697 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1698 -- the destination array, and the number of bytes to copy. Copies the given
1699 -- number of bytes from the source memory region to the destination array.
1700 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1701 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1702 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1703 dflags <- getDynFlags
1704 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1705 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1706
1707
1708 -- ----------------------------------------------------------------------------
1709 -- Setting byte arrays
1710
1711 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1712 -- and a byte, and sets each of the selected bytes in the array to the
1713 -- character.
1714 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1715 -> FCode ()
1716 doSetByteArrayOp ba off len c
1717 = do dflags <- getDynFlags
1718 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1719 emitMemsetCall p c len (mkIntExpr dflags 1)
1720
1721 -- ----------------------------------------------------------------------------
1722 -- Allocating arrays
1723
1724 -- | Allocate a new array.
1725 doNewArrayOp :: CmmFormal -- ^ return register
1726 -> SMRep -- ^ representation of the array
1727 -> CLabel -- ^ info pointer
1728 -> [(CmmExpr, ByteOff)] -- ^ header payload
1729 -> WordOff -- ^ array size
1730 -> CmmExpr -- ^ initial element
1731 -> FCode ()
1732 doNewArrayOp res_r rep info payload n init = do
1733 dflags <- getDynFlags
1734
1735 let info_ptr = mkLblExpr info
1736
1737 tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
1738 (mkIntExpr dflags (nonHdrSize dflags rep))
1739 (zeroExpr dflags)
1740
1741 base <- allocHeapClosure rep info_ptr curCCS payload
1742
1743 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1744 emit $ mkAssign arr base
1745
1746 -- Initialise all elements of the the array
1747 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep)
1748 for <- newLabelC
1749 emitLabel for
1750 let loopBody =
1751 [ mkStore (CmmReg (CmmLocal p)) init
1752 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1753 , mkBranch for ]
1754 emit =<< mkCmmIfThen
1755 (cmmULtWord dflags (CmmReg (CmmLocal p))
1756 (cmmOffsetW dflags (CmmReg arr)
1757 (hdrSizeW dflags rep + n)))
1758 (catAGraphs loopBody)
1759
1760 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1761
1762 -- ----------------------------------------------------------------------------
1763 -- Copying pointer arrays
1764
1765 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1766 -- nowhere else in the code generator. This is mostly because these
1767 -- "primitive" ops result in a surprisingly large amount of code. It
1768 -- will likely be worthwhile to optimize what is emitted here, so that
1769 -- our optimization passes don't waste time repeatedly optimizing the
1770 -- same bits of code.
1771
1772 -- More closely imitates 'assignTemp' from the old code generator, which
1773 -- returns a CmmExpr rather than a LocalReg.
1774 assignTempE :: CmmExpr -> FCode CmmExpr
1775 assignTempE e = do
1776 t <- assignTemp e
1777 return (CmmReg (CmmLocal t))
1778
1779 -- | Takes a source 'Array#', an offset in the source array, a
1780 -- destination 'MutableArray#', an offset into the destination array,
1781 -- and the number of elements to copy. Copies the given number of
1782 -- elements from the source array to the destination array.
1783 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1784 -> FCode ()
1785 doCopyArrayOp = emitCopyArray copy
1786 where
1787 -- Copy data (we assume the arrays aren't overlapping since
1788 -- they're of different types)
1789 copy _src _dst dst_p src_p bytes =
1790 do dflags <- getDynFlags
1791 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1792 (mkIntExpr dflags (wORD_SIZE dflags))
1793
1794
1795 -- | Takes a source 'MutableArray#', an offset in the source array, a
1796 -- destination 'MutableArray#', an offset into the destination array,
1797 -- and the number of elements to copy. Copies the given number of
1798 -- elements from the source array to the destination array.
1799 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1800 -> FCode ()
1801 doCopyMutableArrayOp = emitCopyArray copy
1802 where
1803 -- The only time the memory might overlap is when the two arrays
1804 -- we were provided are the same array!
1805 -- TODO: Optimize branch for common case of no aliasing.
1806 copy src dst dst_p src_p bytes = do
1807 dflags <- getDynFlags
1808 [moveCall, cpyCall] <- forkAlts [
1809 getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1810 (mkIntExpr dflags (wORD_SIZE dflags)),
1811 getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1812 (mkIntExpr dflags (wORD_SIZE dflags))
1813 ]
1814 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1815
1816 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1817 -> FCode ()) -- ^ copy function
1818 -> CmmExpr -- ^ source array
1819 -> CmmExpr -- ^ offset in source array
1820 -> CmmExpr -- ^ destination array
1821 -> CmmExpr -- ^ offset in destination array
1822 -> WordOff -- ^ number of elements to copy
1823 -> FCode ()
1824 emitCopyArray copy src0 src_off dst0 dst_off0 n = do
1825 dflags <- getDynFlags
1826 when (n /= 0) $ do
1827 -- Passed as arguments (be careful)
1828 src <- assignTempE src0
1829 dst <- assignTempE dst0
1830 dst_off <- assignTempE dst_off0
1831
1832 -- Set the dirty bit in the header.
1833 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1834
1835 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
1836 (arrPtrsHdrSize dflags)
1837 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1838 src_p <- assignTempE $ cmmOffsetExprW dflags
1839 (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1840 let bytes = wordsToBytes dflags n
1841
1842 copy src dst dst_p src_p bytes
1843
1844 -- The base address of the destination card table
1845 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
1846 (loadArrPtrsSize dflags dst)
1847
1848 emitSetCards dst_off dst_cards_p n
1849
1850 doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1851 -> FCode ()
1852 doCopySmallArrayOp = emitCopySmallArray copy
1853 where
1854 -- Copy data (we assume the arrays aren't overlapping since
1855 -- they're of different types)
1856 copy _src _dst dst_p src_p bytes =
1857 do dflags <- getDynFlags
1858 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1859 (mkIntExpr dflags (wORD_SIZE dflags))
1860
1861
1862 doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1863 -> FCode ()
1864 doCopySmallMutableArrayOp = emitCopySmallArray copy
1865 where
1866 -- The only time the memory might overlap is when the two arrays
1867 -- we were provided are the same array!
1868 -- TODO: Optimize branch for common case of no aliasing.
1869 copy src dst dst_p src_p bytes = do
1870 dflags <- getDynFlags
1871 [moveCall, cpyCall] <- forkAlts
1872 [ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1873 (mkIntExpr dflags (wORD_SIZE dflags))
1874 , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1875 (mkIntExpr dflags (wORD_SIZE dflags))
1876 ]
1877 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1878
1879 emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1880 -> FCode ()) -- ^ copy function
1881 -> CmmExpr -- ^ source array
1882 -> CmmExpr -- ^ offset in source array
1883 -> CmmExpr -- ^ destination array
1884 -> CmmExpr -- ^ offset in destination array
1885 -> WordOff -- ^ number of elements to copy
1886 -> FCode ()
1887 emitCopySmallArray copy src0 src_off dst0 dst_off n = do
1888 dflags <- getDynFlags
1889
1890 -- Passed as arguments (be careful)
1891 src <- assignTempE src0
1892 dst <- assignTempE dst0
1893
1894 -- Set the dirty bit in the header.
1895 emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
1896
1897 dst_p <- assignTempE $ cmmOffsetExprW dflags
1898 (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
1899 src_p <- assignTempE $ cmmOffsetExprW dflags
1900 (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
1901 let bytes = wordsToBytes dflags n
1902
1903 copy src dst dst_p src_p bytes
1904
1905 -- | Takes an info table label, a register to return the newly
1906 -- allocated array in, a source array, an offset in the source array,
1907 -- and the number of elements to copy. Allocates a new array and
1908 -- initializes it from the source array.
1909 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1910 -> FCode ()
1911 emitCloneArray info_p res_r src src_off n = do
1912 dflags <- getDynFlags
1913
1914 let info_ptr = mkLblExpr info_p
1915 rep = arrPtrsRep dflags n
1916
1917 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
1918 (mkIntExpr dflags (nonHdrSize dflags rep))
1919 (zeroExpr dflags)
1920
1921 let hdr_size = fixedHdrSize dflags
1922
1923 base <- allocHeapClosure rep info_ptr curCCS
1924 [ (mkIntExpr dflags n,
1925 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
1926 , (mkIntExpr dflags (nonHdrSizeW rep),
1927 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
1928 ]
1929
1930 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1931 emit $ mkAssign arr base
1932
1933 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
1934 (arrPtrsHdrSize dflags)
1935 src_p <- assignTempE $ cmmOffsetExprW dflags src
1936 (cmmAddWord dflags
1937 (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
1938
1939 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
1940 (mkIntExpr dflags (wORD_SIZE dflags))
1941
1942 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1943
1944 -- | Takes an info table label, a register to return the newly
1945 -- allocated array in, a source array, an offset in the source array,
1946 -- and the number of elements to copy. Allocates a new array and
1947 -- initializes it from the source array.
1948 emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1949 -> FCode ()
1950 emitCloneSmallArray info_p res_r src src_off n = do
1951 dflags <- getDynFlags
1952
1953 let info_ptr = mkLblExpr info_p
1954 rep = smallArrPtrsRep n
1955
1956 tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
1957 (mkIntExpr dflags (nonHdrSize dflags rep))
1958 (zeroExpr dflags)
1959
1960 let hdr_size = fixedHdrSize dflags
1961
1962 base <- allocHeapClosure rep info_ptr curCCS
1963 [ (mkIntExpr dflags n,
1964 hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
1965 ]
1966
1967 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1968 emit $ mkAssign arr base
1969
1970 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
1971 (smallArrPtrsHdrSize dflags)
1972 src_p <- assignTempE $ cmmOffsetExprW dflags src
1973 (cmmAddWord dflags
1974 (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
1975
1976 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
1977 (mkIntExpr dflags (wORD_SIZE dflags))
1978
1979 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1980
1981 -- | Takes and offset in the destination array, the base address of
1982 -- the card table, and the number of elements affected (*not* the
1983 -- number of cards). The number of elements may not be zero.
1984 -- Marks the relevant cards as dirty.
1985 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
1986 emitSetCards dst_start dst_cards_start n = do
1987 dflags <- getDynFlags
1988 start_card <- assignTempE $ cardCmm dflags dst_start
1989 let end_card = cardCmm dflags
1990 (cmmSubWord dflags
1991 (cmmAddWord dflags dst_start (mkIntExpr dflags n))
1992 (mkIntExpr dflags 1))
1993 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
1994 (mkIntExpr dflags 1)
1995 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
1996 (mkIntExpr dflags 1) -- no alignment (1 byte)
1997
1998 -- Convert an element index to a card index
1999 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
2000 cardCmm dflags i =
2001 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
2002
2003 ------------------------------------------------------------------------------
2004 -- SmallArray PrimOp implementations
2005
2006 doReadSmallPtrArrayOp :: LocalReg
2007 -> CmmExpr
2008 -> CmmExpr
2009 -> FCode ()
2010 doReadSmallPtrArrayOp res addr idx = do
2011 dflags <- getDynFlags
2012 mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
2013 (gcWord dflags) idx
2014
2015 doWriteSmallPtrArrayOp :: CmmExpr
2016 -> CmmExpr
2017 -> CmmExpr
2018 -> FCode ()
2019 doWriteSmallPtrArrayOp addr idx val = do
2020 dflags <- getDynFlags
2021 let ty = cmmExprType dflags val
2022 mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
2023 emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2024
2025 ------------------------------------------------------------------------------
2026 -- Atomic read-modify-write
2027
2028 -- | Emit an atomic modification to a byte array element. The result
2029 -- reg contains that previous value of the element. Implies a full
2030 -- memory barrier.
2031 doAtomicRMW :: LocalReg -- ^ Result reg
2032 -> AtomicMachOp -- ^ Atomic op (e.g. add)
2033 -> CmmExpr -- ^ MutableByteArray#
2034 -> CmmExpr -- ^ Index
2035 -> CmmType -- ^ Type of element by which we are indexing
2036 -> CmmExpr -- ^ Op argument (e.g. amount to add)
2037 -> FCode ()
2038 doAtomicRMW res amop mba idx idx_ty n = do
2039 dflags <- getDynFlags
2040 let width = typeWidth idx_ty
2041 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2042 width mba idx
2043 emitPrimCall
2044 [ res ]
2045 (MO_AtomicRMW width amop)
2046 [ addr, n ]
2047
2048 -- | Emit an atomic read to a byte array that acts as a memory barrier.
2049 doAtomicReadByteArray
2050 :: LocalReg -- ^ Result reg
2051 -> CmmExpr -- ^ MutableByteArray#
2052 -> CmmExpr -- ^ Index
2053 -> CmmType -- ^ Type of element by which we are indexing
2054 -> FCode ()
2055 doAtomicReadByteArray res mba idx idx_ty = do
2056 dflags <- getDynFlags
2057 let width = typeWidth idx_ty
2058 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2059 width mba idx
2060 emitPrimCall
2061 [ res ]
2062 (MO_AtomicRead width)
2063 [ addr ]
2064
2065 -- | Emit an atomic write to a byte array that acts as a memory barrier.
2066 doAtomicWriteByteArray
2067 :: CmmExpr -- ^ MutableByteArray#
2068 -> CmmExpr -- ^ Index
2069 -> CmmType -- ^ Type of element by which we are indexing
2070 -> CmmExpr -- ^ Value to write
2071 -> FCode ()
2072 doAtomicWriteByteArray mba idx idx_ty val = do
2073 dflags <- getDynFlags
2074 let width = typeWidth idx_ty
2075 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2076 width mba idx
2077 emitPrimCall
2078 [ {- no results -} ]
2079 (MO_AtomicWrite width)
2080 [ addr, val ]
2081
2082 doCasByteArray
2083 :: LocalReg -- ^ Result reg
2084 -> CmmExpr -- ^ MutableByteArray#
2085 -> CmmExpr -- ^ Index
2086 -> CmmType -- ^ Type of element by which we are indexing
2087 -> CmmExpr -- ^ Old value
2088 -> CmmExpr -- ^ New value
2089 -> FCode ()
2090 doCasByteArray res mba idx idx_ty old new = do
2091 dflags <- getDynFlags
2092 let width = (typeWidth idx_ty)
2093 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2094 width mba idx
2095 emitPrimCall
2096 [ res ]
2097 (MO_Cmpxchg width)
2098 [ addr, old, new ]
2099
2100 ------------------------------------------------------------------------------
2101 -- Helpers for emitting function calls
2102
2103 -- | Emit a call to @memcpy@.
2104 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
2105 emitMemcpyCall dst src n align = do
2106 emitPrimCall
2107 [ {-no results-} ]
2108 MO_Memcpy
2109 [ dst, src, n, align ]
2110
2111 -- | Emit a call to @memmove@.
2112 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
2113 emitMemmoveCall dst src n align = do
2114 emitPrimCall
2115 [ {- no results -} ]
2116 MO_Memmove
2117 [ dst, src, n, align ]
2118
2119 -- | Emit a call to @memset@. The second argument must fit inside an
2120 -- unsigned char.
2121 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
2122 emitMemsetCall dst c n align = do
2123 emitPrimCall
2124 [ {- no results -} ]
2125 MO_Memset
2126 [ dst, c, n, align ]
2127
2128 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2129 emitBSwapCall res x width = do
2130 emitPrimCall
2131 [ res ]
2132 (MO_BSwap width)
2133 [ x ]
2134
2135 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2136 emitPopCntCall res x width = do
2137 emitPrimCall
2138 [ res ]
2139 (MO_PopCnt width)
2140 [ x ]
2141
2142 emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2143 emitClzCall res x width = do
2144 emitPrimCall
2145 [ res ]
2146 (MO_Clz width)
2147 [ x ]
2148
2149 emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2150 emitCtzCall res x width = do
2151 emitPrimCall
2152 [ res ]
2153 (MO_Ctz width)
2154 [ x ]