Add ptr-eq short-cut to `compareByteArrays#` primitive
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 {-# LANGUAGE CPP #-}
2
3 ----------------------------------------------------------------------------
4 --
5 -- Stg to C--: primitive operations
6 --
7 -- (c) The University of Glasgow 2004-2006
8 --
9 -----------------------------------------------------------------------------
10
11 module StgCmmPrim (
12 cgOpApp,
13 cgPrimOp, -- internal(ish), used by cgCase to get code for a
14 -- comparison without also turning it into a Bool.
15 shouldInlinePrimOp
16 ) where
17
18 #include "HsVersions.h"
19
20 import GhcPrelude hiding ((<*>))
21
22 import StgCmmLayout
23 import StgCmmForeign
24 import StgCmmEnv
25 import StgCmmMonad
26 import StgCmmUtils
27 import StgCmmTicky
28 import StgCmmHeap
29 import StgCmmProf ( costCentreFrom, curCCS )
30
31 import DynFlags
32 import Platform
33 import BasicTypes
34 import BlockId
35 import MkGraph
36 import StgSyn
37 import Cmm
38 import CmmInfo
39 import Type ( Type, tyConAppTyCon )
40 import TyCon
41 import CLabel
42 import CmmUtils
43 import PrimOp
44 import SMRep
45 import FastString
46 import Outputable
47 import Util
48
49 import Data.Bits ((.&.), bit)
50 import Control.Monad (liftM, when, unless)
51
52 ------------------------------------------------------------------------
53 -- Primitive operations and foreign calls
54 ------------------------------------------------------------------------
55
56 {- Note [Foreign call results]
57 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
58 A foreign call always returns an unboxed tuple of results, one
59 of which is the state token. This seems to happen even for pure
60 calls.
61
62 Even if we returned a single result for pure calls, it'd still be
63 right to wrap it in a singleton unboxed tuple, because the result
64 might be a Haskell closure pointer, we don't want to evaluate it. -}
65
66 ----------------------------------
67 cgOpApp :: StgOp -- The op
68 -> [StgArg] -- Arguments
69 -> Type -- Result type (always an unboxed tuple)
70 -> FCode ReturnKind
71
72 -- Foreign calls
73 cgOpApp (StgFCallOp fcall _) stg_args res_ty
74 = cgForeignCall fcall stg_args res_ty
75 -- Note [Foreign call results]
76
77 -- tagToEnum# is special: we need to pull the constructor
78 -- out of the table, and perform an appropriate return.
79
80 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
81 = ASSERT(isEnumerationTyCon tycon)
82 do { dflags <- getDynFlags
83 ; args' <- getNonVoidArgAmodes [arg]
84 ; let amode = case args' of [amode] -> amode
85 _ -> panic "TagToEnumOp had void arg"
86 ; emitReturn [tagToClosure dflags tycon amode] }
87 where
88 -- If you're reading this code in the attempt to figure
89 -- out why the compiler panic'ed here, it is probably because
90 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
91 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
92 -- That won't work.
93 tycon = tyConAppTyCon res_ty
94
95 cgOpApp (StgPrimOp primop) args res_ty = do
96 dflags <- getDynFlags
97 cmm_args <- getNonVoidArgAmodes args
98 case shouldInlinePrimOp dflags primop cmm_args of
99 Nothing -> do -- out-of-line
100 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
101 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
102
103 Just f -- inline
104 | ReturnsPrim VoidRep <- result_info
105 -> do f []
106 emitReturn []
107
108 | ReturnsPrim rep <- result_info
109 -> do dflags <- getDynFlags
110 res <- newTemp (primRepCmmType dflags rep)
111 f [res]
112 emitReturn [CmmReg (CmmLocal res)]
113
114 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
115 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
116 f regs
117 emitReturn (map (CmmReg . CmmLocal) regs)
118
119 | otherwise -> panic "cgPrimop"
120 where
121 result_info = getPrimOpResultInfo primop
122
123 cgOpApp (StgPrimCallOp primcall) args _res_ty
124 = do { cmm_args <- getNonVoidArgAmodes args
125 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
126 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
127
128 -- | Interpret the argument as an unsigned value, assuming the value
129 -- is given in two-complement form in the given width.
130 --
131 -- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
132 --
133 -- This function is used to work around the fact that many array
134 -- primops take Int# arguments, but we interpret them as unsigned
135 -- quantities in the code gen. This means that we have to be careful
136 -- every time we work on e.g. a CmmInt literal that corresponds to the
137 -- array size, as it might contain a negative Integer value if the
138 -- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
139 -- literal.
140 asUnsigned :: Width -> Integer -> Integer
141 asUnsigned w n = n .&. (bit (widthInBits w) - 1)
142
143 -- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
144 -- ByteOff (or some other fixed width signed type) to represent
145 -- array sizes or indices. This means that these will overflow for
146 -- large enough sizes.
147
148 -- | Decide whether an out-of-line primop should be replaced by an
149 -- inline implementation. This might happen e.g. if there's enough
150 -- static information, such as statically know arguments, to emit a
151 -- more efficient implementation inline.
152 --
153 -- Returns 'Nothing' if this primop should use its out-of-line
154 -- implementation (defined elsewhere) and 'Just' together with a code
155 -- generating function that takes the output regs as arguments
156 -- otherwise.
157 shouldInlinePrimOp :: DynFlags
158 -> PrimOp -- ^ The primop
159 -> [CmmExpr] -- ^ The primop arguments
160 -> Maybe ([LocalReg] -> FCode ())
161
162 shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
163 | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
164 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
165
166 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
167 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
168 Just $ \ [res] ->
169 doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
170 [ (mkIntExpr dflags (fromInteger n),
171 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
172 , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
173 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
174 ]
175 (fromInteger n) init
176
177 shouldInlinePrimOp _ CopyArrayOp
178 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
179 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
180
181 shouldInlinePrimOp _ CopyMutableArrayOp
182 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
183 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
184
185 shouldInlinePrimOp _ CopyArrayArrayOp
186 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
187 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
188
189 shouldInlinePrimOp _ CopyMutableArrayArrayOp
190 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
191 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
192
193 shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
194 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
195 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
196
197 shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
198 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
199 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
200
201 shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
202 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
203 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
204
205 shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
206 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
207 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
208
209 shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
210 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
211 Just $ \ [res] ->
212 doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
213 [ (mkIntExpr dflags (fromInteger n),
214 fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
215 ]
216 (fromInteger n) init
217
218 shouldInlinePrimOp _ CopySmallArrayOp
219 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
220 Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
221
222 shouldInlinePrimOp _ CopySmallMutableArrayOp
223 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
224 Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
225
226 shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
227 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
228 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
229
230 shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
231 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
232 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
233
234 shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
235 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
236 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
237
238 shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
239 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
240 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
241
242 shouldInlinePrimOp dflags primop args
243 | primOpOutOfLine primop = Nothing
244 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
245
246 -- TODO: Several primops, such as 'copyArray#', only have an inline
247 -- implementation (below) but could possibly have both an inline
248 -- implementation and an out-of-line implementation, just like
249 -- 'newArray#'. This would lower the amount of code generated,
250 -- hopefully without a performance impact (needs to be measured).
251
252 ---------------------------------------------------
253 cgPrimOp :: [LocalReg] -- where to put the results
254 -> PrimOp -- the op
255 -> [StgArg] -- arguments
256 -> FCode ()
257
258 cgPrimOp results op args
259 = do dflags <- getDynFlags
260 arg_exprs <- getNonVoidArgAmodes args
261 emitPrimOp dflags results op arg_exprs
262
263
264 ------------------------------------------------------------------------
265 -- Emitting code for a primop
266 ------------------------------------------------------------------------
267
268 emitPrimOp :: DynFlags
269 -> [LocalReg] -- where to put the results
270 -> PrimOp -- the op
271 -> [CmmExpr] -- arguments
272 -> FCode ()
273
274 -- First we handle various awkward cases specially. The remaining
275 -- easy cases are then handled by translateOp, defined below.
276
277 emitPrimOp _ [res] ParOp [arg]
278 =
279 -- for now, just implement this in a C function
280 -- later, we might want to inline it.
281 emitCCall
282 [(res,NoHint)]
283 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
284 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
285
286 emitPrimOp dflags [res] SparkOp [arg]
287 = do
288 -- returns the value of arg in res. We're going to therefore
289 -- refer to arg twice (once to pass to newSpark(), and once to
290 -- assign to res), so put it in a temporary.
291 tmp <- assignTemp arg
292 tmp2 <- newTemp (bWord dflags)
293 emitCCall
294 [(tmp2,NoHint)]
295 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
296 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
297 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
298
299 emitPrimOp dflags [res] GetCCSOfOp [arg]
300 = emitAssign (CmmLocal res) val
301 where
302 val
303 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
304 | otherwise = CmmLit (zeroCLit dflags)
305
306 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
307 = emitAssign (CmmLocal res) curCCS
308
309 emitPrimOp dflags [res] ReadMutVarOp [mutv]
310 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
311
312 emitPrimOp dflags res@[] WriteMutVarOp [mutv,var]
313 = do -- Without this write barrier, other CPUs may see this pointer before
314 -- the writes for the closure it points to have occurred.
315 emitPrimCall res MO_WriteBarrier []
316 emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
317 emitCCall
318 [{-no results-}]
319 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
320 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
321
322 -- #define sizzeofByteArrayzh(r,a) \
323 -- r = ((StgArrBytes *)(a))->bytes
324 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
325 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
326
327 -- #define sizzeofMutableByteArrayzh(r,a) \
328 -- r = ((StgArrBytes *)(a))->bytes
329 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
330 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
331
332 -- #define getSizzeofMutableByteArrayzh(r,a) \
333 -- r = ((StgArrBytes *)(a))->bytes
334 emitPrimOp dflags [res] GetSizeofMutableByteArrayOp [arg]
335 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
336
337
338 -- #define touchzh(o) /* nothing */
339 emitPrimOp _ res@[] TouchOp args@[_arg]
340 = do emitPrimCall res MO_Touch args
341
342 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
343 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
344 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
345
346 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
347 emitPrimOp dflags [res] StableNameToIntOp [arg]
348 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
349
350 -- #define eqStableNamezh(r,sn1,sn2) \
351 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
352 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
353 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
354 cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags),
355 cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags)
356 ])
357
358 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
359 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
360
361 -- #define addrToHValuezh(r,a) r=(P_)a
362 emitPrimOp _ [res] AddrToAnyOp [arg]
363 = emitAssign (CmmLocal res) arg
364
365 -- #define hvalueToAddrzh(r, a) r=(W_)a
366 emitPrimOp _ [res] AnyToAddrOp [arg]
367 = emitAssign (CmmLocal res) arg
368
369 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
370 -- Note: argument may be tagged!
371 emitPrimOp dflags [res] DataToTagOp [arg]
372 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
373
374 {- Freezing arrays-of-ptrs requires changing an info table, for the
375 benefit of the generational collector. It needs to scavenge mutable
376 objects, even if they are in old space. When they become immutable,
377 they can be removed from this scavenge list. -}
378
379 -- #define unsafeFreezzeArrayzh(r,a)
380 -- {
381 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
382 -- r = a;
383 -- }
384 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
385 = emit $ catAGraphs
386 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
387 mkAssign (CmmLocal res) arg ]
388 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
389 = emit $ catAGraphs
390 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
391 mkAssign (CmmLocal res) arg ]
392 emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
393 = emit $ catAGraphs
394 [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)),
395 mkAssign (CmmLocal res) arg ]
396
397 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
398 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
399 = emitAssign (CmmLocal res) arg
400
401 -- Reading/writing pointer arrays
402
403 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
404 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
405 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
406
407 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
408 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
409 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
410 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
411 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
412 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
413 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
414 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
415 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
416 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
417
418 emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
419 emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
420 emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
421
422 -- Getting the size of pointer arrays
423
424 emitPrimOp dflags [res] SizeofArrayOp [arg]
425 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg
426 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgMutArrPtrs_ptrs dflags))
427 (bWord dflags))
428 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
429 = emitPrimOp dflags [res] SizeofArrayOp [arg]
430 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
431 = emitPrimOp dflags [res] SizeofArrayOp [arg]
432 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
433 = emitPrimOp dflags [res] SizeofArrayOp [arg]
434
435 emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
436 emit $ mkAssign (CmmLocal res)
437 (cmmLoadIndexW dflags arg
438 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgSmallMutArrPtrs_ptrs dflags))
439 (bWord dflags))
440 emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
441 emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
442
443 -- IndexXXXoffAddr
444
445 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
446 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
447 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
448 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
449 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
450 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
451 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
452 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
453 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
454 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
455 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
456 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
457 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
458 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
459 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
460 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
461
462 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
463
464 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
465 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
466 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
467 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
468 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
469 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
470 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
471 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
472 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
473 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
474 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
475 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
476 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
477 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
478 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
479 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
480
481 -- IndexXXXArray
482
483 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
484 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
485 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
486 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
487 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
488 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
489 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
490 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
491 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
492 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
493 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
494 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
495 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
496 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
497 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
498 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
499
500 -- ReadXXXArray, identical to IndexXXXArray.
501
502 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
503 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
504 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
505 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
506 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
507 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
508 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
509 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
510 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
511 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
512 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
513 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
514 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
515 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
516 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
517 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
518
519 -- WriteXXXoffAddr
520
521 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
522 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
523 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
524 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
525 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
526 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
527 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
528 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
529 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
530 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
531 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
532 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
533 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
534 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
535 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
536 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
537
538 -- WriteXXXArray
539
540 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
541 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
542 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
543 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
544 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
545 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
546 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
547 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
548 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
549 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
550 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
551 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
552 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
553 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
554 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
555 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
556
557 -- Copying and setting byte arrays
558 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
559 doCopyByteArrayOp src src_off dst dst_off n
560 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
561 doCopyMutableByteArrayOp src src_off dst dst_off n
562 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
563 doCopyByteArrayToAddrOp src src_off dst n
564 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
565 doCopyMutableByteArrayToAddrOp src src_off dst n
566 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
567 doCopyAddrToByteArrayOp src dst dst_off n
568 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
569 doSetByteArrayOp ba off len c
570
571 -- Comparing byte arrays
572 emitPrimOp _ [res] CompareByteArraysOp [ba1,ba1_off,ba2,ba2_off,n] =
573 doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n
574
575 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
576 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
577 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
578 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
579
580 -- Population count
581 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
582 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
583 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
584 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
585 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
586
587 -- Parallel bit deposit
588 emitPrimOp _ [res] Pdep8Op [src, mask] = emitPdepCall res src mask W8
589 emitPrimOp _ [res] Pdep16Op [src, mask] = emitPdepCall res src mask W16
590 emitPrimOp _ [res] Pdep32Op [src, mask] = emitPdepCall res src mask W32
591 emitPrimOp _ [res] Pdep64Op [src, mask] = emitPdepCall res src mask W64
592 emitPrimOp dflags [res] PdepOp [src, mask] = emitPdepCall res src mask (wordWidth dflags)
593
594 -- Parallel bit extract
595 emitPrimOp _ [res] Pext8Op [src, mask] = emitPextCall res src mask W8
596 emitPrimOp _ [res] Pext16Op [src, mask] = emitPextCall res src mask W16
597 emitPrimOp _ [res] Pext32Op [src, mask] = emitPextCall res src mask W32
598 emitPrimOp _ [res] Pext64Op [src, mask] = emitPextCall res src mask W64
599 emitPrimOp dflags [res] PextOp [src, mask] = emitPextCall res src mask (wordWidth dflags)
600
601 -- count leading zeros
602 emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
603 emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
604 emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
605 emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
606 emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
607
608 -- count trailing zeros
609 emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
610 emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
611 emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
612 emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
613 emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
614
615 -- Unsigned int to floating point conversions
616 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
617 (MO_UF_Conv W32) [w]
618 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
619 (MO_UF_Conv W64) [w]
620
621 -- SIMD primops
622 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
623 checkVecCompatibility dflags vcat n w
624 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
625 where
626 zeros :: CmmExpr
627 zeros = CmmLit $ CmmVec (replicate n zero)
628
629 zero :: CmmLit
630 zero = case vcat of
631 IntVec -> CmmInt 0 w
632 WordVec -> CmmInt 0 w
633 FloatVec -> CmmFloat 0 w
634
635 ty :: CmmType
636 ty = vecVmmType vcat n w
637
638 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
639 checkVecCompatibility dflags vcat n w
640 when (es `lengthIsNot` n) $
641 panic "emitPrimOp: VecPackOp has wrong number of arguments"
642 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
643 where
644 zeros :: CmmExpr
645 zeros = CmmLit $ CmmVec (replicate n zero)
646
647 zero :: CmmLit
648 zero = case vcat of
649 IntVec -> CmmInt 0 w
650 WordVec -> CmmInt 0 w
651 FloatVec -> CmmFloat 0 w
652
653 ty :: CmmType
654 ty = vecVmmType vcat n w
655
656 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
657 checkVecCompatibility dflags vcat n w
658 when (res `lengthIsNot` n) $
659 panic "emitPrimOp: VecUnpackOp has wrong number of results"
660 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
661 where
662 ty :: CmmType
663 ty = vecVmmType vcat n w
664
665 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
666 checkVecCompatibility dflags vcat n w
667 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
668 where
669 ty :: CmmType
670 ty = vecVmmType vcat n w
671
672 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
673 checkVecCompatibility dflags vcat n w
674 doIndexByteArrayOp Nothing ty res args
675 where
676 ty :: CmmType
677 ty = vecVmmType vcat n w
678
679 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
680 checkVecCompatibility dflags vcat n w
681 doIndexByteArrayOp Nothing ty res args
682 where
683 ty :: CmmType
684 ty = vecVmmType vcat n w
685
686 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
687 checkVecCompatibility dflags vcat n w
688 doWriteByteArrayOp Nothing ty res args
689 where
690 ty :: CmmType
691 ty = vecVmmType vcat n w
692
693 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
694 checkVecCompatibility dflags vcat n w
695 doIndexOffAddrOp Nothing ty res args
696 where
697 ty :: CmmType
698 ty = vecVmmType vcat n w
699
700 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
701 checkVecCompatibility dflags vcat n w
702 doIndexOffAddrOp Nothing ty res args
703 where
704 ty :: CmmType
705 ty = vecVmmType vcat n w
706
707 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
708 checkVecCompatibility dflags vcat n w
709 doWriteOffAddrOp Nothing ty res args
710 where
711 ty :: CmmType
712 ty = vecVmmType vcat n w
713
714 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
715 checkVecCompatibility dflags vcat n w
716 doIndexByteArrayOpAs Nothing vecty ty res args
717 where
718 vecty :: CmmType
719 vecty = vecVmmType vcat n w
720
721 ty :: CmmType
722 ty = vecCmmCat vcat w
723
724 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
725 checkVecCompatibility dflags vcat n w
726 doIndexByteArrayOpAs Nothing vecty ty res args
727 where
728 vecty :: CmmType
729 vecty = vecVmmType vcat n w
730
731 ty :: CmmType
732 ty = vecCmmCat vcat w
733
734 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
735 checkVecCompatibility dflags vcat n w
736 doWriteByteArrayOp Nothing ty res args
737 where
738 ty :: CmmType
739 ty = vecCmmCat vcat w
740
741 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
742 checkVecCompatibility dflags vcat n w
743 doIndexOffAddrOpAs Nothing vecty ty res args
744 where
745 vecty :: CmmType
746 vecty = vecVmmType vcat n w
747
748 ty :: CmmType
749 ty = vecCmmCat vcat w
750
751 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
752 checkVecCompatibility dflags vcat n w
753 doIndexOffAddrOpAs Nothing vecty ty res args
754 where
755 vecty :: CmmType
756 vecty = vecVmmType vcat n w
757
758 ty :: CmmType
759 ty = vecCmmCat vcat w
760
761 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
762 checkVecCompatibility dflags vcat n w
763 doWriteOffAddrOp Nothing ty res args
764 where
765 ty :: CmmType
766 ty = vecCmmCat vcat w
767
768 -- Prefetch
769 emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
770 emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
771 emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
772 emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
773
774 emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
775 emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
776 emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
777 emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
778
779 emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
780 emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
781 emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
782 emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
783
784 emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
785 emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
786 emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
787 emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
788
789 -- Atomic read-modify-write
790 emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
791 doAtomicRMW res AMO_Add mba ix (bWord dflags) n
792 emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
793 doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
794 emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
795 doAtomicRMW res AMO_And mba ix (bWord dflags) n
796 emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
797 doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
798 emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
799 doAtomicRMW res AMO_Or mba ix (bWord dflags) n
800 emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
801 doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
802 emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
803 doAtomicReadByteArray res mba ix (bWord dflags)
804 emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
805 doAtomicWriteByteArray mba ix (bWord dflags) val
806 emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
807 doCasByteArray res mba ix (bWord dflags) old new
808
809 -- The rest just translate straightforwardly
810 emitPrimOp dflags [res] op [arg]
811 | nopOp op
812 = emitAssign (CmmLocal res) arg
813
814 | Just (mop,rep) <- narrowOp op
815 = emitAssign (CmmLocal res) $
816 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
817
818 emitPrimOp dflags r@[res] op args
819 | Just prim <- callishOp op
820 = do emitPrimCall r prim args
821
822 | Just mop <- translateOp dflags op
823 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
824 emit stmt
825
826 emitPrimOp dflags results op args
827 = case callishPrimOpSupported dflags op of
828 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
829 Right gen -> gen results args
830
831 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
832
833 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
834 callishPrimOpSupported dflags op
835 = case op of
836 IntQuotRemOp | ncg && (x86ish
837 || ppc) -> Left (MO_S_QuotRem (wordWidth dflags))
838 | otherwise -> Right (genericIntQuotRemOp dflags)
839
840 WordQuotRemOp | ncg && (x86ish
841 || ppc) -> Left (MO_U_QuotRem (wordWidth dflags))
842 | otherwise -> Right (genericWordQuotRemOp dflags)
843
844 WordQuotRem2Op | (ncg && (x86ish
845 || ppc))
846 || llvm -> Left (MO_U_QuotRem2 (wordWidth dflags))
847 | otherwise -> Right (genericWordQuotRem2Op dflags)
848
849 WordAdd2Op | (ncg && (x86ish
850 || ppc))
851 || llvm -> Left (MO_Add2 (wordWidth dflags))
852 | otherwise -> Right genericWordAdd2Op
853
854 WordSubCOp | (ncg && (x86ish
855 || ppc))
856 || llvm -> Left (MO_SubWordC (wordWidth dflags))
857 | otherwise -> Right genericWordSubCOp
858
859 IntAddCOp | (ncg && (x86ish
860 || ppc))
861 || llvm -> Left (MO_AddIntC (wordWidth dflags))
862 | otherwise -> Right genericIntAddCOp
863
864 IntSubCOp | (ncg && (x86ish
865 || ppc))
866 || llvm -> Left (MO_SubIntC (wordWidth dflags))
867 | otherwise -> Right genericIntSubCOp
868
869 WordMul2Op | ncg && (x86ish
870 || ppc)
871 || llvm -> Left (MO_U_Mul2 (wordWidth dflags))
872 | otherwise -> Right genericWordMul2Op
873 FloatFabsOp | (ncg && x86ish
874 || ppc)
875 || llvm -> Left MO_F32_Fabs
876 | otherwise -> Right $ genericFabsOp W32
877 DoubleFabsOp | (ncg && x86ish
878 || ppc)
879 || llvm -> Left MO_F64_Fabs
880 | otherwise -> Right $ genericFabsOp W64
881
882 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
883 where
884 ncg = case hscTarget dflags of
885 HscAsm -> True
886 _ -> False
887 llvm = case hscTarget dflags of
888 HscLlvm -> True
889 _ -> False
890 x86ish = case platformArch (targetPlatform dflags) of
891 ArchX86 -> True
892 ArchX86_64 -> True
893 _ -> False
894 ppc = case platformArch (targetPlatform dflags) of
895 ArchPPC -> True
896 ArchPPC_64 _ -> True
897 _ -> False
898
899 genericIntQuotRemOp :: DynFlags -> GenericOp
900 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
901 = emit $ mkAssign (CmmLocal res_q)
902 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
903 mkAssign (CmmLocal res_r)
904 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
905 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
906
907 genericWordQuotRemOp :: DynFlags -> GenericOp
908 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
909 = emit $ mkAssign (CmmLocal res_q)
910 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
911 mkAssign (CmmLocal res_r)
912 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
913 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
914
915 genericWordQuotRem2Op :: DynFlags -> GenericOp
916 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
917 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
918 where ty = cmmExprType dflags arg_x_high
919 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
920 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
921 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
922 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
923 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
924 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
925 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
926 zero = lit 0
927 one = lit 1
928 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
929 lit i = CmmLit (CmmInt i (wordWidth dflags))
930
931 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
932 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
933 mkAssign (CmmLocal res_r) high)
934 f i acc high low =
935 do roverflowedBit <- newTemp ty
936 rhigh' <- newTemp ty
937 rhigh'' <- newTemp ty
938 rlow' <- newTemp ty
939 risge <- newTemp ty
940 racc' <- newTemp ty
941 let high' = CmmReg (CmmLocal rhigh')
942 isge = CmmReg (CmmLocal risge)
943 overflowedBit = CmmReg (CmmLocal roverflowedBit)
944 let this = catAGraphs
945 [mkAssign (CmmLocal roverflowedBit)
946 (shr high negone),
947 mkAssign (CmmLocal rhigh')
948 (or (shl high one) (shr low negone)),
949 mkAssign (CmmLocal rlow')
950 (shl low one),
951 mkAssign (CmmLocal risge)
952 (or (overflowedBit `ne` zero)
953 (high' `ge` arg_y)),
954 mkAssign (CmmLocal rhigh'')
955 (high' `minus` (arg_y `times` isge)),
956 mkAssign (CmmLocal racc')
957 (or (shl acc one) isge)]
958 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
959 (CmmReg (CmmLocal rhigh''))
960 (CmmReg (CmmLocal rlow'))
961 return (this <*> rest)
962 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
963
964 genericWordAdd2Op :: GenericOp
965 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
966 = do dflags <- getDynFlags
967 r1 <- newTemp (cmmExprType dflags arg_x)
968 r2 <- newTemp (cmmExprType dflags arg_x)
969 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
970 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
971 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
972 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
973 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
974 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
975 (wordWidth dflags))
976 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
977 emit $ catAGraphs
978 [mkAssign (CmmLocal r1)
979 (add (bottomHalf arg_x) (bottomHalf arg_y)),
980 mkAssign (CmmLocal r2)
981 (add (topHalf (CmmReg (CmmLocal r1)))
982 (add (topHalf arg_x) (topHalf arg_y))),
983 mkAssign (CmmLocal res_h)
984 (topHalf (CmmReg (CmmLocal r2))),
985 mkAssign (CmmLocal res_l)
986 (or (toTopHalf (CmmReg (CmmLocal r2)))
987 (bottomHalf (CmmReg (CmmLocal r1))))]
988 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
989
990 genericWordSubCOp :: GenericOp
991 genericWordSubCOp [res_r, res_c] [aa, bb] = do
992 dflags <- getDynFlags
993 emit $ catAGraphs
994 [ -- Put the result into 'res_r'.
995 mkAssign (CmmLocal res_r) $
996 CmmMachOp (mo_wordSub dflags) [aa, bb]
997 -- Set 'res_c' to 1 if 'bb > aa' and to 0 otherwise.
998 , mkAssign (CmmLocal res_c) $
999 CmmMachOp (mo_wordUGt dflags) [bb, aa]
1000 ]
1001 genericWordSubCOp _ _ = panic "genericWordSubCOp"
1002
1003 genericIntAddCOp :: GenericOp
1004 genericIntAddCOp [res_r, res_c] [aa, bb]
1005 {-
1006 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
1007 C, and without needing any comparisons. This may not be the
1008 fastest way to do it - if you have better code, please send it! --SDM
1009
1010 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
1011
1012 We currently don't make use of the r value if c is != 0 (i.e.
1013 overflow), we just convert to big integers and try again. This
1014 could be improved by making r and c the correct values for
1015 plugging into a new J#.
1016
1017 { r = ((I_)(a)) + ((I_)(b)); \
1018 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
1019 >> (BITS_IN (I_) - 1); \
1020 }
1021 Wading through the mass of bracketry, it seems to reduce to:
1022 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
1023
1024 -}
1025 = do dflags <- getDynFlags
1026 emit $ catAGraphs [
1027 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
1028 mkAssign (CmmLocal res_c) $
1029 CmmMachOp (mo_wordUShr dflags) [
1030 CmmMachOp (mo_wordAnd dflags) [
1031 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
1032 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
1033 ],
1034 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1035 ]
1036 ]
1037 genericIntAddCOp _ _ = panic "genericIntAddCOp"
1038
1039 genericIntSubCOp :: GenericOp
1040 genericIntSubCOp [res_r, res_c] [aa, bb]
1041 {- Similarly:
1042 #define subIntCzh(r,c,a,b) \
1043 { r = ((I_)(a)) - ((I_)(b)); \
1044 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
1045 >> (BITS_IN (I_) - 1); \
1046 }
1047
1048 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
1049 -}
1050 = do dflags <- getDynFlags
1051 emit $ catAGraphs [
1052 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
1053 mkAssign (CmmLocal res_c) $
1054 CmmMachOp (mo_wordUShr dflags) [
1055 CmmMachOp (mo_wordAnd dflags) [
1056 CmmMachOp (mo_wordXor dflags) [aa,bb],
1057 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
1058 ],
1059 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1060 ]
1061 ]
1062 genericIntSubCOp _ _ = panic "genericIntSubCOp"
1063
1064 genericWordMul2Op :: GenericOp
1065 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
1066 = do dflags <- getDynFlags
1067 let t = cmmExprType dflags arg_x
1068 xlyl <- liftM CmmLocal $ newTemp t
1069 xlyh <- liftM CmmLocal $ newTemp t
1070 xhyl <- liftM CmmLocal $ newTemp t
1071 r <- liftM CmmLocal $ newTemp t
1072 -- This generic implementation is very simple and slow. We might
1073 -- well be able to do better, but for now this at least works.
1074 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
1075 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
1076 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
1077 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
1078 sum = foldl1 add
1079 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
1080 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
1081 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
1082 (wordWidth dflags))
1083 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
1084 emit $ catAGraphs
1085 [mkAssign xlyl
1086 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
1087 mkAssign xlyh
1088 (mul (bottomHalf arg_x) (topHalf arg_y)),
1089 mkAssign xhyl
1090 (mul (topHalf arg_x) (bottomHalf arg_y)),
1091 mkAssign r
1092 (sum [topHalf (CmmReg xlyl),
1093 bottomHalf (CmmReg xhyl),
1094 bottomHalf (CmmReg xlyh)]),
1095 mkAssign (CmmLocal res_l)
1096 (or (bottomHalf (CmmReg xlyl))
1097 (toTopHalf (CmmReg r))),
1098 mkAssign (CmmLocal res_h)
1099 (sum [mul (topHalf arg_x) (topHalf arg_y),
1100 topHalf (CmmReg xhyl),
1101 topHalf (CmmReg xlyh),
1102 topHalf (CmmReg r)])]
1103 genericWordMul2Op _ _ = panic "genericWordMul2Op"
1104
1105 -- This replicates what we had in libraries/base/GHC/Float.hs:
1106 --
1107 -- abs x | x == 0 = 0 -- handles (-0.0)
1108 -- | x > 0 = x
1109 -- | otherwise = negateFloat x
1110 genericFabsOp :: Width -> GenericOp
1111 genericFabsOp w [res_r] [aa]
1112 = do dflags <- getDynFlags
1113 let zero = CmmLit (CmmFloat 0 w)
1114
1115 eq x y = CmmMachOp (MO_F_Eq w) [x, y]
1116 gt x y = CmmMachOp (MO_F_Gt w) [x, y]
1117
1118 neg x = CmmMachOp (MO_F_Neg w) [x]
1119
1120 g1 = catAGraphs [mkAssign (CmmLocal res_r) zero]
1121 g2 = catAGraphs [mkAssign (CmmLocal res_r) aa]
1122
1123 res_t <- CmmLocal <$> newTemp (cmmExprType dflags aa)
1124 let g3 = catAGraphs [mkAssign res_t aa,
1125 mkAssign (CmmLocal res_r) (neg (CmmReg res_t))]
1126
1127 g4 <- mkCmmIfThenElse (gt aa zero) g2 g3
1128
1129 emit =<< mkCmmIfThenElse (eq aa zero) g1 g4
1130
1131 genericFabsOp _ _ _ = panic "genericFabsOp"
1132
1133 -- These PrimOps are NOPs in Cmm
1134
1135 nopOp :: PrimOp -> Bool
1136 nopOp Int2WordOp = True
1137 nopOp Word2IntOp = True
1138 nopOp Int2AddrOp = True
1139 nopOp Addr2IntOp = True
1140 nopOp ChrOp = True -- Int# and Char# are rep'd the same
1141 nopOp OrdOp = True
1142 nopOp _ = False
1143
1144 -- These PrimOps turn into double casts
1145
1146 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
1147 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
1148 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
1149 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
1150 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
1151 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
1152 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
1153 narrowOp _ = Nothing
1154
1155 -- Native word signless ops
1156
1157 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
1158 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
1159 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
1160 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
1161 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
1162 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
1163 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
1164
1165 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
1166 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
1167 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
1168 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
1169 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
1170 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
1171
1172 translateOp dflags AndOp = Just (mo_wordAnd dflags)
1173 translateOp dflags OrOp = Just (mo_wordOr dflags)
1174 translateOp dflags XorOp = Just (mo_wordXor dflags)
1175 translateOp dflags NotOp = Just (mo_wordNot dflags)
1176 translateOp dflags SllOp = Just (mo_wordShl dflags)
1177 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
1178
1179 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
1180
1181 -- Native word signed ops
1182
1183 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
1184 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
1185 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
1186 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
1187 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
1188
1189
1190 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
1191 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
1192 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
1193 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
1194
1195 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
1196 translateOp dflags OrIOp = Just (mo_wordOr dflags)
1197 translateOp dflags XorIOp = Just (mo_wordXor dflags)
1198 translateOp dflags NotIOp = Just (mo_wordNot dflags)
1199 translateOp dflags ISllOp = Just (mo_wordShl dflags)
1200 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
1201 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
1202
1203 -- Native word unsigned ops
1204
1205 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
1206 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
1207 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
1208 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
1209
1210 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
1211 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
1212 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
1213
1214 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
1215 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
1216 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
1217 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
1218
1219 -- Char# ops
1220
1221 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
1222 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
1223 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
1224 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
1225 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
1226 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
1227
1228 -- Double ops
1229
1230 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
1231 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
1232 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
1233 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
1234 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
1235 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
1236
1237 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
1238 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
1239 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
1240 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
1241 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
1242
1243 -- Float ops
1244
1245 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
1246 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
1247 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
1248 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1249 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1250 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1251
1252 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1253 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1254 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1255 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1256 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1257
1258 -- Vector ops
1259
1260 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1261 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1262 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1263 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1264 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1265
1266 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1267 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1268 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1269 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1270 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1271 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1272
1273 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1274 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1275 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1276 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1277 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1278
1279 -- Conversions
1280
1281 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1282 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1283
1284 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1285 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1286
1287 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1288 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1289
1290 -- Word comparisons masquerading as more exotic things.
1291
1292 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1293 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1294 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1295 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1296 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1297 translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
1298 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1299 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1300
1301 translateOp _ _ = Nothing
1302
1303 -- These primops are implemented by CallishMachOps, because they sometimes
1304 -- turn into foreign calls depending on the backend.
1305
1306 callishOp :: PrimOp -> Maybe CallishMachOp
1307 callishOp DoublePowerOp = Just MO_F64_Pwr
1308 callishOp DoubleSinOp = Just MO_F64_Sin
1309 callishOp DoubleCosOp = Just MO_F64_Cos
1310 callishOp DoubleTanOp = Just MO_F64_Tan
1311 callishOp DoubleSinhOp = Just MO_F64_Sinh
1312 callishOp DoubleCoshOp = Just MO_F64_Cosh
1313 callishOp DoubleTanhOp = Just MO_F64_Tanh
1314 callishOp DoubleAsinOp = Just MO_F64_Asin
1315 callishOp DoubleAcosOp = Just MO_F64_Acos
1316 callishOp DoubleAtanOp = Just MO_F64_Atan
1317 callishOp DoubleLogOp = Just MO_F64_Log
1318 callishOp DoubleExpOp = Just MO_F64_Exp
1319 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1320
1321 callishOp FloatPowerOp = Just MO_F32_Pwr
1322 callishOp FloatSinOp = Just MO_F32_Sin
1323 callishOp FloatCosOp = Just MO_F32_Cos
1324 callishOp FloatTanOp = Just MO_F32_Tan
1325 callishOp FloatSinhOp = Just MO_F32_Sinh
1326 callishOp FloatCoshOp = Just MO_F32_Cosh
1327 callishOp FloatTanhOp = Just MO_F32_Tanh
1328 callishOp FloatAsinOp = Just MO_F32_Asin
1329 callishOp FloatAcosOp = Just MO_F32_Acos
1330 callishOp FloatAtanOp = Just MO_F32_Atan
1331 callishOp FloatLogOp = Just MO_F32_Log
1332 callishOp FloatExpOp = Just MO_F32_Exp
1333 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1334
1335 callishOp _ = Nothing
1336
1337 ------------------------------------------------------------------------------
1338 -- Helpers for translating various minor variants of array indexing.
1339
1340 doIndexOffAddrOp :: Maybe MachOp
1341 -> CmmType
1342 -> [LocalReg]
1343 -> [CmmExpr]
1344 -> FCode ()
1345 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1346 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1347 doIndexOffAddrOp _ _ _ _
1348 = panic "StgCmmPrim: doIndexOffAddrOp"
1349
1350 doIndexOffAddrOpAs :: Maybe MachOp
1351 -> CmmType
1352 -> CmmType
1353 -> [LocalReg]
1354 -> [CmmExpr]
1355 -> FCode ()
1356 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1357 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1358 doIndexOffAddrOpAs _ _ _ _ _
1359 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1360
1361 doIndexByteArrayOp :: Maybe MachOp
1362 -> CmmType
1363 -> [LocalReg]
1364 -> [CmmExpr]
1365 -> FCode ()
1366 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1367 = do dflags <- getDynFlags
1368 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1369 doIndexByteArrayOp _ _ _ _
1370 = panic "StgCmmPrim: doIndexByteArrayOp"
1371
1372 doIndexByteArrayOpAs :: Maybe MachOp
1373 -> CmmType
1374 -> CmmType
1375 -> [LocalReg]
1376 -> [CmmExpr]
1377 -> FCode ()
1378 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1379 = do dflags <- getDynFlags
1380 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1381 doIndexByteArrayOpAs _ _ _ _ _
1382 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1383
1384 doReadPtrArrayOp :: LocalReg
1385 -> CmmExpr
1386 -> CmmExpr
1387 -> FCode ()
1388 doReadPtrArrayOp res addr idx
1389 = do dflags <- getDynFlags
1390 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1391
1392 doWriteOffAddrOp :: Maybe MachOp
1393 -> CmmType
1394 -> [LocalReg]
1395 -> [CmmExpr]
1396 -> FCode ()
1397 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1398 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1399 doWriteOffAddrOp _ _ _ _
1400 = panic "StgCmmPrim: doWriteOffAddrOp"
1401
1402 doWriteByteArrayOp :: Maybe MachOp
1403 -> CmmType
1404 -> [LocalReg]
1405 -> [CmmExpr]
1406 -> FCode ()
1407 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1408 = do dflags <- getDynFlags
1409 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1410 doWriteByteArrayOp _ _ _ _
1411 = panic "StgCmmPrim: doWriteByteArrayOp"
1412
1413 doWritePtrArrayOp :: CmmExpr
1414 -> CmmExpr
1415 -> CmmExpr
1416 -> FCode ()
1417 doWritePtrArrayOp addr idx val
1418 = do dflags <- getDynFlags
1419 let ty = cmmExprType dflags val
1420 -- This write barrier is to ensure that the heap writes to the object
1421 -- referred to by val have happened before we write val into the array.
1422 -- See #12469 for details.
1423 emitPrimCall [] MO_WriteBarrier []
1424 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1425 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1426 -- the write barrier. We must write a byte into the mark table:
1427 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1428 emit $ mkStore (
1429 cmmOffsetExpr dflags
1430 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1431 (loadArrPtrsSize dflags addr))
1432 (CmmMachOp (mo_wordUShr dflags) [idx,
1433 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1434 ) (CmmLit (CmmInt 1 W8))
1435
1436 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1437 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1438 where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1439
1440 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1441 -> Maybe MachOp -- Optional result cast
1442 -> CmmType -- Type of element we are accessing
1443 -> LocalReg -- Destination
1444 -> CmmExpr -- Base address
1445 -> CmmType -- Type of element by which we are indexing
1446 -> CmmExpr -- Index
1447 -> FCode ()
1448 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1449 = do dflags <- getDynFlags
1450 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1451 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1452 = do dflags <- getDynFlags
1453 emitAssign (CmmLocal res) (CmmMachOp cast [
1454 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1455
1456 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1457 -> Maybe MachOp -- Optional value cast
1458 -> CmmExpr -- Base address
1459 -> CmmType -- Type of element by which we are indexing
1460 -> CmmExpr -- Index
1461 -> CmmExpr -- Value to write
1462 -> FCode ()
1463 mkBasicIndexedWrite off Nothing base idx_ty idx val
1464 = do dflags <- getDynFlags
1465 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1466 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1467 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1468
1469 -- ----------------------------------------------------------------------------
1470 -- Misc utils
1471
1472 cmmIndexOffExpr :: DynFlags
1473 -> ByteOff -- Initial offset in bytes
1474 -> Width -- Width of element by which we are indexing
1475 -> CmmExpr -- Base address
1476 -> CmmExpr -- Index
1477 -> CmmExpr
1478 cmmIndexOffExpr dflags off width base idx
1479 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1480
1481 cmmLoadIndexOffExpr :: DynFlags
1482 -> ByteOff -- Initial offset in bytes
1483 -> CmmType -- Type of element we are accessing
1484 -> CmmExpr -- Base address
1485 -> CmmType -- Type of element by which we are indexing
1486 -> CmmExpr -- Index
1487 -> CmmExpr
1488 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1489 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1490
1491 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1492 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1493
1494 ------------------------------------------------------------------------------
1495 -- Helpers for translating vector primops.
1496
1497 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1498 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1499
1500 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1501 vecCmmCat IntVec = cmmBits
1502 vecCmmCat WordVec = cmmBits
1503 vecCmmCat FloatVec = cmmFloat
1504
1505 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1506 vecElemInjectCast _ FloatVec _ = Nothing
1507 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1508 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1509 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1510 vecElemInjectCast _ IntVec W64 = Nothing
1511 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1512 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1513 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1514 vecElemInjectCast _ WordVec W64 = Nothing
1515 vecElemInjectCast _ _ _ = Nothing
1516
1517 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1518 vecElemProjectCast _ FloatVec _ = Nothing
1519 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1520 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1521 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1522 vecElemProjectCast _ IntVec W64 = Nothing
1523 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1524 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1525 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1526 vecElemProjectCast _ WordVec W64 = Nothing
1527 vecElemProjectCast _ _ _ = Nothing
1528
1529 -- Check to make sure that we can generate code for the specified vector type
1530 -- given the current set of dynamic flags.
1531 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1532 checkVecCompatibility dflags vcat l w = do
1533 when (hscTarget dflags /= HscLlvm) $ do
1534 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1535 ,"Please use -fllvm."]
1536 check vecWidth vcat l w
1537 where
1538 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1539 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1540 sorry $ "128-bit wide single-precision floating point " ++
1541 "SIMD vector instructions require at least -msse."
1542 check W128 _ _ _ | not (isSse2Enabled dflags) =
1543 sorry $ "128-bit wide integer and double precision " ++
1544 "SIMD vector instructions require at least -msse2."
1545 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1546 sorry $ "256-bit wide floating point " ++
1547 "SIMD vector instructions require at least -mavx."
1548 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1549 sorry $ "256-bit wide integer " ++
1550 "SIMD vector instructions require at least -mavx2."
1551 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1552 sorry $ "512-bit wide " ++
1553 "SIMD vector instructions require -mavx512f."
1554 check _ _ _ _ = return ()
1555
1556 vecWidth = typeWidth (vecVmmType vcat l w)
1557
1558 ------------------------------------------------------------------------------
1559 -- Helpers for translating vector packing and unpacking.
1560
1561 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1562 -> CmmType -- Type of vector
1563 -> CmmExpr -- Initial vector
1564 -> [CmmExpr] -- Elements
1565 -> CmmFormal -- Destination for result
1566 -> FCode ()
1567 doVecPackOp maybe_pre_write_cast ty z es res = do
1568 dst <- newTemp ty
1569 emitAssign (CmmLocal dst) z
1570 vecPack dst es 0
1571 where
1572 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1573 vecPack src [] _ =
1574 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1575
1576 vecPack src (e : es) i = do
1577 dst <- newTemp ty
1578 if isFloatType (vecElemType ty)
1579 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1580 [CmmReg (CmmLocal src), cast e, iLit])
1581 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1582 [CmmReg (CmmLocal src), cast e, iLit])
1583 vecPack dst es (i + 1)
1584 where
1585 -- vector indices are always 32-bits
1586 iLit = CmmLit (CmmInt (toInteger i) W32)
1587
1588 cast :: CmmExpr -> CmmExpr
1589 cast val = case maybe_pre_write_cast of
1590 Nothing -> val
1591 Just cast -> CmmMachOp cast [val]
1592
1593 len :: Length
1594 len = vecLength ty
1595
1596 wid :: Width
1597 wid = typeWidth (vecElemType ty)
1598
1599 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1600 -> CmmType -- Type of vector
1601 -> CmmExpr -- Vector
1602 -> [CmmFormal] -- Element results
1603 -> FCode ()
1604 doVecUnpackOp maybe_post_read_cast ty e res =
1605 vecUnpack res 0
1606 where
1607 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1608 vecUnpack [] _ =
1609 return ()
1610
1611 vecUnpack (r : rs) i = do
1612 if isFloatType (vecElemType ty)
1613 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1614 [e, iLit]))
1615 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1616 [e, iLit]))
1617 vecUnpack rs (i + 1)
1618 where
1619 -- vector indices are always 32-bits
1620 iLit = CmmLit (CmmInt (toInteger i) W32)
1621
1622 cast :: CmmExpr -> CmmExpr
1623 cast val = case maybe_post_read_cast of
1624 Nothing -> val
1625 Just cast -> CmmMachOp cast [val]
1626
1627 len :: Length
1628 len = vecLength ty
1629
1630 wid :: Width
1631 wid = typeWidth (vecElemType ty)
1632
1633 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1634 -> CmmType -- Vector type
1635 -> CmmExpr -- Source vector
1636 -> CmmExpr -- Element
1637 -> CmmExpr -- Index at which to insert element
1638 -> CmmFormal -- Destination for result
1639 -> FCode ()
1640 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1641 dflags <- getDynFlags
1642 -- vector indices are always 32-bits
1643 let idx' :: CmmExpr
1644 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1645 if isFloatType (vecElemType ty)
1646 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1647 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1648 where
1649 cast :: CmmExpr -> CmmExpr
1650 cast val = case maybe_pre_write_cast of
1651 Nothing -> val
1652 Just cast -> CmmMachOp cast [val]
1653
1654 len :: Length
1655 len = vecLength ty
1656
1657 wid :: Width
1658 wid = typeWidth (vecElemType ty)
1659
1660 ------------------------------------------------------------------------------
1661 -- Helpers for translating prefetching.
1662
1663
1664 -- | Translate byte array prefetch operations into proper primcalls.
1665 doPrefetchByteArrayOp :: Int
1666 -> [CmmExpr]
1667 -> FCode ()
1668 doPrefetchByteArrayOp locality [addr,idx]
1669 = do dflags <- getDynFlags
1670 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1671 doPrefetchByteArrayOp _ _
1672 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1673
1674 -- | Translate mutable byte array prefetch operations into proper primcalls.
1675 doPrefetchMutableByteArrayOp :: Int
1676 -> [CmmExpr]
1677 -> FCode ()
1678 doPrefetchMutableByteArrayOp locality [addr,idx]
1679 = do dflags <- getDynFlags
1680 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1681 doPrefetchMutableByteArrayOp _ _
1682 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1683
1684 -- | Translate address prefetch operations into proper primcalls.
1685 doPrefetchAddrOp ::Int
1686 -> [CmmExpr]
1687 -> FCode ()
1688 doPrefetchAddrOp locality [addr,idx]
1689 = mkBasicPrefetch locality 0 addr idx
1690 doPrefetchAddrOp _ _
1691 = panic "StgCmmPrim: doPrefetchAddrOp"
1692
1693 -- | Translate value prefetch operations into proper primcalls.
1694 doPrefetchValueOp :: Int
1695 -> [CmmExpr]
1696 -> FCode ()
1697 doPrefetchValueOp locality [addr]
1698 = do dflags <- getDynFlags
1699 mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
1700 doPrefetchValueOp _ _
1701 = panic "StgCmmPrim: doPrefetchValueOp"
1702
1703 -- | helper to generate prefetch primcalls
1704 mkBasicPrefetch :: Int -- Locality level 0-3
1705 -> ByteOff -- Initial offset in bytes
1706 -> CmmExpr -- Base address
1707 -> CmmExpr -- Index
1708 -> FCode ()
1709 mkBasicPrefetch locality off base idx
1710 = do dflags <- getDynFlags
1711 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1712 return ()
1713
1714 -- ----------------------------------------------------------------------------
1715 -- Allocating byte arrays
1716
1717 -- | Takes a register to return the newly allocated array in and the
1718 -- size of the new array in bytes. Allocates a new
1719 -- 'MutableByteArray#'.
1720 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1721 doNewByteArrayOp res_r n = do
1722 dflags <- getDynFlags
1723
1724 let info_ptr = mkLblExpr mkArrWords_infoLabel
1725 rep = arrWordsRep dflags n
1726
1727 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1728 (mkIntExpr dflags (nonHdrSize dflags rep))
1729 (zeroExpr dflags)
1730
1731 let hdr_size = fixedHdrSize dflags
1732
1733 base <- allocHeapClosure rep info_ptr curCCS
1734 [ (mkIntExpr dflags n,
1735 hdr_size + oFFSET_StgArrBytes_bytes dflags)
1736 ]
1737
1738 emit $ mkAssign (CmmLocal res_r) base
1739
1740 -- ----------------------------------------------------------------------------
1741 -- Comparing byte arrays
1742
1743 doCompareByteArraysOp :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1744 -> FCode ()
1745 doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n = do
1746 dflags <- getDynFlags
1747 ba1_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba1 (arrWordsHdrSize dflags)) ba1_off
1748 ba2_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba2 (arrWordsHdrSize dflags)) ba2_off
1749
1750 -- short-cut in case of equal pointers avoiding a costly
1751 -- subroutine call to the memcmp(3) routine; the Cmm logic below
1752 -- results in assembly code being generated for
1753 --
1754 -- cmpPrefix10 :: ByteArray# -> ByteArray# -> Int#
1755 -- cmpPrefix10 ba1 ba2 = compareByteArrays# ba1 0# ba2 0# 10#
1756 --
1757 -- that looks like
1758 --
1759 -- leaq 16(%r14),%rax
1760 -- leaq 16(%rsi),%rbx
1761 -- xorl %ecx,%ecx
1762 -- cmpq %rbx,%rax
1763 -- je l_ptr_eq
1764 --
1765 -- ; NB: the common case (unequal pointers) falls-through
1766 -- ; the conditional jump, and therefore matches the
1767 -- ; usual static branch prediction convention of modern cpus
1768 --
1769 -- subq $8,%rsp
1770 -- movq %rbx,%rsi
1771 -- movq %rax,%rdi
1772 -- movl $10,%edx
1773 -- xorl %eax,%eax
1774 -- call memcmp
1775 -- addq $8,%rsp
1776 -- movslq %eax,%rax
1777 -- movq %rax,%rcx
1778 -- l_ptr_eq:
1779 -- movq %rcx,%rbx
1780 -- jmp *(%rbp)
1781
1782 l_ptr_eq <- newBlockId
1783 l_ptr_ne <- newBlockId
1784
1785 emit (mkAssign (CmmLocal res) (zeroExpr dflags))
1786 emit (mkCbranch (cmmEqWord dflags ba1_p ba2_p)
1787 l_ptr_eq l_ptr_ne (Just False))
1788
1789 emitLabel l_ptr_ne
1790 emitMemcmpCall res ba1_p ba2_p n 1
1791
1792 emitLabel l_ptr_eq
1793
1794 -- ----------------------------------------------------------------------------
1795 -- Copying byte arrays
1796
1797 -- | Takes a source 'ByteArray#', an offset in the source array, a
1798 -- destination 'MutableByteArray#', an offset into the destination
1799 -- array, and the number of bytes to copy. Copies the given number of
1800 -- bytes from the source array to the destination array.
1801 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1802 -> FCode ()
1803 doCopyByteArrayOp = emitCopyByteArray copy
1804 where
1805 -- Copy data (we assume the arrays aren't overlapping since
1806 -- they're of different types)
1807 copy _src _dst dst_p src_p bytes =
1808 emitMemcpyCall dst_p src_p bytes 1
1809
1810 -- | Takes a source 'MutableByteArray#', an offset in the source
1811 -- array, a destination 'MutableByteArray#', an offset into the
1812 -- destination array, and the number of bytes to copy. Copies the
1813 -- given number of bytes from the source array to the destination
1814 -- array.
1815 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1816 -> FCode ()
1817 doCopyMutableByteArrayOp = emitCopyByteArray copy
1818 where
1819 -- The only time the memory might overlap is when the two arrays
1820 -- we were provided are the same array!
1821 -- TODO: Optimize branch for common case of no aliasing.
1822 copy src dst dst_p src_p bytes = do
1823 dflags <- getDynFlags
1824 [moveCall, cpyCall] <- forkAlts [
1825 getCode $ emitMemmoveCall dst_p src_p bytes 1,
1826 getCode $ emitMemcpyCall dst_p src_p bytes 1
1827 ]
1828 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1829
1830 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1831 -> FCode ())
1832 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1833 -> FCode ()
1834 emitCopyByteArray copy src src_off dst dst_off n = do
1835 dflags <- getDynFlags
1836 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1837 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1838 copy src dst dst_p src_p n
1839
1840 -- | Takes a source 'ByteArray#', an offset in the source array, a
1841 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1842 -- number of bytes from the source array to the destination memory region.
1843 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1844 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1845 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1846 dflags <- getDynFlags
1847 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1848 emitMemcpyCall dst_p src_p bytes 1
1849
1850 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1851 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1852 -- number of bytes from the source array to the destination memory region.
1853 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1854 -> FCode ()
1855 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1856
1857 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1858 -- the destination array, and the number of bytes to copy. Copies the given
1859 -- number of bytes from the source memory region to the destination array.
1860 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1861 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1862 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1863 dflags <- getDynFlags
1864 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1865 emitMemcpyCall dst_p src_p bytes 1
1866
1867
1868 -- ----------------------------------------------------------------------------
1869 -- Setting byte arrays
1870
1871 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1872 -- and a byte, and sets each of the selected bytes in the array to the
1873 -- character.
1874 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1875 -> FCode ()
1876 doSetByteArrayOp ba off len c
1877 = do dflags <- getDynFlags
1878 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1879 emitMemsetCall p c len 1
1880
1881 -- ----------------------------------------------------------------------------
1882 -- Allocating arrays
1883
1884 -- | Allocate a new array.
1885 doNewArrayOp :: CmmFormal -- ^ return register
1886 -> SMRep -- ^ representation of the array
1887 -> CLabel -- ^ info pointer
1888 -> [(CmmExpr, ByteOff)] -- ^ header payload
1889 -> WordOff -- ^ array size
1890 -> CmmExpr -- ^ initial element
1891 -> FCode ()
1892 doNewArrayOp res_r rep info payload n init = do
1893 dflags <- getDynFlags
1894
1895 let info_ptr = mkLblExpr info
1896
1897 tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
1898 (mkIntExpr dflags (nonHdrSize dflags rep))
1899 (zeroExpr dflags)
1900
1901 base <- allocHeapClosure rep info_ptr curCCS payload
1902
1903 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1904 emit $ mkAssign arr base
1905
1906 -- Initialise all elements of the array
1907 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep)
1908 for <- newBlockId
1909 emitLabel for
1910 let loopBody =
1911 [ mkStore (CmmReg (CmmLocal p)) init
1912 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1913 , mkBranch for ]
1914 emit =<< mkCmmIfThen
1915 (cmmULtWord dflags (CmmReg (CmmLocal p))
1916 (cmmOffsetW dflags (CmmReg arr)
1917 (hdrSizeW dflags rep + n)))
1918 (catAGraphs loopBody)
1919
1920 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1921
1922 -- ----------------------------------------------------------------------------
1923 -- Copying pointer arrays
1924
1925 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1926 -- nowhere else in the code generator. This is mostly because these
1927 -- "primitive" ops result in a surprisingly large amount of code. It
1928 -- will likely be worthwhile to optimize what is emitted here, so that
1929 -- our optimization passes don't waste time repeatedly optimizing the
1930 -- same bits of code.
1931
1932 -- More closely imitates 'assignTemp' from the old code generator, which
1933 -- returns a CmmExpr rather than a LocalReg.
1934 assignTempE :: CmmExpr -> FCode CmmExpr
1935 assignTempE e = do
1936 t <- assignTemp e
1937 return (CmmReg (CmmLocal t))
1938
1939 -- | Takes a source 'Array#', an offset in the source array, a
1940 -- destination 'MutableArray#', an offset into the destination array,
1941 -- and the number of elements to copy. Copies the given number of
1942 -- elements from the source array to the destination array.
1943 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1944 -> FCode ()
1945 doCopyArrayOp = emitCopyArray copy
1946 where
1947 -- Copy data (we assume the arrays aren't overlapping since
1948 -- they're of different types)
1949 copy _src _dst dst_p src_p bytes =
1950 do dflags <- getDynFlags
1951 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1952 (wORD_SIZE dflags)
1953
1954
1955 -- | Takes a source 'MutableArray#', an offset in the source array, a
1956 -- destination 'MutableArray#', an offset into the destination array,
1957 -- and the number of elements to copy. Copies the given number of
1958 -- elements from the source array to the destination array.
1959 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1960 -> FCode ()
1961 doCopyMutableArrayOp = emitCopyArray copy
1962 where
1963 -- The only time the memory might overlap is when the two arrays
1964 -- we were provided are the same array!
1965 -- TODO: Optimize branch for common case of no aliasing.
1966 copy src dst dst_p src_p bytes = do
1967 dflags <- getDynFlags
1968 [moveCall, cpyCall] <- forkAlts [
1969 getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1970 (wORD_SIZE dflags),
1971 getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1972 (wORD_SIZE dflags)
1973 ]
1974 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1975
1976 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1977 -> FCode ()) -- ^ copy function
1978 -> CmmExpr -- ^ source array
1979 -> CmmExpr -- ^ offset in source array
1980 -> CmmExpr -- ^ destination array
1981 -> CmmExpr -- ^ offset in destination array
1982 -> WordOff -- ^ number of elements to copy
1983 -> FCode ()
1984 emitCopyArray copy src0 src_off dst0 dst_off0 n = do
1985 dflags <- getDynFlags
1986 when (n /= 0) $ do
1987 -- Passed as arguments (be careful)
1988 src <- assignTempE src0
1989 dst <- assignTempE dst0
1990 dst_off <- assignTempE dst_off0
1991
1992 -- Set the dirty bit in the header.
1993 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1994
1995 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
1996 (arrPtrsHdrSize dflags)
1997 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1998 src_p <- assignTempE $ cmmOffsetExprW dflags
1999 (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
2000 let bytes = wordsToBytes dflags n
2001
2002 copy src dst dst_p src_p bytes
2003
2004 -- The base address of the destination card table
2005 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
2006 (loadArrPtrsSize dflags dst)
2007
2008 emitSetCards dst_off dst_cards_p n
2009
2010 doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2011 -> FCode ()
2012 doCopySmallArrayOp = emitCopySmallArray copy
2013 where
2014 -- Copy data (we assume the arrays aren't overlapping since
2015 -- they're of different types)
2016 copy _src _dst dst_p src_p bytes =
2017 do dflags <- getDynFlags
2018 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
2019 (wORD_SIZE dflags)
2020
2021
2022 doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2023 -> FCode ()
2024 doCopySmallMutableArrayOp = emitCopySmallArray copy
2025 where
2026 -- The only time the memory might overlap is when the two arrays
2027 -- we were provided are the same array!
2028 -- TODO: Optimize branch for common case of no aliasing.
2029 copy src dst dst_p src_p bytes = do
2030 dflags <- getDynFlags
2031 [moveCall, cpyCall] <- forkAlts
2032 [ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
2033 (wORD_SIZE dflags)
2034 , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
2035 (wORD_SIZE dflags)
2036 ]
2037 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
2038
2039 emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
2040 -> FCode ()) -- ^ copy function
2041 -> CmmExpr -- ^ source array
2042 -> CmmExpr -- ^ offset in source array
2043 -> CmmExpr -- ^ destination array
2044 -> CmmExpr -- ^ offset in destination array
2045 -> WordOff -- ^ number of elements to copy
2046 -> FCode ()
2047 emitCopySmallArray copy src0 src_off dst0 dst_off n = do
2048 dflags <- getDynFlags
2049
2050 -- Passed as arguments (be careful)
2051 src <- assignTempE src0
2052 dst <- assignTempE dst0
2053
2054 -- Set the dirty bit in the header.
2055 emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2056
2057 dst_p <- assignTempE $ cmmOffsetExprW dflags
2058 (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
2059 src_p <- assignTempE $ cmmOffsetExprW dflags
2060 (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
2061 let bytes = wordsToBytes dflags n
2062
2063 copy src dst dst_p src_p bytes
2064
2065 -- | Takes an info table label, a register to return the newly
2066 -- allocated array in, a source array, an offset in the source array,
2067 -- and the number of elements to copy. Allocates a new array and
2068 -- initializes it from the source array.
2069 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
2070 -> FCode ()
2071 emitCloneArray info_p res_r src src_off n = do
2072 dflags <- getDynFlags
2073
2074 let info_ptr = mkLblExpr info_p
2075 rep = arrPtrsRep dflags n
2076
2077 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
2078 (mkIntExpr dflags (nonHdrSize dflags rep))
2079 (zeroExpr dflags)
2080
2081 let hdr_size = fixedHdrSize dflags
2082
2083 base <- allocHeapClosure rep info_ptr curCCS
2084 [ (mkIntExpr dflags n,
2085 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
2086 , (mkIntExpr dflags (nonHdrSizeW rep),
2087 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
2088 ]
2089
2090 arr <- CmmLocal `fmap` newTemp (bWord dflags)
2091 emit $ mkAssign arr base
2092
2093 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
2094 (arrPtrsHdrSize dflags)
2095 src_p <- assignTempE $ cmmOffsetExprW dflags src
2096 (cmmAddWord dflags
2097 (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
2098
2099 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
2100 (wORD_SIZE dflags)
2101
2102 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2103
2104 -- | Takes an info table label, a register to return the newly
2105 -- allocated array in, a source array, an offset in the source array,
2106 -- and the number of elements to copy. Allocates a new array and
2107 -- initializes it from the source array.
2108 emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
2109 -> FCode ()
2110 emitCloneSmallArray info_p res_r src src_off n = do
2111 dflags <- getDynFlags
2112
2113 let info_ptr = mkLblExpr info_p
2114 rep = smallArrPtrsRep n
2115
2116 tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
2117 (mkIntExpr dflags (nonHdrSize dflags rep))
2118 (zeroExpr dflags)
2119
2120 let hdr_size = fixedHdrSize dflags
2121
2122 base <- allocHeapClosure rep info_ptr curCCS
2123 [ (mkIntExpr dflags n,
2124 hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
2125 ]
2126
2127 arr <- CmmLocal `fmap` newTemp (bWord dflags)
2128 emit $ mkAssign arr base
2129
2130 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
2131 (smallArrPtrsHdrSize dflags)
2132 src_p <- assignTempE $ cmmOffsetExprW dflags src
2133 (cmmAddWord dflags
2134 (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
2135
2136 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
2137 (wORD_SIZE dflags)
2138
2139 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2140
2141 -- | Takes and offset in the destination array, the base address of
2142 -- the card table, and the number of elements affected (*not* the
2143 -- number of cards). The number of elements may not be zero.
2144 -- Marks the relevant cards as dirty.
2145 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
2146 emitSetCards dst_start dst_cards_start n = do
2147 dflags <- getDynFlags
2148 start_card <- assignTempE $ cardCmm dflags dst_start
2149 let end_card = cardCmm dflags
2150 (cmmSubWord dflags
2151 (cmmAddWord dflags dst_start (mkIntExpr dflags n))
2152 (mkIntExpr dflags 1))
2153 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
2154 (mkIntExpr dflags 1)
2155 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
2156 1 -- no alignment (1 byte)
2157
2158 -- Convert an element index to a card index
2159 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
2160 cardCmm dflags i =
2161 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
2162
2163 ------------------------------------------------------------------------------
2164 -- SmallArray PrimOp implementations
2165
2166 doReadSmallPtrArrayOp :: LocalReg
2167 -> CmmExpr
2168 -> CmmExpr
2169 -> FCode ()
2170 doReadSmallPtrArrayOp res addr idx = do
2171 dflags <- getDynFlags
2172 mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
2173 (gcWord dflags) idx
2174
2175 doWriteSmallPtrArrayOp :: CmmExpr
2176 -> CmmExpr
2177 -> CmmExpr
2178 -> FCode ()
2179 doWriteSmallPtrArrayOp addr idx val = do
2180 dflags <- getDynFlags
2181 let ty = cmmExprType dflags val
2182 mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
2183 emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2184
2185 ------------------------------------------------------------------------------
2186 -- Atomic read-modify-write
2187
2188 -- | Emit an atomic modification to a byte array element. The result
2189 -- reg contains that previous value of the element. Implies a full
2190 -- memory barrier.
2191 doAtomicRMW :: LocalReg -- ^ Result reg
2192 -> AtomicMachOp -- ^ Atomic op (e.g. add)
2193 -> CmmExpr -- ^ MutableByteArray#
2194 -> CmmExpr -- ^ Index
2195 -> CmmType -- ^ Type of element by which we are indexing
2196 -> CmmExpr -- ^ Op argument (e.g. amount to add)
2197 -> FCode ()
2198 doAtomicRMW res amop mba idx idx_ty n = do
2199 dflags <- getDynFlags
2200 let width = typeWidth idx_ty
2201 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2202 width mba idx
2203 emitPrimCall
2204 [ res ]
2205 (MO_AtomicRMW width amop)
2206 [ addr, n ]
2207
2208 -- | Emit an atomic read to a byte array that acts as a memory barrier.
2209 doAtomicReadByteArray
2210 :: LocalReg -- ^ Result reg
2211 -> CmmExpr -- ^ MutableByteArray#
2212 -> CmmExpr -- ^ Index
2213 -> CmmType -- ^ Type of element by which we are indexing
2214 -> FCode ()
2215 doAtomicReadByteArray res mba idx idx_ty = do
2216 dflags <- getDynFlags
2217 let width = typeWidth idx_ty
2218 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2219 width mba idx
2220 emitPrimCall
2221 [ res ]
2222 (MO_AtomicRead width)
2223 [ addr ]
2224
2225 -- | Emit an atomic write to a byte array that acts as a memory barrier.
2226 doAtomicWriteByteArray
2227 :: CmmExpr -- ^ MutableByteArray#
2228 -> CmmExpr -- ^ Index
2229 -> CmmType -- ^ Type of element by which we are indexing
2230 -> CmmExpr -- ^ Value to write
2231 -> FCode ()
2232 doAtomicWriteByteArray mba idx idx_ty val = do
2233 dflags <- getDynFlags
2234 let width = typeWidth idx_ty
2235 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2236 width mba idx
2237 emitPrimCall
2238 [ {- no results -} ]
2239 (MO_AtomicWrite width)
2240 [ addr, val ]
2241
2242 doCasByteArray
2243 :: LocalReg -- ^ Result reg
2244 -> CmmExpr -- ^ MutableByteArray#
2245 -> CmmExpr -- ^ Index
2246 -> CmmType -- ^ Type of element by which we are indexing
2247 -> CmmExpr -- ^ Old value
2248 -> CmmExpr -- ^ New value
2249 -> FCode ()
2250 doCasByteArray res mba idx idx_ty old new = do
2251 dflags <- getDynFlags
2252 let width = (typeWidth idx_ty)
2253 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2254 width mba idx
2255 emitPrimCall
2256 [ res ]
2257 (MO_Cmpxchg width)
2258 [ addr, old, new ]
2259
2260 ------------------------------------------------------------------------------
2261 -- Helpers for emitting function calls
2262
2263 -- | Emit a call to @memcpy@.
2264 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2265 emitMemcpyCall dst src n align = do
2266 emitPrimCall
2267 [ {-no results-} ]
2268 (MO_Memcpy align)
2269 [ dst, src, n ]
2270
2271 -- | Emit a call to @memmove@.
2272 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2273 emitMemmoveCall dst src n align = do
2274 emitPrimCall
2275 [ {- no results -} ]
2276 (MO_Memmove align)
2277 [ dst, src, n ]
2278
2279 -- | Emit a call to @memset@. The second argument must fit inside an
2280 -- unsigned char.
2281 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2282 emitMemsetCall dst c n align = do
2283 emitPrimCall
2284 [ {- no results -} ]
2285 (MO_Memset align)
2286 [ dst, c, n ]
2287
2288 emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2289 emitMemcmpCall res ptr1 ptr2 n align = do
2290 -- 'MO_Memcmp' is assumed to return an 32bit 'CInt' because all
2291 -- code-gens currently call out to the @memcmp(3)@ C function.
2292 -- This was easier than moving the sign-extensions into
2293 -- all the code-gens.
2294 dflags <- getDynFlags
2295 let is32Bit = typeWidth (localRegType res) == W32
2296
2297 cres <- if is32Bit
2298 then return res
2299 else newTemp b32
2300
2301 emitPrimCall
2302 [ cres ]
2303 (MO_Memcmp align)
2304 [ ptr1, ptr2, n ]
2305
2306 unless is32Bit $ do
2307 emit $ mkAssign (CmmLocal res)
2308 (CmmMachOp
2309 (mo_s_32ToWord dflags)
2310 [(CmmReg (CmmLocal cres))])
2311
2312 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2313 emitBSwapCall res x width = do
2314 emitPrimCall
2315 [ res ]
2316 (MO_BSwap width)
2317 [ x ]
2318
2319 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2320 emitPopCntCall res x width = do
2321 emitPrimCall
2322 [ res ]
2323 (MO_PopCnt width)
2324 [ x ]
2325
2326 emitPdepCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
2327 emitPdepCall res x y width = do
2328 emitPrimCall
2329 [ res ]
2330 (MO_Pdep width)
2331 [ x, y ]
2332
2333 emitPextCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
2334 emitPextCall res x y width = do
2335 emitPrimCall
2336 [ res ]
2337 (MO_Pext width)
2338 [ x, y ]
2339
2340 emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2341 emitClzCall res x width = do
2342 emitPrimCall
2343 [ res ]
2344 (MO_Clz width)
2345 [ x ]
2346
2347 emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2348 emitCtzCall res x width = do
2349 emitPrimCall
2350 [ res ]
2351 (MO_Ctz width)
2352 [ x ]