809dc55a582656fbf891bae59720997bf76c7b8e
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 {-# LANGUAGE CPP #-}
2 -- emitPrimOp is quite large
3 {-# OPTIONS_GHC -fmax-pmcheck-iterations=4000000 #-}
4
5 ----------------------------------------------------------------------------
6 --
7 -- Stg to C--: primitive operations
8 --
9 -- (c) The University of Glasgow 2004-2006
10 --
11 -----------------------------------------------------------------------------
12
13 module StgCmmPrim (
14 cgOpApp,
15 cgPrimOp, -- internal(ish), used by cgCase to get code for a
16 -- comparison without also turning it into a Bool.
17 shouldInlinePrimOp
18 ) where
19
20 #include "HsVersions.h"
21
22 import GhcPrelude hiding ((<*>))
23
24 import StgCmmLayout
25 import StgCmmForeign
26 import StgCmmEnv
27 import StgCmmMonad
28 import StgCmmUtils
29 import StgCmmTicky
30 import StgCmmHeap
31 import StgCmmProf ( costCentreFrom )
32
33 import DynFlags
34 import Platform
35 import BasicTypes
36 import BlockId
37 import MkGraph
38 import StgSyn
39 import Cmm
40 import CmmInfo
41 import Type ( Type, tyConAppTyCon )
42 import TyCon
43 import CLabel
44 import CmmUtils
45 import PrimOp
46 import SMRep
47 import FastString
48 import Outputable
49 import Util
50
51 import Data.Bits ((.&.), bit)
52 import Control.Monad (liftM, when, unless)
53
54 ------------------------------------------------------------------------
55 -- Primitive operations and foreign calls
56 ------------------------------------------------------------------------
57
58 {- Note [Foreign call results]
59 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
60 A foreign call always returns an unboxed tuple of results, one
61 of which is the state token. This seems to happen even for pure
62 calls.
63
64 Even if we returned a single result for pure calls, it'd still be
65 right to wrap it in a singleton unboxed tuple, because the result
66 might be a Haskell closure pointer, we don't want to evaluate it. -}
67
68 ----------------------------------
69 cgOpApp :: StgOp -- The op
70 -> [StgArg] -- Arguments
71 -> Type -- Result type (always an unboxed tuple)
72 -> FCode ReturnKind
73
74 -- Foreign calls
75 cgOpApp (StgFCallOp fcall _) stg_args res_ty
76 = cgForeignCall fcall stg_args res_ty
77 -- Note [Foreign call results]
78
79 -- tagToEnum# is special: we need to pull the constructor
80 -- out of the table, and perform an appropriate return.
81
82 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
83 = ASSERT(isEnumerationTyCon tycon)
84 do { dflags <- getDynFlags
85 ; args' <- getNonVoidArgAmodes [arg]
86 ; let amode = case args' of [amode] -> amode
87 _ -> panic "TagToEnumOp had void arg"
88 ; emitReturn [tagToClosure dflags tycon amode] }
89 where
90 -- If you're reading this code in the attempt to figure
91 -- out why the compiler panic'ed here, it is probably because
92 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
93 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
94 -- That won't work.
95 tycon = tyConAppTyCon res_ty
96
97 cgOpApp (StgPrimOp primop) args res_ty = do
98 dflags <- getDynFlags
99 cmm_args <- getNonVoidArgAmodes args
100 case shouldInlinePrimOp dflags primop cmm_args of
101 Nothing -> do -- out-of-line
102 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
103 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
104
105 Just f -- inline
106 | ReturnsPrim VoidRep <- result_info
107 -> do f []
108 emitReturn []
109
110 | ReturnsPrim rep <- result_info
111 -> do dflags <- getDynFlags
112 res <- newTemp (primRepCmmType dflags rep)
113 f [res]
114 emitReturn [CmmReg (CmmLocal res)]
115
116 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
117 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
118 f regs
119 emitReturn (map (CmmReg . CmmLocal) regs)
120
121 | otherwise -> panic "cgPrimop"
122 where
123 result_info = getPrimOpResultInfo primop
124
125 cgOpApp (StgPrimCallOp primcall) args _res_ty
126 = do { cmm_args <- getNonVoidArgAmodes args
127 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
128 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
129
130 -- | Interpret the argument as an unsigned value, assuming the value
131 -- is given in two-complement form in the given width.
132 --
133 -- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
134 --
135 -- This function is used to work around the fact that many array
136 -- primops take Int# arguments, but we interpret them as unsigned
137 -- quantities in the code gen. This means that we have to be careful
138 -- every time we work on e.g. a CmmInt literal that corresponds to the
139 -- array size, as it might contain a negative Integer value if the
140 -- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
141 -- literal.
142 asUnsigned :: Width -> Integer -> Integer
143 asUnsigned w n = n .&. (bit (widthInBits w) - 1)
144
145 -- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
146 -- ByteOff (or some other fixed width signed type) to represent
147 -- array sizes or indices. This means that these will overflow for
148 -- large enough sizes.
149
150 -- | Decide whether an out-of-line primop should be replaced by an
151 -- inline implementation. This might happen e.g. if there's enough
152 -- static information, such as statically know arguments, to emit a
153 -- more efficient implementation inline.
154 --
155 -- Returns 'Nothing' if this primop should use its out-of-line
156 -- implementation (defined elsewhere) and 'Just' together with a code
157 -- generating function that takes the output regs as arguments
158 -- otherwise.
159 shouldInlinePrimOp :: DynFlags
160 -> PrimOp -- ^ The primop
161 -> [CmmExpr] -- ^ The primop arguments
162 -> Maybe ([LocalReg] -> FCode ())
163
164 shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
165 | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
166 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
167
168 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
169 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
170 Just $ \ [res] ->
171 doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
172 [ (mkIntExpr dflags (fromInteger n),
173 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
174 , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
175 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
176 ]
177 (fromInteger n) init
178
179 shouldInlinePrimOp _ CopyArrayOp
180 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
181 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
182
183 shouldInlinePrimOp _ CopyMutableArrayOp
184 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
185 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
186
187 shouldInlinePrimOp _ CopyArrayArrayOp
188 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
189 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
190
191 shouldInlinePrimOp _ CopyMutableArrayArrayOp
192 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
193 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
194
195 shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
196 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
197 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
198
199 shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
200 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
201 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
202
203 shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
204 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
205 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
206
207 shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
208 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
209 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
210
211 shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
212 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
213 Just $ \ [res] ->
214 doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
215 [ (mkIntExpr dflags (fromInteger n),
216 fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
217 ]
218 (fromInteger n) init
219
220 shouldInlinePrimOp _ CopySmallArrayOp
221 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
222 Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
223
224 shouldInlinePrimOp _ CopySmallMutableArrayOp
225 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
226 Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
227
228 shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
229 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
230 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
231
232 shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
233 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
234 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
235
236 shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
237 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
238 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
239
240 shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
241 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
242 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
243
244 shouldInlinePrimOp dflags primop args
245 | primOpOutOfLine primop = Nothing
246 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
247
248 -- TODO: Several primops, such as 'copyArray#', only have an inline
249 -- implementation (below) but could possibly have both an inline
250 -- implementation and an out-of-line implementation, just like
251 -- 'newArray#'. This would lower the amount of code generated,
252 -- hopefully without a performance impact (needs to be measured).
253
254 ---------------------------------------------------
255 cgPrimOp :: [LocalReg] -- where to put the results
256 -> PrimOp -- the op
257 -> [StgArg] -- arguments
258 -> FCode ()
259
260 cgPrimOp results op args
261 = do dflags <- getDynFlags
262 arg_exprs <- getNonVoidArgAmodes args
263 emitPrimOp dflags results op arg_exprs
264
265
266 ------------------------------------------------------------------------
267 -- Emitting code for a primop
268 ------------------------------------------------------------------------
269
270 emitPrimOp :: DynFlags
271 -> [LocalReg] -- where to put the results
272 -> PrimOp -- the op
273 -> [CmmExpr] -- arguments
274 -> FCode ()
275
276 -- First we handle various awkward cases specially. The remaining
277 -- easy cases are then handled by translateOp, defined below.
278
279 emitPrimOp _ [res] ParOp [arg]
280 =
281 -- for now, just implement this in a C function
282 -- later, we might want to inline it.
283 emitCCall
284 [(res,NoHint)]
285 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
286 [(baseExpr, AddrHint), (arg,AddrHint)]
287
288 emitPrimOp dflags [res] SparkOp [arg]
289 = do
290 -- returns the value of arg in res. We're going to therefore
291 -- refer to arg twice (once to pass to newSpark(), and once to
292 -- assign to res), so put it in a temporary.
293 tmp <- assignTemp arg
294 tmp2 <- newTemp (bWord dflags)
295 emitCCall
296 [(tmp2,NoHint)]
297 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
298 [(baseExpr, AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
299 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
300
301 emitPrimOp dflags [res] GetCCSOfOp [arg]
302 = emitAssign (CmmLocal res) val
303 where
304 val
305 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
306 | otherwise = CmmLit (zeroCLit dflags)
307
308 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
309 = emitAssign (CmmLocal res) cccsExpr
310
311 emitPrimOp _ [res] MyThreadIdOp []
312 = emitAssign (CmmLocal res) currentTSOExpr
313
314 emitPrimOp dflags [res] ReadMutVarOp [mutv]
315 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
316
317 emitPrimOp dflags res@[] WriteMutVarOp [mutv,var]
318 = do -- Without this write barrier, other CPUs may see this pointer before
319 -- the writes for the closure it points to have occurred.
320 emitPrimCall res MO_WriteBarrier []
321 emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
322 emitCCall
323 [{-no results-}]
324 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
325 [(baseExpr, AddrHint), (mutv,AddrHint)]
326
327 -- #define sizzeofByteArrayzh(r,a) \
328 -- r = ((StgArrBytes *)(a))->bytes
329 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
330 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
331
332 -- #define sizzeofMutableByteArrayzh(r,a) \
333 -- r = ((StgArrBytes *)(a))->bytes
334 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
335 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
336
337 -- #define getSizzeofMutableByteArrayzh(r,a) \
338 -- r = ((StgArrBytes *)(a))->bytes
339 emitPrimOp dflags [res] GetSizeofMutableByteArrayOp [arg]
340 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
341
342
343 -- #define touchzh(o) /* nothing */
344 emitPrimOp _ res@[] TouchOp args@[_arg]
345 = do emitPrimCall res MO_Touch args
346
347 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
348 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
349 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
350
351 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
352 emitPrimOp dflags [res] StableNameToIntOp [arg]
353 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
354
355 -- #define eqStableNamezh(r,sn1,sn2) \
356 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
357 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
358 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
359 cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags),
360 cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags)
361 ])
362
363 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
364 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
365
366 -- #define addrToHValuezh(r,a) r=(P_)a
367 emitPrimOp _ [res] AddrToAnyOp [arg]
368 = emitAssign (CmmLocal res) arg
369
370 -- #define hvalueToAddrzh(r, a) r=(W_)a
371 emitPrimOp _ [res] AnyToAddrOp [arg]
372 = emitAssign (CmmLocal res) arg
373
374 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
375 -- Note: argument may be tagged!
376 emitPrimOp dflags [res] DataToTagOp [arg]
377 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
378
379 {- Freezing arrays-of-ptrs requires changing an info table, for the
380 benefit of the generational collector. It needs to scavenge mutable
381 objects, even if they are in old space. When they become immutable,
382 they can be removed from this scavenge list. -}
383
384 -- #define unsafeFreezzeArrayzh(r,a)
385 -- {
386 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
387 -- r = a;
388 -- }
389 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
390 = emit $ catAGraphs
391 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
392 mkAssign (CmmLocal res) arg ]
393 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
394 = emit $ catAGraphs
395 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
396 mkAssign (CmmLocal res) arg ]
397 emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
398 = emit $ catAGraphs
399 [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)),
400 mkAssign (CmmLocal res) arg ]
401
402 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
403 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
404 = emitAssign (CmmLocal res) arg
405
406 -- Reading/writing pointer arrays
407
408 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
409 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
410 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
411
412 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
413 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
414 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
415 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
416 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
417 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
418 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
419 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
420 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
421 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
422
423 emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
424 emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
425 emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
426
427 -- Getting the size of pointer arrays
428
429 emitPrimOp dflags [res] SizeofArrayOp [arg]
430 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg
431 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgMutArrPtrs_ptrs dflags))
432 (bWord dflags))
433 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
434 = emitPrimOp dflags [res] SizeofArrayOp [arg]
435 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
436 = emitPrimOp dflags [res] SizeofArrayOp [arg]
437 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
438 = emitPrimOp dflags [res] SizeofArrayOp [arg]
439
440 emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
441 emit $ mkAssign (CmmLocal res)
442 (cmmLoadIndexW dflags arg
443 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgSmallMutArrPtrs_ptrs dflags))
444 (bWord dflags))
445 emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
446 emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
447
448 -- IndexXXXoffAddr
449
450 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
451 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
452 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
453 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
454 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
455 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
456 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
457 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
458 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
459 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
460 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
461 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
462 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
463 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
464 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
465 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
466
467 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
468
469 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
470 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
471 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
472 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
473 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
474 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
475 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
476 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
477 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
478 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
479 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
480 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
481 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
482 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
483 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
484 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
485
486 -- IndexXXXArray
487
488 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
489 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
490 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
491 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
492 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
493 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
494 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
495 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
496 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
497 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
498 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
499 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
500 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
501 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
502 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
503 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
504
505 -- ReadXXXArray, identical to IndexXXXArray.
506
507 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
508 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
509 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
510 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
511 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
512 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
513 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
514 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
515 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
516 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
517 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
518 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
519 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
520 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
521 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
522 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
523
524 -- IndexWord8ArrayAsXXX
525
526 emitPrimOp dflags res IndexByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args
527 emitPrimOp dflags res IndexByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
528 emitPrimOp dflags res IndexByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
529 emitPrimOp dflags res IndexByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
530 emitPrimOp dflags res IndexByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
531 emitPrimOp _ res IndexByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args
532 emitPrimOp _ res IndexByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args
533 emitPrimOp dflags res IndexByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
534 emitPrimOp dflags res IndexByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args
535 emitPrimOp dflags res IndexByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args
536 emitPrimOp _ res IndexByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
537 emitPrimOp dflags res IndexByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args
538 emitPrimOp dflags res IndexByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
539 emitPrimOp _ res IndexByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
540
541 -- ReadInt8ArrayAsXXX, identical to IndexInt8ArrayAsXXX
542
543 emitPrimOp dflags res ReadByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args
544 emitPrimOp dflags res ReadByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
545 emitPrimOp dflags res ReadByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
546 emitPrimOp dflags res ReadByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
547 emitPrimOp dflags res ReadByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
548 emitPrimOp _ res ReadByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args
549 emitPrimOp _ res ReadByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args
550 emitPrimOp dflags res ReadByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
551 emitPrimOp dflags res ReadByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args
552 emitPrimOp dflags res ReadByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args
553 emitPrimOp _ res ReadByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
554 emitPrimOp dflags res ReadByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args
555 emitPrimOp dflags res ReadByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
556 emitPrimOp _ res ReadByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
557
558 -- WriteXXXoffAddr
559
560 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
561 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
562 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
563 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
564 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
565 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
566 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
567 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
568 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
569 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
570 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
571 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
572 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
573 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
574 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
575 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
576
577 -- WriteXXXArray
578
579 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
580 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
581 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
582 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
583 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
584 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
585 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
586 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
587 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
588 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
589 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
590 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
591 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
592 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
593 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
594 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
595
596 -- WriteInt8ArrayAsXXX
597
598 emitPrimOp dflags res WriteByteArrayOp_Word8AsChar args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
599 emitPrimOp dflags res WriteByteArrayOp_Word8AsWideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
600 emitPrimOp _ res WriteByteArrayOp_Word8AsInt args = doWriteByteArrayOp Nothing b8 res args
601 emitPrimOp _ res WriteByteArrayOp_Word8AsWord args = doWriteByteArrayOp Nothing b8 res args
602 emitPrimOp _ res WriteByteArrayOp_Word8AsAddr args = doWriteByteArrayOp Nothing b8 res args
603 emitPrimOp _ res WriteByteArrayOp_Word8AsFloat args = doWriteByteArrayOp Nothing b8 res args
604 emitPrimOp _ res WriteByteArrayOp_Word8AsDouble args = doWriteByteArrayOp Nothing b8 res args
605 emitPrimOp _ res WriteByteArrayOp_Word8AsStablePtr args = doWriteByteArrayOp Nothing b8 res args
606 emitPrimOp dflags res WriteByteArrayOp_Word8AsInt16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
607 emitPrimOp dflags res WriteByteArrayOp_Word8AsInt32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
608 emitPrimOp _ res WriteByteArrayOp_Word8AsInt64 args = doWriteByteArrayOp Nothing b8 res args
609 emitPrimOp dflags res WriteByteArrayOp_Word8AsWord16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
610 emitPrimOp dflags res WriteByteArrayOp_Word8AsWord32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
611 emitPrimOp _ res WriteByteArrayOp_Word8AsWord64 args = doWriteByteArrayOp Nothing b8 res args
612
613 -- Copying and setting byte arrays
614 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
615 doCopyByteArrayOp src src_off dst dst_off n
616 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
617 doCopyMutableByteArrayOp src src_off dst dst_off n
618 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
619 doCopyByteArrayToAddrOp src src_off dst n
620 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
621 doCopyMutableByteArrayToAddrOp src src_off dst n
622 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
623 doCopyAddrToByteArrayOp src dst dst_off n
624 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
625 doSetByteArrayOp ba off len c
626
627 -- Comparing byte arrays
628 emitPrimOp _ [res] CompareByteArraysOp [ba1,ba1_off,ba2,ba2_off,n] =
629 doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n
630
631 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
632 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
633 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
634 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
635
636 -- Population count
637 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
638 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
639 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
640 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
641 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
642
643 -- Parallel bit deposit
644 emitPrimOp _ [res] Pdep8Op [src, mask] = emitPdepCall res src mask W8
645 emitPrimOp _ [res] Pdep16Op [src, mask] = emitPdepCall res src mask W16
646 emitPrimOp _ [res] Pdep32Op [src, mask] = emitPdepCall res src mask W32
647 emitPrimOp _ [res] Pdep64Op [src, mask] = emitPdepCall res src mask W64
648 emitPrimOp dflags [res] PdepOp [src, mask] = emitPdepCall res src mask (wordWidth dflags)
649
650 -- Parallel bit extract
651 emitPrimOp _ [res] Pext8Op [src, mask] = emitPextCall res src mask W8
652 emitPrimOp _ [res] Pext16Op [src, mask] = emitPextCall res src mask W16
653 emitPrimOp _ [res] Pext32Op [src, mask] = emitPextCall res src mask W32
654 emitPrimOp _ [res] Pext64Op [src, mask] = emitPextCall res src mask W64
655 emitPrimOp dflags [res] PextOp [src, mask] = emitPextCall res src mask (wordWidth dflags)
656
657 -- count leading zeros
658 emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
659 emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
660 emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
661 emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
662 emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
663
664 -- count trailing zeros
665 emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
666 emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
667 emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
668 emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
669 emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
670
671 -- Unsigned int to floating point conversions
672 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
673 (MO_UF_Conv W32) [w]
674 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
675 (MO_UF_Conv W64) [w]
676
677 -- SIMD primops
678 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
679 checkVecCompatibility dflags vcat n w
680 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
681 where
682 zeros :: CmmExpr
683 zeros = CmmLit $ CmmVec (replicate n zero)
684
685 zero :: CmmLit
686 zero = case vcat of
687 IntVec -> CmmInt 0 w
688 WordVec -> CmmInt 0 w
689 FloatVec -> CmmFloat 0 w
690
691 ty :: CmmType
692 ty = vecVmmType vcat n w
693
694 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
695 checkVecCompatibility dflags vcat n w
696 when (es `lengthIsNot` n) $
697 panic "emitPrimOp: VecPackOp has wrong number of arguments"
698 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
699 where
700 zeros :: CmmExpr
701 zeros = CmmLit $ CmmVec (replicate n zero)
702
703 zero :: CmmLit
704 zero = case vcat of
705 IntVec -> CmmInt 0 w
706 WordVec -> CmmInt 0 w
707 FloatVec -> CmmFloat 0 w
708
709 ty :: CmmType
710 ty = vecVmmType vcat n w
711
712 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
713 checkVecCompatibility dflags vcat n w
714 when (res `lengthIsNot` n) $
715 panic "emitPrimOp: VecUnpackOp has wrong number of results"
716 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
717 where
718 ty :: CmmType
719 ty = vecVmmType vcat n w
720
721 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
722 checkVecCompatibility dflags vcat n w
723 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
724 where
725 ty :: CmmType
726 ty = vecVmmType vcat n w
727
728 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
729 checkVecCompatibility dflags vcat n w
730 doIndexByteArrayOp Nothing ty res args
731 where
732 ty :: CmmType
733 ty = vecVmmType vcat n w
734
735 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
736 checkVecCompatibility dflags vcat n w
737 doIndexByteArrayOp Nothing ty res args
738 where
739 ty :: CmmType
740 ty = vecVmmType vcat n w
741
742 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
743 checkVecCompatibility dflags vcat n w
744 doWriteByteArrayOp Nothing ty res args
745 where
746 ty :: CmmType
747 ty = vecVmmType vcat n w
748
749 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
750 checkVecCompatibility dflags vcat n w
751 doIndexOffAddrOp Nothing ty res args
752 where
753 ty :: CmmType
754 ty = vecVmmType vcat n w
755
756 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
757 checkVecCompatibility dflags vcat n w
758 doIndexOffAddrOp Nothing ty res args
759 where
760 ty :: CmmType
761 ty = vecVmmType vcat n w
762
763 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
764 checkVecCompatibility dflags vcat n w
765 doWriteOffAddrOp Nothing ty res args
766 where
767 ty :: CmmType
768 ty = vecVmmType vcat n w
769
770 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
771 checkVecCompatibility dflags vcat n w
772 doIndexByteArrayOpAs Nothing vecty ty res args
773 where
774 vecty :: CmmType
775 vecty = vecVmmType vcat n w
776
777 ty :: CmmType
778 ty = vecCmmCat vcat w
779
780 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
781 checkVecCompatibility dflags vcat n w
782 doIndexByteArrayOpAs Nothing vecty ty res args
783 where
784 vecty :: CmmType
785 vecty = vecVmmType vcat n w
786
787 ty :: CmmType
788 ty = vecCmmCat vcat w
789
790 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
791 checkVecCompatibility dflags vcat n w
792 doWriteByteArrayOp Nothing ty res args
793 where
794 ty :: CmmType
795 ty = vecCmmCat vcat w
796
797 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
798 checkVecCompatibility dflags vcat n w
799 doIndexOffAddrOpAs Nothing vecty ty res args
800 where
801 vecty :: CmmType
802 vecty = vecVmmType vcat n w
803
804 ty :: CmmType
805 ty = vecCmmCat vcat w
806
807 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
808 checkVecCompatibility dflags vcat n w
809 doIndexOffAddrOpAs Nothing vecty ty res args
810 where
811 vecty :: CmmType
812 vecty = vecVmmType vcat n w
813
814 ty :: CmmType
815 ty = vecCmmCat vcat w
816
817 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
818 checkVecCompatibility dflags vcat n w
819 doWriteOffAddrOp Nothing ty res args
820 where
821 ty :: CmmType
822 ty = vecCmmCat vcat w
823
824 -- Prefetch
825 emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
826 emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
827 emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
828 emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
829
830 emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
831 emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
832 emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
833 emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
834
835 emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
836 emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
837 emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
838 emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
839
840 emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
841 emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
842 emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
843 emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
844
845 -- Atomic read-modify-write
846 emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
847 doAtomicRMW res AMO_Add mba ix (bWord dflags) n
848 emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
849 doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
850 emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
851 doAtomicRMW res AMO_And mba ix (bWord dflags) n
852 emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
853 doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
854 emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
855 doAtomicRMW res AMO_Or mba ix (bWord dflags) n
856 emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
857 doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
858 emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
859 doAtomicReadByteArray res mba ix (bWord dflags)
860 emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
861 doAtomicWriteByteArray mba ix (bWord dflags) val
862 emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
863 doCasByteArray res mba ix (bWord dflags) old new
864
865 -- The rest just translate straightforwardly
866 emitPrimOp dflags [res] op [arg]
867 | nopOp op
868 = emitAssign (CmmLocal res) arg
869
870 | Just (mop,rep) <- narrowOp op
871 = emitAssign (CmmLocal res) $
872 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
873
874 emitPrimOp dflags r@[res] op args
875 | Just prim <- callishOp op
876 = do emitPrimCall r prim args
877
878 | Just mop <- translateOp dflags op
879 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
880 emit stmt
881
882 emitPrimOp dflags results op args
883 = case callishPrimOpSupported dflags op of
884 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
885 Right gen -> gen results args
886
887 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
888
889 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
890 callishPrimOpSupported dflags op
891 = case op of
892 IntQuotRemOp | ncg && (x86ish
893 || ppc) -> Left (MO_S_QuotRem (wordWidth dflags))
894 | otherwise -> Right (genericIntQuotRemOp dflags)
895
896 WordQuotRemOp | ncg && (x86ish
897 || ppc) -> Left (MO_U_QuotRem (wordWidth dflags))
898 | otherwise -> Right (genericWordQuotRemOp dflags)
899
900 WordQuotRem2Op | (ncg && (x86ish
901 || ppc))
902 || llvm -> Left (MO_U_QuotRem2 (wordWidth dflags))
903 | otherwise -> Right (genericWordQuotRem2Op dflags)
904
905 WordAdd2Op | (ncg && (x86ish
906 || ppc))
907 || llvm -> Left (MO_Add2 (wordWidth dflags))
908 | otherwise -> Right genericWordAdd2Op
909
910 WordSubCOp | (ncg && (x86ish
911 || ppc))
912 || llvm -> Left (MO_SubWordC (wordWidth dflags))
913 | otherwise -> Right genericWordSubCOp
914
915 IntAddCOp | (ncg && (x86ish
916 || ppc))
917 || llvm -> Left (MO_AddIntC (wordWidth dflags))
918 | otherwise -> Right genericIntAddCOp
919
920 IntSubCOp | (ncg && (x86ish
921 || ppc))
922 || llvm -> Left (MO_SubIntC (wordWidth dflags))
923 | otherwise -> Right genericIntSubCOp
924
925 WordMul2Op | ncg && (x86ish
926 || ppc)
927 || llvm -> Left (MO_U_Mul2 (wordWidth dflags))
928 | otherwise -> Right genericWordMul2Op
929 FloatFabsOp | (ncg && x86ish
930 || ppc)
931 || llvm -> Left MO_F32_Fabs
932 | otherwise -> Right $ genericFabsOp W32
933 DoubleFabsOp | (ncg && x86ish
934 || ppc)
935 || llvm -> Left MO_F64_Fabs
936 | otherwise -> Right $ genericFabsOp W64
937
938 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
939 where
940 ncg = case hscTarget dflags of
941 HscAsm -> True
942 _ -> False
943 llvm = case hscTarget dflags of
944 HscLlvm -> True
945 _ -> False
946 x86ish = case platformArch (targetPlatform dflags) of
947 ArchX86 -> True
948 ArchX86_64 -> True
949 _ -> False
950 ppc = case platformArch (targetPlatform dflags) of
951 ArchPPC -> True
952 ArchPPC_64 _ -> True
953 _ -> False
954
955 genericIntQuotRemOp :: DynFlags -> GenericOp
956 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
957 = emit $ mkAssign (CmmLocal res_q)
958 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
959 mkAssign (CmmLocal res_r)
960 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
961 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
962
963 genericWordQuotRemOp :: DynFlags -> GenericOp
964 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
965 = emit $ mkAssign (CmmLocal res_q)
966 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
967 mkAssign (CmmLocal res_r)
968 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
969 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
970
971 genericWordQuotRem2Op :: DynFlags -> GenericOp
972 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
973 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
974 where ty = cmmExprType dflags arg_x_high
975 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
976 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
977 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
978 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
979 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
980 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
981 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
982 zero = lit 0
983 one = lit 1
984 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
985 lit i = CmmLit (CmmInt i (wordWidth dflags))
986
987 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
988 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
989 mkAssign (CmmLocal res_r) high)
990 f i acc high low =
991 do roverflowedBit <- newTemp ty
992 rhigh' <- newTemp ty
993 rhigh'' <- newTemp ty
994 rlow' <- newTemp ty
995 risge <- newTemp ty
996 racc' <- newTemp ty
997 let high' = CmmReg (CmmLocal rhigh')
998 isge = CmmReg (CmmLocal risge)
999 overflowedBit = CmmReg (CmmLocal roverflowedBit)
1000 let this = catAGraphs
1001 [mkAssign (CmmLocal roverflowedBit)
1002 (shr high negone),
1003 mkAssign (CmmLocal rhigh')
1004 (or (shl high one) (shr low negone)),
1005 mkAssign (CmmLocal rlow')
1006 (shl low one),
1007 mkAssign (CmmLocal risge)
1008 (or (overflowedBit `ne` zero)
1009 (high' `ge` arg_y)),
1010 mkAssign (CmmLocal rhigh'')
1011 (high' `minus` (arg_y `times` isge)),
1012 mkAssign (CmmLocal racc')
1013 (or (shl acc one) isge)]
1014 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
1015 (CmmReg (CmmLocal rhigh''))
1016 (CmmReg (CmmLocal rlow'))
1017 return (this <*> rest)
1018 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
1019
1020 genericWordAdd2Op :: GenericOp
1021 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
1022 = do dflags <- getDynFlags
1023 r1 <- newTemp (cmmExprType dflags arg_x)
1024 r2 <- newTemp (cmmExprType dflags arg_x)
1025 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
1026 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
1027 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
1028 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
1029 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
1030 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
1031 (wordWidth dflags))
1032 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
1033 emit $ catAGraphs
1034 [mkAssign (CmmLocal r1)
1035 (add (bottomHalf arg_x) (bottomHalf arg_y)),
1036 mkAssign (CmmLocal r2)
1037 (add (topHalf (CmmReg (CmmLocal r1)))
1038 (add (topHalf arg_x) (topHalf arg_y))),
1039 mkAssign (CmmLocal res_h)
1040 (topHalf (CmmReg (CmmLocal r2))),
1041 mkAssign (CmmLocal res_l)
1042 (or (toTopHalf (CmmReg (CmmLocal r2)))
1043 (bottomHalf (CmmReg (CmmLocal r1))))]
1044 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
1045
1046 genericWordSubCOp :: GenericOp
1047 genericWordSubCOp [res_r, res_c] [aa, bb] = do
1048 dflags <- getDynFlags
1049 emit $ catAGraphs
1050 [ -- Put the result into 'res_r'.
1051 mkAssign (CmmLocal res_r) $
1052 CmmMachOp (mo_wordSub dflags) [aa, bb]
1053 -- Set 'res_c' to 1 if 'bb > aa' and to 0 otherwise.
1054 , mkAssign (CmmLocal res_c) $
1055 CmmMachOp (mo_wordUGt dflags) [bb, aa]
1056 ]
1057 genericWordSubCOp _ _ = panic "genericWordSubCOp"
1058
1059 genericIntAddCOp :: GenericOp
1060 genericIntAddCOp [res_r, res_c] [aa, bb]
1061 {-
1062 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
1063 C, and without needing any comparisons. This may not be the
1064 fastest way to do it - if you have better code, please send it! --SDM
1065
1066 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
1067
1068 We currently don't make use of the r value if c is != 0 (i.e.
1069 overflow), we just convert to big integers and try again. This
1070 could be improved by making r and c the correct values for
1071 plugging into a new J#.
1072
1073 { r = ((I_)(a)) + ((I_)(b)); \
1074 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
1075 >> (BITS_IN (I_) - 1); \
1076 }
1077 Wading through the mass of bracketry, it seems to reduce to:
1078 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
1079
1080 -}
1081 = do dflags <- getDynFlags
1082 emit $ catAGraphs [
1083 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
1084 mkAssign (CmmLocal res_c) $
1085 CmmMachOp (mo_wordUShr dflags) [
1086 CmmMachOp (mo_wordAnd dflags) [
1087 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
1088 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
1089 ],
1090 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1091 ]
1092 ]
1093 genericIntAddCOp _ _ = panic "genericIntAddCOp"
1094
1095 genericIntSubCOp :: GenericOp
1096 genericIntSubCOp [res_r, res_c] [aa, bb]
1097 {- Similarly:
1098 #define subIntCzh(r,c,a,b) \
1099 { r = ((I_)(a)) - ((I_)(b)); \
1100 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
1101 >> (BITS_IN (I_) - 1); \
1102 }
1103
1104 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
1105 -}
1106 = do dflags <- getDynFlags
1107 emit $ catAGraphs [
1108 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
1109 mkAssign (CmmLocal res_c) $
1110 CmmMachOp (mo_wordUShr dflags) [
1111 CmmMachOp (mo_wordAnd dflags) [
1112 CmmMachOp (mo_wordXor dflags) [aa,bb],
1113 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
1114 ],
1115 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1116 ]
1117 ]
1118 genericIntSubCOp _ _ = panic "genericIntSubCOp"
1119
1120 genericWordMul2Op :: GenericOp
1121 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
1122 = do dflags <- getDynFlags
1123 let t = cmmExprType dflags arg_x
1124 xlyl <- liftM CmmLocal $ newTemp t
1125 xlyh <- liftM CmmLocal $ newTemp t
1126 xhyl <- liftM CmmLocal $ newTemp t
1127 r <- liftM CmmLocal $ newTemp t
1128 -- This generic implementation is very simple and slow. We might
1129 -- well be able to do better, but for now this at least works.
1130 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
1131 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
1132 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
1133 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
1134 sum = foldl1 add
1135 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
1136 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
1137 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
1138 (wordWidth dflags))
1139 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
1140 emit $ catAGraphs
1141 [mkAssign xlyl
1142 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
1143 mkAssign xlyh
1144 (mul (bottomHalf arg_x) (topHalf arg_y)),
1145 mkAssign xhyl
1146 (mul (topHalf arg_x) (bottomHalf arg_y)),
1147 mkAssign r
1148 (sum [topHalf (CmmReg xlyl),
1149 bottomHalf (CmmReg xhyl),
1150 bottomHalf (CmmReg xlyh)]),
1151 mkAssign (CmmLocal res_l)
1152 (or (bottomHalf (CmmReg xlyl))
1153 (toTopHalf (CmmReg r))),
1154 mkAssign (CmmLocal res_h)
1155 (sum [mul (topHalf arg_x) (topHalf arg_y),
1156 topHalf (CmmReg xhyl),
1157 topHalf (CmmReg xlyh),
1158 topHalf (CmmReg r)])]
1159 genericWordMul2Op _ _ = panic "genericWordMul2Op"
1160
1161 -- This replicates what we had in libraries/base/GHC/Float.hs:
1162 --
1163 -- abs x | x == 0 = 0 -- handles (-0.0)
1164 -- | x > 0 = x
1165 -- | otherwise = negateFloat x
1166 genericFabsOp :: Width -> GenericOp
1167 genericFabsOp w [res_r] [aa]
1168 = do dflags <- getDynFlags
1169 let zero = CmmLit (CmmFloat 0 w)
1170
1171 eq x y = CmmMachOp (MO_F_Eq w) [x, y]
1172 gt x y = CmmMachOp (MO_F_Gt w) [x, y]
1173
1174 neg x = CmmMachOp (MO_F_Neg w) [x]
1175
1176 g1 = catAGraphs [mkAssign (CmmLocal res_r) zero]
1177 g2 = catAGraphs [mkAssign (CmmLocal res_r) aa]
1178
1179 res_t <- CmmLocal <$> newTemp (cmmExprType dflags aa)
1180 let g3 = catAGraphs [mkAssign res_t aa,
1181 mkAssign (CmmLocal res_r) (neg (CmmReg res_t))]
1182
1183 g4 <- mkCmmIfThenElse (gt aa zero) g2 g3
1184
1185 emit =<< mkCmmIfThenElse (eq aa zero) g1 g4
1186
1187 genericFabsOp _ _ _ = panic "genericFabsOp"
1188
1189 -- These PrimOps are NOPs in Cmm
1190
1191 nopOp :: PrimOp -> Bool
1192 nopOp Int2WordOp = True
1193 nopOp Word2IntOp = True
1194 nopOp Int2AddrOp = True
1195 nopOp Addr2IntOp = True
1196 nopOp ChrOp = True -- Int# and Char# are rep'd the same
1197 nopOp OrdOp = True
1198 nopOp _ = False
1199
1200 -- These PrimOps turn into double casts
1201
1202 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
1203 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
1204 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
1205 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
1206 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
1207 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
1208 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
1209 narrowOp _ = Nothing
1210
1211 -- Native word signless ops
1212
1213 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
1214 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
1215 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
1216 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
1217 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
1218 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
1219 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
1220
1221 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
1222 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
1223 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
1224 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
1225 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
1226 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
1227
1228 translateOp dflags AndOp = Just (mo_wordAnd dflags)
1229 translateOp dflags OrOp = Just (mo_wordOr dflags)
1230 translateOp dflags XorOp = Just (mo_wordXor dflags)
1231 translateOp dflags NotOp = Just (mo_wordNot dflags)
1232 translateOp dflags SllOp = Just (mo_wordShl dflags)
1233 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
1234
1235 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
1236
1237 -- Native word signed ops
1238
1239 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
1240 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
1241 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
1242 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
1243 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
1244
1245
1246 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
1247 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
1248 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
1249 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
1250
1251 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
1252 translateOp dflags OrIOp = Just (mo_wordOr dflags)
1253 translateOp dflags XorIOp = Just (mo_wordXor dflags)
1254 translateOp dflags NotIOp = Just (mo_wordNot dflags)
1255 translateOp dflags ISllOp = Just (mo_wordShl dflags)
1256 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
1257 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
1258
1259 -- Native word unsigned ops
1260
1261 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
1262 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
1263 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
1264 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
1265
1266 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
1267 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
1268 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
1269
1270 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
1271 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
1272 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
1273 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
1274
1275 -- Char# ops
1276
1277 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
1278 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
1279 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
1280 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
1281 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
1282 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
1283
1284 -- Double ops
1285
1286 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
1287 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
1288 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
1289 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
1290 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
1291 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
1292
1293 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
1294 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
1295 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
1296 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
1297 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
1298
1299 -- Float ops
1300
1301 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
1302 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
1303 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
1304 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1305 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1306 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1307
1308 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1309 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1310 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1311 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1312 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1313
1314 -- Vector ops
1315
1316 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1317 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1318 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1319 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1320 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1321
1322 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1323 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1324 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1325 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1326 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1327 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1328
1329 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1330 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1331 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1332 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1333 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1334
1335 -- Conversions
1336
1337 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1338 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1339
1340 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1341 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1342
1343 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1344 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1345
1346 -- Word comparisons masquerading as more exotic things.
1347
1348 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1349 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1350 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1351 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1352 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1353 translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
1354 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1355 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1356
1357 translateOp _ _ = Nothing
1358
1359 -- These primops are implemented by CallishMachOps, because they sometimes
1360 -- turn into foreign calls depending on the backend.
1361
1362 callishOp :: PrimOp -> Maybe CallishMachOp
1363 callishOp DoublePowerOp = Just MO_F64_Pwr
1364 callishOp DoubleSinOp = Just MO_F64_Sin
1365 callishOp DoubleCosOp = Just MO_F64_Cos
1366 callishOp DoubleTanOp = Just MO_F64_Tan
1367 callishOp DoubleSinhOp = Just MO_F64_Sinh
1368 callishOp DoubleCoshOp = Just MO_F64_Cosh
1369 callishOp DoubleTanhOp = Just MO_F64_Tanh
1370 callishOp DoubleAsinOp = Just MO_F64_Asin
1371 callishOp DoubleAcosOp = Just MO_F64_Acos
1372 callishOp DoubleAtanOp = Just MO_F64_Atan
1373 callishOp DoubleLogOp = Just MO_F64_Log
1374 callishOp DoubleExpOp = Just MO_F64_Exp
1375 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1376
1377 callishOp FloatPowerOp = Just MO_F32_Pwr
1378 callishOp FloatSinOp = Just MO_F32_Sin
1379 callishOp FloatCosOp = Just MO_F32_Cos
1380 callishOp FloatTanOp = Just MO_F32_Tan
1381 callishOp FloatSinhOp = Just MO_F32_Sinh
1382 callishOp FloatCoshOp = Just MO_F32_Cosh
1383 callishOp FloatTanhOp = Just MO_F32_Tanh
1384 callishOp FloatAsinOp = Just MO_F32_Asin
1385 callishOp FloatAcosOp = Just MO_F32_Acos
1386 callishOp FloatAtanOp = Just MO_F32_Atan
1387 callishOp FloatLogOp = Just MO_F32_Log
1388 callishOp FloatExpOp = Just MO_F32_Exp
1389 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1390
1391 callishOp _ = Nothing
1392
1393 ------------------------------------------------------------------------------
1394 -- Helpers for translating various minor variants of array indexing.
1395
1396 doIndexOffAddrOp :: Maybe MachOp
1397 -> CmmType
1398 -> [LocalReg]
1399 -> [CmmExpr]
1400 -> FCode ()
1401 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1402 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1403 doIndexOffAddrOp _ _ _ _
1404 = panic "StgCmmPrim: doIndexOffAddrOp"
1405
1406 doIndexOffAddrOpAs :: Maybe MachOp
1407 -> CmmType
1408 -> CmmType
1409 -> [LocalReg]
1410 -> [CmmExpr]
1411 -> FCode ()
1412 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1413 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1414 doIndexOffAddrOpAs _ _ _ _ _
1415 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1416
1417 doIndexByteArrayOp :: Maybe MachOp
1418 -> CmmType
1419 -> [LocalReg]
1420 -> [CmmExpr]
1421 -> FCode ()
1422 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1423 = do dflags <- getDynFlags
1424 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1425 doIndexByteArrayOp _ _ _ _
1426 = panic "StgCmmPrim: doIndexByteArrayOp"
1427
1428 doIndexByteArrayOpAs :: Maybe MachOp
1429 -> CmmType
1430 -> CmmType
1431 -> [LocalReg]
1432 -> [CmmExpr]
1433 -> FCode ()
1434 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1435 = do dflags <- getDynFlags
1436 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1437 doIndexByteArrayOpAs _ _ _ _ _
1438 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1439
1440 doReadPtrArrayOp :: LocalReg
1441 -> CmmExpr
1442 -> CmmExpr
1443 -> FCode ()
1444 doReadPtrArrayOp res addr idx
1445 = do dflags <- getDynFlags
1446 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1447
1448 doWriteOffAddrOp :: Maybe MachOp
1449 -> CmmType
1450 -> [LocalReg]
1451 -> [CmmExpr]
1452 -> FCode ()
1453 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1454 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1455 doWriteOffAddrOp _ _ _ _
1456 = panic "StgCmmPrim: doWriteOffAddrOp"
1457
1458 doWriteByteArrayOp :: Maybe MachOp
1459 -> CmmType
1460 -> [LocalReg]
1461 -> [CmmExpr]
1462 -> FCode ()
1463 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1464 = do dflags <- getDynFlags
1465 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1466 doWriteByteArrayOp _ _ _ _
1467 = panic "StgCmmPrim: doWriteByteArrayOp"
1468
1469 doWritePtrArrayOp :: CmmExpr
1470 -> CmmExpr
1471 -> CmmExpr
1472 -> FCode ()
1473 doWritePtrArrayOp addr idx val
1474 = do dflags <- getDynFlags
1475 let ty = cmmExprType dflags val
1476 -- This write barrier is to ensure that the heap writes to the object
1477 -- referred to by val have happened before we write val into the array.
1478 -- See #12469 for details.
1479 emitPrimCall [] MO_WriteBarrier []
1480 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1481 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1482 -- the write barrier. We must write a byte into the mark table:
1483 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1484 emit $ mkStore (
1485 cmmOffsetExpr dflags
1486 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1487 (loadArrPtrsSize dflags addr))
1488 (CmmMachOp (mo_wordUShr dflags) [idx,
1489 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1490 ) (CmmLit (CmmInt 1 W8))
1491
1492 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1493 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1494 where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1495
1496 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1497 -> Maybe MachOp -- Optional result cast
1498 -> CmmType -- Type of element we are accessing
1499 -> LocalReg -- Destination
1500 -> CmmExpr -- Base address
1501 -> CmmType -- Type of element by which we are indexing
1502 -> CmmExpr -- Index
1503 -> FCode ()
1504 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1505 = do dflags <- getDynFlags
1506 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1507 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1508 = do dflags <- getDynFlags
1509 emitAssign (CmmLocal res) (CmmMachOp cast [
1510 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1511
1512 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1513 -> Maybe MachOp -- Optional value cast
1514 -> CmmExpr -- Base address
1515 -> CmmType -- Type of element by which we are indexing
1516 -> CmmExpr -- Index
1517 -> CmmExpr -- Value to write
1518 -> FCode ()
1519 mkBasicIndexedWrite off Nothing base idx_ty idx val
1520 = do dflags <- getDynFlags
1521 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1522 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1523 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1524
1525 -- ----------------------------------------------------------------------------
1526 -- Misc utils
1527
1528 cmmIndexOffExpr :: DynFlags
1529 -> ByteOff -- Initial offset in bytes
1530 -> Width -- Width of element by which we are indexing
1531 -> CmmExpr -- Base address
1532 -> CmmExpr -- Index
1533 -> CmmExpr
1534 cmmIndexOffExpr dflags off width base idx
1535 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1536
1537 cmmLoadIndexOffExpr :: DynFlags
1538 -> ByteOff -- Initial offset in bytes
1539 -> CmmType -- Type of element we are accessing
1540 -> CmmExpr -- Base address
1541 -> CmmType -- Type of element by which we are indexing
1542 -> CmmExpr -- Index
1543 -> CmmExpr
1544 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1545 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1546
1547 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1548 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1549
1550 ------------------------------------------------------------------------------
1551 -- Helpers for translating vector primops.
1552
1553 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1554 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1555
1556 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1557 vecCmmCat IntVec = cmmBits
1558 vecCmmCat WordVec = cmmBits
1559 vecCmmCat FloatVec = cmmFloat
1560
1561 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1562 vecElemInjectCast _ FloatVec _ = Nothing
1563 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1564 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1565 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1566 vecElemInjectCast _ IntVec W64 = Nothing
1567 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1568 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1569 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1570 vecElemInjectCast _ WordVec W64 = Nothing
1571 vecElemInjectCast _ _ _ = Nothing
1572
1573 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1574 vecElemProjectCast _ FloatVec _ = Nothing
1575 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1576 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1577 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1578 vecElemProjectCast _ IntVec W64 = Nothing
1579 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1580 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1581 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1582 vecElemProjectCast _ WordVec W64 = Nothing
1583 vecElemProjectCast _ _ _ = Nothing
1584
1585 -- Check to make sure that we can generate code for the specified vector type
1586 -- given the current set of dynamic flags.
1587 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1588 checkVecCompatibility dflags vcat l w = do
1589 when (hscTarget dflags /= HscLlvm) $ do
1590 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1591 ,"Please use -fllvm."]
1592 check vecWidth vcat l w
1593 where
1594 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1595 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1596 sorry $ "128-bit wide single-precision floating point " ++
1597 "SIMD vector instructions require at least -msse."
1598 check W128 _ _ _ | not (isSse2Enabled dflags) =
1599 sorry $ "128-bit wide integer and double precision " ++
1600 "SIMD vector instructions require at least -msse2."
1601 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1602 sorry $ "256-bit wide floating point " ++
1603 "SIMD vector instructions require at least -mavx."
1604 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1605 sorry $ "256-bit wide integer " ++
1606 "SIMD vector instructions require at least -mavx2."
1607 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1608 sorry $ "512-bit wide " ++
1609 "SIMD vector instructions require -mavx512f."
1610 check _ _ _ _ = return ()
1611
1612 vecWidth = typeWidth (vecVmmType vcat l w)
1613
1614 ------------------------------------------------------------------------------
1615 -- Helpers for translating vector packing and unpacking.
1616
1617 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1618 -> CmmType -- Type of vector
1619 -> CmmExpr -- Initial vector
1620 -> [CmmExpr] -- Elements
1621 -> CmmFormal -- Destination for result
1622 -> FCode ()
1623 doVecPackOp maybe_pre_write_cast ty z es res = do
1624 dst <- newTemp ty
1625 emitAssign (CmmLocal dst) z
1626 vecPack dst es 0
1627 where
1628 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1629 vecPack src [] _ =
1630 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1631
1632 vecPack src (e : es) i = do
1633 dst <- newTemp ty
1634 if isFloatType (vecElemType ty)
1635 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1636 [CmmReg (CmmLocal src), cast e, iLit])
1637 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1638 [CmmReg (CmmLocal src), cast e, iLit])
1639 vecPack dst es (i + 1)
1640 where
1641 -- vector indices are always 32-bits
1642 iLit = CmmLit (CmmInt (toInteger i) W32)
1643
1644 cast :: CmmExpr -> CmmExpr
1645 cast val = case maybe_pre_write_cast of
1646 Nothing -> val
1647 Just cast -> CmmMachOp cast [val]
1648
1649 len :: Length
1650 len = vecLength ty
1651
1652 wid :: Width
1653 wid = typeWidth (vecElemType ty)
1654
1655 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1656 -> CmmType -- Type of vector
1657 -> CmmExpr -- Vector
1658 -> [CmmFormal] -- Element results
1659 -> FCode ()
1660 doVecUnpackOp maybe_post_read_cast ty e res =
1661 vecUnpack res 0
1662 where
1663 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1664 vecUnpack [] _ =
1665 return ()
1666
1667 vecUnpack (r : rs) i = do
1668 if isFloatType (vecElemType ty)
1669 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1670 [e, iLit]))
1671 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1672 [e, iLit]))
1673 vecUnpack rs (i + 1)
1674 where
1675 -- vector indices are always 32-bits
1676 iLit = CmmLit (CmmInt (toInteger i) W32)
1677
1678 cast :: CmmExpr -> CmmExpr
1679 cast val = case maybe_post_read_cast of
1680 Nothing -> val
1681 Just cast -> CmmMachOp cast [val]
1682
1683 len :: Length
1684 len = vecLength ty
1685
1686 wid :: Width
1687 wid = typeWidth (vecElemType ty)
1688
1689 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1690 -> CmmType -- Vector type
1691 -> CmmExpr -- Source vector
1692 -> CmmExpr -- Element
1693 -> CmmExpr -- Index at which to insert element
1694 -> CmmFormal -- Destination for result
1695 -> FCode ()
1696 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1697 dflags <- getDynFlags
1698 -- vector indices are always 32-bits
1699 let idx' :: CmmExpr
1700 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1701 if isFloatType (vecElemType ty)
1702 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1703 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1704 where
1705 cast :: CmmExpr -> CmmExpr
1706 cast val = case maybe_pre_write_cast of
1707 Nothing -> val
1708 Just cast -> CmmMachOp cast [val]
1709
1710 len :: Length
1711 len = vecLength ty
1712
1713 wid :: Width
1714 wid = typeWidth (vecElemType ty)
1715
1716 ------------------------------------------------------------------------------
1717 -- Helpers for translating prefetching.
1718
1719
1720 -- | Translate byte array prefetch operations into proper primcalls.
1721 doPrefetchByteArrayOp :: Int
1722 -> [CmmExpr]
1723 -> FCode ()
1724 doPrefetchByteArrayOp locality [addr,idx]
1725 = do dflags <- getDynFlags
1726 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1727 doPrefetchByteArrayOp _ _
1728 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1729
1730 -- | Translate mutable byte array prefetch operations into proper primcalls.
1731 doPrefetchMutableByteArrayOp :: Int
1732 -> [CmmExpr]
1733 -> FCode ()
1734 doPrefetchMutableByteArrayOp locality [addr,idx]
1735 = do dflags <- getDynFlags
1736 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1737 doPrefetchMutableByteArrayOp _ _
1738 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1739
1740 -- | Translate address prefetch operations into proper primcalls.
1741 doPrefetchAddrOp ::Int
1742 -> [CmmExpr]
1743 -> FCode ()
1744 doPrefetchAddrOp locality [addr,idx]
1745 = mkBasicPrefetch locality 0 addr idx
1746 doPrefetchAddrOp _ _
1747 = panic "StgCmmPrim: doPrefetchAddrOp"
1748
1749 -- | Translate value prefetch operations into proper primcalls.
1750 doPrefetchValueOp :: Int
1751 -> [CmmExpr]
1752 -> FCode ()
1753 doPrefetchValueOp locality [addr]
1754 = do dflags <- getDynFlags
1755 mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
1756 doPrefetchValueOp _ _
1757 = panic "StgCmmPrim: doPrefetchValueOp"
1758
1759 -- | helper to generate prefetch primcalls
1760 mkBasicPrefetch :: Int -- Locality level 0-3
1761 -> ByteOff -- Initial offset in bytes
1762 -> CmmExpr -- Base address
1763 -> CmmExpr -- Index
1764 -> FCode ()
1765 mkBasicPrefetch locality off base idx
1766 = do dflags <- getDynFlags
1767 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1768 return ()
1769
1770 -- ----------------------------------------------------------------------------
1771 -- Allocating byte arrays
1772
1773 -- | Takes a register to return the newly allocated array in and the
1774 -- size of the new array in bytes. Allocates a new
1775 -- 'MutableByteArray#'.
1776 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1777 doNewByteArrayOp res_r n = do
1778 dflags <- getDynFlags
1779
1780 let info_ptr = mkLblExpr mkArrWords_infoLabel
1781 rep = arrWordsRep dflags n
1782
1783 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1784 (mkIntExpr dflags (nonHdrSize dflags rep))
1785 (zeroExpr dflags)
1786
1787 let hdr_size = fixedHdrSize dflags
1788
1789 base <- allocHeapClosure rep info_ptr cccsExpr
1790 [ (mkIntExpr dflags n,
1791 hdr_size + oFFSET_StgArrBytes_bytes dflags)
1792 ]
1793
1794 emit $ mkAssign (CmmLocal res_r) base
1795
1796 -- ----------------------------------------------------------------------------
1797 -- Comparing byte arrays
1798
1799 doCompareByteArraysOp :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1800 -> FCode ()
1801 doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n = do
1802 dflags <- getDynFlags
1803 ba1_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba1 (arrWordsHdrSize dflags)) ba1_off
1804 ba2_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba2 (arrWordsHdrSize dflags)) ba2_off
1805
1806 -- short-cut in case of equal pointers avoiding a costly
1807 -- subroutine call to the memcmp(3) routine; the Cmm logic below
1808 -- results in assembly code being generated for
1809 --
1810 -- cmpPrefix10 :: ByteArray# -> ByteArray# -> Int#
1811 -- cmpPrefix10 ba1 ba2 = compareByteArrays# ba1 0# ba2 0# 10#
1812 --
1813 -- that looks like
1814 --
1815 -- leaq 16(%r14),%rax
1816 -- leaq 16(%rsi),%rbx
1817 -- xorl %ecx,%ecx
1818 -- cmpq %rbx,%rax
1819 -- je l_ptr_eq
1820 --
1821 -- ; NB: the common case (unequal pointers) falls-through
1822 -- ; the conditional jump, and therefore matches the
1823 -- ; usual static branch prediction convention of modern cpus
1824 --
1825 -- subq $8,%rsp
1826 -- movq %rbx,%rsi
1827 -- movq %rax,%rdi
1828 -- movl $10,%edx
1829 -- xorl %eax,%eax
1830 -- call memcmp
1831 -- addq $8,%rsp
1832 -- movslq %eax,%rax
1833 -- movq %rax,%rcx
1834 -- l_ptr_eq:
1835 -- movq %rcx,%rbx
1836 -- jmp *(%rbp)
1837
1838 l_ptr_eq <- newBlockId
1839 l_ptr_ne <- newBlockId
1840
1841 emit (mkAssign (CmmLocal res) (zeroExpr dflags))
1842 emit (mkCbranch (cmmEqWord dflags ba1_p ba2_p)
1843 l_ptr_eq l_ptr_ne (Just False))
1844
1845 emitLabel l_ptr_ne
1846 emitMemcmpCall res ba1_p ba2_p n 1
1847
1848 emitLabel l_ptr_eq
1849
1850 -- ----------------------------------------------------------------------------
1851 -- Copying byte arrays
1852
1853 -- | Takes a source 'ByteArray#', an offset in the source array, a
1854 -- destination 'MutableByteArray#', an offset into the destination
1855 -- array, and the number of bytes to copy. Copies the given number of
1856 -- bytes from the source array to the destination array.
1857 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1858 -> FCode ()
1859 doCopyByteArrayOp = emitCopyByteArray copy
1860 where
1861 -- Copy data (we assume the arrays aren't overlapping since
1862 -- they're of different types)
1863 copy _src _dst dst_p src_p bytes =
1864 emitMemcpyCall dst_p src_p bytes 1
1865
1866 -- | Takes a source 'MutableByteArray#', an offset in the source
1867 -- array, a destination 'MutableByteArray#', an offset into the
1868 -- destination array, and the number of bytes to copy. Copies the
1869 -- given number of bytes from the source array to the destination
1870 -- array.
1871 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1872 -> FCode ()
1873 doCopyMutableByteArrayOp = emitCopyByteArray copy
1874 where
1875 -- The only time the memory might overlap is when the two arrays
1876 -- we were provided are the same array!
1877 -- TODO: Optimize branch for common case of no aliasing.
1878 copy src dst dst_p src_p bytes = do
1879 dflags <- getDynFlags
1880 [moveCall, cpyCall] <- forkAlts [
1881 getCode $ emitMemmoveCall dst_p src_p bytes 1,
1882 getCode $ emitMemcpyCall dst_p src_p bytes 1
1883 ]
1884 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1885
1886 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1887 -> FCode ())
1888 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1889 -> FCode ()
1890 emitCopyByteArray copy src src_off dst dst_off n = do
1891 dflags <- getDynFlags
1892 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1893 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1894 copy src dst dst_p src_p n
1895
1896 -- | Takes a source 'ByteArray#', an offset in the source array, a
1897 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1898 -- number of bytes from the source array to the destination memory region.
1899 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1900 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1901 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1902 dflags <- getDynFlags
1903 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1904 emitMemcpyCall dst_p src_p bytes 1
1905
1906 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1907 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1908 -- number of bytes from the source array to the destination memory region.
1909 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1910 -> FCode ()
1911 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1912
1913 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1914 -- the destination array, and the number of bytes to copy. Copies the given
1915 -- number of bytes from the source memory region to the destination array.
1916 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1917 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1918 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1919 dflags <- getDynFlags
1920 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1921 emitMemcpyCall dst_p src_p bytes 1
1922
1923
1924 -- ----------------------------------------------------------------------------
1925 -- Setting byte arrays
1926
1927 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1928 -- and a byte, and sets each of the selected bytes in the array to the
1929 -- character.
1930 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1931 -> FCode ()
1932 doSetByteArrayOp ba off len c
1933 = do dflags <- getDynFlags
1934 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1935 emitMemsetCall p c len 1
1936
1937 -- ----------------------------------------------------------------------------
1938 -- Allocating arrays
1939
1940 -- | Allocate a new array.
1941 doNewArrayOp :: CmmFormal -- ^ return register
1942 -> SMRep -- ^ representation of the array
1943 -> CLabel -- ^ info pointer
1944 -> [(CmmExpr, ByteOff)] -- ^ header payload
1945 -> WordOff -- ^ array size
1946 -> CmmExpr -- ^ initial element
1947 -> FCode ()
1948 doNewArrayOp res_r rep info payload n init = do
1949 dflags <- getDynFlags
1950
1951 let info_ptr = mkLblExpr info
1952
1953 tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
1954 (mkIntExpr dflags (nonHdrSize dflags rep))
1955 (zeroExpr dflags)
1956
1957 base <- allocHeapClosure rep info_ptr cccsExpr payload
1958
1959 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1960 emit $ mkAssign arr base
1961
1962 -- Initialise all elements of the array
1963 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep)
1964 for <- newBlockId
1965 emitLabel for
1966 let loopBody =
1967 [ mkStore (CmmReg (CmmLocal p)) init
1968 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1969 , mkBranch for ]
1970 emit =<< mkCmmIfThen
1971 (cmmULtWord dflags (CmmReg (CmmLocal p))
1972 (cmmOffsetW dflags (CmmReg arr)
1973 (hdrSizeW dflags rep + n)))
1974 (catAGraphs loopBody)
1975
1976 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1977
1978 -- ----------------------------------------------------------------------------
1979 -- Copying pointer arrays
1980
1981 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1982 -- nowhere else in the code generator. This is mostly because these
1983 -- "primitive" ops result in a surprisingly large amount of code. It
1984 -- will likely be worthwhile to optimize what is emitted here, so that
1985 -- our optimization passes don't waste time repeatedly optimizing the
1986 -- same bits of code.
1987
1988 -- More closely imitates 'assignTemp' from the old code generator, which
1989 -- returns a CmmExpr rather than a LocalReg.
1990 assignTempE :: CmmExpr -> FCode CmmExpr
1991 assignTempE e = do
1992 t <- assignTemp e
1993 return (CmmReg (CmmLocal t))
1994
1995 -- | Takes a source 'Array#', an offset in the source array, a
1996 -- destination 'MutableArray#', an offset into the destination array,
1997 -- and the number of elements to copy. Copies the given number of
1998 -- elements from the source array to the destination array.
1999 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2000 -> FCode ()
2001 doCopyArrayOp = emitCopyArray copy
2002 where
2003 -- Copy data (we assume the arrays aren't overlapping since
2004 -- they're of different types)
2005 copy _src _dst dst_p src_p bytes =
2006 do dflags <- getDynFlags
2007 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
2008 (wORD_SIZE dflags)
2009
2010
2011 -- | Takes a source 'MutableArray#', an offset in the source array, a
2012 -- destination 'MutableArray#', an offset into the destination array,
2013 -- and the number of elements to copy. Copies the given number of
2014 -- elements from the source array to the destination array.
2015 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2016 -> FCode ()
2017 doCopyMutableArrayOp = emitCopyArray copy
2018 where
2019 -- The only time the memory might overlap is when the two arrays
2020 -- we were provided are the same array!
2021 -- TODO: Optimize branch for common case of no aliasing.
2022 copy src dst dst_p src_p bytes = do
2023 dflags <- getDynFlags
2024 [moveCall, cpyCall] <- forkAlts [
2025 getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
2026 (wORD_SIZE dflags),
2027 getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
2028 (wORD_SIZE dflags)
2029 ]
2030 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
2031
2032 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
2033 -> FCode ()) -- ^ copy function
2034 -> CmmExpr -- ^ source array
2035 -> CmmExpr -- ^ offset in source array
2036 -> CmmExpr -- ^ destination array
2037 -> CmmExpr -- ^ offset in destination array
2038 -> WordOff -- ^ number of elements to copy
2039 -> FCode ()
2040 emitCopyArray copy src0 src_off dst0 dst_off0 n = do
2041 dflags <- getDynFlags
2042 when (n /= 0) $ do
2043 -- Passed as arguments (be careful)
2044 src <- assignTempE src0
2045 dst <- assignTempE dst0
2046 dst_off <- assignTempE dst_off0
2047
2048 -- Set the dirty bit in the header.
2049 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
2050
2051 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
2052 (arrPtrsHdrSize dflags)
2053 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
2054 src_p <- assignTempE $ cmmOffsetExprW dflags
2055 (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
2056 let bytes = wordsToBytes dflags n
2057
2058 copy src dst dst_p src_p bytes
2059
2060 -- The base address of the destination card table
2061 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
2062 (loadArrPtrsSize dflags dst)
2063
2064 emitSetCards dst_off dst_cards_p n
2065
2066 doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2067 -> FCode ()
2068 doCopySmallArrayOp = emitCopySmallArray copy
2069 where
2070 -- Copy data (we assume the arrays aren't overlapping since
2071 -- they're of different types)
2072 copy _src _dst dst_p src_p bytes =
2073 do dflags <- getDynFlags
2074 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
2075 (wORD_SIZE dflags)
2076
2077
2078 doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2079 -> FCode ()
2080 doCopySmallMutableArrayOp = emitCopySmallArray copy
2081 where
2082 -- The only time the memory might overlap is when the two arrays
2083 -- we were provided are the same array!
2084 -- TODO: Optimize branch for common case of no aliasing.
2085 copy src dst dst_p src_p bytes = do
2086 dflags <- getDynFlags
2087 [moveCall, cpyCall] <- forkAlts
2088 [ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
2089 (wORD_SIZE dflags)
2090 , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
2091 (wORD_SIZE dflags)
2092 ]
2093 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
2094
2095 emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
2096 -> FCode ()) -- ^ copy function
2097 -> CmmExpr -- ^ source array
2098 -> CmmExpr -- ^ offset in source array
2099 -> CmmExpr -- ^ destination array
2100 -> CmmExpr -- ^ offset in destination array
2101 -> WordOff -- ^ number of elements to copy
2102 -> FCode ()
2103 emitCopySmallArray copy src0 src_off dst0 dst_off n = do
2104 dflags <- getDynFlags
2105
2106 -- Passed as arguments (be careful)
2107 src <- assignTempE src0
2108 dst <- assignTempE dst0
2109
2110 -- Set the dirty bit in the header.
2111 emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2112
2113 dst_p <- assignTempE $ cmmOffsetExprW dflags
2114 (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
2115 src_p <- assignTempE $ cmmOffsetExprW dflags
2116 (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
2117 let bytes = wordsToBytes dflags n
2118
2119 copy src dst dst_p src_p bytes
2120
2121 -- | Takes an info table label, a register to return the newly
2122 -- allocated array in, a source array, an offset in the source array,
2123 -- and the number of elements to copy. Allocates a new array and
2124 -- initializes it from the source array.
2125 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
2126 -> FCode ()
2127 emitCloneArray info_p res_r src src_off n = do
2128 dflags <- getDynFlags
2129
2130 let info_ptr = mkLblExpr info_p
2131 rep = arrPtrsRep dflags n
2132
2133 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
2134 (mkIntExpr dflags (nonHdrSize dflags rep))
2135 (zeroExpr dflags)
2136
2137 let hdr_size = fixedHdrSize dflags
2138
2139 base <- allocHeapClosure rep info_ptr cccsExpr
2140 [ (mkIntExpr dflags n,
2141 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
2142 , (mkIntExpr dflags (nonHdrSizeW rep),
2143 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
2144 ]
2145
2146 arr <- CmmLocal `fmap` newTemp (bWord dflags)
2147 emit $ mkAssign arr base
2148
2149 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
2150 (arrPtrsHdrSize dflags)
2151 src_p <- assignTempE $ cmmOffsetExprW dflags src
2152 (cmmAddWord dflags
2153 (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
2154
2155 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
2156 (wORD_SIZE dflags)
2157
2158 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2159
2160 -- | Takes an info table label, a register to return the newly
2161 -- allocated array in, a source array, an offset in the source array,
2162 -- and the number of elements to copy. Allocates a new array and
2163 -- initializes it from the source array.
2164 emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
2165 -> FCode ()
2166 emitCloneSmallArray info_p res_r src src_off n = do
2167 dflags <- getDynFlags
2168
2169 let info_ptr = mkLblExpr info_p
2170 rep = smallArrPtrsRep n
2171
2172 tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
2173 (mkIntExpr dflags (nonHdrSize dflags rep))
2174 (zeroExpr dflags)
2175
2176 let hdr_size = fixedHdrSize dflags
2177
2178 base <- allocHeapClosure rep info_ptr cccsExpr
2179 [ (mkIntExpr dflags n,
2180 hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
2181 ]
2182
2183 arr <- CmmLocal `fmap` newTemp (bWord dflags)
2184 emit $ mkAssign arr base
2185
2186 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
2187 (smallArrPtrsHdrSize dflags)
2188 src_p <- assignTempE $ cmmOffsetExprW dflags src
2189 (cmmAddWord dflags
2190 (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
2191
2192 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
2193 (wORD_SIZE dflags)
2194
2195 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2196
2197 -- | Takes and offset in the destination array, the base address of
2198 -- the card table, and the number of elements affected (*not* the
2199 -- number of cards). The number of elements may not be zero.
2200 -- Marks the relevant cards as dirty.
2201 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
2202 emitSetCards dst_start dst_cards_start n = do
2203 dflags <- getDynFlags
2204 start_card <- assignTempE $ cardCmm dflags dst_start
2205 let end_card = cardCmm dflags
2206 (cmmSubWord dflags
2207 (cmmAddWord dflags dst_start (mkIntExpr dflags n))
2208 (mkIntExpr dflags 1))
2209 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
2210 (mkIntExpr dflags 1)
2211 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
2212 1 -- no alignment (1 byte)
2213
2214 -- Convert an element index to a card index
2215 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
2216 cardCmm dflags i =
2217 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
2218
2219 ------------------------------------------------------------------------------
2220 -- SmallArray PrimOp implementations
2221
2222 doReadSmallPtrArrayOp :: LocalReg
2223 -> CmmExpr
2224 -> CmmExpr
2225 -> FCode ()
2226 doReadSmallPtrArrayOp res addr idx = do
2227 dflags <- getDynFlags
2228 mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
2229 (gcWord dflags) idx
2230
2231 doWriteSmallPtrArrayOp :: CmmExpr
2232 -> CmmExpr
2233 -> CmmExpr
2234 -> FCode ()
2235 doWriteSmallPtrArrayOp addr idx val = do
2236 dflags <- getDynFlags
2237 let ty = cmmExprType dflags val
2238 mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
2239 emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2240
2241 ------------------------------------------------------------------------------
2242 -- Atomic read-modify-write
2243
2244 -- | Emit an atomic modification to a byte array element. The result
2245 -- reg contains that previous value of the element. Implies a full
2246 -- memory barrier.
2247 doAtomicRMW :: LocalReg -- ^ Result reg
2248 -> AtomicMachOp -- ^ Atomic op (e.g. add)
2249 -> CmmExpr -- ^ MutableByteArray#
2250 -> CmmExpr -- ^ Index
2251 -> CmmType -- ^ Type of element by which we are indexing
2252 -> CmmExpr -- ^ Op argument (e.g. amount to add)
2253 -> FCode ()
2254 doAtomicRMW res amop mba idx idx_ty n = do
2255 dflags <- getDynFlags
2256 let width = typeWidth idx_ty
2257 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2258 width mba idx
2259 emitPrimCall
2260 [ res ]
2261 (MO_AtomicRMW width amop)
2262 [ addr, n ]
2263
2264 -- | Emit an atomic read to a byte array that acts as a memory barrier.
2265 doAtomicReadByteArray
2266 :: LocalReg -- ^ Result reg
2267 -> CmmExpr -- ^ MutableByteArray#
2268 -> CmmExpr -- ^ Index
2269 -> CmmType -- ^ Type of element by which we are indexing
2270 -> FCode ()
2271 doAtomicReadByteArray res mba idx idx_ty = do
2272 dflags <- getDynFlags
2273 let width = typeWidth idx_ty
2274 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2275 width mba idx
2276 emitPrimCall
2277 [ res ]
2278 (MO_AtomicRead width)
2279 [ addr ]
2280
2281 -- | Emit an atomic write to a byte array that acts as a memory barrier.
2282 doAtomicWriteByteArray
2283 :: CmmExpr -- ^ MutableByteArray#
2284 -> CmmExpr -- ^ Index
2285 -> CmmType -- ^ Type of element by which we are indexing
2286 -> CmmExpr -- ^ Value to write
2287 -> FCode ()
2288 doAtomicWriteByteArray mba idx idx_ty val = do
2289 dflags <- getDynFlags
2290 let width = typeWidth idx_ty
2291 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2292 width mba idx
2293 emitPrimCall
2294 [ {- no results -} ]
2295 (MO_AtomicWrite width)
2296 [ addr, val ]
2297
2298 doCasByteArray
2299 :: LocalReg -- ^ Result reg
2300 -> CmmExpr -- ^ MutableByteArray#
2301 -> CmmExpr -- ^ Index
2302 -> CmmType -- ^ Type of element by which we are indexing
2303 -> CmmExpr -- ^ Old value
2304 -> CmmExpr -- ^ New value
2305 -> FCode ()
2306 doCasByteArray res mba idx idx_ty old new = do
2307 dflags <- getDynFlags
2308 let width = (typeWidth idx_ty)
2309 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2310 width mba idx
2311 emitPrimCall
2312 [ res ]
2313 (MO_Cmpxchg width)
2314 [ addr, old, new ]
2315
2316 ------------------------------------------------------------------------------
2317 -- Helpers for emitting function calls
2318
2319 -- | Emit a call to @memcpy@.
2320 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2321 emitMemcpyCall dst src n align = do
2322 emitPrimCall
2323 [ {-no results-} ]
2324 (MO_Memcpy align)
2325 [ dst, src, n ]
2326
2327 -- | Emit a call to @memmove@.
2328 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2329 emitMemmoveCall dst src n align = do
2330 emitPrimCall
2331 [ {- no results -} ]
2332 (MO_Memmove align)
2333 [ dst, src, n ]
2334
2335 -- | Emit a call to @memset@. The second argument must fit inside an
2336 -- unsigned char.
2337 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2338 emitMemsetCall dst c n align = do
2339 emitPrimCall
2340 [ {- no results -} ]
2341 (MO_Memset align)
2342 [ dst, c, n ]
2343
2344 emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2345 emitMemcmpCall res ptr1 ptr2 n align = do
2346 -- 'MO_Memcmp' is assumed to return an 32bit 'CInt' because all
2347 -- code-gens currently call out to the @memcmp(3)@ C function.
2348 -- This was easier than moving the sign-extensions into
2349 -- all the code-gens.
2350 dflags <- getDynFlags
2351 let is32Bit = typeWidth (localRegType res) == W32
2352
2353 cres <- if is32Bit
2354 then return res
2355 else newTemp b32
2356
2357 emitPrimCall
2358 [ cres ]
2359 (MO_Memcmp align)
2360 [ ptr1, ptr2, n ]
2361
2362 unless is32Bit $ do
2363 emit $ mkAssign (CmmLocal res)
2364 (CmmMachOp
2365 (mo_s_32ToWord dflags)
2366 [(CmmReg (CmmLocal cres))])
2367
2368 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2369 emitBSwapCall res x width = do
2370 emitPrimCall
2371 [ res ]
2372 (MO_BSwap width)
2373 [ x ]
2374
2375 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2376 emitPopCntCall res x width = do
2377 emitPrimCall
2378 [ res ]
2379 (MO_PopCnt width)
2380 [ x ]
2381
2382 emitPdepCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
2383 emitPdepCall res x y width = do
2384 emitPrimCall
2385 [ res ]
2386 (MO_Pdep width)
2387 [ x, y ]
2388
2389 emitPextCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
2390 emitPextCall res x y width = do
2391 emitPrimCall
2392 [ res ]
2393 (MO_Pext width)
2394 [ x, y ]
2395
2396 emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2397 emitClzCall res x width = do
2398 emitPrimCall
2399 [ res ]
2400 (MO_Clz width)
2401 [ x ]
2402
2403 emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2404 emitCtzCall res x width = do
2405 emitPrimCall
2406 [ res ]
2407 (MO_Ctz width)
2408 [ x ]