ghci: Ensure that system libffi include path is searched
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 {-# LANGUAGE CPP #-}
2 -- emitPrimOp is quite large
3 {-# OPTIONS_GHC -fmax-pmcheck-iterations=4000000 #-}
4
5 ----------------------------------------------------------------------------
6 --
7 -- Stg to C--: primitive operations
8 --
9 -- (c) The University of Glasgow 2004-2006
10 --
11 -----------------------------------------------------------------------------
12
13 module StgCmmPrim (
14 cgOpApp,
15 cgPrimOp, -- internal(ish), used by cgCase to get code for a
16 -- comparison without also turning it into a Bool.
17 shouldInlinePrimOp
18 ) where
19
20 #include "HsVersions.h"
21
22 import GhcPrelude hiding ((<*>))
23
24 import StgCmmLayout
25 import StgCmmForeign
26 import StgCmmEnv
27 import StgCmmMonad
28 import StgCmmUtils
29 import StgCmmTicky
30 import StgCmmHeap
31 import StgCmmProf ( costCentreFrom )
32
33 import DynFlags
34 import Platform
35 import BasicTypes
36 import BlockId
37 import MkGraph
38 import StgSyn
39 import Cmm
40 import Type ( Type, tyConAppTyCon )
41 import TyCon
42 import CLabel
43 import CmmUtils
44 import PrimOp
45 import SMRep
46 import FastString
47 import Outputable
48 import Util
49
50 import Data.Bits ((.&.), bit)
51 import Control.Monad (liftM, when, unless)
52
53 ------------------------------------------------------------------------
54 -- Primitive operations and foreign calls
55 ------------------------------------------------------------------------
56
57 {- Note [Foreign call results]
58 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
59 A foreign call always returns an unboxed tuple of results, one
60 of which is the state token. This seems to happen even for pure
61 calls.
62
63 Even if we returned a single result for pure calls, it'd still be
64 right to wrap it in a singleton unboxed tuple, because the result
65 might be a Haskell closure pointer, we don't want to evaluate it. -}
66
67 ----------------------------------
68 cgOpApp :: StgOp -- The op
69 -> [StgArg] -- Arguments
70 -> Type -- Result type (always an unboxed tuple)
71 -> FCode ReturnKind
72
73 -- Foreign calls
74 cgOpApp (StgFCallOp fcall _) stg_args res_ty
75 = cgForeignCall fcall stg_args res_ty
76 -- Note [Foreign call results]
77
78 -- tagToEnum# is special: we need to pull the constructor
79 -- out of the table, and perform an appropriate return.
80
81 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
82 = ASSERT(isEnumerationTyCon tycon)
83 do { dflags <- getDynFlags
84 ; args' <- getNonVoidArgAmodes [arg]
85 ; let amode = case args' of [amode] -> amode
86 _ -> panic "TagToEnumOp had void arg"
87 ; emitReturn [tagToClosure dflags tycon amode] }
88 where
89 -- If you're reading this code in the attempt to figure
90 -- out why the compiler panic'ed here, it is probably because
91 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
92 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
93 -- That won't work.
94 tycon = tyConAppTyCon res_ty
95
96 cgOpApp (StgPrimOp primop) args res_ty = do
97 dflags <- getDynFlags
98 cmm_args <- getNonVoidArgAmodes args
99 case shouldInlinePrimOp dflags primop cmm_args of
100 Nothing -> do -- out-of-line
101 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
102 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
103
104 Just f -- inline
105 | ReturnsPrim VoidRep <- result_info
106 -> do f []
107 emitReturn []
108
109 | ReturnsPrim rep <- result_info
110 -> do dflags <- getDynFlags
111 res <- newTemp (primRepCmmType dflags rep)
112 f [res]
113 emitReturn [CmmReg (CmmLocal res)]
114
115 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
116 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
117 f regs
118 emitReturn (map (CmmReg . CmmLocal) regs)
119
120 | otherwise -> panic "cgPrimop"
121 where
122 result_info = getPrimOpResultInfo primop
123
124 cgOpApp (StgPrimCallOp primcall) args _res_ty
125 = do { cmm_args <- getNonVoidArgAmodes args
126 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
127 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
128
129 -- | Interpret the argument as an unsigned value, assuming the value
130 -- is given in two-complement form in the given width.
131 --
132 -- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
133 --
134 -- This function is used to work around the fact that many array
135 -- primops take Int# arguments, but we interpret them as unsigned
136 -- quantities in the code gen. This means that we have to be careful
137 -- every time we work on e.g. a CmmInt literal that corresponds to the
138 -- array size, as it might contain a negative Integer value if the
139 -- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
140 -- literal.
141 asUnsigned :: Width -> Integer -> Integer
142 asUnsigned w n = n .&. (bit (widthInBits w) - 1)
143
144 -- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
145 -- ByteOff (or some other fixed width signed type) to represent
146 -- array sizes or indices. This means that these will overflow for
147 -- large enough sizes.
148
149 -- | Decide whether an out-of-line primop should be replaced by an
150 -- inline implementation. This might happen e.g. if there's enough
151 -- static information, such as statically know arguments, to emit a
152 -- more efficient implementation inline.
153 --
154 -- Returns 'Nothing' if this primop should use its out-of-line
155 -- implementation (defined elsewhere) and 'Just' together with a code
156 -- generating function that takes the output regs as arguments
157 -- otherwise.
158 shouldInlinePrimOp :: DynFlags
159 -> PrimOp -- ^ The primop
160 -> [CmmExpr] -- ^ The primop arguments
161 -> Maybe ([LocalReg] -> FCode ())
162
163 shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
164 | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
165 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
166
167 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
168 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
169 Just $ \ [res] ->
170 doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
171 [ (mkIntExpr dflags (fromInteger n),
172 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
173 , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
174 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
175 ]
176 (fromInteger n) init
177
178 shouldInlinePrimOp _ CopyArrayOp
179 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
180 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
181
182 shouldInlinePrimOp _ CopyMutableArrayOp
183 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
184 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
185
186 shouldInlinePrimOp _ CopyArrayArrayOp
187 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
188 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
189
190 shouldInlinePrimOp _ CopyMutableArrayArrayOp
191 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
192 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
193
194 shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
195 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
196 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
197
198 shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
199 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
200 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
201
202 shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
203 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
204 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
205
206 shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
207 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
208 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
209
210 shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
211 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
212 Just $ \ [res] ->
213 doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
214 [ (mkIntExpr dflags (fromInteger n),
215 fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
216 ]
217 (fromInteger n) init
218
219 shouldInlinePrimOp _ CopySmallArrayOp
220 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
221 Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
222
223 shouldInlinePrimOp _ CopySmallMutableArrayOp
224 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
225 Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
226
227 shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
228 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
229 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
230
231 shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
232 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
233 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
234
235 shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
236 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
237 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
238
239 shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
240 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
241 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
242
243 shouldInlinePrimOp dflags primop args
244 | primOpOutOfLine primop = Nothing
245 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
246
247 -- TODO: Several primops, such as 'copyArray#', only have an inline
248 -- implementation (below) but could possibly have both an inline
249 -- implementation and an out-of-line implementation, just like
250 -- 'newArray#'. This would lower the amount of code generated,
251 -- hopefully without a performance impact (needs to be measured).
252
253 ---------------------------------------------------
254 cgPrimOp :: [LocalReg] -- where to put the results
255 -> PrimOp -- the op
256 -> [StgArg] -- arguments
257 -> FCode ()
258
259 cgPrimOp results op args
260 = do dflags <- getDynFlags
261 arg_exprs <- getNonVoidArgAmodes args
262 emitPrimOp dflags results op arg_exprs
263
264
265 ------------------------------------------------------------------------
266 -- Emitting code for a primop
267 ------------------------------------------------------------------------
268
269 emitPrimOp :: DynFlags
270 -> [LocalReg] -- where to put the results
271 -> PrimOp -- the op
272 -> [CmmExpr] -- arguments
273 -> FCode ()
274
275 -- First we handle various awkward cases specially. The remaining
276 -- easy cases are then handled by translateOp, defined below.
277
278 emitPrimOp _ [res] ParOp [arg]
279 =
280 -- for now, just implement this in a C function
281 -- later, we might want to inline it.
282 emitCCall
283 [(res,NoHint)]
284 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
285 [(baseExpr, AddrHint), (arg,AddrHint)]
286
287 emitPrimOp dflags [res] SparkOp [arg]
288 = do
289 -- returns the value of arg in res. We're going to therefore
290 -- refer to arg twice (once to pass to newSpark(), and once to
291 -- assign to res), so put it in a temporary.
292 tmp <- assignTemp arg
293 tmp2 <- newTemp (bWord dflags)
294 emitCCall
295 [(tmp2,NoHint)]
296 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
297 [(baseExpr, AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
298 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
299
300 emitPrimOp dflags [res] GetCCSOfOp [arg]
301 = emitAssign (CmmLocal res) val
302 where
303 val
304 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
305 | otherwise = CmmLit (zeroCLit dflags)
306
307 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
308 = emitAssign (CmmLocal res) cccsExpr
309
310 emitPrimOp _ [res] MyThreadIdOp []
311 = emitAssign (CmmLocal res) currentTSOExpr
312
313 emitPrimOp dflags [res] ReadMutVarOp [mutv]
314 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
315
316 emitPrimOp dflags res@[] WriteMutVarOp [mutv,var]
317 = do -- Without this write barrier, other CPUs may see this pointer before
318 -- the writes for the closure it points to have occurred.
319 emitPrimCall res MO_WriteBarrier []
320 emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
321 emitCCall
322 [{-no results-}]
323 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
324 [(baseExpr, AddrHint), (mutv,AddrHint)]
325
326 -- #define sizzeofByteArrayzh(r,a) \
327 -- r = ((StgArrBytes *)(a))->bytes
328 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
329 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
330
331 -- #define sizzeofMutableByteArrayzh(r,a) \
332 -- r = ((StgArrBytes *)(a))->bytes
333 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
334 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
335
336 -- #define getSizzeofMutableByteArrayzh(r,a) \
337 -- r = ((StgArrBytes *)(a))->bytes
338 emitPrimOp dflags [res] GetSizeofMutableByteArrayOp [arg]
339 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
340
341
342 -- #define touchzh(o) /* nothing */
343 emitPrimOp _ res@[] TouchOp args@[_arg]
344 = do emitPrimCall res MO_Touch args
345
346 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
347 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
348 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
349
350 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
351 emitPrimOp dflags [res] StableNameToIntOp [arg]
352 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
353
354 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
355 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
356
357 -- #define addrToHValuezh(r,a) r=(P_)a
358 emitPrimOp _ [res] AddrToAnyOp [arg]
359 = emitAssign (CmmLocal res) arg
360
361 -- #define hvalueToAddrzh(r, a) r=(W_)a
362 emitPrimOp _ [res] AnyToAddrOp [arg]
363 = emitAssign (CmmLocal res) arg
364
365 {- Freezing arrays-of-ptrs requires changing an info table, for the
366 benefit of the generational collector. It needs to scavenge mutable
367 objects, even if they are in old space. When they become immutable,
368 they can be removed from this scavenge list. -}
369
370 -- #define unsafeFreezzeArrayzh(r,a)
371 -- {
372 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
373 -- r = a;
374 -- }
375 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
376 = emit $ catAGraphs
377 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
378 mkAssign (CmmLocal res) arg ]
379 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
380 = emit $ catAGraphs
381 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
382 mkAssign (CmmLocal res) arg ]
383 emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
384 = emit $ catAGraphs
385 [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN_DIRTY_infoLabel)),
386 mkAssign (CmmLocal res) arg ]
387
388 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
389 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
390 = emitAssign (CmmLocal res) arg
391
392 -- Reading/writing pointer arrays
393
394 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
395 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
396 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
397
398 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
399 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
400 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
401 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
402 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
403 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
404 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
405 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
406 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
407 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
408
409 emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
410 emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
411 emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
412
413 -- Getting the size of pointer arrays
414
415 emitPrimOp dflags [res] SizeofArrayOp [arg]
416 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg
417 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgMutArrPtrs_ptrs dflags))
418 (bWord dflags))
419 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
420 = emitPrimOp dflags [res] SizeofArrayOp [arg]
421 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
422 = emitPrimOp dflags [res] SizeofArrayOp [arg]
423 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
424 = emitPrimOp dflags [res] SizeofArrayOp [arg]
425
426 emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
427 emit $ mkAssign (CmmLocal res)
428 (cmmLoadIndexW dflags arg
429 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgSmallMutArrPtrs_ptrs dflags))
430 (bWord dflags))
431 emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
432 emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
433
434 -- IndexXXXoffAddr
435
436 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
437 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
438 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
439 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
440 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
441 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
442 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
443 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
444 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
445 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
446 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
447 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
448 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
449 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
450 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
451 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
452
453 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
454
455 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
456 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
457 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
458 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
459 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
460 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
461 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
462 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
463 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
464 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
465 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
466 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
467 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
468 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
469 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
470 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
471
472 -- IndexXXXArray
473
474 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
475 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
476 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
477 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
478 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
479 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
480 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
481 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
482 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
483 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
484 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
485 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
486 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
487 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
488 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
489 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
490
491 -- ReadXXXArray, identical to IndexXXXArray.
492
493 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
494 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
495 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
496 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
497 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
498 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
499 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
500 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
501 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
502 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
503 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
504 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
505 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
506 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
507 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
508 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
509
510 -- IndexWord8ArrayAsXXX
511
512 emitPrimOp dflags res IndexByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args
513 emitPrimOp dflags res IndexByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
514 emitPrimOp dflags res IndexByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
515 emitPrimOp dflags res IndexByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
516 emitPrimOp dflags res IndexByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
517 emitPrimOp _ res IndexByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args
518 emitPrimOp _ res IndexByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args
519 emitPrimOp dflags res IndexByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
520 emitPrimOp dflags res IndexByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args
521 emitPrimOp dflags res IndexByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args
522 emitPrimOp _ res IndexByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
523 emitPrimOp dflags res IndexByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args
524 emitPrimOp dflags res IndexByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
525 emitPrimOp _ res IndexByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
526
527 -- ReadInt8ArrayAsXXX, identical to IndexInt8ArrayAsXXX
528
529 emitPrimOp dflags res ReadByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args
530 emitPrimOp dflags res ReadByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
531 emitPrimOp dflags res ReadByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
532 emitPrimOp dflags res ReadByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
533 emitPrimOp dflags res ReadByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
534 emitPrimOp _ res ReadByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args
535 emitPrimOp _ res ReadByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args
536 emitPrimOp dflags res ReadByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
537 emitPrimOp dflags res ReadByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args
538 emitPrimOp dflags res ReadByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args
539 emitPrimOp _ res ReadByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
540 emitPrimOp dflags res ReadByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args
541 emitPrimOp dflags res ReadByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
542 emitPrimOp _ res ReadByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
543
544 -- WriteXXXoffAddr
545
546 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
547 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
548 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
549 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
550 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
551 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
552 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
553 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
554 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
555 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
556 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
557 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
558 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
559 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
560 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
561 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
562
563 -- WriteXXXArray
564
565 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
566 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
567 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
568 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
569 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
570 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
571 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
572 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
573 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
574 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
575 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
576 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
577 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
578 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
579 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
580 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
581
582 -- WriteInt8ArrayAsXXX
583
584 emitPrimOp dflags res WriteByteArrayOp_Word8AsChar args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
585 emitPrimOp dflags res WriteByteArrayOp_Word8AsWideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
586 emitPrimOp _ res WriteByteArrayOp_Word8AsInt args = doWriteByteArrayOp Nothing b8 res args
587 emitPrimOp _ res WriteByteArrayOp_Word8AsWord args = doWriteByteArrayOp Nothing b8 res args
588 emitPrimOp _ res WriteByteArrayOp_Word8AsAddr args = doWriteByteArrayOp Nothing b8 res args
589 emitPrimOp _ res WriteByteArrayOp_Word8AsFloat args = doWriteByteArrayOp Nothing b8 res args
590 emitPrimOp _ res WriteByteArrayOp_Word8AsDouble args = doWriteByteArrayOp Nothing b8 res args
591 emitPrimOp _ res WriteByteArrayOp_Word8AsStablePtr args = doWriteByteArrayOp Nothing b8 res args
592 emitPrimOp dflags res WriteByteArrayOp_Word8AsInt16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
593 emitPrimOp dflags res WriteByteArrayOp_Word8AsInt32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
594 emitPrimOp _ res WriteByteArrayOp_Word8AsInt64 args = doWriteByteArrayOp Nothing b8 res args
595 emitPrimOp dflags res WriteByteArrayOp_Word8AsWord16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
596 emitPrimOp dflags res WriteByteArrayOp_Word8AsWord32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
597 emitPrimOp _ res WriteByteArrayOp_Word8AsWord64 args = doWriteByteArrayOp Nothing b8 res args
598
599 -- Copying and setting byte arrays
600 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
601 doCopyByteArrayOp src src_off dst dst_off n
602 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
603 doCopyMutableByteArrayOp src src_off dst dst_off n
604 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
605 doCopyByteArrayToAddrOp src src_off dst n
606 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
607 doCopyMutableByteArrayToAddrOp src src_off dst n
608 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
609 doCopyAddrToByteArrayOp src dst dst_off n
610 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
611 doSetByteArrayOp ba off len c
612
613 -- Comparing byte arrays
614 emitPrimOp _ [res] CompareByteArraysOp [ba1,ba1_off,ba2,ba2_off,n] =
615 doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n
616
617 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
618 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
619 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
620 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
621
622 emitPrimOp _ [res] BRev8Op [w] = emitBRevCall res w W8
623 emitPrimOp _ [res] BRev16Op [w] = emitBRevCall res w W16
624 emitPrimOp _ [res] BRev32Op [w] = emitBRevCall res w W32
625 emitPrimOp _ [res] BRev64Op [w] = emitBRevCall res w W64
626 emitPrimOp dflags [res] BRevOp [w] = emitBRevCall res w (wordWidth dflags)
627
628 -- Population count
629 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
630 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
631 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
632 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
633 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
634
635 -- Parallel bit deposit
636 emitPrimOp _ [res] Pdep8Op [src, mask] = emitPdepCall res src mask W8
637 emitPrimOp _ [res] Pdep16Op [src, mask] = emitPdepCall res src mask W16
638 emitPrimOp _ [res] Pdep32Op [src, mask] = emitPdepCall res src mask W32
639 emitPrimOp _ [res] Pdep64Op [src, mask] = emitPdepCall res src mask W64
640 emitPrimOp dflags [res] PdepOp [src, mask] = emitPdepCall res src mask (wordWidth dflags)
641
642 -- Parallel bit extract
643 emitPrimOp _ [res] Pext8Op [src, mask] = emitPextCall res src mask W8
644 emitPrimOp _ [res] Pext16Op [src, mask] = emitPextCall res src mask W16
645 emitPrimOp _ [res] Pext32Op [src, mask] = emitPextCall res src mask W32
646 emitPrimOp _ [res] Pext64Op [src, mask] = emitPextCall res src mask W64
647 emitPrimOp dflags [res] PextOp [src, mask] = emitPextCall res src mask (wordWidth dflags)
648
649 -- count leading zeros
650 emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
651 emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
652 emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
653 emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
654 emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
655
656 -- count trailing zeros
657 emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
658 emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
659 emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
660 emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
661 emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
662
663 -- Unsigned int to floating point conversions
664 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
665 (MO_UF_Conv W32) [w]
666 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
667 (MO_UF_Conv W64) [w]
668
669 -- SIMD primops
670 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
671 checkVecCompatibility dflags vcat n w
672 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
673 where
674 zeros :: CmmExpr
675 zeros = CmmLit $ CmmVec (replicate n zero)
676
677 zero :: CmmLit
678 zero = case vcat of
679 IntVec -> CmmInt 0 w
680 WordVec -> CmmInt 0 w
681 FloatVec -> CmmFloat 0 w
682
683 ty :: CmmType
684 ty = vecVmmType vcat n w
685
686 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
687 checkVecCompatibility dflags vcat n w
688 when (es `lengthIsNot` n) $
689 panic "emitPrimOp: VecPackOp has wrong number of arguments"
690 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
691 where
692 zeros :: CmmExpr
693 zeros = CmmLit $ CmmVec (replicate n zero)
694
695 zero :: CmmLit
696 zero = case vcat of
697 IntVec -> CmmInt 0 w
698 WordVec -> CmmInt 0 w
699 FloatVec -> CmmFloat 0 w
700
701 ty :: CmmType
702 ty = vecVmmType vcat n w
703
704 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
705 checkVecCompatibility dflags vcat n w
706 when (res `lengthIsNot` n) $
707 panic "emitPrimOp: VecUnpackOp has wrong number of results"
708 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
709 where
710 ty :: CmmType
711 ty = vecVmmType vcat n w
712
713 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
714 checkVecCompatibility dflags vcat n w
715 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
716 where
717 ty :: CmmType
718 ty = vecVmmType vcat n w
719
720 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
721 checkVecCompatibility dflags vcat n w
722 doIndexByteArrayOp Nothing ty res args
723 where
724 ty :: CmmType
725 ty = vecVmmType vcat n w
726
727 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
728 checkVecCompatibility dflags vcat n w
729 doIndexByteArrayOp Nothing ty res args
730 where
731 ty :: CmmType
732 ty = vecVmmType vcat n w
733
734 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
735 checkVecCompatibility dflags vcat n w
736 doWriteByteArrayOp Nothing ty res args
737 where
738 ty :: CmmType
739 ty = vecVmmType vcat n w
740
741 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
742 checkVecCompatibility dflags vcat n w
743 doIndexOffAddrOp Nothing ty res args
744 where
745 ty :: CmmType
746 ty = vecVmmType vcat n w
747
748 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
749 checkVecCompatibility dflags vcat n w
750 doIndexOffAddrOp Nothing ty res args
751 where
752 ty :: CmmType
753 ty = vecVmmType vcat n w
754
755 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
756 checkVecCompatibility dflags vcat n w
757 doWriteOffAddrOp Nothing ty res args
758 where
759 ty :: CmmType
760 ty = vecVmmType vcat n w
761
762 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
763 checkVecCompatibility dflags vcat n w
764 doIndexByteArrayOpAs Nothing vecty ty res args
765 where
766 vecty :: CmmType
767 vecty = vecVmmType vcat n w
768
769 ty :: CmmType
770 ty = vecCmmCat vcat w
771
772 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
773 checkVecCompatibility dflags vcat n w
774 doIndexByteArrayOpAs Nothing vecty ty res args
775 where
776 vecty :: CmmType
777 vecty = vecVmmType vcat n w
778
779 ty :: CmmType
780 ty = vecCmmCat vcat w
781
782 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
783 checkVecCompatibility dflags vcat n w
784 doWriteByteArrayOp Nothing ty res args
785 where
786 ty :: CmmType
787 ty = vecCmmCat vcat w
788
789 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
790 checkVecCompatibility dflags vcat n w
791 doIndexOffAddrOpAs Nothing vecty ty res args
792 where
793 vecty :: CmmType
794 vecty = vecVmmType vcat n w
795
796 ty :: CmmType
797 ty = vecCmmCat vcat w
798
799 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
800 checkVecCompatibility dflags vcat n w
801 doIndexOffAddrOpAs Nothing vecty ty res args
802 where
803 vecty :: CmmType
804 vecty = vecVmmType vcat n w
805
806 ty :: CmmType
807 ty = vecCmmCat vcat w
808
809 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
810 checkVecCompatibility dflags vcat n w
811 doWriteOffAddrOp Nothing ty res args
812 where
813 ty :: CmmType
814 ty = vecCmmCat vcat w
815
816 -- Prefetch
817 emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
818 emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
819 emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
820 emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
821
822 emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
823 emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
824 emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
825 emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
826
827 emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
828 emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
829 emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
830 emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
831
832 emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
833 emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
834 emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
835 emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
836
837 -- Atomic read-modify-write
838 emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
839 doAtomicRMW res AMO_Add mba ix (bWord dflags) n
840 emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
841 doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
842 emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
843 doAtomicRMW res AMO_And mba ix (bWord dflags) n
844 emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
845 doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
846 emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
847 doAtomicRMW res AMO_Or mba ix (bWord dflags) n
848 emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
849 doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
850 emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
851 doAtomicReadByteArray res mba ix (bWord dflags)
852 emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
853 doAtomicWriteByteArray mba ix (bWord dflags) val
854 emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
855 doCasByteArray res mba ix (bWord dflags) old new
856
857 -- The rest just translate straightforwardly
858 emitPrimOp dflags [res] op [arg]
859 | nopOp op
860 = emitAssign (CmmLocal res) arg
861
862 | Just (mop,rep) <- narrowOp op
863 = emitAssign (CmmLocal res) $
864 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
865
866 emitPrimOp dflags r@[res] op args
867 | Just prim <- callishOp op
868 = do emitPrimCall r prim args
869
870 | Just mop <- translateOp dflags op
871 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
872 emit stmt
873
874 emitPrimOp dflags results op args
875 = case callishPrimOpSupported dflags op of
876 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
877 Right gen -> gen results args
878
879 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
880
881 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
882 callishPrimOpSupported dflags op
883 = case op of
884 IntQuotRemOp | ncg && (x86ish || ppc) ->
885 Left (MO_S_QuotRem (wordWidth dflags))
886 | otherwise ->
887 Right (genericIntQuotRemOp (wordWidth dflags))
888
889 Int8QuotRemOp | ncg && (x86ish || ppc)
890 -> Left (MO_S_QuotRem W8)
891 | otherwise -> Right (genericIntQuotRemOp W8)
892
893 Int16QuotRemOp | ncg && (x86ish || ppc)
894 -> Left (MO_S_QuotRem W16)
895 | otherwise -> Right (genericIntQuotRemOp W16)
896
897
898 WordQuotRemOp | ncg && (x86ish || ppc) ->
899 Left (MO_U_QuotRem (wordWidth dflags))
900 | otherwise ->
901 Right (genericWordQuotRemOp (wordWidth dflags))
902
903 WordQuotRem2Op | (ncg && (x86ish || ppc))
904 || llvm -> Left (MO_U_QuotRem2 (wordWidth dflags))
905 | otherwise -> Right (genericWordQuotRem2Op dflags)
906
907 Word8QuotRemOp | ncg && (x86ish || ppc)
908 -> Left (MO_U_QuotRem W8)
909 | otherwise -> Right (genericWordQuotRemOp W8)
910
911 Word16QuotRemOp| ncg && (x86ish || ppc)
912 -> Left (MO_U_QuotRem W16)
913 | otherwise -> Right (genericWordQuotRemOp W16)
914
915 WordAdd2Op | (ncg && (x86ish || ppc))
916 || llvm -> Left (MO_Add2 (wordWidth dflags))
917 | otherwise -> Right genericWordAdd2Op
918
919 WordAddCOp | (ncg && (x86ish || ppc))
920 || llvm -> Left (MO_AddWordC (wordWidth dflags))
921 | otherwise -> Right genericWordAddCOp
922
923 WordSubCOp | (ncg && (x86ish || ppc))
924 || llvm -> Left (MO_SubWordC (wordWidth dflags))
925 | otherwise -> Right genericWordSubCOp
926
927 IntAddCOp | (ncg && (x86ish || ppc))
928 || llvm -> Left (MO_AddIntC (wordWidth dflags))
929 | otherwise -> Right genericIntAddCOp
930
931 IntSubCOp | (ncg && (x86ish || ppc))
932 || llvm -> Left (MO_SubIntC (wordWidth dflags))
933 | otherwise -> Right genericIntSubCOp
934
935 WordMul2Op | ncg && (x86ish || ppc)
936 || llvm -> Left (MO_U_Mul2 (wordWidth dflags))
937 | otherwise -> Right genericWordMul2Op
938 FloatFabsOp | (ncg && x86ish || ppc)
939 || llvm -> Left MO_F32_Fabs
940 | otherwise -> Right $ genericFabsOp W32
941 DoubleFabsOp | (ncg && x86ish || ppc)
942 || llvm -> Left MO_F64_Fabs
943 | otherwise -> Right $ genericFabsOp W64
944
945 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
946 where
947 ncg = case hscTarget dflags of
948 HscAsm -> True
949 _ -> False
950 llvm = case hscTarget dflags of
951 HscLlvm -> True
952 _ -> False
953 x86ish = case platformArch (targetPlatform dflags) of
954 ArchX86 -> True
955 ArchX86_64 -> True
956 _ -> False
957 ppc = case platformArch (targetPlatform dflags) of
958 ArchPPC -> True
959 ArchPPC_64 _ -> True
960 _ -> False
961
962 genericIntQuotRemOp :: Width -> GenericOp
963 genericIntQuotRemOp width [res_q, res_r] [arg_x, arg_y]
964 = emit $ mkAssign (CmmLocal res_q)
965 (CmmMachOp (MO_S_Quot width) [arg_x, arg_y]) <*>
966 mkAssign (CmmLocal res_r)
967 (CmmMachOp (MO_S_Rem width) [arg_x, arg_y])
968 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
969
970 genericWordQuotRemOp :: Width -> GenericOp
971 genericWordQuotRemOp width [res_q, res_r] [arg_x, arg_y]
972 = emit $ mkAssign (CmmLocal res_q)
973 (CmmMachOp (MO_U_Quot width) [arg_x, arg_y]) <*>
974 mkAssign (CmmLocal res_r)
975 (CmmMachOp (MO_U_Rem width) [arg_x, arg_y])
976 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
977
978 genericWordQuotRem2Op :: DynFlags -> GenericOp
979 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
980 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
981 where ty = cmmExprType dflags arg_x_high
982 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
983 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
984 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
985 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
986 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
987 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
988 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
989 zero = lit 0
990 one = lit 1
991 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
992 lit i = CmmLit (CmmInt i (wordWidth dflags))
993
994 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
995 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
996 mkAssign (CmmLocal res_r) high)
997 f i acc high low =
998 do roverflowedBit <- newTemp ty
999 rhigh' <- newTemp ty
1000 rhigh'' <- newTemp ty
1001 rlow' <- newTemp ty
1002 risge <- newTemp ty
1003 racc' <- newTemp ty
1004 let high' = CmmReg (CmmLocal rhigh')
1005 isge = CmmReg (CmmLocal risge)
1006 overflowedBit = CmmReg (CmmLocal roverflowedBit)
1007 let this = catAGraphs
1008 [mkAssign (CmmLocal roverflowedBit)
1009 (shr high negone),
1010 mkAssign (CmmLocal rhigh')
1011 (or (shl high one) (shr low negone)),
1012 mkAssign (CmmLocal rlow')
1013 (shl low one),
1014 mkAssign (CmmLocal risge)
1015 (or (overflowedBit `ne` zero)
1016 (high' `ge` arg_y)),
1017 mkAssign (CmmLocal rhigh'')
1018 (high' `minus` (arg_y `times` isge)),
1019 mkAssign (CmmLocal racc')
1020 (or (shl acc one) isge)]
1021 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
1022 (CmmReg (CmmLocal rhigh''))
1023 (CmmReg (CmmLocal rlow'))
1024 return (this <*> rest)
1025 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
1026
1027 genericWordAdd2Op :: GenericOp
1028 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
1029 = do dflags <- getDynFlags
1030 r1 <- newTemp (cmmExprType dflags arg_x)
1031 r2 <- newTemp (cmmExprType dflags arg_x)
1032 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
1033 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
1034 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
1035 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
1036 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
1037 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
1038 (wordWidth dflags))
1039 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
1040 emit $ catAGraphs
1041 [mkAssign (CmmLocal r1)
1042 (add (bottomHalf arg_x) (bottomHalf arg_y)),
1043 mkAssign (CmmLocal r2)
1044 (add (topHalf (CmmReg (CmmLocal r1)))
1045 (add (topHalf arg_x) (topHalf arg_y))),
1046 mkAssign (CmmLocal res_h)
1047 (topHalf (CmmReg (CmmLocal r2))),
1048 mkAssign (CmmLocal res_l)
1049 (or (toTopHalf (CmmReg (CmmLocal r2)))
1050 (bottomHalf (CmmReg (CmmLocal r1))))]
1051 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
1052
1053 -- | Implements branchless recovery of the carry flag @c@ by checking the
1054 -- leftmost bits of both inputs @a@ and @b@ and result @r = a + b@:
1055 --
1056 -- @
1057 -- c = a&b | (a|b)&~r
1058 -- @
1059 --
1060 -- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
1061 genericWordAddCOp :: GenericOp
1062 genericWordAddCOp [res_r, res_c] [aa, bb]
1063 = do dflags <- getDynFlags
1064 emit $ catAGraphs [
1065 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
1066 mkAssign (CmmLocal res_c) $
1067 CmmMachOp (mo_wordUShr dflags) [
1068 CmmMachOp (mo_wordOr dflags) [
1069 CmmMachOp (mo_wordAnd dflags) [aa,bb],
1070 CmmMachOp (mo_wordAnd dflags) [
1071 CmmMachOp (mo_wordOr dflags) [aa,bb],
1072 CmmMachOp (mo_wordNot dflags) [CmmReg (CmmLocal res_r)]
1073 ]
1074 ],
1075 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1076 ]
1077 ]
1078 genericWordAddCOp _ _ = panic "genericWordAddCOp"
1079
1080 -- | Implements branchless recovery of the carry flag @c@ by checking the
1081 -- leftmost bits of both inputs @a@ and @b@ and result @r = a - b@:
1082 --
1083 -- @
1084 -- c = ~a&b | (~a|b)&r
1085 -- @
1086 --
1087 -- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
1088 genericWordSubCOp :: GenericOp
1089 genericWordSubCOp [res_r, res_c] [aa, bb]
1090 = do dflags <- getDynFlags
1091 emit $ catAGraphs [
1092 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
1093 mkAssign (CmmLocal res_c) $
1094 CmmMachOp (mo_wordUShr dflags) [
1095 CmmMachOp (mo_wordOr dflags) [
1096 CmmMachOp (mo_wordAnd dflags) [
1097 CmmMachOp (mo_wordNot dflags) [aa],
1098 bb
1099 ],
1100 CmmMachOp (mo_wordAnd dflags) [
1101 CmmMachOp (mo_wordOr dflags) [
1102 CmmMachOp (mo_wordNot dflags) [aa],
1103 bb
1104 ],
1105 CmmReg (CmmLocal res_r)
1106 ]
1107 ],
1108 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1109 ]
1110 ]
1111 genericWordSubCOp _ _ = panic "genericWordSubCOp"
1112
1113 genericIntAddCOp :: GenericOp
1114 genericIntAddCOp [res_r, res_c] [aa, bb]
1115 {-
1116 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
1117 C, and without needing any comparisons. This may not be the
1118 fastest way to do it - if you have better code, please send it! --SDM
1119
1120 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
1121
1122 We currently don't make use of the r value if c is != 0 (i.e.
1123 overflow), we just convert to big integers and try again. This
1124 could be improved by making r and c the correct values for
1125 plugging into a new J#.
1126
1127 { r = ((I_)(a)) + ((I_)(b)); \
1128 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
1129 >> (BITS_IN (I_) - 1); \
1130 }
1131 Wading through the mass of bracketry, it seems to reduce to:
1132 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
1133
1134 -}
1135 = do dflags <- getDynFlags
1136 emit $ catAGraphs [
1137 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
1138 mkAssign (CmmLocal res_c) $
1139 CmmMachOp (mo_wordUShr dflags) [
1140 CmmMachOp (mo_wordAnd dflags) [
1141 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
1142 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
1143 ],
1144 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1145 ]
1146 ]
1147 genericIntAddCOp _ _ = panic "genericIntAddCOp"
1148
1149 genericIntSubCOp :: GenericOp
1150 genericIntSubCOp [res_r, res_c] [aa, bb]
1151 {- Similarly:
1152 #define subIntCzh(r,c,a,b) \
1153 { r = ((I_)(a)) - ((I_)(b)); \
1154 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
1155 >> (BITS_IN (I_) - 1); \
1156 }
1157
1158 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
1159 -}
1160 = do dflags <- getDynFlags
1161 emit $ catAGraphs [
1162 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
1163 mkAssign (CmmLocal res_c) $
1164 CmmMachOp (mo_wordUShr dflags) [
1165 CmmMachOp (mo_wordAnd dflags) [
1166 CmmMachOp (mo_wordXor dflags) [aa,bb],
1167 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
1168 ],
1169 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1170 ]
1171 ]
1172 genericIntSubCOp _ _ = panic "genericIntSubCOp"
1173
1174 genericWordMul2Op :: GenericOp
1175 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
1176 = do dflags <- getDynFlags
1177 let t = cmmExprType dflags arg_x
1178 xlyl <- liftM CmmLocal $ newTemp t
1179 xlyh <- liftM CmmLocal $ newTemp t
1180 xhyl <- liftM CmmLocal $ newTemp t
1181 r <- liftM CmmLocal $ newTemp t
1182 -- This generic implementation is very simple and slow. We might
1183 -- well be able to do better, but for now this at least works.
1184 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
1185 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
1186 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
1187 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
1188 sum = foldl1 add
1189 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
1190 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
1191 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
1192 (wordWidth dflags))
1193 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
1194 emit $ catAGraphs
1195 [mkAssign xlyl
1196 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
1197 mkAssign xlyh
1198 (mul (bottomHalf arg_x) (topHalf arg_y)),
1199 mkAssign xhyl
1200 (mul (topHalf arg_x) (bottomHalf arg_y)),
1201 mkAssign r
1202 (sum [topHalf (CmmReg xlyl),
1203 bottomHalf (CmmReg xhyl),
1204 bottomHalf (CmmReg xlyh)]),
1205 mkAssign (CmmLocal res_l)
1206 (or (bottomHalf (CmmReg xlyl))
1207 (toTopHalf (CmmReg r))),
1208 mkAssign (CmmLocal res_h)
1209 (sum [mul (topHalf arg_x) (topHalf arg_y),
1210 topHalf (CmmReg xhyl),
1211 topHalf (CmmReg xlyh),
1212 topHalf (CmmReg r)])]
1213 genericWordMul2Op _ _ = panic "genericWordMul2Op"
1214
1215 -- This replicates what we had in libraries/base/GHC/Float.hs:
1216 --
1217 -- abs x | x == 0 = 0 -- handles (-0.0)
1218 -- | x > 0 = x
1219 -- | otherwise = negateFloat x
1220 genericFabsOp :: Width -> GenericOp
1221 genericFabsOp w [res_r] [aa]
1222 = do dflags <- getDynFlags
1223 let zero = CmmLit (CmmFloat 0 w)
1224
1225 eq x y = CmmMachOp (MO_F_Eq w) [x, y]
1226 gt x y = CmmMachOp (MO_F_Gt w) [x, y]
1227
1228 neg x = CmmMachOp (MO_F_Neg w) [x]
1229
1230 g1 = catAGraphs [mkAssign (CmmLocal res_r) zero]
1231 g2 = catAGraphs [mkAssign (CmmLocal res_r) aa]
1232
1233 res_t <- CmmLocal <$> newTemp (cmmExprType dflags aa)
1234 let g3 = catAGraphs [mkAssign res_t aa,
1235 mkAssign (CmmLocal res_r) (neg (CmmReg res_t))]
1236
1237 g4 <- mkCmmIfThenElse (gt aa zero) g2 g3
1238
1239 emit =<< mkCmmIfThenElse (eq aa zero) g1 g4
1240
1241 genericFabsOp _ _ _ = panic "genericFabsOp"
1242
1243 -- These PrimOps are NOPs in Cmm
1244
1245 nopOp :: PrimOp -> Bool
1246 nopOp Int2WordOp = True
1247 nopOp Word2IntOp = True
1248 nopOp Int2AddrOp = True
1249 nopOp Addr2IntOp = True
1250 nopOp ChrOp = True -- Int# and Char# are rep'd the same
1251 nopOp OrdOp = True
1252 nopOp _ = False
1253
1254 -- These PrimOps turn into double casts
1255
1256 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
1257 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
1258 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
1259 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
1260 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
1261 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
1262 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
1263 narrowOp _ = Nothing
1264
1265 -- Native word signless ops
1266
1267 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
1268 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
1269 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
1270 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
1271 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
1272 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
1273 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
1274
1275 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
1276 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
1277 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
1278 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
1279 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
1280 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
1281
1282 translateOp dflags AndOp = Just (mo_wordAnd dflags)
1283 translateOp dflags OrOp = Just (mo_wordOr dflags)
1284 translateOp dflags XorOp = Just (mo_wordXor dflags)
1285 translateOp dflags NotOp = Just (mo_wordNot dflags)
1286 translateOp dflags SllOp = Just (mo_wordShl dflags)
1287 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
1288
1289 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
1290
1291 -- Native word signed ops
1292
1293 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
1294 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
1295 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
1296 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
1297 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
1298
1299
1300 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
1301 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
1302 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
1303 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
1304
1305 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
1306 translateOp dflags OrIOp = Just (mo_wordOr dflags)
1307 translateOp dflags XorIOp = Just (mo_wordXor dflags)
1308 translateOp dflags NotIOp = Just (mo_wordNot dflags)
1309 translateOp dflags ISllOp = Just (mo_wordShl dflags)
1310 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
1311 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
1312
1313 -- Native word unsigned ops
1314
1315 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
1316 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
1317 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
1318 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
1319
1320 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
1321 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
1322 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
1323
1324 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
1325 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
1326 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
1327 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
1328
1329 -- Int8# signed ops
1330
1331 translateOp dflags Int8Extend = Just (MO_SS_Conv W8 (wordWidth dflags))
1332 translateOp dflags Int8Narrow = Just (MO_SS_Conv (wordWidth dflags) W8)
1333 translateOp _ Int8NegOp = Just (MO_S_Neg W8)
1334 translateOp _ Int8AddOp = Just (MO_Add W8)
1335 translateOp _ Int8SubOp = Just (MO_Sub W8)
1336 translateOp _ Int8MulOp = Just (MO_Mul W8)
1337 translateOp _ Int8QuotOp = Just (MO_S_Quot W8)
1338 translateOp _ Int8RemOp = Just (MO_S_Rem W8)
1339
1340 translateOp _ Int8EqOp = Just (MO_Eq W8)
1341 translateOp _ Int8GeOp = Just (MO_S_Ge W8)
1342 translateOp _ Int8GtOp = Just (MO_S_Gt W8)
1343 translateOp _ Int8LeOp = Just (MO_S_Le W8)
1344 translateOp _ Int8LtOp = Just (MO_S_Lt W8)
1345 translateOp _ Int8NeOp = Just (MO_Ne W8)
1346
1347 -- Word8# unsigned ops
1348
1349 translateOp dflags Word8Extend = Just (MO_UU_Conv W8 (wordWidth dflags))
1350 translateOp dflags Word8Narrow = Just (MO_UU_Conv (wordWidth dflags) W8)
1351 translateOp _ Word8NotOp = Just (MO_Not W8)
1352 translateOp _ Word8AddOp = Just (MO_Add W8)
1353 translateOp _ Word8SubOp = Just (MO_Sub W8)
1354 translateOp _ Word8MulOp = Just (MO_Mul W8)
1355 translateOp _ Word8QuotOp = Just (MO_U_Quot W8)
1356 translateOp _ Word8RemOp = Just (MO_U_Rem W8)
1357
1358 translateOp _ Word8EqOp = Just (MO_Eq W8)
1359 translateOp _ Word8GeOp = Just (MO_U_Ge W8)
1360 translateOp _ Word8GtOp = Just (MO_U_Gt W8)
1361 translateOp _ Word8LeOp = Just (MO_U_Le W8)
1362 translateOp _ Word8LtOp = Just (MO_U_Lt W8)
1363 translateOp _ Word8NeOp = Just (MO_Ne W8)
1364
1365 -- Int16# signed ops
1366
1367 translateOp dflags Int16Extend = Just (MO_SS_Conv W16 (wordWidth dflags))
1368 translateOp dflags Int16Narrow = Just (MO_SS_Conv (wordWidth dflags) W16)
1369 translateOp _ Int16NegOp = Just (MO_S_Neg W16)
1370 translateOp _ Int16AddOp = Just (MO_Add W16)
1371 translateOp _ Int16SubOp = Just (MO_Sub W16)
1372 translateOp _ Int16MulOp = Just (MO_Mul W16)
1373 translateOp _ Int16QuotOp = Just (MO_S_Quot W16)
1374 translateOp _ Int16RemOp = Just (MO_S_Rem W16)
1375
1376 translateOp _ Int16EqOp = Just (MO_Eq W16)
1377 translateOp _ Int16GeOp = Just (MO_S_Ge W16)
1378 translateOp _ Int16GtOp = Just (MO_S_Gt W16)
1379 translateOp _ Int16LeOp = Just (MO_S_Le W16)
1380 translateOp _ Int16LtOp = Just (MO_S_Lt W16)
1381 translateOp _ Int16NeOp = Just (MO_Ne W16)
1382
1383 -- Word16# unsigned ops
1384
1385 translateOp dflags Word16Extend = Just (MO_UU_Conv W16 (wordWidth dflags))
1386 translateOp dflags Word16Narrow = Just (MO_UU_Conv (wordWidth dflags) W16)
1387 translateOp _ Word16NotOp = Just (MO_Not W16)
1388 translateOp _ Word16AddOp = Just (MO_Add W16)
1389 translateOp _ Word16SubOp = Just (MO_Sub W16)
1390 translateOp _ Word16MulOp = Just (MO_Mul W16)
1391 translateOp _ Word16QuotOp = Just (MO_U_Quot W16)
1392 translateOp _ Word16RemOp = Just (MO_U_Rem W16)
1393
1394 translateOp _ Word16EqOp = Just (MO_Eq W16)
1395 translateOp _ Word16GeOp = Just (MO_U_Ge W16)
1396 translateOp _ Word16GtOp = Just (MO_U_Gt W16)
1397 translateOp _ Word16LeOp = Just (MO_U_Le W16)
1398 translateOp _ Word16LtOp = Just (MO_U_Lt W16)
1399 translateOp _ Word16NeOp = Just (MO_Ne W16)
1400
1401 -- Char# ops
1402
1403 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
1404 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
1405 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
1406 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
1407 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
1408 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
1409
1410 -- Double ops
1411
1412 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
1413 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
1414 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
1415 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
1416 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
1417 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
1418
1419 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
1420 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
1421 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
1422 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
1423 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
1424
1425 -- Float ops
1426
1427 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
1428 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
1429 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
1430 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1431 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1432 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1433
1434 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1435 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1436 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1437 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1438 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1439
1440 -- Vector ops
1441
1442 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1443 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1444 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1445 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1446 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1447
1448 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1449 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1450 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1451 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1452 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1453 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1454
1455 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1456 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1457 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1458 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1459 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1460
1461 -- Conversions
1462
1463 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1464 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1465
1466 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1467 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1468
1469 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1470 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1471
1472 -- Word comparisons masquerading as more exotic things.
1473
1474 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1475 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1476 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1477 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1478 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1479 translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
1480 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1481 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1482 -- See Note [Comparing stable names]
1483 translateOp dflags EqStableNameOp = Just (mo_wordEq dflags)
1484
1485 translateOp _ _ = Nothing
1486
1487 -- Note [Comparing stable names]
1488 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1489 --
1490 -- A StableName# is actually a pointer to a stable name object (SNO)
1491 -- containing an index into the stable name table (SNT). We
1492 -- used to compare StableName#s by following the pointers to the
1493 -- SNOs and checking whether they held the same SNT indices. However,
1494 -- this is not necessary: there is a one-to-one correspondence
1495 -- between SNOs and entries in the SNT, so simple pointer equality
1496 -- does the trick.
1497
1498 -- These primops are implemented by CallishMachOps, because they sometimes
1499 -- turn into foreign calls depending on the backend.
1500
1501 callishOp :: PrimOp -> Maybe CallishMachOp
1502 callishOp DoublePowerOp = Just MO_F64_Pwr
1503 callishOp DoubleSinOp = Just MO_F64_Sin
1504 callishOp DoubleCosOp = Just MO_F64_Cos
1505 callishOp DoubleTanOp = Just MO_F64_Tan
1506 callishOp DoubleSinhOp = Just MO_F64_Sinh
1507 callishOp DoubleCoshOp = Just MO_F64_Cosh
1508 callishOp DoubleTanhOp = Just MO_F64_Tanh
1509 callishOp DoubleAsinOp = Just MO_F64_Asin
1510 callishOp DoubleAcosOp = Just MO_F64_Acos
1511 callishOp DoubleAtanOp = Just MO_F64_Atan
1512 callishOp DoubleAsinhOp = Just MO_F64_Asinh
1513 callishOp DoubleAcoshOp = Just MO_F64_Acosh
1514 callishOp DoubleAtanhOp = Just MO_F64_Atanh
1515 callishOp DoubleLogOp = Just MO_F64_Log
1516 callishOp DoubleExpOp = Just MO_F64_Exp
1517 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1518
1519 callishOp FloatPowerOp = Just MO_F32_Pwr
1520 callishOp FloatSinOp = Just MO_F32_Sin
1521 callishOp FloatCosOp = Just MO_F32_Cos
1522 callishOp FloatTanOp = Just MO_F32_Tan
1523 callishOp FloatSinhOp = Just MO_F32_Sinh
1524 callishOp FloatCoshOp = Just MO_F32_Cosh
1525 callishOp FloatTanhOp = Just MO_F32_Tanh
1526 callishOp FloatAsinOp = Just MO_F32_Asin
1527 callishOp FloatAcosOp = Just MO_F32_Acos
1528 callishOp FloatAtanOp = Just MO_F32_Atan
1529 callishOp FloatAsinhOp = Just MO_F32_Asinh
1530 callishOp FloatAcoshOp = Just MO_F32_Acosh
1531 callishOp FloatAtanhOp = Just MO_F32_Atanh
1532 callishOp FloatLogOp = Just MO_F32_Log
1533 callishOp FloatExpOp = Just MO_F32_Exp
1534 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1535
1536 callishOp _ = Nothing
1537
1538 ------------------------------------------------------------------------------
1539 -- Helpers for translating various minor variants of array indexing.
1540
1541 doIndexOffAddrOp :: Maybe MachOp
1542 -> CmmType
1543 -> [LocalReg]
1544 -> [CmmExpr]
1545 -> FCode ()
1546 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1547 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1548 doIndexOffAddrOp _ _ _ _
1549 = panic "StgCmmPrim: doIndexOffAddrOp"
1550
1551 doIndexOffAddrOpAs :: Maybe MachOp
1552 -> CmmType
1553 -> CmmType
1554 -> [LocalReg]
1555 -> [CmmExpr]
1556 -> FCode ()
1557 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1558 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1559 doIndexOffAddrOpAs _ _ _ _ _
1560 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1561
1562 doIndexByteArrayOp :: Maybe MachOp
1563 -> CmmType
1564 -> [LocalReg]
1565 -> [CmmExpr]
1566 -> FCode ()
1567 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1568 = do dflags <- getDynFlags
1569 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1570 doIndexByteArrayOp _ _ _ _
1571 = panic "StgCmmPrim: doIndexByteArrayOp"
1572
1573 doIndexByteArrayOpAs :: Maybe MachOp
1574 -> CmmType
1575 -> CmmType
1576 -> [LocalReg]
1577 -> [CmmExpr]
1578 -> FCode ()
1579 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1580 = do dflags <- getDynFlags
1581 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1582 doIndexByteArrayOpAs _ _ _ _ _
1583 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1584
1585 doReadPtrArrayOp :: LocalReg
1586 -> CmmExpr
1587 -> CmmExpr
1588 -> FCode ()
1589 doReadPtrArrayOp res addr idx
1590 = do dflags <- getDynFlags
1591 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1592
1593 doWriteOffAddrOp :: Maybe MachOp
1594 -> CmmType
1595 -> [LocalReg]
1596 -> [CmmExpr]
1597 -> FCode ()
1598 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1599 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1600 doWriteOffAddrOp _ _ _ _
1601 = panic "StgCmmPrim: doWriteOffAddrOp"
1602
1603 doWriteByteArrayOp :: Maybe MachOp
1604 -> CmmType
1605 -> [LocalReg]
1606 -> [CmmExpr]
1607 -> FCode ()
1608 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1609 = do dflags <- getDynFlags
1610 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1611 doWriteByteArrayOp _ _ _ _
1612 = panic "StgCmmPrim: doWriteByteArrayOp"
1613
1614 doWritePtrArrayOp :: CmmExpr
1615 -> CmmExpr
1616 -> CmmExpr
1617 -> FCode ()
1618 doWritePtrArrayOp addr idx val
1619 = do dflags <- getDynFlags
1620 let ty = cmmExprType dflags val
1621 -- This write barrier is to ensure that the heap writes to the object
1622 -- referred to by val have happened before we write val into the array.
1623 -- See #12469 for details.
1624 emitPrimCall [] MO_WriteBarrier []
1625 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1626 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1627 -- the write barrier. We must write a byte into the mark table:
1628 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1629 emit $ mkStore (
1630 cmmOffsetExpr dflags
1631 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1632 (loadArrPtrsSize dflags addr))
1633 (CmmMachOp (mo_wordUShr dflags) [idx,
1634 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1635 ) (CmmLit (CmmInt 1 W8))
1636
1637 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1638 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1639 where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1640
1641 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1642 -> Maybe MachOp -- Optional result cast
1643 -> CmmType -- Type of element we are accessing
1644 -> LocalReg -- Destination
1645 -> CmmExpr -- Base address
1646 -> CmmType -- Type of element by which we are indexing
1647 -> CmmExpr -- Index
1648 -> FCode ()
1649 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1650 = do dflags <- getDynFlags
1651 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1652 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1653 = do dflags <- getDynFlags
1654 emitAssign (CmmLocal res) (CmmMachOp cast [
1655 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1656
1657 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1658 -> Maybe MachOp -- Optional value cast
1659 -> CmmExpr -- Base address
1660 -> CmmType -- Type of element by which we are indexing
1661 -> CmmExpr -- Index
1662 -> CmmExpr -- Value to write
1663 -> FCode ()
1664 mkBasicIndexedWrite off Nothing base idx_ty idx val
1665 = do dflags <- getDynFlags
1666 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1667 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1668 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1669
1670 -- ----------------------------------------------------------------------------
1671 -- Misc utils
1672
1673 cmmIndexOffExpr :: DynFlags
1674 -> ByteOff -- Initial offset in bytes
1675 -> Width -- Width of element by which we are indexing
1676 -> CmmExpr -- Base address
1677 -> CmmExpr -- Index
1678 -> CmmExpr
1679 cmmIndexOffExpr dflags off width base idx
1680 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1681
1682 cmmLoadIndexOffExpr :: DynFlags
1683 -> ByteOff -- Initial offset in bytes
1684 -> CmmType -- Type of element we are accessing
1685 -> CmmExpr -- Base address
1686 -> CmmType -- Type of element by which we are indexing
1687 -> CmmExpr -- Index
1688 -> CmmExpr
1689 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1690 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1691
1692 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1693 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1694
1695 ------------------------------------------------------------------------------
1696 -- Helpers for translating vector primops.
1697
1698 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1699 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1700
1701 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1702 vecCmmCat IntVec = cmmBits
1703 vecCmmCat WordVec = cmmBits
1704 vecCmmCat FloatVec = cmmFloat
1705
1706 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1707 vecElemInjectCast _ FloatVec _ = Nothing
1708 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1709 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1710 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1711 vecElemInjectCast _ IntVec W64 = Nothing
1712 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1713 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1714 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1715 vecElemInjectCast _ WordVec W64 = Nothing
1716 vecElemInjectCast _ _ _ = Nothing
1717
1718 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1719 vecElemProjectCast _ FloatVec _ = Nothing
1720 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1721 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1722 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1723 vecElemProjectCast _ IntVec W64 = Nothing
1724 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1725 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1726 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1727 vecElemProjectCast _ WordVec W64 = Nothing
1728 vecElemProjectCast _ _ _ = Nothing
1729
1730
1731 -- NOTE [SIMD Design for the future]
1732 -- Check to make sure that we can generate code for the specified vector type
1733 -- given the current set of dynamic flags.
1734 -- Currently these checks are specific to x86 and x86_64 architecture.
1735 -- This should be fixed!
1736 -- In particular,
1737 -- 1) Add better support for other architectures! (this may require a redesign)
1738 -- 2) Decouple design choices from LLVM's pseudo SIMD model!
1739 -- The high level LLVM naive rep makes per CPU family SIMD generation is own
1740 -- optimization problem, and hides important differences in eg ARM vs x86_64 simd
1741 -- 3) Depending on the architecture, the SIMD registers may also support general
1742 -- computations on Float/Double/Word/Int scalars, but currently on
1743 -- for example x86_64, we always put Word/Int (or sized) in GPR
1744 -- (general purpose) registers. Would relaxing that allow for
1745 -- useful optimization opportunities?
1746 -- Phrased differently, it is worth experimenting with supporting
1747 -- different register mapping strategies than we currently have, especially if
1748 -- someday we want SIMD to be a first class denizen in GHC along with scalar
1749 -- values!
1750 -- The current design with respect to register mapping of scalars could
1751 -- very well be the best,but exploring the design space and doing careful
1752 -- measurments is the only only way to validate that.
1753 -- In some next generation CPU ISAs, notably RISC V, the SIMD extension
1754 -- includes support for a sort of run time CPU dependent vectorization parameter,
1755 -- where a loop may act upon a single scalar each iteration OR some 2,4,8 ...
1756 -- element chunk! Time will tell if that direction sees wide adoption,
1757 -- but it is from that context that unifying our handling of simd and scalars
1758 -- may benefit. It is not likely to benefit current architectures, though
1759 -- it may very well be a design perspective that helps guide improving the NCG.
1760
1761
1762 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1763 checkVecCompatibility dflags vcat l w = do
1764 when (hscTarget dflags /= HscLlvm) $ do
1765 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1766 ,"Please use -fllvm."]
1767 check vecWidth vcat l w
1768 where
1769 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1770 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1771 sorry $ "128-bit wide single-precision floating point " ++
1772 "SIMD vector instructions require at least -msse."
1773 check W128 _ _ _ | not (isSse2Enabled dflags) =
1774 sorry $ "128-bit wide integer and double precision " ++
1775 "SIMD vector instructions require at least -msse2."
1776 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1777 sorry $ "256-bit wide floating point " ++
1778 "SIMD vector instructions require at least -mavx."
1779 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1780 sorry $ "256-bit wide integer " ++
1781 "SIMD vector instructions require at least -mavx2."
1782 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1783 sorry $ "512-bit wide " ++
1784 "SIMD vector instructions require -mavx512f."
1785 check _ _ _ _ = return ()
1786
1787 vecWidth = typeWidth (vecVmmType vcat l w)
1788
1789 ------------------------------------------------------------------------------
1790 -- Helpers for translating vector packing and unpacking.
1791
1792 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1793 -> CmmType -- Type of vector
1794 -> CmmExpr -- Initial vector
1795 -> [CmmExpr] -- Elements
1796 -> CmmFormal -- Destination for result
1797 -> FCode ()
1798 doVecPackOp maybe_pre_write_cast ty z es res = do
1799 dst <- newTemp ty
1800 emitAssign (CmmLocal dst) z
1801 vecPack dst es 0
1802 where
1803 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1804 vecPack src [] _ =
1805 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1806
1807 vecPack src (e : es) i = do
1808 dst <- newTemp ty
1809 if isFloatType (vecElemType ty)
1810 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1811 [CmmReg (CmmLocal src), cast e, iLit])
1812 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1813 [CmmReg (CmmLocal src), cast e, iLit])
1814 vecPack dst es (i + 1)
1815 where
1816 -- vector indices are always 32-bits
1817 iLit = CmmLit (CmmInt (toInteger i) W32)
1818
1819 cast :: CmmExpr -> CmmExpr
1820 cast val = case maybe_pre_write_cast of
1821 Nothing -> val
1822 Just cast -> CmmMachOp cast [val]
1823
1824 len :: Length
1825 len = vecLength ty
1826
1827 wid :: Width
1828 wid = typeWidth (vecElemType ty)
1829
1830 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1831 -> CmmType -- Type of vector
1832 -> CmmExpr -- Vector
1833 -> [CmmFormal] -- Element results
1834 -> FCode ()
1835 doVecUnpackOp maybe_post_read_cast ty e res =
1836 vecUnpack res 0
1837 where
1838 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1839 vecUnpack [] _ =
1840 return ()
1841
1842 vecUnpack (r : rs) i = do
1843 if isFloatType (vecElemType ty)
1844 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1845 [e, iLit]))
1846 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1847 [e, iLit]))
1848 vecUnpack rs (i + 1)
1849 where
1850 -- vector indices are always 32-bits
1851 iLit = CmmLit (CmmInt (toInteger i) W32)
1852
1853 cast :: CmmExpr -> CmmExpr
1854 cast val = case maybe_post_read_cast of
1855 Nothing -> val
1856 Just cast -> CmmMachOp cast [val]
1857
1858 len :: Length
1859 len = vecLength ty
1860
1861 wid :: Width
1862 wid = typeWidth (vecElemType ty)
1863
1864 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1865 -> CmmType -- Vector type
1866 -> CmmExpr -- Source vector
1867 -> CmmExpr -- Element
1868 -> CmmExpr -- Index at which to insert element
1869 -> CmmFormal -- Destination for result
1870 -> FCode ()
1871 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1872 dflags <- getDynFlags
1873 -- vector indices are always 32-bits
1874 let idx' :: CmmExpr
1875 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1876 if isFloatType (vecElemType ty)
1877 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1878 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1879 where
1880 cast :: CmmExpr -> CmmExpr
1881 cast val = case maybe_pre_write_cast of
1882 Nothing -> val
1883 Just cast -> CmmMachOp cast [val]
1884
1885 len :: Length
1886 len = vecLength ty
1887
1888 wid :: Width
1889 wid = typeWidth (vecElemType ty)
1890
1891 ------------------------------------------------------------------------------
1892 -- Helpers for translating prefetching.
1893
1894
1895 -- | Translate byte array prefetch operations into proper primcalls.
1896 doPrefetchByteArrayOp :: Int
1897 -> [CmmExpr]
1898 -> FCode ()
1899 doPrefetchByteArrayOp locality [addr,idx]
1900 = do dflags <- getDynFlags
1901 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1902 doPrefetchByteArrayOp _ _
1903 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1904
1905 -- | Translate mutable byte array prefetch operations into proper primcalls.
1906 doPrefetchMutableByteArrayOp :: Int
1907 -> [CmmExpr]
1908 -> FCode ()
1909 doPrefetchMutableByteArrayOp locality [addr,idx]
1910 = do dflags <- getDynFlags
1911 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1912 doPrefetchMutableByteArrayOp _ _
1913 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1914
1915 -- | Translate address prefetch operations into proper primcalls.
1916 doPrefetchAddrOp ::Int
1917 -> [CmmExpr]
1918 -> FCode ()
1919 doPrefetchAddrOp locality [addr,idx]
1920 = mkBasicPrefetch locality 0 addr idx
1921 doPrefetchAddrOp _ _
1922 = panic "StgCmmPrim: doPrefetchAddrOp"
1923
1924 -- | Translate value prefetch operations into proper primcalls.
1925 doPrefetchValueOp :: Int
1926 -> [CmmExpr]
1927 -> FCode ()
1928 doPrefetchValueOp locality [addr]
1929 = do dflags <- getDynFlags
1930 mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
1931 doPrefetchValueOp _ _
1932 = panic "StgCmmPrim: doPrefetchValueOp"
1933
1934 -- | helper to generate prefetch primcalls
1935 mkBasicPrefetch :: Int -- Locality level 0-3
1936 -> ByteOff -- Initial offset in bytes
1937 -> CmmExpr -- Base address
1938 -> CmmExpr -- Index
1939 -> FCode ()
1940 mkBasicPrefetch locality off base idx
1941 = do dflags <- getDynFlags
1942 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1943 return ()
1944
1945 -- ----------------------------------------------------------------------------
1946 -- Allocating byte arrays
1947
1948 -- | Takes a register to return the newly allocated array in and the
1949 -- size of the new array in bytes. Allocates a new
1950 -- 'MutableByteArray#'.
1951 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1952 doNewByteArrayOp res_r n = do
1953 dflags <- getDynFlags
1954
1955 let info_ptr = mkLblExpr mkArrWords_infoLabel
1956 rep = arrWordsRep dflags n
1957
1958 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1959 (mkIntExpr dflags (nonHdrSize dflags rep))
1960 (zeroExpr dflags)
1961
1962 let hdr_size = fixedHdrSize dflags
1963
1964 base <- allocHeapClosure rep info_ptr cccsExpr
1965 [ (mkIntExpr dflags n,
1966 hdr_size + oFFSET_StgArrBytes_bytes dflags)
1967 ]
1968
1969 emit $ mkAssign (CmmLocal res_r) base
1970
1971 -- ----------------------------------------------------------------------------
1972 -- Comparing byte arrays
1973
1974 doCompareByteArraysOp :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1975 -> FCode ()
1976 doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n = do
1977 dflags <- getDynFlags
1978 ba1_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba1 (arrWordsHdrSize dflags)) ba1_off
1979 ba2_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba2 (arrWordsHdrSize dflags)) ba2_off
1980
1981 -- short-cut in case of equal pointers avoiding a costly
1982 -- subroutine call to the memcmp(3) routine; the Cmm logic below
1983 -- results in assembly code being generated for
1984 --
1985 -- cmpPrefix10 :: ByteArray# -> ByteArray# -> Int#
1986 -- cmpPrefix10 ba1 ba2 = compareByteArrays# ba1 0# ba2 0# 10#
1987 --
1988 -- that looks like
1989 --
1990 -- leaq 16(%r14),%rax
1991 -- leaq 16(%rsi),%rbx
1992 -- xorl %ecx,%ecx
1993 -- cmpq %rbx,%rax
1994 -- je l_ptr_eq
1995 --
1996 -- ; NB: the common case (unequal pointers) falls-through
1997 -- ; the conditional jump, and therefore matches the
1998 -- ; usual static branch prediction convention of modern cpus
1999 --
2000 -- subq $8,%rsp
2001 -- movq %rbx,%rsi
2002 -- movq %rax,%rdi
2003 -- movl $10,%edx
2004 -- xorl %eax,%eax
2005 -- call memcmp
2006 -- addq $8,%rsp
2007 -- movslq %eax,%rax
2008 -- movq %rax,%rcx
2009 -- l_ptr_eq:
2010 -- movq %rcx,%rbx
2011 -- jmp *(%rbp)
2012
2013 l_ptr_eq <- newBlockId
2014 l_ptr_ne <- newBlockId
2015
2016 emit (mkAssign (CmmLocal res) (zeroExpr dflags))
2017 emit (mkCbranch (cmmEqWord dflags ba1_p ba2_p)
2018 l_ptr_eq l_ptr_ne (Just False))
2019
2020 emitLabel l_ptr_ne
2021 emitMemcmpCall res ba1_p ba2_p n 1
2022
2023 emitLabel l_ptr_eq
2024
2025 -- ----------------------------------------------------------------------------
2026 -- Copying byte arrays
2027
2028 -- | Takes a source 'ByteArray#', an offset in the source array, a
2029 -- destination 'MutableByteArray#', an offset into the destination
2030 -- array, and the number of bytes to copy. Copies the given number of
2031 -- bytes from the source array to the destination array.
2032 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
2033 -> FCode ()
2034 doCopyByteArrayOp = emitCopyByteArray copy
2035 where
2036 -- Copy data (we assume the arrays aren't overlapping since
2037 -- they're of different types)
2038 copy _src _dst dst_p src_p bytes align =
2039 emitMemcpyCall dst_p src_p bytes align
2040
2041 -- | Takes a source 'MutableByteArray#', an offset in the source
2042 -- array, a destination 'MutableByteArray#', an offset into the
2043 -- destination array, and the number of bytes to copy. Copies the
2044 -- given number of bytes from the source array to the destination
2045 -- array.
2046 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
2047 -> FCode ()
2048 doCopyMutableByteArrayOp = emitCopyByteArray copy
2049 where
2050 -- The only time the memory might overlap is when the two arrays
2051 -- we were provided are the same array!
2052 -- TODO: Optimize branch for common case of no aliasing.
2053 copy src dst dst_p src_p bytes align = do
2054 dflags <- getDynFlags
2055 (moveCall, cpyCall) <- forkAltPair
2056 (getCode $ emitMemmoveCall dst_p src_p bytes align)
2057 (getCode $ emitMemcpyCall dst_p src_p bytes align)
2058 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
2059
2060 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
2061 -> Alignment -> FCode ())
2062 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
2063 -> FCode ()
2064 emitCopyByteArray copy src src_off dst dst_off n = do
2065 dflags <- getDynFlags
2066 let byteArrayAlignment = wordAlignment dflags
2067 srcOffAlignment = cmmExprAlignment src_off
2068 dstOffAlignment = cmmExprAlignment dst_off
2069 align = minimum [byteArrayAlignment, srcOffAlignment, dstOffAlignment]
2070 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
2071 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
2072 copy src dst dst_p src_p n align
2073
2074 -- | Takes a source 'ByteArray#', an offset in the source array, a
2075 -- destination 'Addr#', and the number of bytes to copy. Copies the given
2076 -- number of bytes from the source array to the destination memory region.
2077 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
2078 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
2079 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
2080 dflags <- getDynFlags
2081 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
2082 emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
2083
2084 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
2085 -- destination 'Addr#', and the number of bytes to copy. Copies the given
2086 -- number of bytes from the source array to the destination memory region.
2087 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
2088 -> FCode ()
2089 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
2090
2091 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
2092 -- the destination array, and the number of bytes to copy. Copies the given
2093 -- number of bytes from the source memory region to the destination array.
2094 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
2095 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
2096 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
2097 dflags <- getDynFlags
2098 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
2099 emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
2100
2101
2102 -- ----------------------------------------------------------------------------
2103 -- Setting byte arrays
2104
2105 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
2106 -- and a byte, and sets each of the selected bytes in the array to the
2107 -- character.
2108 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
2109 -> FCode ()
2110 doSetByteArrayOp ba off len c = do
2111 dflags <- getDynFlags
2112
2113 let byteArrayAlignment = wordAlignment dflags -- known since BA is allocated on heap
2114 offsetAlignment = cmmExprAlignment off
2115 align = min byteArrayAlignment offsetAlignment
2116
2117 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
2118 emitMemsetCall p c len align
2119
2120 -- ----------------------------------------------------------------------------
2121 -- Allocating arrays
2122
2123 -- | Allocate a new array.
2124 doNewArrayOp :: CmmFormal -- ^ return register
2125 -> SMRep -- ^ representation of the array
2126 -> CLabel -- ^ info pointer
2127 -> [(CmmExpr, ByteOff)] -- ^ header payload
2128 -> WordOff -- ^ array size
2129 -> CmmExpr -- ^ initial element
2130 -> FCode ()
2131 doNewArrayOp res_r rep info payload n init = do
2132 dflags <- getDynFlags
2133
2134 let info_ptr = mkLblExpr info
2135
2136 tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
2137 (mkIntExpr dflags (nonHdrSize dflags rep))
2138 (zeroExpr dflags)
2139
2140 base <- allocHeapClosure rep info_ptr cccsExpr payload
2141
2142 arr <- CmmLocal `fmap` newTemp (bWord dflags)
2143 emit $ mkAssign arr base
2144
2145 -- Initialise all elements of the array
2146 let mkOff off = cmmOffsetW dflags (CmmReg arr) (hdrSizeW dflags rep + off)
2147 initialization = [ mkStore (mkOff off) init | off <- [0.. n - 1] ]
2148 emit (catAGraphs initialization)
2149
2150 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2151
2152 -- ----------------------------------------------------------------------------
2153 -- Copying pointer arrays
2154
2155 -- EZY: This code has an unusually high amount of assignTemp calls, seen
2156 -- nowhere else in the code generator. This is mostly because these
2157 -- "primitive" ops result in a surprisingly large amount of code. It
2158 -- will likely be worthwhile to optimize what is emitted here, so that
2159 -- our optimization passes don't waste time repeatedly optimizing the
2160 -- same bits of code.
2161
2162 -- More closely imitates 'assignTemp' from the old code generator, which
2163 -- returns a CmmExpr rather than a LocalReg.
2164 assignTempE :: CmmExpr -> FCode CmmExpr
2165 assignTempE e = do
2166 t <- assignTemp e
2167 return (CmmReg (CmmLocal t))
2168
2169 -- | Takes a source 'Array#', an offset in the source array, a
2170 -- destination 'MutableArray#', an offset into the destination array,
2171 -- and the number of elements to copy. Copies the given number of
2172 -- elements from the source array to the destination array.
2173 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2174 -> FCode ()
2175 doCopyArrayOp = emitCopyArray copy
2176 where
2177 -- Copy data (we assume the arrays aren't overlapping since
2178 -- they're of different types)
2179 copy _src _dst dst_p src_p bytes =
2180 do dflags <- getDynFlags
2181 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
2182 (wordAlignment dflags)
2183
2184
2185 -- | Takes a source 'MutableArray#', an offset in the source array, a
2186 -- destination 'MutableArray#', an offset into the destination array,
2187 -- and the number of elements to copy. Copies the given number of
2188 -- elements from the source array to the destination array.
2189 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2190 -> FCode ()
2191 doCopyMutableArrayOp = emitCopyArray copy
2192 where
2193 -- The only time the memory might overlap is when the two arrays
2194 -- we were provided are the same array!
2195 -- TODO: Optimize branch for common case of no aliasing.
2196 copy src dst dst_p src_p bytes = do
2197 dflags <- getDynFlags
2198 (moveCall, cpyCall) <- forkAltPair
2199 (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
2200 (wordAlignment dflags))
2201 (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
2202 (wordAlignment dflags))
2203 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
2204
2205 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
2206 -> FCode ()) -- ^ copy function
2207 -> CmmExpr -- ^ source array
2208 -> CmmExpr -- ^ offset in source array
2209 -> CmmExpr -- ^ destination array
2210 -> CmmExpr -- ^ offset in destination array
2211 -> WordOff -- ^ number of elements to copy
2212 -> FCode ()
2213 emitCopyArray copy src0 src_off dst0 dst_off0 n =
2214 when (n /= 0) $ do
2215 dflags <- getDynFlags
2216
2217 -- Passed as arguments (be careful)
2218 src <- assignTempE src0
2219 dst <- assignTempE dst0
2220 dst_off <- assignTempE dst_off0
2221
2222 -- Set the dirty bit in the header.
2223 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
2224
2225 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
2226 (arrPtrsHdrSize dflags)
2227 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
2228 src_p <- assignTempE $ cmmOffsetExprW dflags
2229 (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
2230 let bytes = wordsToBytes dflags n
2231
2232 copy src dst dst_p src_p bytes
2233
2234 -- The base address of the destination card table
2235 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
2236 (loadArrPtrsSize dflags dst)
2237
2238 emitSetCards dst_off dst_cards_p n
2239
2240 doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2241 -> FCode ()
2242 doCopySmallArrayOp = emitCopySmallArray copy
2243 where
2244 -- Copy data (we assume the arrays aren't overlapping since
2245 -- they're of different types)
2246 copy _src _dst dst_p src_p bytes =
2247 do dflags <- getDynFlags
2248 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
2249 (wordAlignment dflags)
2250
2251
2252 doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2253 -> FCode ()
2254 doCopySmallMutableArrayOp = emitCopySmallArray copy
2255 where
2256 -- The only time the memory might overlap is when the two arrays
2257 -- we were provided are the same array!
2258 -- TODO: Optimize branch for common case of no aliasing.
2259 copy src dst dst_p src_p bytes = do
2260 dflags <- getDynFlags
2261 (moveCall, cpyCall) <- forkAltPair
2262 (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
2263 (wordAlignment dflags))
2264 (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
2265 (wordAlignment dflags))
2266 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
2267
2268 emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
2269 -> FCode ()) -- ^ copy function
2270 -> CmmExpr -- ^ source array
2271 -> CmmExpr -- ^ offset in source array
2272 -> CmmExpr -- ^ destination array
2273 -> CmmExpr -- ^ offset in destination array
2274 -> WordOff -- ^ number of elements to copy
2275 -> FCode ()
2276 emitCopySmallArray copy src0 src_off dst0 dst_off n =
2277 when (n /= 0) $ do
2278 dflags <- getDynFlags
2279
2280 -- Passed as arguments (be careful)
2281 src <- assignTempE src0
2282 dst <- assignTempE dst0
2283
2284 -- Set the dirty bit in the header.
2285 emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2286
2287 dst_p <- assignTempE $ cmmOffsetExprW dflags
2288 (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
2289 src_p <- assignTempE $ cmmOffsetExprW dflags
2290 (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
2291 let bytes = wordsToBytes dflags n
2292
2293 copy src dst dst_p src_p bytes
2294
2295 -- | Takes an info table label, a register to return the newly
2296 -- allocated array in, a source array, an offset in the source array,
2297 -- and the number of elements to copy. Allocates a new array and
2298 -- initializes it from the source array.
2299 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
2300 -> FCode ()
2301 emitCloneArray info_p res_r src src_off n = do
2302 dflags <- getDynFlags
2303
2304 let info_ptr = mkLblExpr info_p
2305 rep = arrPtrsRep dflags n
2306
2307 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
2308 (mkIntExpr dflags (nonHdrSize dflags rep))
2309 (zeroExpr dflags)
2310
2311 let hdr_size = fixedHdrSize dflags
2312
2313 base <- allocHeapClosure rep info_ptr cccsExpr
2314 [ (mkIntExpr dflags n,
2315 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
2316 , (mkIntExpr dflags (nonHdrSizeW rep),
2317 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
2318 ]
2319
2320 arr <- CmmLocal `fmap` newTemp (bWord dflags)
2321 emit $ mkAssign arr base
2322
2323 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
2324 (arrPtrsHdrSize dflags)
2325 src_p <- assignTempE $ cmmOffsetExprW dflags src
2326 (cmmAddWord dflags
2327 (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
2328
2329 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
2330 (wordAlignment dflags)
2331
2332 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2333
2334 -- | Takes an info table label, a register to return the newly
2335 -- allocated array in, a source array, an offset in the source array,
2336 -- and the number of elements to copy. Allocates a new array and
2337 -- initializes it from the source array.
2338 emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
2339 -> FCode ()
2340 emitCloneSmallArray info_p res_r src src_off n = do
2341 dflags <- getDynFlags
2342
2343 let info_ptr = mkLblExpr info_p
2344 rep = smallArrPtrsRep n
2345
2346 tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
2347 (mkIntExpr dflags (nonHdrSize dflags rep))
2348 (zeroExpr dflags)
2349
2350 let hdr_size = fixedHdrSize dflags
2351
2352 base <- allocHeapClosure rep info_ptr cccsExpr
2353 [ (mkIntExpr dflags n,
2354 hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
2355 ]
2356
2357 arr <- CmmLocal `fmap` newTemp (bWord dflags)
2358 emit $ mkAssign arr base
2359
2360 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
2361 (smallArrPtrsHdrSize dflags)
2362 src_p <- assignTempE $ cmmOffsetExprW dflags src
2363 (cmmAddWord dflags
2364 (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
2365
2366 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
2367 (wordAlignment dflags)
2368
2369 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2370
2371 -- | Takes and offset in the destination array, the base address of
2372 -- the card table, and the number of elements affected (*not* the
2373 -- number of cards). The number of elements may not be zero.
2374 -- Marks the relevant cards as dirty.
2375 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
2376 emitSetCards dst_start dst_cards_start n = do
2377 dflags <- getDynFlags
2378 start_card <- assignTempE $ cardCmm dflags dst_start
2379 let end_card = cardCmm dflags
2380 (cmmSubWord dflags
2381 (cmmAddWord dflags dst_start (mkIntExpr dflags n))
2382 (mkIntExpr dflags 1))
2383 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
2384 (mkIntExpr dflags 1)
2385 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
2386 (mkAlignment 1) -- no alignment (1 byte)
2387
2388 -- Convert an element index to a card index
2389 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
2390 cardCmm dflags i =
2391 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
2392
2393 ------------------------------------------------------------------------------
2394 -- SmallArray PrimOp implementations
2395
2396 doReadSmallPtrArrayOp :: LocalReg
2397 -> CmmExpr
2398 -> CmmExpr
2399 -> FCode ()
2400 doReadSmallPtrArrayOp res addr idx = do
2401 dflags <- getDynFlags
2402 mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
2403 (gcWord dflags) idx
2404
2405 doWriteSmallPtrArrayOp :: CmmExpr
2406 -> CmmExpr
2407 -> CmmExpr
2408 -> FCode ()
2409 doWriteSmallPtrArrayOp addr idx val = do
2410 dflags <- getDynFlags
2411 let ty = cmmExprType dflags val
2412 emitPrimCall [] MO_WriteBarrier [] -- #12469
2413 mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
2414 emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2415
2416 ------------------------------------------------------------------------------
2417 -- Atomic read-modify-write
2418
2419 -- | Emit an atomic modification to a byte array element. The result
2420 -- reg contains that previous value of the element. Implies a full
2421 -- memory barrier.
2422 doAtomicRMW :: LocalReg -- ^ Result reg
2423 -> AtomicMachOp -- ^ Atomic op (e.g. add)
2424 -> CmmExpr -- ^ MutableByteArray#
2425 -> CmmExpr -- ^ Index
2426 -> CmmType -- ^ Type of element by which we are indexing
2427 -> CmmExpr -- ^ Op argument (e.g. amount to add)
2428 -> FCode ()
2429 doAtomicRMW res amop mba idx idx_ty n = do
2430 dflags <- getDynFlags
2431 let width = typeWidth idx_ty
2432 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2433 width mba idx
2434 emitPrimCall
2435 [ res ]
2436 (MO_AtomicRMW width amop)
2437 [ addr, n ]
2438
2439 -- | Emit an atomic read to a byte array that acts as a memory barrier.
2440 doAtomicReadByteArray
2441 :: LocalReg -- ^ Result reg
2442 -> CmmExpr -- ^ MutableByteArray#
2443 -> CmmExpr -- ^ Index
2444 -> CmmType -- ^ Type of element by which we are indexing
2445 -> FCode ()
2446 doAtomicReadByteArray res mba idx idx_ty = do
2447 dflags <- getDynFlags
2448 let width = typeWidth idx_ty
2449 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2450 width mba idx
2451 emitPrimCall
2452 [ res ]
2453 (MO_AtomicRead width)
2454 [ addr ]
2455
2456 -- | Emit an atomic write to a byte array that acts as a memory barrier.
2457 doAtomicWriteByteArray
2458 :: CmmExpr -- ^ MutableByteArray#
2459 -> CmmExpr -- ^ Index
2460 -> CmmType -- ^ Type of element by which we are indexing
2461 -> CmmExpr -- ^ Value to write
2462 -> FCode ()
2463 doAtomicWriteByteArray mba idx idx_ty val = do
2464 dflags <- getDynFlags
2465 let width = typeWidth idx_ty
2466 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2467 width mba idx
2468 emitPrimCall
2469 [ {- no results -} ]
2470 (MO_AtomicWrite width)
2471 [ addr, val ]
2472
2473 doCasByteArray
2474 :: LocalReg -- ^ Result reg
2475 -> CmmExpr -- ^ MutableByteArray#
2476 -> CmmExpr -- ^ Index
2477 -> CmmType -- ^ Type of element by which we are indexing
2478 -> CmmExpr -- ^ Old value
2479 -> CmmExpr -- ^ New value
2480 -> FCode ()
2481 doCasByteArray res mba idx idx_ty old new = do
2482 dflags <- getDynFlags
2483 let width = (typeWidth idx_ty)
2484 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2485 width mba idx
2486 emitPrimCall
2487 [ res ]
2488 (MO_Cmpxchg width)
2489 [ addr, old, new ]
2490
2491 ------------------------------------------------------------------------------
2492 -- Helpers for emitting function calls
2493
2494 -- | Emit a call to @memcpy@.
2495 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
2496 emitMemcpyCall dst src n align = do
2497 emitPrimCall
2498 [ {-no results-} ]
2499 (MO_Memcpy (alignmentBytes align))
2500 [ dst, src, n ]
2501
2502 -- | Emit a call to @memmove@.
2503 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
2504 emitMemmoveCall dst src n align = do
2505 emitPrimCall
2506 [ {- no results -} ]
2507 (MO_Memmove (alignmentBytes align))
2508 [ dst, src, n ]
2509
2510 -- | Emit a call to @memset@. The second argument must fit inside an
2511 -- unsigned char.
2512 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
2513 emitMemsetCall dst c n align = do
2514 emitPrimCall
2515 [ {- no results -} ]
2516 (MO_Memset (alignmentBytes align))
2517 [ dst, c, n ]
2518
2519 emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2520 emitMemcmpCall res ptr1 ptr2 n align = do
2521 -- 'MO_Memcmp' is assumed to return an 32bit 'CInt' because all
2522 -- code-gens currently call out to the @memcmp(3)@ C function.
2523 -- This was easier than moving the sign-extensions into
2524 -- all the code-gens.
2525 dflags <- getDynFlags
2526 let is32Bit = typeWidth (localRegType res) == W32
2527
2528 cres <- if is32Bit
2529 then return res
2530 else newTemp b32
2531
2532 emitPrimCall
2533 [ cres ]
2534 (MO_Memcmp align)
2535 [ ptr1, ptr2, n ]
2536
2537 unless is32Bit $ do
2538 emit $ mkAssign (CmmLocal res)
2539 (CmmMachOp
2540 (mo_s_32ToWord dflags)
2541 [(CmmReg (CmmLocal cres))])
2542
2543 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2544 emitBSwapCall res x width = do
2545 emitPrimCall
2546 [ res ]
2547 (MO_BSwap width)
2548 [ x ]
2549
2550 emitBRevCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2551 emitBRevCall res x width = do
2552 emitPrimCall
2553 [ res ]
2554 (MO_BRev width)
2555 [ x ]
2556
2557 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2558 emitPopCntCall res x width = do
2559 emitPrimCall
2560 [ res ]
2561 (MO_PopCnt width)
2562 [ x ]
2563
2564 emitPdepCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
2565 emitPdepCall res x y width = do
2566 emitPrimCall
2567 [ res ]
2568 (MO_Pdep width)
2569 [ x, y ]
2570
2571 emitPextCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
2572 emitPextCall res x y width = do
2573 emitPrimCall
2574 [ res ]
2575 (MO_Pext width)
2576 [ x, y ]
2577
2578 emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2579 emitClzCall res x width = do
2580 emitPrimCall
2581 [ res ]
2582 (MO_Clz width)
2583 [ x ]
2584
2585 emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2586 emitCtzCall res x width = do
2587 emitPrimCall
2588 [ res ]
2589 (MO_Ctz width)
2590 [ x ]