Implement unboxed sum primitive type
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 {-# LANGUAGE CPP #-}
2
3 ----------------------------------------------------------------------------
4 --
5 -- Stg to C--: primitive operations
6 --
7 -- (c) The University of Glasgow 2004-2006
8 --
9 -----------------------------------------------------------------------------
10
11 module StgCmmPrim (
12 cgOpApp,
13 cgPrimOp, -- internal(ish), used by cgCase to get code for a
14 -- comparison without also turning it into a Bool.
15 shouldInlinePrimOp
16 ) where
17
18 #include "HsVersions.h"
19
20 import StgCmmLayout
21 import StgCmmForeign
22 import StgCmmEnv
23 import StgCmmMonad
24 import StgCmmUtils
25 import StgCmmTicky
26 import StgCmmHeap
27 import StgCmmProf ( costCentreFrom, curCCS )
28
29 import DynFlags
30 import Platform
31 import BasicTypes
32 import MkGraph
33 import StgSyn
34 import Cmm
35 import CmmInfo
36 import Type ( Type, tyConAppTyCon )
37 import TyCon
38 import CLabel
39 import CmmUtils
40 import PrimOp
41 import SMRep
42 import FastString
43 import Outputable
44 import Util
45
46 import Prelude hiding ((<*>))
47
48 import Data.Bits ((.&.), bit)
49 import Data.Bifunctor (first)
50 import Control.Monad (liftM, when)
51
52 ------------------------------------------------------------------------
53 -- Primitive operations and foreign calls
54 ------------------------------------------------------------------------
55
56 {- Note [Foreign call results]
57 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
58 A foreign call always returns an unboxed tuple of results, one
59 of which is the state token. This seems to happen even for pure
60 calls.
61
62 Even if we returned a single result for pure calls, it'd still be
63 right to wrap it in a singleton unboxed tuple, because the result
64 might be a Haskell closure pointer, we don't want to evaluate it. -}
65
66 ----------------------------------
67 cgOpApp :: StgOp -- The op
68 -> [StgArg] -- Arguments
69 -> Type -- Result type (always an unboxed tuple)
70 -> FCode ReturnKind
71
72 -- Foreign calls
73 cgOpApp (StgFCallOp fcall _) stg_args res_ty
74 = cgForeignCall fcall stg_args res_ty
75 -- Note [Foreign call results]
76
77 -- tagToEnum# is special: we need to pull the constructor
78 -- out of the table, and perform an appropriate return.
79
80 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
81 = ASSERT(isEnumerationTyCon tycon)
82 do { dflags <- getDynFlags
83 ; args' <- getNonVoidArgAmodes_no_rubbish [arg]
84 ; let amode = case args' of [amode] -> amode
85 _ -> panic "TagToEnumOp had void arg"
86 ; emitReturn [CmmExprArg (tagToClosure dflags tycon amode)] }
87 where
88 -- If you're reading this code in the attempt to figure
89 -- out why the compiler panic'ed here, it is probably because
90 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
91 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
92 -- That won't work.
93 tycon = tyConAppTyCon res_ty
94
95 cgOpApp (StgPrimOp primop) args res_ty = do
96 dflags <- getDynFlags
97 cmm_args <- getNonVoidArgAmodes_no_rubbish args
98 case shouldInlinePrimOp dflags primop cmm_args of
99 Nothing -> do -- out-of-line
100 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
101 emitCall (NativeNodeCall, NativeReturn) fun (map CmmExprArg cmm_args)
102
103 Just f -- inline
104 | ReturnsPrim VoidRep <- result_info
105 -> do f []
106 emitReturn []
107
108 | ReturnsPrim rep <- result_info
109 -> do dflags <- getDynFlags
110 res <- newTemp (primRepCmmType dflags rep)
111 f [res]
112 emitReturn [CmmExprArg (CmmReg (CmmLocal res))]
113
114 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
115 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
116 f regs
117 emitReturn (map (CmmExprArg . CmmReg . CmmLocal) regs)
118
119 | otherwise -> panic "cgPrimop"
120 where
121 result_info = getPrimOpResultInfo primop
122
123 cgOpApp (StgPrimCallOp primcall) args _res_ty
124 = do { cmm_args <- getNonVoidArgAmodes args
125 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
126 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
127
128 -- | Interpret the argument as an unsigned value, assuming the value
129 -- is given in two-complement form in the given width.
130 --
131 -- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
132 --
133 -- This function is used to work around the fact that many array
134 -- primops take Int# arguments, but we interpret them as unsigned
135 -- quantities in the code gen. This means that we have to be careful
136 -- every time we work on e.g. a CmmInt literal that corresponds to the
137 -- array size, as it might contain a negative Integer value if the
138 -- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
139 -- literal.
140 asUnsigned :: Width -> Integer -> Integer
141 asUnsigned w n = n .&. (bit (widthInBits w) - 1)
142
143 -- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
144 -- ByteOff (or some other fixed width signed type) to represent
145 -- array sizes or indices. This means that these will overflow for
146 -- large enough sizes.
147
148 -- | Decide whether an out-of-line primop should be replaced by an
149 -- inline implementation. This might happen e.g. if there's enough
150 -- static information, such as statically know arguments, to emit a
151 -- more efficient implementation inline.
152 --
153 -- Returns 'Nothing' if this primop should use its out-of-line
154 -- implementation (defined elsewhere) and 'Just' together with a code
155 -- generating function that takes the output regs as arguments
156 -- otherwise.
157 shouldInlinePrimOp :: DynFlags
158 -> PrimOp -- ^ The primop
159 -> [CmmExpr] -- ^ The primop arguments
160 -> Maybe ([LocalReg] -> FCode ())
161
162 shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
163 | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
164 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
165
166 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
167 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
168 Just $ \ [res] ->
169 doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
170 [ (mkIntExpr dflags (fromInteger n),
171 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
172 , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
173 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
174 ]
175 (fromInteger n) init
176
177 shouldInlinePrimOp _ CopyArrayOp
178 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
179 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
180
181 shouldInlinePrimOp _ CopyMutableArrayOp
182 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
183 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
184
185 shouldInlinePrimOp _ CopyArrayArrayOp
186 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
187 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
188
189 shouldInlinePrimOp _ CopyMutableArrayArrayOp
190 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
191 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
192
193 shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
194 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
195 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
196
197 shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
198 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
199 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
200
201 shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
202 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
203 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
204
205 shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
206 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
207 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
208
209 shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
210 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
211 Just $ \ [res] ->
212 doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
213 [ (mkIntExpr dflags (fromInteger n),
214 fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
215 ]
216 (fromInteger n) init
217
218 shouldInlinePrimOp _ CopySmallArrayOp
219 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
220 Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
221
222 shouldInlinePrimOp _ CopySmallMutableArrayOp
223 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
224 Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
225
226 shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
227 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
228 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
229
230 shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
231 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
232 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
233
234 shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
235 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
236 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
237
238 shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
239 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
240 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
241
242 shouldInlinePrimOp dflags primop args
243 | primOpOutOfLine primop = Nothing
244 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
245
246 -- TODO: Several primops, such as 'copyArray#', only have an inline
247 -- implementation (below) but could possibly have both an inline
248 -- implementation and an out-of-line implementation, just like
249 -- 'newArray#'. This would lower the amount of code generated,
250 -- hopefully without a performance impact (needs to be measured).
251
252 ---------------------------------------------------
253 cgPrimOp :: [LocalReg] -- where to put the results
254 -> PrimOp -- the op
255 -> [StgArg] -- arguments
256 -> FCode ()
257
258 cgPrimOp results op args
259 = do dflags <- getDynFlags
260 arg_exprs <- getNonVoidArgAmodes_no_rubbish args
261 emitPrimOp dflags results op arg_exprs
262
263
264 ------------------------------------------------------------------------
265 -- Emitting code for a primop
266 ------------------------------------------------------------------------
267
268 emitPrimOp :: DynFlags
269 -> [LocalReg] -- where to put the results
270 -> PrimOp -- the op
271 -> [CmmExpr] -- arguments
272 -> FCode ()
273
274 -- First we handle various awkward cases specially. The remaining
275 -- easy cases are then handled by translateOp, defined below.
276
277 emitPrimOp _ [res] ParOp [arg]
278 =
279 -- for now, just implement this in a C function
280 -- later, we might want to inline it.
281 emitCCall
282 [(res,NoHint)]
283 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
284 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
285
286 emitPrimOp dflags [res] SparkOp [arg]
287 = do
288 -- returns the value of arg in res. We're going to therefore
289 -- refer to arg twice (once to pass to newSpark(), and once to
290 -- assign to res), so put it in a temporary.
291 tmp <- assignTemp arg
292 tmp2 <- newTemp (bWord dflags)
293 emitCCall
294 [(tmp2,NoHint)]
295 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
296 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
297 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
298
299 emitPrimOp dflags [res] GetCCSOfOp [arg]
300 = emitAssign (CmmLocal res) val
301 where
302 val
303 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
304 | otherwise = CmmLit (zeroCLit dflags)
305
306 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
307 = emitAssign (CmmLocal res) curCCS
308
309 emitPrimOp dflags [res] ReadMutVarOp [mutv]
310 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
311
312 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
313 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
314 emitCCall
315 [{-no results-}]
316 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
317 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
318
319 -- #define sizzeofByteArrayzh(r,a) \
320 -- r = ((StgArrBytes *)(a))->bytes
321 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
322 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
323
324 -- #define sizzeofMutableByteArrayzh(r,a) \
325 -- r = ((StgArrBytes *)(a))->bytes
326 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
327 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
328
329 -- #define getSizzeofMutableByteArrayzh(r,a) \
330 -- r = ((StgArrBytes *)(a))->bytes
331 emitPrimOp dflags [res] GetSizeofMutableByteArrayOp [arg]
332 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
333
334
335 -- #define touchzh(o) /* nothing */
336 emitPrimOp _ res@[] TouchOp args@[_arg]
337 = do emitPrimCall res MO_Touch args
338
339 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
340 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
341 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
342
343 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
344 emitPrimOp dflags [res] StableNameToIntOp [arg]
345 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
346
347 -- #define eqStableNamezh(r,sn1,sn2) \
348 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
349 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
350 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
351 cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags),
352 cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags)
353 ])
354
355 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
356 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
357
358 -- #define addrToHValuezh(r,a) r=(P_)a
359 emitPrimOp _ [res] AddrToAnyOp [arg]
360 = emitAssign (CmmLocal res) arg
361
362 -- #define hvalueToAddrzh(r, a) r=(W_)a
363 emitPrimOp _ [res] AnyToAddrOp [arg]
364 = emitAssign (CmmLocal res) arg
365
366 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
367 -- Note: argument may be tagged!
368 emitPrimOp dflags [res] DataToTagOp [arg]
369 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
370
371 {- Freezing arrays-of-ptrs requires changing an info table, for the
372 benefit of the generational collector. It needs to scavenge mutable
373 objects, even if they are in old space. When they become immutable,
374 they can be removed from this scavenge list. -}
375
376 -- #define unsafeFreezzeArrayzh(r,a)
377 -- {
378 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
379 -- r = a;
380 -- }
381 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
382 = emit $ catAGraphs
383 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
384 mkAssign (CmmLocal res) arg ]
385 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
386 = emit $ catAGraphs
387 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
388 mkAssign (CmmLocal res) arg ]
389 emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
390 = emit $ catAGraphs
391 [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)),
392 mkAssign (CmmLocal res) arg ]
393
394 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
395 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
396 = emitAssign (CmmLocal res) arg
397
398 -- Reading/writing pointer arrays
399
400 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
401 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
402 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
403
404 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
405 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
406 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
407 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
408 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
409 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
410 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
411 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
412 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
413 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
414
415 emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
416 emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
417 emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
418
419 -- Getting the size of pointer arrays
420
421 emitPrimOp dflags [res] SizeofArrayOp [arg]
422 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg
423 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgMutArrPtrs_ptrs dflags))
424 (bWord dflags))
425 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
426 = emitPrimOp dflags [res] SizeofArrayOp [arg]
427 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
428 = emitPrimOp dflags [res] SizeofArrayOp [arg]
429 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
430 = emitPrimOp dflags [res] SizeofArrayOp [arg]
431
432 emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
433 emit $ mkAssign (CmmLocal res)
434 (cmmLoadIndexW dflags arg
435 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgSmallMutArrPtrs_ptrs dflags))
436 (bWord dflags))
437 emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
438 emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
439
440 -- IndexXXXoffAddr
441
442 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
443 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
444 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
445 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
446 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
447 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
448 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
449 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
450 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
451 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
452 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
453 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
454 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
455 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
456 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
457 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
458
459 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
460
461 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
462 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
463 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
464 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
465 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
466 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
467 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
468 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
469 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
470 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
471 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
472 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
473 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
474 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
475 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
476 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
477
478 -- IndexXXXArray
479
480 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
481 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
482 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
483 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
484 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
485 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
486 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
487 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
488 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
489 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
490 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
491 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
492 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
493 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
494 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
495 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
496
497 -- ReadXXXArray, identical to IndexXXXArray.
498
499 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
500 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
501 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
502 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
503 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
504 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
505 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
506 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
507 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
508 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
509 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
510 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
511 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
512 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
513 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
514 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
515
516 -- WriteXXXoffAddr
517
518 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
519 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
520 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
521 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
522 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
523 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
524 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
525 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
526 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
527 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
528 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
529 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
530 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
531 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
532 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
533 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
534
535 -- WriteXXXArray
536
537 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
538 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
539 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
540 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
541 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
542 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
543 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
544 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
545 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
546 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
547 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
548 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
549 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
550 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
551 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
552 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
553
554 -- Copying and setting byte arrays
555 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
556 doCopyByteArrayOp src src_off dst dst_off n
557 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
558 doCopyMutableByteArrayOp src src_off dst dst_off n
559 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
560 doCopyByteArrayToAddrOp src src_off dst n
561 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
562 doCopyMutableByteArrayToAddrOp src src_off dst n
563 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
564 doCopyAddrToByteArrayOp src dst dst_off n
565 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
566 doSetByteArrayOp ba off len c
567
568 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
569 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
570 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
571 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
572
573 -- Population count
574 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
575 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
576 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
577 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
578 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
579
580 -- count leading zeros
581 emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
582 emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
583 emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
584 emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
585 emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
586
587 -- count trailing zeros
588 emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
589 emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
590 emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
591 emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
592 emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
593
594 -- Unsigned int to floating point conversions
595 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
596 (MO_UF_Conv W32) [w]
597 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
598 (MO_UF_Conv W64) [w]
599
600 -- SIMD primops
601 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
602 checkVecCompatibility dflags vcat n w
603 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
604 where
605 zeros :: CmmExpr
606 zeros = CmmLit $ CmmVec (replicate n zero)
607
608 zero :: CmmLit
609 zero = case vcat of
610 IntVec -> CmmInt 0 w
611 WordVec -> CmmInt 0 w
612 FloatVec -> CmmFloat 0 w
613
614 ty :: CmmType
615 ty = vecVmmType vcat n w
616
617 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
618 checkVecCompatibility dflags vcat n w
619 when (length es /= n) $
620 panic "emitPrimOp: VecPackOp has wrong number of arguments"
621 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
622 where
623 zeros :: CmmExpr
624 zeros = CmmLit $ CmmVec (replicate n zero)
625
626 zero :: CmmLit
627 zero = case vcat of
628 IntVec -> CmmInt 0 w
629 WordVec -> CmmInt 0 w
630 FloatVec -> CmmFloat 0 w
631
632 ty :: CmmType
633 ty = vecVmmType vcat n w
634
635 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
636 checkVecCompatibility dflags vcat n w
637 when (length res /= n) $
638 panic "emitPrimOp: VecUnpackOp has wrong number of results"
639 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
640 where
641 ty :: CmmType
642 ty = vecVmmType vcat n w
643
644 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
645 checkVecCompatibility dflags vcat n w
646 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
647 where
648 ty :: CmmType
649 ty = vecVmmType vcat n w
650
651 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
652 checkVecCompatibility dflags vcat n w
653 doIndexByteArrayOp Nothing ty res args
654 where
655 ty :: CmmType
656 ty = vecVmmType vcat n w
657
658 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
659 checkVecCompatibility dflags vcat n w
660 doIndexByteArrayOp Nothing ty res args
661 where
662 ty :: CmmType
663 ty = vecVmmType vcat n w
664
665 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
666 checkVecCompatibility dflags vcat n w
667 doWriteByteArrayOp Nothing ty res args
668 where
669 ty :: CmmType
670 ty = vecVmmType vcat n w
671
672 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
673 checkVecCompatibility dflags vcat n w
674 doIndexOffAddrOp Nothing ty res args
675 where
676 ty :: CmmType
677 ty = vecVmmType vcat n w
678
679 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
680 checkVecCompatibility dflags vcat n w
681 doIndexOffAddrOp Nothing ty res args
682 where
683 ty :: CmmType
684 ty = vecVmmType vcat n w
685
686 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
687 checkVecCompatibility dflags vcat n w
688 doWriteOffAddrOp Nothing ty res args
689 where
690 ty :: CmmType
691 ty = vecVmmType vcat n w
692
693 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
694 checkVecCompatibility dflags vcat n w
695 doIndexByteArrayOpAs Nothing vecty ty res args
696 where
697 vecty :: CmmType
698 vecty = vecVmmType vcat n w
699
700 ty :: CmmType
701 ty = vecCmmCat vcat w
702
703 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
704 checkVecCompatibility dflags vcat n w
705 doIndexByteArrayOpAs Nothing vecty ty res args
706 where
707 vecty :: CmmType
708 vecty = vecVmmType vcat n w
709
710 ty :: CmmType
711 ty = vecCmmCat vcat w
712
713 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
714 checkVecCompatibility dflags vcat n w
715 doWriteByteArrayOp Nothing ty res args
716 where
717 ty :: CmmType
718 ty = vecCmmCat vcat w
719
720 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
721 checkVecCompatibility dflags vcat n w
722 doIndexOffAddrOpAs Nothing vecty ty res args
723 where
724 vecty :: CmmType
725 vecty = vecVmmType vcat n w
726
727 ty :: CmmType
728 ty = vecCmmCat vcat w
729
730 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
731 checkVecCompatibility dflags vcat n w
732 doIndexOffAddrOpAs Nothing vecty ty res args
733 where
734 vecty :: CmmType
735 vecty = vecVmmType vcat n w
736
737 ty :: CmmType
738 ty = vecCmmCat vcat w
739
740 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
741 checkVecCompatibility dflags vcat n w
742 doWriteOffAddrOp Nothing ty res args
743 where
744 ty :: CmmType
745 ty = vecCmmCat vcat w
746
747 -- Prefetch
748 emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
749 emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
750 emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
751 emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
752
753 emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
754 emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
755 emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
756 emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
757
758 emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
759 emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
760 emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
761 emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
762
763 emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
764 emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
765 emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
766 emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
767
768 -- Atomic read-modify-write
769 emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
770 doAtomicRMW res AMO_Add mba ix (bWord dflags) n
771 emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
772 doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
773 emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
774 doAtomicRMW res AMO_And mba ix (bWord dflags) n
775 emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
776 doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
777 emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
778 doAtomicRMW res AMO_Or mba ix (bWord dflags) n
779 emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
780 doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
781 emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
782 doAtomicReadByteArray res mba ix (bWord dflags)
783 emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
784 doAtomicWriteByteArray mba ix (bWord dflags) val
785 emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
786 doCasByteArray res mba ix (bWord dflags) old new
787
788 -- The rest just translate straightforwardly
789 emitPrimOp dflags [res] op [arg]
790 | nopOp op
791 = emitAssign (CmmLocal res) arg
792
793 | Just (mop,rep) <- narrowOp op
794 = emitAssign (CmmLocal res) $
795 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
796
797 emitPrimOp dflags r@[res] op args
798 | Just prim <- callishOp op
799 = do emitPrimCall r prim args
800
801 | Just mop <- translateOp dflags op
802 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
803 emit stmt
804
805 emitPrimOp dflags results op args
806 = case callishPrimOpSupported dflags op of
807 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
808 Right gen -> gen results args
809
810 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
811
812 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
813 callishPrimOpSupported dflags op
814 = case op of
815 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
816 | otherwise -> Right (genericIntQuotRemOp dflags)
817
818 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
819 | otherwise -> Right (genericWordQuotRemOp dflags)
820
821 WordQuotRem2Op | (ncg && x86ish)
822 || llvm -> Left (MO_U_QuotRem2 (wordWidth dflags))
823 | otherwise -> Right (genericWordQuotRem2Op dflags)
824
825 WordAdd2Op | (ncg && x86ish)
826 || llvm -> Left (MO_Add2 (wordWidth dflags))
827 | otherwise -> Right genericWordAdd2Op
828
829 WordSubCOp | (ncg && x86ish)
830 || llvm -> Left (MO_SubWordC (wordWidth dflags))
831 | otherwise -> Right genericWordSubCOp
832
833 IntAddCOp | (ncg && x86ish)
834 || llvm -> Left (MO_AddIntC (wordWidth dflags))
835 | otherwise -> Right genericIntAddCOp
836
837 IntSubCOp | (ncg && x86ish)
838 || llvm -> Left (MO_SubIntC (wordWidth dflags))
839 | otherwise -> Right genericIntSubCOp
840
841 WordMul2Op | ncg && x86ish
842 || llvm -> Left (MO_U_Mul2 (wordWidth dflags))
843 | otherwise -> Right genericWordMul2Op
844
845 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
846 where
847 ncg = case hscTarget dflags of
848 HscAsm -> True
849 _ -> False
850 llvm = case hscTarget dflags of
851 HscLlvm -> True
852 _ -> False
853 x86ish = case platformArch (targetPlatform dflags) of
854 ArchX86 -> True
855 ArchX86_64 -> True
856 _ -> False
857
858 genericIntQuotRemOp :: DynFlags -> GenericOp
859 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
860 = emit $ mkAssign (CmmLocal res_q)
861 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
862 mkAssign (CmmLocal res_r)
863 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
864 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
865
866 genericWordQuotRemOp :: DynFlags -> GenericOp
867 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
868 = emit $ mkAssign (CmmLocal res_q)
869 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
870 mkAssign (CmmLocal res_r)
871 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
872 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
873
874 genericWordQuotRem2Op :: DynFlags -> GenericOp
875 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
876 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
877 where ty = cmmExprType dflags arg_x_high
878 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
879 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
880 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
881 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
882 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
883 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
884 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
885 zero = lit 0
886 one = lit 1
887 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
888 lit i = CmmLit (CmmInt i (wordWidth dflags))
889
890 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
891 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
892 mkAssign (CmmLocal res_r) high)
893 f i acc high low =
894 do roverflowedBit <- newTemp ty
895 rhigh' <- newTemp ty
896 rhigh'' <- newTemp ty
897 rlow' <- newTemp ty
898 risge <- newTemp ty
899 racc' <- newTemp ty
900 let high' = CmmReg (CmmLocal rhigh')
901 isge = CmmReg (CmmLocal risge)
902 overflowedBit = CmmReg (CmmLocal roverflowedBit)
903 let this = catAGraphs
904 [mkAssign (CmmLocal roverflowedBit)
905 (shr high negone),
906 mkAssign (CmmLocal rhigh')
907 (or (shl high one) (shr low negone)),
908 mkAssign (CmmLocal rlow')
909 (shl low one),
910 mkAssign (CmmLocal risge)
911 (or (overflowedBit `ne` zero)
912 (high' `ge` arg_y)),
913 mkAssign (CmmLocal rhigh'')
914 (high' `minus` (arg_y `times` isge)),
915 mkAssign (CmmLocal racc')
916 (or (shl acc one) isge)]
917 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
918 (CmmReg (CmmLocal rhigh''))
919 (CmmReg (CmmLocal rlow'))
920 return (this <*> rest)
921 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
922
923 genericWordAdd2Op :: GenericOp
924 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
925 = do dflags <- getDynFlags
926 r1 <- newTemp (cmmExprType dflags arg_x)
927 r2 <- newTemp (cmmExprType dflags arg_x)
928 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
929 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
930 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
931 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
932 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
933 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
934 (wordWidth dflags))
935 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
936 emit $ catAGraphs
937 [mkAssign (CmmLocal r1)
938 (add (bottomHalf arg_x) (bottomHalf arg_y)),
939 mkAssign (CmmLocal r2)
940 (add (topHalf (CmmReg (CmmLocal r1)))
941 (add (topHalf arg_x) (topHalf arg_y))),
942 mkAssign (CmmLocal res_h)
943 (topHalf (CmmReg (CmmLocal r2))),
944 mkAssign (CmmLocal res_l)
945 (or (toTopHalf (CmmReg (CmmLocal r2)))
946 (bottomHalf (CmmReg (CmmLocal r1))))]
947 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
948
949 genericWordSubCOp :: GenericOp
950 genericWordSubCOp [res_r, res_c] [aa, bb] = do
951 dflags <- getDynFlags
952 emit $ catAGraphs
953 [ -- Put the result into 'res_r'.
954 mkAssign (CmmLocal res_r) $
955 CmmMachOp (mo_wordSub dflags) [aa, bb]
956 -- Set 'res_c' to 1 if 'bb > aa' and to 0 otherwise.
957 , mkAssign (CmmLocal res_c) $
958 CmmMachOp (mo_wordUGt dflags) [bb, aa]
959 ]
960 genericWordSubCOp _ _ = panic "genericWordSubCOp"
961
962 genericIntAddCOp :: GenericOp
963 genericIntAddCOp [res_r, res_c] [aa, bb]
964 {-
965 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
966 C, and without needing any comparisons. This may not be the
967 fastest way to do it - if you have better code, please send it! --SDM
968
969 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
970
971 We currently don't make use of the r value if c is != 0 (i.e.
972 overflow), we just convert to big integers and try again. This
973 could be improved by making r and c the correct values for
974 plugging into a new J#.
975
976 { r = ((I_)(a)) + ((I_)(b)); \
977 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
978 >> (BITS_IN (I_) - 1); \
979 }
980 Wading through the mass of bracketry, it seems to reduce to:
981 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
982
983 -}
984 = do dflags <- getDynFlags
985 emit $ catAGraphs [
986 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
987 mkAssign (CmmLocal res_c) $
988 CmmMachOp (mo_wordUShr dflags) [
989 CmmMachOp (mo_wordAnd dflags) [
990 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
991 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
992 ],
993 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
994 ]
995 ]
996 genericIntAddCOp _ _ = panic "genericIntAddCOp"
997
998 genericIntSubCOp :: GenericOp
999 genericIntSubCOp [res_r, res_c] [aa, bb]
1000 {- Similarly:
1001 #define subIntCzh(r,c,a,b) \
1002 { r = ((I_)(a)) - ((I_)(b)); \
1003 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
1004 >> (BITS_IN (I_) - 1); \
1005 }
1006
1007 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
1008 -}
1009 = do dflags <- getDynFlags
1010 emit $ catAGraphs [
1011 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
1012 mkAssign (CmmLocal res_c) $
1013 CmmMachOp (mo_wordUShr dflags) [
1014 CmmMachOp (mo_wordAnd dflags) [
1015 CmmMachOp (mo_wordXor dflags) [aa,bb],
1016 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
1017 ],
1018 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1019 ]
1020 ]
1021 genericIntSubCOp _ _ = panic "genericIntSubCOp"
1022
1023 genericWordMul2Op :: GenericOp
1024 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
1025 = do dflags <- getDynFlags
1026 let t = cmmExprType dflags arg_x
1027 xlyl <- liftM CmmLocal $ newTemp t
1028 xlyh <- liftM CmmLocal $ newTemp t
1029 xhyl <- liftM CmmLocal $ newTemp t
1030 r <- liftM CmmLocal $ newTemp t
1031 -- This generic implementation is very simple and slow. We might
1032 -- well be able to do better, but for now this at least works.
1033 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
1034 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
1035 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
1036 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
1037 sum = foldl1 add
1038 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
1039 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
1040 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
1041 (wordWidth dflags))
1042 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
1043 emit $ catAGraphs
1044 [mkAssign xlyl
1045 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
1046 mkAssign xlyh
1047 (mul (bottomHalf arg_x) (topHalf arg_y)),
1048 mkAssign xhyl
1049 (mul (topHalf arg_x) (bottomHalf arg_y)),
1050 mkAssign r
1051 (sum [topHalf (CmmReg xlyl),
1052 bottomHalf (CmmReg xhyl),
1053 bottomHalf (CmmReg xlyh)]),
1054 mkAssign (CmmLocal res_l)
1055 (or (bottomHalf (CmmReg xlyl))
1056 (toTopHalf (CmmReg r))),
1057 mkAssign (CmmLocal res_h)
1058 (sum [mul (topHalf arg_x) (topHalf arg_y),
1059 topHalf (CmmReg xhyl),
1060 topHalf (CmmReg xlyh),
1061 topHalf (CmmReg r)])]
1062 genericWordMul2Op _ _ = panic "genericWordMul2Op"
1063
1064 -- These PrimOps are NOPs in Cmm
1065
1066 nopOp :: PrimOp -> Bool
1067 nopOp Int2WordOp = True
1068 nopOp Word2IntOp = True
1069 nopOp Int2AddrOp = True
1070 nopOp Addr2IntOp = True
1071 nopOp ChrOp = True -- Int# and Char# are rep'd the same
1072 nopOp OrdOp = True
1073 nopOp _ = False
1074
1075 -- These PrimOps turn into double casts
1076
1077 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
1078 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
1079 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
1080 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
1081 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
1082 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
1083 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
1084 narrowOp _ = Nothing
1085
1086 -- Native word signless ops
1087
1088 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
1089 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
1090 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
1091 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
1092 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
1093 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
1094 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
1095
1096 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
1097 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
1098 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
1099 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
1100 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
1101 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
1102
1103 translateOp dflags AndOp = Just (mo_wordAnd dflags)
1104 translateOp dflags OrOp = Just (mo_wordOr dflags)
1105 translateOp dflags XorOp = Just (mo_wordXor dflags)
1106 translateOp dflags NotOp = Just (mo_wordNot dflags)
1107 translateOp dflags SllOp = Just (mo_wordShl dflags)
1108 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
1109
1110 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
1111
1112 -- Native word signed ops
1113
1114 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
1115 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
1116 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
1117 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
1118 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
1119
1120
1121 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
1122 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
1123 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
1124 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
1125
1126 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
1127 translateOp dflags OrIOp = Just (mo_wordOr dflags)
1128 translateOp dflags XorIOp = Just (mo_wordXor dflags)
1129 translateOp dflags NotIOp = Just (mo_wordNot dflags)
1130 translateOp dflags ISllOp = Just (mo_wordShl dflags)
1131 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
1132 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
1133
1134 -- Native word unsigned ops
1135
1136 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
1137 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
1138 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
1139 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
1140
1141 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
1142 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
1143 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
1144
1145 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
1146 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
1147 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
1148 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
1149
1150 -- Char# ops
1151
1152 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
1153 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
1154 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
1155 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
1156 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
1157 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
1158
1159 -- Double ops
1160
1161 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
1162 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
1163 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
1164 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
1165 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
1166 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
1167
1168 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
1169 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
1170 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
1171 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
1172 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
1173
1174 -- Float ops
1175
1176 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
1177 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
1178 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
1179 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1180 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1181 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1182
1183 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1184 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1185 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1186 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1187 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1188
1189 -- Vector ops
1190
1191 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1192 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1193 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1194 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1195 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1196
1197 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1198 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1199 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1200 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1201 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1202 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1203
1204 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1205 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1206 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1207 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1208 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1209
1210 -- Conversions
1211
1212 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1213 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1214
1215 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1216 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1217
1218 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1219 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1220
1221 -- Word comparisons masquerading as more exotic things.
1222
1223 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1224 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1225 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1226 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1227 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1228 translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
1229 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1230 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1231
1232 translateOp _ _ = Nothing
1233
1234 -- These primops are implemented by CallishMachOps, because they sometimes
1235 -- turn into foreign calls depending on the backend.
1236
1237 callishOp :: PrimOp -> Maybe CallishMachOp
1238 callishOp DoublePowerOp = Just MO_F64_Pwr
1239 callishOp DoubleSinOp = Just MO_F64_Sin
1240 callishOp DoubleCosOp = Just MO_F64_Cos
1241 callishOp DoubleTanOp = Just MO_F64_Tan
1242 callishOp DoubleSinhOp = Just MO_F64_Sinh
1243 callishOp DoubleCoshOp = Just MO_F64_Cosh
1244 callishOp DoubleTanhOp = Just MO_F64_Tanh
1245 callishOp DoubleAsinOp = Just MO_F64_Asin
1246 callishOp DoubleAcosOp = Just MO_F64_Acos
1247 callishOp DoubleAtanOp = Just MO_F64_Atan
1248 callishOp DoubleLogOp = Just MO_F64_Log
1249 callishOp DoubleExpOp = Just MO_F64_Exp
1250 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1251
1252 callishOp FloatPowerOp = Just MO_F32_Pwr
1253 callishOp FloatSinOp = Just MO_F32_Sin
1254 callishOp FloatCosOp = Just MO_F32_Cos
1255 callishOp FloatTanOp = Just MO_F32_Tan
1256 callishOp FloatSinhOp = Just MO_F32_Sinh
1257 callishOp FloatCoshOp = Just MO_F32_Cosh
1258 callishOp FloatTanhOp = Just MO_F32_Tanh
1259 callishOp FloatAsinOp = Just MO_F32_Asin
1260 callishOp FloatAcosOp = Just MO_F32_Acos
1261 callishOp FloatAtanOp = Just MO_F32_Atan
1262 callishOp FloatLogOp = Just MO_F32_Log
1263 callishOp FloatExpOp = Just MO_F32_Exp
1264 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1265
1266 callishOp _ = Nothing
1267
1268 ------------------------------------------------------------------------------
1269 -- Helpers for translating various minor variants of array indexing.
1270
1271 doIndexOffAddrOp :: Maybe MachOp
1272 -> CmmType
1273 -> [LocalReg]
1274 -> [CmmExpr]
1275 -> FCode ()
1276 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1277 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1278 doIndexOffAddrOp _ _ _ _
1279 = panic "StgCmmPrim: doIndexOffAddrOp"
1280
1281 doIndexOffAddrOpAs :: Maybe MachOp
1282 -> CmmType
1283 -> CmmType
1284 -> [LocalReg]
1285 -> [CmmExpr]
1286 -> FCode ()
1287 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1288 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1289 doIndexOffAddrOpAs _ _ _ _ _
1290 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1291
1292 doIndexByteArrayOp :: Maybe MachOp
1293 -> CmmType
1294 -> [LocalReg]
1295 -> [CmmExpr]
1296 -> FCode ()
1297 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1298 = do dflags <- getDynFlags
1299 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1300 doIndexByteArrayOp _ _ _ _
1301 = panic "StgCmmPrim: doIndexByteArrayOp"
1302
1303 doIndexByteArrayOpAs :: Maybe MachOp
1304 -> CmmType
1305 -> CmmType
1306 -> [LocalReg]
1307 -> [CmmExpr]
1308 -> FCode ()
1309 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1310 = do dflags <- getDynFlags
1311 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1312 doIndexByteArrayOpAs _ _ _ _ _
1313 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1314
1315 doReadPtrArrayOp :: LocalReg
1316 -> CmmExpr
1317 -> CmmExpr
1318 -> FCode ()
1319 doReadPtrArrayOp res addr idx
1320 = do dflags <- getDynFlags
1321 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1322
1323 doWriteOffAddrOp :: Maybe MachOp
1324 -> CmmType
1325 -> [LocalReg]
1326 -> [CmmExpr]
1327 -> FCode ()
1328 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1329 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1330 doWriteOffAddrOp _ _ _ _
1331 = panic "StgCmmPrim: doWriteOffAddrOp"
1332
1333 doWriteByteArrayOp :: Maybe MachOp
1334 -> CmmType
1335 -> [LocalReg]
1336 -> [CmmExpr]
1337 -> FCode ()
1338 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1339 = do dflags <- getDynFlags
1340 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1341 doWriteByteArrayOp _ _ _ _
1342 = panic "StgCmmPrim: doWriteByteArrayOp"
1343
1344 doWritePtrArrayOp :: CmmExpr
1345 -> CmmExpr
1346 -> CmmExpr
1347 -> FCode ()
1348 doWritePtrArrayOp addr idx val
1349 = do dflags <- getDynFlags
1350 let ty = cmmExprType dflags val
1351 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1352 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1353 -- the write barrier. We must write a byte into the mark table:
1354 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1355 emit $ mkStore (
1356 cmmOffsetExpr dflags
1357 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1358 (loadArrPtrsSize dflags addr))
1359 (CmmMachOp (mo_wordUShr dflags) [idx,
1360 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1361 ) (CmmLit (CmmInt 1 W8))
1362
1363 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1364 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1365 where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1366
1367 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1368 -> Maybe MachOp -- Optional result cast
1369 -> CmmType -- Type of element we are accessing
1370 -> LocalReg -- Destination
1371 -> CmmExpr -- Base address
1372 -> CmmType -- Type of element by which we are indexing
1373 -> CmmExpr -- Index
1374 -> FCode ()
1375 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1376 = do dflags <- getDynFlags
1377 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1378 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1379 = do dflags <- getDynFlags
1380 emitAssign (CmmLocal res) (CmmMachOp cast [
1381 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1382
1383 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1384 -> Maybe MachOp -- Optional value cast
1385 -> CmmExpr -- Base address
1386 -> CmmType -- Type of element by which we are indexing
1387 -> CmmExpr -- Index
1388 -> CmmExpr -- Value to write
1389 -> FCode ()
1390 mkBasicIndexedWrite off Nothing base idx_ty idx val
1391 = do dflags <- getDynFlags
1392 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1393 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1394 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1395
1396 -- ----------------------------------------------------------------------------
1397 -- Misc utils
1398
1399 cmmIndexOffExpr :: DynFlags
1400 -> ByteOff -- Initial offset in bytes
1401 -> Width -- Width of element by which we are indexing
1402 -> CmmExpr -- Base address
1403 -> CmmExpr -- Index
1404 -> CmmExpr
1405 cmmIndexOffExpr dflags off width base idx
1406 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1407
1408 cmmLoadIndexOffExpr :: DynFlags
1409 -> ByteOff -- Initial offset in bytes
1410 -> CmmType -- Type of element we are accessing
1411 -> CmmExpr -- Base address
1412 -> CmmType -- Type of element by which we are indexing
1413 -> CmmExpr -- Index
1414 -> CmmExpr
1415 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1416 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1417
1418 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1419 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1420
1421 ------------------------------------------------------------------------------
1422 -- Helpers for translating vector primops.
1423
1424 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1425 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1426
1427 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1428 vecCmmCat IntVec = cmmBits
1429 vecCmmCat WordVec = cmmBits
1430 vecCmmCat FloatVec = cmmFloat
1431
1432 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1433 vecElemInjectCast _ FloatVec _ = Nothing
1434 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1435 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1436 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1437 vecElemInjectCast _ IntVec W64 = Nothing
1438 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1439 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1440 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1441 vecElemInjectCast _ WordVec W64 = Nothing
1442 vecElemInjectCast _ _ _ = Nothing
1443
1444 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1445 vecElemProjectCast _ FloatVec _ = Nothing
1446 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1447 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1448 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1449 vecElemProjectCast _ IntVec W64 = Nothing
1450 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1451 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1452 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1453 vecElemProjectCast _ WordVec W64 = Nothing
1454 vecElemProjectCast _ _ _ = Nothing
1455
1456 -- Check to make sure that we can generate code for the specified vector type
1457 -- given the current set of dynamic flags.
1458 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1459 checkVecCompatibility dflags vcat l w = do
1460 when (hscTarget dflags /= HscLlvm) $ do
1461 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1462 ,"Please use -fllvm."]
1463 check vecWidth vcat l w
1464 where
1465 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1466 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1467 sorry $ "128-bit wide single-precision floating point " ++
1468 "SIMD vector instructions require at least -msse."
1469 check W128 _ _ _ | not (isSse2Enabled dflags) =
1470 sorry $ "128-bit wide integer and double precision " ++
1471 "SIMD vector instructions require at least -msse2."
1472 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1473 sorry $ "256-bit wide floating point " ++
1474 "SIMD vector instructions require at least -mavx."
1475 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1476 sorry $ "256-bit wide integer " ++
1477 "SIMD vector instructions require at least -mavx2."
1478 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1479 sorry $ "512-bit wide " ++
1480 "SIMD vector instructions require -mavx512f."
1481 check _ _ _ _ = return ()
1482
1483 vecWidth = typeWidth (vecVmmType vcat l w)
1484
1485 ------------------------------------------------------------------------------
1486 -- Helpers for translating vector packing and unpacking.
1487
1488 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1489 -> CmmType -- Type of vector
1490 -> CmmExpr -- Initial vector
1491 -> [CmmExpr] -- Elements
1492 -> CmmFormal -- Destination for result
1493 -> FCode ()
1494 doVecPackOp maybe_pre_write_cast ty z es res = do
1495 dst <- newTemp ty
1496 emitAssign (CmmLocal dst) z
1497 vecPack dst es 0
1498 where
1499 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1500 vecPack src [] _ =
1501 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1502
1503 vecPack src (e : es) i = do
1504 dst <- newTemp ty
1505 if isFloatType (vecElemType ty)
1506 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1507 [CmmReg (CmmLocal src), cast e, iLit])
1508 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1509 [CmmReg (CmmLocal src), cast e, iLit])
1510 vecPack dst es (i + 1)
1511 where
1512 -- vector indices are always 32-bits
1513 iLit = CmmLit (CmmInt (toInteger i) W32)
1514
1515 cast :: CmmExpr -> CmmExpr
1516 cast val = case maybe_pre_write_cast of
1517 Nothing -> val
1518 Just cast -> CmmMachOp cast [val]
1519
1520 len :: Length
1521 len = vecLength ty
1522
1523 wid :: Width
1524 wid = typeWidth (vecElemType ty)
1525
1526 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1527 -> CmmType -- Type of vector
1528 -> CmmExpr -- Vector
1529 -> [CmmFormal] -- Element results
1530 -> FCode ()
1531 doVecUnpackOp maybe_post_read_cast ty e res =
1532 vecUnpack res 0
1533 where
1534 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1535 vecUnpack [] _ =
1536 return ()
1537
1538 vecUnpack (r : rs) i = do
1539 if isFloatType (vecElemType ty)
1540 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1541 [e, iLit]))
1542 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1543 [e, iLit]))
1544 vecUnpack rs (i + 1)
1545 where
1546 -- vector indices are always 32-bits
1547 iLit = CmmLit (CmmInt (toInteger i) W32)
1548
1549 cast :: CmmExpr -> CmmExpr
1550 cast val = case maybe_post_read_cast of
1551 Nothing -> val
1552 Just cast -> CmmMachOp cast [val]
1553
1554 len :: Length
1555 len = vecLength ty
1556
1557 wid :: Width
1558 wid = typeWidth (vecElemType ty)
1559
1560 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1561 -> CmmType -- Vector type
1562 -> CmmExpr -- Source vector
1563 -> CmmExpr -- Element
1564 -> CmmExpr -- Index at which to insert element
1565 -> CmmFormal -- Destination for result
1566 -> FCode ()
1567 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1568 dflags <- getDynFlags
1569 -- vector indices are always 32-bits
1570 let idx' :: CmmExpr
1571 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1572 if isFloatType (vecElemType ty)
1573 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1574 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1575 where
1576 cast :: CmmExpr -> CmmExpr
1577 cast val = case maybe_pre_write_cast of
1578 Nothing -> val
1579 Just cast -> CmmMachOp cast [val]
1580
1581 len :: Length
1582 len = vecLength ty
1583
1584 wid :: Width
1585 wid = typeWidth (vecElemType ty)
1586
1587 ------------------------------------------------------------------------------
1588 -- Helpers for translating prefetching.
1589
1590
1591 -- | Translate byte array prefetch operations into proper primcalls.
1592 doPrefetchByteArrayOp :: Int
1593 -> [CmmExpr]
1594 -> FCode ()
1595 doPrefetchByteArrayOp locality [addr,idx]
1596 = do dflags <- getDynFlags
1597 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1598 doPrefetchByteArrayOp _ _
1599 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1600
1601 -- | Translate mutable byte array prefetch operations into proper primcalls.
1602 doPrefetchMutableByteArrayOp :: Int
1603 -> [CmmExpr]
1604 -> FCode ()
1605 doPrefetchMutableByteArrayOp locality [addr,idx]
1606 = do dflags <- getDynFlags
1607 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1608 doPrefetchMutableByteArrayOp _ _
1609 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1610
1611 -- | Translate address prefetch operations into proper primcalls.
1612 doPrefetchAddrOp ::Int
1613 -> [CmmExpr]
1614 -> FCode ()
1615 doPrefetchAddrOp locality [addr,idx]
1616 = mkBasicPrefetch locality 0 addr idx
1617 doPrefetchAddrOp _ _
1618 = panic "StgCmmPrim: doPrefetchAddrOp"
1619
1620 -- | Translate value prefetch operations into proper primcalls.
1621 doPrefetchValueOp :: Int
1622 -> [CmmExpr]
1623 -> FCode ()
1624 doPrefetchValueOp locality [addr]
1625 = do dflags <- getDynFlags
1626 mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
1627 doPrefetchValueOp _ _
1628 = panic "StgCmmPrim: doPrefetchValueOp"
1629
1630 -- | helper to generate prefetch primcalls
1631 mkBasicPrefetch :: Int -- Locality level 0-3
1632 -> ByteOff -- Initial offset in bytes
1633 -> CmmExpr -- Base address
1634 -> CmmExpr -- Index
1635 -> FCode ()
1636 mkBasicPrefetch locality off base idx
1637 = do dflags <- getDynFlags
1638 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1639 return ()
1640
1641 -- ----------------------------------------------------------------------------
1642 -- Allocating byte arrays
1643
1644 -- | Takes a register to return the newly allocated array in and the
1645 -- size of the new array in bytes. Allocates a new
1646 -- 'MutableByteArray#'.
1647 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1648 doNewByteArrayOp res_r n = do
1649 dflags <- getDynFlags
1650
1651 let info_ptr = mkLblExpr mkArrWords_infoLabel
1652 rep = arrWordsRep dflags n
1653
1654 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1655 (mkIntExpr dflags (nonHdrSize dflags rep))
1656 (zeroExpr dflags)
1657
1658 let hdr_size = fixedHdrSize dflags
1659
1660 base <- allocHeapClosure rep info_ptr curCCS
1661 [ (CmmExprArg (mkIntExpr dflags n),
1662 hdr_size + oFFSET_StgArrBytes_bytes dflags)
1663 ]
1664
1665 emit $ mkAssign (CmmLocal res_r) base
1666
1667 -- ----------------------------------------------------------------------------
1668 -- Copying byte arrays
1669
1670 -- | Takes a source 'ByteArray#', an offset in the source array, a
1671 -- destination 'MutableByteArray#', an offset into the destination
1672 -- array, and the number of bytes to copy. Copies the given number of
1673 -- bytes from the source array to the destination array.
1674 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1675 -> FCode ()
1676 doCopyByteArrayOp = emitCopyByteArray copy
1677 where
1678 -- Copy data (we assume the arrays aren't overlapping since
1679 -- they're of different types)
1680 copy _src _dst dst_p src_p bytes =
1681 emitMemcpyCall dst_p src_p bytes 1
1682
1683 -- | Takes a source 'MutableByteArray#', an offset in the source
1684 -- array, a destination 'MutableByteArray#', an offset into the
1685 -- destination array, and the number of bytes to copy. Copies the
1686 -- given number of bytes from the source array to the destination
1687 -- array.
1688 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1689 -> FCode ()
1690 doCopyMutableByteArrayOp = emitCopyByteArray copy
1691 where
1692 -- The only time the memory might overlap is when the two arrays
1693 -- we were provided are the same array!
1694 -- TODO: Optimize branch for common case of no aliasing.
1695 copy src dst dst_p src_p bytes = do
1696 dflags <- getDynFlags
1697 [moveCall, cpyCall] <- forkAlts [
1698 getCode $ emitMemmoveCall dst_p src_p bytes 1,
1699 getCode $ emitMemcpyCall dst_p src_p bytes 1
1700 ]
1701 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1702
1703 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1704 -> FCode ())
1705 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1706 -> FCode ()
1707 emitCopyByteArray copy src src_off dst dst_off n = do
1708 dflags <- getDynFlags
1709 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1710 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1711 copy src dst dst_p src_p n
1712
1713 -- | Takes a source 'ByteArray#', an offset in the source array, a
1714 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1715 -- number of bytes from the source array to the destination memory region.
1716 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1717 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1718 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1719 dflags <- getDynFlags
1720 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1721 emitMemcpyCall dst_p src_p bytes 1
1722
1723 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1724 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1725 -- number of bytes from the source array to the destination memory region.
1726 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1727 -> FCode ()
1728 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1729
1730 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1731 -- the destination array, and the number of bytes to copy. Copies the given
1732 -- number of bytes from the source memory region to the destination array.
1733 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1734 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1735 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1736 dflags <- getDynFlags
1737 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1738 emitMemcpyCall dst_p src_p bytes 1
1739
1740
1741 -- ----------------------------------------------------------------------------
1742 -- Setting byte arrays
1743
1744 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1745 -- and a byte, and sets each of the selected bytes in the array to the
1746 -- character.
1747 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1748 -> FCode ()
1749 doSetByteArrayOp ba off len c
1750 = do dflags <- getDynFlags
1751 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1752 emitMemsetCall p c len 1
1753
1754 -- ----------------------------------------------------------------------------
1755 -- Allocating arrays
1756
1757 -- | Allocate a new array.
1758 doNewArrayOp :: CmmFormal -- ^ return register
1759 -> SMRep -- ^ representation of the array
1760 -> CLabel -- ^ info pointer
1761 -> [(CmmExpr, ByteOff)] -- ^ header payload
1762 -> WordOff -- ^ array size
1763 -> CmmExpr -- ^ initial element
1764 -> FCode ()
1765 doNewArrayOp res_r rep info payload n init = do
1766 dflags <- getDynFlags
1767
1768 let info_ptr = mkLblExpr info
1769
1770 tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
1771 (mkIntExpr dflags (nonHdrSize dflags rep))
1772 (zeroExpr dflags)
1773
1774 base <- allocHeapClosure rep info_ptr curCCS (map (first CmmExprArg) payload)
1775
1776 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1777 emit $ mkAssign arr base
1778
1779 -- Initialise all elements of the the array
1780 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep)
1781 for <- newLabelC
1782 emitLabel for
1783 let loopBody =
1784 [ mkStore (CmmReg (CmmLocal p)) init
1785 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1786 , mkBranch for ]
1787 emit =<< mkCmmIfThen
1788 (cmmULtWord dflags (CmmReg (CmmLocal p))
1789 (cmmOffsetW dflags (CmmReg arr)
1790 (hdrSizeW dflags rep + n)))
1791 (catAGraphs loopBody)
1792
1793 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1794
1795 -- ----------------------------------------------------------------------------
1796 -- Copying pointer arrays
1797
1798 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1799 -- nowhere else in the code generator. This is mostly because these
1800 -- "primitive" ops result in a surprisingly large amount of code. It
1801 -- will likely be worthwhile to optimize what is emitted here, so that
1802 -- our optimization passes don't waste time repeatedly optimizing the
1803 -- same bits of code.
1804
1805 -- More closely imitates 'assignTemp' from the old code generator, which
1806 -- returns a CmmExpr rather than a LocalReg.
1807 assignTempE :: CmmExpr -> FCode CmmExpr
1808 assignTempE e = do
1809 t <- assignTemp e
1810 return (CmmReg (CmmLocal t))
1811
1812 -- | Takes a source 'Array#', an offset in the source array, a
1813 -- destination 'MutableArray#', an offset into the destination array,
1814 -- and the number of elements to copy. Copies the given number of
1815 -- elements from the source array to the destination array.
1816 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1817 -> FCode ()
1818 doCopyArrayOp = emitCopyArray copy
1819 where
1820 -- Copy data (we assume the arrays aren't overlapping since
1821 -- they're of different types)
1822 copy _src _dst dst_p src_p bytes =
1823 do dflags <- getDynFlags
1824 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1825 (wORD_SIZE dflags)
1826
1827
1828 -- | Takes a source 'MutableArray#', an offset in the source array, a
1829 -- destination 'MutableArray#', an offset into the destination array,
1830 -- and the number of elements to copy. Copies the given number of
1831 -- elements from the source array to the destination array.
1832 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1833 -> FCode ()
1834 doCopyMutableArrayOp = emitCopyArray copy
1835 where
1836 -- The only time the memory might overlap is when the two arrays
1837 -- we were provided are the same array!
1838 -- TODO: Optimize branch for common case of no aliasing.
1839 copy src dst dst_p src_p bytes = do
1840 dflags <- getDynFlags
1841 [moveCall, cpyCall] <- forkAlts [
1842 getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1843 (wORD_SIZE dflags),
1844 getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1845 (wORD_SIZE dflags)
1846 ]
1847 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1848
1849 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1850 -> FCode ()) -- ^ copy function
1851 -> CmmExpr -- ^ source array
1852 -> CmmExpr -- ^ offset in source array
1853 -> CmmExpr -- ^ destination array
1854 -> CmmExpr -- ^ offset in destination array
1855 -> WordOff -- ^ number of elements to copy
1856 -> FCode ()
1857 emitCopyArray copy src0 src_off dst0 dst_off0 n = do
1858 dflags <- getDynFlags
1859 when (n /= 0) $ do
1860 -- Passed as arguments (be careful)
1861 src <- assignTempE src0
1862 dst <- assignTempE dst0
1863 dst_off <- assignTempE dst_off0
1864
1865 -- Set the dirty bit in the header.
1866 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1867
1868 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
1869 (arrPtrsHdrSize dflags)
1870 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1871 src_p <- assignTempE $ cmmOffsetExprW dflags
1872 (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1873 let bytes = wordsToBytes dflags n
1874
1875 copy src dst dst_p src_p bytes
1876
1877 -- The base address of the destination card table
1878 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
1879 (loadArrPtrsSize dflags dst)
1880
1881 emitSetCards dst_off dst_cards_p n
1882
1883 doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1884 -> FCode ()
1885 doCopySmallArrayOp = emitCopySmallArray copy
1886 where
1887 -- Copy data (we assume the arrays aren't overlapping since
1888 -- they're of different types)
1889 copy _src _dst dst_p src_p bytes =
1890 do dflags <- getDynFlags
1891 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1892 (wORD_SIZE dflags)
1893
1894
1895 doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1896 -> FCode ()
1897 doCopySmallMutableArrayOp = emitCopySmallArray copy
1898 where
1899 -- The only time the memory might overlap is when the two arrays
1900 -- we were provided are the same array!
1901 -- TODO: Optimize branch for common case of no aliasing.
1902 copy src dst dst_p src_p bytes = do
1903 dflags <- getDynFlags
1904 [moveCall, cpyCall] <- forkAlts
1905 [ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1906 (wORD_SIZE dflags)
1907 , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1908 (wORD_SIZE dflags)
1909 ]
1910 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1911
1912 emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1913 -> FCode ()) -- ^ copy function
1914 -> CmmExpr -- ^ source array
1915 -> CmmExpr -- ^ offset in source array
1916 -> CmmExpr -- ^ destination array
1917 -> CmmExpr -- ^ offset in destination array
1918 -> WordOff -- ^ number of elements to copy
1919 -> FCode ()
1920 emitCopySmallArray copy src0 src_off dst0 dst_off n = do
1921 dflags <- getDynFlags
1922
1923 -- Passed as arguments (be careful)
1924 src <- assignTempE src0
1925 dst <- assignTempE dst0
1926
1927 -- Set the dirty bit in the header.
1928 emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
1929
1930 dst_p <- assignTempE $ cmmOffsetExprW dflags
1931 (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
1932 src_p <- assignTempE $ cmmOffsetExprW dflags
1933 (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
1934 let bytes = wordsToBytes dflags n
1935
1936 copy src dst dst_p src_p bytes
1937
1938 -- | Takes an info table label, a register to return the newly
1939 -- allocated array in, a source array, an offset in the source array,
1940 -- and the number of elements to copy. Allocates a new array and
1941 -- initializes it from the source array.
1942 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1943 -> FCode ()
1944 emitCloneArray info_p res_r src src_off n = do
1945 dflags <- getDynFlags
1946
1947 let info_ptr = mkLblExpr info_p
1948 rep = arrPtrsRep dflags n
1949
1950 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
1951 (mkIntExpr dflags (nonHdrSize dflags rep))
1952 (zeroExpr dflags)
1953
1954 let hdr_size = fixedHdrSize dflags
1955
1956 base <- allocHeapClosure rep info_ptr curCCS
1957 [ (CmmExprArg (mkIntExpr dflags n),
1958 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
1959 , (CmmExprArg (mkIntExpr dflags (nonHdrSizeW rep)),
1960 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
1961 ]
1962
1963 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1964 emit $ mkAssign arr base
1965
1966 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
1967 (arrPtrsHdrSize dflags)
1968 src_p <- assignTempE $ cmmOffsetExprW dflags src
1969 (cmmAddWord dflags
1970 (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
1971
1972 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
1973 (wORD_SIZE dflags)
1974
1975 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1976
1977 -- | Takes an info table label, a register to return the newly
1978 -- allocated array in, a source array, an offset in the source array,
1979 -- and the number of elements to copy. Allocates a new array and
1980 -- initializes it from the source array.
1981 emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1982 -> FCode ()
1983 emitCloneSmallArray info_p res_r src src_off n = do
1984 dflags <- getDynFlags
1985
1986 let info_ptr = mkLblExpr info_p
1987 rep = smallArrPtrsRep n
1988
1989 tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
1990 (mkIntExpr dflags (nonHdrSize dflags rep))
1991 (zeroExpr dflags)
1992
1993 let hdr_size = fixedHdrSize dflags
1994
1995 base <- allocHeapClosure rep info_ptr curCCS
1996 [ (CmmExprArg (mkIntExpr dflags n),
1997 hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
1998 ]
1999
2000 arr <- CmmLocal `fmap` newTemp (bWord dflags)
2001 emit $ mkAssign arr base
2002
2003 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
2004 (smallArrPtrsHdrSize dflags)
2005 src_p <- assignTempE $ cmmOffsetExprW dflags src
2006 (cmmAddWord dflags
2007 (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
2008
2009 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
2010 (wORD_SIZE dflags)
2011
2012 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2013
2014 -- | Takes and offset in the destination array, the base address of
2015 -- the card table, and the number of elements affected (*not* the
2016 -- number of cards). The number of elements may not be zero.
2017 -- Marks the relevant cards as dirty.
2018 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
2019 emitSetCards dst_start dst_cards_start n = do
2020 dflags <- getDynFlags
2021 start_card <- assignTempE $ cardCmm dflags dst_start
2022 let end_card = cardCmm dflags
2023 (cmmSubWord dflags
2024 (cmmAddWord dflags dst_start (mkIntExpr dflags n))
2025 (mkIntExpr dflags 1))
2026 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
2027 (mkIntExpr dflags 1)
2028 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
2029 1 -- no alignment (1 byte)
2030
2031 -- Convert an element index to a card index
2032 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
2033 cardCmm dflags i =
2034 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
2035
2036 ------------------------------------------------------------------------------
2037 -- SmallArray PrimOp implementations
2038
2039 doReadSmallPtrArrayOp :: LocalReg
2040 -> CmmExpr
2041 -> CmmExpr
2042 -> FCode ()
2043 doReadSmallPtrArrayOp res addr idx = do
2044 dflags <- getDynFlags
2045 mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
2046 (gcWord dflags) idx
2047
2048 doWriteSmallPtrArrayOp :: CmmExpr
2049 -> CmmExpr
2050 -> CmmExpr
2051 -> FCode ()
2052 doWriteSmallPtrArrayOp addr idx val = do
2053 dflags <- getDynFlags
2054 let ty = cmmExprType dflags val
2055 mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
2056 emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2057
2058 ------------------------------------------------------------------------------
2059 -- Atomic read-modify-write
2060
2061 -- | Emit an atomic modification to a byte array element. The result
2062 -- reg contains that previous value of the element. Implies a full
2063 -- memory barrier.
2064 doAtomicRMW :: LocalReg -- ^ Result reg
2065 -> AtomicMachOp -- ^ Atomic op (e.g. add)
2066 -> CmmExpr -- ^ MutableByteArray#
2067 -> CmmExpr -- ^ Index
2068 -> CmmType -- ^ Type of element by which we are indexing
2069 -> CmmExpr -- ^ Op argument (e.g. amount to add)
2070 -> FCode ()
2071 doAtomicRMW res amop mba idx idx_ty n = do
2072 dflags <- getDynFlags
2073 let width = typeWidth idx_ty
2074 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2075 width mba idx
2076 emitPrimCall
2077 [ res ]
2078 (MO_AtomicRMW width amop)
2079 [ addr, n ]
2080
2081 -- | Emit an atomic read to a byte array that acts as a memory barrier.
2082 doAtomicReadByteArray
2083 :: LocalReg -- ^ Result reg
2084 -> CmmExpr -- ^ MutableByteArray#
2085 -> CmmExpr -- ^ Index
2086 -> CmmType -- ^ Type of element by which we are indexing
2087 -> FCode ()
2088 doAtomicReadByteArray res mba idx idx_ty = do
2089 dflags <- getDynFlags
2090 let width = typeWidth idx_ty
2091 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2092 width mba idx
2093 emitPrimCall
2094 [ res ]
2095 (MO_AtomicRead width)
2096 [ addr ]
2097
2098 -- | Emit an atomic write to a byte array that acts as a memory barrier.
2099 doAtomicWriteByteArray
2100 :: CmmExpr -- ^ MutableByteArray#
2101 -> CmmExpr -- ^ Index
2102 -> CmmType -- ^ Type of element by which we are indexing
2103 -> CmmExpr -- ^ Value to write
2104 -> FCode ()
2105 doAtomicWriteByteArray mba idx idx_ty val = do
2106 dflags <- getDynFlags
2107 let width = typeWidth idx_ty
2108 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2109 width mba idx
2110 emitPrimCall
2111 [ {- no results -} ]
2112 (MO_AtomicWrite width)
2113 [ addr, val ]
2114
2115 doCasByteArray
2116 :: LocalReg -- ^ Result reg
2117 -> CmmExpr -- ^ MutableByteArray#
2118 -> CmmExpr -- ^ Index
2119 -> CmmType -- ^ Type of element by which we are indexing
2120 -> CmmExpr -- ^ Old value
2121 -> CmmExpr -- ^ New value
2122 -> FCode ()
2123 doCasByteArray res mba idx idx_ty old new = do
2124 dflags <- getDynFlags
2125 let width = (typeWidth idx_ty)
2126 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2127 width mba idx
2128 emitPrimCall
2129 [ res ]
2130 (MO_Cmpxchg width)
2131 [ addr, old, new ]
2132
2133 ------------------------------------------------------------------------------
2134 -- Helpers for emitting function calls
2135
2136 -- | Emit a call to @memcpy@.
2137 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2138 emitMemcpyCall dst src n align = do
2139 emitPrimCall
2140 [ {-no results-} ]
2141 (MO_Memcpy align)
2142 [ dst, src, n ]
2143
2144 -- | Emit a call to @memmove@.
2145 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2146 emitMemmoveCall dst src n align = do
2147 emitPrimCall
2148 [ {- no results -} ]
2149 (MO_Memmove align)
2150 [ dst, src, n ]
2151
2152 -- | Emit a call to @memset@. The second argument must fit inside an
2153 -- unsigned char.
2154 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2155 emitMemsetCall dst c n align = do
2156 emitPrimCall
2157 [ {- no results -} ]
2158 (MO_Memset align)
2159 [ dst, c, n ]
2160
2161 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2162 emitBSwapCall res x width = do
2163 emitPrimCall
2164 [ res ]
2165 (MO_BSwap width)
2166 [ x ]
2167
2168 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2169 emitPopCntCall res x width = do
2170 emitPrimCall
2171 [ res ]
2172 (MO_PopCnt width)
2173 [ x ]
2174
2175 emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2176 emitClzCall res x width = do
2177 emitPrimCall
2178 [ res ]
2179 (MO_Clz width)
2180 [ x ]
2181
2182 emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2183 emitCtzCall res x width = do
2184 emitPrimCall
2185 [ res ]
2186 (MO_Ctz width)
2187 [ x ]