Generalize CmmUnwind and pass unwind information through NCG
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 {-# LANGUAGE CPP #-}
2
3 ----------------------------------------------------------------------------
4 --
5 -- Stg to C--: primitive operations
6 --
7 -- (c) The University of Glasgow 2004-2006
8 --
9 -----------------------------------------------------------------------------
10
11 module StgCmmPrim (
12 cgOpApp,
13 cgPrimOp, -- internal(ish), used by cgCase to get code for a
14 -- comparison without also turning it into a Bool.
15 shouldInlinePrimOp
16 ) where
17
18 #include "HsVersions.h"
19
20 import StgCmmLayout
21 import StgCmmForeign
22 import StgCmmEnv
23 import StgCmmMonad
24 import StgCmmUtils
25 import StgCmmTicky
26 import StgCmmHeap
27 import StgCmmProf ( costCentreFrom, curCCS )
28
29 import DynFlags
30 import Platform
31 import BasicTypes
32 import BlockId
33 import MkGraph
34 import StgSyn
35 import Cmm
36 import CmmInfo
37 import Type ( Type, tyConAppTyCon )
38 import TyCon
39 import CLabel
40 import CmmUtils
41 import PrimOp
42 import SMRep
43 import FastString
44 import Outputable
45 import Util
46
47 import Prelude hiding ((<*>))
48
49 import Data.Bits ((.&.), bit)
50 import Control.Monad (liftM, when)
51
52 ------------------------------------------------------------------------
53 -- Primitive operations and foreign calls
54 ------------------------------------------------------------------------
55
56 {- Note [Foreign call results]
57 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
58 A foreign call always returns an unboxed tuple of results, one
59 of which is the state token. This seems to happen even for pure
60 calls.
61
62 Even if we returned a single result for pure calls, it'd still be
63 right to wrap it in a singleton unboxed tuple, because the result
64 might be a Haskell closure pointer, we don't want to evaluate it. -}
65
66 ----------------------------------
67 cgOpApp :: StgOp -- The op
68 -> [StgArg] -- Arguments
69 -> Type -- Result type (always an unboxed tuple)
70 -> FCode ReturnKind
71
72 -- Foreign calls
73 cgOpApp (StgFCallOp fcall _) stg_args res_ty
74 = cgForeignCall fcall stg_args res_ty
75 -- Note [Foreign call results]
76
77 -- tagToEnum# is special: we need to pull the constructor
78 -- out of the table, and perform an appropriate return.
79
80 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
81 = ASSERT(isEnumerationTyCon tycon)
82 do { dflags <- getDynFlags
83 ; args' <- getNonVoidArgAmodes [arg]
84 ; let amode = case args' of [amode] -> amode
85 _ -> panic "TagToEnumOp had void arg"
86 ; emitReturn [tagToClosure dflags tycon amode] }
87 where
88 -- If you're reading this code in the attempt to figure
89 -- out why the compiler panic'ed here, it is probably because
90 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
91 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
92 -- That won't work.
93 tycon = tyConAppTyCon res_ty
94
95 cgOpApp (StgPrimOp primop) args res_ty = do
96 dflags <- getDynFlags
97 cmm_args <- getNonVoidArgAmodes args
98 case shouldInlinePrimOp dflags primop cmm_args of
99 Nothing -> do -- out-of-line
100 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
101 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
102
103 Just f -- inline
104 | ReturnsPrim VoidRep <- result_info
105 -> do f []
106 emitReturn []
107
108 | ReturnsPrim rep <- result_info
109 -> do dflags <- getDynFlags
110 res <- newTemp (primRepCmmType dflags rep)
111 f [res]
112 emitReturn [CmmReg (CmmLocal res)]
113
114 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
115 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
116 f regs
117 emitReturn (map (CmmReg . CmmLocal) regs)
118
119 | otherwise -> panic "cgPrimop"
120 where
121 result_info = getPrimOpResultInfo primop
122
123 cgOpApp (StgPrimCallOp primcall) args _res_ty
124 = do { cmm_args <- getNonVoidArgAmodes args
125 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
126 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
127
128 -- | Interpret the argument as an unsigned value, assuming the value
129 -- is given in two-complement form in the given width.
130 --
131 -- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
132 --
133 -- This function is used to work around the fact that many array
134 -- primops take Int# arguments, but we interpret them as unsigned
135 -- quantities in the code gen. This means that we have to be careful
136 -- every time we work on e.g. a CmmInt literal that corresponds to the
137 -- array size, as it might contain a negative Integer value if the
138 -- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
139 -- literal.
140 asUnsigned :: Width -> Integer -> Integer
141 asUnsigned w n = n .&. (bit (widthInBits w) - 1)
142
143 -- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
144 -- ByteOff (or some other fixed width signed type) to represent
145 -- array sizes or indices. This means that these will overflow for
146 -- large enough sizes.
147
148 -- | Decide whether an out-of-line primop should be replaced by an
149 -- inline implementation. This might happen e.g. if there's enough
150 -- static information, such as statically know arguments, to emit a
151 -- more efficient implementation inline.
152 --
153 -- Returns 'Nothing' if this primop should use its out-of-line
154 -- implementation (defined elsewhere) and 'Just' together with a code
155 -- generating function that takes the output regs as arguments
156 -- otherwise.
157 shouldInlinePrimOp :: DynFlags
158 -> PrimOp -- ^ The primop
159 -> [CmmExpr] -- ^ The primop arguments
160 -> Maybe ([LocalReg] -> FCode ())
161
162 shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
163 | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
164 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
165
166 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
167 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
168 Just $ \ [res] ->
169 doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
170 [ (mkIntExpr dflags (fromInteger n),
171 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
172 , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
173 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
174 ]
175 (fromInteger n) init
176
177 shouldInlinePrimOp _ CopyArrayOp
178 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
179 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
180
181 shouldInlinePrimOp _ CopyMutableArrayOp
182 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
183 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
184
185 shouldInlinePrimOp _ CopyArrayArrayOp
186 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
187 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
188
189 shouldInlinePrimOp _ CopyMutableArrayArrayOp
190 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
191 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
192
193 shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
194 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
195 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
196
197 shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
198 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
199 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
200
201 shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
202 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
203 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
204
205 shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
206 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
207 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
208
209 shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
210 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
211 Just $ \ [res] ->
212 doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
213 [ (mkIntExpr dflags (fromInteger n),
214 fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
215 ]
216 (fromInteger n) init
217
218 shouldInlinePrimOp _ CopySmallArrayOp
219 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
220 Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
221
222 shouldInlinePrimOp _ CopySmallMutableArrayOp
223 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
224 Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
225
226 shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
227 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
228 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
229
230 shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
231 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
232 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
233
234 shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
235 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
236 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
237
238 shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
239 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
240 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
241
242 shouldInlinePrimOp dflags primop args
243 | primOpOutOfLine primop = Nothing
244 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
245
246 -- TODO: Several primops, such as 'copyArray#', only have an inline
247 -- implementation (below) but could possibly have both an inline
248 -- implementation and an out-of-line implementation, just like
249 -- 'newArray#'. This would lower the amount of code generated,
250 -- hopefully without a performance impact (needs to be measured).
251
252 ---------------------------------------------------
253 cgPrimOp :: [LocalReg] -- where to put the results
254 -> PrimOp -- the op
255 -> [StgArg] -- arguments
256 -> FCode ()
257
258 cgPrimOp results op args
259 = do dflags <- getDynFlags
260 arg_exprs <- getNonVoidArgAmodes args
261 emitPrimOp dflags results op arg_exprs
262
263
264 ------------------------------------------------------------------------
265 -- Emitting code for a primop
266 ------------------------------------------------------------------------
267
268 emitPrimOp :: DynFlags
269 -> [LocalReg] -- where to put the results
270 -> PrimOp -- the op
271 -> [CmmExpr] -- arguments
272 -> FCode ()
273
274 -- First we handle various awkward cases specially. The remaining
275 -- easy cases are then handled by translateOp, defined below.
276
277 emitPrimOp _ [res] ParOp [arg]
278 =
279 -- for now, just implement this in a C function
280 -- later, we might want to inline it.
281 emitCCall
282 [(res,NoHint)]
283 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
284 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
285
286 emitPrimOp dflags [res] SparkOp [arg]
287 = do
288 -- returns the value of arg in res. We're going to therefore
289 -- refer to arg twice (once to pass to newSpark(), and once to
290 -- assign to res), so put it in a temporary.
291 tmp <- assignTemp arg
292 tmp2 <- newTemp (bWord dflags)
293 emitCCall
294 [(tmp2,NoHint)]
295 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
296 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
297 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
298
299 emitPrimOp dflags [res] GetCCSOfOp [arg]
300 = emitAssign (CmmLocal res) val
301 where
302 val
303 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
304 | otherwise = CmmLit (zeroCLit dflags)
305
306 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
307 = emitAssign (CmmLocal res) curCCS
308
309 emitPrimOp dflags [res] ReadMutVarOp [mutv]
310 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
311
312 emitPrimOp dflags res@[] WriteMutVarOp [mutv,var]
313 = do -- Without this write barrier, other CPUs may see this pointer before
314 -- the writes for the closure it points to have occurred.
315 emitPrimCall res MO_WriteBarrier []
316 emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
317 emitCCall
318 [{-no results-}]
319 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
320 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
321
322 -- #define sizzeofByteArrayzh(r,a) \
323 -- r = ((StgArrBytes *)(a))->bytes
324 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
325 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
326
327 -- #define sizzeofMutableByteArrayzh(r,a) \
328 -- r = ((StgArrBytes *)(a))->bytes
329 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
330 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
331
332 -- #define getSizzeofMutableByteArrayzh(r,a) \
333 -- r = ((StgArrBytes *)(a))->bytes
334 emitPrimOp dflags [res] GetSizeofMutableByteArrayOp [arg]
335 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
336
337
338 -- #define touchzh(o) /* nothing */
339 emitPrimOp _ res@[] TouchOp args@[_arg]
340 = do emitPrimCall res MO_Touch args
341
342 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
343 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
344 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
345
346 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
347 emitPrimOp dflags [res] StableNameToIntOp [arg]
348 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
349
350 -- #define eqStableNamezh(r,sn1,sn2) \
351 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
352 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
353 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
354 cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags),
355 cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags)
356 ])
357
358 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
359 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
360
361 -- #define addrToHValuezh(r,a) r=(P_)a
362 emitPrimOp _ [res] AddrToAnyOp [arg]
363 = emitAssign (CmmLocal res) arg
364
365 -- #define hvalueToAddrzh(r, a) r=(W_)a
366 emitPrimOp _ [res] AnyToAddrOp [arg]
367 = emitAssign (CmmLocal res) arg
368
369 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
370 -- Note: argument may be tagged!
371 emitPrimOp dflags [res] DataToTagOp [arg]
372 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
373
374 {- Freezing arrays-of-ptrs requires changing an info table, for the
375 benefit of the generational collector. It needs to scavenge mutable
376 objects, even if they are in old space. When they become immutable,
377 they can be removed from this scavenge list. -}
378
379 -- #define unsafeFreezzeArrayzh(r,a)
380 -- {
381 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
382 -- r = a;
383 -- }
384 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
385 = emit $ catAGraphs
386 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
387 mkAssign (CmmLocal res) arg ]
388 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
389 = emit $ catAGraphs
390 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
391 mkAssign (CmmLocal res) arg ]
392 emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
393 = emit $ catAGraphs
394 [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)),
395 mkAssign (CmmLocal res) arg ]
396
397 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
398 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
399 = emitAssign (CmmLocal res) arg
400
401 -- Reading/writing pointer arrays
402
403 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
404 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
405 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
406
407 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
408 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
409 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
410 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
411 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
412 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
413 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
414 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
415 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
416 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
417
418 emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
419 emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
420 emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
421
422 -- Getting the size of pointer arrays
423
424 emitPrimOp dflags [res] SizeofArrayOp [arg]
425 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg
426 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgMutArrPtrs_ptrs dflags))
427 (bWord dflags))
428 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
429 = emitPrimOp dflags [res] SizeofArrayOp [arg]
430 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
431 = emitPrimOp dflags [res] SizeofArrayOp [arg]
432 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
433 = emitPrimOp dflags [res] SizeofArrayOp [arg]
434
435 emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
436 emit $ mkAssign (CmmLocal res)
437 (cmmLoadIndexW dflags arg
438 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgSmallMutArrPtrs_ptrs dflags))
439 (bWord dflags))
440 emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
441 emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
442
443 -- IndexXXXoffAddr
444
445 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
446 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
447 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
448 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
449 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
450 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
451 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
452 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
453 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
454 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
455 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
456 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
457 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
458 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
459 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
460 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
461
462 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
463
464 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
465 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
466 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
467 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
468 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
469 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
470 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
471 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
472 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
473 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
474 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
475 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
476 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
477 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
478 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
479 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
480
481 -- IndexXXXArray
482
483 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
484 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
485 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
486 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
487 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
488 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
489 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
490 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
491 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
492 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
493 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
494 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
495 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
496 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
497 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
498 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
499
500 -- ReadXXXArray, identical to IndexXXXArray.
501
502 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
503 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
504 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
505 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
506 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
507 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
508 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
509 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
510 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
511 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
512 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
513 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
514 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
515 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
516 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
517 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
518
519 -- WriteXXXoffAddr
520
521 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
522 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
523 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
524 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
525 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
526 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
527 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
528 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
529 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
530 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
531 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
532 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
533 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
534 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
535 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
536 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
537
538 -- WriteXXXArray
539
540 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
541 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
542 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
543 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
544 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
545 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
546 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
547 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
548 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
549 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
550 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
551 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
552 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
553 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
554 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
555 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
556
557 -- Copying and setting byte arrays
558 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
559 doCopyByteArrayOp src src_off dst dst_off n
560 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
561 doCopyMutableByteArrayOp src src_off dst dst_off n
562 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
563 doCopyByteArrayToAddrOp src src_off dst n
564 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
565 doCopyMutableByteArrayToAddrOp src src_off dst n
566 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
567 doCopyAddrToByteArrayOp src dst dst_off n
568 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
569 doSetByteArrayOp ba off len c
570
571 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
572 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
573 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
574 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
575
576 -- Population count
577 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
578 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
579 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
580 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
581 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
582
583 -- count leading zeros
584 emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
585 emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
586 emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
587 emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
588 emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
589
590 -- count trailing zeros
591 emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
592 emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
593 emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
594 emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
595 emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
596
597 -- Unsigned int to floating point conversions
598 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
599 (MO_UF_Conv W32) [w]
600 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
601 (MO_UF_Conv W64) [w]
602
603 -- SIMD primops
604 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
605 checkVecCompatibility dflags vcat n w
606 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
607 where
608 zeros :: CmmExpr
609 zeros = CmmLit $ CmmVec (replicate n zero)
610
611 zero :: CmmLit
612 zero = case vcat of
613 IntVec -> CmmInt 0 w
614 WordVec -> CmmInt 0 w
615 FloatVec -> CmmFloat 0 w
616
617 ty :: CmmType
618 ty = vecVmmType vcat n w
619
620 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
621 checkVecCompatibility dflags vcat n w
622 when (length es /= n) $
623 panic "emitPrimOp: VecPackOp has wrong number of arguments"
624 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
625 where
626 zeros :: CmmExpr
627 zeros = CmmLit $ CmmVec (replicate n zero)
628
629 zero :: CmmLit
630 zero = case vcat of
631 IntVec -> CmmInt 0 w
632 WordVec -> CmmInt 0 w
633 FloatVec -> CmmFloat 0 w
634
635 ty :: CmmType
636 ty = vecVmmType vcat n w
637
638 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
639 checkVecCompatibility dflags vcat n w
640 when (length res /= n) $
641 panic "emitPrimOp: VecUnpackOp has wrong number of results"
642 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
643 where
644 ty :: CmmType
645 ty = vecVmmType vcat n w
646
647 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
648 checkVecCompatibility dflags vcat n w
649 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
650 where
651 ty :: CmmType
652 ty = vecVmmType vcat n w
653
654 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
655 checkVecCompatibility dflags vcat n w
656 doIndexByteArrayOp Nothing ty res args
657 where
658 ty :: CmmType
659 ty = vecVmmType vcat n w
660
661 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
662 checkVecCompatibility dflags vcat n w
663 doIndexByteArrayOp Nothing ty res args
664 where
665 ty :: CmmType
666 ty = vecVmmType vcat n w
667
668 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
669 checkVecCompatibility dflags vcat n w
670 doWriteByteArrayOp Nothing ty res args
671 where
672 ty :: CmmType
673 ty = vecVmmType vcat n w
674
675 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
676 checkVecCompatibility dflags vcat n w
677 doIndexOffAddrOp Nothing ty res args
678 where
679 ty :: CmmType
680 ty = vecVmmType vcat n w
681
682 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
683 checkVecCompatibility dflags vcat n w
684 doIndexOffAddrOp Nothing ty res args
685 where
686 ty :: CmmType
687 ty = vecVmmType vcat n w
688
689 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
690 checkVecCompatibility dflags vcat n w
691 doWriteOffAddrOp Nothing ty res args
692 where
693 ty :: CmmType
694 ty = vecVmmType vcat n w
695
696 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
697 checkVecCompatibility dflags vcat n w
698 doIndexByteArrayOpAs Nothing vecty ty res args
699 where
700 vecty :: CmmType
701 vecty = vecVmmType vcat n w
702
703 ty :: CmmType
704 ty = vecCmmCat vcat w
705
706 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
707 checkVecCompatibility dflags vcat n w
708 doIndexByteArrayOpAs Nothing vecty ty res args
709 where
710 vecty :: CmmType
711 vecty = vecVmmType vcat n w
712
713 ty :: CmmType
714 ty = vecCmmCat vcat w
715
716 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
717 checkVecCompatibility dflags vcat n w
718 doWriteByteArrayOp Nothing ty res args
719 where
720 ty :: CmmType
721 ty = vecCmmCat vcat w
722
723 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
724 checkVecCompatibility dflags vcat n w
725 doIndexOffAddrOpAs Nothing vecty ty res args
726 where
727 vecty :: CmmType
728 vecty = vecVmmType vcat n w
729
730 ty :: CmmType
731 ty = vecCmmCat vcat w
732
733 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
734 checkVecCompatibility dflags vcat n w
735 doIndexOffAddrOpAs Nothing vecty ty res args
736 where
737 vecty :: CmmType
738 vecty = vecVmmType vcat n w
739
740 ty :: CmmType
741 ty = vecCmmCat vcat w
742
743 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
744 checkVecCompatibility dflags vcat n w
745 doWriteOffAddrOp Nothing ty res args
746 where
747 ty :: CmmType
748 ty = vecCmmCat vcat w
749
750 -- Prefetch
751 emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
752 emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
753 emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
754 emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
755
756 emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
757 emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
758 emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
759 emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
760
761 emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
762 emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
763 emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
764 emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
765
766 emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
767 emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
768 emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
769 emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
770
771 -- Atomic read-modify-write
772 emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
773 doAtomicRMW res AMO_Add mba ix (bWord dflags) n
774 emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
775 doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
776 emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
777 doAtomicRMW res AMO_And mba ix (bWord dflags) n
778 emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
779 doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
780 emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
781 doAtomicRMW res AMO_Or mba ix (bWord dflags) n
782 emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
783 doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
784 emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
785 doAtomicReadByteArray res mba ix (bWord dflags)
786 emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
787 doAtomicWriteByteArray mba ix (bWord dflags) val
788 emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
789 doCasByteArray res mba ix (bWord dflags) old new
790
791 -- The rest just translate straightforwardly
792 emitPrimOp dflags [res] op [arg]
793 | nopOp op
794 = emitAssign (CmmLocal res) arg
795
796 | Just (mop,rep) <- narrowOp op
797 = emitAssign (CmmLocal res) $
798 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
799
800 emitPrimOp dflags r@[res] op args
801 | Just prim <- callishOp op
802 = do emitPrimCall r prim args
803
804 | Just mop <- translateOp dflags op
805 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
806 emit stmt
807
808 emitPrimOp dflags results op args
809 = case callishPrimOpSupported dflags op of
810 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
811 Right gen -> gen results args
812
813 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
814
815 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
816 callishPrimOpSupported dflags op
817 = case op of
818 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
819 | otherwise -> Right (genericIntQuotRemOp dflags)
820
821 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
822 | otherwise -> Right (genericWordQuotRemOp dflags)
823
824 WordQuotRem2Op | (ncg && x86ish)
825 || llvm -> Left (MO_U_QuotRem2 (wordWidth dflags))
826 | otherwise -> Right (genericWordQuotRem2Op dflags)
827
828 WordAdd2Op | (ncg && x86ish)
829 || llvm -> Left (MO_Add2 (wordWidth dflags))
830 | otherwise -> Right genericWordAdd2Op
831
832 WordSubCOp | (ncg && x86ish)
833 || llvm -> Left (MO_SubWordC (wordWidth dflags))
834 | otherwise -> Right genericWordSubCOp
835
836 IntAddCOp | (ncg && x86ish)
837 || llvm -> Left (MO_AddIntC (wordWidth dflags))
838 | otherwise -> Right genericIntAddCOp
839
840 IntSubCOp | (ncg && x86ish)
841 || llvm -> Left (MO_SubIntC (wordWidth dflags))
842 | otherwise -> Right genericIntSubCOp
843
844 WordMul2Op | ncg && x86ish
845 || llvm -> Left (MO_U_Mul2 (wordWidth dflags))
846 | otherwise -> Right genericWordMul2Op
847
848 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
849 where
850 ncg = case hscTarget dflags of
851 HscAsm -> True
852 _ -> False
853 llvm = case hscTarget dflags of
854 HscLlvm -> True
855 _ -> False
856 x86ish = case platformArch (targetPlatform dflags) of
857 ArchX86 -> True
858 ArchX86_64 -> True
859 _ -> False
860
861 genericIntQuotRemOp :: DynFlags -> GenericOp
862 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
863 = emit $ mkAssign (CmmLocal res_q)
864 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
865 mkAssign (CmmLocal res_r)
866 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
867 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
868
869 genericWordQuotRemOp :: DynFlags -> GenericOp
870 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
871 = emit $ mkAssign (CmmLocal res_q)
872 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
873 mkAssign (CmmLocal res_r)
874 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
875 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
876
877 genericWordQuotRem2Op :: DynFlags -> GenericOp
878 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
879 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
880 where ty = cmmExprType dflags arg_x_high
881 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
882 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
883 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
884 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
885 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
886 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
887 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
888 zero = lit 0
889 one = lit 1
890 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
891 lit i = CmmLit (CmmInt i (wordWidth dflags))
892
893 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
894 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
895 mkAssign (CmmLocal res_r) high)
896 f i acc high low =
897 do roverflowedBit <- newTemp ty
898 rhigh' <- newTemp ty
899 rhigh'' <- newTemp ty
900 rlow' <- newTemp ty
901 risge <- newTemp ty
902 racc' <- newTemp ty
903 let high' = CmmReg (CmmLocal rhigh')
904 isge = CmmReg (CmmLocal risge)
905 overflowedBit = CmmReg (CmmLocal roverflowedBit)
906 let this = catAGraphs
907 [mkAssign (CmmLocal roverflowedBit)
908 (shr high negone),
909 mkAssign (CmmLocal rhigh')
910 (or (shl high one) (shr low negone)),
911 mkAssign (CmmLocal rlow')
912 (shl low one),
913 mkAssign (CmmLocal risge)
914 (or (overflowedBit `ne` zero)
915 (high' `ge` arg_y)),
916 mkAssign (CmmLocal rhigh'')
917 (high' `minus` (arg_y `times` isge)),
918 mkAssign (CmmLocal racc')
919 (or (shl acc one) isge)]
920 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
921 (CmmReg (CmmLocal rhigh''))
922 (CmmReg (CmmLocal rlow'))
923 return (this <*> rest)
924 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
925
926 genericWordAdd2Op :: GenericOp
927 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
928 = do dflags <- getDynFlags
929 r1 <- newTemp (cmmExprType dflags arg_x)
930 r2 <- newTemp (cmmExprType dflags arg_x)
931 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
932 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
933 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
934 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
935 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
936 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
937 (wordWidth dflags))
938 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
939 emit $ catAGraphs
940 [mkAssign (CmmLocal r1)
941 (add (bottomHalf arg_x) (bottomHalf arg_y)),
942 mkAssign (CmmLocal r2)
943 (add (topHalf (CmmReg (CmmLocal r1)))
944 (add (topHalf arg_x) (topHalf arg_y))),
945 mkAssign (CmmLocal res_h)
946 (topHalf (CmmReg (CmmLocal r2))),
947 mkAssign (CmmLocal res_l)
948 (or (toTopHalf (CmmReg (CmmLocal r2)))
949 (bottomHalf (CmmReg (CmmLocal r1))))]
950 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
951
952 genericWordSubCOp :: GenericOp
953 genericWordSubCOp [res_r, res_c] [aa, bb] = do
954 dflags <- getDynFlags
955 emit $ catAGraphs
956 [ -- Put the result into 'res_r'.
957 mkAssign (CmmLocal res_r) $
958 CmmMachOp (mo_wordSub dflags) [aa, bb]
959 -- Set 'res_c' to 1 if 'bb > aa' and to 0 otherwise.
960 , mkAssign (CmmLocal res_c) $
961 CmmMachOp (mo_wordUGt dflags) [bb, aa]
962 ]
963 genericWordSubCOp _ _ = panic "genericWordSubCOp"
964
965 genericIntAddCOp :: GenericOp
966 genericIntAddCOp [res_r, res_c] [aa, bb]
967 {-
968 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
969 C, and without needing any comparisons. This may not be the
970 fastest way to do it - if you have better code, please send it! --SDM
971
972 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
973
974 We currently don't make use of the r value if c is != 0 (i.e.
975 overflow), we just convert to big integers and try again. This
976 could be improved by making r and c the correct values for
977 plugging into a new J#.
978
979 { r = ((I_)(a)) + ((I_)(b)); \
980 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
981 >> (BITS_IN (I_) - 1); \
982 }
983 Wading through the mass of bracketry, it seems to reduce to:
984 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
985
986 -}
987 = do dflags <- getDynFlags
988 emit $ catAGraphs [
989 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
990 mkAssign (CmmLocal res_c) $
991 CmmMachOp (mo_wordUShr dflags) [
992 CmmMachOp (mo_wordAnd dflags) [
993 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
994 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
995 ],
996 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
997 ]
998 ]
999 genericIntAddCOp _ _ = panic "genericIntAddCOp"
1000
1001 genericIntSubCOp :: GenericOp
1002 genericIntSubCOp [res_r, res_c] [aa, bb]
1003 {- Similarly:
1004 #define subIntCzh(r,c,a,b) \
1005 { r = ((I_)(a)) - ((I_)(b)); \
1006 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
1007 >> (BITS_IN (I_) - 1); \
1008 }
1009
1010 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
1011 -}
1012 = do dflags <- getDynFlags
1013 emit $ catAGraphs [
1014 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
1015 mkAssign (CmmLocal res_c) $
1016 CmmMachOp (mo_wordUShr dflags) [
1017 CmmMachOp (mo_wordAnd dflags) [
1018 CmmMachOp (mo_wordXor dflags) [aa,bb],
1019 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
1020 ],
1021 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1022 ]
1023 ]
1024 genericIntSubCOp _ _ = panic "genericIntSubCOp"
1025
1026 genericWordMul2Op :: GenericOp
1027 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
1028 = do dflags <- getDynFlags
1029 let t = cmmExprType dflags arg_x
1030 xlyl <- liftM CmmLocal $ newTemp t
1031 xlyh <- liftM CmmLocal $ newTemp t
1032 xhyl <- liftM CmmLocal $ newTemp t
1033 r <- liftM CmmLocal $ newTemp t
1034 -- This generic implementation is very simple and slow. We might
1035 -- well be able to do better, but for now this at least works.
1036 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
1037 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
1038 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
1039 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
1040 sum = foldl1 add
1041 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
1042 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
1043 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
1044 (wordWidth dflags))
1045 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
1046 emit $ catAGraphs
1047 [mkAssign xlyl
1048 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
1049 mkAssign xlyh
1050 (mul (bottomHalf arg_x) (topHalf arg_y)),
1051 mkAssign xhyl
1052 (mul (topHalf arg_x) (bottomHalf arg_y)),
1053 mkAssign r
1054 (sum [topHalf (CmmReg xlyl),
1055 bottomHalf (CmmReg xhyl),
1056 bottomHalf (CmmReg xlyh)]),
1057 mkAssign (CmmLocal res_l)
1058 (or (bottomHalf (CmmReg xlyl))
1059 (toTopHalf (CmmReg r))),
1060 mkAssign (CmmLocal res_h)
1061 (sum [mul (topHalf arg_x) (topHalf arg_y),
1062 topHalf (CmmReg xhyl),
1063 topHalf (CmmReg xlyh),
1064 topHalf (CmmReg r)])]
1065 genericWordMul2Op _ _ = panic "genericWordMul2Op"
1066
1067 -- These PrimOps are NOPs in Cmm
1068
1069 nopOp :: PrimOp -> Bool
1070 nopOp Int2WordOp = True
1071 nopOp Word2IntOp = True
1072 nopOp Int2AddrOp = True
1073 nopOp Addr2IntOp = True
1074 nopOp ChrOp = True -- Int# and Char# are rep'd the same
1075 nopOp OrdOp = True
1076 nopOp _ = False
1077
1078 -- These PrimOps turn into double casts
1079
1080 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
1081 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
1082 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
1083 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
1084 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
1085 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
1086 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
1087 narrowOp _ = Nothing
1088
1089 -- Native word signless ops
1090
1091 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
1092 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
1093 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
1094 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
1095 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
1096 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
1097 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
1098
1099 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
1100 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
1101 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
1102 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
1103 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
1104 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
1105
1106 translateOp dflags AndOp = Just (mo_wordAnd dflags)
1107 translateOp dflags OrOp = Just (mo_wordOr dflags)
1108 translateOp dflags XorOp = Just (mo_wordXor dflags)
1109 translateOp dflags NotOp = Just (mo_wordNot dflags)
1110 translateOp dflags SllOp = Just (mo_wordShl dflags)
1111 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
1112
1113 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
1114
1115 -- Native word signed ops
1116
1117 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
1118 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
1119 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
1120 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
1121 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
1122
1123
1124 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
1125 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
1126 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
1127 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
1128
1129 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
1130 translateOp dflags OrIOp = Just (mo_wordOr dflags)
1131 translateOp dflags XorIOp = Just (mo_wordXor dflags)
1132 translateOp dflags NotIOp = Just (mo_wordNot dflags)
1133 translateOp dflags ISllOp = Just (mo_wordShl dflags)
1134 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
1135 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
1136
1137 -- Native word unsigned ops
1138
1139 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
1140 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
1141 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
1142 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
1143
1144 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
1145 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
1146 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
1147
1148 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
1149 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
1150 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
1151 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
1152
1153 -- Char# ops
1154
1155 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
1156 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
1157 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
1158 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
1159 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
1160 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
1161
1162 -- Double ops
1163
1164 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
1165 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
1166 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
1167 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
1168 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
1169 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
1170
1171 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
1172 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
1173 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
1174 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
1175 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
1176
1177 -- Float ops
1178
1179 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
1180 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
1181 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
1182 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1183 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1184 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1185
1186 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1187 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1188 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1189 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1190 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1191
1192 -- Vector ops
1193
1194 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1195 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1196 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1197 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1198 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1199
1200 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1201 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1202 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1203 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1204 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1205 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1206
1207 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1208 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1209 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1210 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1211 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1212
1213 -- Conversions
1214
1215 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1216 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1217
1218 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1219 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1220
1221 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1222 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1223
1224 -- Word comparisons masquerading as more exotic things.
1225
1226 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1227 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1228 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1229 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1230 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1231 translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
1232 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1233 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1234
1235 translateOp _ _ = Nothing
1236
1237 -- These primops are implemented by CallishMachOps, because they sometimes
1238 -- turn into foreign calls depending on the backend.
1239
1240 callishOp :: PrimOp -> Maybe CallishMachOp
1241 callishOp DoublePowerOp = Just MO_F64_Pwr
1242 callishOp DoubleSinOp = Just MO_F64_Sin
1243 callishOp DoubleCosOp = Just MO_F64_Cos
1244 callishOp DoubleTanOp = Just MO_F64_Tan
1245 callishOp DoubleSinhOp = Just MO_F64_Sinh
1246 callishOp DoubleCoshOp = Just MO_F64_Cosh
1247 callishOp DoubleTanhOp = Just MO_F64_Tanh
1248 callishOp DoubleAsinOp = Just MO_F64_Asin
1249 callishOp DoubleAcosOp = Just MO_F64_Acos
1250 callishOp DoubleAtanOp = Just MO_F64_Atan
1251 callishOp DoubleLogOp = Just MO_F64_Log
1252 callishOp DoubleExpOp = Just MO_F64_Exp
1253 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1254
1255 callishOp FloatPowerOp = Just MO_F32_Pwr
1256 callishOp FloatSinOp = Just MO_F32_Sin
1257 callishOp FloatCosOp = Just MO_F32_Cos
1258 callishOp FloatTanOp = Just MO_F32_Tan
1259 callishOp FloatSinhOp = Just MO_F32_Sinh
1260 callishOp FloatCoshOp = Just MO_F32_Cosh
1261 callishOp FloatTanhOp = Just MO_F32_Tanh
1262 callishOp FloatAsinOp = Just MO_F32_Asin
1263 callishOp FloatAcosOp = Just MO_F32_Acos
1264 callishOp FloatAtanOp = Just MO_F32_Atan
1265 callishOp FloatLogOp = Just MO_F32_Log
1266 callishOp FloatExpOp = Just MO_F32_Exp
1267 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1268
1269 callishOp _ = Nothing
1270
1271 ------------------------------------------------------------------------------
1272 -- Helpers for translating various minor variants of array indexing.
1273
1274 doIndexOffAddrOp :: Maybe MachOp
1275 -> CmmType
1276 -> [LocalReg]
1277 -> [CmmExpr]
1278 -> FCode ()
1279 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1280 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1281 doIndexOffAddrOp _ _ _ _
1282 = panic "StgCmmPrim: doIndexOffAddrOp"
1283
1284 doIndexOffAddrOpAs :: Maybe MachOp
1285 -> CmmType
1286 -> CmmType
1287 -> [LocalReg]
1288 -> [CmmExpr]
1289 -> FCode ()
1290 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1291 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1292 doIndexOffAddrOpAs _ _ _ _ _
1293 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1294
1295 doIndexByteArrayOp :: Maybe MachOp
1296 -> CmmType
1297 -> [LocalReg]
1298 -> [CmmExpr]
1299 -> FCode ()
1300 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1301 = do dflags <- getDynFlags
1302 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1303 doIndexByteArrayOp _ _ _ _
1304 = panic "StgCmmPrim: doIndexByteArrayOp"
1305
1306 doIndexByteArrayOpAs :: Maybe MachOp
1307 -> CmmType
1308 -> CmmType
1309 -> [LocalReg]
1310 -> [CmmExpr]
1311 -> FCode ()
1312 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1313 = do dflags <- getDynFlags
1314 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1315 doIndexByteArrayOpAs _ _ _ _ _
1316 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1317
1318 doReadPtrArrayOp :: LocalReg
1319 -> CmmExpr
1320 -> CmmExpr
1321 -> FCode ()
1322 doReadPtrArrayOp res addr idx
1323 = do dflags <- getDynFlags
1324 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1325
1326 doWriteOffAddrOp :: Maybe MachOp
1327 -> CmmType
1328 -> [LocalReg]
1329 -> [CmmExpr]
1330 -> FCode ()
1331 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1332 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1333 doWriteOffAddrOp _ _ _ _
1334 = panic "StgCmmPrim: doWriteOffAddrOp"
1335
1336 doWriteByteArrayOp :: Maybe MachOp
1337 -> CmmType
1338 -> [LocalReg]
1339 -> [CmmExpr]
1340 -> FCode ()
1341 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1342 = do dflags <- getDynFlags
1343 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1344 doWriteByteArrayOp _ _ _ _
1345 = panic "StgCmmPrim: doWriteByteArrayOp"
1346
1347 doWritePtrArrayOp :: CmmExpr
1348 -> CmmExpr
1349 -> CmmExpr
1350 -> FCode ()
1351 doWritePtrArrayOp addr idx val
1352 = do dflags <- getDynFlags
1353 let ty = cmmExprType dflags val
1354 -- This write barrier is to ensure that the heap writes to the object
1355 -- referred to by val have happened before we write val into the array.
1356 -- See #12469 for details.
1357 emitPrimCall [] MO_WriteBarrier []
1358 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1359 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1360 -- the write barrier. We must write a byte into the mark table:
1361 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1362 emit $ mkStore (
1363 cmmOffsetExpr dflags
1364 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1365 (loadArrPtrsSize dflags addr))
1366 (CmmMachOp (mo_wordUShr dflags) [idx,
1367 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1368 ) (CmmLit (CmmInt 1 W8))
1369
1370 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1371 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1372 where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1373
1374 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1375 -> Maybe MachOp -- Optional result cast
1376 -> CmmType -- Type of element we are accessing
1377 -> LocalReg -- Destination
1378 -> CmmExpr -- Base address
1379 -> CmmType -- Type of element by which we are indexing
1380 -> CmmExpr -- Index
1381 -> FCode ()
1382 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1383 = do dflags <- getDynFlags
1384 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1385 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1386 = do dflags <- getDynFlags
1387 emitAssign (CmmLocal res) (CmmMachOp cast [
1388 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1389
1390 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1391 -> Maybe MachOp -- Optional value cast
1392 -> CmmExpr -- Base address
1393 -> CmmType -- Type of element by which we are indexing
1394 -> CmmExpr -- Index
1395 -> CmmExpr -- Value to write
1396 -> FCode ()
1397 mkBasicIndexedWrite off Nothing base idx_ty idx val
1398 = do dflags <- getDynFlags
1399 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1400 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1401 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1402
1403 -- ----------------------------------------------------------------------------
1404 -- Misc utils
1405
1406 cmmIndexOffExpr :: DynFlags
1407 -> ByteOff -- Initial offset in bytes
1408 -> Width -- Width of element by which we are indexing
1409 -> CmmExpr -- Base address
1410 -> CmmExpr -- Index
1411 -> CmmExpr
1412 cmmIndexOffExpr dflags off width base idx
1413 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1414
1415 cmmLoadIndexOffExpr :: DynFlags
1416 -> ByteOff -- Initial offset in bytes
1417 -> CmmType -- Type of element we are accessing
1418 -> CmmExpr -- Base address
1419 -> CmmType -- Type of element by which we are indexing
1420 -> CmmExpr -- Index
1421 -> CmmExpr
1422 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1423 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1424
1425 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1426 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1427
1428 ------------------------------------------------------------------------------
1429 -- Helpers for translating vector primops.
1430
1431 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1432 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1433
1434 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1435 vecCmmCat IntVec = cmmBits
1436 vecCmmCat WordVec = cmmBits
1437 vecCmmCat FloatVec = cmmFloat
1438
1439 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1440 vecElemInjectCast _ FloatVec _ = Nothing
1441 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1442 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1443 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1444 vecElemInjectCast _ IntVec W64 = Nothing
1445 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1446 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1447 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1448 vecElemInjectCast _ WordVec W64 = Nothing
1449 vecElemInjectCast _ _ _ = Nothing
1450
1451 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1452 vecElemProjectCast _ FloatVec _ = Nothing
1453 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1454 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1455 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1456 vecElemProjectCast _ IntVec W64 = Nothing
1457 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1458 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1459 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1460 vecElemProjectCast _ WordVec W64 = Nothing
1461 vecElemProjectCast _ _ _ = Nothing
1462
1463 -- Check to make sure that we can generate code for the specified vector type
1464 -- given the current set of dynamic flags.
1465 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1466 checkVecCompatibility dflags vcat l w = do
1467 when (hscTarget dflags /= HscLlvm) $ do
1468 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1469 ,"Please use -fllvm."]
1470 check vecWidth vcat l w
1471 where
1472 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1473 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1474 sorry $ "128-bit wide single-precision floating point " ++
1475 "SIMD vector instructions require at least -msse."
1476 check W128 _ _ _ | not (isSse2Enabled dflags) =
1477 sorry $ "128-bit wide integer and double precision " ++
1478 "SIMD vector instructions require at least -msse2."
1479 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1480 sorry $ "256-bit wide floating point " ++
1481 "SIMD vector instructions require at least -mavx."
1482 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1483 sorry $ "256-bit wide integer " ++
1484 "SIMD vector instructions require at least -mavx2."
1485 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1486 sorry $ "512-bit wide " ++
1487 "SIMD vector instructions require -mavx512f."
1488 check _ _ _ _ = return ()
1489
1490 vecWidth = typeWidth (vecVmmType vcat l w)
1491
1492 ------------------------------------------------------------------------------
1493 -- Helpers for translating vector packing and unpacking.
1494
1495 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1496 -> CmmType -- Type of vector
1497 -> CmmExpr -- Initial vector
1498 -> [CmmExpr] -- Elements
1499 -> CmmFormal -- Destination for result
1500 -> FCode ()
1501 doVecPackOp maybe_pre_write_cast ty z es res = do
1502 dst <- newTemp ty
1503 emitAssign (CmmLocal dst) z
1504 vecPack dst es 0
1505 where
1506 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1507 vecPack src [] _ =
1508 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1509
1510 vecPack src (e : es) i = do
1511 dst <- newTemp ty
1512 if isFloatType (vecElemType ty)
1513 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1514 [CmmReg (CmmLocal src), cast e, iLit])
1515 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1516 [CmmReg (CmmLocal src), cast e, iLit])
1517 vecPack dst es (i + 1)
1518 where
1519 -- vector indices are always 32-bits
1520 iLit = CmmLit (CmmInt (toInteger i) W32)
1521
1522 cast :: CmmExpr -> CmmExpr
1523 cast val = case maybe_pre_write_cast of
1524 Nothing -> val
1525 Just cast -> CmmMachOp cast [val]
1526
1527 len :: Length
1528 len = vecLength ty
1529
1530 wid :: Width
1531 wid = typeWidth (vecElemType ty)
1532
1533 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1534 -> CmmType -- Type of vector
1535 -> CmmExpr -- Vector
1536 -> [CmmFormal] -- Element results
1537 -> FCode ()
1538 doVecUnpackOp maybe_post_read_cast ty e res =
1539 vecUnpack res 0
1540 where
1541 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1542 vecUnpack [] _ =
1543 return ()
1544
1545 vecUnpack (r : rs) i = do
1546 if isFloatType (vecElemType ty)
1547 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1548 [e, iLit]))
1549 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1550 [e, iLit]))
1551 vecUnpack rs (i + 1)
1552 where
1553 -- vector indices are always 32-bits
1554 iLit = CmmLit (CmmInt (toInteger i) W32)
1555
1556 cast :: CmmExpr -> CmmExpr
1557 cast val = case maybe_post_read_cast of
1558 Nothing -> val
1559 Just cast -> CmmMachOp cast [val]
1560
1561 len :: Length
1562 len = vecLength ty
1563
1564 wid :: Width
1565 wid = typeWidth (vecElemType ty)
1566
1567 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1568 -> CmmType -- Vector type
1569 -> CmmExpr -- Source vector
1570 -> CmmExpr -- Element
1571 -> CmmExpr -- Index at which to insert element
1572 -> CmmFormal -- Destination for result
1573 -> FCode ()
1574 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1575 dflags <- getDynFlags
1576 -- vector indices are always 32-bits
1577 let idx' :: CmmExpr
1578 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1579 if isFloatType (vecElemType ty)
1580 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1581 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1582 where
1583 cast :: CmmExpr -> CmmExpr
1584 cast val = case maybe_pre_write_cast of
1585 Nothing -> val
1586 Just cast -> CmmMachOp cast [val]
1587
1588 len :: Length
1589 len = vecLength ty
1590
1591 wid :: Width
1592 wid = typeWidth (vecElemType ty)
1593
1594 ------------------------------------------------------------------------------
1595 -- Helpers for translating prefetching.
1596
1597
1598 -- | Translate byte array prefetch operations into proper primcalls.
1599 doPrefetchByteArrayOp :: Int
1600 -> [CmmExpr]
1601 -> FCode ()
1602 doPrefetchByteArrayOp locality [addr,idx]
1603 = do dflags <- getDynFlags
1604 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1605 doPrefetchByteArrayOp _ _
1606 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1607
1608 -- | Translate mutable byte array prefetch operations into proper primcalls.
1609 doPrefetchMutableByteArrayOp :: Int
1610 -> [CmmExpr]
1611 -> FCode ()
1612 doPrefetchMutableByteArrayOp locality [addr,idx]
1613 = do dflags <- getDynFlags
1614 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1615 doPrefetchMutableByteArrayOp _ _
1616 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1617
1618 -- | Translate address prefetch operations into proper primcalls.
1619 doPrefetchAddrOp ::Int
1620 -> [CmmExpr]
1621 -> FCode ()
1622 doPrefetchAddrOp locality [addr,idx]
1623 = mkBasicPrefetch locality 0 addr idx
1624 doPrefetchAddrOp _ _
1625 = panic "StgCmmPrim: doPrefetchAddrOp"
1626
1627 -- | Translate value prefetch operations into proper primcalls.
1628 doPrefetchValueOp :: Int
1629 -> [CmmExpr]
1630 -> FCode ()
1631 doPrefetchValueOp locality [addr]
1632 = do dflags <- getDynFlags
1633 mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
1634 doPrefetchValueOp _ _
1635 = panic "StgCmmPrim: doPrefetchValueOp"
1636
1637 -- | helper to generate prefetch primcalls
1638 mkBasicPrefetch :: Int -- Locality level 0-3
1639 -> ByteOff -- Initial offset in bytes
1640 -> CmmExpr -- Base address
1641 -> CmmExpr -- Index
1642 -> FCode ()
1643 mkBasicPrefetch locality off base idx
1644 = do dflags <- getDynFlags
1645 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1646 return ()
1647
1648 -- ----------------------------------------------------------------------------
1649 -- Allocating byte arrays
1650
1651 -- | Takes a register to return the newly allocated array in and the
1652 -- size of the new array in bytes. Allocates a new
1653 -- 'MutableByteArray#'.
1654 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1655 doNewByteArrayOp res_r n = do
1656 dflags <- getDynFlags
1657
1658 let info_ptr = mkLblExpr mkArrWords_infoLabel
1659 rep = arrWordsRep dflags n
1660
1661 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1662 (mkIntExpr dflags (nonHdrSize dflags rep))
1663 (zeroExpr dflags)
1664
1665 let hdr_size = fixedHdrSize dflags
1666
1667 base <- allocHeapClosure rep info_ptr curCCS
1668 [ (mkIntExpr dflags n,
1669 hdr_size + oFFSET_StgArrBytes_bytes dflags)
1670 ]
1671
1672 emit $ mkAssign (CmmLocal res_r) base
1673
1674 -- ----------------------------------------------------------------------------
1675 -- Copying byte arrays
1676
1677 -- | Takes a source 'ByteArray#', an offset in the source array, a
1678 -- destination 'MutableByteArray#', an offset into the destination
1679 -- array, and the number of bytes to copy. Copies the given number of
1680 -- bytes from the source array to the destination array.
1681 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1682 -> FCode ()
1683 doCopyByteArrayOp = emitCopyByteArray copy
1684 where
1685 -- Copy data (we assume the arrays aren't overlapping since
1686 -- they're of different types)
1687 copy _src _dst dst_p src_p bytes =
1688 emitMemcpyCall dst_p src_p bytes 1
1689
1690 -- | Takes a source 'MutableByteArray#', an offset in the source
1691 -- array, a destination 'MutableByteArray#', an offset into the
1692 -- destination array, and the number of bytes to copy. Copies the
1693 -- given number of bytes from the source array to the destination
1694 -- array.
1695 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1696 -> FCode ()
1697 doCopyMutableByteArrayOp = emitCopyByteArray copy
1698 where
1699 -- The only time the memory might overlap is when the two arrays
1700 -- we were provided are the same array!
1701 -- TODO: Optimize branch for common case of no aliasing.
1702 copy src dst dst_p src_p bytes = do
1703 dflags <- getDynFlags
1704 [moveCall, cpyCall] <- forkAlts [
1705 getCode $ emitMemmoveCall dst_p src_p bytes 1,
1706 getCode $ emitMemcpyCall dst_p src_p bytes 1
1707 ]
1708 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1709
1710 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1711 -> FCode ())
1712 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1713 -> FCode ()
1714 emitCopyByteArray copy src src_off dst dst_off n = do
1715 dflags <- getDynFlags
1716 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1717 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1718 copy src dst dst_p src_p n
1719
1720 -- | Takes a source 'ByteArray#', an offset in the source array, a
1721 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1722 -- number of bytes from the source array to the destination memory region.
1723 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1724 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1725 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1726 dflags <- getDynFlags
1727 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1728 emitMemcpyCall dst_p src_p bytes 1
1729
1730 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1731 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1732 -- number of bytes from the source array to the destination memory region.
1733 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1734 -> FCode ()
1735 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1736
1737 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1738 -- the destination array, and the number of bytes to copy. Copies the given
1739 -- number of bytes from the source memory region to the destination array.
1740 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1741 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1742 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1743 dflags <- getDynFlags
1744 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1745 emitMemcpyCall dst_p src_p bytes 1
1746
1747
1748 -- ----------------------------------------------------------------------------
1749 -- Setting byte arrays
1750
1751 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1752 -- and a byte, and sets each of the selected bytes in the array to the
1753 -- character.
1754 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1755 -> FCode ()
1756 doSetByteArrayOp ba off len c
1757 = do dflags <- getDynFlags
1758 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1759 emitMemsetCall p c len 1
1760
1761 -- ----------------------------------------------------------------------------
1762 -- Allocating arrays
1763
1764 -- | Allocate a new array.
1765 doNewArrayOp :: CmmFormal -- ^ return register
1766 -> SMRep -- ^ representation of the array
1767 -> CLabel -- ^ info pointer
1768 -> [(CmmExpr, ByteOff)] -- ^ header payload
1769 -> WordOff -- ^ array size
1770 -> CmmExpr -- ^ initial element
1771 -> FCode ()
1772 doNewArrayOp res_r rep info payload n init = do
1773 dflags <- getDynFlags
1774
1775 let info_ptr = mkLblExpr info
1776
1777 tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
1778 (mkIntExpr dflags (nonHdrSize dflags rep))
1779 (zeroExpr dflags)
1780
1781 base <- allocHeapClosure rep info_ptr curCCS payload
1782
1783 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1784 emit $ mkAssign arr base
1785
1786 -- Initialise all elements of the the array
1787 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep)
1788 for <- newBlockId
1789 emitLabel for
1790 let loopBody =
1791 [ mkStore (CmmReg (CmmLocal p)) init
1792 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1793 , mkBranch for ]
1794 emit =<< mkCmmIfThen
1795 (cmmULtWord dflags (CmmReg (CmmLocal p))
1796 (cmmOffsetW dflags (CmmReg arr)
1797 (hdrSizeW dflags rep + n)))
1798 (catAGraphs loopBody)
1799
1800 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1801
1802 -- ----------------------------------------------------------------------------
1803 -- Copying pointer arrays
1804
1805 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1806 -- nowhere else in the code generator. This is mostly because these
1807 -- "primitive" ops result in a surprisingly large amount of code. It
1808 -- will likely be worthwhile to optimize what is emitted here, so that
1809 -- our optimization passes don't waste time repeatedly optimizing the
1810 -- same bits of code.
1811
1812 -- More closely imitates 'assignTemp' from the old code generator, which
1813 -- returns a CmmExpr rather than a LocalReg.
1814 assignTempE :: CmmExpr -> FCode CmmExpr
1815 assignTempE e = do
1816 t <- assignTemp e
1817 return (CmmReg (CmmLocal t))
1818
1819 -- | Takes a source 'Array#', an offset in the source array, a
1820 -- destination 'MutableArray#', an offset into the destination array,
1821 -- and the number of elements to copy. Copies the given number of
1822 -- elements from the source array to the destination array.
1823 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1824 -> FCode ()
1825 doCopyArrayOp = emitCopyArray copy
1826 where
1827 -- Copy data (we assume the arrays aren't overlapping since
1828 -- they're of different types)
1829 copy _src _dst dst_p src_p bytes =
1830 do dflags <- getDynFlags
1831 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1832 (wORD_SIZE dflags)
1833
1834
1835 -- | Takes a source 'MutableArray#', an offset in the source array, a
1836 -- destination 'MutableArray#', an offset into the destination array,
1837 -- and the number of elements to copy. Copies the given number of
1838 -- elements from the source array to the destination array.
1839 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1840 -> FCode ()
1841 doCopyMutableArrayOp = emitCopyArray copy
1842 where
1843 -- The only time the memory might overlap is when the two arrays
1844 -- we were provided are the same array!
1845 -- TODO: Optimize branch for common case of no aliasing.
1846 copy src dst dst_p src_p bytes = do
1847 dflags <- getDynFlags
1848 [moveCall, cpyCall] <- forkAlts [
1849 getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1850 (wORD_SIZE dflags),
1851 getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1852 (wORD_SIZE dflags)
1853 ]
1854 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1855
1856 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1857 -> FCode ()) -- ^ copy function
1858 -> CmmExpr -- ^ source array
1859 -> CmmExpr -- ^ offset in source array
1860 -> CmmExpr -- ^ destination array
1861 -> CmmExpr -- ^ offset in destination array
1862 -> WordOff -- ^ number of elements to copy
1863 -> FCode ()
1864 emitCopyArray copy src0 src_off dst0 dst_off0 n = do
1865 dflags <- getDynFlags
1866 when (n /= 0) $ do
1867 -- Passed as arguments (be careful)
1868 src <- assignTempE src0
1869 dst <- assignTempE dst0
1870 dst_off <- assignTempE dst_off0
1871
1872 -- Set the dirty bit in the header.
1873 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1874
1875 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
1876 (arrPtrsHdrSize dflags)
1877 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1878 src_p <- assignTempE $ cmmOffsetExprW dflags
1879 (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1880 let bytes = wordsToBytes dflags n
1881
1882 copy src dst dst_p src_p bytes
1883
1884 -- The base address of the destination card table
1885 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
1886 (loadArrPtrsSize dflags dst)
1887
1888 emitSetCards dst_off dst_cards_p n
1889
1890 doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1891 -> FCode ()
1892 doCopySmallArrayOp = emitCopySmallArray copy
1893 where
1894 -- Copy data (we assume the arrays aren't overlapping since
1895 -- they're of different types)
1896 copy _src _dst dst_p src_p bytes =
1897 do dflags <- getDynFlags
1898 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1899 (wORD_SIZE dflags)
1900
1901
1902 doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1903 -> FCode ()
1904 doCopySmallMutableArrayOp = emitCopySmallArray copy
1905 where
1906 -- The only time the memory might overlap is when the two arrays
1907 -- we were provided are the same array!
1908 -- TODO: Optimize branch for common case of no aliasing.
1909 copy src dst dst_p src_p bytes = do
1910 dflags <- getDynFlags
1911 [moveCall, cpyCall] <- forkAlts
1912 [ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1913 (wORD_SIZE dflags)
1914 , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1915 (wORD_SIZE dflags)
1916 ]
1917 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1918
1919 emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1920 -> FCode ()) -- ^ copy function
1921 -> CmmExpr -- ^ source array
1922 -> CmmExpr -- ^ offset in source array
1923 -> CmmExpr -- ^ destination array
1924 -> CmmExpr -- ^ offset in destination array
1925 -> WordOff -- ^ number of elements to copy
1926 -> FCode ()
1927 emitCopySmallArray copy src0 src_off dst0 dst_off n = do
1928 dflags <- getDynFlags
1929
1930 -- Passed as arguments (be careful)
1931 src <- assignTempE src0
1932 dst <- assignTempE dst0
1933
1934 -- Set the dirty bit in the header.
1935 emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
1936
1937 dst_p <- assignTempE $ cmmOffsetExprW dflags
1938 (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
1939 src_p <- assignTempE $ cmmOffsetExprW dflags
1940 (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
1941 let bytes = wordsToBytes dflags n
1942
1943 copy src dst dst_p src_p bytes
1944
1945 -- | Takes an info table label, a register to return the newly
1946 -- allocated array in, a source array, an offset in the source array,
1947 -- and the number of elements to copy. Allocates a new array and
1948 -- initializes it from the source array.
1949 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1950 -> FCode ()
1951 emitCloneArray info_p res_r src src_off n = do
1952 dflags <- getDynFlags
1953
1954 let info_ptr = mkLblExpr info_p
1955 rep = arrPtrsRep dflags n
1956
1957 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
1958 (mkIntExpr dflags (nonHdrSize dflags rep))
1959 (zeroExpr dflags)
1960
1961 let hdr_size = fixedHdrSize dflags
1962
1963 base <- allocHeapClosure rep info_ptr curCCS
1964 [ (mkIntExpr dflags n,
1965 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
1966 , (mkIntExpr dflags (nonHdrSizeW rep),
1967 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
1968 ]
1969
1970 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1971 emit $ mkAssign arr base
1972
1973 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
1974 (arrPtrsHdrSize dflags)
1975 src_p <- assignTempE $ cmmOffsetExprW dflags src
1976 (cmmAddWord dflags
1977 (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
1978
1979 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
1980 (wORD_SIZE dflags)
1981
1982 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1983
1984 -- | Takes an info table label, a register to return the newly
1985 -- allocated array in, a source array, an offset in the source array,
1986 -- and the number of elements to copy. Allocates a new array and
1987 -- initializes it from the source array.
1988 emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1989 -> FCode ()
1990 emitCloneSmallArray info_p res_r src src_off n = do
1991 dflags <- getDynFlags
1992
1993 let info_ptr = mkLblExpr info_p
1994 rep = smallArrPtrsRep n
1995
1996 tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
1997 (mkIntExpr dflags (nonHdrSize dflags rep))
1998 (zeroExpr dflags)
1999
2000 let hdr_size = fixedHdrSize dflags
2001
2002 base <- allocHeapClosure rep info_ptr curCCS
2003 [ (mkIntExpr dflags n,
2004 hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
2005 ]
2006
2007 arr <- CmmLocal `fmap` newTemp (bWord dflags)
2008 emit $ mkAssign arr base
2009
2010 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
2011 (smallArrPtrsHdrSize dflags)
2012 src_p <- assignTempE $ cmmOffsetExprW dflags src
2013 (cmmAddWord dflags
2014 (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
2015
2016 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
2017 (wORD_SIZE dflags)
2018
2019 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2020
2021 -- | Takes and offset in the destination array, the base address of
2022 -- the card table, and the number of elements affected (*not* the
2023 -- number of cards). The number of elements may not be zero.
2024 -- Marks the relevant cards as dirty.
2025 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
2026 emitSetCards dst_start dst_cards_start n = do
2027 dflags <- getDynFlags
2028 start_card <- assignTempE $ cardCmm dflags dst_start
2029 let end_card = cardCmm dflags
2030 (cmmSubWord dflags
2031 (cmmAddWord dflags dst_start (mkIntExpr dflags n))
2032 (mkIntExpr dflags 1))
2033 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
2034 (mkIntExpr dflags 1)
2035 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
2036 1 -- no alignment (1 byte)
2037
2038 -- Convert an element index to a card index
2039 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
2040 cardCmm dflags i =
2041 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
2042
2043 ------------------------------------------------------------------------------
2044 -- SmallArray PrimOp implementations
2045
2046 doReadSmallPtrArrayOp :: LocalReg
2047 -> CmmExpr
2048 -> CmmExpr
2049 -> FCode ()
2050 doReadSmallPtrArrayOp res addr idx = do
2051 dflags <- getDynFlags
2052 mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
2053 (gcWord dflags) idx
2054
2055 doWriteSmallPtrArrayOp :: CmmExpr
2056 -> CmmExpr
2057 -> CmmExpr
2058 -> FCode ()
2059 doWriteSmallPtrArrayOp addr idx val = do
2060 dflags <- getDynFlags
2061 let ty = cmmExprType dflags val
2062 mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
2063 emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2064
2065 ------------------------------------------------------------------------------
2066 -- Atomic read-modify-write
2067
2068 -- | Emit an atomic modification to a byte array element. The result
2069 -- reg contains that previous value of the element. Implies a full
2070 -- memory barrier.
2071 doAtomicRMW :: LocalReg -- ^ Result reg
2072 -> AtomicMachOp -- ^ Atomic op (e.g. add)
2073 -> CmmExpr -- ^ MutableByteArray#
2074 -> CmmExpr -- ^ Index
2075 -> CmmType -- ^ Type of element by which we are indexing
2076 -> CmmExpr -- ^ Op argument (e.g. amount to add)
2077 -> FCode ()
2078 doAtomicRMW res amop mba idx idx_ty n = do
2079 dflags <- getDynFlags
2080 let width = typeWidth idx_ty
2081 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2082 width mba idx
2083 emitPrimCall
2084 [ res ]
2085 (MO_AtomicRMW width amop)
2086 [ addr, n ]
2087
2088 -- | Emit an atomic read to a byte array that acts as a memory barrier.
2089 doAtomicReadByteArray
2090 :: LocalReg -- ^ Result reg
2091 -> CmmExpr -- ^ MutableByteArray#
2092 -> CmmExpr -- ^ Index
2093 -> CmmType -- ^ Type of element by which we are indexing
2094 -> FCode ()
2095 doAtomicReadByteArray res mba idx idx_ty = do
2096 dflags <- getDynFlags
2097 let width = typeWidth idx_ty
2098 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2099 width mba idx
2100 emitPrimCall
2101 [ res ]
2102 (MO_AtomicRead width)
2103 [ addr ]
2104
2105 -- | Emit an atomic write to a byte array that acts as a memory barrier.
2106 doAtomicWriteByteArray
2107 :: CmmExpr -- ^ MutableByteArray#
2108 -> CmmExpr -- ^ Index
2109 -> CmmType -- ^ Type of element by which we are indexing
2110 -> CmmExpr -- ^ Value to write
2111 -> FCode ()
2112 doAtomicWriteByteArray mba idx idx_ty val = do
2113 dflags <- getDynFlags
2114 let width = typeWidth idx_ty
2115 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2116 width mba idx
2117 emitPrimCall
2118 [ {- no results -} ]
2119 (MO_AtomicWrite width)
2120 [ addr, val ]
2121
2122 doCasByteArray
2123 :: LocalReg -- ^ Result reg
2124 -> CmmExpr -- ^ MutableByteArray#
2125 -> CmmExpr -- ^ Index
2126 -> CmmType -- ^ Type of element by which we are indexing
2127 -> CmmExpr -- ^ Old value
2128 -> CmmExpr -- ^ New value
2129 -> FCode ()
2130 doCasByteArray res mba idx idx_ty old new = do
2131 dflags <- getDynFlags
2132 let width = (typeWidth idx_ty)
2133 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2134 width mba idx
2135 emitPrimCall
2136 [ res ]
2137 (MO_Cmpxchg width)
2138 [ addr, old, new ]
2139
2140 ------------------------------------------------------------------------------
2141 -- Helpers for emitting function calls
2142
2143 -- | Emit a call to @memcpy@.
2144 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2145 emitMemcpyCall dst src n align = do
2146 emitPrimCall
2147 [ {-no results-} ]
2148 (MO_Memcpy align)
2149 [ dst, src, n ]
2150
2151 -- | Emit a call to @memmove@.
2152 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2153 emitMemmoveCall dst src n align = do
2154 emitPrimCall
2155 [ {- no results -} ]
2156 (MO_Memmove align)
2157 [ dst, src, n ]
2158
2159 -- | Emit a call to @memset@. The second argument must fit inside an
2160 -- unsigned char.
2161 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2162 emitMemsetCall dst c n align = do
2163 emitPrimCall
2164 [ {- no results -} ]
2165 (MO_Memset align)
2166 [ dst, c, n ]
2167
2168 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2169 emitBSwapCall res x width = do
2170 emitPrimCall
2171 [ res ]
2172 (MO_BSwap width)
2173 [ x ]
2174
2175 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2176 emitPopCntCall res x width = do
2177 emitPrimCall
2178 [ res ]
2179 (MO_PopCnt width)
2180 [ x ]
2181
2182 emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2183 emitClzCall res x width = do
2184 emitPrimCall
2185 [ res ]
2186 (MO_Clz width)
2187 [ x ]
2188
2189 emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2190 emitCtzCall res x width = do
2191 emitPrimCall
2192 [ res ]
2193 (MO_Ctz width)
2194 [ x ]