Add 'addWordC#' PrimOp
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 {-# LANGUAGE CPP #-}
2 -- emitPrimOp is quite large
3 {-# OPTIONS_GHC -fmax-pmcheck-iterations=4000000 #-}
4
5 ----------------------------------------------------------------------------
6 --
7 -- Stg to C--: primitive operations
8 --
9 -- (c) The University of Glasgow 2004-2006
10 --
11 -----------------------------------------------------------------------------
12
13 module StgCmmPrim (
14 cgOpApp,
15 cgPrimOp, -- internal(ish), used by cgCase to get code for a
16 -- comparison without also turning it into a Bool.
17 shouldInlinePrimOp
18 ) where
19
20 #include "HsVersions.h"
21
22 import GhcPrelude hiding ((<*>))
23
24 import StgCmmLayout
25 import StgCmmForeign
26 import StgCmmEnv
27 import StgCmmMonad
28 import StgCmmUtils
29 import StgCmmTicky
30 import StgCmmHeap
31 import StgCmmProf ( costCentreFrom )
32
33 import DynFlags
34 import Platform
35 import BasicTypes
36 import BlockId
37 import MkGraph
38 import StgSyn
39 import Cmm
40 import CmmInfo
41 import Type ( Type, tyConAppTyCon )
42 import TyCon
43 import CLabel
44 import CmmUtils
45 import PrimOp
46 import SMRep
47 import FastString
48 import Outputable
49 import Util
50
51 import Data.Bits ((.&.), bit)
52 import Control.Monad (liftM, when, unless)
53
54 ------------------------------------------------------------------------
55 -- Primitive operations and foreign calls
56 ------------------------------------------------------------------------
57
58 {- Note [Foreign call results]
59 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
60 A foreign call always returns an unboxed tuple of results, one
61 of which is the state token. This seems to happen even for pure
62 calls.
63
64 Even if we returned a single result for pure calls, it'd still be
65 right to wrap it in a singleton unboxed tuple, because the result
66 might be a Haskell closure pointer, we don't want to evaluate it. -}
67
68 ----------------------------------
69 cgOpApp :: StgOp -- The op
70 -> [StgArg] -- Arguments
71 -> Type -- Result type (always an unboxed tuple)
72 -> FCode ReturnKind
73
74 -- Foreign calls
75 cgOpApp (StgFCallOp fcall _) stg_args res_ty
76 = cgForeignCall fcall stg_args res_ty
77 -- Note [Foreign call results]
78
79 -- tagToEnum# is special: we need to pull the constructor
80 -- out of the table, and perform an appropriate return.
81
82 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
83 = ASSERT(isEnumerationTyCon tycon)
84 do { dflags <- getDynFlags
85 ; args' <- getNonVoidArgAmodes [arg]
86 ; let amode = case args' of [amode] -> amode
87 _ -> panic "TagToEnumOp had void arg"
88 ; emitReturn [tagToClosure dflags tycon amode] }
89 where
90 -- If you're reading this code in the attempt to figure
91 -- out why the compiler panic'ed here, it is probably because
92 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
93 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
94 -- That won't work.
95 tycon = tyConAppTyCon res_ty
96
97 cgOpApp (StgPrimOp primop) args res_ty = do
98 dflags <- getDynFlags
99 cmm_args <- getNonVoidArgAmodes args
100 case shouldInlinePrimOp dflags primop cmm_args of
101 Nothing -> do -- out-of-line
102 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
103 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
104
105 Just f -- inline
106 | ReturnsPrim VoidRep <- result_info
107 -> do f []
108 emitReturn []
109
110 | ReturnsPrim rep <- result_info
111 -> do dflags <- getDynFlags
112 res <- newTemp (primRepCmmType dflags rep)
113 f [res]
114 emitReturn [CmmReg (CmmLocal res)]
115
116 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
117 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
118 f regs
119 emitReturn (map (CmmReg . CmmLocal) regs)
120
121 | otherwise -> panic "cgPrimop"
122 where
123 result_info = getPrimOpResultInfo primop
124
125 cgOpApp (StgPrimCallOp primcall) args _res_ty
126 = do { cmm_args <- getNonVoidArgAmodes args
127 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
128 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
129
130 -- | Interpret the argument as an unsigned value, assuming the value
131 -- is given in two-complement form in the given width.
132 --
133 -- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
134 --
135 -- This function is used to work around the fact that many array
136 -- primops take Int# arguments, but we interpret them as unsigned
137 -- quantities in the code gen. This means that we have to be careful
138 -- every time we work on e.g. a CmmInt literal that corresponds to the
139 -- array size, as it might contain a negative Integer value if the
140 -- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
141 -- literal.
142 asUnsigned :: Width -> Integer -> Integer
143 asUnsigned w n = n .&. (bit (widthInBits w) - 1)
144
145 -- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
146 -- ByteOff (or some other fixed width signed type) to represent
147 -- array sizes or indices. This means that these will overflow for
148 -- large enough sizes.
149
150 -- | Decide whether an out-of-line primop should be replaced by an
151 -- inline implementation. This might happen e.g. if there's enough
152 -- static information, such as statically know arguments, to emit a
153 -- more efficient implementation inline.
154 --
155 -- Returns 'Nothing' if this primop should use its out-of-line
156 -- implementation (defined elsewhere) and 'Just' together with a code
157 -- generating function that takes the output regs as arguments
158 -- otherwise.
159 shouldInlinePrimOp :: DynFlags
160 -> PrimOp -- ^ The primop
161 -> [CmmExpr] -- ^ The primop arguments
162 -> Maybe ([LocalReg] -> FCode ())
163
164 shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
165 | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
166 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
167
168 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
169 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
170 Just $ \ [res] ->
171 doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
172 [ (mkIntExpr dflags (fromInteger n),
173 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
174 , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
175 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
176 ]
177 (fromInteger n) init
178
179 shouldInlinePrimOp _ CopyArrayOp
180 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
181 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
182
183 shouldInlinePrimOp _ CopyMutableArrayOp
184 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
185 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
186
187 shouldInlinePrimOp _ CopyArrayArrayOp
188 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
189 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
190
191 shouldInlinePrimOp _ CopyMutableArrayArrayOp
192 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
193 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
194
195 shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
196 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
197 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
198
199 shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
200 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
201 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
202
203 shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
204 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
205 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
206
207 shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
208 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
209 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
210
211 shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
212 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
213 Just $ \ [res] ->
214 doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
215 [ (mkIntExpr dflags (fromInteger n),
216 fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
217 ]
218 (fromInteger n) init
219
220 shouldInlinePrimOp _ CopySmallArrayOp
221 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
222 Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
223
224 shouldInlinePrimOp _ CopySmallMutableArrayOp
225 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
226 Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
227
228 shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
229 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
230 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
231
232 shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
233 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
234 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
235
236 shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
237 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
238 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
239
240 shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
241 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
242 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
243
244 shouldInlinePrimOp dflags primop args
245 | primOpOutOfLine primop = Nothing
246 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
247
248 -- TODO: Several primops, such as 'copyArray#', only have an inline
249 -- implementation (below) but could possibly have both an inline
250 -- implementation and an out-of-line implementation, just like
251 -- 'newArray#'. This would lower the amount of code generated,
252 -- hopefully without a performance impact (needs to be measured).
253
254 ---------------------------------------------------
255 cgPrimOp :: [LocalReg] -- where to put the results
256 -> PrimOp -- the op
257 -> [StgArg] -- arguments
258 -> FCode ()
259
260 cgPrimOp results op args
261 = do dflags <- getDynFlags
262 arg_exprs <- getNonVoidArgAmodes args
263 emitPrimOp dflags results op arg_exprs
264
265
266 ------------------------------------------------------------------------
267 -- Emitting code for a primop
268 ------------------------------------------------------------------------
269
270 emitPrimOp :: DynFlags
271 -> [LocalReg] -- where to put the results
272 -> PrimOp -- the op
273 -> [CmmExpr] -- arguments
274 -> FCode ()
275
276 -- First we handle various awkward cases specially. The remaining
277 -- easy cases are then handled by translateOp, defined below.
278
279 emitPrimOp _ [res] ParOp [arg]
280 =
281 -- for now, just implement this in a C function
282 -- later, we might want to inline it.
283 emitCCall
284 [(res,NoHint)]
285 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
286 [(baseExpr, AddrHint), (arg,AddrHint)]
287
288 emitPrimOp dflags [res] SparkOp [arg]
289 = do
290 -- returns the value of arg in res. We're going to therefore
291 -- refer to arg twice (once to pass to newSpark(), and once to
292 -- assign to res), so put it in a temporary.
293 tmp <- assignTemp arg
294 tmp2 <- newTemp (bWord dflags)
295 emitCCall
296 [(tmp2,NoHint)]
297 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
298 [(baseExpr, AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
299 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
300
301 emitPrimOp dflags [res] GetCCSOfOp [arg]
302 = emitAssign (CmmLocal res) val
303 where
304 val
305 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
306 | otherwise = CmmLit (zeroCLit dflags)
307
308 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
309 = emitAssign (CmmLocal res) cccsExpr
310
311 emitPrimOp _ [res] MyThreadIdOp []
312 = emitAssign (CmmLocal res) currentTSOExpr
313
314 emitPrimOp dflags [res] ReadMutVarOp [mutv]
315 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
316
317 emitPrimOp dflags res@[] WriteMutVarOp [mutv,var]
318 = do -- Without this write barrier, other CPUs may see this pointer before
319 -- the writes for the closure it points to have occurred.
320 emitPrimCall res MO_WriteBarrier []
321 emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
322 emitCCall
323 [{-no results-}]
324 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
325 [(baseExpr, AddrHint), (mutv,AddrHint)]
326
327 -- #define sizzeofByteArrayzh(r,a) \
328 -- r = ((StgArrBytes *)(a))->bytes
329 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
330 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
331
332 -- #define sizzeofMutableByteArrayzh(r,a) \
333 -- r = ((StgArrBytes *)(a))->bytes
334 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
335 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
336
337 -- #define getSizzeofMutableByteArrayzh(r,a) \
338 -- r = ((StgArrBytes *)(a))->bytes
339 emitPrimOp dflags [res] GetSizeofMutableByteArrayOp [arg]
340 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
341
342
343 -- #define touchzh(o) /* nothing */
344 emitPrimOp _ res@[] TouchOp args@[_arg]
345 = do emitPrimCall res MO_Touch args
346
347 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
348 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
349 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
350
351 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
352 emitPrimOp dflags [res] StableNameToIntOp [arg]
353 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
354
355 -- #define eqStableNamezh(r,sn1,sn2) \
356 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
357 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
358 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
359 cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags),
360 cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags)
361 ])
362
363 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
364 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
365
366 -- #define addrToHValuezh(r,a) r=(P_)a
367 emitPrimOp _ [res] AddrToAnyOp [arg]
368 = emitAssign (CmmLocal res) arg
369
370 -- #define hvalueToAddrzh(r, a) r=(W_)a
371 emitPrimOp _ [res] AnyToAddrOp [arg]
372 = emitAssign (CmmLocal res) arg
373
374 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
375 -- Note: argument may be tagged!
376 emitPrimOp dflags [res] DataToTagOp [arg]
377 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
378
379 {- Freezing arrays-of-ptrs requires changing an info table, for the
380 benefit of the generational collector. It needs to scavenge mutable
381 objects, even if they are in old space. When they become immutable,
382 they can be removed from this scavenge list. -}
383
384 -- #define unsafeFreezzeArrayzh(r,a)
385 -- {
386 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
387 -- r = a;
388 -- }
389 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
390 = emit $ catAGraphs
391 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
392 mkAssign (CmmLocal res) arg ]
393 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
394 = emit $ catAGraphs
395 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
396 mkAssign (CmmLocal res) arg ]
397 emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
398 = emit $ catAGraphs
399 [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)),
400 mkAssign (CmmLocal res) arg ]
401
402 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
403 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
404 = emitAssign (CmmLocal res) arg
405
406 -- Reading/writing pointer arrays
407
408 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
409 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
410 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
411
412 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
413 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
414 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
415 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
416 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
417 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
418 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
419 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
420 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
421 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
422
423 emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
424 emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
425 emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
426
427 -- Getting the size of pointer arrays
428
429 emitPrimOp dflags [res] SizeofArrayOp [arg]
430 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg
431 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgMutArrPtrs_ptrs dflags))
432 (bWord dflags))
433 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
434 = emitPrimOp dflags [res] SizeofArrayOp [arg]
435 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
436 = emitPrimOp dflags [res] SizeofArrayOp [arg]
437 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
438 = emitPrimOp dflags [res] SizeofArrayOp [arg]
439
440 emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
441 emit $ mkAssign (CmmLocal res)
442 (cmmLoadIndexW dflags arg
443 (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgSmallMutArrPtrs_ptrs dflags))
444 (bWord dflags))
445 emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
446 emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
447
448 -- IndexXXXoffAddr
449
450 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
451 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
452 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
453 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
454 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
455 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
456 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
457 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
458 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
459 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
460 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
461 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
462 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
463 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
464 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
465 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
466
467 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
468
469 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
470 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
471 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
472 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
473 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
474 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
475 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
476 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
477 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
478 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
479 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
480 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
481 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
482 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
483 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
484 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
485
486 -- IndexXXXArray
487
488 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
489 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
490 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
491 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
492 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
493 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
494 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
495 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
496 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
497 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
498 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
499 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
500 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
501 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
502 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
503 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
504
505 -- ReadXXXArray, identical to IndexXXXArray.
506
507 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
508 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
509 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
510 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
511 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
512 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
513 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
514 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
515 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
516 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
517 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
518 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
519 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
520 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
521 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
522 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
523
524 -- IndexWord8ArrayAsXXX
525
526 emitPrimOp dflags res IndexByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args
527 emitPrimOp dflags res IndexByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
528 emitPrimOp dflags res IndexByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
529 emitPrimOp dflags res IndexByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
530 emitPrimOp dflags res IndexByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
531 emitPrimOp _ res IndexByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args
532 emitPrimOp _ res IndexByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args
533 emitPrimOp dflags res IndexByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
534 emitPrimOp dflags res IndexByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args
535 emitPrimOp dflags res IndexByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args
536 emitPrimOp _ res IndexByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
537 emitPrimOp dflags res IndexByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args
538 emitPrimOp dflags res IndexByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
539 emitPrimOp _ res IndexByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
540
541 -- ReadInt8ArrayAsXXX, identical to IndexInt8ArrayAsXXX
542
543 emitPrimOp dflags res ReadByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args
544 emitPrimOp dflags res ReadByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
545 emitPrimOp dflags res ReadByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
546 emitPrimOp dflags res ReadByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
547 emitPrimOp dflags res ReadByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
548 emitPrimOp _ res ReadByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args
549 emitPrimOp _ res ReadByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args
550 emitPrimOp dflags res ReadByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
551 emitPrimOp dflags res ReadByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args
552 emitPrimOp dflags res ReadByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args
553 emitPrimOp _ res ReadByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
554 emitPrimOp dflags res ReadByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args
555 emitPrimOp dflags res ReadByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
556 emitPrimOp _ res ReadByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
557
558 -- WriteXXXoffAddr
559
560 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
561 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
562 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
563 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
564 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
565 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
566 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
567 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
568 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
569 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
570 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
571 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
572 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
573 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
574 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
575 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
576
577 -- WriteXXXArray
578
579 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
580 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
581 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
582 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
583 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
584 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
585 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
586 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
587 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
588 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
589 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
590 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
591 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
592 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
593 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
594 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
595
596 -- WriteInt8ArrayAsXXX
597
598 emitPrimOp dflags res WriteByteArrayOp_Word8AsChar args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
599 emitPrimOp dflags res WriteByteArrayOp_Word8AsWideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
600 emitPrimOp _ res WriteByteArrayOp_Word8AsInt args = doWriteByteArrayOp Nothing b8 res args
601 emitPrimOp _ res WriteByteArrayOp_Word8AsWord args = doWriteByteArrayOp Nothing b8 res args
602 emitPrimOp _ res WriteByteArrayOp_Word8AsAddr args = doWriteByteArrayOp Nothing b8 res args
603 emitPrimOp _ res WriteByteArrayOp_Word8AsFloat args = doWriteByteArrayOp Nothing b8 res args
604 emitPrimOp _ res WriteByteArrayOp_Word8AsDouble args = doWriteByteArrayOp Nothing b8 res args
605 emitPrimOp _ res WriteByteArrayOp_Word8AsStablePtr args = doWriteByteArrayOp Nothing b8 res args
606 emitPrimOp dflags res WriteByteArrayOp_Word8AsInt16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
607 emitPrimOp dflags res WriteByteArrayOp_Word8AsInt32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
608 emitPrimOp _ res WriteByteArrayOp_Word8AsInt64 args = doWriteByteArrayOp Nothing b8 res args
609 emitPrimOp dflags res WriteByteArrayOp_Word8AsWord16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
610 emitPrimOp dflags res WriteByteArrayOp_Word8AsWord32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
611 emitPrimOp _ res WriteByteArrayOp_Word8AsWord64 args = doWriteByteArrayOp Nothing b8 res args
612
613 -- Copying and setting byte arrays
614 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
615 doCopyByteArrayOp src src_off dst dst_off n
616 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
617 doCopyMutableByteArrayOp src src_off dst dst_off n
618 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
619 doCopyByteArrayToAddrOp src src_off dst n
620 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
621 doCopyMutableByteArrayToAddrOp src src_off dst n
622 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
623 doCopyAddrToByteArrayOp src dst dst_off n
624 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
625 doSetByteArrayOp ba off len c
626
627 -- Comparing byte arrays
628 emitPrimOp _ [res] CompareByteArraysOp [ba1,ba1_off,ba2,ba2_off,n] =
629 doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n
630
631 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
632 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
633 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
634 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
635
636 -- Population count
637 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
638 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
639 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
640 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
641 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
642
643 -- Parallel bit deposit
644 emitPrimOp _ [res] Pdep8Op [src, mask] = emitPdepCall res src mask W8
645 emitPrimOp _ [res] Pdep16Op [src, mask] = emitPdepCall res src mask W16
646 emitPrimOp _ [res] Pdep32Op [src, mask] = emitPdepCall res src mask W32
647 emitPrimOp _ [res] Pdep64Op [src, mask] = emitPdepCall res src mask W64
648 emitPrimOp dflags [res] PdepOp [src, mask] = emitPdepCall res src mask (wordWidth dflags)
649
650 -- Parallel bit extract
651 emitPrimOp _ [res] Pext8Op [src, mask] = emitPextCall res src mask W8
652 emitPrimOp _ [res] Pext16Op [src, mask] = emitPextCall res src mask W16
653 emitPrimOp _ [res] Pext32Op [src, mask] = emitPextCall res src mask W32
654 emitPrimOp _ [res] Pext64Op [src, mask] = emitPextCall res src mask W64
655 emitPrimOp dflags [res] PextOp [src, mask] = emitPextCall res src mask (wordWidth dflags)
656
657 -- count leading zeros
658 emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
659 emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
660 emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
661 emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
662 emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
663
664 -- count trailing zeros
665 emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
666 emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
667 emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
668 emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
669 emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
670
671 -- Unsigned int to floating point conversions
672 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
673 (MO_UF_Conv W32) [w]
674 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
675 (MO_UF_Conv W64) [w]
676
677 -- SIMD primops
678 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
679 checkVecCompatibility dflags vcat n w
680 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
681 where
682 zeros :: CmmExpr
683 zeros = CmmLit $ CmmVec (replicate n zero)
684
685 zero :: CmmLit
686 zero = case vcat of
687 IntVec -> CmmInt 0 w
688 WordVec -> CmmInt 0 w
689 FloatVec -> CmmFloat 0 w
690
691 ty :: CmmType
692 ty = vecVmmType vcat n w
693
694 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
695 checkVecCompatibility dflags vcat n w
696 when (es `lengthIsNot` n) $
697 panic "emitPrimOp: VecPackOp has wrong number of arguments"
698 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
699 where
700 zeros :: CmmExpr
701 zeros = CmmLit $ CmmVec (replicate n zero)
702
703 zero :: CmmLit
704 zero = case vcat of
705 IntVec -> CmmInt 0 w
706 WordVec -> CmmInt 0 w
707 FloatVec -> CmmFloat 0 w
708
709 ty :: CmmType
710 ty = vecVmmType vcat n w
711
712 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
713 checkVecCompatibility dflags vcat n w
714 when (res `lengthIsNot` n) $
715 panic "emitPrimOp: VecUnpackOp has wrong number of results"
716 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
717 where
718 ty :: CmmType
719 ty = vecVmmType vcat n w
720
721 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
722 checkVecCompatibility dflags vcat n w
723 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
724 where
725 ty :: CmmType
726 ty = vecVmmType vcat n w
727
728 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
729 checkVecCompatibility dflags vcat n w
730 doIndexByteArrayOp Nothing ty res args
731 where
732 ty :: CmmType
733 ty = vecVmmType vcat n w
734
735 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
736 checkVecCompatibility dflags vcat n w
737 doIndexByteArrayOp Nothing ty res args
738 where
739 ty :: CmmType
740 ty = vecVmmType vcat n w
741
742 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
743 checkVecCompatibility dflags vcat n w
744 doWriteByteArrayOp Nothing ty res args
745 where
746 ty :: CmmType
747 ty = vecVmmType vcat n w
748
749 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
750 checkVecCompatibility dflags vcat n w
751 doIndexOffAddrOp Nothing ty res args
752 where
753 ty :: CmmType
754 ty = vecVmmType vcat n w
755
756 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
757 checkVecCompatibility dflags vcat n w
758 doIndexOffAddrOp Nothing ty res args
759 where
760 ty :: CmmType
761 ty = vecVmmType vcat n w
762
763 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
764 checkVecCompatibility dflags vcat n w
765 doWriteOffAddrOp Nothing ty res args
766 where
767 ty :: CmmType
768 ty = vecVmmType vcat n w
769
770 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
771 checkVecCompatibility dflags vcat n w
772 doIndexByteArrayOpAs Nothing vecty ty res args
773 where
774 vecty :: CmmType
775 vecty = vecVmmType vcat n w
776
777 ty :: CmmType
778 ty = vecCmmCat vcat w
779
780 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
781 checkVecCompatibility dflags vcat n w
782 doIndexByteArrayOpAs Nothing vecty ty res args
783 where
784 vecty :: CmmType
785 vecty = vecVmmType vcat n w
786
787 ty :: CmmType
788 ty = vecCmmCat vcat w
789
790 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
791 checkVecCompatibility dflags vcat n w
792 doWriteByteArrayOp Nothing ty res args
793 where
794 ty :: CmmType
795 ty = vecCmmCat vcat w
796
797 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
798 checkVecCompatibility dflags vcat n w
799 doIndexOffAddrOpAs Nothing vecty ty res args
800 where
801 vecty :: CmmType
802 vecty = vecVmmType vcat n w
803
804 ty :: CmmType
805 ty = vecCmmCat vcat w
806
807 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
808 checkVecCompatibility dflags vcat n w
809 doIndexOffAddrOpAs Nothing vecty ty res args
810 where
811 vecty :: CmmType
812 vecty = vecVmmType vcat n w
813
814 ty :: CmmType
815 ty = vecCmmCat vcat w
816
817 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
818 checkVecCompatibility dflags vcat n w
819 doWriteOffAddrOp Nothing ty res args
820 where
821 ty :: CmmType
822 ty = vecCmmCat vcat w
823
824 -- Prefetch
825 emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
826 emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
827 emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
828 emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
829
830 emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
831 emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
832 emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
833 emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
834
835 emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
836 emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
837 emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
838 emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
839
840 emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
841 emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
842 emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
843 emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
844
845 -- Atomic read-modify-write
846 emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
847 doAtomicRMW res AMO_Add mba ix (bWord dflags) n
848 emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
849 doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
850 emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
851 doAtomicRMW res AMO_And mba ix (bWord dflags) n
852 emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
853 doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
854 emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
855 doAtomicRMW res AMO_Or mba ix (bWord dflags) n
856 emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
857 doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
858 emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
859 doAtomicReadByteArray res mba ix (bWord dflags)
860 emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
861 doAtomicWriteByteArray mba ix (bWord dflags) val
862 emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
863 doCasByteArray res mba ix (bWord dflags) old new
864
865 -- The rest just translate straightforwardly
866 emitPrimOp dflags [res] op [arg]
867 | nopOp op
868 = emitAssign (CmmLocal res) arg
869
870 | Just (mop,rep) <- narrowOp op
871 = emitAssign (CmmLocal res) $
872 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
873
874 emitPrimOp dflags r@[res] op args
875 | Just prim <- callishOp op
876 = do emitPrimCall r prim args
877
878 | Just mop <- translateOp dflags op
879 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
880 emit stmt
881
882 emitPrimOp dflags results op args
883 = case callishPrimOpSupported dflags op of
884 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
885 Right gen -> gen results args
886
887 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
888
889 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
890 callishPrimOpSupported dflags op
891 = case op of
892 IntQuotRemOp | ncg && (x86ish
893 || ppc) -> Left (MO_S_QuotRem (wordWidth dflags))
894 | otherwise -> Right (genericIntQuotRemOp dflags)
895
896 WordQuotRemOp | ncg && (x86ish
897 || ppc) -> Left (MO_U_QuotRem (wordWidth dflags))
898 | otherwise -> Right (genericWordQuotRemOp dflags)
899
900 WordQuotRem2Op | (ncg && (x86ish
901 || ppc))
902 || llvm -> Left (MO_U_QuotRem2 (wordWidth dflags))
903 | otherwise -> Right (genericWordQuotRem2Op dflags)
904
905 WordAdd2Op | (ncg && (x86ish
906 || ppc))
907 || llvm -> Left (MO_Add2 (wordWidth dflags))
908 | otherwise -> Right genericWordAdd2Op
909
910 WordAddCOp | (ncg && (x86ish
911 || ppc))
912 || llvm -> Left (MO_AddWordC (wordWidth dflags))
913 | otherwise -> Right genericWordAddCOp
914
915 WordSubCOp | (ncg && (x86ish
916 || ppc))
917 || llvm -> Left (MO_SubWordC (wordWidth dflags))
918 | otherwise -> Right genericWordSubCOp
919
920 IntAddCOp | (ncg && (x86ish
921 || ppc))
922 || llvm -> Left (MO_AddIntC (wordWidth dflags))
923 | otherwise -> Right genericIntAddCOp
924
925 IntSubCOp | (ncg && (x86ish
926 || ppc))
927 || llvm -> Left (MO_SubIntC (wordWidth dflags))
928 | otherwise -> Right genericIntSubCOp
929
930 WordMul2Op | ncg && (x86ish
931 || ppc)
932 || llvm -> Left (MO_U_Mul2 (wordWidth dflags))
933 | otherwise -> Right genericWordMul2Op
934 FloatFabsOp | (ncg && x86ish
935 || ppc)
936 || llvm -> Left MO_F32_Fabs
937 | otherwise -> Right $ genericFabsOp W32
938 DoubleFabsOp | (ncg && x86ish
939 || ppc)
940 || llvm -> Left MO_F64_Fabs
941 | otherwise -> Right $ genericFabsOp W64
942
943 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
944 where
945 ncg = case hscTarget dflags of
946 HscAsm -> True
947 _ -> False
948 llvm = case hscTarget dflags of
949 HscLlvm -> True
950 _ -> False
951 x86ish = case platformArch (targetPlatform dflags) of
952 ArchX86 -> True
953 ArchX86_64 -> True
954 _ -> False
955 ppc = case platformArch (targetPlatform dflags) of
956 ArchPPC -> True
957 ArchPPC_64 _ -> True
958 _ -> False
959
960 genericIntQuotRemOp :: DynFlags -> GenericOp
961 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
962 = emit $ mkAssign (CmmLocal res_q)
963 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
964 mkAssign (CmmLocal res_r)
965 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
966 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
967
968 genericWordQuotRemOp :: DynFlags -> GenericOp
969 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
970 = emit $ mkAssign (CmmLocal res_q)
971 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
972 mkAssign (CmmLocal res_r)
973 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
974 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
975
976 genericWordQuotRem2Op :: DynFlags -> GenericOp
977 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
978 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
979 where ty = cmmExprType dflags arg_x_high
980 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
981 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
982 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
983 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
984 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
985 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
986 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
987 zero = lit 0
988 one = lit 1
989 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
990 lit i = CmmLit (CmmInt i (wordWidth dflags))
991
992 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
993 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
994 mkAssign (CmmLocal res_r) high)
995 f i acc high low =
996 do roverflowedBit <- newTemp ty
997 rhigh' <- newTemp ty
998 rhigh'' <- newTemp ty
999 rlow' <- newTemp ty
1000 risge <- newTemp ty
1001 racc' <- newTemp ty
1002 let high' = CmmReg (CmmLocal rhigh')
1003 isge = CmmReg (CmmLocal risge)
1004 overflowedBit = CmmReg (CmmLocal roverflowedBit)
1005 let this = catAGraphs
1006 [mkAssign (CmmLocal roverflowedBit)
1007 (shr high negone),
1008 mkAssign (CmmLocal rhigh')
1009 (or (shl high one) (shr low negone)),
1010 mkAssign (CmmLocal rlow')
1011 (shl low one),
1012 mkAssign (CmmLocal risge)
1013 (or (overflowedBit `ne` zero)
1014 (high' `ge` arg_y)),
1015 mkAssign (CmmLocal rhigh'')
1016 (high' `minus` (arg_y `times` isge)),
1017 mkAssign (CmmLocal racc')
1018 (or (shl acc one) isge)]
1019 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
1020 (CmmReg (CmmLocal rhigh''))
1021 (CmmReg (CmmLocal rlow'))
1022 return (this <*> rest)
1023 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
1024
1025 genericWordAdd2Op :: GenericOp
1026 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
1027 = do dflags <- getDynFlags
1028 r1 <- newTemp (cmmExprType dflags arg_x)
1029 r2 <- newTemp (cmmExprType dflags arg_x)
1030 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
1031 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
1032 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
1033 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
1034 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
1035 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
1036 (wordWidth dflags))
1037 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
1038 emit $ catAGraphs
1039 [mkAssign (CmmLocal r1)
1040 (add (bottomHalf arg_x) (bottomHalf arg_y)),
1041 mkAssign (CmmLocal r2)
1042 (add (topHalf (CmmReg (CmmLocal r1)))
1043 (add (topHalf arg_x) (topHalf arg_y))),
1044 mkAssign (CmmLocal res_h)
1045 (topHalf (CmmReg (CmmLocal r2))),
1046 mkAssign (CmmLocal res_l)
1047 (or (toTopHalf (CmmReg (CmmLocal r2)))
1048 (bottomHalf (CmmReg (CmmLocal r1))))]
1049 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
1050
1051 -- | Implements branchless recovery of the carry flag @c@ by checking the
1052 -- leftmost bits of both inputs @a@ and @b@ and result @r = a + b@:
1053 --
1054 -- @
1055 -- c = a&b | (a|b)&~r
1056 -- @
1057 --
1058 -- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
1059 genericWordAddCOp :: GenericOp
1060 genericWordAddCOp [res_r, res_c] [aa, bb]
1061 = do dflags <- getDynFlags
1062 emit $ catAGraphs [
1063 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
1064 mkAssign (CmmLocal res_c) $
1065 CmmMachOp (mo_wordUShr dflags) [
1066 CmmMachOp (mo_wordOr dflags) [
1067 CmmMachOp (mo_wordAnd dflags) [aa,bb],
1068 CmmMachOp (mo_wordAnd dflags) [
1069 CmmMachOp (mo_wordOr dflags) [aa,bb],
1070 CmmMachOp (mo_wordNot dflags) [CmmReg (CmmLocal res_r)]
1071 ]
1072 ],
1073 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1074 ]
1075 ]
1076 genericWordAddCOp _ _ = panic "genericWordAddCOp"
1077
1078 -- | Implements branchless recovery of the carry flag @c@ by checking the
1079 -- leftmost bits of both inputs @a@ and @b@ and result @r = a - b@:
1080 --
1081 -- @
1082 -- c = ~a&b | (~a|b)&r
1083 -- @
1084 --
1085 -- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
1086 genericWordSubCOp :: GenericOp
1087 genericWordSubCOp [res_r, res_c] [aa, bb]
1088 = do dflags <- getDynFlags
1089 emit $ catAGraphs [
1090 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
1091 mkAssign (CmmLocal res_c) $
1092 CmmMachOp (mo_wordUShr dflags) [
1093 CmmMachOp (mo_wordOr dflags) [
1094 CmmMachOp (mo_wordAnd dflags) [
1095 CmmMachOp (mo_wordNot dflags) [aa],
1096 bb
1097 ],
1098 CmmMachOp (mo_wordAnd dflags) [
1099 CmmMachOp (mo_wordOr dflags) [
1100 CmmMachOp (mo_wordNot dflags) [aa],
1101 bb
1102 ],
1103 CmmReg (CmmLocal res_r)
1104 ]
1105 ],
1106 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1107 ]
1108 ]
1109 genericWordSubCOp _ _ = panic "genericWordSubCOp"
1110
1111 genericIntAddCOp :: GenericOp
1112 genericIntAddCOp [res_r, res_c] [aa, bb]
1113 {-
1114 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
1115 C, and without needing any comparisons. This may not be the
1116 fastest way to do it - if you have better code, please send it! --SDM
1117
1118 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
1119
1120 We currently don't make use of the r value if c is != 0 (i.e.
1121 overflow), we just convert to big integers and try again. This
1122 could be improved by making r and c the correct values for
1123 plugging into a new J#.
1124
1125 { r = ((I_)(a)) + ((I_)(b)); \
1126 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
1127 >> (BITS_IN (I_) - 1); \
1128 }
1129 Wading through the mass of bracketry, it seems to reduce to:
1130 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
1131
1132 -}
1133 = do dflags <- getDynFlags
1134 emit $ catAGraphs [
1135 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
1136 mkAssign (CmmLocal res_c) $
1137 CmmMachOp (mo_wordUShr dflags) [
1138 CmmMachOp (mo_wordAnd dflags) [
1139 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
1140 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
1141 ],
1142 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1143 ]
1144 ]
1145 genericIntAddCOp _ _ = panic "genericIntAddCOp"
1146
1147 genericIntSubCOp :: GenericOp
1148 genericIntSubCOp [res_r, res_c] [aa, bb]
1149 {- Similarly:
1150 #define subIntCzh(r,c,a,b) \
1151 { r = ((I_)(a)) - ((I_)(b)); \
1152 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
1153 >> (BITS_IN (I_) - 1); \
1154 }
1155
1156 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
1157 -}
1158 = do dflags <- getDynFlags
1159 emit $ catAGraphs [
1160 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
1161 mkAssign (CmmLocal res_c) $
1162 CmmMachOp (mo_wordUShr dflags) [
1163 CmmMachOp (mo_wordAnd dflags) [
1164 CmmMachOp (mo_wordXor dflags) [aa,bb],
1165 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
1166 ],
1167 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
1168 ]
1169 ]
1170 genericIntSubCOp _ _ = panic "genericIntSubCOp"
1171
1172 genericWordMul2Op :: GenericOp
1173 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
1174 = do dflags <- getDynFlags
1175 let t = cmmExprType dflags arg_x
1176 xlyl <- liftM CmmLocal $ newTemp t
1177 xlyh <- liftM CmmLocal $ newTemp t
1178 xhyl <- liftM CmmLocal $ newTemp t
1179 r <- liftM CmmLocal $ newTemp t
1180 -- This generic implementation is very simple and slow. We might
1181 -- well be able to do better, but for now this at least works.
1182 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
1183 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
1184 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
1185 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
1186 sum = foldl1 add
1187 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
1188 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
1189 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
1190 (wordWidth dflags))
1191 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
1192 emit $ catAGraphs
1193 [mkAssign xlyl
1194 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
1195 mkAssign xlyh
1196 (mul (bottomHalf arg_x) (topHalf arg_y)),
1197 mkAssign xhyl
1198 (mul (topHalf arg_x) (bottomHalf arg_y)),
1199 mkAssign r
1200 (sum [topHalf (CmmReg xlyl),
1201 bottomHalf (CmmReg xhyl),
1202 bottomHalf (CmmReg xlyh)]),
1203 mkAssign (CmmLocal res_l)
1204 (or (bottomHalf (CmmReg xlyl))
1205 (toTopHalf (CmmReg r))),
1206 mkAssign (CmmLocal res_h)
1207 (sum [mul (topHalf arg_x) (topHalf arg_y),
1208 topHalf (CmmReg xhyl),
1209 topHalf (CmmReg xlyh),
1210 topHalf (CmmReg r)])]
1211 genericWordMul2Op _ _ = panic "genericWordMul2Op"
1212
1213 -- This replicates what we had in libraries/base/GHC/Float.hs:
1214 --
1215 -- abs x | x == 0 = 0 -- handles (-0.0)
1216 -- | x > 0 = x
1217 -- | otherwise = negateFloat x
1218 genericFabsOp :: Width -> GenericOp
1219 genericFabsOp w [res_r] [aa]
1220 = do dflags <- getDynFlags
1221 let zero = CmmLit (CmmFloat 0 w)
1222
1223 eq x y = CmmMachOp (MO_F_Eq w) [x, y]
1224 gt x y = CmmMachOp (MO_F_Gt w) [x, y]
1225
1226 neg x = CmmMachOp (MO_F_Neg w) [x]
1227
1228 g1 = catAGraphs [mkAssign (CmmLocal res_r) zero]
1229 g2 = catAGraphs [mkAssign (CmmLocal res_r) aa]
1230
1231 res_t <- CmmLocal <$> newTemp (cmmExprType dflags aa)
1232 let g3 = catAGraphs [mkAssign res_t aa,
1233 mkAssign (CmmLocal res_r) (neg (CmmReg res_t))]
1234
1235 g4 <- mkCmmIfThenElse (gt aa zero) g2 g3
1236
1237 emit =<< mkCmmIfThenElse (eq aa zero) g1 g4
1238
1239 genericFabsOp _ _ _ = panic "genericFabsOp"
1240
1241 -- These PrimOps are NOPs in Cmm
1242
1243 nopOp :: PrimOp -> Bool
1244 nopOp Int2WordOp = True
1245 nopOp Word2IntOp = True
1246 nopOp Int2AddrOp = True
1247 nopOp Addr2IntOp = True
1248 nopOp ChrOp = True -- Int# and Char# are rep'd the same
1249 nopOp OrdOp = True
1250 nopOp _ = False
1251
1252 -- These PrimOps turn into double casts
1253
1254 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
1255 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
1256 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
1257 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
1258 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
1259 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
1260 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
1261 narrowOp _ = Nothing
1262
1263 -- Native word signless ops
1264
1265 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
1266 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
1267 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
1268 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
1269 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
1270 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
1271 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
1272
1273 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
1274 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
1275 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
1276 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
1277 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
1278 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
1279
1280 translateOp dflags AndOp = Just (mo_wordAnd dflags)
1281 translateOp dflags OrOp = Just (mo_wordOr dflags)
1282 translateOp dflags XorOp = Just (mo_wordXor dflags)
1283 translateOp dflags NotOp = Just (mo_wordNot dflags)
1284 translateOp dflags SllOp = Just (mo_wordShl dflags)
1285 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
1286
1287 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
1288
1289 -- Native word signed ops
1290
1291 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
1292 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
1293 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
1294 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
1295 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
1296
1297
1298 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
1299 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
1300 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
1301 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
1302
1303 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
1304 translateOp dflags OrIOp = Just (mo_wordOr dflags)
1305 translateOp dflags XorIOp = Just (mo_wordXor dflags)
1306 translateOp dflags NotIOp = Just (mo_wordNot dflags)
1307 translateOp dflags ISllOp = Just (mo_wordShl dflags)
1308 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
1309 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
1310
1311 -- Native word unsigned ops
1312
1313 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
1314 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
1315 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
1316 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
1317
1318 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
1319 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
1320 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
1321
1322 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
1323 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
1324 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
1325 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
1326
1327 -- Char# ops
1328
1329 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
1330 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
1331 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
1332 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
1333 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
1334 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
1335
1336 -- Double ops
1337
1338 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
1339 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
1340 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
1341 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
1342 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
1343 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
1344
1345 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
1346 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
1347 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
1348 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
1349 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
1350
1351 -- Float ops
1352
1353 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
1354 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
1355 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
1356 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1357 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1358 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1359
1360 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1361 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1362 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1363 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1364 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1365
1366 -- Vector ops
1367
1368 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1369 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1370 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1371 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1372 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1373
1374 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1375 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1376 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1377 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1378 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1379 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1380
1381 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1382 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1383 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1384 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1385 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1386
1387 -- Conversions
1388
1389 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1390 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1391
1392 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1393 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1394
1395 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1396 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1397
1398 -- Word comparisons masquerading as more exotic things.
1399
1400 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1401 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1402 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1403 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1404 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1405 translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
1406 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1407 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1408
1409 translateOp _ _ = Nothing
1410
1411 -- These primops are implemented by CallishMachOps, because they sometimes
1412 -- turn into foreign calls depending on the backend.
1413
1414 callishOp :: PrimOp -> Maybe CallishMachOp
1415 callishOp DoublePowerOp = Just MO_F64_Pwr
1416 callishOp DoubleSinOp = Just MO_F64_Sin
1417 callishOp DoubleCosOp = Just MO_F64_Cos
1418 callishOp DoubleTanOp = Just MO_F64_Tan
1419 callishOp DoubleSinhOp = Just MO_F64_Sinh
1420 callishOp DoubleCoshOp = Just MO_F64_Cosh
1421 callishOp DoubleTanhOp = Just MO_F64_Tanh
1422 callishOp DoubleAsinOp = Just MO_F64_Asin
1423 callishOp DoubleAcosOp = Just MO_F64_Acos
1424 callishOp DoubleAtanOp = Just MO_F64_Atan
1425 callishOp DoubleLogOp = Just MO_F64_Log
1426 callishOp DoubleExpOp = Just MO_F64_Exp
1427 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1428
1429 callishOp FloatPowerOp = Just MO_F32_Pwr
1430 callishOp FloatSinOp = Just MO_F32_Sin
1431 callishOp FloatCosOp = Just MO_F32_Cos
1432 callishOp FloatTanOp = Just MO_F32_Tan
1433 callishOp FloatSinhOp = Just MO_F32_Sinh
1434 callishOp FloatCoshOp = Just MO_F32_Cosh
1435 callishOp FloatTanhOp = Just MO_F32_Tanh
1436 callishOp FloatAsinOp = Just MO_F32_Asin
1437 callishOp FloatAcosOp = Just MO_F32_Acos
1438 callishOp FloatAtanOp = Just MO_F32_Atan
1439 callishOp FloatLogOp = Just MO_F32_Log
1440 callishOp FloatExpOp = Just MO_F32_Exp
1441 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1442
1443 callishOp _ = Nothing
1444
1445 ------------------------------------------------------------------------------
1446 -- Helpers for translating various minor variants of array indexing.
1447
1448 doIndexOffAddrOp :: Maybe MachOp
1449 -> CmmType
1450 -> [LocalReg]
1451 -> [CmmExpr]
1452 -> FCode ()
1453 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1454 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1455 doIndexOffAddrOp _ _ _ _
1456 = panic "StgCmmPrim: doIndexOffAddrOp"
1457
1458 doIndexOffAddrOpAs :: Maybe MachOp
1459 -> CmmType
1460 -> CmmType
1461 -> [LocalReg]
1462 -> [CmmExpr]
1463 -> FCode ()
1464 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1465 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1466 doIndexOffAddrOpAs _ _ _ _ _
1467 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1468
1469 doIndexByteArrayOp :: Maybe MachOp
1470 -> CmmType
1471 -> [LocalReg]
1472 -> [CmmExpr]
1473 -> FCode ()
1474 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1475 = do dflags <- getDynFlags
1476 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1477 doIndexByteArrayOp _ _ _ _
1478 = panic "StgCmmPrim: doIndexByteArrayOp"
1479
1480 doIndexByteArrayOpAs :: Maybe MachOp
1481 -> CmmType
1482 -> CmmType
1483 -> [LocalReg]
1484 -> [CmmExpr]
1485 -> FCode ()
1486 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1487 = do dflags <- getDynFlags
1488 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1489 doIndexByteArrayOpAs _ _ _ _ _
1490 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1491
1492 doReadPtrArrayOp :: LocalReg
1493 -> CmmExpr
1494 -> CmmExpr
1495 -> FCode ()
1496 doReadPtrArrayOp res addr idx
1497 = do dflags <- getDynFlags
1498 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1499
1500 doWriteOffAddrOp :: Maybe MachOp
1501 -> CmmType
1502 -> [LocalReg]
1503 -> [CmmExpr]
1504 -> FCode ()
1505 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1506 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1507 doWriteOffAddrOp _ _ _ _
1508 = panic "StgCmmPrim: doWriteOffAddrOp"
1509
1510 doWriteByteArrayOp :: Maybe MachOp
1511 -> CmmType
1512 -> [LocalReg]
1513 -> [CmmExpr]
1514 -> FCode ()
1515 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1516 = do dflags <- getDynFlags
1517 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1518 doWriteByteArrayOp _ _ _ _
1519 = panic "StgCmmPrim: doWriteByteArrayOp"
1520
1521 doWritePtrArrayOp :: CmmExpr
1522 -> CmmExpr
1523 -> CmmExpr
1524 -> FCode ()
1525 doWritePtrArrayOp addr idx val
1526 = do dflags <- getDynFlags
1527 let ty = cmmExprType dflags val
1528 -- This write barrier is to ensure that the heap writes to the object
1529 -- referred to by val have happened before we write val into the array.
1530 -- See #12469 for details.
1531 emitPrimCall [] MO_WriteBarrier []
1532 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1533 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1534 -- the write barrier. We must write a byte into the mark table:
1535 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1536 emit $ mkStore (
1537 cmmOffsetExpr dflags
1538 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1539 (loadArrPtrsSize dflags addr))
1540 (CmmMachOp (mo_wordUShr dflags) [idx,
1541 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1542 ) (CmmLit (CmmInt 1 W8))
1543
1544 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1545 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1546 where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1547
1548 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1549 -> Maybe MachOp -- Optional result cast
1550 -> CmmType -- Type of element we are accessing
1551 -> LocalReg -- Destination
1552 -> CmmExpr -- Base address
1553 -> CmmType -- Type of element by which we are indexing
1554 -> CmmExpr -- Index
1555 -> FCode ()
1556 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1557 = do dflags <- getDynFlags
1558 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1559 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1560 = do dflags <- getDynFlags
1561 emitAssign (CmmLocal res) (CmmMachOp cast [
1562 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1563
1564 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1565 -> Maybe MachOp -- Optional value cast
1566 -> CmmExpr -- Base address
1567 -> CmmType -- Type of element by which we are indexing
1568 -> CmmExpr -- Index
1569 -> CmmExpr -- Value to write
1570 -> FCode ()
1571 mkBasicIndexedWrite off Nothing base idx_ty idx val
1572 = do dflags <- getDynFlags
1573 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1574 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1575 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1576
1577 -- ----------------------------------------------------------------------------
1578 -- Misc utils
1579
1580 cmmIndexOffExpr :: DynFlags
1581 -> ByteOff -- Initial offset in bytes
1582 -> Width -- Width of element by which we are indexing
1583 -> CmmExpr -- Base address
1584 -> CmmExpr -- Index
1585 -> CmmExpr
1586 cmmIndexOffExpr dflags off width base idx
1587 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1588
1589 cmmLoadIndexOffExpr :: DynFlags
1590 -> ByteOff -- Initial offset in bytes
1591 -> CmmType -- Type of element we are accessing
1592 -> CmmExpr -- Base address
1593 -> CmmType -- Type of element by which we are indexing
1594 -> CmmExpr -- Index
1595 -> CmmExpr
1596 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1597 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1598
1599 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1600 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1601
1602 ------------------------------------------------------------------------------
1603 -- Helpers for translating vector primops.
1604
1605 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1606 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1607
1608 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1609 vecCmmCat IntVec = cmmBits
1610 vecCmmCat WordVec = cmmBits
1611 vecCmmCat FloatVec = cmmFloat
1612
1613 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1614 vecElemInjectCast _ FloatVec _ = Nothing
1615 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1616 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1617 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1618 vecElemInjectCast _ IntVec W64 = Nothing
1619 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1620 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1621 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1622 vecElemInjectCast _ WordVec W64 = Nothing
1623 vecElemInjectCast _ _ _ = Nothing
1624
1625 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1626 vecElemProjectCast _ FloatVec _ = Nothing
1627 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1628 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1629 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1630 vecElemProjectCast _ IntVec W64 = Nothing
1631 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1632 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1633 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1634 vecElemProjectCast _ WordVec W64 = Nothing
1635 vecElemProjectCast _ _ _ = Nothing
1636
1637 -- Check to make sure that we can generate code for the specified vector type
1638 -- given the current set of dynamic flags.
1639 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1640 checkVecCompatibility dflags vcat l w = do
1641 when (hscTarget dflags /= HscLlvm) $ do
1642 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1643 ,"Please use -fllvm."]
1644 check vecWidth vcat l w
1645 where
1646 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1647 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1648 sorry $ "128-bit wide single-precision floating point " ++
1649 "SIMD vector instructions require at least -msse."
1650 check W128 _ _ _ | not (isSse2Enabled dflags) =
1651 sorry $ "128-bit wide integer and double precision " ++
1652 "SIMD vector instructions require at least -msse2."
1653 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1654 sorry $ "256-bit wide floating point " ++
1655 "SIMD vector instructions require at least -mavx."
1656 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1657 sorry $ "256-bit wide integer " ++
1658 "SIMD vector instructions require at least -mavx2."
1659 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1660 sorry $ "512-bit wide " ++
1661 "SIMD vector instructions require -mavx512f."
1662 check _ _ _ _ = return ()
1663
1664 vecWidth = typeWidth (vecVmmType vcat l w)
1665
1666 ------------------------------------------------------------------------------
1667 -- Helpers for translating vector packing and unpacking.
1668
1669 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1670 -> CmmType -- Type of vector
1671 -> CmmExpr -- Initial vector
1672 -> [CmmExpr] -- Elements
1673 -> CmmFormal -- Destination for result
1674 -> FCode ()
1675 doVecPackOp maybe_pre_write_cast ty z es res = do
1676 dst <- newTemp ty
1677 emitAssign (CmmLocal dst) z
1678 vecPack dst es 0
1679 where
1680 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1681 vecPack src [] _ =
1682 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1683
1684 vecPack src (e : es) i = do
1685 dst <- newTemp ty
1686 if isFloatType (vecElemType ty)
1687 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1688 [CmmReg (CmmLocal src), cast e, iLit])
1689 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1690 [CmmReg (CmmLocal src), cast e, iLit])
1691 vecPack dst es (i + 1)
1692 where
1693 -- vector indices are always 32-bits
1694 iLit = CmmLit (CmmInt (toInteger i) W32)
1695
1696 cast :: CmmExpr -> CmmExpr
1697 cast val = case maybe_pre_write_cast of
1698 Nothing -> val
1699 Just cast -> CmmMachOp cast [val]
1700
1701 len :: Length
1702 len = vecLength ty
1703
1704 wid :: Width
1705 wid = typeWidth (vecElemType ty)
1706
1707 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1708 -> CmmType -- Type of vector
1709 -> CmmExpr -- Vector
1710 -> [CmmFormal] -- Element results
1711 -> FCode ()
1712 doVecUnpackOp maybe_post_read_cast ty e res =
1713 vecUnpack res 0
1714 where
1715 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1716 vecUnpack [] _ =
1717 return ()
1718
1719 vecUnpack (r : rs) i = do
1720 if isFloatType (vecElemType ty)
1721 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1722 [e, iLit]))
1723 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1724 [e, iLit]))
1725 vecUnpack rs (i + 1)
1726 where
1727 -- vector indices are always 32-bits
1728 iLit = CmmLit (CmmInt (toInteger i) W32)
1729
1730 cast :: CmmExpr -> CmmExpr
1731 cast val = case maybe_post_read_cast of
1732 Nothing -> val
1733 Just cast -> CmmMachOp cast [val]
1734
1735 len :: Length
1736 len = vecLength ty
1737
1738 wid :: Width
1739 wid = typeWidth (vecElemType ty)
1740
1741 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1742 -> CmmType -- Vector type
1743 -> CmmExpr -- Source vector
1744 -> CmmExpr -- Element
1745 -> CmmExpr -- Index at which to insert element
1746 -> CmmFormal -- Destination for result
1747 -> FCode ()
1748 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1749 dflags <- getDynFlags
1750 -- vector indices are always 32-bits
1751 let idx' :: CmmExpr
1752 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1753 if isFloatType (vecElemType ty)
1754 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1755 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1756 where
1757 cast :: CmmExpr -> CmmExpr
1758 cast val = case maybe_pre_write_cast of
1759 Nothing -> val
1760 Just cast -> CmmMachOp cast [val]
1761
1762 len :: Length
1763 len = vecLength ty
1764
1765 wid :: Width
1766 wid = typeWidth (vecElemType ty)
1767
1768 ------------------------------------------------------------------------------
1769 -- Helpers for translating prefetching.
1770
1771
1772 -- | Translate byte array prefetch operations into proper primcalls.
1773 doPrefetchByteArrayOp :: Int
1774 -> [CmmExpr]
1775 -> FCode ()
1776 doPrefetchByteArrayOp locality [addr,idx]
1777 = do dflags <- getDynFlags
1778 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1779 doPrefetchByteArrayOp _ _
1780 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1781
1782 -- | Translate mutable byte array prefetch operations into proper primcalls.
1783 doPrefetchMutableByteArrayOp :: Int
1784 -> [CmmExpr]
1785 -> FCode ()
1786 doPrefetchMutableByteArrayOp locality [addr,idx]
1787 = do dflags <- getDynFlags
1788 mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
1789 doPrefetchMutableByteArrayOp _ _
1790 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1791
1792 -- | Translate address prefetch operations into proper primcalls.
1793 doPrefetchAddrOp ::Int
1794 -> [CmmExpr]
1795 -> FCode ()
1796 doPrefetchAddrOp locality [addr,idx]
1797 = mkBasicPrefetch locality 0 addr idx
1798 doPrefetchAddrOp _ _
1799 = panic "StgCmmPrim: doPrefetchAddrOp"
1800
1801 -- | Translate value prefetch operations into proper primcalls.
1802 doPrefetchValueOp :: Int
1803 -> [CmmExpr]
1804 -> FCode ()
1805 doPrefetchValueOp locality [addr]
1806 = do dflags <- getDynFlags
1807 mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
1808 doPrefetchValueOp _ _
1809 = panic "StgCmmPrim: doPrefetchValueOp"
1810
1811 -- | helper to generate prefetch primcalls
1812 mkBasicPrefetch :: Int -- Locality level 0-3
1813 -> ByteOff -- Initial offset in bytes
1814 -> CmmExpr -- Base address
1815 -> CmmExpr -- Index
1816 -> FCode ()
1817 mkBasicPrefetch locality off base idx
1818 = do dflags <- getDynFlags
1819 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1820 return ()
1821
1822 -- ----------------------------------------------------------------------------
1823 -- Allocating byte arrays
1824
1825 -- | Takes a register to return the newly allocated array in and the
1826 -- size of the new array in bytes. Allocates a new
1827 -- 'MutableByteArray#'.
1828 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1829 doNewByteArrayOp res_r n = do
1830 dflags <- getDynFlags
1831
1832 let info_ptr = mkLblExpr mkArrWords_infoLabel
1833 rep = arrWordsRep dflags n
1834
1835 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1836 (mkIntExpr dflags (nonHdrSize dflags rep))
1837 (zeroExpr dflags)
1838
1839 let hdr_size = fixedHdrSize dflags
1840
1841 base <- allocHeapClosure rep info_ptr cccsExpr
1842 [ (mkIntExpr dflags n,
1843 hdr_size + oFFSET_StgArrBytes_bytes dflags)
1844 ]
1845
1846 emit $ mkAssign (CmmLocal res_r) base
1847
1848 -- ----------------------------------------------------------------------------
1849 -- Comparing byte arrays
1850
1851 doCompareByteArraysOp :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1852 -> FCode ()
1853 doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n = do
1854 dflags <- getDynFlags
1855 ba1_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba1 (arrWordsHdrSize dflags)) ba1_off
1856 ba2_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba2 (arrWordsHdrSize dflags)) ba2_off
1857
1858 -- short-cut in case of equal pointers avoiding a costly
1859 -- subroutine call to the memcmp(3) routine; the Cmm logic below
1860 -- results in assembly code being generated for
1861 --
1862 -- cmpPrefix10 :: ByteArray# -> ByteArray# -> Int#
1863 -- cmpPrefix10 ba1 ba2 = compareByteArrays# ba1 0# ba2 0# 10#
1864 --
1865 -- that looks like
1866 --
1867 -- leaq 16(%r14),%rax
1868 -- leaq 16(%rsi),%rbx
1869 -- xorl %ecx,%ecx
1870 -- cmpq %rbx,%rax
1871 -- je l_ptr_eq
1872 --
1873 -- ; NB: the common case (unequal pointers) falls-through
1874 -- ; the conditional jump, and therefore matches the
1875 -- ; usual static branch prediction convention of modern cpus
1876 --
1877 -- subq $8,%rsp
1878 -- movq %rbx,%rsi
1879 -- movq %rax,%rdi
1880 -- movl $10,%edx
1881 -- xorl %eax,%eax
1882 -- call memcmp
1883 -- addq $8,%rsp
1884 -- movslq %eax,%rax
1885 -- movq %rax,%rcx
1886 -- l_ptr_eq:
1887 -- movq %rcx,%rbx
1888 -- jmp *(%rbp)
1889
1890 l_ptr_eq <- newBlockId
1891 l_ptr_ne <- newBlockId
1892
1893 emit (mkAssign (CmmLocal res) (zeroExpr dflags))
1894 emit (mkCbranch (cmmEqWord dflags ba1_p ba2_p)
1895 l_ptr_eq l_ptr_ne (Just False))
1896
1897 emitLabel l_ptr_ne
1898 emitMemcmpCall res ba1_p ba2_p n 1
1899
1900 emitLabel l_ptr_eq
1901
1902 -- ----------------------------------------------------------------------------
1903 -- Copying byte arrays
1904
1905 -- | Takes a source 'ByteArray#', an offset in the source array, a
1906 -- destination 'MutableByteArray#', an offset into the destination
1907 -- array, and the number of bytes to copy. Copies the given number of
1908 -- bytes from the source array to the destination array.
1909 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1910 -> FCode ()
1911 doCopyByteArrayOp = emitCopyByteArray copy
1912 where
1913 -- Copy data (we assume the arrays aren't overlapping since
1914 -- they're of different types)
1915 copy _src _dst dst_p src_p bytes =
1916 emitMemcpyCall dst_p src_p bytes 1
1917
1918 -- | Takes a source 'MutableByteArray#', an offset in the source
1919 -- array, a destination 'MutableByteArray#', an offset into the
1920 -- destination array, and the number of bytes to copy. Copies the
1921 -- given number of bytes from the source array to the destination
1922 -- array.
1923 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1924 -> FCode ()
1925 doCopyMutableByteArrayOp = emitCopyByteArray copy
1926 where
1927 -- The only time the memory might overlap is when the two arrays
1928 -- we were provided are the same array!
1929 -- TODO: Optimize branch for common case of no aliasing.
1930 copy src dst dst_p src_p bytes = do
1931 dflags <- getDynFlags
1932 [moveCall, cpyCall] <- forkAlts [
1933 getCode $ emitMemmoveCall dst_p src_p bytes 1,
1934 getCode $ emitMemcpyCall dst_p src_p bytes 1
1935 ]
1936 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1937
1938 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1939 -> FCode ())
1940 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1941 -> FCode ()
1942 emitCopyByteArray copy src src_off dst dst_off n = do
1943 dflags <- getDynFlags
1944 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1945 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1946 copy src dst dst_p src_p n
1947
1948 -- | Takes a source 'ByteArray#', an offset in the source array, a
1949 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1950 -- number of bytes from the source array to the destination memory region.
1951 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1952 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1953 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1954 dflags <- getDynFlags
1955 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1956 emitMemcpyCall dst_p src_p bytes 1
1957
1958 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1959 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1960 -- number of bytes from the source array to the destination memory region.
1961 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1962 -> FCode ()
1963 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1964
1965 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1966 -- the destination array, and the number of bytes to copy. Copies the given
1967 -- number of bytes from the source memory region to the destination array.
1968 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1969 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1970 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1971 dflags <- getDynFlags
1972 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1973 emitMemcpyCall dst_p src_p bytes 1
1974
1975
1976 -- ----------------------------------------------------------------------------
1977 -- Setting byte arrays
1978
1979 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1980 -- and a byte, and sets each of the selected bytes in the array to the
1981 -- character.
1982 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1983 -> FCode ()
1984 doSetByteArrayOp ba off len c
1985 = do dflags <- getDynFlags
1986 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1987 emitMemsetCall p c len 1
1988
1989 -- ----------------------------------------------------------------------------
1990 -- Allocating arrays
1991
1992 -- | Allocate a new array.
1993 doNewArrayOp :: CmmFormal -- ^ return register
1994 -> SMRep -- ^ representation of the array
1995 -> CLabel -- ^ info pointer
1996 -> [(CmmExpr, ByteOff)] -- ^ header payload
1997 -> WordOff -- ^ array size
1998 -> CmmExpr -- ^ initial element
1999 -> FCode ()
2000 doNewArrayOp res_r rep info payload n init = do
2001 dflags <- getDynFlags
2002
2003 let info_ptr = mkLblExpr info
2004
2005 tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
2006 (mkIntExpr dflags (nonHdrSize dflags rep))
2007 (zeroExpr dflags)
2008
2009 base <- allocHeapClosure rep info_ptr cccsExpr payload
2010
2011 arr <- CmmLocal `fmap` newTemp (bWord dflags)
2012 emit $ mkAssign arr base
2013
2014 -- Initialise all elements of the array
2015 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep)
2016 for <- newBlockId
2017 emitLabel for
2018 let loopBody =
2019 [ mkStore (CmmReg (CmmLocal p)) init
2020 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
2021 , mkBranch for ]
2022 emit =<< mkCmmIfThen
2023 (cmmULtWord dflags (CmmReg (CmmLocal p))
2024 (cmmOffsetW dflags (CmmReg arr)
2025 (hdrSizeW dflags rep + n)))
2026 (catAGraphs loopBody)
2027
2028 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2029
2030 -- ----------------------------------------------------------------------------
2031 -- Copying pointer arrays
2032
2033 -- EZY: This code has an unusually high amount of assignTemp calls, seen
2034 -- nowhere else in the code generator. This is mostly because these
2035 -- "primitive" ops result in a surprisingly large amount of code. It
2036 -- will likely be worthwhile to optimize what is emitted here, so that
2037 -- our optimization passes don't waste time repeatedly optimizing the
2038 -- same bits of code.
2039
2040 -- More closely imitates 'assignTemp' from the old code generator, which
2041 -- returns a CmmExpr rather than a LocalReg.
2042 assignTempE :: CmmExpr -> FCode CmmExpr
2043 assignTempE e = do
2044 t <- assignTemp e
2045 return (CmmReg (CmmLocal t))
2046
2047 -- | Takes a source 'Array#', an offset in the source array, a
2048 -- destination 'MutableArray#', an offset into the destination array,
2049 -- and the number of elements to copy. Copies the given number of
2050 -- elements from the source array to the destination array.
2051 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2052 -> FCode ()
2053 doCopyArrayOp = emitCopyArray copy
2054 where
2055 -- Copy data (we assume the arrays aren't overlapping since
2056 -- they're of different types)
2057 copy _src _dst dst_p src_p bytes =
2058 do dflags <- getDynFlags
2059 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
2060 (wORD_SIZE dflags)
2061
2062
2063 -- | Takes a source 'MutableArray#', an offset in the source array, a
2064 -- destination 'MutableArray#', an offset into the destination array,
2065 -- and the number of elements to copy. Copies the given number of
2066 -- elements from the source array to the destination array.
2067 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2068 -> FCode ()
2069 doCopyMutableArrayOp = emitCopyArray copy
2070 where
2071 -- The only time the memory might overlap is when the two arrays
2072 -- we were provided are the same array!
2073 -- TODO: Optimize branch for common case of no aliasing.
2074 copy src dst dst_p src_p bytes = do
2075 dflags <- getDynFlags
2076 [moveCall, cpyCall] <- forkAlts [
2077 getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
2078 (wORD_SIZE dflags),
2079 getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
2080 (wORD_SIZE dflags)
2081 ]
2082 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
2083
2084 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
2085 -> FCode ()) -- ^ copy function
2086 -> CmmExpr -- ^ source array
2087 -> CmmExpr -- ^ offset in source array
2088 -> CmmExpr -- ^ destination array
2089 -> CmmExpr -- ^ offset in destination array
2090 -> WordOff -- ^ number of elements to copy
2091 -> FCode ()
2092 emitCopyArray copy src0 src_off dst0 dst_off0 n = do
2093 dflags <- getDynFlags
2094 when (n /= 0) $ do
2095 -- Passed as arguments (be careful)
2096 src <- assignTempE src0
2097 dst <- assignTempE dst0
2098 dst_off <- assignTempE dst_off0
2099
2100 -- Set the dirty bit in the header.
2101 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
2102
2103 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
2104 (arrPtrsHdrSize dflags)
2105 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
2106 src_p <- assignTempE $ cmmOffsetExprW dflags
2107 (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
2108 let bytes = wordsToBytes dflags n
2109
2110 copy src dst dst_p src_p bytes
2111
2112 -- The base address of the destination card table
2113 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
2114 (loadArrPtrsSize dflags dst)
2115
2116 emitSetCards dst_off dst_cards_p n
2117
2118 doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2119 -> FCode ()
2120 doCopySmallArrayOp = emitCopySmallArray copy
2121 where
2122 -- Copy data (we assume the arrays aren't overlapping since
2123 -- they're of different types)
2124 copy _src _dst dst_p src_p bytes =
2125 do dflags <- getDynFlags
2126 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
2127 (wORD_SIZE dflags)
2128
2129
2130 doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
2131 -> FCode ()
2132 doCopySmallMutableArrayOp = emitCopySmallArray copy
2133 where
2134 -- The only time the memory might overlap is when the two arrays
2135 -- we were provided are the same array!
2136 -- TODO: Optimize branch for common case of no aliasing.
2137 copy src dst dst_p src_p bytes = do
2138 dflags <- getDynFlags
2139 [moveCall, cpyCall] <- forkAlts
2140 [ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
2141 (wORD_SIZE dflags)
2142 , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
2143 (wORD_SIZE dflags)
2144 ]
2145 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
2146
2147 emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
2148 -> FCode ()) -- ^ copy function
2149 -> CmmExpr -- ^ source array
2150 -> CmmExpr -- ^ offset in source array
2151 -> CmmExpr -- ^ destination array
2152 -> CmmExpr -- ^ offset in destination array
2153 -> WordOff -- ^ number of elements to copy
2154 -> FCode ()
2155 emitCopySmallArray copy src0 src_off dst0 dst_off n = do
2156 dflags <- getDynFlags
2157
2158 -- Passed as arguments (be careful)
2159 src <- assignTempE src0
2160 dst <- assignTempE dst0
2161
2162 -- Set the dirty bit in the header.
2163 emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2164
2165 dst_p <- assignTempE $ cmmOffsetExprW dflags
2166 (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
2167 src_p <- assignTempE $ cmmOffsetExprW dflags
2168 (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
2169 let bytes = wordsToBytes dflags n
2170
2171 copy src dst dst_p src_p bytes
2172
2173 -- | Takes an info table label, a register to return the newly
2174 -- allocated array in, a source array, an offset in the source array,
2175 -- and the number of elements to copy. Allocates a new array and
2176 -- initializes it from the source array.
2177 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
2178 -> FCode ()
2179 emitCloneArray info_p res_r src src_off n = do
2180 dflags <- getDynFlags
2181
2182 let info_ptr = mkLblExpr info_p
2183 rep = arrPtrsRep dflags n
2184
2185 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
2186 (mkIntExpr dflags (nonHdrSize dflags rep))
2187 (zeroExpr dflags)
2188
2189 let hdr_size = fixedHdrSize dflags
2190
2191 base <- allocHeapClosure rep info_ptr cccsExpr
2192 [ (mkIntExpr dflags n,
2193 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
2194 , (mkIntExpr dflags (nonHdrSizeW rep),
2195 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
2196 ]
2197
2198 arr <- CmmLocal `fmap` newTemp (bWord dflags)
2199 emit $ mkAssign arr base
2200
2201 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
2202 (arrPtrsHdrSize dflags)
2203 src_p <- assignTempE $ cmmOffsetExprW dflags src
2204 (cmmAddWord dflags
2205 (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
2206
2207 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
2208 (wORD_SIZE dflags)
2209
2210 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2211
2212 -- | Takes an info table label, a register to return the newly
2213 -- allocated array in, a source array, an offset in the source array,
2214 -- and the number of elements to copy. Allocates a new array and
2215 -- initializes it from the source array.
2216 emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
2217 -> FCode ()
2218 emitCloneSmallArray info_p res_r src src_off n = do
2219 dflags <- getDynFlags
2220
2221 let info_ptr = mkLblExpr info_p
2222 rep = smallArrPtrsRep n
2223
2224 tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
2225 (mkIntExpr dflags (nonHdrSize dflags rep))
2226 (zeroExpr dflags)
2227
2228 let hdr_size = fixedHdrSize dflags
2229
2230 base <- allocHeapClosure rep info_ptr cccsExpr
2231 [ (mkIntExpr dflags n,
2232 hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
2233 ]
2234
2235 arr <- CmmLocal `fmap` newTemp (bWord dflags)
2236 emit $ mkAssign arr base
2237
2238 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
2239 (smallArrPtrsHdrSize dflags)
2240 src_p <- assignTempE $ cmmOffsetExprW dflags src
2241 (cmmAddWord dflags
2242 (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
2243
2244 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
2245 (wORD_SIZE dflags)
2246
2247 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
2248
2249 -- | Takes and offset in the destination array, the base address of
2250 -- the card table, and the number of elements affected (*not* the
2251 -- number of cards). The number of elements may not be zero.
2252 -- Marks the relevant cards as dirty.
2253 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
2254 emitSetCards dst_start dst_cards_start n = do
2255 dflags <- getDynFlags
2256 start_card <- assignTempE $ cardCmm dflags dst_start
2257 let end_card = cardCmm dflags
2258 (cmmSubWord dflags
2259 (cmmAddWord dflags dst_start (mkIntExpr dflags n))
2260 (mkIntExpr dflags 1))
2261 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
2262 (mkIntExpr dflags 1)
2263 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
2264 1 -- no alignment (1 byte)
2265
2266 -- Convert an element index to a card index
2267 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
2268 cardCmm dflags i =
2269 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
2270
2271 ------------------------------------------------------------------------------
2272 -- SmallArray PrimOp implementations
2273
2274 doReadSmallPtrArrayOp :: LocalReg
2275 -> CmmExpr
2276 -> CmmExpr
2277 -> FCode ()
2278 doReadSmallPtrArrayOp res addr idx = do
2279 dflags <- getDynFlags
2280 mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
2281 (gcWord dflags) idx
2282
2283 doWriteSmallPtrArrayOp :: CmmExpr
2284 -> CmmExpr
2285 -> CmmExpr
2286 -> FCode ()
2287 doWriteSmallPtrArrayOp addr idx val = do
2288 dflags <- getDynFlags
2289 let ty = cmmExprType dflags val
2290 mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
2291 emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
2292
2293 ------------------------------------------------------------------------------
2294 -- Atomic read-modify-write
2295
2296 -- | Emit an atomic modification to a byte array element. The result
2297 -- reg contains that previous value of the element. Implies a full
2298 -- memory barrier.
2299 doAtomicRMW :: LocalReg -- ^ Result reg
2300 -> AtomicMachOp -- ^ Atomic op (e.g. add)
2301 -> CmmExpr -- ^ MutableByteArray#
2302 -> CmmExpr -- ^ Index
2303 -> CmmType -- ^ Type of element by which we are indexing
2304 -> CmmExpr -- ^ Op argument (e.g. amount to add)
2305 -> FCode ()
2306 doAtomicRMW res amop mba idx idx_ty n = do
2307 dflags <- getDynFlags
2308 let width = typeWidth idx_ty
2309 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2310 width mba idx
2311 emitPrimCall
2312 [ res ]
2313 (MO_AtomicRMW width amop)
2314 [ addr, n ]
2315
2316 -- | Emit an atomic read to a byte array that acts as a memory barrier.
2317 doAtomicReadByteArray
2318 :: LocalReg -- ^ Result reg
2319 -> CmmExpr -- ^ MutableByteArray#
2320 -> CmmExpr -- ^ Index
2321 -> CmmType -- ^ Type of element by which we are indexing
2322 -> FCode ()
2323 doAtomicReadByteArray res mba idx idx_ty = do
2324 dflags <- getDynFlags
2325 let width = typeWidth idx_ty
2326 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2327 width mba idx
2328 emitPrimCall
2329 [ res ]
2330 (MO_AtomicRead width)
2331 [ addr ]
2332
2333 -- | Emit an atomic write to a byte array that acts as a memory barrier.
2334 doAtomicWriteByteArray
2335 :: CmmExpr -- ^ MutableByteArray#
2336 -> CmmExpr -- ^ Index
2337 -> CmmType -- ^ Type of element by which we are indexing
2338 -> CmmExpr -- ^ Value to write
2339 -> FCode ()
2340 doAtomicWriteByteArray mba idx idx_ty val = do
2341 dflags <- getDynFlags
2342 let width = typeWidth idx_ty
2343 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2344 width mba idx
2345 emitPrimCall
2346 [ {- no results -} ]
2347 (MO_AtomicWrite width)
2348 [ addr, val ]
2349
2350 doCasByteArray
2351 :: LocalReg -- ^ Result reg
2352 -> CmmExpr -- ^ MutableByteArray#
2353 -> CmmExpr -- ^ Index
2354 -> CmmType -- ^ Type of element by which we are indexing
2355 -> CmmExpr -- ^ Old value
2356 -> CmmExpr -- ^ New value
2357 -> FCode ()
2358 doCasByteArray res mba idx idx_ty old new = do
2359 dflags <- getDynFlags
2360 let width = (typeWidth idx_ty)
2361 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2362 width mba idx
2363 emitPrimCall
2364 [ res ]
2365 (MO_Cmpxchg width)
2366 [ addr, old, new ]
2367
2368 ------------------------------------------------------------------------------
2369 -- Helpers for emitting function calls
2370
2371 -- | Emit a call to @memcpy@.
2372 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2373 emitMemcpyCall dst src n align = do
2374 emitPrimCall
2375 [ {-no results-} ]
2376 (MO_Memcpy align)
2377 [ dst, src, n ]
2378
2379 -- | Emit a call to @memmove@.
2380 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2381 emitMemmoveCall dst src n align = do
2382 emitPrimCall
2383 [ {- no results -} ]
2384 (MO_Memmove align)
2385 [ dst, src, n ]
2386
2387 -- | Emit a call to @memset@. The second argument must fit inside an
2388 -- unsigned char.
2389 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2390 emitMemsetCall dst c n align = do
2391 emitPrimCall
2392 [ {- no results -} ]
2393 (MO_Memset align)
2394 [ dst, c, n ]
2395
2396 emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
2397 emitMemcmpCall res ptr1 ptr2 n align = do
2398 -- 'MO_Memcmp' is assumed to return an 32bit 'CInt' because all
2399 -- code-gens currently call out to the @memcmp(3)@ C function.
2400 -- This was easier than moving the sign-extensions into
2401 -- all the code-gens.
2402 dflags <- getDynFlags
2403 let is32Bit = typeWidth (localRegType res) == W32
2404
2405 cres <- if is32Bit
2406 then return res
2407 else newTemp b32
2408
2409 emitPrimCall
2410 [ cres ]
2411 (MO_Memcmp align)
2412 [ ptr1, ptr2, n ]
2413
2414 unless is32Bit $ do
2415 emit $ mkAssign (CmmLocal res)
2416 (CmmMachOp
2417 (mo_s_32ToWord dflags)
2418 [(CmmReg (CmmLocal cres))])
2419
2420 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2421 emitBSwapCall res x width = do
2422 emitPrimCall
2423 [ res ]
2424 (MO_BSwap width)
2425 [ x ]
2426
2427 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2428 emitPopCntCall res x width = do
2429 emitPrimCall
2430 [ res ]
2431 (MO_PopCnt width)
2432 [ x ]
2433
2434 emitPdepCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
2435 emitPdepCall res x y width = do
2436 emitPrimCall
2437 [ res ]
2438 (MO_Pdep width)
2439 [ x, y ]
2440
2441 emitPextCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
2442 emitPextCall res x y width = do
2443 emitPrimCall
2444 [ res ]
2445 (MO_Pext width)
2446 [ x, y ]
2447
2448 emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2449 emitClzCall res x width = do
2450 emitPrimCall
2451 [ res ]
2452 (MO_Clz width)
2453 [ x ]
2454
2455 emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2456 emitCtzCall res x width = do
2457 emitPrimCall
2458 [ res ]
2459 (MO_Ctz width)
2460 [ x ]