Implement new CLZ and CTZ primops (re #9340)
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 {-# LANGUAGE CPP #-}
2
3 ----------------------------------------------------------------------------
4 --
5 -- Stg to C--: primitive operations
6 --
7 -- (c) The University of Glasgow 2004-2006
8 --
9 -----------------------------------------------------------------------------
10
11 module StgCmmPrim (
12 cgOpApp,
13 cgPrimOp, -- internal(ish), used by cgCase to get code for a
14 -- comparison without also turning it into a Bool.
15 shouldInlinePrimOp
16 ) where
17
18 #include "HsVersions.h"
19
20 import StgCmmLayout
21 import StgCmmForeign
22 import StgCmmEnv
23 import StgCmmMonad
24 import StgCmmUtils
25 import StgCmmTicky
26 import StgCmmHeap
27 import StgCmmProf ( costCentreFrom, curCCS )
28
29 import DynFlags
30 import Platform
31 import BasicTypes
32 import MkGraph
33 import StgSyn
34 import Cmm
35 import CmmInfo
36 import Type ( Type, tyConAppTyCon )
37 import TyCon
38 import CLabel
39 import CmmUtils
40 import PrimOp
41 import SMRep
42 import FastString
43 import Outputable
44 import Util
45
46 import Data.Bits ((.&.), bit)
47 import Control.Monad (liftM, when)
48
49 ------------------------------------------------------------------------
50 -- Primitive operations and foreign calls
51 ------------------------------------------------------------------------
52
53 {- Note [Foreign call results]
54 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
55 A foreign call always returns an unboxed tuple of results, one
56 of which is the state token. This seems to happen even for pure
57 calls.
58
59 Even if we returned a single result for pure calls, it'd still be
60 right to wrap it in a singleton unboxed tuple, because the result
61 might be a Haskell closure pointer, we don't want to evaluate it. -}
62
63 ----------------------------------
64 cgOpApp :: StgOp -- The op
65 -> [StgArg] -- Arguments
66 -> Type -- Result type (always an unboxed tuple)
67 -> FCode ReturnKind
68
69 -- Foreign calls
70 cgOpApp (StgFCallOp fcall _) stg_args res_ty
71 = cgForeignCall fcall stg_args res_ty
72 -- Note [Foreign call results]
73
74 -- tagToEnum# is special: we need to pull the constructor
75 -- out of the table, and perform an appropriate return.
76
77 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
78 = ASSERT(isEnumerationTyCon tycon)
79 do { dflags <- getDynFlags
80 ; args' <- getNonVoidArgAmodes [arg]
81 ; let amode = case args' of [amode] -> amode
82 _ -> panic "TagToEnumOp had void arg"
83 ; emitReturn [tagToClosure dflags tycon amode] }
84 where
85 -- If you're reading this code in the attempt to figure
86 -- out why the compiler panic'ed here, it is probably because
87 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
88 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
89 -- That won't work.
90 tycon = tyConAppTyCon res_ty
91
92 cgOpApp (StgPrimOp primop) args res_ty = do
93 dflags <- getDynFlags
94 cmm_args <- getNonVoidArgAmodes args
95 case shouldInlinePrimOp dflags primop cmm_args of
96 Nothing -> do -- out-of-line
97 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
98 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
99
100 Just f -- inline
101 | ReturnsPrim VoidRep <- result_info
102 -> do f []
103 emitReturn []
104
105 | ReturnsPrim rep <- result_info
106 -> do dflags <- getDynFlags
107 res <- newTemp (primRepCmmType dflags rep)
108 f [res]
109 emitReturn [CmmReg (CmmLocal res)]
110
111 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
112 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
113 f regs
114 emitReturn (map (CmmReg . CmmLocal) regs)
115
116 | otherwise -> panic "cgPrimop"
117 where
118 result_info = getPrimOpResultInfo primop
119
120 cgOpApp (StgPrimCallOp primcall) args _res_ty
121 = do { cmm_args <- getNonVoidArgAmodes args
122 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
123 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
124
125 -- | Interpret the argument as an unsigned value, assuming the value
126 -- is given in two-complement form in the given width.
127 --
128 -- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
129 --
130 -- This function is used to work around the fact that many array
131 -- primops take Int# arguments, but we interpret them as unsigned
132 -- quantities in the code gen. This means that we have to be careful
133 -- every time we work on e.g. a CmmInt literal that corresponds to the
134 -- array size, as it might contain a negative Integer value if the
135 -- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
136 -- literal.
137 asUnsigned :: Width -> Integer -> Integer
138 asUnsigned w n = n .&. (bit (widthInBits w) - 1)
139
140 -- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
141 -- ByteOff (or some other fixed width signed type) to represent
142 -- array sizes or indices. This means that these will overflow for
143 -- large enough sizes.
144
145 -- | Decide whether an out-of-line primop should be replaced by an
146 -- inline implementation. This might happen e.g. if there's enough
147 -- static information, such as statically know arguments, to emit a
148 -- more efficient implementation inline.
149 --
150 -- Returns 'Nothing' if this primop should use its out-of-line
151 -- implementation (defined elsewhere) and 'Just' together with a code
152 -- generating function that takes the output regs as arguments
153 -- otherwise.
154 shouldInlinePrimOp :: DynFlags
155 -> PrimOp -- ^ The primop
156 -> [CmmExpr] -- ^ The primop arguments
157 -> Maybe ([LocalReg] -> FCode ())
158
159 shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
160 | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
161 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
162
163 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
164 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
165 Just $ \ [res] ->
166 doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
167 [ (mkIntExpr dflags (fromInteger n),
168 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
169 , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
170 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
171 ]
172 (fromInteger n) init
173
174 shouldInlinePrimOp _ CopyArrayOp
175 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
176 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
177
178 shouldInlinePrimOp _ CopyMutableArrayOp
179 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
180 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
181
182 shouldInlinePrimOp _ CopyArrayArrayOp
183 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
184 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
185
186 shouldInlinePrimOp _ CopyMutableArrayArrayOp
187 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
188 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
189
190 shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
191 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
192 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
193
194 shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
195 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
196 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
197
198 shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
199 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
200 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
201
202 shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
203 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
204 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
205
206 shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
207 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
208 Just $ \ [res] ->
209 doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
210 [ (mkIntExpr dflags (fromInteger n),
211 fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
212 ]
213 (fromInteger n) init
214
215 shouldInlinePrimOp _ CopySmallArrayOp
216 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
217 Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
218
219 shouldInlinePrimOp _ CopySmallMutableArrayOp
220 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
221 Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
222
223 shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
224 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
225 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
226
227 shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
228 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
229 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
230
231 shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
232 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
233 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
234
235 shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
236 | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
237 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
238
239 shouldInlinePrimOp dflags primop args
240 | primOpOutOfLine primop = Nothing
241 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
242
243 -- TODO: Several primops, such as 'copyArray#', only have an inline
244 -- implementation (below) but could possibly have both an inline
245 -- implementation and an out-of-line implementation, just like
246 -- 'newArray#'. This would lower the amount of code generated,
247 -- hopefully without a performance impact (needs to be measured).
248
249 ---------------------------------------------------
250 cgPrimOp :: [LocalReg] -- where to put the results
251 -> PrimOp -- the op
252 -> [StgArg] -- arguments
253 -> FCode ()
254
255 cgPrimOp results op args
256 = do dflags <- getDynFlags
257 arg_exprs <- getNonVoidArgAmodes args
258 emitPrimOp dflags results op arg_exprs
259
260
261 ------------------------------------------------------------------------
262 -- Emitting code for a primop
263 ------------------------------------------------------------------------
264
265 emitPrimOp :: DynFlags
266 -> [LocalReg] -- where to put the results
267 -> PrimOp -- the op
268 -> [CmmExpr] -- arguments
269 -> FCode ()
270
271 -- First we handle various awkward cases specially. The remaining
272 -- easy cases are then handled by translateOp, defined below.
273
274 emitPrimOp _ [res] ParOp [arg]
275 =
276 -- for now, just implement this in a C function
277 -- later, we might want to inline it.
278 emitCCall
279 [(res,NoHint)]
280 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
281 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
282
283 emitPrimOp dflags [res] SparkOp [arg]
284 = do
285 -- returns the value of arg in res. We're going to therefore
286 -- refer to arg twice (once to pass to newSpark(), and once to
287 -- assign to res), so put it in a temporary.
288 tmp <- assignTemp arg
289 tmp2 <- newTemp (bWord dflags)
290 emitCCall
291 [(tmp2,NoHint)]
292 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
293 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
294 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
295
296 emitPrimOp dflags [res] GetCCSOfOp [arg]
297 = emitAssign (CmmLocal res) val
298 where
299 val
300 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
301 | otherwise = CmmLit (zeroCLit dflags)
302
303 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
304 = emitAssign (CmmLocal res) curCCS
305
306 emitPrimOp dflags [res] ReadMutVarOp [mutv]
307 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
308
309 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
310 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
311 emitCCall
312 [{-no results-}]
313 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
314 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
315
316 -- #define sizzeofByteArrayzh(r,a) \
317 -- r = ((StgArrWords *)(a))->bytes
318 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
319 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
320
321 -- #define sizzeofMutableByteArrayzh(r,a) \
322 -- r = ((StgArrWords *)(a))->bytes
323 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
324 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
325
326
327 -- #define touchzh(o) /* nothing */
328 emitPrimOp _ res@[] TouchOp args@[_arg]
329 = do emitPrimCall res MO_Touch args
330
331 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
332 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
333 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
334
335 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
336 emitPrimOp dflags [res] StableNameToIntOp [arg]
337 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
338
339 -- #define eqStableNamezh(r,sn1,sn2) \
340 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
341 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
342 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
343 cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags),
344 cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags)
345 ])
346
347
348 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
349 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
350
351 -- #define addrToHValuezh(r,a) r=(P_)a
352 emitPrimOp _ [res] AddrToAnyOp [arg]
353 = emitAssign (CmmLocal res) arg
354
355 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
356 -- Note: argument may be tagged!
357 emitPrimOp dflags [res] DataToTagOp [arg]
358 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
359
360 {- Freezing arrays-of-ptrs requires changing an info table, for the
361 benefit of the generational collector. It needs to scavenge mutable
362 objects, even if they are in old space. When they become immutable,
363 they can be removed from this scavenge list. -}
364
365 -- #define unsafeFreezzeArrayzh(r,a)
366 -- {
367 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
368 -- r = a;
369 -- }
370 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
371 = emit $ catAGraphs
372 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
373 mkAssign (CmmLocal res) arg ]
374 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
375 = emit $ catAGraphs
376 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
377 mkAssign (CmmLocal res) arg ]
378 emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
379 = emit $ catAGraphs
380 [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)),
381 mkAssign (CmmLocal res) arg ]
382
383 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
384 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
385 = emitAssign (CmmLocal res) arg
386
387 -- Reading/writing pointer arrays
388
389 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
390 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
391 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
392
393 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
394 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
395 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
396 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
397 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
398 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
399 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
400 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
401 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
402 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
403
404 emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
405 emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
406 emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
407
408 -- Getting the size of pointer arrays
409
410 emitPrimOp dflags [res] SizeofArrayOp [arg]
411 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags))
412 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
413 = emitPrimOp dflags [res] SizeofArrayOp [arg]
414 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
415 = emitPrimOp dflags [res] SizeofArrayOp [arg]
416 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
417 = emitPrimOp dflags [res] SizeofArrayOp [arg]
418
419 emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
420 emit $ mkAssign (CmmLocal res)
421 (cmmLoadIndexW dflags arg
422 (fixedHdrSizeW dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags) (bWord dflags))
423 emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
424 emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
425
426 -- IndexXXXoffAddr
427
428 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
429 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
430 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
431 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
432 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
433 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
434 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
435 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
436 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
437 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
438 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
439 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
440 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
441 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
442 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
443 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
444
445 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
446
447 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
448 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
449 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
450 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
451 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
452 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
453 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
454 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
455 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
456 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
457 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
458 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
459 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
460 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
461 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
462 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
463
464 -- IndexXXXArray
465
466 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
467 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
468 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
469 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
470 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
471 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
472 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
473 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
474 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
475 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
476 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
477 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
478 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
479 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
480 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
481 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
482
483 -- ReadXXXArray, identical to IndexXXXArray.
484
485 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
486 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
487 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
488 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
489 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
490 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
491 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
492 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
493 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
494 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
495 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
496 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
497 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
498 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
499 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
500 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
501
502 -- WriteXXXoffAddr
503
504 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
505 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
506 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
507 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
508 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
509 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
510 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
511 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
512 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
513 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
514 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
515 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
516 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
517 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
518 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
519 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
520
521 -- WriteXXXArray
522
523 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
524 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
525 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
526 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
527 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
528 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
529 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
530 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
531 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
532 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
533 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
534 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
535 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
536 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
537 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
538 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
539
540 -- Copying and setting byte arrays
541 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
542 doCopyByteArrayOp src src_off dst dst_off n
543 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
544 doCopyMutableByteArrayOp src src_off dst dst_off n
545 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
546 doCopyByteArrayToAddrOp src src_off dst n
547 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
548 doCopyMutableByteArrayToAddrOp src src_off dst n
549 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
550 doCopyAddrToByteArrayOp src dst dst_off n
551 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
552 doSetByteArrayOp ba off len c
553
554 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
555 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
556 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
557 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
558
559 -- Population count
560 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
561 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
562 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
563 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
564 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
565
566 -- count leading zeros
567 emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
568 emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
569 emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
570 emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
571 emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
572
573 -- count trailing zeros
574 emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
575 emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
576 emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
577 emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
578 emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
579
580 -- Unsigned int to floating point conversions
581 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
582 (MO_UF_Conv W32) [w]
583 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
584 (MO_UF_Conv W64) [w]
585
586 -- SIMD primops
587 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
588 checkVecCompatibility dflags vcat n w
589 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
590 where
591 zeros :: CmmExpr
592 zeros = CmmLit $ CmmVec (replicate n zero)
593
594 zero :: CmmLit
595 zero = case vcat of
596 IntVec -> CmmInt 0 w
597 WordVec -> CmmInt 0 w
598 FloatVec -> CmmFloat 0 w
599
600 ty :: CmmType
601 ty = vecVmmType vcat n w
602
603 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
604 checkVecCompatibility dflags vcat n w
605 when (length es /= n) $
606 panic "emitPrimOp: VecPackOp has wrong number of arguments"
607 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
608 where
609 zeros :: CmmExpr
610 zeros = CmmLit $ CmmVec (replicate n zero)
611
612 zero :: CmmLit
613 zero = case vcat of
614 IntVec -> CmmInt 0 w
615 WordVec -> CmmInt 0 w
616 FloatVec -> CmmFloat 0 w
617
618 ty :: CmmType
619 ty = vecVmmType vcat n w
620
621 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
622 checkVecCompatibility dflags vcat n w
623 when (length res /= n) $
624 panic "emitPrimOp: VecUnpackOp has wrong number of results"
625 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
626 where
627 ty :: CmmType
628 ty = vecVmmType vcat n w
629
630 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
631 checkVecCompatibility dflags vcat n w
632 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
633 where
634 ty :: CmmType
635 ty = vecVmmType vcat n w
636
637 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
638 checkVecCompatibility dflags vcat n w
639 doIndexByteArrayOp Nothing ty res args
640 where
641 ty :: CmmType
642 ty = vecVmmType vcat n w
643
644 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
645 checkVecCompatibility dflags vcat n w
646 doIndexByteArrayOp Nothing ty res args
647 where
648 ty :: CmmType
649 ty = vecVmmType vcat n w
650
651 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
652 checkVecCompatibility dflags vcat n w
653 doWriteByteArrayOp Nothing ty res args
654 where
655 ty :: CmmType
656 ty = vecVmmType vcat n w
657
658 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
659 checkVecCompatibility dflags vcat n w
660 doIndexOffAddrOp Nothing ty res args
661 where
662 ty :: CmmType
663 ty = vecVmmType vcat n w
664
665 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
666 checkVecCompatibility dflags vcat n w
667 doIndexOffAddrOp Nothing ty res args
668 where
669 ty :: CmmType
670 ty = vecVmmType vcat n w
671
672 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
673 checkVecCompatibility dflags vcat n w
674 doWriteOffAddrOp Nothing ty res args
675 where
676 ty :: CmmType
677 ty = vecVmmType vcat n w
678
679 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
680 checkVecCompatibility dflags vcat n w
681 doIndexByteArrayOpAs Nothing vecty ty res args
682 where
683 vecty :: CmmType
684 vecty = vecVmmType vcat n w
685
686 ty :: CmmType
687 ty = vecCmmCat vcat w
688
689 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
690 checkVecCompatibility dflags vcat n w
691 doIndexByteArrayOpAs Nothing vecty ty res args
692 where
693 vecty :: CmmType
694 vecty = vecVmmType vcat n w
695
696 ty :: CmmType
697 ty = vecCmmCat vcat w
698
699 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
700 checkVecCompatibility dflags vcat n w
701 doWriteByteArrayOp Nothing ty res args
702 where
703 ty :: CmmType
704 ty = vecCmmCat vcat w
705
706 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
707 checkVecCompatibility dflags vcat n w
708 doIndexOffAddrOpAs Nothing vecty ty res args
709 where
710 vecty :: CmmType
711 vecty = vecVmmType vcat n w
712
713 ty :: CmmType
714 ty = vecCmmCat vcat w
715
716 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
717 checkVecCompatibility dflags vcat n w
718 doIndexOffAddrOpAs Nothing vecty ty res args
719 where
720 vecty :: CmmType
721 vecty = vecVmmType vcat n w
722
723 ty :: CmmType
724 ty = vecCmmCat vcat w
725
726 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
727 checkVecCompatibility dflags vcat n w
728 doWriteOffAddrOp Nothing ty res args
729 where
730 ty :: CmmType
731 ty = vecCmmCat vcat w
732
733 -- Prefetch
734 emitPrimOp _ res PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
735 emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
736 emitPrimOp _ res PrefetchAddrOp3 args = doPrefetchAddrOp 3 res args
737
738 emitPrimOp _ res PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
739 emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
740 emitPrimOp _ res PrefetchAddrOp2 args = doPrefetchAddrOp 2 res args
741
742 emitPrimOp _ res PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
743 emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
744 emitPrimOp _ res PrefetchAddrOp1 args = doPrefetchAddrOp 1 res args
745
746 emitPrimOp _ res PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
747 emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
748 emitPrimOp _ res PrefetchAddrOp0 args = doPrefetchAddrOp 0 res args
749
750 -- Atomic read-modify-write
751 emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
752 doAtomicRMW res AMO_Add mba ix (bWord dflags) n
753 emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
754 doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
755 emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
756 doAtomicRMW res AMO_And mba ix (bWord dflags) n
757 emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
758 doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
759 emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
760 doAtomicRMW res AMO_Or mba ix (bWord dflags) n
761 emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
762 doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
763 emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
764 doAtomicReadByteArray res mba ix (bWord dflags)
765 emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
766 doAtomicWriteByteArray mba ix (bWord dflags) val
767 emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
768 doCasByteArray res mba ix (bWord dflags) old new
769
770 -- The rest just translate straightforwardly
771 emitPrimOp dflags [res] op [arg]
772 | nopOp op
773 = emitAssign (CmmLocal res) arg
774
775 | Just (mop,rep) <- narrowOp op
776 = emitAssign (CmmLocal res) $
777 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
778
779 emitPrimOp dflags r@[res] op args
780 | Just prim <- callishOp op
781 = do emitPrimCall r prim args
782
783 | Just mop <- translateOp dflags op
784 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
785 emit stmt
786
787 emitPrimOp dflags results op args
788 = case callishPrimOpSupported dflags op of
789 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
790 Right gen -> gen results args
791
792 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
793
794 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
795 callishPrimOpSupported dflags op
796 = case op of
797 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
798 | otherwise -> Right (genericIntQuotRemOp dflags)
799
800 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
801 | otherwise -> Right (genericWordQuotRemOp dflags)
802
803 WordQuotRem2Op | ncg && x86ish -> Left (MO_U_QuotRem2 (wordWidth dflags))
804 | otherwise -> Right (genericWordQuotRem2Op dflags)
805
806 WordAdd2Op | ncg && x86ish -> Left (MO_Add2 (wordWidth dflags))
807 | otherwise -> Right genericWordAdd2Op
808
809 IntAddCOp -> Right genericIntAddCOp
810
811 IntSubCOp -> Right genericIntSubCOp
812
813 WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags))
814 | otherwise -> Right genericWordMul2Op
815
816 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
817 where
818 ncg = case hscTarget dflags of
819 HscAsm -> True
820 _ -> False
821
822 x86ish = case platformArch (targetPlatform dflags) of
823 ArchX86 -> True
824 ArchX86_64 -> True
825 _ -> False
826
827 genericIntQuotRemOp :: DynFlags -> GenericOp
828 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
829 = emit $ mkAssign (CmmLocal res_q)
830 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
831 mkAssign (CmmLocal res_r)
832 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
833 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
834
835 genericWordQuotRemOp :: DynFlags -> GenericOp
836 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
837 = emit $ mkAssign (CmmLocal res_q)
838 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
839 mkAssign (CmmLocal res_r)
840 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
841 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
842
843 genericWordQuotRem2Op :: DynFlags -> GenericOp
844 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
845 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
846 where ty = cmmExprType dflags arg_x_high
847 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
848 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
849 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
850 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
851 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
852 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
853 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
854 zero = lit 0
855 one = lit 1
856 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
857 lit i = CmmLit (CmmInt i (wordWidth dflags))
858
859 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
860 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
861 mkAssign (CmmLocal res_r) high)
862 f i acc high low =
863 do roverflowedBit <- newTemp ty
864 rhigh' <- newTemp ty
865 rhigh'' <- newTemp ty
866 rlow' <- newTemp ty
867 risge <- newTemp ty
868 racc' <- newTemp ty
869 let high' = CmmReg (CmmLocal rhigh')
870 isge = CmmReg (CmmLocal risge)
871 overflowedBit = CmmReg (CmmLocal roverflowedBit)
872 let this = catAGraphs
873 [mkAssign (CmmLocal roverflowedBit)
874 (shr high negone),
875 mkAssign (CmmLocal rhigh')
876 (or (shl high one) (shr low negone)),
877 mkAssign (CmmLocal rlow')
878 (shl low one),
879 mkAssign (CmmLocal risge)
880 (or (overflowedBit `ne` zero)
881 (high' `ge` arg_y)),
882 mkAssign (CmmLocal rhigh'')
883 (high' `minus` (arg_y `times` isge)),
884 mkAssign (CmmLocal racc')
885 (or (shl acc one) isge)]
886 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
887 (CmmReg (CmmLocal rhigh''))
888 (CmmReg (CmmLocal rlow'))
889 return (this <*> rest)
890 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
891
892 genericWordAdd2Op :: GenericOp
893 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
894 = do dflags <- getDynFlags
895 r1 <- newTemp (cmmExprType dflags arg_x)
896 r2 <- newTemp (cmmExprType dflags arg_x)
897 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
898 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
899 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
900 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
901 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
902 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
903 (wordWidth dflags))
904 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
905 emit $ catAGraphs
906 [mkAssign (CmmLocal r1)
907 (add (bottomHalf arg_x) (bottomHalf arg_y)),
908 mkAssign (CmmLocal r2)
909 (add (topHalf (CmmReg (CmmLocal r1)))
910 (add (topHalf arg_x) (topHalf arg_y))),
911 mkAssign (CmmLocal res_h)
912 (topHalf (CmmReg (CmmLocal r2))),
913 mkAssign (CmmLocal res_l)
914 (or (toTopHalf (CmmReg (CmmLocal r2)))
915 (bottomHalf (CmmReg (CmmLocal r1))))]
916 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
917
918 genericIntAddCOp :: GenericOp
919 genericIntAddCOp [res_r, res_c] [aa, bb]
920 {-
921 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
922 C, and without needing any comparisons. This may not be the
923 fastest way to do it - if you have better code, please send it! --SDM
924
925 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
926
927 We currently don't make use of the r value if c is != 0 (i.e.
928 overflow), we just convert to big integers and try again. This
929 could be improved by making r and c the correct values for
930 plugging into a new J#.
931
932 { r = ((I_)(a)) + ((I_)(b)); \
933 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
934 >> (BITS_IN (I_) - 1); \
935 }
936 Wading through the mass of bracketry, it seems to reduce to:
937 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
938
939 -}
940 = do dflags <- getDynFlags
941 emit $ catAGraphs [
942 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
943 mkAssign (CmmLocal res_c) $
944 CmmMachOp (mo_wordUShr dflags) [
945 CmmMachOp (mo_wordAnd dflags) [
946 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
947 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
948 ],
949 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
950 ]
951 ]
952 genericIntAddCOp _ _ = panic "genericIntAddCOp"
953
954 genericIntSubCOp :: GenericOp
955 genericIntSubCOp [res_r, res_c] [aa, bb]
956 {- Similarly:
957 #define subIntCzh(r,c,a,b) \
958 { r = ((I_)(a)) - ((I_)(b)); \
959 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
960 >> (BITS_IN (I_) - 1); \
961 }
962
963 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
964 -}
965 = do dflags <- getDynFlags
966 emit $ catAGraphs [
967 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
968 mkAssign (CmmLocal res_c) $
969 CmmMachOp (mo_wordUShr dflags) [
970 CmmMachOp (mo_wordAnd dflags) [
971 CmmMachOp (mo_wordXor dflags) [aa,bb],
972 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
973 ],
974 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
975 ]
976 ]
977 genericIntSubCOp _ _ = panic "genericIntSubCOp"
978
979 genericWordMul2Op :: GenericOp
980 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
981 = do dflags <- getDynFlags
982 let t = cmmExprType dflags arg_x
983 xlyl <- liftM CmmLocal $ newTemp t
984 xlyh <- liftM CmmLocal $ newTemp t
985 xhyl <- liftM CmmLocal $ newTemp t
986 r <- liftM CmmLocal $ newTemp t
987 -- This generic implementation is very simple and slow. We might
988 -- well be able to do better, but for now this at least works.
989 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
990 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
991 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
992 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
993 sum = foldl1 add
994 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
995 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
996 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
997 (wordWidth dflags))
998 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
999 emit $ catAGraphs
1000 [mkAssign xlyl
1001 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
1002 mkAssign xlyh
1003 (mul (bottomHalf arg_x) (topHalf arg_y)),
1004 mkAssign xhyl
1005 (mul (topHalf arg_x) (bottomHalf arg_y)),
1006 mkAssign r
1007 (sum [topHalf (CmmReg xlyl),
1008 bottomHalf (CmmReg xhyl),
1009 bottomHalf (CmmReg xlyh)]),
1010 mkAssign (CmmLocal res_l)
1011 (or (bottomHalf (CmmReg xlyl))
1012 (toTopHalf (CmmReg r))),
1013 mkAssign (CmmLocal res_h)
1014 (sum [mul (topHalf arg_x) (topHalf arg_y),
1015 topHalf (CmmReg xhyl),
1016 topHalf (CmmReg xlyh),
1017 topHalf (CmmReg r)])]
1018 genericWordMul2Op _ _ = panic "genericWordMul2Op"
1019
1020 -- These PrimOps are NOPs in Cmm
1021
1022 nopOp :: PrimOp -> Bool
1023 nopOp Int2WordOp = True
1024 nopOp Word2IntOp = True
1025 nopOp Int2AddrOp = True
1026 nopOp Addr2IntOp = True
1027 nopOp ChrOp = True -- Int# and Char# are rep'd the same
1028 nopOp OrdOp = True
1029 nopOp _ = False
1030
1031 -- These PrimOps turn into double casts
1032
1033 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
1034 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
1035 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
1036 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
1037 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
1038 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
1039 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
1040 narrowOp _ = Nothing
1041
1042 -- Native word signless ops
1043
1044 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
1045 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
1046 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
1047 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
1048 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
1049 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
1050 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
1051
1052 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
1053 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
1054 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
1055 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
1056 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
1057 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
1058
1059 translateOp dflags AndOp = Just (mo_wordAnd dflags)
1060 translateOp dflags OrOp = Just (mo_wordOr dflags)
1061 translateOp dflags XorOp = Just (mo_wordXor dflags)
1062 translateOp dflags NotOp = Just (mo_wordNot dflags)
1063 translateOp dflags SllOp = Just (mo_wordShl dflags)
1064 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
1065
1066 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
1067
1068 -- Native word signed ops
1069
1070 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
1071 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
1072 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
1073 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
1074 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
1075
1076
1077 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
1078 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
1079 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
1080 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
1081
1082 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
1083 translateOp dflags OrIOp = Just (mo_wordOr dflags)
1084 translateOp dflags XorIOp = Just (mo_wordXor dflags)
1085 translateOp dflags NotIOp = Just (mo_wordNot dflags)
1086 translateOp dflags ISllOp = Just (mo_wordShl dflags)
1087 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
1088 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
1089
1090 -- Native word unsigned ops
1091
1092 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
1093 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
1094 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
1095 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
1096
1097 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
1098 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
1099 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
1100
1101 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
1102 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
1103 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
1104 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
1105
1106 -- Char# ops
1107
1108 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
1109 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
1110 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
1111 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
1112 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
1113 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
1114
1115 -- Double ops
1116
1117 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
1118 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
1119 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
1120 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
1121 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
1122 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
1123
1124 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
1125 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
1126 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
1127 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
1128 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
1129
1130 -- Float ops
1131
1132 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
1133 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
1134 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
1135 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1136 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1137 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1138
1139 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1140 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1141 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1142 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1143 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1144
1145 -- Vector ops
1146
1147 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1148 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1149 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1150 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1151 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1152
1153 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1154 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1155 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1156 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1157 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1158 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1159
1160 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1161 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1162 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1163 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1164 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1165
1166 -- Conversions
1167
1168 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1169 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1170
1171 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1172 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1173
1174 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1175 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1176
1177 -- Word comparisons masquerading as more exotic things.
1178
1179 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1180 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1181 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1182 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1183 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1184 translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
1185 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1186 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1187
1188 translateOp _ _ = Nothing
1189
1190 -- These primops are implemented by CallishMachOps, because they sometimes
1191 -- turn into foreign calls depending on the backend.
1192
1193 callishOp :: PrimOp -> Maybe CallishMachOp
1194 callishOp DoublePowerOp = Just MO_F64_Pwr
1195 callishOp DoubleSinOp = Just MO_F64_Sin
1196 callishOp DoubleCosOp = Just MO_F64_Cos
1197 callishOp DoubleTanOp = Just MO_F64_Tan
1198 callishOp DoubleSinhOp = Just MO_F64_Sinh
1199 callishOp DoubleCoshOp = Just MO_F64_Cosh
1200 callishOp DoubleTanhOp = Just MO_F64_Tanh
1201 callishOp DoubleAsinOp = Just MO_F64_Asin
1202 callishOp DoubleAcosOp = Just MO_F64_Acos
1203 callishOp DoubleAtanOp = Just MO_F64_Atan
1204 callishOp DoubleLogOp = Just MO_F64_Log
1205 callishOp DoubleExpOp = Just MO_F64_Exp
1206 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1207
1208 callishOp FloatPowerOp = Just MO_F32_Pwr
1209 callishOp FloatSinOp = Just MO_F32_Sin
1210 callishOp FloatCosOp = Just MO_F32_Cos
1211 callishOp FloatTanOp = Just MO_F32_Tan
1212 callishOp FloatSinhOp = Just MO_F32_Sinh
1213 callishOp FloatCoshOp = Just MO_F32_Cosh
1214 callishOp FloatTanhOp = Just MO_F32_Tanh
1215 callishOp FloatAsinOp = Just MO_F32_Asin
1216 callishOp FloatAcosOp = Just MO_F32_Acos
1217 callishOp FloatAtanOp = Just MO_F32_Atan
1218 callishOp FloatLogOp = Just MO_F32_Log
1219 callishOp FloatExpOp = Just MO_F32_Exp
1220 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1221
1222 callishOp _ = Nothing
1223
1224 ------------------------------------------------------------------------------
1225 -- Helpers for translating various minor variants of array indexing.
1226
1227 doIndexOffAddrOp :: Maybe MachOp
1228 -> CmmType
1229 -> [LocalReg]
1230 -> [CmmExpr]
1231 -> FCode ()
1232 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1233 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1234 doIndexOffAddrOp _ _ _ _
1235 = panic "StgCmmPrim: doIndexOffAddrOp"
1236
1237 doIndexOffAddrOpAs :: Maybe MachOp
1238 -> CmmType
1239 -> CmmType
1240 -> [LocalReg]
1241 -> [CmmExpr]
1242 -> FCode ()
1243 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1244 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1245 doIndexOffAddrOpAs _ _ _ _ _
1246 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1247
1248 doIndexByteArrayOp :: Maybe MachOp
1249 -> CmmType
1250 -> [LocalReg]
1251 -> [CmmExpr]
1252 -> FCode ()
1253 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1254 = do dflags <- getDynFlags
1255 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1256 doIndexByteArrayOp _ _ _ _
1257 = panic "StgCmmPrim: doIndexByteArrayOp"
1258
1259 doIndexByteArrayOpAs :: Maybe MachOp
1260 -> CmmType
1261 -> CmmType
1262 -> [LocalReg]
1263 -> [CmmExpr]
1264 -> FCode ()
1265 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1266 = do dflags <- getDynFlags
1267 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1268 doIndexByteArrayOpAs _ _ _ _ _
1269 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1270
1271 doReadPtrArrayOp :: LocalReg
1272 -> CmmExpr
1273 -> CmmExpr
1274 -> FCode ()
1275 doReadPtrArrayOp res addr idx
1276 = do dflags <- getDynFlags
1277 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1278
1279 doWriteOffAddrOp :: Maybe MachOp
1280 -> CmmType
1281 -> [LocalReg]
1282 -> [CmmExpr]
1283 -> FCode ()
1284 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1285 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1286 doWriteOffAddrOp _ _ _ _
1287 = panic "StgCmmPrim: doWriteOffAddrOp"
1288
1289 doWriteByteArrayOp :: Maybe MachOp
1290 -> CmmType
1291 -> [LocalReg]
1292 -> [CmmExpr]
1293 -> FCode ()
1294 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1295 = do dflags <- getDynFlags
1296 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1297 doWriteByteArrayOp _ _ _ _
1298 = panic "StgCmmPrim: doWriteByteArrayOp"
1299
1300 doWritePtrArrayOp :: CmmExpr
1301 -> CmmExpr
1302 -> CmmExpr
1303 -> FCode ()
1304 doWritePtrArrayOp addr idx val
1305 = do dflags <- getDynFlags
1306 let ty = cmmExprType dflags val
1307 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1308 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1309 -- the write barrier. We must write a byte into the mark table:
1310 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1311 emit $ mkStore (
1312 cmmOffsetExpr dflags
1313 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1314 (loadArrPtrsSize dflags addr))
1315 (CmmMachOp (mo_wordUShr dflags) [idx,
1316 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1317 ) (CmmLit (CmmInt 1 W8))
1318
1319 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1320 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1321 where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1322
1323 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1324 -> Maybe MachOp -- Optional result cast
1325 -> CmmType -- Type of element we are accessing
1326 -> LocalReg -- Destination
1327 -> CmmExpr -- Base address
1328 -> CmmType -- Type of element by which we are indexing
1329 -> CmmExpr -- Index
1330 -> FCode ()
1331 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1332 = do dflags <- getDynFlags
1333 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1334 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1335 = do dflags <- getDynFlags
1336 emitAssign (CmmLocal res) (CmmMachOp cast [
1337 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1338
1339 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1340 -> Maybe MachOp -- Optional value cast
1341 -> CmmExpr -- Base address
1342 -> CmmType -- Type of element by which we are indexing
1343 -> CmmExpr -- Index
1344 -> CmmExpr -- Value to write
1345 -> FCode ()
1346 mkBasicIndexedWrite off Nothing base idx_ty idx val
1347 = do dflags <- getDynFlags
1348 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1349 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1350 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1351
1352 -- ----------------------------------------------------------------------------
1353 -- Misc utils
1354
1355 cmmIndexOffExpr :: DynFlags
1356 -> ByteOff -- Initial offset in bytes
1357 -> Width -- Width of element by which we are indexing
1358 -> CmmExpr -- Base address
1359 -> CmmExpr -- Index
1360 -> CmmExpr
1361 cmmIndexOffExpr dflags off width base idx
1362 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1363
1364 cmmLoadIndexOffExpr :: DynFlags
1365 -> ByteOff -- Initial offset in bytes
1366 -> CmmType -- Type of element we are accessing
1367 -> CmmExpr -- Base address
1368 -> CmmType -- Type of element by which we are indexing
1369 -> CmmExpr -- Index
1370 -> CmmExpr
1371 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1372 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1373
1374 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1375 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1376
1377 ------------------------------------------------------------------------------
1378 -- Helpers for translating vector primops.
1379
1380 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1381 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1382
1383 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1384 vecCmmCat IntVec = cmmBits
1385 vecCmmCat WordVec = cmmBits
1386 vecCmmCat FloatVec = cmmFloat
1387
1388 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1389 vecElemInjectCast _ FloatVec _ = Nothing
1390 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1391 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1392 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1393 vecElemInjectCast _ IntVec W64 = Nothing
1394 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1395 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1396 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1397 vecElemInjectCast _ WordVec W64 = Nothing
1398 vecElemInjectCast _ _ _ = Nothing
1399
1400 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1401 vecElemProjectCast _ FloatVec _ = Nothing
1402 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1403 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1404 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1405 vecElemProjectCast _ IntVec W64 = Nothing
1406 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1407 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1408 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1409 vecElemProjectCast _ WordVec W64 = Nothing
1410 vecElemProjectCast _ _ _ = Nothing
1411
1412 -- Check to make sure that we can generate code for the specified vector type
1413 -- given the current set of dynamic flags.
1414 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1415 checkVecCompatibility dflags vcat l w = do
1416 when (hscTarget dflags /= HscLlvm) $ do
1417 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1418 ,"Please use -fllvm."]
1419 check vecWidth vcat l w
1420 where
1421 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1422 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1423 sorry $ "128-bit wide single-precision floating point " ++
1424 "SIMD vector instructions require at least -msse."
1425 check W128 _ _ _ | not (isSse2Enabled dflags) =
1426 sorry $ "128-bit wide integer and double precision " ++
1427 "SIMD vector instructions require at least -msse2."
1428 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1429 sorry $ "256-bit wide floating point " ++
1430 "SIMD vector instructions require at least -mavx."
1431 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1432 sorry $ "256-bit wide integer " ++
1433 "SIMD vector instructions require at least -mavx2."
1434 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1435 sorry $ "512-bit wide " ++
1436 "SIMD vector instructions require -mavx512f."
1437 check _ _ _ _ = return ()
1438
1439 vecWidth = typeWidth (vecVmmType vcat l w)
1440
1441 ------------------------------------------------------------------------------
1442 -- Helpers for translating vector packing and unpacking.
1443
1444 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1445 -> CmmType -- Type of vector
1446 -> CmmExpr -- Initial vector
1447 -> [CmmExpr] -- Elements
1448 -> CmmFormal -- Destination for result
1449 -> FCode ()
1450 doVecPackOp maybe_pre_write_cast ty z es res = do
1451 dst <- newTemp ty
1452 emitAssign (CmmLocal dst) z
1453 vecPack dst es 0
1454 where
1455 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1456 vecPack src [] _ =
1457 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1458
1459 vecPack src (e : es) i = do
1460 dst <- newTemp ty
1461 if isFloatType (vecElemType ty)
1462 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1463 [CmmReg (CmmLocal src), cast e, iLit])
1464 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1465 [CmmReg (CmmLocal src), cast e, iLit])
1466 vecPack dst es (i + 1)
1467 where
1468 -- vector indices are always 32-bits
1469 iLit = CmmLit (CmmInt (toInteger i) W32)
1470
1471 cast :: CmmExpr -> CmmExpr
1472 cast val = case maybe_pre_write_cast of
1473 Nothing -> val
1474 Just cast -> CmmMachOp cast [val]
1475
1476 len :: Length
1477 len = vecLength ty
1478
1479 wid :: Width
1480 wid = typeWidth (vecElemType ty)
1481
1482 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1483 -> CmmType -- Type of vector
1484 -> CmmExpr -- Vector
1485 -> [CmmFormal] -- Element results
1486 -> FCode ()
1487 doVecUnpackOp maybe_post_read_cast ty e res =
1488 vecUnpack res 0
1489 where
1490 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1491 vecUnpack [] _ =
1492 return ()
1493
1494 vecUnpack (r : rs) i = do
1495 if isFloatType (vecElemType ty)
1496 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1497 [e, iLit]))
1498 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1499 [e, iLit]))
1500 vecUnpack rs (i + 1)
1501 where
1502 -- vector indices are always 32-bits
1503 iLit = CmmLit (CmmInt (toInteger i) W32)
1504
1505 cast :: CmmExpr -> CmmExpr
1506 cast val = case maybe_post_read_cast of
1507 Nothing -> val
1508 Just cast -> CmmMachOp cast [val]
1509
1510 len :: Length
1511 len = vecLength ty
1512
1513 wid :: Width
1514 wid = typeWidth (vecElemType ty)
1515
1516 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1517 -> CmmType -- Vector type
1518 -> CmmExpr -- Source vector
1519 -> CmmExpr -- Element
1520 -> CmmExpr -- Index at which to insert element
1521 -> CmmFormal -- Destination for result
1522 -> FCode ()
1523 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1524 dflags <- getDynFlags
1525 -- vector indices are always 32-bits
1526 let idx' :: CmmExpr
1527 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1528 if isFloatType (vecElemType ty)
1529 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1530 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1531 where
1532 cast :: CmmExpr -> CmmExpr
1533 cast val = case maybe_pre_write_cast of
1534 Nothing -> val
1535 Just cast -> CmmMachOp cast [val]
1536
1537 len :: Length
1538 len = vecLength ty
1539
1540 wid :: Width
1541 wid = typeWidth (vecElemType ty)
1542
1543 ------------------------------------------------------------------------------
1544 -- Helpers for translating prefetching.
1545
1546 doPrefetchByteArrayOp :: Int
1547 -> [LocalReg]
1548 -> [CmmExpr]
1549 -> FCode ()
1550 doPrefetchByteArrayOp locality res [addr,idx]
1551 = do dflags <- getDynFlags
1552 mkBasicPrefetch locality (arrWordsHdrSize dflags) res addr idx
1553 doPrefetchByteArrayOp _ _ _
1554 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1555
1556 doPrefetchAddrOp ::Int
1557 -> [LocalReg]
1558 -> [CmmExpr]
1559 -> FCode ()
1560 doPrefetchAddrOp locality res [addr,idx]
1561 = mkBasicPrefetch locality 0 res addr idx
1562 doPrefetchAddrOp _ _ _
1563 = panic "StgCmmPrim: doPrefetchAddrOp"
1564
1565 mkBasicPrefetch :: Int -- Locality level 0-3
1566 -> ByteOff -- Initial offset in bytes
1567 -> [LocalReg] -- Destination
1568 -> CmmExpr -- Base address
1569 -> CmmExpr -- Index
1570 -> FCode ()
1571 mkBasicPrefetch locality off res base idx
1572 = do dflags <- getDynFlags
1573 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1574 case res of
1575 [] -> return ()
1576 [reg] -> emitAssign (CmmLocal reg) base
1577 _ -> panic "StgCmmPrim: mkBasicPrefetch"
1578
1579 -- ----------------------------------------------------------------------------
1580 -- Allocating byte arrays
1581
1582 -- | Takes a register to return the newly allocated array in and the
1583 -- size of the new array in bytes. Allocates a new
1584 -- 'MutableByteArray#'.
1585 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1586 doNewByteArrayOp res_r n = do
1587 dflags <- getDynFlags
1588
1589 let info_ptr = mkLblExpr mkArrWords_infoLabel
1590 rep = arrWordsRep dflags n
1591
1592 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1593 (mkIntExpr dflags (nonHdrSize dflags rep))
1594 (zeroExpr dflags)
1595
1596 let hdr_size = fixedHdrSize dflags
1597
1598 base <- allocHeapClosure rep info_ptr curCCS
1599 [ (mkIntExpr dflags n,
1600 hdr_size + oFFSET_StgArrWords_bytes dflags)
1601 ]
1602
1603 emit $ mkAssign (CmmLocal res_r) base
1604
1605 -- ----------------------------------------------------------------------------
1606 -- Copying byte arrays
1607
1608 -- | Takes a source 'ByteArray#', an offset in the source array, a
1609 -- destination 'MutableByteArray#', an offset into the destination
1610 -- array, and the number of bytes to copy. Copies the given number of
1611 -- bytes from the source array to the destination array.
1612 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1613 -> FCode ()
1614 doCopyByteArrayOp = emitCopyByteArray copy
1615 where
1616 -- Copy data (we assume the arrays aren't overlapping since
1617 -- they're of different types)
1618 copy _src _dst dst_p src_p bytes =
1619 do dflags <- getDynFlags
1620 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1621
1622 -- | Takes a source 'MutableByteArray#', an offset in the source
1623 -- array, a destination 'MutableByteArray#', an offset into the
1624 -- destination array, and the number of bytes to copy. Copies the
1625 -- given number of bytes from the source array to the destination
1626 -- array.
1627 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1628 -> FCode ()
1629 doCopyMutableByteArrayOp = emitCopyByteArray copy
1630 where
1631 -- The only time the memory might overlap is when the two arrays
1632 -- we were provided are the same array!
1633 -- TODO: Optimize branch for common case of no aliasing.
1634 copy src dst dst_p src_p bytes = do
1635 dflags <- getDynFlags
1636 [moveCall, cpyCall] <- forkAlts [
1637 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags 1),
1638 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1639 ]
1640 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1641
1642 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1643 -> FCode ())
1644 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1645 -> FCode ()
1646 emitCopyByteArray copy src src_off dst dst_off n = do
1647 dflags <- getDynFlags
1648 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1649 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1650 copy src dst dst_p src_p n
1651
1652 -- | Takes a source 'ByteArray#', an offset in the source array, a
1653 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1654 -- number of bytes from the source array to the destination memory region.
1655 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1656 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1657 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1658 dflags <- getDynFlags
1659 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1660 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1661
1662 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1663 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1664 -- number of bytes from the source array to the destination memory region.
1665 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1666 -> FCode ()
1667 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1668
1669 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1670 -- the destination array, and the number of bytes to copy. Copies the given
1671 -- number of bytes from the source memory region to the destination array.
1672 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1673 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1674 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1675 dflags <- getDynFlags
1676 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1677 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1678
1679
1680 -- ----------------------------------------------------------------------------
1681 -- Setting byte arrays
1682
1683 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1684 -- and a byte, and sets each of the selected bytes in the array to the
1685 -- character.
1686 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1687 -> FCode ()
1688 doSetByteArrayOp ba off len c
1689 = do dflags <- getDynFlags
1690 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1691 emitMemsetCall p c len (mkIntExpr dflags 1)
1692
1693 -- ----------------------------------------------------------------------------
1694 -- Allocating arrays
1695
1696 -- | Allocate a new array.
1697 doNewArrayOp :: CmmFormal -- ^ return register
1698 -> SMRep -- ^ representation of the array
1699 -> CLabel -- ^ info pointer
1700 -> [(CmmExpr, ByteOff)] -- ^ header payload
1701 -> WordOff -- ^ array size
1702 -> CmmExpr -- ^ initial element
1703 -> FCode ()
1704 doNewArrayOp res_r rep info payload n init = do
1705 dflags <- getDynFlags
1706
1707 let info_ptr = mkLblExpr info
1708
1709 tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
1710 (mkIntExpr dflags (nonHdrSize dflags rep))
1711 (zeroExpr dflags)
1712
1713 base <- allocHeapClosure rep info_ptr curCCS payload
1714
1715 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1716 emit $ mkAssign arr base
1717
1718 -- Initialise all elements of the the array
1719 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep)
1720 for <- newLabelC
1721 emitLabel for
1722 let loopBody =
1723 [ mkStore (CmmReg (CmmLocal p)) init
1724 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1725 , mkBranch for ]
1726 emit =<< mkCmmIfThen
1727 (cmmULtWord dflags (CmmReg (CmmLocal p))
1728 (cmmOffsetW dflags (CmmReg arr)
1729 (hdrSizeW dflags rep + n)))
1730 (catAGraphs loopBody)
1731
1732 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1733
1734 -- ----------------------------------------------------------------------------
1735 -- Copying pointer arrays
1736
1737 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1738 -- nowhere else in the code generator. This is mostly because these
1739 -- "primitive" ops result in a surprisingly large amount of code. It
1740 -- will likely be worthwhile to optimize what is emitted here, so that
1741 -- our optimization passes don't waste time repeatedly optimizing the
1742 -- same bits of code.
1743
1744 -- More closely imitates 'assignTemp' from the old code generator, which
1745 -- returns a CmmExpr rather than a LocalReg.
1746 assignTempE :: CmmExpr -> FCode CmmExpr
1747 assignTempE e = do
1748 t <- assignTemp e
1749 return (CmmReg (CmmLocal t))
1750
1751 -- | Takes a source 'Array#', an offset in the source array, a
1752 -- destination 'MutableArray#', an offset into the destination array,
1753 -- and the number of elements to copy. Copies the given number of
1754 -- elements from the source array to the destination array.
1755 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1756 -> FCode ()
1757 doCopyArrayOp = emitCopyArray copy
1758 where
1759 -- Copy data (we assume the arrays aren't overlapping since
1760 -- they're of different types)
1761 copy _src _dst dst_p src_p bytes =
1762 do dflags <- getDynFlags
1763 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1764 (mkIntExpr dflags (wORD_SIZE dflags))
1765
1766
1767 -- | Takes a source 'MutableArray#', an offset in the source array, a
1768 -- destination 'MutableArray#', an offset into the destination array,
1769 -- and the number of elements to copy. Copies the given number of
1770 -- elements from the source array to the destination array.
1771 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1772 -> FCode ()
1773 doCopyMutableArrayOp = emitCopyArray copy
1774 where
1775 -- The only time the memory might overlap is when the two arrays
1776 -- we were provided are the same array!
1777 -- TODO: Optimize branch for common case of no aliasing.
1778 copy src dst dst_p src_p bytes = do
1779 dflags <- getDynFlags
1780 [moveCall, cpyCall] <- forkAlts [
1781 getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1782 (mkIntExpr dflags (wORD_SIZE dflags)),
1783 getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1784 (mkIntExpr dflags (wORD_SIZE dflags))
1785 ]
1786 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1787
1788 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1789 -> FCode ()) -- ^ copy function
1790 -> CmmExpr -- ^ source array
1791 -> CmmExpr -- ^ offset in source array
1792 -> CmmExpr -- ^ destination array
1793 -> CmmExpr -- ^ offset in destination array
1794 -> WordOff -- ^ number of elements to copy
1795 -> FCode ()
1796 emitCopyArray copy src0 src_off dst0 dst_off0 n = do
1797 dflags <- getDynFlags
1798 when (n /= 0) $ do
1799 -- Passed as arguments (be careful)
1800 src <- assignTempE src0
1801 dst <- assignTempE dst0
1802 dst_off <- assignTempE dst_off0
1803
1804 -- Set the dirty bit in the header.
1805 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1806
1807 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
1808 (arrPtrsHdrSize dflags)
1809 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1810 src_p <- assignTempE $ cmmOffsetExprW dflags
1811 (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1812 let bytes = wordsToBytes dflags n
1813
1814 copy src dst dst_p src_p bytes
1815
1816 -- The base address of the destination card table
1817 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
1818 (loadArrPtrsSize dflags dst)
1819
1820 emitSetCards dst_off dst_cards_p n
1821
1822 doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1823 -> FCode ()
1824 doCopySmallArrayOp = emitCopySmallArray copy
1825 where
1826 -- Copy data (we assume the arrays aren't overlapping since
1827 -- they're of different types)
1828 copy _src _dst dst_p src_p bytes =
1829 do dflags <- getDynFlags
1830 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1831 (mkIntExpr dflags (wORD_SIZE dflags))
1832
1833
1834 doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1835 -> FCode ()
1836 doCopySmallMutableArrayOp = emitCopySmallArray copy
1837 where
1838 -- The only time the memory might overlap is when the two arrays
1839 -- we were provided are the same array!
1840 -- TODO: Optimize branch for common case of no aliasing.
1841 copy src dst dst_p src_p bytes = do
1842 dflags <- getDynFlags
1843 [moveCall, cpyCall] <- forkAlts
1844 [ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1845 (mkIntExpr dflags (wORD_SIZE dflags))
1846 , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1847 (mkIntExpr dflags (wORD_SIZE dflags))
1848 ]
1849 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1850
1851 emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1852 -> FCode ()) -- ^ copy function
1853 -> CmmExpr -- ^ source array
1854 -> CmmExpr -- ^ offset in source array
1855 -> CmmExpr -- ^ destination array
1856 -> CmmExpr -- ^ offset in destination array
1857 -> WordOff -- ^ number of elements to copy
1858 -> FCode ()
1859 emitCopySmallArray copy src0 src_off dst0 dst_off n = do
1860 dflags <- getDynFlags
1861
1862 -- Passed as arguments (be careful)
1863 src <- assignTempE src0
1864 dst <- assignTempE dst0
1865
1866 -- Set the dirty bit in the header.
1867 emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
1868
1869 dst_p <- assignTempE $ cmmOffsetExprW dflags
1870 (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
1871 src_p <- assignTempE $ cmmOffsetExprW dflags
1872 (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
1873 let bytes = wordsToBytes dflags n
1874
1875 copy src dst dst_p src_p bytes
1876
1877 -- | Takes an info table label, a register to return the newly
1878 -- allocated array in, a source array, an offset in the source array,
1879 -- and the number of elements to copy. Allocates a new array and
1880 -- initializes it from the source array.
1881 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1882 -> FCode ()
1883 emitCloneArray info_p res_r src src_off n = do
1884 dflags <- getDynFlags
1885
1886 let info_ptr = mkLblExpr info_p
1887 rep = arrPtrsRep dflags n
1888
1889 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
1890 (mkIntExpr dflags (nonHdrSize dflags rep))
1891 (zeroExpr dflags)
1892
1893 let hdr_size = fixedHdrSize dflags
1894
1895 base <- allocHeapClosure rep info_ptr curCCS
1896 [ (mkIntExpr dflags n,
1897 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
1898 , (mkIntExpr dflags (nonHdrSizeW rep),
1899 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
1900 ]
1901
1902 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1903 emit $ mkAssign arr base
1904
1905 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
1906 (arrPtrsHdrSize dflags)
1907 src_p <- assignTempE $ cmmOffsetExprW dflags src
1908 (cmmAddWord dflags
1909 (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
1910
1911 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
1912 (mkIntExpr dflags (wORD_SIZE dflags))
1913
1914 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1915
1916 -- | Takes an info table label, a register to return the newly
1917 -- allocated array in, a source array, an offset in the source array,
1918 -- and the number of elements to copy. Allocates a new array and
1919 -- initializes it from the source array.
1920 emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1921 -> FCode ()
1922 emitCloneSmallArray info_p res_r src src_off n = do
1923 dflags <- getDynFlags
1924
1925 let info_ptr = mkLblExpr info_p
1926 rep = smallArrPtrsRep n
1927
1928 tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
1929 (mkIntExpr dflags (nonHdrSize dflags rep))
1930 (zeroExpr dflags)
1931
1932 let hdr_size = fixedHdrSize dflags
1933
1934 base <- allocHeapClosure rep info_ptr curCCS
1935 [ (mkIntExpr dflags n,
1936 hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
1937 ]
1938
1939 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1940 emit $ mkAssign arr base
1941
1942 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
1943 (smallArrPtrsHdrSize dflags)
1944 src_p <- assignTempE $ cmmOffsetExprW dflags src
1945 (cmmAddWord dflags
1946 (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
1947
1948 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
1949 (mkIntExpr dflags (wORD_SIZE dflags))
1950
1951 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1952
1953 -- | Takes and offset in the destination array, the base address of
1954 -- the card table, and the number of elements affected (*not* the
1955 -- number of cards). The number of elements may not be zero.
1956 -- Marks the relevant cards as dirty.
1957 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
1958 emitSetCards dst_start dst_cards_start n = do
1959 dflags <- getDynFlags
1960 start_card <- assignTempE $ cardCmm dflags dst_start
1961 let end_card = cardCmm dflags
1962 (cmmSubWord dflags
1963 (cmmAddWord dflags dst_start (mkIntExpr dflags n))
1964 (mkIntExpr dflags 1))
1965 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
1966 (mkIntExpr dflags 1)
1967 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
1968 (mkIntExpr dflags 1) -- no alignment (1 byte)
1969
1970 -- Convert an element index to a card index
1971 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
1972 cardCmm dflags i =
1973 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
1974
1975 ------------------------------------------------------------------------------
1976 -- SmallArray PrimOp implementations
1977
1978 doReadSmallPtrArrayOp :: LocalReg
1979 -> CmmExpr
1980 -> CmmExpr
1981 -> FCode ()
1982 doReadSmallPtrArrayOp res addr idx = do
1983 dflags <- getDynFlags
1984 mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
1985 (gcWord dflags) idx
1986
1987 doWriteSmallPtrArrayOp :: CmmExpr
1988 -> CmmExpr
1989 -> CmmExpr
1990 -> FCode ()
1991 doWriteSmallPtrArrayOp addr idx val = do
1992 dflags <- getDynFlags
1993 let ty = cmmExprType dflags val
1994 mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
1995 emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
1996
1997 ------------------------------------------------------------------------------
1998 -- Atomic read-modify-write
1999
2000 -- | Emit an atomic modification to a byte array element. The result
2001 -- reg contains that previous value of the element. Implies a full
2002 -- memory barrier.
2003 doAtomicRMW :: LocalReg -- ^ Result reg
2004 -> AtomicMachOp -- ^ Atomic op (e.g. add)
2005 -> CmmExpr -- ^ MutableByteArray#
2006 -> CmmExpr -- ^ Index
2007 -> CmmType -- ^ Type of element by which we are indexing
2008 -> CmmExpr -- ^ Op argument (e.g. amount to add)
2009 -> FCode ()
2010 doAtomicRMW res amop mba idx idx_ty n = do
2011 dflags <- getDynFlags
2012 let width = typeWidth idx_ty
2013 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2014 width mba idx
2015 emitPrimCall
2016 [ res ]
2017 (MO_AtomicRMW width amop)
2018 [ addr, n ]
2019
2020 -- | Emit an atomic read to a byte array that acts as a memory barrier.
2021 doAtomicReadByteArray
2022 :: LocalReg -- ^ Result reg
2023 -> CmmExpr -- ^ MutableByteArray#
2024 -> CmmExpr -- ^ Index
2025 -> CmmType -- ^ Type of element by which we are indexing
2026 -> FCode ()
2027 doAtomicReadByteArray res mba idx idx_ty = do
2028 dflags <- getDynFlags
2029 let width = typeWidth idx_ty
2030 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2031 width mba idx
2032 emitPrimCall
2033 [ res ]
2034 (MO_AtomicRead width)
2035 [ addr ]
2036
2037 -- | Emit an atomic write to a byte array that acts as a memory barrier.
2038 doAtomicWriteByteArray
2039 :: CmmExpr -- ^ MutableByteArray#
2040 -> CmmExpr -- ^ Index
2041 -> CmmType -- ^ Type of element by which we are indexing
2042 -> CmmExpr -- ^ Value to write
2043 -> FCode ()
2044 doAtomicWriteByteArray mba idx idx_ty val = do
2045 dflags <- getDynFlags
2046 let width = typeWidth idx_ty
2047 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2048 width mba idx
2049 emitPrimCall
2050 [ {- no results -} ]
2051 (MO_AtomicWrite width)
2052 [ addr, val ]
2053
2054 doCasByteArray
2055 :: LocalReg -- ^ Result reg
2056 -> CmmExpr -- ^ MutableByteArray#
2057 -> CmmExpr -- ^ Index
2058 -> CmmType -- ^ Type of element by which we are indexing
2059 -> CmmExpr -- ^ Old value
2060 -> CmmExpr -- ^ New value
2061 -> FCode ()
2062 doCasByteArray res mba idx idx_ty old new = do
2063 dflags <- getDynFlags
2064 let width = (typeWidth idx_ty)
2065 addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
2066 width mba idx
2067 emitPrimCall
2068 [ res ]
2069 (MO_Cmpxchg width)
2070 [ addr, old, new ]
2071
2072 ------------------------------------------------------------------------------
2073 -- Helpers for emitting function calls
2074
2075 -- | Emit a call to @memcpy@.
2076 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
2077 emitMemcpyCall dst src n align = do
2078 emitPrimCall
2079 [ {-no results-} ]
2080 MO_Memcpy
2081 [ dst, src, n, align ]
2082
2083 -- | Emit a call to @memmove@.
2084 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
2085 emitMemmoveCall dst src n align = do
2086 emitPrimCall
2087 [ {- no results -} ]
2088 MO_Memmove
2089 [ dst, src, n, align ]
2090
2091 -- | Emit a call to @memset@. The second argument must fit inside an
2092 -- unsigned char.
2093 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
2094 emitMemsetCall dst c n align = do
2095 emitPrimCall
2096 [ {- no results -} ]
2097 MO_Memset
2098 [ dst, c, n, align ]
2099
2100 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2101 emitBSwapCall res x width = do
2102 emitPrimCall
2103 [ res ]
2104 (MO_BSwap width)
2105 [ x ]
2106
2107 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2108 emitPopCntCall res x width = do
2109 emitPrimCall
2110 [ res ]
2111 (MO_PopCnt width)
2112 [ x ]
2113
2114 emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2115 emitClzCall res x width = do
2116 emitPrimCall
2117 [ res ]
2118 (MO_Clz width)
2119 [ x ]
2120
2121 emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
2122 emitCtzCall res x width = do
2123 emitPrimCall
2124 [ res ]
2125 (MO_Ctz width)
2126 [ x ]