Refactor inline array allocation
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C--: primitive operations
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmPrim (
10 cgOpApp,
11 cgPrimOp -- internal(ish), used by cgCase to get code for a
12 -- comparison without also turning it into a Bool.
13 ) where
14
15 #include "HsVersions.h"
16
17 import StgCmmLayout
18 import StgCmmForeign
19 import StgCmmEnv
20 import StgCmmMonad
21 import StgCmmUtils
22 import StgCmmTicky
23 import StgCmmHeap
24 import StgCmmProf ( costCentreFrom, curCCS )
25
26 import DynFlags
27 import Platform
28 import BasicTypes
29 import MkGraph
30 import StgSyn
31 import Cmm
32 import CmmInfo
33 import Type ( Type, tyConAppTyCon )
34 import TyCon
35 import CLabel
36 import CmmUtils
37 import PrimOp
38 import SMRep
39 import FastString
40 import Outputable
41 import Util
42
43 import Control.Monad (liftM, when)
44 import Data.Bits
45
46 ------------------------------------------------------------------------
47 -- Primitive operations and foreign calls
48 ------------------------------------------------------------------------
49
50 {- Note [Foreign call results]
51 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
52 A foreign call always returns an unboxed tuple of results, one
53 of which is the state token. This seems to happen even for pure
54 calls.
55
56 Even if we returned a single result for pure calls, it'd still be
57 right to wrap it in a singleton unboxed tuple, because the result
58 might be a Haskell closure pointer, we don't want to evaluate it. -}
59
60 ----------------------------------
61 cgOpApp :: StgOp -- The op
62 -> [StgArg] -- Arguments
63 -> Type -- Result type (always an unboxed tuple)
64 -> FCode ReturnKind
65
66 -- Foreign calls
67 cgOpApp (StgFCallOp fcall _) stg_args res_ty
68 = cgForeignCall fcall stg_args res_ty
69 -- Note [Foreign call results]
70
71 -- tagToEnum# is special: we need to pull the constructor
72 -- out of the table, and perform an appropriate return.
73
74 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
75 = ASSERT(isEnumerationTyCon tycon)
76 do { dflags <- getDynFlags
77 ; args' <- getNonVoidArgAmodes [arg]
78 ; let amode = case args' of [amode] -> amode
79 _ -> panic "TagToEnumOp had void arg"
80 ; emitReturn [tagToClosure dflags tycon amode] }
81 where
82 -- If you're reading this code in the attempt to figure
83 -- out why the compiler panic'ed here, it is probably because
84 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
85 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
86 -- That won't work.
87 tycon = tyConAppTyCon res_ty
88
89 cgOpApp (StgPrimOp primop) args res_ty = do
90 dflags <- getDynFlags
91 cmm_args <- getNonVoidArgAmodes args
92 case shouldInlinePrimOp dflags primop cmm_args of
93 Nothing -> do -- out-of-line
94 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
95 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
96
97 Just f -- inline
98 | ReturnsPrim VoidRep <- result_info
99 -> do f []
100 emitReturn []
101
102 | ReturnsPrim rep <- result_info
103 -> do dflags <- getDynFlags
104 res <- newTemp (primRepCmmType dflags rep)
105 f [res]
106 emitReturn [CmmReg (CmmLocal res)]
107
108 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
109 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
110 f regs
111 emitReturn (map (CmmReg . CmmLocal) regs)
112
113 | otherwise -> panic "cgPrimop"
114 where
115 result_info = getPrimOpResultInfo primop
116
117 cgOpApp (StgPrimCallOp primcall) args _res_ty
118 = do { cmm_args <- getNonVoidArgAmodes args
119 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
120 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
121
122 -- | Decide whether an out-of-line primop should be replaced by an
123 -- inline implementation. This might happen e.g. if there's enough
124 -- static information, such as statically know arguments, to emit a
125 -- more efficient implementation inline.
126 --
127 -- Returns 'Nothing' if this primop should use its out-of-line
128 -- implementation (defined elsewhere) and 'Just' together with a code
129 -- generating function that takes the output regs as arguments
130 -- otherwise.
131 shouldInlinePrimOp :: DynFlags
132 -> PrimOp -- ^ The primop
133 -> [CmmExpr] -- ^ The primop arguments
134 -> Maybe ([LocalReg] -> FCode ())
135 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n _)), init]
136 | n <= maxInlineAllocThreshold dflags =
137 Just $ \ [res] -> doNewArrayOp res n init
138 shouldInlinePrimOp dflags primop args
139 | primOpOutOfLine primop = Nothing
140 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
141
142 -- TODO: Several primops, such as 'copyArray#', only have an inline
143 -- implementation (below) but could possibly have both an inline
144 -- implementation and an out-of-line implementation, just like
145 -- 'newArray#'. This would lower the amount of code generated,
146 -- hopefully without a performance impact (needs to be measured).
147
148 ---------------------------------------------------
149 cgPrimOp :: [LocalReg] -- where to put the results
150 -> PrimOp -- the op
151 -> [StgArg] -- arguments
152 -> FCode ()
153
154 cgPrimOp results op args
155 = do dflags <- getDynFlags
156 arg_exprs <- getNonVoidArgAmodes args
157 emitPrimOp dflags results op arg_exprs
158
159
160 ------------------------------------------------------------------------
161 -- Emitting code for a primop
162 ------------------------------------------------------------------------
163
164 emitPrimOp :: DynFlags
165 -> [LocalReg] -- where to put the results
166 -> PrimOp -- the op
167 -> [CmmExpr] -- arguments
168 -> FCode ()
169
170 -- First we handle various awkward cases specially. The remaining
171 -- easy cases are then handled by translateOp, defined below.
172
173 emitPrimOp dflags [res_r,res_c] IntAddCOp [aa,bb]
174 {-
175 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
176 C, and without needing any comparisons. This may not be the
177 fastest way to do it - if you have better code, please send it! --SDM
178
179 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
180
181 We currently don't make use of the r value if c is != 0 (i.e.
182 overflow), we just convert to big integers and try again. This
183 could be improved by making r and c the correct values for
184 plugging into a new J#.
185
186 { r = ((I_)(a)) + ((I_)(b)); \
187 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
188 >> (BITS_IN (I_) - 1); \
189 }
190 Wading through the mass of bracketry, it seems to reduce to:
191 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
192
193 -}
194 = emit $ catAGraphs [
195 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
196 mkAssign (CmmLocal res_c) $
197 CmmMachOp (mo_wordUShr dflags) [
198 CmmMachOp (mo_wordAnd dflags) [
199 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
200 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
201 ],
202 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
203 ]
204 ]
205
206
207 emitPrimOp dflags [res_r,res_c] IntSubCOp [aa,bb]
208 {- Similarly:
209 #define subIntCzh(r,c,a,b) \
210 { r = ((I_)(a)) - ((I_)(b)); \
211 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
212 >> (BITS_IN (I_) - 1); \
213 }
214
215 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
216 -}
217 = emit $ catAGraphs [
218 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
219 mkAssign (CmmLocal res_c) $
220 CmmMachOp (mo_wordUShr dflags) [
221 CmmMachOp (mo_wordAnd dflags) [
222 CmmMachOp (mo_wordXor dflags) [aa,bb],
223 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
224 ],
225 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
226 ]
227 ]
228
229
230 emitPrimOp _ [res] ParOp [arg]
231 =
232 -- for now, just implement this in a C function
233 -- later, we might want to inline it.
234 emitCCall
235 [(res,NoHint)]
236 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
237 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
238
239 emitPrimOp dflags [res] SparkOp [arg]
240 = do
241 -- returns the value of arg in res. We're going to therefore
242 -- refer to arg twice (once to pass to newSpark(), and once to
243 -- assign to res), so put it in a temporary.
244 tmp <- assignTemp arg
245 tmp2 <- newTemp (bWord dflags)
246 emitCCall
247 [(tmp2,NoHint)]
248 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
249 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
250 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
251
252 emitPrimOp dflags [res] GetCCSOfOp [arg]
253 = emitAssign (CmmLocal res) val
254 where
255 val
256 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
257 | otherwise = CmmLit (zeroCLit dflags)
258
259 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
260 = emitAssign (CmmLocal res) curCCS
261
262 emitPrimOp dflags [res] ReadMutVarOp [mutv]
263 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSize dflags) (gcWord dflags))
264
265 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
266 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSize dflags)) var
267 emitCCall
268 [{-no results-}]
269 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
270 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
271
272 -- #define sizzeofByteArrayzh(r,a) \
273 -- r = ((StgArrWords *)(a))->bytes
274 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
275 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
276
277 -- #define sizzeofMutableByteArrayzh(r,a) \
278 -- r = ((StgArrWords *)(a))->bytes
279 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
280 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
281
282
283 -- #define touchzh(o) /* nothing */
284 emitPrimOp _ res@[] TouchOp args@[_arg]
285 = do emitPrimCall res MO_Touch args
286
287 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
288 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
289 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
290
291 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
292 emitPrimOp dflags [res] StableNameToIntOp [arg]
293 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
294
295 -- #define eqStableNamezh(r,sn1,sn2) \
296 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
297 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
298 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
299 cmmLoadIndexW dflags arg1 (fixedHdrSize dflags) (bWord dflags),
300 cmmLoadIndexW dflags arg2 (fixedHdrSize dflags) (bWord dflags)
301 ])
302
303
304 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
305 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
306
307 -- #define addrToHValuezh(r,a) r=(P_)a
308 emitPrimOp _ [res] AddrToAnyOp [arg]
309 = emitAssign (CmmLocal res) arg
310
311 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
312 -- Note: argument may be tagged!
313 emitPrimOp dflags [res] DataToTagOp [arg]
314 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
315
316 {- Freezing arrays-of-ptrs requires changing an info table, for the
317 benefit of the generational collector. It needs to scavenge mutable
318 objects, even if they are in old space. When they become immutable,
319 they can be removed from this scavenge list. -}
320
321 -- #define unsafeFreezzeArrayzh(r,a)
322 -- {
323 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
324 -- r = a;
325 -- }
326 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
327 = emit $ catAGraphs
328 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
329 mkAssign (CmmLocal res) arg ]
330 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
331 = emit $ catAGraphs
332 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
333 mkAssign (CmmLocal res) arg ]
334
335 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
336 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
337 = emitAssign (CmmLocal res) arg
338
339 -- Copying pointer arrays
340
341 emitPrimOp _ [] CopyArrayOp [src,src_off,dst,dst_off,n] =
342 doCopyArrayOp src src_off dst dst_off n
343 emitPrimOp _ [] CopyMutableArrayOp [src,src_off,dst,dst_off,n] =
344 doCopyMutableArrayOp src src_off dst dst_off n
345 emitPrimOp _ [res] CloneArrayOp [src,src_off,n] =
346 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
347 emitPrimOp _ [res] CloneMutableArrayOp [src,src_off,n] =
348 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
349 emitPrimOp _ [res] FreezeArrayOp [src,src_off,n] =
350 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
351 emitPrimOp _ [res] ThawArrayOp [src,src_off,n] =
352 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
353
354 emitPrimOp _ [] CopyArrayArrayOp [src,src_off,dst,dst_off,n] =
355 doCopyArrayOp src src_off dst dst_off n
356 emitPrimOp _ [] CopyMutableArrayArrayOp [src,src_off,dst,dst_off,n] =
357 doCopyMutableArrayOp src src_off dst dst_off n
358
359 -- Reading/writing pointer arrays
360
361 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
362 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
363 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
364
365 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
366 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
367 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
368 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
369 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
370 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
371 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
372 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
373 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
374 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
375
376 emitPrimOp dflags [res] SizeofArrayOp [arg]
377 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags))
378 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
379 = emitPrimOp dflags [res] SizeofArrayOp [arg]
380 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
381 = emitPrimOp dflags [res] SizeofArrayOp [arg]
382 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
383 = emitPrimOp dflags [res] SizeofArrayOp [arg]
384
385 -- IndexXXXoffAddr
386
387 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
388 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
389 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
390 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
391 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
392 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
393 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
394 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
395 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
396 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
397 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
398 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
399 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
400 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
401 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
402 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
403
404 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
405
406 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
407 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
408 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
409 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
410 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
411 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
412 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
413 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
414 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
415 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
416 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
417 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
418 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
419 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
420 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
421 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
422
423 -- IndexXXXArray
424
425 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
426 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
427 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
428 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
429 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
430 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
431 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
432 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
433 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
434 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
435 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
436 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
437 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
438 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
439 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
440 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
441
442 -- ReadXXXArray, identical to IndexXXXArray.
443
444 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
445 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
446 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
447 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
448 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
449 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
450 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
451 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
452 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
453 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
454 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
455 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
456 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
457 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
458 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
459 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
460
461 -- WriteXXXoffAddr
462
463 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
464 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
465 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
466 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
467 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
468 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
469 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
470 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
471 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
472 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
473 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
474 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
475 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
476 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
477 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
478 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
479
480 -- WriteXXXArray
481
482 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
483 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
484 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
485 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
486 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
487 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
488 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
489 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
490 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
491 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
492 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
493 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
494 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
495 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
496 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
497 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
498
499 -- Copying and setting byte arrays
500 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
501 doCopyByteArrayOp src src_off dst dst_off n
502 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
503 doCopyMutableByteArrayOp src src_off dst dst_off n
504 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
505 doCopyByteArrayToAddrOp src src_off dst n
506 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
507 doCopyMutableByteArrayToAddrOp src src_off dst n
508 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
509 doCopyAddrToByteArrayOp src dst dst_off n
510 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
511 doSetByteArrayOp ba off len c
512
513 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
514 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
515 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
516 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
517
518 -- Population count
519 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
520 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
521 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
522 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
523 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
524
525 -- Unsigned int to floating point conversions
526 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
527 (MO_UF_Conv W32) [w]
528 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
529 (MO_UF_Conv W64) [w]
530
531 -- SIMD primops
532 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
533 checkVecCompatibility dflags vcat n w
534 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
535 where
536 zeros :: CmmExpr
537 zeros = CmmLit $ CmmVec (replicate n zero)
538
539 zero :: CmmLit
540 zero = case vcat of
541 IntVec -> CmmInt 0 w
542 WordVec -> CmmInt 0 w
543 FloatVec -> CmmFloat 0 w
544
545 ty :: CmmType
546 ty = vecVmmType vcat n w
547
548 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
549 checkVecCompatibility dflags vcat n w
550 when (length es /= n) $
551 panic "emitPrimOp: VecPackOp has wrong number of arguments"
552 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
553 where
554 zeros :: CmmExpr
555 zeros = CmmLit $ CmmVec (replicate n zero)
556
557 zero :: CmmLit
558 zero = case vcat of
559 IntVec -> CmmInt 0 w
560 WordVec -> CmmInt 0 w
561 FloatVec -> CmmFloat 0 w
562
563 ty :: CmmType
564 ty = vecVmmType vcat n w
565
566 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
567 checkVecCompatibility dflags vcat n w
568 when (length res /= n) $
569 panic "emitPrimOp: VecUnpackOp has wrong number of results"
570 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
571 where
572 ty :: CmmType
573 ty = vecVmmType vcat n w
574
575 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
576 checkVecCompatibility dflags vcat n w
577 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
578 where
579 ty :: CmmType
580 ty = vecVmmType vcat n w
581
582 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
583 checkVecCompatibility dflags vcat n w
584 doIndexByteArrayOp Nothing ty res args
585 where
586 ty :: CmmType
587 ty = vecVmmType vcat n w
588
589 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
590 checkVecCompatibility dflags vcat n w
591 doIndexByteArrayOp Nothing ty res args
592 where
593 ty :: CmmType
594 ty = vecVmmType vcat n w
595
596 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
597 checkVecCompatibility dflags vcat n w
598 doWriteByteArrayOp Nothing ty res args
599 where
600 ty :: CmmType
601 ty = vecVmmType vcat n w
602
603 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
604 checkVecCompatibility dflags vcat n w
605 doIndexOffAddrOp Nothing ty res args
606 where
607 ty :: CmmType
608 ty = vecVmmType vcat n w
609
610 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
611 checkVecCompatibility dflags vcat n w
612 doIndexOffAddrOp Nothing ty res args
613 where
614 ty :: CmmType
615 ty = vecVmmType vcat n w
616
617 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
618 checkVecCompatibility dflags vcat n w
619 doWriteOffAddrOp Nothing ty res args
620 where
621 ty :: CmmType
622 ty = vecVmmType vcat n w
623
624 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
625 checkVecCompatibility dflags vcat n w
626 doIndexByteArrayOpAs Nothing vecty ty res args
627 where
628 vecty :: CmmType
629 vecty = vecVmmType vcat n w
630
631 ty :: CmmType
632 ty = vecCmmCat vcat w
633
634 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
635 checkVecCompatibility dflags vcat n w
636 doIndexByteArrayOpAs Nothing vecty ty res args
637 where
638 vecty :: CmmType
639 vecty = vecVmmType vcat n w
640
641 ty :: CmmType
642 ty = vecCmmCat vcat w
643
644 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
645 checkVecCompatibility dflags vcat n w
646 doWriteByteArrayOp Nothing ty res args
647 where
648 ty :: CmmType
649 ty = vecCmmCat vcat w
650
651 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
652 checkVecCompatibility dflags vcat n w
653 doIndexOffAddrOpAs Nothing vecty ty res args
654 where
655 vecty :: CmmType
656 vecty = vecVmmType vcat n w
657
658 ty :: CmmType
659 ty = vecCmmCat vcat w
660
661 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
662 checkVecCompatibility dflags vcat n w
663 doIndexOffAddrOpAs Nothing vecty ty res args
664 where
665 vecty :: CmmType
666 vecty = vecVmmType vcat n w
667
668 ty :: CmmType
669 ty = vecCmmCat vcat w
670
671 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
672 checkVecCompatibility dflags vcat n w
673 doWriteOffAddrOp Nothing ty res args
674 where
675 ty :: CmmType
676 ty = vecCmmCat vcat w
677
678 -- Prefetch
679 emitPrimOp _ res PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
680 emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
681 emitPrimOp _ res PrefetchAddrOp3 args = doPrefetchAddrOp 3 res args
682
683 emitPrimOp _ res PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
684 emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
685 emitPrimOp _ res PrefetchAddrOp2 args = doPrefetchAddrOp 2 res args
686
687 emitPrimOp _ res PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
688 emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
689 emitPrimOp _ res PrefetchAddrOp1 args = doPrefetchAddrOp 1 res args
690
691 emitPrimOp _ res PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
692 emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
693 emitPrimOp _ res PrefetchAddrOp0 args = doPrefetchAddrOp 0 res args
694
695
696 -- The rest just translate straightforwardly
697 emitPrimOp dflags [res] op [arg]
698 | nopOp op
699 = emitAssign (CmmLocal res) arg
700
701 | Just (mop,rep) <- narrowOp op
702 = emitAssign (CmmLocal res) $
703 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
704
705 emitPrimOp dflags r@[res] op args
706 | Just prim <- callishOp op
707 = do emitPrimCall r prim args
708
709 | Just mop <- translateOp dflags op
710 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
711 emit stmt
712
713 emitPrimOp dflags results op args
714 = case callishPrimOpSupported dflags op of
715 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
716 Right gen -> gen results args
717
718 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
719
720 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
721 callishPrimOpSupported dflags op
722 = case op of
723 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
724 | otherwise -> Right (genericIntQuotRemOp dflags)
725
726 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
727 | otherwise -> Right (genericWordQuotRemOp dflags)
728
729 WordQuotRem2Op | ncg && x86ish -> Left (MO_U_QuotRem2 (wordWidth dflags))
730 | otherwise -> Right (genericWordQuotRem2Op dflags)
731
732 WordAdd2Op | ncg && x86ish -> Left (MO_Add2 (wordWidth dflags))
733 | otherwise -> Right genericWordAdd2Op
734
735 WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags))
736 | otherwise -> Right genericWordMul2Op
737
738 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
739 where
740 ncg = case hscTarget dflags of
741 HscAsm -> True
742 _ -> False
743
744 x86ish = case platformArch (targetPlatform dflags) of
745 ArchX86 -> True
746 ArchX86_64 -> True
747 _ -> False
748
749 genericIntQuotRemOp :: DynFlags -> GenericOp
750 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
751 = emit $ mkAssign (CmmLocal res_q)
752 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
753 mkAssign (CmmLocal res_r)
754 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
755 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
756
757 genericWordQuotRemOp :: DynFlags -> GenericOp
758 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
759 = emit $ mkAssign (CmmLocal res_q)
760 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
761 mkAssign (CmmLocal res_r)
762 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
763 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
764
765 genericWordQuotRem2Op :: DynFlags -> GenericOp
766 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
767 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
768 where ty = cmmExprType dflags arg_x_high
769 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
770 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
771 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
772 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
773 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
774 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
775 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
776 zero = lit 0
777 one = lit 1
778 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
779 lit i = CmmLit (CmmInt i (wordWidth dflags))
780
781 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
782 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
783 mkAssign (CmmLocal res_r) high)
784 f i acc high low =
785 do roverflowedBit <- newTemp ty
786 rhigh' <- newTemp ty
787 rhigh'' <- newTemp ty
788 rlow' <- newTemp ty
789 risge <- newTemp ty
790 racc' <- newTemp ty
791 let high' = CmmReg (CmmLocal rhigh')
792 isge = CmmReg (CmmLocal risge)
793 overflowedBit = CmmReg (CmmLocal roverflowedBit)
794 let this = catAGraphs
795 [mkAssign (CmmLocal roverflowedBit)
796 (shr high negone),
797 mkAssign (CmmLocal rhigh')
798 (or (shl high one) (shr low negone)),
799 mkAssign (CmmLocal rlow')
800 (shl low one),
801 mkAssign (CmmLocal risge)
802 (or (overflowedBit `ne` zero)
803 (high' `ge` arg_y)),
804 mkAssign (CmmLocal rhigh'')
805 (high' `minus` (arg_y `times` isge)),
806 mkAssign (CmmLocal racc')
807 (or (shl acc one) isge)]
808 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
809 (CmmReg (CmmLocal rhigh''))
810 (CmmReg (CmmLocal rlow'))
811 return (this <*> rest)
812 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
813
814 genericWordAdd2Op :: GenericOp
815 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
816 = do dflags <- getDynFlags
817 r1 <- newTemp (cmmExprType dflags arg_x)
818 r2 <- newTemp (cmmExprType dflags arg_x)
819 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
820 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
821 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
822 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
823 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
824 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
825 (wordWidth dflags))
826 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
827 emit $ catAGraphs
828 [mkAssign (CmmLocal r1)
829 (add (bottomHalf arg_x) (bottomHalf arg_y)),
830 mkAssign (CmmLocal r2)
831 (add (topHalf (CmmReg (CmmLocal r1)))
832 (add (topHalf arg_x) (topHalf arg_y))),
833 mkAssign (CmmLocal res_h)
834 (topHalf (CmmReg (CmmLocal r2))),
835 mkAssign (CmmLocal res_l)
836 (or (toTopHalf (CmmReg (CmmLocal r2)))
837 (bottomHalf (CmmReg (CmmLocal r1))))]
838 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
839
840 genericWordMul2Op :: GenericOp
841 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
842 = do dflags <- getDynFlags
843 let t = cmmExprType dflags arg_x
844 xlyl <- liftM CmmLocal $ newTemp t
845 xlyh <- liftM CmmLocal $ newTemp t
846 xhyl <- liftM CmmLocal $ newTemp t
847 r <- liftM CmmLocal $ newTemp t
848 -- This generic implementation is very simple and slow. We might
849 -- well be able to do better, but for now this at least works.
850 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
851 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
852 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
853 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
854 sum = foldl1 add
855 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
856 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
857 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
858 (wordWidth dflags))
859 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
860 emit $ catAGraphs
861 [mkAssign xlyl
862 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
863 mkAssign xlyh
864 (mul (bottomHalf arg_x) (topHalf arg_y)),
865 mkAssign xhyl
866 (mul (topHalf arg_x) (bottomHalf arg_y)),
867 mkAssign r
868 (sum [topHalf (CmmReg xlyl),
869 bottomHalf (CmmReg xhyl),
870 bottomHalf (CmmReg xlyh)]),
871 mkAssign (CmmLocal res_l)
872 (or (bottomHalf (CmmReg xlyl))
873 (toTopHalf (CmmReg r))),
874 mkAssign (CmmLocal res_h)
875 (sum [mul (topHalf arg_x) (topHalf arg_y),
876 topHalf (CmmReg xhyl),
877 topHalf (CmmReg xlyh),
878 topHalf (CmmReg r)])]
879 genericWordMul2Op _ _ = panic "genericWordMul2Op"
880
881 -- These PrimOps are NOPs in Cmm
882
883 nopOp :: PrimOp -> Bool
884 nopOp Int2WordOp = True
885 nopOp Word2IntOp = True
886 nopOp Int2AddrOp = True
887 nopOp Addr2IntOp = True
888 nopOp ChrOp = True -- Int# and Char# are rep'd the same
889 nopOp OrdOp = True
890 nopOp _ = False
891
892 -- These PrimOps turn into double casts
893
894 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
895 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
896 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
897 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
898 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
899 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
900 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
901 narrowOp _ = Nothing
902
903 -- Native word signless ops
904
905 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
906 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
907 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
908 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
909 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
910 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
911 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
912
913 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
914 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
915 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
916 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
917 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
918 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
919
920 translateOp dflags AndOp = Just (mo_wordAnd dflags)
921 translateOp dflags OrOp = Just (mo_wordOr dflags)
922 translateOp dflags XorOp = Just (mo_wordXor dflags)
923 translateOp dflags NotOp = Just (mo_wordNot dflags)
924 translateOp dflags SllOp = Just (mo_wordShl dflags)
925 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
926
927 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
928
929 -- Native word signed ops
930
931 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
932 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
933 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
934 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
935 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
936
937
938 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
939 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
940 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
941 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
942
943 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
944 translateOp dflags OrIOp = Just (mo_wordOr dflags)
945 translateOp dflags XorIOp = Just (mo_wordXor dflags)
946 translateOp dflags NotIOp = Just (mo_wordNot dflags)
947 translateOp dflags ISllOp = Just (mo_wordShl dflags)
948 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
949 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
950
951 -- Native word unsigned ops
952
953 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
954 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
955 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
956 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
957
958 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
959 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
960 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
961
962 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
963 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
964 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
965 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
966
967 -- Char# ops
968
969 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
970 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
971 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
972 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
973 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
974 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
975
976 -- Double ops
977
978 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
979 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
980 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
981 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
982 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
983 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
984
985 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
986 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
987 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
988 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
989 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
990
991 -- Float ops
992
993 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
994 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
995 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
996 translateOp _ FloatLeOp = Just (MO_F_Le W32)
997 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
998 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
999
1000 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1001 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1002 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1003 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1004 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1005
1006 -- Vector ops
1007
1008 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1009 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1010 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1011 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1012 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1013
1014 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1015 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1016 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1017 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1018 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1019 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1020
1021 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1022 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1023 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1024 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1025 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1026
1027 -- Conversions
1028
1029 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1030 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1031
1032 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1033 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1034
1035 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1036 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1037
1038 -- Word comparisons masquerading as more exotic things.
1039
1040 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1041 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1042 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1043 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1044 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1045 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1046 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1047
1048 translateOp _ _ = Nothing
1049
1050 -- These primops are implemented by CallishMachOps, because they sometimes
1051 -- turn into foreign calls depending on the backend.
1052
1053 callishOp :: PrimOp -> Maybe CallishMachOp
1054 callishOp DoublePowerOp = Just MO_F64_Pwr
1055 callishOp DoubleSinOp = Just MO_F64_Sin
1056 callishOp DoubleCosOp = Just MO_F64_Cos
1057 callishOp DoubleTanOp = Just MO_F64_Tan
1058 callishOp DoubleSinhOp = Just MO_F64_Sinh
1059 callishOp DoubleCoshOp = Just MO_F64_Cosh
1060 callishOp DoubleTanhOp = Just MO_F64_Tanh
1061 callishOp DoubleAsinOp = Just MO_F64_Asin
1062 callishOp DoubleAcosOp = Just MO_F64_Acos
1063 callishOp DoubleAtanOp = Just MO_F64_Atan
1064 callishOp DoubleLogOp = Just MO_F64_Log
1065 callishOp DoubleExpOp = Just MO_F64_Exp
1066 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1067
1068 callishOp FloatPowerOp = Just MO_F32_Pwr
1069 callishOp FloatSinOp = Just MO_F32_Sin
1070 callishOp FloatCosOp = Just MO_F32_Cos
1071 callishOp FloatTanOp = Just MO_F32_Tan
1072 callishOp FloatSinhOp = Just MO_F32_Sinh
1073 callishOp FloatCoshOp = Just MO_F32_Cosh
1074 callishOp FloatTanhOp = Just MO_F32_Tanh
1075 callishOp FloatAsinOp = Just MO_F32_Asin
1076 callishOp FloatAcosOp = Just MO_F32_Acos
1077 callishOp FloatAtanOp = Just MO_F32_Atan
1078 callishOp FloatLogOp = Just MO_F32_Log
1079 callishOp FloatExpOp = Just MO_F32_Exp
1080 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1081
1082 callishOp _ = Nothing
1083
1084 ------------------------------------------------------------------------------
1085 -- Helpers for translating various minor variants of array indexing.
1086
1087 doIndexOffAddrOp :: Maybe MachOp
1088 -> CmmType
1089 -> [LocalReg]
1090 -> [CmmExpr]
1091 -> FCode ()
1092 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1093 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1094 doIndexOffAddrOp _ _ _ _
1095 = panic "StgCmmPrim: doIndexOffAddrOp"
1096
1097 doIndexOffAddrOpAs :: Maybe MachOp
1098 -> CmmType
1099 -> CmmType
1100 -> [LocalReg]
1101 -> [CmmExpr]
1102 -> FCode ()
1103 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1104 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1105 doIndexOffAddrOpAs _ _ _ _ _
1106 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1107
1108 doIndexByteArrayOp :: Maybe MachOp
1109 -> CmmType
1110 -> [LocalReg]
1111 -> [CmmExpr]
1112 -> FCode ()
1113 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1114 = do dflags <- getDynFlags
1115 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1116 doIndexByteArrayOp _ _ _ _
1117 = panic "StgCmmPrim: doIndexByteArrayOp"
1118
1119 doIndexByteArrayOpAs :: Maybe MachOp
1120 -> CmmType
1121 -> CmmType
1122 -> [LocalReg]
1123 -> [CmmExpr]
1124 -> FCode ()
1125 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1126 = do dflags <- getDynFlags
1127 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1128 doIndexByteArrayOpAs _ _ _ _ _
1129 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1130
1131 doReadPtrArrayOp :: LocalReg
1132 -> CmmExpr
1133 -> CmmExpr
1134 -> FCode ()
1135 doReadPtrArrayOp res addr idx
1136 = do dflags <- getDynFlags
1137 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1138
1139 doWriteOffAddrOp :: Maybe MachOp
1140 -> CmmType
1141 -> [LocalReg]
1142 -> [CmmExpr]
1143 -> FCode ()
1144 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1145 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1146 doWriteOffAddrOp _ _ _ _
1147 = panic "StgCmmPrim: doWriteOffAddrOp"
1148
1149 doWriteByteArrayOp :: Maybe MachOp
1150 -> CmmType
1151 -> [LocalReg]
1152 -> [CmmExpr]
1153 -> FCode ()
1154 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1155 = do dflags <- getDynFlags
1156 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1157 doWriteByteArrayOp _ _ _ _
1158 = panic "StgCmmPrim: doWriteByteArrayOp"
1159
1160 doWritePtrArrayOp :: CmmExpr
1161 -> CmmExpr
1162 -> CmmExpr
1163 -> FCode ()
1164 doWritePtrArrayOp addr idx val
1165 = do dflags <- getDynFlags
1166 let ty = cmmExprType dflags val
1167 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1168 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1169 -- the write barrier. We must write a byte into the mark table:
1170 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1171 emit $ mkStore (
1172 cmmOffsetExpr dflags
1173 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1174 (loadArrPtrsSize dflags addr))
1175 (CmmMachOp (mo_wordUShr dflags) [idx,
1176 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1177 ) (CmmLit (CmmInt 1 W8))
1178
1179 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1180 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1181 where off = fixedHdrSize dflags * wORD_SIZE dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1182
1183 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1184 -> Maybe MachOp -- Optional result cast
1185 -> CmmType -- Type of element we are accessing
1186 -> LocalReg -- Destination
1187 -> CmmExpr -- Base address
1188 -> CmmType -- Type of element by which we are indexing
1189 -> CmmExpr -- Index
1190 -> FCode ()
1191 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1192 = do dflags <- getDynFlags
1193 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1194 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1195 = do dflags <- getDynFlags
1196 emitAssign (CmmLocal res) (CmmMachOp cast [
1197 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1198
1199 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1200 -> Maybe MachOp -- Optional value cast
1201 -> CmmExpr -- Base address
1202 -> CmmType -- Type of element by which we are indexing
1203 -> CmmExpr -- Index
1204 -> CmmExpr -- Value to write
1205 -> FCode ()
1206 mkBasicIndexedWrite off Nothing base idx_ty idx val
1207 = do dflags <- getDynFlags
1208 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1209 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1210 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1211
1212 -- ----------------------------------------------------------------------------
1213 -- Misc utils
1214
1215 cmmIndexOffExpr :: DynFlags
1216 -> ByteOff -- Initial offset in bytes
1217 -> Width -- Width of element by which we are indexing
1218 -> CmmExpr -- Base address
1219 -> CmmExpr -- Index
1220 -> CmmExpr
1221 cmmIndexOffExpr dflags off width base idx
1222 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1223
1224 cmmLoadIndexOffExpr :: DynFlags
1225 -> ByteOff -- Initial offset in bytes
1226 -> CmmType -- Type of element we are accessing
1227 -> CmmExpr -- Base address
1228 -> CmmType -- Type of element by which we are indexing
1229 -> CmmExpr -- Index
1230 -> CmmExpr
1231 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1232 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1233
1234 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1235 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1236
1237 ------------------------------------------------------------------------------
1238 -- Helpers for translating vector primops.
1239
1240 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1241 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1242
1243 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1244 vecCmmCat IntVec = cmmBits
1245 vecCmmCat WordVec = cmmBits
1246 vecCmmCat FloatVec = cmmFloat
1247
1248 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1249 vecElemInjectCast _ FloatVec _ = Nothing
1250 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1251 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1252 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1253 vecElemInjectCast _ IntVec W64 = Nothing
1254 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1255 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1256 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1257 vecElemInjectCast _ WordVec W64 = Nothing
1258 vecElemInjectCast _ _ _ = Nothing
1259
1260 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1261 vecElemProjectCast _ FloatVec _ = Nothing
1262 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1263 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1264 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1265 vecElemProjectCast _ IntVec W64 = Nothing
1266 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1267 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1268 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1269 vecElemProjectCast _ WordVec W64 = Nothing
1270 vecElemProjectCast _ _ _ = Nothing
1271
1272 -- Check to make sure that we can generate code for the specified vector type
1273 -- given the current set of dynamic flags.
1274 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1275 checkVecCompatibility dflags vcat l w = do
1276 when (hscTarget dflags /= HscLlvm) $ do
1277 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1278 ,"Please use -fllvm."]
1279 check vecWidth vcat l w
1280 where
1281 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1282 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1283 sorry $ "128-bit wide single-precision floating point " ++
1284 "SIMD vector instructions require at least -msse."
1285 check W128 _ _ _ | not (isSse2Enabled dflags) =
1286 sorry $ "128-bit wide integer and double precision " ++
1287 "SIMD vector instructions require at least -msse2."
1288 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1289 sorry $ "256-bit wide floating point " ++
1290 "SIMD vector instructions require at least -mavx."
1291 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1292 sorry $ "256-bit wide integer " ++
1293 "SIMD vector instructions require at least -mavx2."
1294 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1295 sorry $ "512-bit wide " ++
1296 "SIMD vector instructions require -mavx512f."
1297 check _ _ _ _ = return ()
1298
1299 vecWidth = typeWidth (vecVmmType vcat l w)
1300
1301 ------------------------------------------------------------------------------
1302 -- Helpers for translating vector packing and unpacking.
1303
1304 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1305 -> CmmType -- Type of vector
1306 -> CmmExpr -- Initial vector
1307 -> [CmmExpr] -- Elements
1308 -> CmmFormal -- Destination for result
1309 -> FCode ()
1310 doVecPackOp maybe_pre_write_cast ty z es res = do
1311 dst <- newTemp ty
1312 emitAssign (CmmLocal dst) z
1313 vecPack dst es 0
1314 where
1315 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1316 vecPack src [] _ =
1317 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1318
1319 vecPack src (e : es) i = do
1320 dst <- newTemp ty
1321 if isFloatType (vecElemType ty)
1322 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1323 [CmmReg (CmmLocal src), cast e, iLit])
1324 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1325 [CmmReg (CmmLocal src), cast e, iLit])
1326 vecPack dst es (i + 1)
1327 where
1328 -- vector indices are always 32-bits
1329 iLit = CmmLit (CmmInt (toInteger i) W32)
1330
1331 cast :: CmmExpr -> CmmExpr
1332 cast val = case maybe_pre_write_cast of
1333 Nothing -> val
1334 Just cast -> CmmMachOp cast [val]
1335
1336 len :: Length
1337 len = vecLength ty
1338
1339 wid :: Width
1340 wid = typeWidth (vecElemType ty)
1341
1342 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1343 -> CmmType -- Type of vector
1344 -> CmmExpr -- Vector
1345 -> [CmmFormal] -- Element results
1346 -> FCode ()
1347 doVecUnpackOp maybe_post_read_cast ty e res =
1348 vecUnpack res 0
1349 where
1350 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1351 vecUnpack [] _ =
1352 return ()
1353
1354 vecUnpack (r : rs) i = do
1355 if isFloatType (vecElemType ty)
1356 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1357 [e, iLit]))
1358 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1359 [e, iLit]))
1360 vecUnpack rs (i + 1)
1361 where
1362 -- vector indices are always 32-bits
1363 iLit = CmmLit (CmmInt (toInteger i) W32)
1364
1365 cast :: CmmExpr -> CmmExpr
1366 cast val = case maybe_post_read_cast of
1367 Nothing -> val
1368 Just cast -> CmmMachOp cast [val]
1369
1370 len :: Length
1371 len = vecLength ty
1372
1373 wid :: Width
1374 wid = typeWidth (vecElemType ty)
1375
1376 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1377 -> CmmType -- Vector type
1378 -> CmmExpr -- Source vector
1379 -> CmmExpr -- Element
1380 -> CmmExpr -- Index at which to insert element
1381 -> CmmFormal -- Destination for result
1382 -> FCode ()
1383 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1384 dflags <- getDynFlags
1385 -- vector indices are always 32-bits
1386 let idx' :: CmmExpr
1387 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1388 if isFloatType (vecElemType ty)
1389 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1390 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1391 where
1392 cast :: CmmExpr -> CmmExpr
1393 cast val = case maybe_pre_write_cast of
1394 Nothing -> val
1395 Just cast -> CmmMachOp cast [val]
1396
1397 len :: Length
1398 len = vecLength ty
1399
1400 wid :: Width
1401 wid = typeWidth (vecElemType ty)
1402
1403 ------------------------------------------------------------------------------
1404 -- Helpers for translating prefetching.
1405
1406 doPrefetchByteArrayOp :: Int
1407 -> [LocalReg]
1408 -> [CmmExpr]
1409 -> FCode ()
1410 doPrefetchByteArrayOp locality res [addr,idx]
1411 = do dflags <- getDynFlags
1412 mkBasicPrefetch locality (arrWordsHdrSize dflags) res addr idx
1413 doPrefetchByteArrayOp _ _ _
1414 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1415
1416 doPrefetchAddrOp ::Int
1417 -> [LocalReg]
1418 -> [CmmExpr]
1419 -> FCode ()
1420 doPrefetchAddrOp locality res [addr,idx]
1421 = mkBasicPrefetch locality 0 res addr idx
1422 doPrefetchAddrOp _ _ _
1423 = panic "StgCmmPrim: doPrefetchAddrOp"
1424
1425 mkBasicPrefetch :: Int -- Locality level 0-3
1426 -> ByteOff -- Initial offset in bytes
1427 -> [LocalReg] -- Destination
1428 -> CmmExpr -- Base address
1429 -> CmmExpr -- Index
1430 -> FCode ()
1431 mkBasicPrefetch locality off res base idx
1432 = do dflags <- getDynFlags
1433 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1434 case res of
1435 [] -> return ()
1436 [reg] -> emitAssign (CmmLocal reg) base
1437 _ -> panic "StgCmmPrim: mkBasicPrefetch"
1438
1439 -- ----------------------------------------------------------------------------
1440 -- Copying byte arrays
1441
1442 -- | Takes a source 'ByteArray#', an offset in the source array, a
1443 -- destination 'MutableByteArray#', an offset into the destination
1444 -- array, and the number of bytes to copy. Copies the given number of
1445 -- bytes from the source array to the destination array.
1446 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1447 -> FCode ()
1448 doCopyByteArrayOp = emitCopyByteArray copy
1449 where
1450 -- Copy data (we assume the arrays aren't overlapping since
1451 -- they're of different types)
1452 copy _src _dst dst_p src_p bytes =
1453 do dflags <- getDynFlags
1454 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1455
1456 -- | Takes a source 'MutableByteArray#', an offset in the source
1457 -- array, a destination 'MutableByteArray#', an offset into the
1458 -- destination array, and the number of bytes to copy. Copies the
1459 -- given number of bytes from the source array to the destination
1460 -- array.
1461 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1462 -> FCode ()
1463 doCopyMutableByteArrayOp = emitCopyByteArray copy
1464 where
1465 -- The only time the memory might overlap is when the two arrays
1466 -- we were provided are the same array!
1467 -- TODO: Optimize branch for common case of no aliasing.
1468 copy src dst dst_p src_p bytes = do
1469 dflags <- getDynFlags
1470 [moveCall, cpyCall] <- forkAlts [
1471 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags 1),
1472 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1473 ]
1474 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1475
1476 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1477 -> FCode ())
1478 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1479 -> FCode ()
1480 emitCopyByteArray copy src src_off dst dst_off n = do
1481 dflags <- getDynFlags
1482 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1483 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1484 copy src dst dst_p src_p n
1485
1486 -- | Takes a source 'ByteArray#', an offset in the source array, a
1487 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1488 -- number of bytes from the source array to the destination memory region.
1489 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1490 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1491 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1492 dflags <- getDynFlags
1493 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1494 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1495
1496 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1497 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1498 -- number of bytes from the source array to the destination memory region.
1499 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1500 -> FCode ()
1501 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1502
1503 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1504 -- the destination array, and the number of bytes to copy. Copies the given
1505 -- number of bytes from the source memory region to the destination array.
1506 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1507 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1508 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1509 dflags <- getDynFlags
1510 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1511 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1512
1513
1514 -- ----------------------------------------------------------------------------
1515 -- Setting byte arrays
1516
1517 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1518 -- and a byte, and sets each of the selected bytes in the array to the
1519 -- character.
1520 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1521 -> FCode ()
1522 doSetByteArrayOp ba off len c
1523 = do dflags <- getDynFlags
1524 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1525 emitMemsetCall p c len (mkIntExpr dflags 1)
1526
1527 -- ----------------------------------------------------------------------------
1528 -- Allocating arrays
1529
1530 -- | Takes a register to return the newly allocated array in, the size
1531 -- of the new array, and an initial value for the elements. Allocates
1532 -- a new 'MutableArray#'.
1533 doNewArrayOp :: CmmFormal -> Integer -> CmmExpr -> FCode ()
1534 doNewArrayOp res_r n init = do
1535 dflags <- getDynFlags
1536
1537 let info_ptr = mkLblExpr mkMAP_DIRTY_infoLabel
1538
1539 -- ToDo: this probably isn't right (card size?)
1540 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
1541 (mkIntExpr dflags (fromInteger n * wORD_SIZE dflags))
1542 (zeroExpr dflags)
1543
1544 let rep = arrPtrsRep dflags (fromIntegral n)
1545 hdr_size = fixedHdrSize dflags * wORD_SIZE dflags
1546 base <- allocHeapClosure rep info_ptr curCCS
1547 [ (mkIntExpr dflags (fromInteger n),
1548 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
1549 , (mkIntExpr dflags (nonHdrSizeW rep),
1550 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
1551 ]
1552
1553 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1554 emit $ mkAssign arr base
1555
1556 -- Initialise all elements of the the array
1557 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (arrPtrsHdrSize dflags)
1558 for <- newLabelC
1559 emitLabel for
1560 let loopBody =
1561 [ mkStore (CmmReg (CmmLocal p)) init
1562 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1563 , mkBranch for ]
1564 emit =<< mkCmmIfThen
1565 (cmmULtWord dflags (CmmReg (CmmLocal p))
1566 (cmmOffsetW dflags (CmmReg arr) (fromInteger n)))
1567 (catAGraphs loopBody)
1568
1569 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1570
1571 -- | The inline allocation limit is 128 bytes, expressed in words.
1572 maxInlineAllocThreshold :: DynFlags -> Integer
1573 maxInlineAllocThreshold dflags = toInteger (128 `quot` wORD_SIZE dflags)
1574
1575 -- ----------------------------------------------------------------------------
1576 -- Copying pointer arrays
1577
1578 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1579 -- nowhere else in the code generator. This is mostly because these
1580 -- "primitive" ops result in a surprisingly large amount of code. It
1581 -- will likely be worthwhile to optimize what is emitted here, so that
1582 -- our optimization passes don't waste time repeatedly optimizing the
1583 -- same bits of code.
1584
1585 -- More closely imitates 'assignTemp' from the old code generator, which
1586 -- returns a CmmExpr rather than a LocalReg.
1587 assignTempE :: CmmExpr -> FCode CmmExpr
1588 assignTempE e = do
1589 t <- assignTemp e
1590 return (CmmReg (CmmLocal t))
1591
1592 -- | Takes a source 'Array#', an offset in the source array, a
1593 -- destination 'MutableArray#', an offset into the destination array,
1594 -- and the number of elements to copy. Copies the given number of
1595 -- elements from the source array to the destination array.
1596 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1597 -> FCode ()
1598 doCopyArrayOp = emitCopyArray copy
1599 where
1600 -- Copy data (we assume the arrays aren't overlapping since
1601 -- they're of different types)
1602 copy _src _dst dst_p src_p bytes =
1603 do dflags <- getDynFlags
1604 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1605
1606
1607 -- | Takes a source 'MutableArray#', an offset in the source array, a
1608 -- destination 'MutableArray#', an offset into the destination array,
1609 -- and the number of elements to copy. Copies the given number of
1610 -- elements from the source array to the destination array.
1611 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1612 -> FCode ()
1613 doCopyMutableArrayOp = emitCopyArray copy
1614 where
1615 -- The only time the memory might overlap is when the two arrays
1616 -- we were provided are the same array!
1617 -- TODO: Optimize branch for common case of no aliasing.
1618 copy src dst dst_p src_p bytes = do
1619 dflags <- getDynFlags
1620 [moveCall, cpyCall] <- forkAlts [
1621 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags)),
1622 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1623 ]
1624 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1625
1626 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1627 -> FCode ())
1628 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1629 -> FCode ()
1630 emitCopyArray copy src0 src_off0 dst0 dst_off0 n0 = do
1631 dflags <- getDynFlags
1632 n <- assignTempE n0
1633 nonzero <- getCode $ do
1634 -- Passed as arguments (be careful)
1635 src <- assignTempE src0
1636 src_off <- assignTempE src_off0
1637 dst <- assignTempE dst0
1638 dst_off <- assignTempE dst_off0
1639
1640 -- Set the dirty bit in the header.
1641 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1642
1643 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst (arrPtrsHdrSize dflags)
1644 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1645 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1646 bytes <- assignTempE $ cmmMulWord dflags n (mkIntExpr dflags (wORD_SIZE dflags))
1647
1648 copy src dst dst_p src_p bytes
1649
1650 -- The base address of the destination card table
1651 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p (loadArrPtrsSize dflags dst)
1652
1653 emitSetCards dst_off dst_cards_p n
1654
1655 -- TODO: Figure out if this branch is really neccesary.
1656 emit =<< mkCmmIfThen (cmmNeWord dflags n (mkIntExpr dflags 0)) nonzero
1657
1658 -- | Takes an info table label, a register to return the newly
1659 -- allocated array in, a source array, an offset in the source array,
1660 -- and the number of elements to copy. Allocates a new array and
1661 -- initializes it from the source array.
1662 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> CmmExpr
1663 -> FCode ()
1664 emitCloneArray info_p res_r src0 src_off0 n0 = do
1665 dflags <- getDynFlags
1666 let arrPtrsHdrSizeW dflags = mkIntExpr dflags (fixedHdrSize dflags +
1667 (sIZEOF_StgMutArrPtrs_NoHdr dflags `div` wORD_SIZE dflags))
1668 myCapability = cmmSubWord dflags (CmmReg baseReg) (mkIntExpr dflags (oFFSET_Capability_r dflags))
1669 -- Passed as arguments (be careful)
1670 src <- assignTempE src0
1671 src_off <- assignTempE src_off0
1672 n <- assignTempE n0
1673
1674 card_bytes <- assignTempE $ cardRoundUpCmm dflags n
1675 size <- assignTempE $ cmmAddWord dflags n (bytesToWordsRoundUpCmm dflags card_bytes)
1676 words <- assignTempE $ cmmAddWord dflags (arrPtrsHdrSizeW dflags) size
1677
1678 arr_r <- newTemp (bWord dflags)
1679 emitAllocateCall arr_r myCapability words
1680 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags)) (cmmMulWord dflags n (wordSize dflags))
1681 (zeroExpr dflags)
1682
1683 let arr = CmmReg (CmmLocal arr_r)
1684 emitSetDynHdr arr (CmmLit (CmmLabel info_p)) curCCS
1685 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1686 oFFSET_StgMutArrPtrs_ptrs dflags)) n
1687 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1688 oFFSET_StgMutArrPtrs_size dflags)) size
1689
1690 dst_p <- assignTempE $ cmmOffsetB dflags arr (arrPtrsHdrSize dflags)
1691 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags))
1692 src_off
1693
1694 emitMemcpyCall dst_p src_p (cmmMulWord dflags n (wordSize dflags)) (mkIntExpr dflags (wORD_SIZE dflags))
1695
1696 emitMemsetCall (cmmOffsetExprW dflags dst_p n)
1697 (mkIntExpr dflags 1)
1698 card_bytes
1699 (mkIntExpr dflags (wORD_SIZE dflags))
1700 emit $ mkAssign (CmmLocal res_r) arr
1701
1702 -- | Takes and offset in the destination array, the base address of
1703 -- the card table, and the number of elements affected (*not* the
1704 -- number of cards). The number of elements may not be zero.
1705 -- Marks the relevant cards as dirty.
1706 emitSetCards :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1707 emitSetCards dst_start dst_cards_start n = do
1708 dflags <- getDynFlags
1709 start_card <- assignTempE $ cardCmm dflags dst_start
1710 let end_card = cardCmm dflags (cmmSubWord dflags (cmmAddWord dflags dst_start n) (mkIntExpr dflags 1))
1711 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
1712 (mkIntExpr dflags 1)
1713 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
1714 (mkIntExpr dflags 1) -- no alignment (1 byte)
1715
1716 -- Convert an element index to a card index
1717 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
1718 cardCmm dflags i =
1719 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
1720
1721 -- Convert a number of elements to a number of cards, rounding up
1722 cardRoundUpCmm :: DynFlags -> CmmExpr -> CmmExpr
1723 cardRoundUpCmm dflags i =
1724 cardCmm dflags (cmmAddWord dflags i
1725 (mkIntExpr dflags
1726 ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1)))
1727
1728 bytesToWordsRoundUpCmm :: DynFlags -> CmmExpr -> CmmExpr
1729 bytesToWordsRoundUpCmm dflags e =
1730 cmmQuotWord dflags (cmmAddWord dflags e
1731 (mkIntExpr dflags
1732 (wORD_SIZE dflags - 1))) (wordSize dflags)
1733
1734 wordSize :: DynFlags -> CmmExpr
1735 wordSize dflags = mkIntExpr dflags (wORD_SIZE dflags)
1736
1737 -- | Emit a call to @memcpy@.
1738 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1739 emitMemcpyCall dst src n align = do
1740 emitPrimCall
1741 [ {-no results-} ]
1742 MO_Memcpy
1743 [ dst, src, n, align ]
1744
1745 -- | Emit a call to @memmove@.
1746 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1747 emitMemmoveCall dst src n align = do
1748 emitPrimCall
1749 [ {- no results -} ]
1750 MO_Memmove
1751 [ dst, src, n, align ]
1752
1753 -- | Emit a call to @memset@. The second argument must fit inside an
1754 -- unsigned char.
1755 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1756 emitMemsetCall dst c n align = do
1757 emitPrimCall
1758 [ {- no results -} ]
1759 MO_Memset
1760 [ dst, c, n, align ]
1761
1762 -- | Emit a call to @allocate@.
1763 emitAllocateCall :: LocalReg -> CmmExpr -> CmmExpr -> FCode ()
1764 emitAllocateCall res cap n = do
1765 emitCCall
1766 [ (res, AddrHint) ]
1767 allocate
1768 [ (cap, AddrHint)
1769 , (n, NoHint)
1770 ]
1771 where
1772 allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
1773 ForeignLabelInExternalPackage IsFunction))
1774
1775 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1776 emitBSwapCall res x width = do
1777 emitPrimCall
1778 [ res ]
1779 (MO_BSwap width)
1780 [ x ]
1781
1782 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1783 emitPopCntCall res x width = do
1784 emitPrimCall
1785 [ res ]
1786 (MO_PopCnt width)
1787 [ x ]