504510c35909eea3f2f5360f0c0418e5a968361f
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C--: primitive operations
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmPrim (
10 cgOpApp,
11 cgPrimOp -- internal(ish), used by cgCase to get code for a
12 -- comparison without also turning it into a Bool.
13 ) where
14
15 #include "HsVersions.h"
16
17 import StgCmmLayout
18 import StgCmmForeign
19 import StgCmmEnv
20 import StgCmmMonad
21 import StgCmmUtils
22 import StgCmmTicky
23 import StgCmmHeap
24 import StgCmmProf ( costCentreFrom, curCCS )
25
26 import DynFlags
27 import Platform
28 import BasicTypes
29 import MkGraph
30 import StgSyn
31 import Cmm
32 import CmmInfo
33 import Type ( Type, tyConAppTyCon )
34 import TyCon
35 import CLabel
36 import CmmUtils
37 import PrimOp
38 import SMRep
39 import FastString
40 import Outputable
41 import Util
42
43 import Control.Monad (liftM, when)
44 import Data.Bits
45
46 ------------------------------------------------------------------------
47 -- Primitive operations and foreign calls
48 ------------------------------------------------------------------------
49
50 {- Note [Foreign call results]
51 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
52 A foreign call always returns an unboxed tuple of results, one
53 of which is the state token. This seems to happen even for pure
54 calls.
55
56 Even if we returned a single result for pure calls, it'd still be
57 right to wrap it in a singleton unboxed tuple, because the result
58 might be a Haskell closure pointer, we don't want to evaluate it. -}
59
60 ----------------------------------
61 cgOpApp :: StgOp -- The op
62 -> [StgArg] -- Arguments
63 -> Type -- Result type (always an unboxed tuple)
64 -> FCode ReturnKind
65
66 -- Foreign calls
67 cgOpApp (StgFCallOp fcall _) stg_args res_ty
68 = cgForeignCall fcall stg_args res_ty
69 -- Note [Foreign call results]
70
71 -- tagToEnum# is special: we need to pull the constructor
72 -- out of the table, and perform an appropriate return.
73
74 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
75 = ASSERT(isEnumerationTyCon tycon)
76 do { dflags <- getDynFlags
77 ; args' <- getNonVoidArgAmodes [arg]
78 ; let amode = case args' of [amode] -> amode
79 _ -> panic "TagToEnumOp had void arg"
80 ; emitReturn [tagToClosure dflags tycon amode] }
81 where
82 -- If you're reading this code in the attempt to figure
83 -- out why the compiler panic'ed here, it is probably because
84 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
85 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
86 -- That won't work.
87 tycon = tyConAppTyCon res_ty
88
89 cgOpApp (StgPrimOp primop) args res_ty = do
90 dflags <- getDynFlags
91 cmm_args <- getNonVoidArgAmodes args
92 case shouldInlinePrimOp dflags primop cmm_args of
93 Nothing -> do let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
94 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
95
96 Just f
97 | ReturnsPrim VoidRep <- result_info
98 -> do f []
99 emitReturn []
100
101 | ReturnsPrim rep <- result_info
102 -> do dflags <- getDynFlags
103 res <- newTemp (primRepCmmType dflags rep)
104 f [res]
105 emitReturn [CmmReg (CmmLocal res)]
106
107 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
108 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
109 f regs
110 emitReturn (map (CmmReg . CmmLocal) regs)
111
112 | otherwise -> panic "cgPrimop"
113 where
114 result_info = getPrimOpResultInfo primop
115
116 cgOpApp (StgPrimCallOp primcall) args _res_ty
117 = do { cmm_args <- getNonVoidArgAmodes args
118 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
119 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
120
121 -- | Decide whether an out-of-line primop should be replaced by an
122 -- inline implementation. This might happen e.g. if there's enough
123 -- static information, such as statically know arguments, to emit a
124 -- more efficient implementation inline.
125 --
126 -- Returns 'Nothing' if this primop should use its out-of-line
127 -- implementation (defined elsewhere) and 'Just' together with a code
128 -- generating function that takes the output regs as arguments
129 -- otherwise.
130 shouldInlinePrimOp :: DynFlags
131 -> PrimOp -- ^ The primop
132 -> [CmmExpr] -- ^ The primop arguments
133 -> Maybe ([LocalReg] -> FCode ())
134 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n _)), init]
135 | n <= maxInlineAllocThreshold dflags =
136 Just $ \ [res] -> doNewArrayOp res n init
137 shouldInlinePrimOp dflags primop args
138 | primOpOutOfLine primop = Nothing
139 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
140
141 -- TODO: Several primops, such as 'copyArray#', only have an inline
142 -- implementation (below) but could possibly have both an inline
143 -- implementation and an out-of-line implementation, just like
144 -- 'newArray#'. This would lower the amount of code generated,
145 -- hopefully without a performance impact (needs to be measured).
146
147 ---------------------------------------------------
148 cgPrimOp :: [LocalReg] -- where to put the results
149 -> PrimOp -- the op
150 -> [StgArg] -- arguments
151 -> FCode ()
152
153 cgPrimOp results op args
154 = do dflags <- getDynFlags
155 arg_exprs <- getNonVoidArgAmodes args
156 emitPrimOp dflags results op arg_exprs
157
158
159 ------------------------------------------------------------------------
160 -- Emitting code for a primop
161 ------------------------------------------------------------------------
162
163 emitPrimOp :: DynFlags
164 -> [LocalReg] -- where to put the results
165 -> PrimOp -- the op
166 -> [CmmExpr] -- arguments
167 -> FCode ()
168
169 -- First we handle various awkward cases specially. The remaining
170 -- easy cases are then handled by translateOp, defined below.
171
172 emitPrimOp dflags [res_r,res_c] IntAddCOp [aa,bb]
173 {-
174 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
175 C, and without needing any comparisons. This may not be the
176 fastest way to do it - if you have better code, please send it! --SDM
177
178 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
179
180 We currently don't make use of the r value if c is != 0 (i.e.
181 overflow), we just convert to big integers and try again. This
182 could be improved by making r and c the correct values for
183 plugging into a new J#.
184
185 { r = ((I_)(a)) + ((I_)(b)); \
186 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
187 >> (BITS_IN (I_) - 1); \
188 }
189 Wading through the mass of bracketry, it seems to reduce to:
190 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
191
192 -}
193 = emit $ catAGraphs [
194 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
195 mkAssign (CmmLocal res_c) $
196 CmmMachOp (mo_wordUShr dflags) [
197 CmmMachOp (mo_wordAnd dflags) [
198 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
199 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
200 ],
201 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
202 ]
203 ]
204
205
206 emitPrimOp dflags [res_r,res_c] IntSubCOp [aa,bb]
207 {- Similarly:
208 #define subIntCzh(r,c,a,b) \
209 { r = ((I_)(a)) - ((I_)(b)); \
210 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
211 >> (BITS_IN (I_) - 1); \
212 }
213
214 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
215 -}
216 = emit $ catAGraphs [
217 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
218 mkAssign (CmmLocal res_c) $
219 CmmMachOp (mo_wordUShr dflags) [
220 CmmMachOp (mo_wordAnd dflags) [
221 CmmMachOp (mo_wordXor dflags) [aa,bb],
222 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
223 ],
224 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
225 ]
226 ]
227
228
229 emitPrimOp _ [res] ParOp [arg]
230 =
231 -- for now, just implement this in a C function
232 -- later, we might want to inline it.
233 emitCCall
234 [(res,NoHint)]
235 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
236 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
237
238 emitPrimOp dflags [res] SparkOp [arg]
239 = do
240 -- returns the value of arg in res. We're going to therefore
241 -- refer to arg twice (once to pass to newSpark(), and once to
242 -- assign to res), so put it in a temporary.
243 tmp <- assignTemp arg
244 tmp2 <- newTemp (bWord dflags)
245 emitCCall
246 [(tmp2,NoHint)]
247 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
248 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
249 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
250
251 emitPrimOp dflags [res] GetCCSOfOp [arg]
252 = emitAssign (CmmLocal res) val
253 where
254 val
255 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
256 | otherwise = CmmLit (zeroCLit dflags)
257
258 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
259 = emitAssign (CmmLocal res) curCCS
260
261 emitPrimOp dflags [res] ReadMutVarOp [mutv]
262 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSize dflags) (gcWord dflags))
263
264 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
265 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSize dflags)) var
266 emitCCall
267 [{-no results-}]
268 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
269 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
270
271 -- #define sizzeofByteArrayzh(r,a) \
272 -- r = ((StgArrWords *)(a))->bytes
273 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
274 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
275
276 -- #define sizzeofMutableByteArrayzh(r,a) \
277 -- r = ((StgArrWords *)(a))->bytes
278 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
279 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
280
281
282 -- #define touchzh(o) /* nothing */
283 emitPrimOp _ res@[] TouchOp args@[_arg]
284 = do emitPrimCall res MO_Touch args
285
286 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
287 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
288 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
289
290 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
291 emitPrimOp dflags [res] StableNameToIntOp [arg]
292 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
293
294 -- #define eqStableNamezh(r,sn1,sn2) \
295 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
296 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
297 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
298 cmmLoadIndexW dflags arg1 (fixedHdrSize dflags) (bWord dflags),
299 cmmLoadIndexW dflags arg2 (fixedHdrSize dflags) (bWord dflags)
300 ])
301
302
303 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
304 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
305
306 -- #define addrToHValuezh(r,a) r=(P_)a
307 emitPrimOp _ [res] AddrToAnyOp [arg]
308 = emitAssign (CmmLocal res) arg
309
310 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
311 -- Note: argument may be tagged!
312 emitPrimOp dflags [res] DataToTagOp [arg]
313 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
314
315 {- Freezing arrays-of-ptrs requires changing an info table, for the
316 benefit of the generational collector. It needs to scavenge mutable
317 objects, even if they are in old space. When they become immutable,
318 they can be removed from this scavenge list. -}
319
320 -- #define unsafeFreezzeArrayzh(r,a)
321 -- {
322 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
323 -- r = a;
324 -- }
325 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
326 = emit $ catAGraphs
327 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
328 mkAssign (CmmLocal res) arg ]
329 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
330 = emit $ catAGraphs
331 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
332 mkAssign (CmmLocal res) arg ]
333
334 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
335 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
336 = emitAssign (CmmLocal res) arg
337
338 -- Copying pointer arrays
339
340 emitPrimOp _ [] CopyArrayOp [src,src_off,dst,dst_off,n] =
341 doCopyArrayOp src src_off dst dst_off n
342 emitPrimOp _ [] CopyMutableArrayOp [src,src_off,dst,dst_off,n] =
343 doCopyMutableArrayOp src src_off dst dst_off n
344 emitPrimOp _ [res] CloneArrayOp [src,src_off,n] =
345 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
346 emitPrimOp _ [res] CloneMutableArrayOp [src,src_off,n] =
347 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
348 emitPrimOp _ [res] FreezeArrayOp [src,src_off,n] =
349 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
350 emitPrimOp _ [res] ThawArrayOp [src,src_off,n] =
351 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
352
353 emitPrimOp _ [] CopyArrayArrayOp [src,src_off,dst,dst_off,n] =
354 doCopyArrayOp src src_off dst dst_off n
355 emitPrimOp _ [] CopyMutableArrayArrayOp [src,src_off,dst,dst_off,n] =
356 doCopyMutableArrayOp src src_off dst dst_off n
357
358 -- Reading/writing pointer arrays
359
360 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
361 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
362 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
363
364 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
365 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
366 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
367 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
368 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
369 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
370 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
371 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
372 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
373 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
374
375 emitPrimOp dflags [res] SizeofArrayOp [arg]
376 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags))
377 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
378 = emitPrimOp dflags [res] SizeofArrayOp [arg]
379 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
380 = emitPrimOp dflags [res] SizeofArrayOp [arg]
381 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
382 = emitPrimOp dflags [res] SizeofArrayOp [arg]
383
384 -- IndexXXXoffAddr
385
386 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
387 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
388 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
389 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
390 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
391 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
392 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
393 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
394 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
395 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
396 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
397 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
398 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
399 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
400 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
401 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
402
403 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
404
405 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
406 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
407 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
408 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
409 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
410 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
411 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
412 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
413 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
414 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
415 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
416 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
417 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
418 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
419 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
420 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
421
422 -- IndexXXXArray
423
424 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
425 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
426 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
427 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
428 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
429 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
430 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
431 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
432 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
433 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
434 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
435 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
436 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
437 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
438 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
439 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
440
441 -- ReadXXXArray, identical to IndexXXXArray.
442
443 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
444 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
445 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
446 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
447 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
448 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
449 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
450 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
451 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
452 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
453 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
454 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
455 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
456 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
457 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
458 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
459
460 -- WriteXXXoffAddr
461
462 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
463 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
464 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
465 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
466 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
467 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
468 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
469 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
470 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
471 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
472 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
473 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
474 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
475 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
476 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
477 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
478
479 -- WriteXXXArray
480
481 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
482 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
483 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
484 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
485 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
486 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
487 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
488 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
489 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
490 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
491 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
492 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
493 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
494 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
495 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
496 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
497
498 -- Copying and setting byte arrays
499 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
500 doCopyByteArrayOp src src_off dst dst_off n
501 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
502 doCopyMutableByteArrayOp src src_off dst dst_off n
503 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
504 doCopyByteArrayToAddrOp src src_off dst n
505 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
506 doCopyMutableByteArrayToAddrOp src src_off dst n
507 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
508 doCopyAddrToByteArrayOp src dst dst_off n
509 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
510 doSetByteArrayOp ba off len c
511
512 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
513 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
514 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
515 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
516
517 -- Population count
518 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
519 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
520 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
521 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
522 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
523
524 -- Unsigned int to floating point conversions
525 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
526 (MO_UF_Conv W32) [w]
527 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
528 (MO_UF_Conv W64) [w]
529
530 -- SIMD primops
531 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
532 checkVecCompatibility dflags vcat n w
533 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
534 where
535 zeros :: CmmExpr
536 zeros = CmmLit $ CmmVec (replicate n zero)
537
538 zero :: CmmLit
539 zero = case vcat of
540 IntVec -> CmmInt 0 w
541 WordVec -> CmmInt 0 w
542 FloatVec -> CmmFloat 0 w
543
544 ty :: CmmType
545 ty = vecVmmType vcat n w
546
547 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
548 checkVecCompatibility dflags vcat n w
549 when (length es /= n) $
550 panic "emitPrimOp: VecPackOp has wrong number of arguments"
551 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
552 where
553 zeros :: CmmExpr
554 zeros = CmmLit $ CmmVec (replicate n zero)
555
556 zero :: CmmLit
557 zero = case vcat of
558 IntVec -> CmmInt 0 w
559 WordVec -> CmmInt 0 w
560 FloatVec -> CmmFloat 0 w
561
562 ty :: CmmType
563 ty = vecVmmType vcat n w
564
565 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
566 checkVecCompatibility dflags vcat n w
567 when (length res /= n) $
568 panic "emitPrimOp: VecUnpackOp has wrong number of results"
569 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
570 where
571 ty :: CmmType
572 ty = vecVmmType vcat n w
573
574 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
575 checkVecCompatibility dflags vcat n w
576 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
577 where
578 ty :: CmmType
579 ty = vecVmmType vcat n w
580
581 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
582 checkVecCompatibility dflags vcat n w
583 doIndexByteArrayOp Nothing ty res args
584 where
585 ty :: CmmType
586 ty = vecVmmType vcat n w
587
588 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
589 checkVecCompatibility dflags vcat n w
590 doIndexByteArrayOp Nothing ty res args
591 where
592 ty :: CmmType
593 ty = vecVmmType vcat n w
594
595 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
596 checkVecCompatibility dflags vcat n w
597 doWriteByteArrayOp Nothing ty res args
598 where
599 ty :: CmmType
600 ty = vecVmmType vcat n w
601
602 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
603 checkVecCompatibility dflags vcat n w
604 doIndexOffAddrOp Nothing ty res args
605 where
606 ty :: CmmType
607 ty = vecVmmType vcat n w
608
609 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
610 checkVecCompatibility dflags vcat n w
611 doIndexOffAddrOp Nothing ty res args
612 where
613 ty :: CmmType
614 ty = vecVmmType vcat n w
615
616 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
617 checkVecCompatibility dflags vcat n w
618 doWriteOffAddrOp Nothing ty res args
619 where
620 ty :: CmmType
621 ty = vecVmmType vcat n w
622
623 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
624 checkVecCompatibility dflags vcat n w
625 doIndexByteArrayOpAs Nothing vecty ty res args
626 where
627 vecty :: CmmType
628 vecty = vecVmmType vcat n w
629
630 ty :: CmmType
631 ty = vecCmmCat vcat w
632
633 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
634 checkVecCompatibility dflags vcat n w
635 doIndexByteArrayOpAs Nothing vecty ty res args
636 where
637 vecty :: CmmType
638 vecty = vecVmmType vcat n w
639
640 ty :: CmmType
641 ty = vecCmmCat vcat w
642
643 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
644 checkVecCompatibility dflags vcat n w
645 doWriteByteArrayOp Nothing ty res args
646 where
647 ty :: CmmType
648 ty = vecCmmCat vcat w
649
650 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
651 checkVecCompatibility dflags vcat n w
652 doIndexOffAddrOpAs Nothing vecty ty res args
653 where
654 vecty :: CmmType
655 vecty = vecVmmType vcat n w
656
657 ty :: CmmType
658 ty = vecCmmCat vcat w
659
660 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
661 checkVecCompatibility dflags vcat n w
662 doIndexOffAddrOpAs Nothing vecty ty res args
663 where
664 vecty :: CmmType
665 vecty = vecVmmType vcat n w
666
667 ty :: CmmType
668 ty = vecCmmCat vcat w
669
670 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
671 checkVecCompatibility dflags vcat n w
672 doWriteOffAddrOp Nothing ty res args
673 where
674 ty :: CmmType
675 ty = vecCmmCat vcat w
676
677 -- Prefetch
678 emitPrimOp _ res PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
679 emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
680 emitPrimOp _ res PrefetchAddrOp3 args = doPrefetchAddrOp 3 res args
681
682 emitPrimOp _ res PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
683 emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
684 emitPrimOp _ res PrefetchAddrOp2 args = doPrefetchAddrOp 2 res args
685
686 emitPrimOp _ res PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
687 emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
688 emitPrimOp _ res PrefetchAddrOp1 args = doPrefetchAddrOp 1 res args
689
690 emitPrimOp _ res PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
691 emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
692 emitPrimOp _ res PrefetchAddrOp0 args = doPrefetchAddrOp 0 res args
693
694
695 -- The rest just translate straightforwardly
696 emitPrimOp dflags [res] op [arg]
697 | nopOp op
698 = emitAssign (CmmLocal res) arg
699
700 | Just (mop,rep) <- narrowOp op
701 = emitAssign (CmmLocal res) $
702 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
703
704 emitPrimOp dflags r@[res] op args
705 | Just prim <- callishOp op
706 = do emitPrimCall r prim args
707
708 | Just mop <- translateOp dflags op
709 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
710 emit stmt
711
712 emitPrimOp dflags results op args
713 = case callishPrimOpSupported dflags op of
714 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
715 Right gen -> gen results args
716
717 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
718
719 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
720 callishPrimOpSupported dflags op
721 = case op of
722 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
723 | otherwise -> Right (genericIntQuotRemOp dflags)
724
725 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
726 | otherwise -> Right (genericWordQuotRemOp dflags)
727
728 WordQuotRem2Op | ncg && x86ish -> Left (MO_U_QuotRem2 (wordWidth dflags))
729 | otherwise -> Right (genericWordQuotRem2Op dflags)
730
731 WordAdd2Op | ncg && x86ish -> Left (MO_Add2 (wordWidth dflags))
732 | otherwise -> Right genericWordAdd2Op
733
734 WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags))
735 | otherwise -> Right genericWordMul2Op
736
737 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
738 where
739 ncg = case hscTarget dflags of
740 HscAsm -> True
741 _ -> False
742
743 x86ish = case platformArch (targetPlatform dflags) of
744 ArchX86 -> True
745 ArchX86_64 -> True
746 _ -> False
747
748 genericIntQuotRemOp :: DynFlags -> GenericOp
749 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
750 = emit $ mkAssign (CmmLocal res_q)
751 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
752 mkAssign (CmmLocal res_r)
753 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
754 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
755
756 genericWordQuotRemOp :: DynFlags -> GenericOp
757 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
758 = emit $ mkAssign (CmmLocal res_q)
759 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
760 mkAssign (CmmLocal res_r)
761 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
762 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
763
764 genericWordQuotRem2Op :: DynFlags -> GenericOp
765 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
766 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
767 where ty = cmmExprType dflags arg_x_high
768 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
769 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
770 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
771 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
772 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
773 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
774 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
775 zero = lit 0
776 one = lit 1
777 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
778 lit i = CmmLit (CmmInt i (wordWidth dflags))
779
780 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
781 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
782 mkAssign (CmmLocal res_r) high)
783 f i acc high low =
784 do roverflowedBit <- newTemp ty
785 rhigh' <- newTemp ty
786 rhigh'' <- newTemp ty
787 rlow' <- newTemp ty
788 risge <- newTemp ty
789 racc' <- newTemp ty
790 let high' = CmmReg (CmmLocal rhigh')
791 isge = CmmReg (CmmLocal risge)
792 overflowedBit = CmmReg (CmmLocal roverflowedBit)
793 let this = catAGraphs
794 [mkAssign (CmmLocal roverflowedBit)
795 (shr high negone),
796 mkAssign (CmmLocal rhigh')
797 (or (shl high one) (shr low negone)),
798 mkAssign (CmmLocal rlow')
799 (shl low one),
800 mkAssign (CmmLocal risge)
801 (or (overflowedBit `ne` zero)
802 (high' `ge` arg_y)),
803 mkAssign (CmmLocal rhigh'')
804 (high' `minus` (arg_y `times` isge)),
805 mkAssign (CmmLocal racc')
806 (or (shl acc one) isge)]
807 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
808 (CmmReg (CmmLocal rhigh''))
809 (CmmReg (CmmLocal rlow'))
810 return (this <*> rest)
811 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
812
813 genericWordAdd2Op :: GenericOp
814 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
815 = do dflags <- getDynFlags
816 r1 <- newTemp (cmmExprType dflags arg_x)
817 r2 <- newTemp (cmmExprType dflags arg_x)
818 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
819 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
820 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
821 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
822 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
823 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
824 (wordWidth dflags))
825 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
826 emit $ catAGraphs
827 [mkAssign (CmmLocal r1)
828 (add (bottomHalf arg_x) (bottomHalf arg_y)),
829 mkAssign (CmmLocal r2)
830 (add (topHalf (CmmReg (CmmLocal r1)))
831 (add (topHalf arg_x) (topHalf arg_y))),
832 mkAssign (CmmLocal res_h)
833 (topHalf (CmmReg (CmmLocal r2))),
834 mkAssign (CmmLocal res_l)
835 (or (toTopHalf (CmmReg (CmmLocal r2)))
836 (bottomHalf (CmmReg (CmmLocal r1))))]
837 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
838
839 genericWordMul2Op :: GenericOp
840 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
841 = do dflags <- getDynFlags
842 let t = cmmExprType dflags arg_x
843 xlyl <- liftM CmmLocal $ newTemp t
844 xlyh <- liftM CmmLocal $ newTemp t
845 xhyl <- liftM CmmLocal $ newTemp t
846 r <- liftM CmmLocal $ newTemp t
847 -- This generic implementation is very simple and slow. We might
848 -- well be able to do better, but for now this at least works.
849 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
850 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
851 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
852 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
853 sum = foldl1 add
854 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
855 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
856 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
857 (wordWidth dflags))
858 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
859 emit $ catAGraphs
860 [mkAssign xlyl
861 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
862 mkAssign xlyh
863 (mul (bottomHalf arg_x) (topHalf arg_y)),
864 mkAssign xhyl
865 (mul (topHalf arg_x) (bottomHalf arg_y)),
866 mkAssign r
867 (sum [topHalf (CmmReg xlyl),
868 bottomHalf (CmmReg xhyl),
869 bottomHalf (CmmReg xlyh)]),
870 mkAssign (CmmLocal res_l)
871 (or (bottomHalf (CmmReg xlyl))
872 (toTopHalf (CmmReg r))),
873 mkAssign (CmmLocal res_h)
874 (sum [mul (topHalf arg_x) (topHalf arg_y),
875 topHalf (CmmReg xhyl),
876 topHalf (CmmReg xlyh),
877 topHalf (CmmReg r)])]
878 genericWordMul2Op _ _ = panic "genericWordMul2Op"
879
880 -- These PrimOps are NOPs in Cmm
881
882 nopOp :: PrimOp -> Bool
883 nopOp Int2WordOp = True
884 nopOp Word2IntOp = True
885 nopOp Int2AddrOp = True
886 nopOp Addr2IntOp = True
887 nopOp ChrOp = True -- Int# and Char# are rep'd the same
888 nopOp OrdOp = True
889 nopOp _ = False
890
891 -- These PrimOps turn into double casts
892
893 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
894 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
895 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
896 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
897 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
898 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
899 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
900 narrowOp _ = Nothing
901
902 -- Native word signless ops
903
904 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
905 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
906 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
907 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
908 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
909 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
910 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
911
912 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
913 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
914 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
915 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
916 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
917 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
918
919 translateOp dflags AndOp = Just (mo_wordAnd dflags)
920 translateOp dflags OrOp = Just (mo_wordOr dflags)
921 translateOp dflags XorOp = Just (mo_wordXor dflags)
922 translateOp dflags NotOp = Just (mo_wordNot dflags)
923 translateOp dflags SllOp = Just (mo_wordShl dflags)
924 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
925
926 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
927
928 -- Native word signed ops
929
930 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
931 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
932 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
933 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
934 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
935
936
937 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
938 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
939 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
940 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
941
942 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
943 translateOp dflags OrIOp = Just (mo_wordOr dflags)
944 translateOp dflags XorIOp = Just (mo_wordXor dflags)
945 translateOp dflags NotIOp = Just (mo_wordNot dflags)
946 translateOp dflags ISllOp = Just (mo_wordShl dflags)
947 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
948 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
949
950 -- Native word unsigned ops
951
952 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
953 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
954 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
955 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
956
957 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
958 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
959 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
960
961 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
962 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
963 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
964 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
965
966 -- Char# ops
967
968 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
969 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
970 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
971 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
972 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
973 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
974
975 -- Double ops
976
977 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
978 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
979 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
980 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
981 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
982 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
983
984 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
985 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
986 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
987 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
988 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
989
990 -- Float ops
991
992 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
993 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
994 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
995 translateOp _ FloatLeOp = Just (MO_F_Le W32)
996 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
997 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
998
999 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1000 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1001 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1002 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1003 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1004
1005 -- Vector ops
1006
1007 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1008 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1009 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1010 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1011 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1012
1013 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1014 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1015 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1016 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1017 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1018 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1019
1020 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1021 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1022 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1023 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1024 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1025
1026 -- Conversions
1027
1028 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1029 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1030
1031 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1032 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1033
1034 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1035 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1036
1037 -- Word comparisons masquerading as more exotic things.
1038
1039 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1040 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1041 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1042 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1043 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1044 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1045 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1046
1047 translateOp _ _ = Nothing
1048
1049 -- These primops are implemented by CallishMachOps, because they sometimes
1050 -- turn into foreign calls depending on the backend.
1051
1052 callishOp :: PrimOp -> Maybe CallishMachOp
1053 callishOp DoublePowerOp = Just MO_F64_Pwr
1054 callishOp DoubleSinOp = Just MO_F64_Sin
1055 callishOp DoubleCosOp = Just MO_F64_Cos
1056 callishOp DoubleTanOp = Just MO_F64_Tan
1057 callishOp DoubleSinhOp = Just MO_F64_Sinh
1058 callishOp DoubleCoshOp = Just MO_F64_Cosh
1059 callishOp DoubleTanhOp = Just MO_F64_Tanh
1060 callishOp DoubleAsinOp = Just MO_F64_Asin
1061 callishOp DoubleAcosOp = Just MO_F64_Acos
1062 callishOp DoubleAtanOp = Just MO_F64_Atan
1063 callishOp DoubleLogOp = Just MO_F64_Log
1064 callishOp DoubleExpOp = Just MO_F64_Exp
1065 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1066
1067 callishOp FloatPowerOp = Just MO_F32_Pwr
1068 callishOp FloatSinOp = Just MO_F32_Sin
1069 callishOp FloatCosOp = Just MO_F32_Cos
1070 callishOp FloatTanOp = Just MO_F32_Tan
1071 callishOp FloatSinhOp = Just MO_F32_Sinh
1072 callishOp FloatCoshOp = Just MO_F32_Cosh
1073 callishOp FloatTanhOp = Just MO_F32_Tanh
1074 callishOp FloatAsinOp = Just MO_F32_Asin
1075 callishOp FloatAcosOp = Just MO_F32_Acos
1076 callishOp FloatAtanOp = Just MO_F32_Atan
1077 callishOp FloatLogOp = Just MO_F32_Log
1078 callishOp FloatExpOp = Just MO_F32_Exp
1079 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1080
1081 callishOp _ = Nothing
1082
1083 ------------------------------------------------------------------------------
1084 -- Helpers for translating various minor variants of array indexing.
1085
1086 doIndexOffAddrOp :: Maybe MachOp
1087 -> CmmType
1088 -> [LocalReg]
1089 -> [CmmExpr]
1090 -> FCode ()
1091 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1092 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1093 doIndexOffAddrOp _ _ _ _
1094 = panic "StgCmmPrim: doIndexOffAddrOp"
1095
1096 doIndexOffAddrOpAs :: Maybe MachOp
1097 -> CmmType
1098 -> CmmType
1099 -> [LocalReg]
1100 -> [CmmExpr]
1101 -> FCode ()
1102 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1103 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1104 doIndexOffAddrOpAs _ _ _ _ _
1105 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1106
1107 doIndexByteArrayOp :: Maybe MachOp
1108 -> CmmType
1109 -> [LocalReg]
1110 -> [CmmExpr]
1111 -> FCode ()
1112 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1113 = do dflags <- getDynFlags
1114 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1115 doIndexByteArrayOp _ _ _ _
1116 = panic "StgCmmPrim: doIndexByteArrayOp"
1117
1118 doIndexByteArrayOpAs :: Maybe MachOp
1119 -> CmmType
1120 -> CmmType
1121 -> [LocalReg]
1122 -> [CmmExpr]
1123 -> FCode ()
1124 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1125 = do dflags <- getDynFlags
1126 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1127 doIndexByteArrayOpAs _ _ _ _ _
1128 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1129
1130 doReadPtrArrayOp :: LocalReg
1131 -> CmmExpr
1132 -> CmmExpr
1133 -> FCode ()
1134 doReadPtrArrayOp res addr idx
1135 = do dflags <- getDynFlags
1136 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1137
1138 doWriteOffAddrOp :: Maybe MachOp
1139 -> CmmType
1140 -> [LocalReg]
1141 -> [CmmExpr]
1142 -> FCode ()
1143 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1144 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1145 doWriteOffAddrOp _ _ _ _
1146 = panic "StgCmmPrim: doWriteOffAddrOp"
1147
1148 doWriteByteArrayOp :: Maybe MachOp
1149 -> CmmType
1150 -> [LocalReg]
1151 -> [CmmExpr]
1152 -> FCode ()
1153 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1154 = do dflags <- getDynFlags
1155 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1156 doWriteByteArrayOp _ _ _ _
1157 = panic "StgCmmPrim: doWriteByteArrayOp"
1158
1159 doWritePtrArrayOp :: CmmExpr
1160 -> CmmExpr
1161 -> CmmExpr
1162 -> FCode ()
1163 doWritePtrArrayOp addr idx val
1164 = do dflags <- getDynFlags
1165 let ty = cmmExprType dflags val
1166 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1167 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1168 -- the write barrier. We must write a byte into the mark table:
1169 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1170 emit $ mkStore (
1171 cmmOffsetExpr dflags
1172 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1173 (loadArrPtrsSize dflags addr))
1174 (CmmMachOp (mo_wordUShr dflags) [idx,
1175 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1176 ) (CmmLit (CmmInt 1 W8))
1177
1178 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1179 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1180 where off = fixedHdrSize dflags * wORD_SIZE dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1181
1182 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1183 -> Maybe MachOp -- Optional result cast
1184 -> CmmType -- Type of element we are accessing
1185 -> LocalReg -- Destination
1186 -> CmmExpr -- Base address
1187 -> CmmType -- Type of element by which we are indexing
1188 -> CmmExpr -- Index
1189 -> FCode ()
1190 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1191 = do dflags <- getDynFlags
1192 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1193 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1194 = do dflags <- getDynFlags
1195 emitAssign (CmmLocal res) (CmmMachOp cast [
1196 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1197
1198 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1199 -> Maybe MachOp -- Optional value cast
1200 -> CmmExpr -- Base address
1201 -> CmmType -- Type of element by which we are indexing
1202 -> CmmExpr -- Index
1203 -> CmmExpr -- Value to write
1204 -> FCode ()
1205 mkBasicIndexedWrite off Nothing base idx_ty idx val
1206 = do dflags <- getDynFlags
1207 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1208 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1209 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1210
1211 -- ----------------------------------------------------------------------------
1212 -- Misc utils
1213
1214 cmmIndexOffExpr :: DynFlags
1215 -> ByteOff -- Initial offset in bytes
1216 -> Width -- Width of element by which we are indexing
1217 -> CmmExpr -- Base address
1218 -> CmmExpr -- Index
1219 -> CmmExpr
1220 cmmIndexOffExpr dflags off width base idx
1221 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1222
1223 cmmLoadIndexOffExpr :: DynFlags
1224 -> ByteOff -- Initial offset in bytes
1225 -> CmmType -- Type of element we are accessing
1226 -> CmmExpr -- Base address
1227 -> CmmType -- Type of element by which we are indexing
1228 -> CmmExpr -- Index
1229 -> CmmExpr
1230 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1231 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1232
1233 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1234 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1235
1236 ------------------------------------------------------------------------------
1237 -- Helpers for translating vector primops.
1238
1239 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1240 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1241
1242 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1243 vecCmmCat IntVec = cmmBits
1244 vecCmmCat WordVec = cmmBits
1245 vecCmmCat FloatVec = cmmFloat
1246
1247 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1248 vecElemInjectCast _ FloatVec _ = Nothing
1249 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1250 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1251 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1252 vecElemInjectCast _ IntVec W64 = Nothing
1253 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1254 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1255 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1256 vecElemInjectCast _ WordVec W64 = Nothing
1257 vecElemInjectCast _ _ _ = Nothing
1258
1259 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1260 vecElemProjectCast _ FloatVec _ = Nothing
1261 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1262 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1263 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1264 vecElemProjectCast _ IntVec W64 = Nothing
1265 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1266 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1267 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1268 vecElemProjectCast _ WordVec W64 = Nothing
1269 vecElemProjectCast _ _ _ = Nothing
1270
1271 -- Check to make sure that we can generate code for the specified vector type
1272 -- given the current set of dynamic flags.
1273 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1274 checkVecCompatibility dflags vcat l w = do
1275 when (hscTarget dflags /= HscLlvm) $ do
1276 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1277 ,"Please use -fllvm."]
1278 check vecWidth vcat l w
1279 where
1280 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1281 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1282 sorry $ "128-bit wide single-precision floating point " ++
1283 "SIMD vector instructions require at least -msse."
1284 check W128 _ _ _ | not (isSse2Enabled dflags) =
1285 sorry $ "128-bit wide integer and double precision " ++
1286 "SIMD vector instructions require at least -msse2."
1287 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1288 sorry $ "256-bit wide floating point " ++
1289 "SIMD vector instructions require at least -mavx."
1290 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1291 sorry $ "256-bit wide integer " ++
1292 "SIMD vector instructions require at least -mavx2."
1293 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1294 sorry $ "512-bit wide " ++
1295 "SIMD vector instructions require -mavx512f."
1296 check _ _ _ _ = return ()
1297
1298 vecWidth = typeWidth (vecVmmType vcat l w)
1299
1300 ------------------------------------------------------------------------------
1301 -- Helpers for translating vector packing and unpacking.
1302
1303 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1304 -> CmmType -- Type of vector
1305 -> CmmExpr -- Initial vector
1306 -> [CmmExpr] -- Elements
1307 -> CmmFormal -- Destination for result
1308 -> FCode ()
1309 doVecPackOp maybe_pre_write_cast ty z es res = do
1310 dst <- newTemp ty
1311 emitAssign (CmmLocal dst) z
1312 vecPack dst es 0
1313 where
1314 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1315 vecPack src [] _ =
1316 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1317
1318 vecPack src (e : es) i = do
1319 dst <- newTemp ty
1320 if isFloatType (vecElemType ty)
1321 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1322 [CmmReg (CmmLocal src), cast e, iLit])
1323 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1324 [CmmReg (CmmLocal src), cast e, iLit])
1325 vecPack dst es (i + 1)
1326 where
1327 -- vector indices are always 32-bits
1328 iLit = CmmLit (CmmInt (toInteger i) W32)
1329
1330 cast :: CmmExpr -> CmmExpr
1331 cast val = case maybe_pre_write_cast of
1332 Nothing -> val
1333 Just cast -> CmmMachOp cast [val]
1334
1335 len :: Length
1336 len = vecLength ty
1337
1338 wid :: Width
1339 wid = typeWidth (vecElemType ty)
1340
1341 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1342 -> CmmType -- Type of vector
1343 -> CmmExpr -- Vector
1344 -> [CmmFormal] -- Element results
1345 -> FCode ()
1346 doVecUnpackOp maybe_post_read_cast ty e res =
1347 vecUnpack res 0
1348 where
1349 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1350 vecUnpack [] _ =
1351 return ()
1352
1353 vecUnpack (r : rs) i = do
1354 if isFloatType (vecElemType ty)
1355 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1356 [e, iLit]))
1357 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1358 [e, iLit]))
1359 vecUnpack rs (i + 1)
1360 where
1361 -- vector indices are always 32-bits
1362 iLit = CmmLit (CmmInt (toInteger i) W32)
1363
1364 cast :: CmmExpr -> CmmExpr
1365 cast val = case maybe_post_read_cast of
1366 Nothing -> val
1367 Just cast -> CmmMachOp cast [val]
1368
1369 len :: Length
1370 len = vecLength ty
1371
1372 wid :: Width
1373 wid = typeWidth (vecElemType ty)
1374
1375 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1376 -> CmmType -- Vector type
1377 -> CmmExpr -- Source vector
1378 -> CmmExpr -- Element
1379 -> CmmExpr -- Index at which to insert element
1380 -> CmmFormal -- Destination for result
1381 -> FCode ()
1382 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1383 dflags <- getDynFlags
1384 -- vector indices are always 32-bits
1385 let idx' :: CmmExpr
1386 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1387 if isFloatType (vecElemType ty)
1388 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1389 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1390 where
1391 cast :: CmmExpr -> CmmExpr
1392 cast val = case maybe_pre_write_cast of
1393 Nothing -> val
1394 Just cast -> CmmMachOp cast [val]
1395
1396 len :: Length
1397 len = vecLength ty
1398
1399 wid :: Width
1400 wid = typeWidth (vecElemType ty)
1401
1402 ------------------------------------------------------------------------------
1403 -- Helpers for translating prefetching.
1404
1405 doPrefetchByteArrayOp :: Int
1406 -> [LocalReg]
1407 -> [CmmExpr]
1408 -> FCode ()
1409 doPrefetchByteArrayOp locality res [addr,idx]
1410 = do dflags <- getDynFlags
1411 mkBasicPrefetch locality (arrWordsHdrSize dflags) res addr idx
1412 doPrefetchByteArrayOp _ _ _
1413 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1414
1415 doPrefetchAddrOp ::Int
1416 -> [LocalReg]
1417 -> [CmmExpr]
1418 -> FCode ()
1419 doPrefetchAddrOp locality res [addr,idx]
1420 = mkBasicPrefetch locality 0 res addr idx
1421 doPrefetchAddrOp _ _ _
1422 = panic "StgCmmPrim: doPrefetchAddrOp"
1423
1424 mkBasicPrefetch :: Int -- Locality level 0-3
1425 -> ByteOff -- Initial offset in bytes
1426 -> [LocalReg] -- Destination
1427 -> CmmExpr -- Base address
1428 -> CmmExpr -- Index
1429 -> FCode ()
1430 mkBasicPrefetch locality off res base idx
1431 = do dflags <- getDynFlags
1432 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1433 case res of
1434 [] -> return ()
1435 [reg] -> emitAssign (CmmLocal reg) base
1436 _ -> panic "StgCmmPrim: mkBasicPrefetch"
1437
1438 -- ----------------------------------------------------------------------------
1439 -- Copying byte arrays
1440
1441 -- | Takes a source 'ByteArray#', an offset in the source array, a
1442 -- destination 'MutableByteArray#', an offset into the destination
1443 -- array, and the number of bytes to copy. Copies the given number of
1444 -- bytes from the source array to the destination array.
1445 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1446 -> FCode ()
1447 doCopyByteArrayOp = emitCopyByteArray copy
1448 where
1449 -- Copy data (we assume the arrays aren't overlapping since
1450 -- they're of different types)
1451 copy _src _dst dst_p src_p bytes =
1452 do dflags <- getDynFlags
1453 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1454
1455 -- | Takes a source 'MutableByteArray#', an offset in the source
1456 -- array, a destination 'MutableByteArray#', an offset into the
1457 -- destination array, and the number of bytes to copy. Copies the
1458 -- given number of bytes from the source array to the destination
1459 -- array.
1460 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1461 -> FCode ()
1462 doCopyMutableByteArrayOp = emitCopyByteArray copy
1463 where
1464 -- The only time the memory might overlap is when the two arrays
1465 -- we were provided are the same array!
1466 -- TODO: Optimize branch for common case of no aliasing.
1467 copy src dst dst_p src_p bytes = do
1468 dflags <- getDynFlags
1469 [moveCall, cpyCall] <- forkAlts [
1470 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags 1),
1471 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1472 ]
1473 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1474
1475 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1476 -> FCode ())
1477 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1478 -> FCode ()
1479 emitCopyByteArray copy src src_off dst dst_off n = do
1480 dflags <- getDynFlags
1481 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1482 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1483 copy src dst dst_p src_p n
1484
1485 -- | Takes a source 'ByteArray#', an offset in the source array, a
1486 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1487 -- number of bytes from the source array to the destination memory region.
1488 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1489 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1490 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1491 dflags <- getDynFlags
1492 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1493 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1494
1495 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1496 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1497 -- number of bytes from the source array to the destination memory region.
1498 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1499 -> FCode ()
1500 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1501
1502 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1503 -- the destination array, and the number of bytes to copy. Copies the given
1504 -- number of bytes from the source memory region to the destination array.
1505 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1506 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1507 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1508 dflags <- getDynFlags
1509 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1510 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1511
1512
1513 -- ----------------------------------------------------------------------------
1514 -- Setting byte arrays
1515
1516 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1517 -- and a byte, and sets each of the selected bytes in the array to the
1518 -- character.
1519 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1520 -> FCode ()
1521 doSetByteArrayOp ba off len c
1522 = do dflags <- getDynFlags
1523 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1524 emitMemsetCall p c len (mkIntExpr dflags 1)
1525
1526 -- ----------------------------------------------------------------------------
1527 -- Allocating arrays
1528
1529 -- | Takes a register to return the newly allocated array in, the size
1530 -- of the new array, and an initial value for the elements. Allocates
1531 -- a new 'MutableArray#'.
1532 doNewArrayOp :: CmmFormal -> Integer -> CmmExpr -> FCode ()
1533 doNewArrayOp res_r n init = do
1534 dflags <- getDynFlags
1535
1536 let card_bytes = cardRoundUp dflags (fromInteger n)
1537 size = fromInteger n + bytesToWordsRoundUp dflags card_bytes
1538 words = arrPtrsHdrSizeWords dflags + size
1539
1540 -- If the allocation is of small, statically-known size, we reuse
1541 -- the existing heap check to allocate inline.
1542 virt_hp <- getVirtHp
1543
1544 -- FIND THE OFFSET OF THE INFO-PTR WORD
1545 let info_offset = virt_hp + 1
1546 -- info_offset is the VirtualHpOffset of the first
1547 -- word of the new object
1548 -- Remember, virtHp points to last allocated word,
1549 -- ie 1 *before* the info-ptr word of new object.
1550 base <- getHpRelOffset info_offset
1551 setVirtHp (virt_hp + fromIntegral words) -- check n < big
1552 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1553 emit $ mkAssign arr base
1554 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
1555 (cmmMulWord dflags (mkIntExpr dflags (fromInteger n)) (wordSize dflags))
1556 (zeroExpr dflags)
1557
1558 emitSetDynHdr base (mkLblExpr mkMAP_DIRTY_infoLabel) curCCS
1559 emit $ mkStore (cmmOffsetB dflags base
1560 (fixedHdrSize dflags * wORD_SIZE dflags +
1561 oFFSET_StgMutArrPtrs_ptrs dflags))
1562 (mkIntExpr dflags (fromInteger n))
1563 emit $ mkStore (cmmOffsetB dflags base
1564 (fixedHdrSize dflags * wORD_SIZE dflags +
1565 oFFSET_StgMutArrPtrs_size dflags)) (mkIntExpr dflags size)
1566
1567 -- Initialise all elements of the the array
1568 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (arrPtrsHdrSize dflags)
1569 for <- newLabelC
1570 emitLabel for
1571 let loopBody =
1572 [ mkStore (CmmReg (CmmLocal p)) init
1573 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1574 , mkBranch for ]
1575 emit =<< mkCmmIfThen
1576 (cmmULtWord dflags (CmmReg (CmmLocal p))
1577 (cmmOffsetW dflags (CmmReg arr) (fromInteger n)))
1578 (catAGraphs loopBody)
1579
1580 -- Initialise the mark bits with 0. This will be unrolled in the
1581 -- backend to e.g. a single assignment since the arguments are
1582 -- statically known.
1583 emitMemsetCall
1584 (cmmOffsetExprW dflags (CmmReg (CmmLocal p))
1585 (mkIntExpr dflags (fromInteger n)))
1586 (mkIntExpr dflags 0)
1587 (mkIntExpr dflags card_bytes)
1588 (mkIntExpr dflags (wORD_SIZE dflags))
1589 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1590
1591 -- | The inline allocation limit is 128 bytes, expressed in words.
1592 maxInlineAllocThreshold :: DynFlags -> Integer
1593 maxInlineAllocThreshold dflags = toInteger (128 `quot` wORD_SIZE dflags)
1594
1595 arrPtrsHdrSizeWords :: DynFlags -> WordOff
1596 arrPtrsHdrSizeWords dflags =
1597 fixedHdrSize dflags +
1598 (sIZEOF_StgMutArrPtrs_NoHdr dflags `div` wORD_SIZE dflags)
1599
1600 -- ----------------------------------------------------------------------------
1601 -- Copying pointer arrays
1602
1603 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1604 -- nowhere else in the code generator. This is mostly because these
1605 -- "primitive" ops result in a surprisingly large amount of code. It
1606 -- will likely be worthwhile to optimize what is emitted here, so that
1607 -- our optimization passes don't waste time repeatedly optimizing the
1608 -- same bits of code.
1609
1610 -- More closely imitates 'assignTemp' from the old code generator, which
1611 -- returns a CmmExpr rather than a LocalReg.
1612 assignTempE :: CmmExpr -> FCode CmmExpr
1613 assignTempE e = do
1614 t <- assignTemp e
1615 return (CmmReg (CmmLocal t))
1616
1617 -- | Takes a source 'Array#', an offset in the source array, a
1618 -- destination 'MutableArray#', an offset into the destination array,
1619 -- and the number of elements to copy. Copies the given number of
1620 -- elements from the source array to the destination array.
1621 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1622 -> FCode ()
1623 doCopyArrayOp = emitCopyArray copy
1624 where
1625 -- Copy data (we assume the arrays aren't overlapping since
1626 -- they're of different types)
1627 copy _src _dst dst_p src_p bytes =
1628 do dflags <- getDynFlags
1629 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1630
1631
1632 -- | Takes a source 'MutableArray#', an offset in the source array, a
1633 -- destination 'MutableArray#', an offset into the destination array,
1634 -- and the number of elements to copy. Copies the given number of
1635 -- elements from the source array to the destination array.
1636 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1637 -> FCode ()
1638 doCopyMutableArrayOp = emitCopyArray copy
1639 where
1640 -- The only time the memory might overlap is when the two arrays
1641 -- we were provided are the same array!
1642 -- TODO: Optimize branch for common case of no aliasing.
1643 copy src dst dst_p src_p bytes = do
1644 dflags <- getDynFlags
1645 [moveCall, cpyCall] <- forkAlts [
1646 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags)),
1647 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1648 ]
1649 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1650
1651 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1652 -> FCode ())
1653 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1654 -> FCode ()
1655 emitCopyArray copy src0 src_off0 dst0 dst_off0 n0 = do
1656 dflags <- getDynFlags
1657 n <- assignTempE n0
1658 nonzero <- getCode $ do
1659 -- Passed as arguments (be careful)
1660 src <- assignTempE src0
1661 src_off <- assignTempE src_off0
1662 dst <- assignTempE dst0
1663 dst_off <- assignTempE dst_off0
1664
1665 -- Set the dirty bit in the header.
1666 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1667
1668 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst (arrPtrsHdrSize dflags)
1669 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1670 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1671 bytes <- assignTempE $ cmmMulWord dflags n (mkIntExpr dflags (wORD_SIZE dflags))
1672
1673 copy src dst dst_p src_p bytes
1674
1675 -- The base address of the destination card table
1676 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p (loadArrPtrsSize dflags dst)
1677
1678 emitSetCards dst_off dst_cards_p n
1679
1680 -- TODO: Figure out if this branch is really neccesary.
1681 emit =<< mkCmmIfThen (cmmNeWord dflags n (mkIntExpr dflags 0)) nonzero
1682
1683 -- | Takes an info table label, a register to return the newly
1684 -- allocated array in, a source array, an offset in the source array,
1685 -- and the number of elements to copy. Allocates a new array and
1686 -- initializes it from the source array.
1687 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> CmmExpr
1688 -> FCode ()
1689 emitCloneArray info_p res_r src0 src_off0 n0 = do
1690 dflags <- getDynFlags
1691 let arrPtrsHdrSizeW dflags = mkIntExpr dflags (fixedHdrSize dflags +
1692 (sIZEOF_StgMutArrPtrs_NoHdr dflags `div` wORD_SIZE dflags))
1693 myCapability = cmmSubWord dflags (CmmReg baseReg) (mkIntExpr dflags (oFFSET_Capability_r dflags))
1694 -- Passed as arguments (be careful)
1695 src <- assignTempE src0
1696 src_off <- assignTempE src_off0
1697 n <- assignTempE n0
1698
1699 card_bytes <- assignTempE $ cardRoundUpCmm dflags n
1700 size <- assignTempE $ cmmAddWord dflags n (bytesToWordsRoundUpCmm dflags card_bytes)
1701 words <- assignTempE $ cmmAddWord dflags (arrPtrsHdrSizeW dflags) size
1702
1703 arr_r <- newTemp (bWord dflags)
1704 emitAllocateCall arr_r myCapability words
1705 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags)) (cmmMulWord dflags n (wordSize dflags))
1706 (zeroExpr dflags)
1707
1708 let arr = CmmReg (CmmLocal arr_r)
1709 emitSetDynHdr arr (CmmLit (CmmLabel info_p)) curCCS
1710 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1711 oFFSET_StgMutArrPtrs_ptrs dflags)) n
1712 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1713 oFFSET_StgMutArrPtrs_size dflags)) size
1714
1715 dst_p <- assignTempE $ cmmOffsetB dflags arr (arrPtrsHdrSize dflags)
1716 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags))
1717 src_off
1718
1719 emitMemcpyCall dst_p src_p (cmmMulWord dflags n (wordSize dflags)) (mkIntExpr dflags (wORD_SIZE dflags))
1720
1721 emitMemsetCall (cmmOffsetExprW dflags dst_p n)
1722 (mkIntExpr dflags 1)
1723 card_bytes
1724 (mkIntExpr dflags (wORD_SIZE dflags))
1725 emit $ mkAssign (CmmLocal res_r) arr
1726
1727 card :: DynFlags -> Int -> Int
1728 card dflags i = i `shiftR` mUT_ARR_PTRS_CARD_BITS dflags
1729
1730 -- Convert a number of elements to a number of cards, rounding up
1731 cardRoundUp :: DynFlags -> Int -> Int
1732 cardRoundUp dflags i =
1733 card dflags (i + ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1))
1734
1735 bytesToWordsRoundUp :: DynFlags -> Int -> Int
1736 bytesToWordsRoundUp dflags e =
1737 (e + wORD_SIZE dflags - 1) `quot` (wORD_SIZE dflags)
1738
1739 -- | Takes and offset in the destination array, the base address of
1740 -- the card table, and the number of elements affected (*not* the
1741 -- number of cards). The number of elements may not be zero.
1742 -- Marks the relevant cards as dirty.
1743 emitSetCards :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1744 emitSetCards dst_start dst_cards_start n = do
1745 dflags <- getDynFlags
1746 start_card <- assignTempE $ cardCmm dflags dst_start
1747 let end_card = cardCmm dflags (cmmSubWord dflags (cmmAddWord dflags dst_start n) (mkIntExpr dflags 1))
1748 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
1749 (mkIntExpr dflags 1)
1750 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
1751 (mkIntExpr dflags 1) -- no alignment (1 byte)
1752
1753 -- Convert an element index to a card index
1754 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
1755 cardCmm dflags i =
1756 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
1757
1758 -- Convert a number of elements to a number of cards, rounding up
1759 cardRoundUpCmm :: DynFlags -> CmmExpr -> CmmExpr
1760 cardRoundUpCmm dflags i =
1761 cardCmm dflags (cmmAddWord dflags i
1762 (mkIntExpr dflags
1763 ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1)))
1764
1765 bytesToWordsRoundUpCmm :: DynFlags -> CmmExpr -> CmmExpr
1766 bytesToWordsRoundUpCmm dflags e =
1767 cmmQuotWord dflags (cmmAddWord dflags e
1768 (mkIntExpr dflags
1769 (wORD_SIZE dflags - 1))) (wordSize dflags)
1770
1771 wordSize :: DynFlags -> CmmExpr
1772 wordSize dflags = mkIntExpr dflags (wORD_SIZE dflags)
1773
1774 -- | Emit a call to @memcpy@.
1775 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1776 emitMemcpyCall dst src n align = do
1777 emitPrimCall
1778 [ {-no results-} ]
1779 MO_Memcpy
1780 [ dst, src, n, align ]
1781
1782 -- | Emit a call to @memmove@.
1783 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1784 emitMemmoveCall dst src n align = do
1785 emitPrimCall
1786 [ {- no results -} ]
1787 MO_Memmove
1788 [ dst, src, n, align ]
1789
1790 -- | Emit a call to @memset@. The second argument must fit inside an
1791 -- unsigned char.
1792 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1793 emitMemsetCall dst c n align = do
1794 emitPrimCall
1795 [ {- no results -} ]
1796 MO_Memset
1797 [ dst, c, n, align ]
1798
1799 -- | Emit a call to @allocate@.
1800 emitAllocateCall :: LocalReg -> CmmExpr -> CmmExpr -> FCode ()
1801 emitAllocateCall res cap n = do
1802 emitCCall
1803 [ (res, AddrHint) ]
1804 allocate
1805 [ (cap, AddrHint)
1806 , (n, NoHint)
1807 ]
1808 where
1809 allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
1810 ForeignLabelInExternalPackage IsFunction))
1811
1812 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1813 emitBSwapCall res x width = do
1814 emitPrimCall
1815 [ res ]
1816 (MO_BSwap width)
1817 [ x ]
1818
1819 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1820 emitPopCntCall res x width = do
1821 emitPrimCall
1822 [ res ]
1823 (MO_PopCnt width)
1824 [ x ]