codeGen: allocate small byte arrays of statically known size inline
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C--: primitive operations
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmPrim (
10 cgOpApp,
11 cgPrimOp -- internal(ish), used by cgCase to get code for a
12 -- comparison without also turning it into a Bool.
13 ) where
14
15 #include "HsVersions.h"
16
17 import StgCmmLayout
18 import StgCmmForeign
19 import StgCmmEnv
20 import StgCmmMonad
21 import StgCmmUtils
22 import StgCmmTicky
23 import StgCmmHeap
24 import StgCmmProf ( costCentreFrom, curCCS )
25
26 import DynFlags
27 import Platform
28 import BasicTypes
29 import MkGraph
30 import StgSyn
31 import Cmm
32 import CmmInfo
33 import Type ( Type, tyConAppTyCon )
34 import TyCon
35 import CLabel
36 import CmmUtils
37 import PrimOp
38 import SMRep
39 import FastString
40 import Outputable
41 import Util
42
43 import Control.Monad (liftM, when)
44 import Data.Bits
45
46 ------------------------------------------------------------------------
47 -- Primitive operations and foreign calls
48 ------------------------------------------------------------------------
49
50 {- Note [Foreign call results]
51 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
52 A foreign call always returns an unboxed tuple of results, one
53 of which is the state token. This seems to happen even for pure
54 calls.
55
56 Even if we returned a single result for pure calls, it'd still be
57 right to wrap it in a singleton unboxed tuple, because the result
58 might be a Haskell closure pointer, we don't want to evaluate it. -}
59
60 ----------------------------------
61 cgOpApp :: StgOp -- The op
62 -> [StgArg] -- Arguments
63 -> Type -- Result type (always an unboxed tuple)
64 -> FCode ReturnKind
65
66 -- Foreign calls
67 cgOpApp (StgFCallOp fcall _) stg_args res_ty
68 = cgForeignCall fcall stg_args res_ty
69 -- Note [Foreign call results]
70
71 -- tagToEnum# is special: we need to pull the constructor
72 -- out of the table, and perform an appropriate return.
73
74 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
75 = ASSERT(isEnumerationTyCon tycon)
76 do { dflags <- getDynFlags
77 ; args' <- getNonVoidArgAmodes [arg]
78 ; let amode = case args' of [amode] -> amode
79 _ -> panic "TagToEnumOp had void arg"
80 ; emitReturn [tagToClosure dflags tycon amode] }
81 where
82 -- If you're reading this code in the attempt to figure
83 -- out why the compiler panic'ed here, it is probably because
84 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
85 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
86 -- That won't work.
87 tycon = tyConAppTyCon res_ty
88
89 cgOpApp (StgPrimOp primop) args res_ty = do
90 dflags <- getDynFlags
91 cmm_args <- getNonVoidArgAmodes args
92 case shouldInlinePrimOp dflags primop cmm_args of
93 Nothing -> do -- out-of-line
94 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
95 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
96
97 Just f -- inline
98 | ReturnsPrim VoidRep <- result_info
99 -> do f []
100 emitReturn []
101
102 | ReturnsPrim rep <- result_info
103 -> do dflags <- getDynFlags
104 res <- newTemp (primRepCmmType dflags rep)
105 f [res]
106 emitReturn [CmmReg (CmmLocal res)]
107
108 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
109 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
110 f regs
111 emitReturn (map (CmmReg . CmmLocal) regs)
112
113 | otherwise -> panic "cgPrimop"
114 where
115 result_info = getPrimOpResultInfo primop
116
117 cgOpApp (StgPrimCallOp primcall) args _res_ty
118 = do { cmm_args <- getNonVoidArgAmodes args
119 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
120 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
121
122 -- | Decide whether an out-of-line primop should be replaced by an
123 -- inline implementation. This might happen e.g. if there's enough
124 -- static information, such as statically know arguments, to emit a
125 -- more efficient implementation inline.
126 --
127 -- Returns 'Nothing' if this primop should use its out-of-line
128 -- implementation (defined elsewhere) and 'Just' together with a code
129 -- generating function that takes the output regs as arguments
130 -- otherwise.
131 shouldInlinePrimOp :: DynFlags
132 -> PrimOp -- ^ The primop
133 -> [CmmExpr] -- ^ The primop arguments
134 -> Maybe ([LocalReg] -> FCode ())
135 shouldInlinePrimOp _ NewByteArrayOp_Char [(CmmLit (CmmInt n _))]
136 | fromInteger n <= maxInlineAllocThreshold =
137 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
138 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n _)), init]
139 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocThreshold =
140 Just $ \ [res] -> doNewArrayOp res (fromInteger n) init
141 shouldInlinePrimOp dflags primop args
142 | primOpOutOfLine primop = Nothing
143 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
144
145 -- TODO: Several primops, such as 'copyArray#', only have an inline
146 -- implementation (below) but could possibly have both an inline
147 -- implementation and an out-of-line implementation, just like
148 -- 'newArray#'. This would lower the amount of code generated,
149 -- hopefully without a performance impact (needs to be measured).
150
151 ---------------------------------------------------
152 cgPrimOp :: [LocalReg] -- where to put the results
153 -> PrimOp -- the op
154 -> [StgArg] -- arguments
155 -> FCode ()
156
157 cgPrimOp results op args
158 = do dflags <- getDynFlags
159 arg_exprs <- getNonVoidArgAmodes args
160 emitPrimOp dflags results op arg_exprs
161
162
163 ------------------------------------------------------------------------
164 -- Emitting code for a primop
165 ------------------------------------------------------------------------
166
167 emitPrimOp :: DynFlags
168 -> [LocalReg] -- where to put the results
169 -> PrimOp -- the op
170 -> [CmmExpr] -- arguments
171 -> FCode ()
172
173 -- First we handle various awkward cases specially. The remaining
174 -- easy cases are then handled by translateOp, defined below.
175
176 emitPrimOp dflags [res_r,res_c] IntAddCOp [aa,bb]
177 {-
178 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
179 C, and without needing any comparisons. This may not be the
180 fastest way to do it - if you have better code, please send it! --SDM
181
182 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
183
184 We currently don't make use of the r value if c is != 0 (i.e.
185 overflow), we just convert to big integers and try again. This
186 could be improved by making r and c the correct values for
187 plugging into a new J#.
188
189 { r = ((I_)(a)) + ((I_)(b)); \
190 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
191 >> (BITS_IN (I_) - 1); \
192 }
193 Wading through the mass of bracketry, it seems to reduce to:
194 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
195
196 -}
197 = emit $ catAGraphs [
198 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
199 mkAssign (CmmLocal res_c) $
200 CmmMachOp (mo_wordUShr dflags) [
201 CmmMachOp (mo_wordAnd dflags) [
202 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
203 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
204 ],
205 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
206 ]
207 ]
208
209
210 emitPrimOp dflags [res_r,res_c] IntSubCOp [aa,bb]
211 {- Similarly:
212 #define subIntCzh(r,c,a,b) \
213 { r = ((I_)(a)) - ((I_)(b)); \
214 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
215 >> (BITS_IN (I_) - 1); \
216 }
217
218 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
219 -}
220 = emit $ catAGraphs [
221 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
222 mkAssign (CmmLocal res_c) $
223 CmmMachOp (mo_wordUShr dflags) [
224 CmmMachOp (mo_wordAnd dflags) [
225 CmmMachOp (mo_wordXor dflags) [aa,bb],
226 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
227 ],
228 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
229 ]
230 ]
231
232
233 emitPrimOp _ [res] ParOp [arg]
234 =
235 -- for now, just implement this in a C function
236 -- later, we might want to inline it.
237 emitCCall
238 [(res,NoHint)]
239 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
240 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
241
242 emitPrimOp dflags [res] SparkOp [arg]
243 = do
244 -- returns the value of arg in res. We're going to therefore
245 -- refer to arg twice (once to pass to newSpark(), and once to
246 -- assign to res), so put it in a temporary.
247 tmp <- assignTemp arg
248 tmp2 <- newTemp (bWord dflags)
249 emitCCall
250 [(tmp2,NoHint)]
251 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
252 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
253 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
254
255 emitPrimOp dflags [res] GetCCSOfOp [arg]
256 = emitAssign (CmmLocal res) val
257 where
258 val
259 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
260 | otherwise = CmmLit (zeroCLit dflags)
261
262 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
263 = emitAssign (CmmLocal res) curCCS
264
265 emitPrimOp dflags [res] ReadMutVarOp [mutv]
266 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSize dflags) (gcWord dflags))
267
268 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
269 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSize dflags)) var
270 emitCCall
271 [{-no results-}]
272 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
273 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
274
275 -- #define sizzeofByteArrayzh(r,a) \
276 -- r = ((StgArrWords *)(a))->bytes
277 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
278 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
279
280 -- #define sizzeofMutableByteArrayzh(r,a) \
281 -- r = ((StgArrWords *)(a))->bytes
282 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
283 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
284
285
286 -- #define touchzh(o) /* nothing */
287 emitPrimOp _ res@[] TouchOp args@[_arg]
288 = do emitPrimCall res MO_Touch args
289
290 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
291 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
292 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
293
294 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
295 emitPrimOp dflags [res] StableNameToIntOp [arg]
296 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
297
298 -- #define eqStableNamezh(r,sn1,sn2) \
299 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
300 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
301 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
302 cmmLoadIndexW dflags arg1 (fixedHdrSize dflags) (bWord dflags),
303 cmmLoadIndexW dflags arg2 (fixedHdrSize dflags) (bWord dflags)
304 ])
305
306
307 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
308 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
309
310 -- #define addrToHValuezh(r,a) r=(P_)a
311 emitPrimOp _ [res] AddrToAnyOp [arg]
312 = emitAssign (CmmLocal res) arg
313
314 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
315 -- Note: argument may be tagged!
316 emitPrimOp dflags [res] DataToTagOp [arg]
317 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
318
319 {- Freezing arrays-of-ptrs requires changing an info table, for the
320 benefit of the generational collector. It needs to scavenge mutable
321 objects, even if they are in old space. When they become immutable,
322 they can be removed from this scavenge list. -}
323
324 -- #define unsafeFreezzeArrayzh(r,a)
325 -- {
326 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
327 -- r = a;
328 -- }
329 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
330 = emit $ catAGraphs
331 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
332 mkAssign (CmmLocal res) arg ]
333 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
334 = emit $ catAGraphs
335 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
336 mkAssign (CmmLocal res) arg ]
337
338 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
339 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
340 = emitAssign (CmmLocal res) arg
341
342 -- Copying pointer arrays
343
344 emitPrimOp _ [] CopyArrayOp [src,src_off,dst,dst_off,n] =
345 doCopyArrayOp src src_off dst dst_off n
346 emitPrimOp _ [] CopyMutableArrayOp [src,src_off,dst,dst_off,n] =
347 doCopyMutableArrayOp src src_off dst dst_off n
348 emitPrimOp _ [res] CloneArrayOp [src,src_off,n] =
349 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
350 emitPrimOp _ [res] CloneMutableArrayOp [src,src_off,n] =
351 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
352 emitPrimOp _ [res] FreezeArrayOp [src,src_off,n] =
353 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
354 emitPrimOp _ [res] ThawArrayOp [src,src_off,n] =
355 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
356
357 emitPrimOp _ [] CopyArrayArrayOp [src,src_off,dst,dst_off,n] =
358 doCopyArrayOp src src_off dst dst_off n
359 emitPrimOp _ [] CopyMutableArrayArrayOp [src,src_off,dst,dst_off,n] =
360 doCopyMutableArrayOp src src_off dst dst_off n
361
362 -- Reading/writing pointer arrays
363
364 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
365 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
366 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
367
368 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
369 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
370 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
371 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
372 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
373 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
374 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
375 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
376 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
377 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
378
379 emitPrimOp dflags [res] SizeofArrayOp [arg]
380 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags))
381 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
382 = emitPrimOp dflags [res] SizeofArrayOp [arg]
383 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
384 = emitPrimOp dflags [res] SizeofArrayOp [arg]
385 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
386 = emitPrimOp dflags [res] SizeofArrayOp [arg]
387
388 -- IndexXXXoffAddr
389
390 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
391 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
392 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
393 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
394 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
395 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
396 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
397 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
398 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
399 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
400 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
401 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
402 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
403 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
404 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
405 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
406
407 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
408
409 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
410 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
411 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
412 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
413 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
414 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
415 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
416 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
417 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
418 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
419 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
420 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
421 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
422 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
423 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
424 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
425
426 -- IndexXXXArray
427
428 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
429 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
430 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
431 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
432 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
433 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
434 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
435 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
436 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
437 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
438 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
439 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
440 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
441 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
442 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
443 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
444
445 -- ReadXXXArray, identical to IndexXXXArray.
446
447 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
448 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
449 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
450 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
451 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
452 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
453 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
454 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
455 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
456 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
457 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
458 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
459 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
460 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
461 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
462 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
463
464 -- WriteXXXoffAddr
465
466 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
467 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
468 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
469 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
470 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
471 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
472 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
473 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
474 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
475 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
476 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
477 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
478 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
479 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
480 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
481 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
482
483 -- WriteXXXArray
484
485 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
486 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
487 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
488 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
489 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
490 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
491 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
492 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
493 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
494 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
495 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
496 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
497 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
498 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
499 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
500 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
501
502 -- Copying and setting byte arrays
503 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
504 doCopyByteArrayOp src src_off dst dst_off n
505 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
506 doCopyMutableByteArrayOp src src_off dst dst_off n
507 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
508 doCopyByteArrayToAddrOp src src_off dst n
509 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
510 doCopyMutableByteArrayToAddrOp src src_off dst n
511 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
512 doCopyAddrToByteArrayOp src dst dst_off n
513 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
514 doSetByteArrayOp ba off len c
515
516 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
517 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
518 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
519 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
520
521 -- Population count
522 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
523 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
524 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
525 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
526 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
527
528 -- Unsigned int to floating point conversions
529 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
530 (MO_UF_Conv W32) [w]
531 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
532 (MO_UF_Conv W64) [w]
533
534 -- SIMD primops
535 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
536 checkVecCompatibility dflags vcat n w
537 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
538 where
539 zeros :: CmmExpr
540 zeros = CmmLit $ CmmVec (replicate n zero)
541
542 zero :: CmmLit
543 zero = case vcat of
544 IntVec -> CmmInt 0 w
545 WordVec -> CmmInt 0 w
546 FloatVec -> CmmFloat 0 w
547
548 ty :: CmmType
549 ty = vecVmmType vcat n w
550
551 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
552 checkVecCompatibility dflags vcat n w
553 when (length es /= n) $
554 panic "emitPrimOp: VecPackOp has wrong number of arguments"
555 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
556 where
557 zeros :: CmmExpr
558 zeros = CmmLit $ CmmVec (replicate n zero)
559
560 zero :: CmmLit
561 zero = case vcat of
562 IntVec -> CmmInt 0 w
563 WordVec -> CmmInt 0 w
564 FloatVec -> CmmFloat 0 w
565
566 ty :: CmmType
567 ty = vecVmmType vcat n w
568
569 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
570 checkVecCompatibility dflags vcat n w
571 when (length res /= n) $
572 panic "emitPrimOp: VecUnpackOp has wrong number of results"
573 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
574 where
575 ty :: CmmType
576 ty = vecVmmType vcat n w
577
578 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
579 checkVecCompatibility dflags vcat n w
580 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
581 where
582 ty :: CmmType
583 ty = vecVmmType vcat n w
584
585 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
586 checkVecCompatibility dflags vcat n w
587 doIndexByteArrayOp Nothing ty res args
588 where
589 ty :: CmmType
590 ty = vecVmmType vcat n w
591
592 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
593 checkVecCompatibility dflags vcat n w
594 doIndexByteArrayOp Nothing ty res args
595 where
596 ty :: CmmType
597 ty = vecVmmType vcat n w
598
599 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
600 checkVecCompatibility dflags vcat n w
601 doWriteByteArrayOp Nothing ty res args
602 where
603 ty :: CmmType
604 ty = vecVmmType vcat n w
605
606 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
607 checkVecCompatibility dflags vcat n w
608 doIndexOffAddrOp Nothing ty res args
609 where
610 ty :: CmmType
611 ty = vecVmmType vcat n w
612
613 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
614 checkVecCompatibility dflags vcat n w
615 doIndexOffAddrOp Nothing ty res args
616 where
617 ty :: CmmType
618 ty = vecVmmType vcat n w
619
620 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
621 checkVecCompatibility dflags vcat n w
622 doWriteOffAddrOp Nothing ty res args
623 where
624 ty :: CmmType
625 ty = vecVmmType vcat n w
626
627 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
628 checkVecCompatibility dflags vcat n w
629 doIndexByteArrayOpAs Nothing vecty ty res args
630 where
631 vecty :: CmmType
632 vecty = vecVmmType vcat n w
633
634 ty :: CmmType
635 ty = vecCmmCat vcat w
636
637 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
638 checkVecCompatibility dflags vcat n w
639 doIndexByteArrayOpAs Nothing vecty ty res args
640 where
641 vecty :: CmmType
642 vecty = vecVmmType vcat n w
643
644 ty :: CmmType
645 ty = vecCmmCat vcat w
646
647 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
648 checkVecCompatibility dflags vcat n w
649 doWriteByteArrayOp Nothing ty res args
650 where
651 ty :: CmmType
652 ty = vecCmmCat vcat w
653
654 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
655 checkVecCompatibility dflags vcat n w
656 doIndexOffAddrOpAs Nothing vecty ty res args
657 where
658 vecty :: CmmType
659 vecty = vecVmmType vcat n w
660
661 ty :: CmmType
662 ty = vecCmmCat vcat w
663
664 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
665 checkVecCompatibility dflags vcat n w
666 doIndexOffAddrOpAs Nothing vecty ty res args
667 where
668 vecty :: CmmType
669 vecty = vecVmmType vcat n w
670
671 ty :: CmmType
672 ty = vecCmmCat vcat w
673
674 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
675 checkVecCompatibility dflags vcat n w
676 doWriteOffAddrOp Nothing ty res args
677 where
678 ty :: CmmType
679 ty = vecCmmCat vcat w
680
681 -- Prefetch
682 emitPrimOp _ res PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
683 emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
684 emitPrimOp _ res PrefetchAddrOp3 args = doPrefetchAddrOp 3 res args
685
686 emitPrimOp _ res PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
687 emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
688 emitPrimOp _ res PrefetchAddrOp2 args = doPrefetchAddrOp 2 res args
689
690 emitPrimOp _ res PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
691 emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
692 emitPrimOp _ res PrefetchAddrOp1 args = doPrefetchAddrOp 1 res args
693
694 emitPrimOp _ res PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
695 emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
696 emitPrimOp _ res PrefetchAddrOp0 args = doPrefetchAddrOp 0 res args
697
698
699 -- The rest just translate straightforwardly
700 emitPrimOp dflags [res] op [arg]
701 | nopOp op
702 = emitAssign (CmmLocal res) arg
703
704 | Just (mop,rep) <- narrowOp op
705 = emitAssign (CmmLocal res) $
706 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
707
708 emitPrimOp dflags r@[res] op args
709 | Just prim <- callishOp op
710 = do emitPrimCall r prim args
711
712 | Just mop <- translateOp dflags op
713 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
714 emit stmt
715
716 emitPrimOp dflags results op args
717 = case callishPrimOpSupported dflags op of
718 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
719 Right gen -> gen results args
720
721 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
722
723 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
724 callishPrimOpSupported dflags op
725 = case op of
726 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
727 | otherwise -> Right (genericIntQuotRemOp dflags)
728
729 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
730 | otherwise -> Right (genericWordQuotRemOp dflags)
731
732 WordQuotRem2Op | ncg && x86ish -> Left (MO_U_QuotRem2 (wordWidth dflags))
733 | otherwise -> Right (genericWordQuotRem2Op dflags)
734
735 WordAdd2Op | ncg && x86ish -> Left (MO_Add2 (wordWidth dflags))
736 | otherwise -> Right genericWordAdd2Op
737
738 WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags))
739 | otherwise -> Right genericWordMul2Op
740
741 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
742 where
743 ncg = case hscTarget dflags of
744 HscAsm -> True
745 _ -> False
746
747 x86ish = case platformArch (targetPlatform dflags) of
748 ArchX86 -> True
749 ArchX86_64 -> True
750 _ -> False
751
752 genericIntQuotRemOp :: DynFlags -> GenericOp
753 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
754 = emit $ mkAssign (CmmLocal res_q)
755 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
756 mkAssign (CmmLocal res_r)
757 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
758 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
759
760 genericWordQuotRemOp :: DynFlags -> GenericOp
761 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
762 = emit $ mkAssign (CmmLocal res_q)
763 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
764 mkAssign (CmmLocal res_r)
765 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
766 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
767
768 genericWordQuotRem2Op :: DynFlags -> GenericOp
769 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
770 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
771 where ty = cmmExprType dflags arg_x_high
772 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
773 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
774 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
775 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
776 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
777 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
778 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
779 zero = lit 0
780 one = lit 1
781 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
782 lit i = CmmLit (CmmInt i (wordWidth dflags))
783
784 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
785 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
786 mkAssign (CmmLocal res_r) high)
787 f i acc high low =
788 do roverflowedBit <- newTemp ty
789 rhigh' <- newTemp ty
790 rhigh'' <- newTemp ty
791 rlow' <- newTemp ty
792 risge <- newTemp ty
793 racc' <- newTemp ty
794 let high' = CmmReg (CmmLocal rhigh')
795 isge = CmmReg (CmmLocal risge)
796 overflowedBit = CmmReg (CmmLocal roverflowedBit)
797 let this = catAGraphs
798 [mkAssign (CmmLocal roverflowedBit)
799 (shr high negone),
800 mkAssign (CmmLocal rhigh')
801 (or (shl high one) (shr low negone)),
802 mkAssign (CmmLocal rlow')
803 (shl low one),
804 mkAssign (CmmLocal risge)
805 (or (overflowedBit `ne` zero)
806 (high' `ge` arg_y)),
807 mkAssign (CmmLocal rhigh'')
808 (high' `minus` (arg_y `times` isge)),
809 mkAssign (CmmLocal racc')
810 (or (shl acc one) isge)]
811 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
812 (CmmReg (CmmLocal rhigh''))
813 (CmmReg (CmmLocal rlow'))
814 return (this <*> rest)
815 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
816
817 genericWordAdd2Op :: GenericOp
818 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
819 = do dflags <- getDynFlags
820 r1 <- newTemp (cmmExprType dflags arg_x)
821 r2 <- newTemp (cmmExprType dflags arg_x)
822 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
823 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
824 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
825 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
826 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
827 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
828 (wordWidth dflags))
829 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
830 emit $ catAGraphs
831 [mkAssign (CmmLocal r1)
832 (add (bottomHalf arg_x) (bottomHalf arg_y)),
833 mkAssign (CmmLocal r2)
834 (add (topHalf (CmmReg (CmmLocal r1)))
835 (add (topHalf arg_x) (topHalf arg_y))),
836 mkAssign (CmmLocal res_h)
837 (topHalf (CmmReg (CmmLocal r2))),
838 mkAssign (CmmLocal res_l)
839 (or (toTopHalf (CmmReg (CmmLocal r2)))
840 (bottomHalf (CmmReg (CmmLocal r1))))]
841 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
842
843 genericWordMul2Op :: GenericOp
844 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
845 = do dflags <- getDynFlags
846 let t = cmmExprType dflags arg_x
847 xlyl <- liftM CmmLocal $ newTemp t
848 xlyh <- liftM CmmLocal $ newTemp t
849 xhyl <- liftM CmmLocal $ newTemp t
850 r <- liftM CmmLocal $ newTemp t
851 -- This generic implementation is very simple and slow. We might
852 -- well be able to do better, but for now this at least works.
853 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
854 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
855 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
856 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
857 sum = foldl1 add
858 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
859 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
860 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
861 (wordWidth dflags))
862 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
863 emit $ catAGraphs
864 [mkAssign xlyl
865 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
866 mkAssign xlyh
867 (mul (bottomHalf arg_x) (topHalf arg_y)),
868 mkAssign xhyl
869 (mul (topHalf arg_x) (bottomHalf arg_y)),
870 mkAssign r
871 (sum [topHalf (CmmReg xlyl),
872 bottomHalf (CmmReg xhyl),
873 bottomHalf (CmmReg xlyh)]),
874 mkAssign (CmmLocal res_l)
875 (or (bottomHalf (CmmReg xlyl))
876 (toTopHalf (CmmReg r))),
877 mkAssign (CmmLocal res_h)
878 (sum [mul (topHalf arg_x) (topHalf arg_y),
879 topHalf (CmmReg xhyl),
880 topHalf (CmmReg xlyh),
881 topHalf (CmmReg r)])]
882 genericWordMul2Op _ _ = panic "genericWordMul2Op"
883
884 -- These PrimOps are NOPs in Cmm
885
886 nopOp :: PrimOp -> Bool
887 nopOp Int2WordOp = True
888 nopOp Word2IntOp = True
889 nopOp Int2AddrOp = True
890 nopOp Addr2IntOp = True
891 nopOp ChrOp = True -- Int# and Char# are rep'd the same
892 nopOp OrdOp = True
893 nopOp _ = False
894
895 -- These PrimOps turn into double casts
896
897 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
898 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
899 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
900 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
901 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
902 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
903 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
904 narrowOp _ = Nothing
905
906 -- Native word signless ops
907
908 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
909 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
910 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
911 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
912 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
913 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
914 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
915
916 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
917 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
918 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
919 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
920 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
921 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
922
923 translateOp dflags AndOp = Just (mo_wordAnd dflags)
924 translateOp dflags OrOp = Just (mo_wordOr dflags)
925 translateOp dflags XorOp = Just (mo_wordXor dflags)
926 translateOp dflags NotOp = Just (mo_wordNot dflags)
927 translateOp dflags SllOp = Just (mo_wordShl dflags)
928 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
929
930 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
931
932 -- Native word signed ops
933
934 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
935 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
936 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
937 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
938 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
939
940
941 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
942 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
943 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
944 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
945
946 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
947 translateOp dflags OrIOp = Just (mo_wordOr dflags)
948 translateOp dflags XorIOp = Just (mo_wordXor dflags)
949 translateOp dflags NotIOp = Just (mo_wordNot dflags)
950 translateOp dflags ISllOp = Just (mo_wordShl dflags)
951 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
952 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
953
954 -- Native word unsigned ops
955
956 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
957 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
958 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
959 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
960
961 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
962 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
963 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
964
965 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
966 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
967 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
968 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
969
970 -- Char# ops
971
972 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
973 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
974 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
975 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
976 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
977 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
978
979 -- Double ops
980
981 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
982 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
983 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
984 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
985 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
986 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
987
988 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
989 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
990 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
991 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
992 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
993
994 -- Float ops
995
996 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
997 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
998 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
999 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1000 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1001 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1002
1003 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1004 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1005 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1006 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1007 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1008
1009 -- Vector ops
1010
1011 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1012 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1013 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1014 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1015 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1016
1017 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1018 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1019 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1020 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1021 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1022 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1023
1024 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1025 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1026 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1027 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1028 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1029
1030 -- Conversions
1031
1032 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1033 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1034
1035 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1036 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1037
1038 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1039 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1040
1041 -- Word comparisons masquerading as more exotic things.
1042
1043 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1044 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1045 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1046 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1047 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1048 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1049 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1050
1051 translateOp _ _ = Nothing
1052
1053 -- These primops are implemented by CallishMachOps, because they sometimes
1054 -- turn into foreign calls depending on the backend.
1055
1056 callishOp :: PrimOp -> Maybe CallishMachOp
1057 callishOp DoublePowerOp = Just MO_F64_Pwr
1058 callishOp DoubleSinOp = Just MO_F64_Sin
1059 callishOp DoubleCosOp = Just MO_F64_Cos
1060 callishOp DoubleTanOp = Just MO_F64_Tan
1061 callishOp DoubleSinhOp = Just MO_F64_Sinh
1062 callishOp DoubleCoshOp = Just MO_F64_Cosh
1063 callishOp DoubleTanhOp = Just MO_F64_Tanh
1064 callishOp DoubleAsinOp = Just MO_F64_Asin
1065 callishOp DoubleAcosOp = Just MO_F64_Acos
1066 callishOp DoubleAtanOp = Just MO_F64_Atan
1067 callishOp DoubleLogOp = Just MO_F64_Log
1068 callishOp DoubleExpOp = Just MO_F64_Exp
1069 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1070
1071 callishOp FloatPowerOp = Just MO_F32_Pwr
1072 callishOp FloatSinOp = Just MO_F32_Sin
1073 callishOp FloatCosOp = Just MO_F32_Cos
1074 callishOp FloatTanOp = Just MO_F32_Tan
1075 callishOp FloatSinhOp = Just MO_F32_Sinh
1076 callishOp FloatCoshOp = Just MO_F32_Cosh
1077 callishOp FloatTanhOp = Just MO_F32_Tanh
1078 callishOp FloatAsinOp = Just MO_F32_Asin
1079 callishOp FloatAcosOp = Just MO_F32_Acos
1080 callishOp FloatAtanOp = Just MO_F32_Atan
1081 callishOp FloatLogOp = Just MO_F32_Log
1082 callishOp FloatExpOp = Just MO_F32_Exp
1083 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1084
1085 callishOp _ = Nothing
1086
1087 ------------------------------------------------------------------------------
1088 -- Helpers for translating various minor variants of array indexing.
1089
1090 doIndexOffAddrOp :: Maybe MachOp
1091 -> CmmType
1092 -> [LocalReg]
1093 -> [CmmExpr]
1094 -> FCode ()
1095 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1096 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1097 doIndexOffAddrOp _ _ _ _
1098 = panic "StgCmmPrim: doIndexOffAddrOp"
1099
1100 doIndexOffAddrOpAs :: Maybe MachOp
1101 -> CmmType
1102 -> CmmType
1103 -> [LocalReg]
1104 -> [CmmExpr]
1105 -> FCode ()
1106 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1107 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1108 doIndexOffAddrOpAs _ _ _ _ _
1109 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1110
1111 doIndexByteArrayOp :: Maybe MachOp
1112 -> CmmType
1113 -> [LocalReg]
1114 -> [CmmExpr]
1115 -> FCode ()
1116 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1117 = do dflags <- getDynFlags
1118 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1119 doIndexByteArrayOp _ _ _ _
1120 = panic "StgCmmPrim: doIndexByteArrayOp"
1121
1122 doIndexByteArrayOpAs :: Maybe MachOp
1123 -> CmmType
1124 -> CmmType
1125 -> [LocalReg]
1126 -> [CmmExpr]
1127 -> FCode ()
1128 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1129 = do dflags <- getDynFlags
1130 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1131 doIndexByteArrayOpAs _ _ _ _ _
1132 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1133
1134 doReadPtrArrayOp :: LocalReg
1135 -> CmmExpr
1136 -> CmmExpr
1137 -> FCode ()
1138 doReadPtrArrayOp res addr idx
1139 = do dflags <- getDynFlags
1140 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1141
1142 doWriteOffAddrOp :: Maybe MachOp
1143 -> CmmType
1144 -> [LocalReg]
1145 -> [CmmExpr]
1146 -> FCode ()
1147 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1148 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1149 doWriteOffAddrOp _ _ _ _
1150 = panic "StgCmmPrim: doWriteOffAddrOp"
1151
1152 doWriteByteArrayOp :: Maybe MachOp
1153 -> CmmType
1154 -> [LocalReg]
1155 -> [CmmExpr]
1156 -> FCode ()
1157 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1158 = do dflags <- getDynFlags
1159 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1160 doWriteByteArrayOp _ _ _ _
1161 = panic "StgCmmPrim: doWriteByteArrayOp"
1162
1163 doWritePtrArrayOp :: CmmExpr
1164 -> CmmExpr
1165 -> CmmExpr
1166 -> FCode ()
1167 doWritePtrArrayOp addr idx val
1168 = do dflags <- getDynFlags
1169 let ty = cmmExprType dflags val
1170 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1171 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1172 -- the write barrier. We must write a byte into the mark table:
1173 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1174 emit $ mkStore (
1175 cmmOffsetExpr dflags
1176 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1177 (loadArrPtrsSize dflags addr))
1178 (CmmMachOp (mo_wordUShr dflags) [idx,
1179 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1180 ) (CmmLit (CmmInt 1 W8))
1181
1182 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1183 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1184 where off = fixedHdrSize dflags * wORD_SIZE dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1185
1186 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1187 -> Maybe MachOp -- Optional result cast
1188 -> CmmType -- Type of element we are accessing
1189 -> LocalReg -- Destination
1190 -> CmmExpr -- Base address
1191 -> CmmType -- Type of element by which we are indexing
1192 -> CmmExpr -- Index
1193 -> FCode ()
1194 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1195 = do dflags <- getDynFlags
1196 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1197 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1198 = do dflags <- getDynFlags
1199 emitAssign (CmmLocal res) (CmmMachOp cast [
1200 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1201
1202 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1203 -> Maybe MachOp -- Optional value cast
1204 -> CmmExpr -- Base address
1205 -> CmmType -- Type of element by which we are indexing
1206 -> CmmExpr -- Index
1207 -> CmmExpr -- Value to write
1208 -> FCode ()
1209 mkBasicIndexedWrite off Nothing base idx_ty idx val
1210 = do dflags <- getDynFlags
1211 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1212 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1213 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1214
1215 -- ----------------------------------------------------------------------------
1216 -- Misc utils
1217
1218 cmmIndexOffExpr :: DynFlags
1219 -> ByteOff -- Initial offset in bytes
1220 -> Width -- Width of element by which we are indexing
1221 -> CmmExpr -- Base address
1222 -> CmmExpr -- Index
1223 -> CmmExpr
1224 cmmIndexOffExpr dflags off width base idx
1225 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1226
1227 cmmLoadIndexOffExpr :: DynFlags
1228 -> ByteOff -- Initial offset in bytes
1229 -> CmmType -- Type of element we are accessing
1230 -> CmmExpr -- Base address
1231 -> CmmType -- Type of element by which we are indexing
1232 -> CmmExpr -- Index
1233 -> CmmExpr
1234 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1235 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1236
1237 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1238 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1239
1240 ------------------------------------------------------------------------------
1241 -- Helpers for translating vector primops.
1242
1243 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1244 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1245
1246 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1247 vecCmmCat IntVec = cmmBits
1248 vecCmmCat WordVec = cmmBits
1249 vecCmmCat FloatVec = cmmFloat
1250
1251 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1252 vecElemInjectCast _ FloatVec _ = Nothing
1253 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1254 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1255 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1256 vecElemInjectCast _ IntVec W64 = Nothing
1257 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1258 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1259 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1260 vecElemInjectCast _ WordVec W64 = Nothing
1261 vecElemInjectCast _ _ _ = Nothing
1262
1263 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1264 vecElemProjectCast _ FloatVec _ = Nothing
1265 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1266 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1267 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1268 vecElemProjectCast _ IntVec W64 = Nothing
1269 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1270 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1271 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1272 vecElemProjectCast _ WordVec W64 = Nothing
1273 vecElemProjectCast _ _ _ = Nothing
1274
1275 -- Check to make sure that we can generate code for the specified vector type
1276 -- given the current set of dynamic flags.
1277 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1278 checkVecCompatibility dflags vcat l w = do
1279 when (hscTarget dflags /= HscLlvm) $ do
1280 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1281 ,"Please use -fllvm."]
1282 check vecWidth vcat l w
1283 where
1284 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1285 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1286 sorry $ "128-bit wide single-precision floating point " ++
1287 "SIMD vector instructions require at least -msse."
1288 check W128 _ _ _ | not (isSse2Enabled dflags) =
1289 sorry $ "128-bit wide integer and double precision " ++
1290 "SIMD vector instructions require at least -msse2."
1291 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1292 sorry $ "256-bit wide floating point " ++
1293 "SIMD vector instructions require at least -mavx."
1294 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1295 sorry $ "256-bit wide integer " ++
1296 "SIMD vector instructions require at least -mavx2."
1297 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1298 sorry $ "512-bit wide " ++
1299 "SIMD vector instructions require -mavx512f."
1300 check _ _ _ _ = return ()
1301
1302 vecWidth = typeWidth (vecVmmType vcat l w)
1303
1304 ------------------------------------------------------------------------------
1305 -- Helpers for translating vector packing and unpacking.
1306
1307 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1308 -> CmmType -- Type of vector
1309 -> CmmExpr -- Initial vector
1310 -> [CmmExpr] -- Elements
1311 -> CmmFormal -- Destination for result
1312 -> FCode ()
1313 doVecPackOp maybe_pre_write_cast ty z es res = do
1314 dst <- newTemp ty
1315 emitAssign (CmmLocal dst) z
1316 vecPack dst es 0
1317 where
1318 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1319 vecPack src [] _ =
1320 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1321
1322 vecPack src (e : es) i = do
1323 dst <- newTemp ty
1324 if isFloatType (vecElemType ty)
1325 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1326 [CmmReg (CmmLocal src), cast e, iLit])
1327 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1328 [CmmReg (CmmLocal src), cast e, iLit])
1329 vecPack dst es (i + 1)
1330 where
1331 -- vector indices are always 32-bits
1332 iLit = CmmLit (CmmInt (toInteger i) W32)
1333
1334 cast :: CmmExpr -> CmmExpr
1335 cast val = case maybe_pre_write_cast of
1336 Nothing -> val
1337 Just cast -> CmmMachOp cast [val]
1338
1339 len :: Length
1340 len = vecLength ty
1341
1342 wid :: Width
1343 wid = typeWidth (vecElemType ty)
1344
1345 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1346 -> CmmType -- Type of vector
1347 -> CmmExpr -- Vector
1348 -> [CmmFormal] -- Element results
1349 -> FCode ()
1350 doVecUnpackOp maybe_post_read_cast ty e res =
1351 vecUnpack res 0
1352 where
1353 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1354 vecUnpack [] _ =
1355 return ()
1356
1357 vecUnpack (r : rs) i = do
1358 if isFloatType (vecElemType ty)
1359 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1360 [e, iLit]))
1361 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1362 [e, iLit]))
1363 vecUnpack rs (i + 1)
1364 where
1365 -- vector indices are always 32-bits
1366 iLit = CmmLit (CmmInt (toInteger i) W32)
1367
1368 cast :: CmmExpr -> CmmExpr
1369 cast val = case maybe_post_read_cast of
1370 Nothing -> val
1371 Just cast -> CmmMachOp cast [val]
1372
1373 len :: Length
1374 len = vecLength ty
1375
1376 wid :: Width
1377 wid = typeWidth (vecElemType ty)
1378
1379 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1380 -> CmmType -- Vector type
1381 -> CmmExpr -- Source vector
1382 -> CmmExpr -- Element
1383 -> CmmExpr -- Index at which to insert element
1384 -> CmmFormal -- Destination for result
1385 -> FCode ()
1386 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1387 dflags <- getDynFlags
1388 -- vector indices are always 32-bits
1389 let idx' :: CmmExpr
1390 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1391 if isFloatType (vecElemType ty)
1392 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1393 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1394 where
1395 cast :: CmmExpr -> CmmExpr
1396 cast val = case maybe_pre_write_cast of
1397 Nothing -> val
1398 Just cast -> CmmMachOp cast [val]
1399
1400 len :: Length
1401 len = vecLength ty
1402
1403 wid :: Width
1404 wid = typeWidth (vecElemType ty)
1405
1406 ------------------------------------------------------------------------------
1407 -- Helpers for translating prefetching.
1408
1409 doPrefetchByteArrayOp :: Int
1410 -> [LocalReg]
1411 -> [CmmExpr]
1412 -> FCode ()
1413 doPrefetchByteArrayOp locality res [addr,idx]
1414 = do dflags <- getDynFlags
1415 mkBasicPrefetch locality (arrWordsHdrSize dflags) res addr idx
1416 doPrefetchByteArrayOp _ _ _
1417 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1418
1419 doPrefetchAddrOp ::Int
1420 -> [LocalReg]
1421 -> [CmmExpr]
1422 -> FCode ()
1423 doPrefetchAddrOp locality res [addr,idx]
1424 = mkBasicPrefetch locality 0 res addr idx
1425 doPrefetchAddrOp _ _ _
1426 = panic "StgCmmPrim: doPrefetchAddrOp"
1427
1428 mkBasicPrefetch :: Int -- Locality level 0-3
1429 -> ByteOff -- Initial offset in bytes
1430 -> [LocalReg] -- Destination
1431 -> CmmExpr -- Base address
1432 -> CmmExpr -- Index
1433 -> FCode ()
1434 mkBasicPrefetch locality off res base idx
1435 = do dflags <- getDynFlags
1436 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1437 case res of
1438 [] -> return ()
1439 [reg] -> emitAssign (CmmLocal reg) base
1440 _ -> panic "StgCmmPrim: mkBasicPrefetch"
1441
1442 -- ----------------------------------------------------------------------------
1443 -- Allocating byte arrays
1444
1445 -- | Takes a register to return the newly allocated array in and the
1446 -- size of the new array in bytes. Allocates a new
1447 -- 'MutableByteArray#'.
1448 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1449 doNewByteArrayOp res_r n = do
1450 dflags <- getDynFlags
1451
1452 let info_ptr = mkLblExpr mkArrWords_infoLabel
1453 rep = arrWordsRep dflags n
1454
1455 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1456 (mkIntExpr dflags (nonHdrSize dflags rep))
1457 (zeroExpr dflags)
1458
1459 let hdr_size = wordsToBytes dflags (fixedHdrSize dflags)
1460
1461 base <- allocHeapClosure rep info_ptr curCCS
1462 [ (mkIntExpr dflags n,
1463 hdr_size + oFFSET_StgArrWords_bytes dflags)
1464 ]
1465
1466 emit $ mkAssign (CmmLocal res_r) base
1467
1468 -- ----------------------------------------------------------------------------
1469 -- Copying byte arrays
1470
1471 -- | Takes a source 'ByteArray#', an offset in the source array, a
1472 -- destination 'MutableByteArray#', an offset into the destination
1473 -- array, and the number of bytes to copy. Copies the given number of
1474 -- bytes from the source array to the destination array.
1475 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1476 -> FCode ()
1477 doCopyByteArrayOp = emitCopyByteArray copy
1478 where
1479 -- Copy data (we assume the arrays aren't overlapping since
1480 -- they're of different types)
1481 copy _src _dst dst_p src_p bytes =
1482 do dflags <- getDynFlags
1483 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1484
1485 -- | Takes a source 'MutableByteArray#', an offset in the source
1486 -- array, a destination 'MutableByteArray#', an offset into the
1487 -- destination array, and the number of bytes to copy. Copies the
1488 -- given number of bytes from the source array to the destination
1489 -- array.
1490 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1491 -> FCode ()
1492 doCopyMutableByteArrayOp = emitCopyByteArray copy
1493 where
1494 -- The only time the memory might overlap is when the two arrays
1495 -- we were provided are the same array!
1496 -- TODO: Optimize branch for common case of no aliasing.
1497 copy src dst dst_p src_p bytes = do
1498 dflags <- getDynFlags
1499 [moveCall, cpyCall] <- forkAlts [
1500 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags 1),
1501 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1502 ]
1503 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1504
1505 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1506 -> FCode ())
1507 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1508 -> FCode ()
1509 emitCopyByteArray copy src src_off dst dst_off n = do
1510 dflags <- getDynFlags
1511 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1512 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1513 copy src dst dst_p src_p n
1514
1515 -- | Takes a source 'ByteArray#', an offset in the source array, a
1516 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1517 -- number of bytes from the source array to the destination memory region.
1518 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1519 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1520 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1521 dflags <- getDynFlags
1522 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1523 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1524
1525 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1526 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1527 -- number of bytes from the source array to the destination memory region.
1528 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1529 -> FCode ()
1530 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1531
1532 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1533 -- the destination array, and the number of bytes to copy. Copies the given
1534 -- number of bytes from the source memory region to the destination array.
1535 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1536 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1537 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1538 dflags <- getDynFlags
1539 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1540 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1541
1542
1543 -- ----------------------------------------------------------------------------
1544 -- Setting byte arrays
1545
1546 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1547 -- and a byte, and sets each of the selected bytes in the array to the
1548 -- character.
1549 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1550 -> FCode ()
1551 doSetByteArrayOp ba off len c
1552 = do dflags <- getDynFlags
1553 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1554 emitMemsetCall p c len (mkIntExpr dflags 1)
1555
1556 -- ----------------------------------------------------------------------------
1557 -- Allocating arrays
1558
1559 -- | Takes a register to return the newly allocated array in, the size
1560 -- of the new array, and an initial value for the elements. Allocates
1561 -- a new 'MutableArray#'.
1562 doNewArrayOp :: CmmFormal -> WordOff -> CmmExpr -> FCode ()
1563 doNewArrayOp res_r n init = do
1564 dflags <- getDynFlags
1565
1566 let info_ptr = mkLblExpr mkMAP_DIRTY_infoLabel
1567 rep = arrPtrsRep dflags n
1568
1569 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
1570 (mkIntExpr dflags (nonHdrSize dflags rep))
1571 (zeroExpr dflags)
1572
1573 let hdr_size = wordsToBytes dflags (fixedHdrSize dflags)
1574
1575 base <- allocHeapClosure rep info_ptr curCCS
1576 [ (mkIntExpr dflags n,
1577 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
1578 , (mkIntExpr dflags (nonHdrSizeW rep),
1579 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
1580 ]
1581
1582 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1583 emit $ mkAssign arr base
1584
1585 -- Initialise all elements of the the array
1586 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (arrPtrsHdrSize dflags)
1587 for <- newLabelC
1588 emitLabel for
1589 let loopBody =
1590 [ mkStore (CmmReg (CmmLocal p)) init
1591 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1592 , mkBranch for ]
1593 emit =<< mkCmmIfThen
1594 (cmmULtWord dflags (CmmReg (CmmLocal p))
1595 (cmmOffsetW dflags (CmmReg arr)
1596 (arrPtrsHdrSizeW dflags + n)))
1597 (catAGraphs loopBody)
1598
1599 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1600
1601 -- | The inline allocation limit is 128 bytes.
1602 maxInlineAllocThreshold :: ByteOff
1603 maxInlineAllocThreshold = 128
1604
1605 -- ----------------------------------------------------------------------------
1606 -- Copying pointer arrays
1607
1608 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1609 -- nowhere else in the code generator. This is mostly because these
1610 -- "primitive" ops result in a surprisingly large amount of code. It
1611 -- will likely be worthwhile to optimize what is emitted here, so that
1612 -- our optimization passes don't waste time repeatedly optimizing the
1613 -- same bits of code.
1614
1615 -- More closely imitates 'assignTemp' from the old code generator, which
1616 -- returns a CmmExpr rather than a LocalReg.
1617 assignTempE :: CmmExpr -> FCode CmmExpr
1618 assignTempE e = do
1619 t <- assignTemp e
1620 return (CmmReg (CmmLocal t))
1621
1622 -- | Takes a source 'Array#', an offset in the source array, a
1623 -- destination 'MutableArray#', an offset into the destination array,
1624 -- and the number of elements to copy. Copies the given number of
1625 -- elements from the source array to the destination array.
1626 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1627 -> FCode ()
1628 doCopyArrayOp = emitCopyArray copy
1629 where
1630 -- Copy data (we assume the arrays aren't overlapping since
1631 -- they're of different types)
1632 copy _src _dst dst_p src_p bytes =
1633 do dflags <- getDynFlags
1634 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1635
1636
1637 -- | Takes a source 'MutableArray#', an offset in the source array, a
1638 -- destination 'MutableArray#', an offset into the destination array,
1639 -- and the number of elements to copy. Copies the given number of
1640 -- elements from the source array to the destination array.
1641 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1642 -> FCode ()
1643 doCopyMutableArrayOp = emitCopyArray copy
1644 where
1645 -- The only time the memory might overlap is when the two arrays
1646 -- we were provided are the same array!
1647 -- TODO: Optimize branch for common case of no aliasing.
1648 copy src dst dst_p src_p bytes = do
1649 dflags <- getDynFlags
1650 [moveCall, cpyCall] <- forkAlts [
1651 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags)),
1652 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1653 ]
1654 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1655
1656 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1657 -> FCode ())
1658 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1659 -> FCode ()
1660 emitCopyArray copy src0 src_off0 dst0 dst_off0 n0 = do
1661 dflags <- getDynFlags
1662 n <- assignTempE n0
1663 nonzero <- getCode $ do
1664 -- Passed as arguments (be careful)
1665 src <- assignTempE src0
1666 src_off <- assignTempE src_off0
1667 dst <- assignTempE dst0
1668 dst_off <- assignTempE dst_off0
1669
1670 -- Set the dirty bit in the header.
1671 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1672
1673 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst (arrPtrsHdrSize dflags)
1674 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1675 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1676 bytes <- assignTempE $ cmmMulWord dflags n (mkIntExpr dflags (wORD_SIZE dflags))
1677
1678 copy src dst dst_p src_p bytes
1679
1680 -- The base address of the destination card table
1681 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p (loadArrPtrsSize dflags dst)
1682
1683 emitSetCards dst_off dst_cards_p n
1684
1685 -- TODO: Figure out if this branch is really neccesary.
1686 emit =<< mkCmmIfThen (cmmNeWord dflags n (mkIntExpr dflags 0)) nonzero
1687
1688 -- | Takes an info table label, a register to return the newly
1689 -- allocated array in, a source array, an offset in the source array,
1690 -- and the number of elements to copy. Allocates a new array and
1691 -- initializes it from the source array.
1692 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> CmmExpr
1693 -> FCode ()
1694 emitCloneArray info_p res_r src0 src_off0 n0 = do
1695 dflags <- getDynFlags
1696 let arrPtrsHdrSizeW dflags = mkIntExpr dflags (fixedHdrSize dflags +
1697 (sIZEOF_StgMutArrPtrs_NoHdr dflags `div` wORD_SIZE dflags))
1698 myCapability = cmmSubWord dflags (CmmReg baseReg) (mkIntExpr dflags (oFFSET_Capability_r dflags))
1699 -- Passed as arguments (be careful)
1700 src <- assignTempE src0
1701 src_off <- assignTempE src_off0
1702 n <- assignTempE n0
1703
1704 card_bytes <- assignTempE $ cardRoundUpCmm dflags n
1705 size <- assignTempE $ cmmAddWord dflags n (bytesToWordsRoundUpCmm dflags card_bytes)
1706 words <- assignTempE $ cmmAddWord dflags (arrPtrsHdrSizeW dflags) size
1707
1708 arr_r <- newTemp (bWord dflags)
1709 emitAllocateCall arr_r myCapability words
1710 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags)) (cmmMulWord dflags n (wordSize dflags))
1711 (zeroExpr dflags)
1712
1713 let arr = CmmReg (CmmLocal arr_r)
1714 emitSetDynHdr arr (CmmLit (CmmLabel info_p)) curCCS
1715 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1716 oFFSET_StgMutArrPtrs_ptrs dflags)) n
1717 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1718 oFFSET_StgMutArrPtrs_size dflags)) size
1719
1720 dst_p <- assignTempE $ cmmOffsetB dflags arr (arrPtrsHdrSize dflags)
1721 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags))
1722 src_off
1723
1724 emitMemcpyCall dst_p src_p (cmmMulWord dflags n (wordSize dflags)) (mkIntExpr dflags (wORD_SIZE dflags))
1725
1726 emitMemsetCall (cmmOffsetExprW dflags dst_p n)
1727 (mkIntExpr dflags 1)
1728 card_bytes
1729 (mkIntExpr dflags (wORD_SIZE dflags))
1730 emit $ mkAssign (CmmLocal res_r) arr
1731
1732 -- | Takes and offset in the destination array, the base address of
1733 -- the card table, and the number of elements affected (*not* the
1734 -- number of cards). The number of elements may not be zero.
1735 -- Marks the relevant cards as dirty.
1736 emitSetCards :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1737 emitSetCards dst_start dst_cards_start n = do
1738 dflags <- getDynFlags
1739 start_card <- assignTempE $ cardCmm dflags dst_start
1740 let end_card = cardCmm dflags (cmmSubWord dflags (cmmAddWord dflags dst_start n) (mkIntExpr dflags 1))
1741 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
1742 (mkIntExpr dflags 1)
1743 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
1744 (mkIntExpr dflags 1) -- no alignment (1 byte)
1745
1746 -- Convert an element index to a card index
1747 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
1748 cardCmm dflags i =
1749 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
1750
1751 -- Convert a number of elements to a number of cards, rounding up
1752 cardRoundUpCmm :: DynFlags -> CmmExpr -> CmmExpr
1753 cardRoundUpCmm dflags i =
1754 cardCmm dflags (cmmAddWord dflags i
1755 (mkIntExpr dflags
1756 ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1)))
1757
1758 bytesToWordsRoundUpCmm :: DynFlags -> CmmExpr -> CmmExpr
1759 bytesToWordsRoundUpCmm dflags e =
1760 cmmQuotWord dflags (cmmAddWord dflags e
1761 (mkIntExpr dflags
1762 (wORD_SIZE dflags - 1))) (wordSize dflags)
1763
1764 wordSize :: DynFlags -> CmmExpr
1765 wordSize dflags = mkIntExpr dflags (wORD_SIZE dflags)
1766
1767 -- | Emit a call to @memcpy@.
1768 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1769 emitMemcpyCall dst src n align = do
1770 emitPrimCall
1771 [ {-no results-} ]
1772 MO_Memcpy
1773 [ dst, src, n, align ]
1774
1775 -- | Emit a call to @memmove@.
1776 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1777 emitMemmoveCall dst src n align = do
1778 emitPrimCall
1779 [ {- no results -} ]
1780 MO_Memmove
1781 [ dst, src, n, align ]
1782
1783 -- | Emit a call to @memset@. The second argument must fit inside an
1784 -- unsigned char.
1785 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1786 emitMemsetCall dst c n align = do
1787 emitPrimCall
1788 [ {- no results -} ]
1789 MO_Memset
1790 [ dst, c, n, align ]
1791
1792 -- | Emit a call to @allocate@.
1793 emitAllocateCall :: LocalReg -> CmmExpr -> CmmExpr -> FCode ()
1794 emitAllocateCall res cap n = do
1795 emitCCall
1796 [ (res, AddrHint) ]
1797 allocate
1798 [ (cap, AddrHint)
1799 , (n, NoHint)
1800 ]
1801 where
1802 allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
1803 ForeignLabelInExternalPackage IsFunction))
1804
1805 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1806 emitBSwapCall res x width = do
1807 emitPrimCall
1808 [ res ]
1809 (MO_BSwap width)
1810 [ x ]
1811
1812 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1813 emitPopCntCall res x width = do
1814 emitPrimCall
1815 [ res ]
1816 (MO_PopCnt width)
1817 [ x ]