9a748da736256bd26fe842334908f4ed2895fc21
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C--: primitive operations
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmPrim (
10 cgOpApp,
11 cgPrimOp, -- internal(ish), used by cgCase to get code for a
12 -- comparison without also turning it into a Bool.
13 shouldInlinePrimOp
14 ) where
15
16 #include "HsVersions.h"
17
18 import StgCmmLayout
19 import StgCmmForeign
20 import StgCmmEnv
21 import StgCmmMonad
22 import StgCmmUtils
23 import StgCmmTicky
24 import StgCmmHeap
25 import StgCmmProf ( costCentreFrom, curCCS )
26
27 import DynFlags
28 import Platform
29 import BasicTypes
30 import MkGraph
31 import StgSyn
32 import Cmm
33 import CmmInfo
34 import Type ( Type, tyConAppTyCon )
35 import TyCon
36 import CLabel
37 import CmmUtils
38 import PrimOp
39 import SMRep
40 import FastString
41 import Outputable
42 import Util
43
44 import Control.Monad (liftM, when)
45
46 ------------------------------------------------------------------------
47 -- Primitive operations and foreign calls
48 ------------------------------------------------------------------------
49
50 {- Note [Foreign call results]
51 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
52 A foreign call always returns an unboxed tuple of results, one
53 of which is the state token. This seems to happen even for pure
54 calls.
55
56 Even if we returned a single result for pure calls, it'd still be
57 right to wrap it in a singleton unboxed tuple, because the result
58 might be a Haskell closure pointer, we don't want to evaluate it. -}
59
60 ----------------------------------
61 cgOpApp :: StgOp -- The op
62 -> [StgArg] -- Arguments
63 -> Type -- Result type (always an unboxed tuple)
64 -> FCode ReturnKind
65
66 -- Foreign calls
67 cgOpApp (StgFCallOp fcall _) stg_args res_ty
68 = cgForeignCall fcall stg_args res_ty
69 -- Note [Foreign call results]
70
71 -- tagToEnum# is special: we need to pull the constructor
72 -- out of the table, and perform an appropriate return.
73
74 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
75 = ASSERT(isEnumerationTyCon tycon)
76 do { dflags <- getDynFlags
77 ; args' <- getNonVoidArgAmodes [arg]
78 ; let amode = case args' of [amode] -> amode
79 _ -> panic "TagToEnumOp had void arg"
80 ; emitReturn [tagToClosure dflags tycon amode] }
81 where
82 -- If you're reading this code in the attempt to figure
83 -- out why the compiler panic'ed here, it is probably because
84 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
85 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
86 -- That won't work.
87 tycon = tyConAppTyCon res_ty
88
89 cgOpApp (StgPrimOp primop) args res_ty = do
90 dflags <- getDynFlags
91 cmm_args <- getNonVoidArgAmodes args
92 case shouldInlinePrimOp dflags primop cmm_args of
93 Nothing -> do -- out-of-line
94 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
95 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
96
97 Just f -- inline
98 | ReturnsPrim VoidRep <- result_info
99 -> do f []
100 emitReturn []
101
102 | ReturnsPrim rep <- result_info
103 -> do dflags <- getDynFlags
104 res <- newTemp (primRepCmmType dflags rep)
105 f [res]
106 emitReturn [CmmReg (CmmLocal res)]
107
108 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
109 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
110 f regs
111 emitReturn (map (CmmReg . CmmLocal) regs)
112
113 | otherwise -> panic "cgPrimop"
114 where
115 result_info = getPrimOpResultInfo primop
116
117 cgOpApp (StgPrimCallOp primcall) args _res_ty
118 = do { cmm_args <- getNonVoidArgAmodes args
119 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
120 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
121
122 -- | Decide whether an out-of-line primop should be replaced by an
123 -- inline implementation. This might happen e.g. if there's enough
124 -- static information, such as statically know arguments, to emit a
125 -- more efficient implementation inline.
126 --
127 -- Returns 'Nothing' if this primop should use its out-of-line
128 -- implementation (defined elsewhere) and 'Just' together with a code
129 -- generating function that takes the output regs as arguments
130 -- otherwise.
131 shouldInlinePrimOp :: DynFlags
132 -> PrimOp -- ^ The primop
133 -> [CmmExpr] -- ^ The primop arguments
134 -> Maybe ([LocalReg] -> FCode ())
135
136 shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n _))]
137 | fromInteger n <= maxInlineAllocSize dflags =
138 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
139
140 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n _)), init]
141 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
142 Just $ \ [res] -> doNewArrayOp res (fromInteger n) init
143
144 shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n _))]
145 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
146 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
147
148 shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n _))]
149 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
150 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
151
152 shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n _))]
153 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
154 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
155
156 shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n _))]
157 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
158 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
159
160 shouldInlinePrimOp dflags primop args
161 | primOpOutOfLine primop = Nothing
162 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
163
164 -- TODO: Several primops, such as 'copyArray#', only have an inline
165 -- implementation (below) but could possibly have both an inline
166 -- implementation and an out-of-line implementation, just like
167 -- 'newArray#'. This would lower the amount of code generated,
168 -- hopefully without a performance impact (needs to be measured).
169
170 ---------------------------------------------------
171 cgPrimOp :: [LocalReg] -- where to put the results
172 -> PrimOp -- the op
173 -> [StgArg] -- arguments
174 -> FCode ()
175
176 cgPrimOp results op args
177 = do dflags <- getDynFlags
178 arg_exprs <- getNonVoidArgAmodes args
179 emitPrimOp dflags results op arg_exprs
180
181
182 ------------------------------------------------------------------------
183 -- Emitting code for a primop
184 ------------------------------------------------------------------------
185
186 emitPrimOp :: DynFlags
187 -> [LocalReg] -- where to put the results
188 -> PrimOp -- the op
189 -> [CmmExpr] -- arguments
190 -> FCode ()
191
192 -- First we handle various awkward cases specially. The remaining
193 -- easy cases are then handled by translateOp, defined below.
194
195 emitPrimOp dflags [res_r,res_c] IntAddCOp [aa,bb]
196 {-
197 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
198 C, and without needing any comparisons. This may not be the
199 fastest way to do it - if you have better code, please send it! --SDM
200
201 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
202
203 We currently don't make use of the r value if c is != 0 (i.e.
204 overflow), we just convert to big integers and try again. This
205 could be improved by making r and c the correct values for
206 plugging into a new J#.
207
208 { r = ((I_)(a)) + ((I_)(b)); \
209 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
210 >> (BITS_IN (I_) - 1); \
211 }
212 Wading through the mass of bracketry, it seems to reduce to:
213 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
214
215 -}
216 = emit $ catAGraphs [
217 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
218 mkAssign (CmmLocal res_c) $
219 CmmMachOp (mo_wordUShr dflags) [
220 CmmMachOp (mo_wordAnd dflags) [
221 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
222 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
223 ],
224 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
225 ]
226 ]
227
228
229 emitPrimOp dflags [res_r,res_c] IntSubCOp [aa,bb]
230 {- Similarly:
231 #define subIntCzh(r,c,a,b) \
232 { r = ((I_)(a)) - ((I_)(b)); \
233 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
234 >> (BITS_IN (I_) - 1); \
235 }
236
237 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
238 -}
239 = emit $ catAGraphs [
240 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
241 mkAssign (CmmLocal res_c) $
242 CmmMachOp (mo_wordUShr dflags) [
243 CmmMachOp (mo_wordAnd dflags) [
244 CmmMachOp (mo_wordXor dflags) [aa,bb],
245 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
246 ],
247 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
248 ]
249 ]
250
251
252 emitPrimOp _ [res] ParOp [arg]
253 =
254 -- for now, just implement this in a C function
255 -- later, we might want to inline it.
256 emitCCall
257 [(res,NoHint)]
258 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
259 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
260
261 emitPrimOp dflags [res] SparkOp [arg]
262 = do
263 -- returns the value of arg in res. We're going to therefore
264 -- refer to arg twice (once to pass to newSpark(), and once to
265 -- assign to res), so put it in a temporary.
266 tmp <- assignTemp arg
267 tmp2 <- newTemp (bWord dflags)
268 emitCCall
269 [(tmp2,NoHint)]
270 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
271 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
272 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
273
274 emitPrimOp dflags [res] GetCCSOfOp [arg]
275 = emitAssign (CmmLocal res) val
276 where
277 val
278 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
279 | otherwise = CmmLit (zeroCLit dflags)
280
281 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
282 = emitAssign (CmmLocal res) curCCS
283
284 emitPrimOp dflags [res] ReadMutVarOp [mutv]
285 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSize dflags) (gcWord dflags))
286
287 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
288 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSize dflags)) var
289 emitCCall
290 [{-no results-}]
291 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
292 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
293
294 -- #define sizzeofByteArrayzh(r,a) \
295 -- r = ((StgArrWords *)(a))->bytes
296 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
297 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
298
299 -- #define sizzeofMutableByteArrayzh(r,a) \
300 -- r = ((StgArrWords *)(a))->bytes
301 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
302 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
303
304
305 -- #define touchzh(o) /* nothing */
306 emitPrimOp _ res@[] TouchOp args@[_arg]
307 = do emitPrimCall res MO_Touch args
308
309 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
310 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
311 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
312
313 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
314 emitPrimOp dflags [res] StableNameToIntOp [arg]
315 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
316
317 -- #define eqStableNamezh(r,sn1,sn2) \
318 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
319 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
320 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
321 cmmLoadIndexW dflags arg1 (fixedHdrSize dflags) (bWord dflags),
322 cmmLoadIndexW dflags arg2 (fixedHdrSize dflags) (bWord dflags)
323 ])
324
325
326 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
327 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
328
329 -- #define addrToHValuezh(r,a) r=(P_)a
330 emitPrimOp _ [res] AddrToAnyOp [arg]
331 = emitAssign (CmmLocal res) arg
332
333 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
334 -- Note: argument may be tagged!
335 emitPrimOp dflags [res] DataToTagOp [arg]
336 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
337
338 {- Freezing arrays-of-ptrs requires changing an info table, for the
339 benefit of the generational collector. It needs to scavenge mutable
340 objects, even if they are in old space. When they become immutable,
341 they can be removed from this scavenge list. -}
342
343 -- #define unsafeFreezzeArrayzh(r,a)
344 -- {
345 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
346 -- r = a;
347 -- }
348 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
349 = emit $ catAGraphs
350 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
351 mkAssign (CmmLocal res) arg ]
352 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
353 = emit $ catAGraphs
354 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
355 mkAssign (CmmLocal res) arg ]
356
357 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
358 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
359 = emitAssign (CmmLocal res) arg
360
361 -- Copying pointer arrays
362
363 emitPrimOp _ [] CopyArrayOp [src,src_off,dst,dst_off,n] =
364 doCopyArrayOp src src_off dst dst_off n
365 emitPrimOp _ [] CopyMutableArrayOp [src,src_off,dst,dst_off,n] =
366 doCopyMutableArrayOp src src_off dst dst_off n
367 emitPrimOp _ [] CopyArrayArrayOp [src,src_off,dst,dst_off,n] =
368 doCopyArrayOp src src_off dst dst_off n
369 emitPrimOp _ [] CopyMutableArrayArrayOp [src,src_off,dst,dst_off,n] =
370 doCopyMutableArrayOp src src_off dst dst_off n
371
372 -- Reading/writing pointer arrays
373
374 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
375 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
376 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
377
378 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
379 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
380 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
381 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
382 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
383 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
384 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
385 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
386 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
387 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
388
389 emitPrimOp dflags [res] SizeofArrayOp [arg]
390 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags))
391 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
392 = emitPrimOp dflags [res] SizeofArrayOp [arg]
393 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
394 = emitPrimOp dflags [res] SizeofArrayOp [arg]
395 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
396 = emitPrimOp dflags [res] SizeofArrayOp [arg]
397
398 -- IndexXXXoffAddr
399
400 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
401 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
402 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
403 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
404 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
405 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
406 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
407 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
408 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
409 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
410 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
411 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
412 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
413 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
414 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
415 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
416
417 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
418
419 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
420 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
421 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
422 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
423 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
424 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
425 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
426 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
427 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
428 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
429 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
430 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
431 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
432 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
433 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
434 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
435
436 -- IndexXXXArray
437
438 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
439 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
440 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
441 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
442 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
443 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
444 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
445 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
446 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
447 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
448 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
449 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
450 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
451 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
452 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
453 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
454
455 -- ReadXXXArray, identical to IndexXXXArray.
456
457 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
458 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
459 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
460 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
461 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
462 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
463 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
464 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
465 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
466 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
467 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
468 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
469 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
470 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
471 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
472 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
473
474 -- WriteXXXoffAddr
475
476 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
477 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
478 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
479 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
480 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
481 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
482 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
483 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
484 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
485 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
486 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
487 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
488 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
489 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
490 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
491 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
492
493 -- WriteXXXArray
494
495 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
496 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
497 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
498 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
499 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
500 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
501 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
502 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
503 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
504 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
505 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
506 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
507 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
508 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
509 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
510 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
511
512 -- Copying and setting byte arrays
513 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
514 doCopyByteArrayOp src src_off dst dst_off n
515 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
516 doCopyMutableByteArrayOp src src_off dst dst_off n
517 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
518 doCopyByteArrayToAddrOp src src_off dst n
519 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
520 doCopyMutableByteArrayToAddrOp src src_off dst n
521 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
522 doCopyAddrToByteArrayOp src dst dst_off n
523 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
524 doSetByteArrayOp ba off len c
525
526 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
527 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
528 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
529 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
530
531 -- Population count
532 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
533 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
534 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
535 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
536 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
537
538 -- Unsigned int to floating point conversions
539 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
540 (MO_UF_Conv W32) [w]
541 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
542 (MO_UF_Conv W64) [w]
543
544 -- SIMD primops
545 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
546 checkVecCompatibility dflags vcat n w
547 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
548 where
549 zeros :: CmmExpr
550 zeros = CmmLit $ CmmVec (replicate n zero)
551
552 zero :: CmmLit
553 zero = case vcat of
554 IntVec -> CmmInt 0 w
555 WordVec -> CmmInt 0 w
556 FloatVec -> CmmFloat 0 w
557
558 ty :: CmmType
559 ty = vecVmmType vcat n w
560
561 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
562 checkVecCompatibility dflags vcat n w
563 when (length es /= n) $
564 panic "emitPrimOp: VecPackOp has wrong number of arguments"
565 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
566 where
567 zeros :: CmmExpr
568 zeros = CmmLit $ CmmVec (replicate n zero)
569
570 zero :: CmmLit
571 zero = case vcat of
572 IntVec -> CmmInt 0 w
573 WordVec -> CmmInt 0 w
574 FloatVec -> CmmFloat 0 w
575
576 ty :: CmmType
577 ty = vecVmmType vcat n w
578
579 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
580 checkVecCompatibility dflags vcat n w
581 when (length res /= n) $
582 panic "emitPrimOp: VecUnpackOp has wrong number of results"
583 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
584 where
585 ty :: CmmType
586 ty = vecVmmType vcat n w
587
588 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
589 checkVecCompatibility dflags vcat n w
590 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
591 where
592 ty :: CmmType
593 ty = vecVmmType vcat n w
594
595 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
596 checkVecCompatibility dflags vcat n w
597 doIndexByteArrayOp Nothing ty res args
598 where
599 ty :: CmmType
600 ty = vecVmmType vcat n w
601
602 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
603 checkVecCompatibility dflags vcat n w
604 doIndexByteArrayOp Nothing ty res args
605 where
606 ty :: CmmType
607 ty = vecVmmType vcat n w
608
609 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
610 checkVecCompatibility dflags vcat n w
611 doWriteByteArrayOp Nothing ty res args
612 where
613 ty :: CmmType
614 ty = vecVmmType vcat n w
615
616 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
617 checkVecCompatibility dflags vcat n w
618 doIndexOffAddrOp Nothing ty res args
619 where
620 ty :: CmmType
621 ty = vecVmmType vcat n w
622
623 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
624 checkVecCompatibility dflags vcat n w
625 doIndexOffAddrOp Nothing ty res args
626 where
627 ty :: CmmType
628 ty = vecVmmType vcat n w
629
630 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
631 checkVecCompatibility dflags vcat n w
632 doWriteOffAddrOp Nothing ty res args
633 where
634 ty :: CmmType
635 ty = vecVmmType vcat n w
636
637 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
638 checkVecCompatibility dflags vcat n w
639 doIndexByteArrayOpAs Nothing vecty ty res args
640 where
641 vecty :: CmmType
642 vecty = vecVmmType vcat n w
643
644 ty :: CmmType
645 ty = vecCmmCat vcat w
646
647 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
648 checkVecCompatibility dflags vcat n w
649 doIndexByteArrayOpAs Nothing vecty ty res args
650 where
651 vecty :: CmmType
652 vecty = vecVmmType vcat n w
653
654 ty :: CmmType
655 ty = vecCmmCat vcat w
656
657 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
658 checkVecCompatibility dflags vcat n w
659 doWriteByteArrayOp Nothing ty res args
660 where
661 ty :: CmmType
662 ty = vecCmmCat vcat w
663
664 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
665 checkVecCompatibility dflags vcat n w
666 doIndexOffAddrOpAs Nothing vecty ty res args
667 where
668 vecty :: CmmType
669 vecty = vecVmmType vcat n w
670
671 ty :: CmmType
672 ty = vecCmmCat vcat w
673
674 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
675 checkVecCompatibility dflags vcat n w
676 doIndexOffAddrOpAs Nothing vecty ty res args
677 where
678 vecty :: CmmType
679 vecty = vecVmmType vcat n w
680
681 ty :: CmmType
682 ty = vecCmmCat vcat w
683
684 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
685 checkVecCompatibility dflags vcat n w
686 doWriteOffAddrOp Nothing ty res args
687 where
688 ty :: CmmType
689 ty = vecCmmCat vcat w
690
691 -- Prefetch
692 emitPrimOp _ res PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
693 emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
694 emitPrimOp _ res PrefetchAddrOp3 args = doPrefetchAddrOp 3 res args
695
696 emitPrimOp _ res PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
697 emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
698 emitPrimOp _ res PrefetchAddrOp2 args = doPrefetchAddrOp 2 res args
699
700 emitPrimOp _ res PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
701 emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
702 emitPrimOp _ res PrefetchAddrOp1 args = doPrefetchAddrOp 1 res args
703
704 emitPrimOp _ res PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
705 emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
706 emitPrimOp _ res PrefetchAddrOp0 args = doPrefetchAddrOp 0 res args
707
708
709 -- The rest just translate straightforwardly
710 emitPrimOp dflags [res] op [arg]
711 | nopOp op
712 = emitAssign (CmmLocal res) arg
713
714 | Just (mop,rep) <- narrowOp op
715 = emitAssign (CmmLocal res) $
716 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
717
718 emitPrimOp dflags r@[res] op args
719 | Just prim <- callishOp op
720 = do emitPrimCall r prim args
721
722 | Just mop <- translateOp dflags op
723 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
724 emit stmt
725
726 emitPrimOp dflags results op args
727 = case callishPrimOpSupported dflags op of
728 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
729 Right gen -> gen results args
730
731 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
732
733 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
734 callishPrimOpSupported dflags op
735 = case op of
736 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
737 | otherwise -> Right (genericIntQuotRemOp dflags)
738
739 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
740 | otherwise -> Right (genericWordQuotRemOp dflags)
741
742 WordQuotRem2Op | ncg && x86ish -> Left (MO_U_QuotRem2 (wordWidth dflags))
743 | otherwise -> Right (genericWordQuotRem2Op dflags)
744
745 WordAdd2Op | ncg && x86ish -> Left (MO_Add2 (wordWidth dflags))
746 | otherwise -> Right genericWordAdd2Op
747
748 WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags))
749 | otherwise -> Right genericWordMul2Op
750
751 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
752 where
753 ncg = case hscTarget dflags of
754 HscAsm -> True
755 _ -> False
756
757 x86ish = case platformArch (targetPlatform dflags) of
758 ArchX86 -> True
759 ArchX86_64 -> True
760 _ -> False
761
762 genericIntQuotRemOp :: DynFlags -> GenericOp
763 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
764 = emit $ mkAssign (CmmLocal res_q)
765 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
766 mkAssign (CmmLocal res_r)
767 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
768 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
769
770 genericWordQuotRemOp :: DynFlags -> GenericOp
771 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
772 = emit $ mkAssign (CmmLocal res_q)
773 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
774 mkAssign (CmmLocal res_r)
775 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
776 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
777
778 genericWordQuotRem2Op :: DynFlags -> GenericOp
779 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
780 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
781 where ty = cmmExprType dflags arg_x_high
782 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
783 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
784 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
785 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
786 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
787 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
788 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
789 zero = lit 0
790 one = lit 1
791 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
792 lit i = CmmLit (CmmInt i (wordWidth dflags))
793
794 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
795 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
796 mkAssign (CmmLocal res_r) high)
797 f i acc high low =
798 do roverflowedBit <- newTemp ty
799 rhigh' <- newTemp ty
800 rhigh'' <- newTemp ty
801 rlow' <- newTemp ty
802 risge <- newTemp ty
803 racc' <- newTemp ty
804 let high' = CmmReg (CmmLocal rhigh')
805 isge = CmmReg (CmmLocal risge)
806 overflowedBit = CmmReg (CmmLocal roverflowedBit)
807 let this = catAGraphs
808 [mkAssign (CmmLocal roverflowedBit)
809 (shr high negone),
810 mkAssign (CmmLocal rhigh')
811 (or (shl high one) (shr low negone)),
812 mkAssign (CmmLocal rlow')
813 (shl low one),
814 mkAssign (CmmLocal risge)
815 (or (overflowedBit `ne` zero)
816 (high' `ge` arg_y)),
817 mkAssign (CmmLocal rhigh'')
818 (high' `minus` (arg_y `times` isge)),
819 mkAssign (CmmLocal racc')
820 (or (shl acc one) isge)]
821 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
822 (CmmReg (CmmLocal rhigh''))
823 (CmmReg (CmmLocal rlow'))
824 return (this <*> rest)
825 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
826
827 genericWordAdd2Op :: GenericOp
828 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
829 = do dflags <- getDynFlags
830 r1 <- newTemp (cmmExprType dflags arg_x)
831 r2 <- newTemp (cmmExprType dflags arg_x)
832 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
833 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
834 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
835 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
836 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
837 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
838 (wordWidth dflags))
839 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
840 emit $ catAGraphs
841 [mkAssign (CmmLocal r1)
842 (add (bottomHalf arg_x) (bottomHalf arg_y)),
843 mkAssign (CmmLocal r2)
844 (add (topHalf (CmmReg (CmmLocal r1)))
845 (add (topHalf arg_x) (topHalf arg_y))),
846 mkAssign (CmmLocal res_h)
847 (topHalf (CmmReg (CmmLocal r2))),
848 mkAssign (CmmLocal res_l)
849 (or (toTopHalf (CmmReg (CmmLocal r2)))
850 (bottomHalf (CmmReg (CmmLocal r1))))]
851 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
852
853 genericWordMul2Op :: GenericOp
854 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
855 = do dflags <- getDynFlags
856 let t = cmmExprType dflags arg_x
857 xlyl <- liftM CmmLocal $ newTemp t
858 xlyh <- liftM CmmLocal $ newTemp t
859 xhyl <- liftM CmmLocal $ newTemp t
860 r <- liftM CmmLocal $ newTemp t
861 -- This generic implementation is very simple and slow. We might
862 -- well be able to do better, but for now this at least works.
863 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
864 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
865 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
866 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
867 sum = foldl1 add
868 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
869 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
870 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
871 (wordWidth dflags))
872 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
873 emit $ catAGraphs
874 [mkAssign xlyl
875 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
876 mkAssign xlyh
877 (mul (bottomHalf arg_x) (topHalf arg_y)),
878 mkAssign xhyl
879 (mul (topHalf arg_x) (bottomHalf arg_y)),
880 mkAssign r
881 (sum [topHalf (CmmReg xlyl),
882 bottomHalf (CmmReg xhyl),
883 bottomHalf (CmmReg xlyh)]),
884 mkAssign (CmmLocal res_l)
885 (or (bottomHalf (CmmReg xlyl))
886 (toTopHalf (CmmReg r))),
887 mkAssign (CmmLocal res_h)
888 (sum [mul (topHalf arg_x) (topHalf arg_y),
889 topHalf (CmmReg xhyl),
890 topHalf (CmmReg xlyh),
891 topHalf (CmmReg r)])]
892 genericWordMul2Op _ _ = panic "genericWordMul2Op"
893
894 -- These PrimOps are NOPs in Cmm
895
896 nopOp :: PrimOp -> Bool
897 nopOp Int2WordOp = True
898 nopOp Word2IntOp = True
899 nopOp Int2AddrOp = True
900 nopOp Addr2IntOp = True
901 nopOp ChrOp = True -- Int# and Char# are rep'd the same
902 nopOp OrdOp = True
903 nopOp _ = False
904
905 -- These PrimOps turn into double casts
906
907 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
908 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
909 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
910 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
911 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
912 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
913 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
914 narrowOp _ = Nothing
915
916 -- Native word signless ops
917
918 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
919 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
920 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
921 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
922 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
923 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
924 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
925
926 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
927 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
928 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
929 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
930 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
931 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
932
933 translateOp dflags AndOp = Just (mo_wordAnd dflags)
934 translateOp dflags OrOp = Just (mo_wordOr dflags)
935 translateOp dflags XorOp = Just (mo_wordXor dflags)
936 translateOp dflags NotOp = Just (mo_wordNot dflags)
937 translateOp dflags SllOp = Just (mo_wordShl dflags)
938 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
939
940 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
941
942 -- Native word signed ops
943
944 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
945 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
946 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
947 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
948 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
949
950
951 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
952 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
953 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
954 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
955
956 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
957 translateOp dflags OrIOp = Just (mo_wordOr dflags)
958 translateOp dflags XorIOp = Just (mo_wordXor dflags)
959 translateOp dflags NotIOp = Just (mo_wordNot dflags)
960 translateOp dflags ISllOp = Just (mo_wordShl dflags)
961 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
962 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
963
964 -- Native word unsigned ops
965
966 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
967 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
968 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
969 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
970
971 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
972 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
973 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
974
975 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
976 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
977 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
978 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
979
980 -- Char# ops
981
982 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
983 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
984 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
985 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
986 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
987 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
988
989 -- Double ops
990
991 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
992 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
993 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
994 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
995 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
996 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
997
998 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
999 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
1000 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
1001 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
1002 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
1003
1004 -- Float ops
1005
1006 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
1007 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
1008 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
1009 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1010 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1011 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1012
1013 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1014 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1015 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1016 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1017 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1018
1019 -- Vector ops
1020
1021 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1022 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1023 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1024 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1025 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1026
1027 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1028 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1029 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1030 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1031 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1032 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1033
1034 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1035 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1036 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1037 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1038 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1039
1040 -- Conversions
1041
1042 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1043 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1044
1045 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1046 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1047
1048 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1049 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1050
1051 -- Word comparisons masquerading as more exotic things.
1052
1053 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1054 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1055 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1056 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1057 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1058 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1059 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1060
1061 translateOp _ _ = Nothing
1062
1063 -- These primops are implemented by CallishMachOps, because they sometimes
1064 -- turn into foreign calls depending on the backend.
1065
1066 callishOp :: PrimOp -> Maybe CallishMachOp
1067 callishOp DoublePowerOp = Just MO_F64_Pwr
1068 callishOp DoubleSinOp = Just MO_F64_Sin
1069 callishOp DoubleCosOp = Just MO_F64_Cos
1070 callishOp DoubleTanOp = Just MO_F64_Tan
1071 callishOp DoubleSinhOp = Just MO_F64_Sinh
1072 callishOp DoubleCoshOp = Just MO_F64_Cosh
1073 callishOp DoubleTanhOp = Just MO_F64_Tanh
1074 callishOp DoubleAsinOp = Just MO_F64_Asin
1075 callishOp DoubleAcosOp = Just MO_F64_Acos
1076 callishOp DoubleAtanOp = Just MO_F64_Atan
1077 callishOp DoubleLogOp = Just MO_F64_Log
1078 callishOp DoubleExpOp = Just MO_F64_Exp
1079 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1080
1081 callishOp FloatPowerOp = Just MO_F32_Pwr
1082 callishOp FloatSinOp = Just MO_F32_Sin
1083 callishOp FloatCosOp = Just MO_F32_Cos
1084 callishOp FloatTanOp = Just MO_F32_Tan
1085 callishOp FloatSinhOp = Just MO_F32_Sinh
1086 callishOp FloatCoshOp = Just MO_F32_Cosh
1087 callishOp FloatTanhOp = Just MO_F32_Tanh
1088 callishOp FloatAsinOp = Just MO_F32_Asin
1089 callishOp FloatAcosOp = Just MO_F32_Acos
1090 callishOp FloatAtanOp = Just MO_F32_Atan
1091 callishOp FloatLogOp = Just MO_F32_Log
1092 callishOp FloatExpOp = Just MO_F32_Exp
1093 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1094
1095 callishOp _ = Nothing
1096
1097 ------------------------------------------------------------------------------
1098 -- Helpers for translating various minor variants of array indexing.
1099
1100 doIndexOffAddrOp :: Maybe MachOp
1101 -> CmmType
1102 -> [LocalReg]
1103 -> [CmmExpr]
1104 -> FCode ()
1105 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1106 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1107 doIndexOffAddrOp _ _ _ _
1108 = panic "StgCmmPrim: doIndexOffAddrOp"
1109
1110 doIndexOffAddrOpAs :: Maybe MachOp
1111 -> CmmType
1112 -> CmmType
1113 -> [LocalReg]
1114 -> [CmmExpr]
1115 -> FCode ()
1116 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1117 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1118 doIndexOffAddrOpAs _ _ _ _ _
1119 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1120
1121 doIndexByteArrayOp :: Maybe MachOp
1122 -> CmmType
1123 -> [LocalReg]
1124 -> [CmmExpr]
1125 -> FCode ()
1126 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1127 = do dflags <- getDynFlags
1128 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1129 doIndexByteArrayOp _ _ _ _
1130 = panic "StgCmmPrim: doIndexByteArrayOp"
1131
1132 doIndexByteArrayOpAs :: Maybe MachOp
1133 -> CmmType
1134 -> CmmType
1135 -> [LocalReg]
1136 -> [CmmExpr]
1137 -> FCode ()
1138 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1139 = do dflags <- getDynFlags
1140 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1141 doIndexByteArrayOpAs _ _ _ _ _
1142 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1143
1144 doReadPtrArrayOp :: LocalReg
1145 -> CmmExpr
1146 -> CmmExpr
1147 -> FCode ()
1148 doReadPtrArrayOp res addr idx
1149 = do dflags <- getDynFlags
1150 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1151
1152 doWriteOffAddrOp :: Maybe MachOp
1153 -> CmmType
1154 -> [LocalReg]
1155 -> [CmmExpr]
1156 -> FCode ()
1157 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1158 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1159 doWriteOffAddrOp _ _ _ _
1160 = panic "StgCmmPrim: doWriteOffAddrOp"
1161
1162 doWriteByteArrayOp :: Maybe MachOp
1163 -> CmmType
1164 -> [LocalReg]
1165 -> [CmmExpr]
1166 -> FCode ()
1167 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1168 = do dflags <- getDynFlags
1169 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1170 doWriteByteArrayOp _ _ _ _
1171 = panic "StgCmmPrim: doWriteByteArrayOp"
1172
1173 doWritePtrArrayOp :: CmmExpr
1174 -> CmmExpr
1175 -> CmmExpr
1176 -> FCode ()
1177 doWritePtrArrayOp addr idx val
1178 = do dflags <- getDynFlags
1179 let ty = cmmExprType dflags val
1180 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1181 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1182 -- the write barrier. We must write a byte into the mark table:
1183 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1184 emit $ mkStore (
1185 cmmOffsetExpr dflags
1186 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1187 (loadArrPtrsSize dflags addr))
1188 (CmmMachOp (mo_wordUShr dflags) [idx,
1189 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1190 ) (CmmLit (CmmInt 1 W8))
1191
1192 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1193 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1194 where off = fixedHdrSize dflags * wORD_SIZE dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1195
1196 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1197 -> Maybe MachOp -- Optional result cast
1198 -> CmmType -- Type of element we are accessing
1199 -> LocalReg -- Destination
1200 -> CmmExpr -- Base address
1201 -> CmmType -- Type of element by which we are indexing
1202 -> CmmExpr -- Index
1203 -> FCode ()
1204 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1205 = do dflags <- getDynFlags
1206 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1207 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1208 = do dflags <- getDynFlags
1209 emitAssign (CmmLocal res) (CmmMachOp cast [
1210 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1211
1212 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1213 -> Maybe MachOp -- Optional value cast
1214 -> CmmExpr -- Base address
1215 -> CmmType -- Type of element by which we are indexing
1216 -> CmmExpr -- Index
1217 -> CmmExpr -- Value to write
1218 -> FCode ()
1219 mkBasicIndexedWrite off Nothing base idx_ty idx val
1220 = do dflags <- getDynFlags
1221 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1222 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1223 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1224
1225 -- ----------------------------------------------------------------------------
1226 -- Misc utils
1227
1228 cmmIndexOffExpr :: DynFlags
1229 -> ByteOff -- Initial offset in bytes
1230 -> Width -- Width of element by which we are indexing
1231 -> CmmExpr -- Base address
1232 -> CmmExpr -- Index
1233 -> CmmExpr
1234 cmmIndexOffExpr dflags off width base idx
1235 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1236
1237 cmmLoadIndexOffExpr :: DynFlags
1238 -> ByteOff -- Initial offset in bytes
1239 -> CmmType -- Type of element we are accessing
1240 -> CmmExpr -- Base address
1241 -> CmmType -- Type of element by which we are indexing
1242 -> CmmExpr -- Index
1243 -> CmmExpr
1244 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1245 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1246
1247 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1248 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1249
1250 ------------------------------------------------------------------------------
1251 -- Helpers for translating vector primops.
1252
1253 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1254 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1255
1256 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1257 vecCmmCat IntVec = cmmBits
1258 vecCmmCat WordVec = cmmBits
1259 vecCmmCat FloatVec = cmmFloat
1260
1261 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1262 vecElemInjectCast _ FloatVec _ = Nothing
1263 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1264 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1265 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1266 vecElemInjectCast _ IntVec W64 = Nothing
1267 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1268 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1269 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1270 vecElemInjectCast _ WordVec W64 = Nothing
1271 vecElemInjectCast _ _ _ = Nothing
1272
1273 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1274 vecElemProjectCast _ FloatVec _ = Nothing
1275 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1276 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1277 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1278 vecElemProjectCast _ IntVec W64 = Nothing
1279 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1280 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1281 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1282 vecElemProjectCast _ WordVec W64 = Nothing
1283 vecElemProjectCast _ _ _ = Nothing
1284
1285 -- Check to make sure that we can generate code for the specified vector type
1286 -- given the current set of dynamic flags.
1287 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1288 checkVecCompatibility dflags vcat l w = do
1289 when (hscTarget dflags /= HscLlvm) $ do
1290 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1291 ,"Please use -fllvm."]
1292 check vecWidth vcat l w
1293 where
1294 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1295 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1296 sorry $ "128-bit wide single-precision floating point " ++
1297 "SIMD vector instructions require at least -msse."
1298 check W128 _ _ _ | not (isSse2Enabled dflags) =
1299 sorry $ "128-bit wide integer and double precision " ++
1300 "SIMD vector instructions require at least -msse2."
1301 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1302 sorry $ "256-bit wide floating point " ++
1303 "SIMD vector instructions require at least -mavx."
1304 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1305 sorry $ "256-bit wide integer " ++
1306 "SIMD vector instructions require at least -mavx2."
1307 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1308 sorry $ "512-bit wide " ++
1309 "SIMD vector instructions require -mavx512f."
1310 check _ _ _ _ = return ()
1311
1312 vecWidth = typeWidth (vecVmmType vcat l w)
1313
1314 ------------------------------------------------------------------------------
1315 -- Helpers for translating vector packing and unpacking.
1316
1317 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1318 -> CmmType -- Type of vector
1319 -> CmmExpr -- Initial vector
1320 -> [CmmExpr] -- Elements
1321 -> CmmFormal -- Destination for result
1322 -> FCode ()
1323 doVecPackOp maybe_pre_write_cast ty z es res = do
1324 dst <- newTemp ty
1325 emitAssign (CmmLocal dst) z
1326 vecPack dst es 0
1327 where
1328 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1329 vecPack src [] _ =
1330 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1331
1332 vecPack src (e : es) i = do
1333 dst <- newTemp ty
1334 if isFloatType (vecElemType ty)
1335 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1336 [CmmReg (CmmLocal src), cast e, iLit])
1337 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1338 [CmmReg (CmmLocal src), cast e, iLit])
1339 vecPack dst es (i + 1)
1340 where
1341 -- vector indices are always 32-bits
1342 iLit = CmmLit (CmmInt (toInteger i) W32)
1343
1344 cast :: CmmExpr -> CmmExpr
1345 cast val = case maybe_pre_write_cast of
1346 Nothing -> val
1347 Just cast -> CmmMachOp cast [val]
1348
1349 len :: Length
1350 len = vecLength ty
1351
1352 wid :: Width
1353 wid = typeWidth (vecElemType ty)
1354
1355 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1356 -> CmmType -- Type of vector
1357 -> CmmExpr -- Vector
1358 -> [CmmFormal] -- Element results
1359 -> FCode ()
1360 doVecUnpackOp maybe_post_read_cast ty e res =
1361 vecUnpack res 0
1362 where
1363 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1364 vecUnpack [] _ =
1365 return ()
1366
1367 vecUnpack (r : rs) i = do
1368 if isFloatType (vecElemType ty)
1369 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1370 [e, iLit]))
1371 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1372 [e, iLit]))
1373 vecUnpack rs (i + 1)
1374 where
1375 -- vector indices are always 32-bits
1376 iLit = CmmLit (CmmInt (toInteger i) W32)
1377
1378 cast :: CmmExpr -> CmmExpr
1379 cast val = case maybe_post_read_cast of
1380 Nothing -> val
1381 Just cast -> CmmMachOp cast [val]
1382
1383 len :: Length
1384 len = vecLength ty
1385
1386 wid :: Width
1387 wid = typeWidth (vecElemType ty)
1388
1389 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1390 -> CmmType -- Vector type
1391 -> CmmExpr -- Source vector
1392 -> CmmExpr -- Element
1393 -> CmmExpr -- Index at which to insert element
1394 -> CmmFormal -- Destination for result
1395 -> FCode ()
1396 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1397 dflags <- getDynFlags
1398 -- vector indices are always 32-bits
1399 let idx' :: CmmExpr
1400 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1401 if isFloatType (vecElemType ty)
1402 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1403 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1404 where
1405 cast :: CmmExpr -> CmmExpr
1406 cast val = case maybe_pre_write_cast of
1407 Nothing -> val
1408 Just cast -> CmmMachOp cast [val]
1409
1410 len :: Length
1411 len = vecLength ty
1412
1413 wid :: Width
1414 wid = typeWidth (vecElemType ty)
1415
1416 ------------------------------------------------------------------------------
1417 -- Helpers for translating prefetching.
1418
1419 doPrefetchByteArrayOp :: Int
1420 -> [LocalReg]
1421 -> [CmmExpr]
1422 -> FCode ()
1423 doPrefetchByteArrayOp locality res [addr,idx]
1424 = do dflags <- getDynFlags
1425 mkBasicPrefetch locality (arrWordsHdrSize dflags) res addr idx
1426 doPrefetchByteArrayOp _ _ _
1427 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1428
1429 doPrefetchAddrOp ::Int
1430 -> [LocalReg]
1431 -> [CmmExpr]
1432 -> FCode ()
1433 doPrefetchAddrOp locality res [addr,idx]
1434 = mkBasicPrefetch locality 0 res addr idx
1435 doPrefetchAddrOp _ _ _
1436 = panic "StgCmmPrim: doPrefetchAddrOp"
1437
1438 mkBasicPrefetch :: Int -- Locality level 0-3
1439 -> ByteOff -- Initial offset in bytes
1440 -> [LocalReg] -- Destination
1441 -> CmmExpr -- Base address
1442 -> CmmExpr -- Index
1443 -> FCode ()
1444 mkBasicPrefetch locality off res base idx
1445 = do dflags <- getDynFlags
1446 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1447 case res of
1448 [] -> return ()
1449 [reg] -> emitAssign (CmmLocal reg) base
1450 _ -> panic "StgCmmPrim: mkBasicPrefetch"
1451
1452 -- ----------------------------------------------------------------------------
1453 -- Allocating byte arrays
1454
1455 -- | Takes a register to return the newly allocated array in and the
1456 -- size of the new array in bytes. Allocates a new
1457 -- 'MutableByteArray#'.
1458 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1459 doNewByteArrayOp res_r n = do
1460 dflags <- getDynFlags
1461
1462 let info_ptr = mkLblExpr mkArrWords_infoLabel
1463 rep = arrWordsRep dflags n
1464
1465 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1466 (mkIntExpr dflags (nonHdrSize dflags rep))
1467 (zeroExpr dflags)
1468
1469 let hdr_size = wordsToBytes dflags (fixedHdrSize dflags)
1470
1471 base <- allocHeapClosure rep info_ptr curCCS
1472 [ (mkIntExpr dflags n,
1473 hdr_size + oFFSET_StgArrWords_bytes dflags)
1474 ]
1475
1476 emit $ mkAssign (CmmLocal res_r) base
1477
1478 -- ----------------------------------------------------------------------------
1479 -- Copying byte arrays
1480
1481 -- | Takes a source 'ByteArray#', an offset in the source array, a
1482 -- destination 'MutableByteArray#', an offset into the destination
1483 -- array, and the number of bytes to copy. Copies the given number of
1484 -- bytes from the source array to the destination array.
1485 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1486 -> FCode ()
1487 doCopyByteArrayOp = emitCopyByteArray copy
1488 where
1489 -- Copy data (we assume the arrays aren't overlapping since
1490 -- they're of different types)
1491 copy _src _dst dst_p src_p bytes =
1492 do dflags <- getDynFlags
1493 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1494
1495 -- | Takes a source 'MutableByteArray#', an offset in the source
1496 -- array, a destination 'MutableByteArray#', an offset into the
1497 -- destination array, and the number of bytes to copy. Copies the
1498 -- given number of bytes from the source array to the destination
1499 -- array.
1500 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1501 -> FCode ()
1502 doCopyMutableByteArrayOp = emitCopyByteArray copy
1503 where
1504 -- The only time the memory might overlap is when the two arrays
1505 -- we were provided are the same array!
1506 -- TODO: Optimize branch for common case of no aliasing.
1507 copy src dst dst_p src_p bytes = do
1508 dflags <- getDynFlags
1509 [moveCall, cpyCall] <- forkAlts [
1510 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags 1),
1511 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1512 ]
1513 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1514
1515 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1516 -> FCode ())
1517 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1518 -> FCode ()
1519 emitCopyByteArray copy src src_off dst dst_off n = do
1520 dflags <- getDynFlags
1521 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1522 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1523 copy src dst dst_p src_p n
1524
1525 -- | Takes a source 'ByteArray#', an offset in the source array, a
1526 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1527 -- number of bytes from the source array to the destination memory region.
1528 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1529 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1530 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1531 dflags <- getDynFlags
1532 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1533 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1534
1535 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1536 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1537 -- number of bytes from the source array to the destination memory region.
1538 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1539 -> FCode ()
1540 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1541
1542 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1543 -- the destination array, and the number of bytes to copy. Copies the given
1544 -- number of bytes from the source memory region to the destination array.
1545 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1546 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1547 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1548 dflags <- getDynFlags
1549 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1550 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1551
1552
1553 -- ----------------------------------------------------------------------------
1554 -- Setting byte arrays
1555
1556 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1557 -- and a byte, and sets each of the selected bytes in the array to the
1558 -- character.
1559 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1560 -> FCode ()
1561 doSetByteArrayOp ba off len c
1562 = do dflags <- getDynFlags
1563 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1564 emitMemsetCall p c len (mkIntExpr dflags 1)
1565
1566 -- ----------------------------------------------------------------------------
1567 -- Allocating arrays
1568
1569 -- | Takes a register to return the newly allocated array in, the size
1570 -- of the new array, and an initial value for the elements. Allocates
1571 -- a new 'MutableArray#'.
1572 doNewArrayOp :: CmmFormal -> WordOff -> CmmExpr -> FCode ()
1573 doNewArrayOp res_r n init = do
1574 dflags <- getDynFlags
1575
1576 let info_ptr = mkLblExpr mkMAP_DIRTY_infoLabel
1577 rep = arrPtrsRep dflags n
1578
1579 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
1580 (mkIntExpr dflags (nonHdrSize dflags rep))
1581 (zeroExpr dflags)
1582
1583 let hdr_size = wordsToBytes dflags (fixedHdrSize dflags)
1584
1585 base <- allocHeapClosure rep info_ptr curCCS
1586 [ (mkIntExpr dflags n,
1587 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
1588 , (mkIntExpr dflags (nonHdrSizeW rep),
1589 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
1590 ]
1591
1592 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1593 emit $ mkAssign arr base
1594
1595 -- Initialise all elements of the the array
1596 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (arrPtrsHdrSize dflags)
1597 for <- newLabelC
1598 emitLabel for
1599 let loopBody =
1600 [ mkStore (CmmReg (CmmLocal p)) init
1601 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1602 , mkBranch for ]
1603 emit =<< mkCmmIfThen
1604 (cmmULtWord dflags (CmmReg (CmmLocal p))
1605 (cmmOffsetW dflags (CmmReg arr)
1606 (arrPtrsHdrSizeW dflags + n)))
1607 (catAGraphs loopBody)
1608
1609 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1610
1611 -- ----------------------------------------------------------------------------
1612 -- Copying pointer arrays
1613
1614 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1615 -- nowhere else in the code generator. This is mostly because these
1616 -- "primitive" ops result in a surprisingly large amount of code. It
1617 -- will likely be worthwhile to optimize what is emitted here, so that
1618 -- our optimization passes don't waste time repeatedly optimizing the
1619 -- same bits of code.
1620
1621 -- More closely imitates 'assignTemp' from the old code generator, which
1622 -- returns a CmmExpr rather than a LocalReg.
1623 assignTempE :: CmmExpr -> FCode CmmExpr
1624 assignTempE e = do
1625 t <- assignTemp e
1626 return (CmmReg (CmmLocal t))
1627
1628 -- | Takes a source 'Array#', an offset in the source array, a
1629 -- destination 'MutableArray#', an offset into the destination array,
1630 -- and the number of elements to copy. Copies the given number of
1631 -- elements from the source array to the destination array.
1632 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1633 -> FCode ()
1634 doCopyArrayOp = emitCopyArray copy
1635 where
1636 -- Copy data (we assume the arrays aren't overlapping since
1637 -- they're of different types)
1638 copy _src _dst dst_p src_p bytes =
1639 do dflags <- getDynFlags
1640 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1641
1642
1643 -- | Takes a source 'MutableArray#', an offset in the source array, a
1644 -- destination 'MutableArray#', an offset into the destination array,
1645 -- and the number of elements to copy. Copies the given number of
1646 -- elements from the source array to the destination array.
1647 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1648 -> FCode ()
1649 doCopyMutableArrayOp = emitCopyArray copy
1650 where
1651 -- The only time the memory might overlap is when the two arrays
1652 -- we were provided are the same array!
1653 -- TODO: Optimize branch for common case of no aliasing.
1654 copy src dst dst_p src_p bytes = do
1655 dflags <- getDynFlags
1656 [moveCall, cpyCall] <- forkAlts [
1657 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags)),
1658 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1659 ]
1660 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1661
1662 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1663 -> FCode ())
1664 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1665 -> FCode ()
1666 emitCopyArray copy src0 src_off0 dst0 dst_off0 n0 = do
1667 dflags <- getDynFlags
1668 n <- assignTempE n0
1669 nonzero <- getCode $ do
1670 -- Passed as arguments (be careful)
1671 src <- assignTempE src0
1672 src_off <- assignTempE src_off0
1673 dst <- assignTempE dst0
1674 dst_off <- assignTempE dst_off0
1675
1676 -- Set the dirty bit in the header.
1677 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1678
1679 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst (arrPtrsHdrSize dflags)
1680 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1681 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1682 bytes <- assignTempE $ cmmMulWord dflags n (mkIntExpr dflags (wORD_SIZE dflags))
1683
1684 copy src dst dst_p src_p bytes
1685
1686 -- The base address of the destination card table
1687 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p (loadArrPtrsSize dflags dst)
1688
1689 emitSetCards dst_off dst_cards_p n
1690
1691 -- TODO: Figure out if this branch is really neccesary.
1692 emit =<< mkCmmIfThen (cmmNeWord dflags n (mkIntExpr dflags 0)) nonzero
1693
1694 -- | Takes an info table label, a register to return the newly
1695 -- allocated array in, a source array, an offset in the source array,
1696 -- and the number of elements to copy. Allocates a new array and
1697 -- initializes it from the source array.
1698 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1699 -> FCode ()
1700 emitCloneArray info_p res_r src src_off n = do
1701 dflags <- getDynFlags
1702
1703 let info_ptr = mkLblExpr info_p
1704 rep = arrPtrsRep dflags n
1705
1706 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
1707 (mkIntExpr dflags (nonHdrSize dflags rep))
1708 (zeroExpr dflags)
1709
1710 let hdr_size = wordsToBytes dflags (fixedHdrSize dflags)
1711
1712 base <- allocHeapClosure rep info_ptr curCCS
1713 [ (mkIntExpr dflags n,
1714 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
1715 , (mkIntExpr dflags (nonHdrSizeW rep),
1716 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
1717 ]
1718
1719 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1720 emit $ mkAssign arr base
1721
1722 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
1723 (arrPtrsHdrSize dflags)
1724 src_p <- assignTempE $ cmmOffsetExprW dflags src
1725 (cmmAddWord dflags
1726 (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
1727
1728 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
1729 (mkIntExpr dflags (wORD_SIZE dflags))
1730
1731 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1732
1733 -- | Takes and offset in the destination array, the base address of
1734 -- the card table, and the number of elements affected (*not* the
1735 -- number of cards). The number of elements may not be zero.
1736 -- Marks the relevant cards as dirty.
1737 emitSetCards :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1738 emitSetCards dst_start dst_cards_start n = do
1739 dflags <- getDynFlags
1740 start_card <- assignTempE $ cardCmm dflags dst_start
1741 let end_card = cardCmm dflags (cmmSubWord dflags (cmmAddWord dflags dst_start n) (mkIntExpr dflags 1))
1742 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
1743 (mkIntExpr dflags 1)
1744 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
1745 (mkIntExpr dflags 1) -- no alignment (1 byte)
1746
1747 -- Convert an element index to a card index
1748 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
1749 cardCmm dflags i =
1750 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
1751
1752 -- | Emit a call to @memcpy@.
1753 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1754 emitMemcpyCall dst src n align = do
1755 emitPrimCall
1756 [ {-no results-} ]
1757 MO_Memcpy
1758 [ dst, src, n, align ]
1759
1760 -- | Emit a call to @memmove@.
1761 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1762 emitMemmoveCall dst src n align = do
1763 emitPrimCall
1764 [ {- no results -} ]
1765 MO_Memmove
1766 [ dst, src, n, align ]
1767
1768 -- | Emit a call to @memset@. The second argument must fit inside an
1769 -- unsigned char.
1770 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1771 emitMemsetCall dst c n align = do
1772 emitPrimCall
1773 [ {- no results -} ]
1774 MO_Memset
1775 [ dst, c, n, align ]
1776
1777 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1778 emitBSwapCall res x width = do
1779 emitPrimCall
1780 [ res ]
1781 (MO_BSwap width)
1782 [ x ]
1783
1784 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1785 emitPopCntCall res x width = do
1786 emitPrimCall
1787 [ res ]
1788 (MO_PopCnt width)
1789 [ x ]