Make copy array ops out-of-line by default
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C--: primitive operations
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmPrim (
10 cgOpApp,
11 cgPrimOp, -- internal(ish), used by cgCase to get code for a
12 -- comparison without also turning it into a Bool.
13 shouldInlinePrimOp
14 ) where
15
16 #include "HsVersions.h"
17
18 import StgCmmLayout
19 import StgCmmForeign
20 import StgCmmEnv
21 import StgCmmMonad
22 import StgCmmUtils
23 import StgCmmTicky
24 import StgCmmHeap
25 import StgCmmProf ( costCentreFrom, curCCS )
26
27 import DynFlags
28 import Platform
29 import BasicTypes
30 import MkGraph
31 import StgSyn
32 import Cmm
33 import CmmInfo
34 import Type ( Type, tyConAppTyCon )
35 import TyCon
36 import CLabel
37 import CmmUtils
38 import PrimOp
39 import SMRep
40 import FastString
41 import Outputable
42 import Util
43
44 import Control.Monad (liftM, when)
45
46 ------------------------------------------------------------------------
47 -- Primitive operations and foreign calls
48 ------------------------------------------------------------------------
49
50 {- Note [Foreign call results]
51 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
52 A foreign call always returns an unboxed tuple of results, one
53 of which is the state token. This seems to happen even for pure
54 calls.
55
56 Even if we returned a single result for pure calls, it'd still be
57 right to wrap it in a singleton unboxed tuple, because the result
58 might be a Haskell closure pointer, we don't want to evaluate it. -}
59
60 ----------------------------------
61 cgOpApp :: StgOp -- The op
62 -> [StgArg] -- Arguments
63 -> Type -- Result type (always an unboxed tuple)
64 -> FCode ReturnKind
65
66 -- Foreign calls
67 cgOpApp (StgFCallOp fcall _) stg_args res_ty
68 = cgForeignCall fcall stg_args res_ty
69 -- Note [Foreign call results]
70
71 -- tagToEnum# is special: we need to pull the constructor
72 -- out of the table, and perform an appropriate return.
73
74 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
75 = ASSERT(isEnumerationTyCon tycon)
76 do { dflags <- getDynFlags
77 ; args' <- getNonVoidArgAmodes [arg]
78 ; let amode = case args' of [amode] -> amode
79 _ -> panic "TagToEnumOp had void arg"
80 ; emitReturn [tagToClosure dflags tycon amode] }
81 where
82 -- If you're reading this code in the attempt to figure
83 -- out why the compiler panic'ed here, it is probably because
84 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
85 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
86 -- That won't work.
87 tycon = tyConAppTyCon res_ty
88
89 cgOpApp (StgPrimOp primop) args res_ty = do
90 dflags <- getDynFlags
91 cmm_args <- getNonVoidArgAmodes args
92 case shouldInlinePrimOp dflags primop cmm_args of
93 Nothing -> do -- out-of-line
94 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
95 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
96
97 Just f -- inline
98 | ReturnsPrim VoidRep <- result_info
99 -> do f []
100 emitReturn []
101
102 | ReturnsPrim rep <- result_info
103 -> do dflags <- getDynFlags
104 res <- newTemp (primRepCmmType dflags rep)
105 f [res]
106 emitReturn [CmmReg (CmmLocal res)]
107
108 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
109 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
110 f regs
111 emitReturn (map (CmmReg . CmmLocal) regs)
112
113 | otherwise -> panic "cgPrimop"
114 where
115 result_info = getPrimOpResultInfo primop
116
117 cgOpApp (StgPrimCallOp primcall) args _res_ty
118 = do { cmm_args <- getNonVoidArgAmodes args
119 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
120 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
121
122 -- | Decide whether an out-of-line primop should be replaced by an
123 -- inline implementation. This might happen e.g. if there's enough
124 -- static information, such as statically know arguments, to emit a
125 -- more efficient implementation inline.
126 --
127 -- Returns 'Nothing' if this primop should use its out-of-line
128 -- implementation (defined elsewhere) and 'Just' together with a code
129 -- generating function that takes the output regs as arguments
130 -- otherwise.
131 shouldInlinePrimOp :: DynFlags
132 -> PrimOp -- ^ The primop
133 -> [CmmExpr] -- ^ The primop arguments
134 -> Maybe ([LocalReg] -> FCode ())
135
136 shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n _))]
137 | fromInteger n <= maxInlineAllocSize dflags =
138 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
139
140 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n _)), init]
141 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
142 Just $ \ [res] -> doNewArrayOp res (fromInteger n) init
143
144 shouldInlinePrimOp _ CopyArrayOp
145 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
146 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
147
148 shouldInlinePrimOp _ CopyMutableArrayOp
149 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
150 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
151
152 shouldInlinePrimOp _ CopyArrayArrayOp
153 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
154 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
155
156 shouldInlinePrimOp _ CopyMutableArrayArrayOp
157 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
158 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
159
160 shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n _))]
161 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
162 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
163
164 shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n _))]
165 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
166 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
167
168 shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n _))]
169 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
170 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
171
172 shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n _))]
173 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
174 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
175
176 shouldInlinePrimOp dflags primop args
177 | primOpOutOfLine primop = Nothing
178 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
179
180 -- TODO: Several primops, such as 'copyArray#', only have an inline
181 -- implementation (below) but could possibly have both an inline
182 -- implementation and an out-of-line implementation, just like
183 -- 'newArray#'. This would lower the amount of code generated,
184 -- hopefully without a performance impact (needs to be measured).
185
186 ---------------------------------------------------
187 cgPrimOp :: [LocalReg] -- where to put the results
188 -> PrimOp -- the op
189 -> [StgArg] -- arguments
190 -> FCode ()
191
192 cgPrimOp results op args
193 = do dflags <- getDynFlags
194 arg_exprs <- getNonVoidArgAmodes args
195 emitPrimOp dflags results op arg_exprs
196
197
198 ------------------------------------------------------------------------
199 -- Emitting code for a primop
200 ------------------------------------------------------------------------
201
202 emitPrimOp :: DynFlags
203 -> [LocalReg] -- where to put the results
204 -> PrimOp -- the op
205 -> [CmmExpr] -- arguments
206 -> FCode ()
207
208 -- First we handle various awkward cases specially. The remaining
209 -- easy cases are then handled by translateOp, defined below.
210
211 emitPrimOp dflags [res_r,res_c] IntAddCOp [aa,bb]
212 {-
213 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
214 C, and without needing any comparisons. This may not be the
215 fastest way to do it - if you have better code, please send it! --SDM
216
217 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
218
219 We currently don't make use of the r value if c is != 0 (i.e.
220 overflow), we just convert to big integers and try again. This
221 could be improved by making r and c the correct values for
222 plugging into a new J#.
223
224 { r = ((I_)(a)) + ((I_)(b)); \
225 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
226 >> (BITS_IN (I_) - 1); \
227 }
228 Wading through the mass of bracketry, it seems to reduce to:
229 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
230
231 -}
232 = emit $ catAGraphs [
233 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
234 mkAssign (CmmLocal res_c) $
235 CmmMachOp (mo_wordUShr dflags) [
236 CmmMachOp (mo_wordAnd dflags) [
237 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
238 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
239 ],
240 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
241 ]
242 ]
243
244
245 emitPrimOp dflags [res_r,res_c] IntSubCOp [aa,bb]
246 {- Similarly:
247 #define subIntCzh(r,c,a,b) \
248 { r = ((I_)(a)) - ((I_)(b)); \
249 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
250 >> (BITS_IN (I_) - 1); \
251 }
252
253 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
254 -}
255 = emit $ catAGraphs [
256 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
257 mkAssign (CmmLocal res_c) $
258 CmmMachOp (mo_wordUShr dflags) [
259 CmmMachOp (mo_wordAnd dflags) [
260 CmmMachOp (mo_wordXor dflags) [aa,bb],
261 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
262 ],
263 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
264 ]
265 ]
266
267
268 emitPrimOp _ [res] ParOp [arg]
269 =
270 -- for now, just implement this in a C function
271 -- later, we might want to inline it.
272 emitCCall
273 [(res,NoHint)]
274 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
275 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
276
277 emitPrimOp dflags [res] SparkOp [arg]
278 = do
279 -- returns the value of arg in res. We're going to therefore
280 -- refer to arg twice (once to pass to newSpark(), and once to
281 -- assign to res), so put it in a temporary.
282 tmp <- assignTemp arg
283 tmp2 <- newTemp (bWord dflags)
284 emitCCall
285 [(tmp2,NoHint)]
286 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
287 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
288 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
289
290 emitPrimOp dflags [res] GetCCSOfOp [arg]
291 = emitAssign (CmmLocal res) val
292 where
293 val
294 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
295 | otherwise = CmmLit (zeroCLit dflags)
296
297 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
298 = emitAssign (CmmLocal res) curCCS
299
300 emitPrimOp dflags [res] ReadMutVarOp [mutv]
301 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSize dflags) (gcWord dflags))
302
303 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
304 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSize dflags)) var
305 emitCCall
306 [{-no results-}]
307 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
308 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
309
310 -- #define sizzeofByteArrayzh(r,a) \
311 -- r = ((StgArrWords *)(a))->bytes
312 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
313 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
314
315 -- #define sizzeofMutableByteArrayzh(r,a) \
316 -- r = ((StgArrWords *)(a))->bytes
317 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
318 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
319
320
321 -- #define touchzh(o) /* nothing */
322 emitPrimOp _ res@[] TouchOp args@[_arg]
323 = do emitPrimCall res MO_Touch args
324
325 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
326 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
327 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
328
329 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
330 emitPrimOp dflags [res] StableNameToIntOp [arg]
331 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
332
333 -- #define eqStableNamezh(r,sn1,sn2) \
334 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
335 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
336 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
337 cmmLoadIndexW dflags arg1 (fixedHdrSize dflags) (bWord dflags),
338 cmmLoadIndexW dflags arg2 (fixedHdrSize dflags) (bWord dflags)
339 ])
340
341
342 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
343 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
344
345 -- #define addrToHValuezh(r,a) r=(P_)a
346 emitPrimOp _ [res] AddrToAnyOp [arg]
347 = emitAssign (CmmLocal res) arg
348
349 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
350 -- Note: argument may be tagged!
351 emitPrimOp dflags [res] DataToTagOp [arg]
352 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
353
354 {- Freezing arrays-of-ptrs requires changing an info table, for the
355 benefit of the generational collector. It needs to scavenge mutable
356 objects, even if they are in old space. When they become immutable,
357 they can be removed from this scavenge list. -}
358
359 -- #define unsafeFreezzeArrayzh(r,a)
360 -- {
361 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
362 -- r = a;
363 -- }
364 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
365 = emit $ catAGraphs
366 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
367 mkAssign (CmmLocal res) arg ]
368 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
369 = emit $ catAGraphs
370 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
371 mkAssign (CmmLocal res) arg ]
372
373 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
374 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
375 = emitAssign (CmmLocal res) arg
376
377 -- Reading/writing pointer arrays
378
379 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
380 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
381 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
382
383 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
384 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
385 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
386 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
387 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
388 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
389 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
390 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
391 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
392 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
393
394 emitPrimOp dflags [res] SizeofArrayOp [arg]
395 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags))
396 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
397 = emitPrimOp dflags [res] SizeofArrayOp [arg]
398 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
399 = emitPrimOp dflags [res] SizeofArrayOp [arg]
400 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
401 = emitPrimOp dflags [res] SizeofArrayOp [arg]
402
403 -- IndexXXXoffAddr
404
405 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
406 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
407 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
408 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
409 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
410 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
411 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
412 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
413 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
414 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
415 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
416 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
417 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
418 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
419 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
420 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
421
422 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
423
424 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
425 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
426 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
427 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
428 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
429 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
430 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
431 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
432 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
433 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
434 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
435 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
436 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
437 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
438 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
439 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
440
441 -- IndexXXXArray
442
443 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
444 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
445 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
446 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
447 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
448 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
449 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
450 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
451 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
452 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
453 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
454 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
455 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
456 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
457 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
458 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
459
460 -- ReadXXXArray, identical to IndexXXXArray.
461
462 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
463 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
464 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
465 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
466 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
467 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
468 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
469 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
470 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
471 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
472 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
473 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
474 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
475 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
476 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
477 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
478
479 -- WriteXXXoffAddr
480
481 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
482 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
483 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
484 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
485 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
486 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
487 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
488 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
489 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
490 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
491 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
492 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
493 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
494 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
495 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
496 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
497
498 -- WriteXXXArray
499
500 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
501 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
502 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
503 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
504 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
505 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
506 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
507 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
508 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
509 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
510 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
511 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
512 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
513 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
514 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
515 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
516
517 -- Copying and setting byte arrays
518 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
519 doCopyByteArrayOp src src_off dst dst_off n
520 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
521 doCopyMutableByteArrayOp src src_off dst dst_off n
522 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
523 doCopyByteArrayToAddrOp src src_off dst n
524 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
525 doCopyMutableByteArrayToAddrOp src src_off dst n
526 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
527 doCopyAddrToByteArrayOp src dst dst_off n
528 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
529 doSetByteArrayOp ba off len c
530
531 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
532 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
533 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
534 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
535
536 -- Population count
537 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
538 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
539 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
540 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
541 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
542
543 -- Unsigned int to floating point conversions
544 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
545 (MO_UF_Conv W32) [w]
546 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
547 (MO_UF_Conv W64) [w]
548
549 -- SIMD primops
550 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
551 checkVecCompatibility dflags vcat n w
552 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
553 where
554 zeros :: CmmExpr
555 zeros = CmmLit $ CmmVec (replicate n zero)
556
557 zero :: CmmLit
558 zero = case vcat of
559 IntVec -> CmmInt 0 w
560 WordVec -> CmmInt 0 w
561 FloatVec -> CmmFloat 0 w
562
563 ty :: CmmType
564 ty = vecVmmType vcat n w
565
566 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
567 checkVecCompatibility dflags vcat n w
568 when (length es /= n) $
569 panic "emitPrimOp: VecPackOp has wrong number of arguments"
570 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
571 where
572 zeros :: CmmExpr
573 zeros = CmmLit $ CmmVec (replicate n zero)
574
575 zero :: CmmLit
576 zero = case vcat of
577 IntVec -> CmmInt 0 w
578 WordVec -> CmmInt 0 w
579 FloatVec -> CmmFloat 0 w
580
581 ty :: CmmType
582 ty = vecVmmType vcat n w
583
584 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
585 checkVecCompatibility dflags vcat n w
586 when (length res /= n) $
587 panic "emitPrimOp: VecUnpackOp has wrong number of results"
588 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
589 where
590 ty :: CmmType
591 ty = vecVmmType vcat n w
592
593 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
594 checkVecCompatibility dflags vcat n w
595 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
596 where
597 ty :: CmmType
598 ty = vecVmmType vcat n w
599
600 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
601 checkVecCompatibility dflags vcat n w
602 doIndexByteArrayOp Nothing ty res args
603 where
604 ty :: CmmType
605 ty = vecVmmType vcat n w
606
607 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
608 checkVecCompatibility dflags vcat n w
609 doIndexByteArrayOp Nothing ty res args
610 where
611 ty :: CmmType
612 ty = vecVmmType vcat n w
613
614 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
615 checkVecCompatibility dflags vcat n w
616 doWriteByteArrayOp Nothing ty res args
617 where
618 ty :: CmmType
619 ty = vecVmmType vcat n w
620
621 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
622 checkVecCompatibility dflags vcat n w
623 doIndexOffAddrOp Nothing ty res args
624 where
625 ty :: CmmType
626 ty = vecVmmType vcat n w
627
628 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
629 checkVecCompatibility dflags vcat n w
630 doIndexOffAddrOp Nothing ty res args
631 where
632 ty :: CmmType
633 ty = vecVmmType vcat n w
634
635 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
636 checkVecCompatibility dflags vcat n w
637 doWriteOffAddrOp Nothing ty res args
638 where
639 ty :: CmmType
640 ty = vecVmmType vcat n w
641
642 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
643 checkVecCompatibility dflags vcat n w
644 doIndexByteArrayOpAs Nothing vecty ty res args
645 where
646 vecty :: CmmType
647 vecty = vecVmmType vcat n w
648
649 ty :: CmmType
650 ty = vecCmmCat vcat w
651
652 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
653 checkVecCompatibility dflags vcat n w
654 doIndexByteArrayOpAs Nothing vecty ty res args
655 where
656 vecty :: CmmType
657 vecty = vecVmmType vcat n w
658
659 ty :: CmmType
660 ty = vecCmmCat vcat w
661
662 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
663 checkVecCompatibility dflags vcat n w
664 doWriteByteArrayOp Nothing ty res args
665 where
666 ty :: CmmType
667 ty = vecCmmCat vcat w
668
669 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
670 checkVecCompatibility dflags vcat n w
671 doIndexOffAddrOpAs Nothing vecty ty res args
672 where
673 vecty :: CmmType
674 vecty = vecVmmType vcat n w
675
676 ty :: CmmType
677 ty = vecCmmCat vcat w
678
679 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
680 checkVecCompatibility dflags vcat n w
681 doIndexOffAddrOpAs Nothing vecty ty res args
682 where
683 vecty :: CmmType
684 vecty = vecVmmType vcat n w
685
686 ty :: CmmType
687 ty = vecCmmCat vcat w
688
689 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
690 checkVecCompatibility dflags vcat n w
691 doWriteOffAddrOp Nothing ty res args
692 where
693 ty :: CmmType
694 ty = vecCmmCat vcat w
695
696 -- Prefetch
697 emitPrimOp _ res PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
698 emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
699 emitPrimOp _ res PrefetchAddrOp3 args = doPrefetchAddrOp 3 res args
700
701 emitPrimOp _ res PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
702 emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
703 emitPrimOp _ res PrefetchAddrOp2 args = doPrefetchAddrOp 2 res args
704
705 emitPrimOp _ res PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
706 emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
707 emitPrimOp _ res PrefetchAddrOp1 args = doPrefetchAddrOp 1 res args
708
709 emitPrimOp _ res PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
710 emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
711 emitPrimOp _ res PrefetchAddrOp0 args = doPrefetchAddrOp 0 res args
712
713
714 -- The rest just translate straightforwardly
715 emitPrimOp dflags [res] op [arg]
716 | nopOp op
717 = emitAssign (CmmLocal res) arg
718
719 | Just (mop,rep) <- narrowOp op
720 = emitAssign (CmmLocal res) $
721 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
722
723 emitPrimOp dflags r@[res] op args
724 | Just prim <- callishOp op
725 = do emitPrimCall r prim args
726
727 | Just mop <- translateOp dflags op
728 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
729 emit stmt
730
731 emitPrimOp dflags results op args
732 = case callishPrimOpSupported dflags op of
733 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
734 Right gen -> gen results args
735
736 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
737
738 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
739 callishPrimOpSupported dflags op
740 = case op of
741 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
742 | otherwise -> Right (genericIntQuotRemOp dflags)
743
744 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
745 | otherwise -> Right (genericWordQuotRemOp dflags)
746
747 WordQuotRem2Op | ncg && x86ish -> Left (MO_U_QuotRem2 (wordWidth dflags))
748 | otherwise -> Right (genericWordQuotRem2Op dflags)
749
750 WordAdd2Op | ncg && x86ish -> Left (MO_Add2 (wordWidth dflags))
751 | otherwise -> Right genericWordAdd2Op
752
753 WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags))
754 | otherwise -> Right genericWordMul2Op
755
756 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
757 where
758 ncg = case hscTarget dflags of
759 HscAsm -> True
760 _ -> False
761
762 x86ish = case platformArch (targetPlatform dflags) of
763 ArchX86 -> True
764 ArchX86_64 -> True
765 _ -> False
766
767 genericIntQuotRemOp :: DynFlags -> GenericOp
768 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
769 = emit $ mkAssign (CmmLocal res_q)
770 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
771 mkAssign (CmmLocal res_r)
772 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
773 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
774
775 genericWordQuotRemOp :: DynFlags -> GenericOp
776 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
777 = emit $ mkAssign (CmmLocal res_q)
778 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
779 mkAssign (CmmLocal res_r)
780 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
781 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
782
783 genericWordQuotRem2Op :: DynFlags -> GenericOp
784 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
785 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
786 where ty = cmmExprType dflags arg_x_high
787 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
788 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
789 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
790 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
791 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
792 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
793 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
794 zero = lit 0
795 one = lit 1
796 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
797 lit i = CmmLit (CmmInt i (wordWidth dflags))
798
799 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
800 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
801 mkAssign (CmmLocal res_r) high)
802 f i acc high low =
803 do roverflowedBit <- newTemp ty
804 rhigh' <- newTemp ty
805 rhigh'' <- newTemp ty
806 rlow' <- newTemp ty
807 risge <- newTemp ty
808 racc' <- newTemp ty
809 let high' = CmmReg (CmmLocal rhigh')
810 isge = CmmReg (CmmLocal risge)
811 overflowedBit = CmmReg (CmmLocal roverflowedBit)
812 let this = catAGraphs
813 [mkAssign (CmmLocal roverflowedBit)
814 (shr high negone),
815 mkAssign (CmmLocal rhigh')
816 (or (shl high one) (shr low negone)),
817 mkAssign (CmmLocal rlow')
818 (shl low one),
819 mkAssign (CmmLocal risge)
820 (or (overflowedBit `ne` zero)
821 (high' `ge` arg_y)),
822 mkAssign (CmmLocal rhigh'')
823 (high' `minus` (arg_y `times` isge)),
824 mkAssign (CmmLocal racc')
825 (or (shl acc one) isge)]
826 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
827 (CmmReg (CmmLocal rhigh''))
828 (CmmReg (CmmLocal rlow'))
829 return (this <*> rest)
830 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
831
832 genericWordAdd2Op :: GenericOp
833 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
834 = do dflags <- getDynFlags
835 r1 <- newTemp (cmmExprType dflags arg_x)
836 r2 <- newTemp (cmmExprType dflags arg_x)
837 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
838 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
839 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
840 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
841 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
842 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
843 (wordWidth dflags))
844 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
845 emit $ catAGraphs
846 [mkAssign (CmmLocal r1)
847 (add (bottomHalf arg_x) (bottomHalf arg_y)),
848 mkAssign (CmmLocal r2)
849 (add (topHalf (CmmReg (CmmLocal r1)))
850 (add (topHalf arg_x) (topHalf arg_y))),
851 mkAssign (CmmLocal res_h)
852 (topHalf (CmmReg (CmmLocal r2))),
853 mkAssign (CmmLocal res_l)
854 (or (toTopHalf (CmmReg (CmmLocal r2)))
855 (bottomHalf (CmmReg (CmmLocal r1))))]
856 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
857
858 genericWordMul2Op :: GenericOp
859 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
860 = do dflags <- getDynFlags
861 let t = cmmExprType dflags arg_x
862 xlyl <- liftM CmmLocal $ newTemp t
863 xlyh <- liftM CmmLocal $ newTemp t
864 xhyl <- liftM CmmLocal $ newTemp t
865 r <- liftM CmmLocal $ newTemp t
866 -- This generic implementation is very simple and slow. We might
867 -- well be able to do better, but for now this at least works.
868 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
869 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
870 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
871 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
872 sum = foldl1 add
873 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
874 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
875 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
876 (wordWidth dflags))
877 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
878 emit $ catAGraphs
879 [mkAssign xlyl
880 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
881 mkAssign xlyh
882 (mul (bottomHalf arg_x) (topHalf arg_y)),
883 mkAssign xhyl
884 (mul (topHalf arg_x) (bottomHalf arg_y)),
885 mkAssign r
886 (sum [topHalf (CmmReg xlyl),
887 bottomHalf (CmmReg xhyl),
888 bottomHalf (CmmReg xlyh)]),
889 mkAssign (CmmLocal res_l)
890 (or (bottomHalf (CmmReg xlyl))
891 (toTopHalf (CmmReg r))),
892 mkAssign (CmmLocal res_h)
893 (sum [mul (topHalf arg_x) (topHalf arg_y),
894 topHalf (CmmReg xhyl),
895 topHalf (CmmReg xlyh),
896 topHalf (CmmReg r)])]
897 genericWordMul2Op _ _ = panic "genericWordMul2Op"
898
899 -- These PrimOps are NOPs in Cmm
900
901 nopOp :: PrimOp -> Bool
902 nopOp Int2WordOp = True
903 nopOp Word2IntOp = True
904 nopOp Int2AddrOp = True
905 nopOp Addr2IntOp = True
906 nopOp ChrOp = True -- Int# and Char# are rep'd the same
907 nopOp OrdOp = True
908 nopOp _ = False
909
910 -- These PrimOps turn into double casts
911
912 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
913 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
914 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
915 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
916 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
917 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
918 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
919 narrowOp _ = Nothing
920
921 -- Native word signless ops
922
923 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
924 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
925 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
926 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
927 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
928 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
929 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
930
931 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
932 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
933 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
934 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
935 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
936 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
937
938 translateOp dflags AndOp = Just (mo_wordAnd dflags)
939 translateOp dflags OrOp = Just (mo_wordOr dflags)
940 translateOp dflags XorOp = Just (mo_wordXor dflags)
941 translateOp dflags NotOp = Just (mo_wordNot dflags)
942 translateOp dflags SllOp = Just (mo_wordShl dflags)
943 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
944
945 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
946
947 -- Native word signed ops
948
949 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
950 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
951 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
952 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
953 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
954
955
956 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
957 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
958 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
959 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
960
961 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
962 translateOp dflags OrIOp = Just (mo_wordOr dflags)
963 translateOp dflags XorIOp = Just (mo_wordXor dflags)
964 translateOp dflags NotIOp = Just (mo_wordNot dflags)
965 translateOp dflags ISllOp = Just (mo_wordShl dflags)
966 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
967 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
968
969 -- Native word unsigned ops
970
971 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
972 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
973 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
974 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
975
976 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
977 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
978 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
979
980 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
981 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
982 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
983 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
984
985 -- Char# ops
986
987 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
988 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
989 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
990 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
991 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
992 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
993
994 -- Double ops
995
996 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
997 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
998 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
999 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
1000 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
1001 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
1002
1003 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
1004 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
1005 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
1006 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
1007 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
1008
1009 -- Float ops
1010
1011 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
1012 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
1013 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
1014 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1015 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1016 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1017
1018 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1019 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1020 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1021 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1022 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1023
1024 -- Vector ops
1025
1026 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1027 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1028 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1029 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1030 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1031
1032 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1033 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1034 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1035 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1036 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1037 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1038
1039 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1040 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1041 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1042 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1043 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1044
1045 -- Conversions
1046
1047 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1048 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1049
1050 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1051 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1052
1053 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1054 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1055
1056 -- Word comparisons masquerading as more exotic things.
1057
1058 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1059 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1060 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1061 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1062 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1063 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1064 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1065
1066 translateOp _ _ = Nothing
1067
1068 -- These primops are implemented by CallishMachOps, because they sometimes
1069 -- turn into foreign calls depending on the backend.
1070
1071 callishOp :: PrimOp -> Maybe CallishMachOp
1072 callishOp DoublePowerOp = Just MO_F64_Pwr
1073 callishOp DoubleSinOp = Just MO_F64_Sin
1074 callishOp DoubleCosOp = Just MO_F64_Cos
1075 callishOp DoubleTanOp = Just MO_F64_Tan
1076 callishOp DoubleSinhOp = Just MO_F64_Sinh
1077 callishOp DoubleCoshOp = Just MO_F64_Cosh
1078 callishOp DoubleTanhOp = Just MO_F64_Tanh
1079 callishOp DoubleAsinOp = Just MO_F64_Asin
1080 callishOp DoubleAcosOp = Just MO_F64_Acos
1081 callishOp DoubleAtanOp = Just MO_F64_Atan
1082 callishOp DoubleLogOp = Just MO_F64_Log
1083 callishOp DoubleExpOp = Just MO_F64_Exp
1084 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1085
1086 callishOp FloatPowerOp = Just MO_F32_Pwr
1087 callishOp FloatSinOp = Just MO_F32_Sin
1088 callishOp FloatCosOp = Just MO_F32_Cos
1089 callishOp FloatTanOp = Just MO_F32_Tan
1090 callishOp FloatSinhOp = Just MO_F32_Sinh
1091 callishOp FloatCoshOp = Just MO_F32_Cosh
1092 callishOp FloatTanhOp = Just MO_F32_Tanh
1093 callishOp FloatAsinOp = Just MO_F32_Asin
1094 callishOp FloatAcosOp = Just MO_F32_Acos
1095 callishOp FloatAtanOp = Just MO_F32_Atan
1096 callishOp FloatLogOp = Just MO_F32_Log
1097 callishOp FloatExpOp = Just MO_F32_Exp
1098 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1099
1100 callishOp _ = Nothing
1101
1102 ------------------------------------------------------------------------------
1103 -- Helpers for translating various minor variants of array indexing.
1104
1105 doIndexOffAddrOp :: Maybe MachOp
1106 -> CmmType
1107 -> [LocalReg]
1108 -> [CmmExpr]
1109 -> FCode ()
1110 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1111 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1112 doIndexOffAddrOp _ _ _ _
1113 = panic "StgCmmPrim: doIndexOffAddrOp"
1114
1115 doIndexOffAddrOpAs :: Maybe MachOp
1116 -> CmmType
1117 -> CmmType
1118 -> [LocalReg]
1119 -> [CmmExpr]
1120 -> FCode ()
1121 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1122 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1123 doIndexOffAddrOpAs _ _ _ _ _
1124 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1125
1126 doIndexByteArrayOp :: Maybe MachOp
1127 -> CmmType
1128 -> [LocalReg]
1129 -> [CmmExpr]
1130 -> FCode ()
1131 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1132 = do dflags <- getDynFlags
1133 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1134 doIndexByteArrayOp _ _ _ _
1135 = panic "StgCmmPrim: doIndexByteArrayOp"
1136
1137 doIndexByteArrayOpAs :: Maybe MachOp
1138 -> CmmType
1139 -> CmmType
1140 -> [LocalReg]
1141 -> [CmmExpr]
1142 -> FCode ()
1143 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1144 = do dflags <- getDynFlags
1145 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1146 doIndexByteArrayOpAs _ _ _ _ _
1147 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1148
1149 doReadPtrArrayOp :: LocalReg
1150 -> CmmExpr
1151 -> CmmExpr
1152 -> FCode ()
1153 doReadPtrArrayOp res addr idx
1154 = do dflags <- getDynFlags
1155 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1156
1157 doWriteOffAddrOp :: Maybe MachOp
1158 -> CmmType
1159 -> [LocalReg]
1160 -> [CmmExpr]
1161 -> FCode ()
1162 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1163 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1164 doWriteOffAddrOp _ _ _ _
1165 = panic "StgCmmPrim: doWriteOffAddrOp"
1166
1167 doWriteByteArrayOp :: Maybe MachOp
1168 -> CmmType
1169 -> [LocalReg]
1170 -> [CmmExpr]
1171 -> FCode ()
1172 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1173 = do dflags <- getDynFlags
1174 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1175 doWriteByteArrayOp _ _ _ _
1176 = panic "StgCmmPrim: doWriteByteArrayOp"
1177
1178 doWritePtrArrayOp :: CmmExpr
1179 -> CmmExpr
1180 -> CmmExpr
1181 -> FCode ()
1182 doWritePtrArrayOp addr idx val
1183 = do dflags <- getDynFlags
1184 let ty = cmmExprType dflags val
1185 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1186 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1187 -- the write barrier. We must write a byte into the mark table:
1188 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1189 emit $ mkStore (
1190 cmmOffsetExpr dflags
1191 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1192 (loadArrPtrsSize dflags addr))
1193 (CmmMachOp (mo_wordUShr dflags) [idx,
1194 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1195 ) (CmmLit (CmmInt 1 W8))
1196
1197 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1198 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1199 where off = fixedHdrSize dflags * wORD_SIZE dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1200
1201 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1202 -> Maybe MachOp -- Optional result cast
1203 -> CmmType -- Type of element we are accessing
1204 -> LocalReg -- Destination
1205 -> CmmExpr -- Base address
1206 -> CmmType -- Type of element by which we are indexing
1207 -> CmmExpr -- Index
1208 -> FCode ()
1209 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1210 = do dflags <- getDynFlags
1211 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1212 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1213 = do dflags <- getDynFlags
1214 emitAssign (CmmLocal res) (CmmMachOp cast [
1215 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1216
1217 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1218 -> Maybe MachOp -- Optional value cast
1219 -> CmmExpr -- Base address
1220 -> CmmType -- Type of element by which we are indexing
1221 -> CmmExpr -- Index
1222 -> CmmExpr -- Value to write
1223 -> FCode ()
1224 mkBasicIndexedWrite off Nothing base idx_ty idx val
1225 = do dflags <- getDynFlags
1226 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1227 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1228 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1229
1230 -- ----------------------------------------------------------------------------
1231 -- Misc utils
1232
1233 cmmIndexOffExpr :: DynFlags
1234 -> ByteOff -- Initial offset in bytes
1235 -> Width -- Width of element by which we are indexing
1236 -> CmmExpr -- Base address
1237 -> CmmExpr -- Index
1238 -> CmmExpr
1239 cmmIndexOffExpr dflags off width base idx
1240 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1241
1242 cmmLoadIndexOffExpr :: DynFlags
1243 -> ByteOff -- Initial offset in bytes
1244 -> CmmType -- Type of element we are accessing
1245 -> CmmExpr -- Base address
1246 -> CmmType -- Type of element by which we are indexing
1247 -> CmmExpr -- Index
1248 -> CmmExpr
1249 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1250 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1251
1252 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1253 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1254
1255 ------------------------------------------------------------------------------
1256 -- Helpers for translating vector primops.
1257
1258 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1259 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1260
1261 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1262 vecCmmCat IntVec = cmmBits
1263 vecCmmCat WordVec = cmmBits
1264 vecCmmCat FloatVec = cmmFloat
1265
1266 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1267 vecElemInjectCast _ FloatVec _ = Nothing
1268 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1269 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1270 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1271 vecElemInjectCast _ IntVec W64 = Nothing
1272 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1273 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1274 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1275 vecElemInjectCast _ WordVec W64 = Nothing
1276 vecElemInjectCast _ _ _ = Nothing
1277
1278 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1279 vecElemProjectCast _ FloatVec _ = Nothing
1280 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1281 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1282 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1283 vecElemProjectCast _ IntVec W64 = Nothing
1284 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1285 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1286 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1287 vecElemProjectCast _ WordVec W64 = Nothing
1288 vecElemProjectCast _ _ _ = Nothing
1289
1290 -- Check to make sure that we can generate code for the specified vector type
1291 -- given the current set of dynamic flags.
1292 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1293 checkVecCompatibility dflags vcat l w = do
1294 when (hscTarget dflags /= HscLlvm) $ do
1295 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1296 ,"Please use -fllvm."]
1297 check vecWidth vcat l w
1298 where
1299 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1300 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1301 sorry $ "128-bit wide single-precision floating point " ++
1302 "SIMD vector instructions require at least -msse."
1303 check W128 _ _ _ | not (isSse2Enabled dflags) =
1304 sorry $ "128-bit wide integer and double precision " ++
1305 "SIMD vector instructions require at least -msse2."
1306 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1307 sorry $ "256-bit wide floating point " ++
1308 "SIMD vector instructions require at least -mavx."
1309 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1310 sorry $ "256-bit wide integer " ++
1311 "SIMD vector instructions require at least -mavx2."
1312 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1313 sorry $ "512-bit wide " ++
1314 "SIMD vector instructions require -mavx512f."
1315 check _ _ _ _ = return ()
1316
1317 vecWidth = typeWidth (vecVmmType vcat l w)
1318
1319 ------------------------------------------------------------------------------
1320 -- Helpers for translating vector packing and unpacking.
1321
1322 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1323 -> CmmType -- Type of vector
1324 -> CmmExpr -- Initial vector
1325 -> [CmmExpr] -- Elements
1326 -> CmmFormal -- Destination for result
1327 -> FCode ()
1328 doVecPackOp maybe_pre_write_cast ty z es res = do
1329 dst <- newTemp ty
1330 emitAssign (CmmLocal dst) z
1331 vecPack dst es 0
1332 where
1333 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1334 vecPack src [] _ =
1335 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1336
1337 vecPack src (e : es) i = do
1338 dst <- newTemp ty
1339 if isFloatType (vecElemType ty)
1340 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1341 [CmmReg (CmmLocal src), cast e, iLit])
1342 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1343 [CmmReg (CmmLocal src), cast e, iLit])
1344 vecPack dst es (i + 1)
1345 where
1346 -- vector indices are always 32-bits
1347 iLit = CmmLit (CmmInt (toInteger i) W32)
1348
1349 cast :: CmmExpr -> CmmExpr
1350 cast val = case maybe_pre_write_cast of
1351 Nothing -> val
1352 Just cast -> CmmMachOp cast [val]
1353
1354 len :: Length
1355 len = vecLength ty
1356
1357 wid :: Width
1358 wid = typeWidth (vecElemType ty)
1359
1360 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1361 -> CmmType -- Type of vector
1362 -> CmmExpr -- Vector
1363 -> [CmmFormal] -- Element results
1364 -> FCode ()
1365 doVecUnpackOp maybe_post_read_cast ty e res =
1366 vecUnpack res 0
1367 where
1368 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1369 vecUnpack [] _ =
1370 return ()
1371
1372 vecUnpack (r : rs) i = do
1373 if isFloatType (vecElemType ty)
1374 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1375 [e, iLit]))
1376 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1377 [e, iLit]))
1378 vecUnpack rs (i + 1)
1379 where
1380 -- vector indices are always 32-bits
1381 iLit = CmmLit (CmmInt (toInteger i) W32)
1382
1383 cast :: CmmExpr -> CmmExpr
1384 cast val = case maybe_post_read_cast of
1385 Nothing -> val
1386 Just cast -> CmmMachOp cast [val]
1387
1388 len :: Length
1389 len = vecLength ty
1390
1391 wid :: Width
1392 wid = typeWidth (vecElemType ty)
1393
1394 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1395 -> CmmType -- Vector type
1396 -> CmmExpr -- Source vector
1397 -> CmmExpr -- Element
1398 -> CmmExpr -- Index at which to insert element
1399 -> CmmFormal -- Destination for result
1400 -> FCode ()
1401 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1402 dflags <- getDynFlags
1403 -- vector indices are always 32-bits
1404 let idx' :: CmmExpr
1405 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1406 if isFloatType (vecElemType ty)
1407 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1408 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1409 where
1410 cast :: CmmExpr -> CmmExpr
1411 cast val = case maybe_pre_write_cast of
1412 Nothing -> val
1413 Just cast -> CmmMachOp cast [val]
1414
1415 len :: Length
1416 len = vecLength ty
1417
1418 wid :: Width
1419 wid = typeWidth (vecElemType ty)
1420
1421 ------------------------------------------------------------------------------
1422 -- Helpers for translating prefetching.
1423
1424 doPrefetchByteArrayOp :: Int
1425 -> [LocalReg]
1426 -> [CmmExpr]
1427 -> FCode ()
1428 doPrefetchByteArrayOp locality res [addr,idx]
1429 = do dflags <- getDynFlags
1430 mkBasicPrefetch locality (arrWordsHdrSize dflags) res addr idx
1431 doPrefetchByteArrayOp _ _ _
1432 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1433
1434 doPrefetchAddrOp ::Int
1435 -> [LocalReg]
1436 -> [CmmExpr]
1437 -> FCode ()
1438 doPrefetchAddrOp locality res [addr,idx]
1439 = mkBasicPrefetch locality 0 res addr idx
1440 doPrefetchAddrOp _ _ _
1441 = panic "StgCmmPrim: doPrefetchAddrOp"
1442
1443 mkBasicPrefetch :: Int -- Locality level 0-3
1444 -> ByteOff -- Initial offset in bytes
1445 -> [LocalReg] -- Destination
1446 -> CmmExpr -- Base address
1447 -> CmmExpr -- Index
1448 -> FCode ()
1449 mkBasicPrefetch locality off res base idx
1450 = do dflags <- getDynFlags
1451 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1452 case res of
1453 [] -> return ()
1454 [reg] -> emitAssign (CmmLocal reg) base
1455 _ -> panic "StgCmmPrim: mkBasicPrefetch"
1456
1457 -- ----------------------------------------------------------------------------
1458 -- Allocating byte arrays
1459
1460 -- | Takes a register to return the newly allocated array in and the
1461 -- size of the new array in bytes. Allocates a new
1462 -- 'MutableByteArray#'.
1463 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1464 doNewByteArrayOp res_r n = do
1465 dflags <- getDynFlags
1466
1467 let info_ptr = mkLblExpr mkArrWords_infoLabel
1468 rep = arrWordsRep dflags n
1469
1470 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1471 (mkIntExpr dflags (nonHdrSize dflags rep))
1472 (zeroExpr dflags)
1473
1474 let hdr_size = wordsToBytes dflags (fixedHdrSize dflags)
1475
1476 base <- allocHeapClosure rep info_ptr curCCS
1477 [ (mkIntExpr dflags n,
1478 hdr_size + oFFSET_StgArrWords_bytes dflags)
1479 ]
1480
1481 emit $ mkAssign (CmmLocal res_r) base
1482
1483 -- ----------------------------------------------------------------------------
1484 -- Copying byte arrays
1485
1486 -- | Takes a source 'ByteArray#', an offset in the source array, a
1487 -- destination 'MutableByteArray#', an offset into the destination
1488 -- array, and the number of bytes to copy. Copies the given number of
1489 -- bytes from the source array to the destination array.
1490 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1491 -> FCode ()
1492 doCopyByteArrayOp = emitCopyByteArray copy
1493 where
1494 -- Copy data (we assume the arrays aren't overlapping since
1495 -- they're of different types)
1496 copy _src _dst dst_p src_p bytes =
1497 do dflags <- getDynFlags
1498 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1499
1500 -- | Takes a source 'MutableByteArray#', an offset in the source
1501 -- array, a destination 'MutableByteArray#', an offset into the
1502 -- destination array, and the number of bytes to copy. Copies the
1503 -- given number of bytes from the source array to the destination
1504 -- array.
1505 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1506 -> FCode ()
1507 doCopyMutableByteArrayOp = emitCopyByteArray copy
1508 where
1509 -- The only time the memory might overlap is when the two arrays
1510 -- we were provided are the same array!
1511 -- TODO: Optimize branch for common case of no aliasing.
1512 copy src dst dst_p src_p bytes = do
1513 dflags <- getDynFlags
1514 [moveCall, cpyCall] <- forkAlts [
1515 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags 1),
1516 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1517 ]
1518 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1519
1520 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1521 -> FCode ())
1522 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1523 -> FCode ()
1524 emitCopyByteArray copy src src_off dst dst_off n = do
1525 dflags <- getDynFlags
1526 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1527 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1528 copy src dst dst_p src_p n
1529
1530 -- | Takes a source 'ByteArray#', an offset in the source array, a
1531 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1532 -- number of bytes from the source array to the destination memory region.
1533 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1534 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1535 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1536 dflags <- getDynFlags
1537 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1538 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1539
1540 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1541 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1542 -- number of bytes from the source array to the destination memory region.
1543 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1544 -> FCode ()
1545 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1546
1547 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1548 -- the destination array, and the number of bytes to copy. Copies the given
1549 -- number of bytes from the source memory region to the destination array.
1550 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1551 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1552 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1553 dflags <- getDynFlags
1554 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1555 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1556
1557
1558 -- ----------------------------------------------------------------------------
1559 -- Setting byte arrays
1560
1561 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1562 -- and a byte, and sets each of the selected bytes in the array to the
1563 -- character.
1564 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1565 -> FCode ()
1566 doSetByteArrayOp ba off len c
1567 = do dflags <- getDynFlags
1568 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1569 emitMemsetCall p c len (mkIntExpr dflags 1)
1570
1571 -- ----------------------------------------------------------------------------
1572 -- Allocating arrays
1573
1574 -- | Takes a register to return the newly allocated array in, the size
1575 -- of the new array, and an initial value for the elements. Allocates
1576 -- a new 'MutableArray#'.
1577 doNewArrayOp :: CmmFormal -> WordOff -> CmmExpr -> FCode ()
1578 doNewArrayOp res_r n init = do
1579 dflags <- getDynFlags
1580
1581 let info_ptr = mkLblExpr mkMAP_DIRTY_infoLabel
1582 rep = arrPtrsRep dflags n
1583
1584 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
1585 (mkIntExpr dflags (nonHdrSize dflags rep))
1586 (zeroExpr dflags)
1587
1588 let hdr_size = wordsToBytes dflags (fixedHdrSize dflags)
1589
1590 base <- allocHeapClosure rep info_ptr curCCS
1591 [ (mkIntExpr dflags n,
1592 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
1593 , (mkIntExpr dflags (nonHdrSizeW rep),
1594 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
1595 ]
1596
1597 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1598 emit $ mkAssign arr base
1599
1600 -- Initialise all elements of the the array
1601 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (arrPtrsHdrSize dflags)
1602 for <- newLabelC
1603 emitLabel for
1604 let loopBody =
1605 [ mkStore (CmmReg (CmmLocal p)) init
1606 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1607 , mkBranch for ]
1608 emit =<< mkCmmIfThen
1609 (cmmULtWord dflags (CmmReg (CmmLocal p))
1610 (cmmOffsetW dflags (CmmReg arr)
1611 (arrPtrsHdrSizeW dflags + n)))
1612 (catAGraphs loopBody)
1613
1614 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1615
1616 -- ----------------------------------------------------------------------------
1617 -- Copying pointer arrays
1618
1619 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1620 -- nowhere else in the code generator. This is mostly because these
1621 -- "primitive" ops result in a surprisingly large amount of code. It
1622 -- will likely be worthwhile to optimize what is emitted here, so that
1623 -- our optimization passes don't waste time repeatedly optimizing the
1624 -- same bits of code.
1625
1626 -- More closely imitates 'assignTemp' from the old code generator, which
1627 -- returns a CmmExpr rather than a LocalReg.
1628 assignTempE :: CmmExpr -> FCode CmmExpr
1629 assignTempE e = do
1630 t <- assignTemp e
1631 return (CmmReg (CmmLocal t))
1632
1633 -- | Takes a source 'Array#', an offset in the source array, a
1634 -- destination 'MutableArray#', an offset into the destination array,
1635 -- and the number of elements to copy. Copies the given number of
1636 -- elements from the source array to the destination array.
1637 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1638 -> FCode ()
1639 doCopyArrayOp = emitCopyArray copy
1640 where
1641 -- Copy data (we assume the arrays aren't overlapping since
1642 -- they're of different types)
1643 copy _src _dst dst_p src_p bytes =
1644 do dflags <- getDynFlags
1645 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1646 (mkIntExpr dflags (wORD_SIZE dflags))
1647
1648
1649 -- | Takes a source 'MutableArray#', an offset in the source array, a
1650 -- destination 'MutableArray#', an offset into the destination array,
1651 -- and the number of elements to copy. Copies the given number of
1652 -- elements from the source array to the destination array.
1653 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1654 -> FCode ()
1655 doCopyMutableArrayOp = emitCopyArray copy
1656 where
1657 -- The only time the memory might overlap is when the two arrays
1658 -- we were provided are the same array!
1659 -- TODO: Optimize branch for common case of no aliasing.
1660 copy src dst dst_p src_p bytes = do
1661 dflags <- getDynFlags
1662 [moveCall, cpyCall] <- forkAlts [
1663 getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1664 (mkIntExpr dflags (wORD_SIZE dflags)),
1665 getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1666 (mkIntExpr dflags (wORD_SIZE dflags))
1667 ]
1668 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1669
1670 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1671 -> FCode ()) -- ^ copy function
1672 -> CmmExpr -- ^ source array
1673 -> CmmExpr -- ^ offset in source array
1674 -> CmmExpr -- ^ destination array
1675 -> CmmExpr -- ^ offset in destination array
1676 -> WordOff -- ^ number of elements to copy
1677 -> FCode ()
1678 emitCopyArray copy src0 src_off dst0 dst_off0 n = do
1679 dflags <- getDynFlags
1680 when (n /= 0) $ do
1681 -- Passed as arguments (be careful)
1682 src <- assignTempE src0
1683 dst <- assignTempE dst0
1684 dst_off <- assignTempE dst_off0
1685
1686 -- Set the dirty bit in the header.
1687 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1688
1689 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
1690 (arrPtrsHdrSize dflags)
1691 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1692 src_p <- assignTempE $ cmmOffsetExprW dflags
1693 (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1694 let bytes = wordsToBytes dflags n
1695
1696 copy src dst dst_p src_p bytes
1697
1698 -- The base address of the destination card table
1699 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
1700 (loadArrPtrsSize dflags dst)
1701
1702 emitSetCards dst_off dst_cards_p n
1703
1704 -- | Takes an info table label, a register to return the newly
1705 -- allocated array in, a source array, an offset in the source array,
1706 -- and the number of elements to copy. Allocates a new array and
1707 -- initializes it from the source array.
1708 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1709 -> FCode ()
1710 emitCloneArray info_p res_r src src_off n = do
1711 dflags <- getDynFlags
1712
1713 let info_ptr = mkLblExpr info_p
1714 rep = arrPtrsRep dflags n
1715
1716 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
1717 (mkIntExpr dflags (nonHdrSize dflags rep))
1718 (zeroExpr dflags)
1719
1720 let hdr_size = wordsToBytes dflags (fixedHdrSize dflags)
1721
1722 base <- allocHeapClosure rep info_ptr curCCS
1723 [ (mkIntExpr dflags n,
1724 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
1725 , (mkIntExpr dflags (nonHdrSizeW rep),
1726 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
1727 ]
1728
1729 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1730 emit $ mkAssign arr base
1731
1732 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
1733 (arrPtrsHdrSize dflags)
1734 src_p <- assignTempE $ cmmOffsetExprW dflags src
1735 (cmmAddWord dflags
1736 (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
1737
1738 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
1739 (mkIntExpr dflags (wORD_SIZE dflags))
1740
1741 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1742
1743 -- | Takes and offset in the destination array, the base address of
1744 -- the card table, and the number of elements affected (*not* the
1745 -- number of cards). The number of elements may not be zero.
1746 -- Marks the relevant cards as dirty.
1747 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
1748 emitSetCards dst_start dst_cards_start n = do
1749 dflags <- getDynFlags
1750 start_card <- assignTempE $ cardCmm dflags dst_start
1751 let end_card = cardCmm dflags
1752 (cmmSubWord dflags
1753 (cmmAddWord dflags dst_start (mkIntExpr dflags n))
1754 (mkIntExpr dflags 1))
1755 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
1756 (mkIntExpr dflags 1)
1757 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
1758 (mkIntExpr dflags 1) -- no alignment (1 byte)
1759
1760 -- Convert an element index to a card index
1761 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
1762 cardCmm dflags i =
1763 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
1764
1765 -- | Emit a call to @memcpy@.
1766 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1767 emitMemcpyCall dst src n align = do
1768 emitPrimCall
1769 [ {-no results-} ]
1770 MO_Memcpy
1771 [ dst, src, n, align ]
1772
1773 -- | Emit a call to @memmove@.
1774 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1775 emitMemmoveCall dst src n align = do
1776 emitPrimCall
1777 [ {- no results -} ]
1778 MO_Memmove
1779 [ dst, src, n, align ]
1780
1781 -- | Emit a call to @memset@. The second argument must fit inside an
1782 -- unsigned char.
1783 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1784 emitMemsetCall dst c n align = do
1785 emitPrimCall
1786 [ {- no results -} ]
1787 MO_Memset
1788 [ dst, c, n, align ]
1789
1790 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1791 emitBSwapCall res x width = do
1792 emitPrimCall
1793 [ res ]
1794 (MO_BSwap width)
1795 [ x ]
1796
1797 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1798 emitPopCntCall res x width = do
1799 emitPrimCall
1800 [ res ]
1801 (MO_PopCnt width)
1802 [ x ]