Revert "Add more primops for atomic ops on byte arrays"
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 {-# LANGUAGE CPP #-}
2
3 ----------------------------------------------------------------------------
4 --
5 -- Stg to C--: primitive operations
6 --
7 -- (c) The University of Glasgow 2004-2006
8 --
9 -----------------------------------------------------------------------------
10
11 module StgCmmPrim (
12 cgOpApp,
13 cgPrimOp, -- internal(ish), used by cgCase to get code for a
14 -- comparison without also turning it into a Bool.
15 shouldInlinePrimOp
16 ) where
17
18 #include "HsVersions.h"
19
20 import StgCmmLayout
21 import StgCmmForeign
22 import StgCmmEnv
23 import StgCmmMonad
24 import StgCmmUtils
25 import StgCmmTicky
26 import StgCmmHeap
27 import StgCmmProf ( costCentreFrom, curCCS )
28
29 import DynFlags
30 import Platform
31 import BasicTypes
32 import MkGraph
33 import StgSyn
34 import Cmm
35 import CmmInfo
36 import Type ( Type, tyConAppTyCon )
37 import TyCon
38 import CLabel
39 import CmmUtils
40 import PrimOp
41 import SMRep
42 import FastString
43 import Outputable
44 import Util
45
46 import Control.Monad (liftM, when)
47
48 ------------------------------------------------------------------------
49 -- Primitive operations and foreign calls
50 ------------------------------------------------------------------------
51
52 {- Note [Foreign call results]
53 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
54 A foreign call always returns an unboxed tuple of results, one
55 of which is the state token. This seems to happen even for pure
56 calls.
57
58 Even if we returned a single result for pure calls, it'd still be
59 right to wrap it in a singleton unboxed tuple, because the result
60 might be a Haskell closure pointer, we don't want to evaluate it. -}
61
62 ----------------------------------
63 cgOpApp :: StgOp -- The op
64 -> [StgArg] -- Arguments
65 -> Type -- Result type (always an unboxed tuple)
66 -> FCode ReturnKind
67
68 -- Foreign calls
69 cgOpApp (StgFCallOp fcall _) stg_args res_ty
70 = cgForeignCall fcall stg_args res_ty
71 -- Note [Foreign call results]
72
73 -- tagToEnum# is special: we need to pull the constructor
74 -- out of the table, and perform an appropriate return.
75
76 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
77 = ASSERT(isEnumerationTyCon tycon)
78 do { dflags <- getDynFlags
79 ; args' <- getNonVoidArgAmodes [arg]
80 ; let amode = case args' of [amode] -> amode
81 _ -> panic "TagToEnumOp had void arg"
82 ; emitReturn [tagToClosure dflags tycon amode] }
83 where
84 -- If you're reading this code in the attempt to figure
85 -- out why the compiler panic'ed here, it is probably because
86 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
87 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
88 -- That won't work.
89 tycon = tyConAppTyCon res_ty
90
91 cgOpApp (StgPrimOp primop) args res_ty = do
92 dflags <- getDynFlags
93 cmm_args <- getNonVoidArgAmodes args
94 case shouldInlinePrimOp dflags primop cmm_args of
95 Nothing -> do -- out-of-line
96 let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
97 emitCall (NativeNodeCall, NativeReturn) fun cmm_args
98
99 Just f -- inline
100 | ReturnsPrim VoidRep <- result_info
101 -> do f []
102 emitReturn []
103
104 | ReturnsPrim rep <- result_info
105 -> do dflags <- getDynFlags
106 res <- newTemp (primRepCmmType dflags rep)
107 f [res]
108 emitReturn [CmmReg (CmmLocal res)]
109
110 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
111 -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
112 f regs
113 emitReturn (map (CmmReg . CmmLocal) regs)
114
115 | otherwise -> panic "cgPrimop"
116 where
117 result_info = getPrimOpResultInfo primop
118
119 cgOpApp (StgPrimCallOp primcall) args _res_ty
120 = do { cmm_args <- getNonVoidArgAmodes args
121 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
122 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
123
124 -- | Decide whether an out-of-line primop should be replaced by an
125 -- inline implementation. This might happen e.g. if there's enough
126 -- static information, such as statically know arguments, to emit a
127 -- more efficient implementation inline.
128 --
129 -- Returns 'Nothing' if this primop should use its out-of-line
130 -- implementation (defined elsewhere) and 'Just' together with a code
131 -- generating function that takes the output regs as arguments
132 -- otherwise.
133 shouldInlinePrimOp :: DynFlags
134 -> PrimOp -- ^ The primop
135 -> [CmmExpr] -- ^ The primop arguments
136 -> Maybe ([LocalReg] -> FCode ())
137
138 shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n _))]
139 | fromInteger n <= maxInlineAllocSize dflags =
140 Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
141
142 shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n _)), init]
143 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
144 Just $ \ [res] ->
145 doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
146 [ (mkIntExpr dflags (fromInteger n),
147 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
148 , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
149 fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
150 ]
151 (fromInteger n) init
152
153 shouldInlinePrimOp _ CopyArrayOp
154 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
155 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
156
157 shouldInlinePrimOp _ CopyMutableArrayOp
158 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
159 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
160
161 shouldInlinePrimOp _ CopyArrayArrayOp
162 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
163 Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
164
165 shouldInlinePrimOp _ CopyMutableArrayArrayOp
166 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
167 Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
168
169 shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n _))]
170 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
171 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
172
173 shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n _))]
174 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
175 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
176
177 shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n _))]
178 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
179 Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
180
181 shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n _))]
182 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
183 Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
184
185 shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n _)), init]
186 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
187 Just $ \ [res] ->
188 doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
189 [ (mkIntExpr dflags (fromInteger n),
190 fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
191 ]
192 (fromInteger n) init
193
194 shouldInlinePrimOp _ CopySmallArrayOp
195 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
196 Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
197
198 shouldInlinePrimOp _ CopySmallMutableArrayOp
199 [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
200 Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
201
202 shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n _))]
203 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
204 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
205
206 shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n _))]
207 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
208 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
209
210 shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n _))]
211 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
212 Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
213
214 shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n _))]
215 | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags =
216 Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
217
218 shouldInlinePrimOp dflags primop args
219 | primOpOutOfLine primop = Nothing
220 | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
221
222 -- TODO: Several primops, such as 'copyArray#', only have an inline
223 -- implementation (below) but could possibly have both an inline
224 -- implementation and an out-of-line implementation, just like
225 -- 'newArray#'. This would lower the amount of code generated,
226 -- hopefully without a performance impact (needs to be measured).
227
228 ---------------------------------------------------
229 cgPrimOp :: [LocalReg] -- where to put the results
230 -> PrimOp -- the op
231 -> [StgArg] -- arguments
232 -> FCode ()
233
234 cgPrimOp results op args
235 = do dflags <- getDynFlags
236 arg_exprs <- getNonVoidArgAmodes args
237 emitPrimOp dflags results op arg_exprs
238
239
240 ------------------------------------------------------------------------
241 -- Emitting code for a primop
242 ------------------------------------------------------------------------
243
244 emitPrimOp :: DynFlags
245 -> [LocalReg] -- where to put the results
246 -> PrimOp -- the op
247 -> [CmmExpr] -- arguments
248 -> FCode ()
249
250 -- First we handle various awkward cases specially. The remaining
251 -- easy cases are then handled by translateOp, defined below.
252
253 emitPrimOp dflags [res_r,res_c] IntAddCOp [aa,bb]
254 {-
255 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
256 C, and without needing any comparisons. This may not be the
257 fastest way to do it - if you have better code, please send it! --SDM
258
259 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
260
261 We currently don't make use of the r value if c is != 0 (i.e.
262 overflow), we just convert to big integers and try again. This
263 could be improved by making r and c the correct values for
264 plugging into a new J#.
265
266 { r = ((I_)(a)) + ((I_)(b)); \
267 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
268 >> (BITS_IN (I_) - 1); \
269 }
270 Wading through the mass of bracketry, it seems to reduce to:
271 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
272
273 -}
274 = emit $ catAGraphs [
275 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
276 mkAssign (CmmLocal res_c) $
277 CmmMachOp (mo_wordUShr dflags) [
278 CmmMachOp (mo_wordAnd dflags) [
279 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
280 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
281 ],
282 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
283 ]
284 ]
285
286
287 emitPrimOp dflags [res_r,res_c] IntSubCOp [aa,bb]
288 {- Similarly:
289 #define subIntCzh(r,c,a,b) \
290 { r = ((I_)(a)) - ((I_)(b)); \
291 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
292 >> (BITS_IN (I_) - 1); \
293 }
294
295 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
296 -}
297 = emit $ catAGraphs [
298 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
299 mkAssign (CmmLocal res_c) $
300 CmmMachOp (mo_wordUShr dflags) [
301 CmmMachOp (mo_wordAnd dflags) [
302 CmmMachOp (mo_wordXor dflags) [aa,bb],
303 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
304 ],
305 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
306 ]
307 ]
308
309
310 emitPrimOp _ [res] ParOp [arg]
311 =
312 -- for now, just implement this in a C function
313 -- later, we might want to inline it.
314 emitCCall
315 [(res,NoHint)]
316 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
317 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
318
319 emitPrimOp dflags [res] SparkOp [arg]
320 = do
321 -- returns the value of arg in res. We're going to therefore
322 -- refer to arg twice (once to pass to newSpark(), and once to
323 -- assign to res), so put it in a temporary.
324 tmp <- assignTemp arg
325 tmp2 <- newTemp (bWord dflags)
326 emitCCall
327 [(tmp2,NoHint)]
328 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
329 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
330 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
331
332 emitPrimOp dflags [res] GetCCSOfOp [arg]
333 = emitAssign (CmmLocal res) val
334 where
335 val
336 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
337 | otherwise = CmmLit (zeroCLit dflags)
338
339 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
340 = emitAssign (CmmLocal res) curCCS
341
342 emitPrimOp dflags [res] ReadMutVarOp [mutv]
343 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
344
345 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
346 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
347 emitCCall
348 [{-no results-}]
349 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
350 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
351
352 -- #define sizzeofByteArrayzh(r,a) \
353 -- r = ((StgArrWords *)(a))->bytes
354 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
355 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
356
357 -- #define sizzeofMutableByteArrayzh(r,a) \
358 -- r = ((StgArrWords *)(a))->bytes
359 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
360 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
361
362
363 -- #define touchzh(o) /* nothing */
364 emitPrimOp _ res@[] TouchOp args@[_arg]
365 = do emitPrimCall res MO_Touch args
366
367 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
368 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
369 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
370
371 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
372 emitPrimOp dflags [res] StableNameToIntOp [arg]
373 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
374
375 -- #define eqStableNamezh(r,sn1,sn2) \
376 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
377 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
378 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
379 cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags),
380 cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags)
381 ])
382
383
384 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
385 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
386
387 -- #define addrToHValuezh(r,a) r=(P_)a
388 emitPrimOp _ [res] AddrToAnyOp [arg]
389 = emitAssign (CmmLocal res) arg
390
391 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
392 -- Note: argument may be tagged!
393 emitPrimOp dflags [res] DataToTagOp [arg]
394 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
395
396 {- Freezing arrays-of-ptrs requires changing an info table, for the
397 benefit of the generational collector. It needs to scavenge mutable
398 objects, even if they are in old space. When they become immutable,
399 they can be removed from this scavenge list. -}
400
401 -- #define unsafeFreezzeArrayzh(r,a)
402 -- {
403 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
404 -- r = a;
405 -- }
406 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
407 = emit $ catAGraphs
408 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
409 mkAssign (CmmLocal res) arg ]
410 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
411 = emit $ catAGraphs
412 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
413 mkAssign (CmmLocal res) arg ]
414 emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
415 = emit $ catAGraphs
416 [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)),
417 mkAssign (CmmLocal res) arg ]
418
419 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
420 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
421 = emitAssign (CmmLocal res) arg
422
423 -- Reading/writing pointer arrays
424
425 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
426 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
427 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
428
429 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
430 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
431 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
432 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
433 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
434 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
435 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
436 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
437 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
438 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
439
440 emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
441 emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
442 emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
443
444 -- Getting the size of pointer arrays
445
446 emitPrimOp dflags [res] SizeofArrayOp [arg]
447 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags))
448 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
449 = emitPrimOp dflags [res] SizeofArrayOp [arg]
450 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
451 = emitPrimOp dflags [res] SizeofArrayOp [arg]
452 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
453 = emitPrimOp dflags [res] SizeofArrayOp [arg]
454
455 emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
456 emit $ mkAssign (CmmLocal res)
457 (cmmLoadIndexW dflags arg
458 (fixedHdrSizeW dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags) (bWord dflags))
459 emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
460 emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
461
462 -- IndexXXXoffAddr
463
464 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
465 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
466 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
467 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
468 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
469 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
470 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
471 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
472 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
473 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
474 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
475 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
476 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
477 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
478 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
479 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
480
481 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
482
483 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
484 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
485 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
486 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
487 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
488 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
489 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
490 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
491 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
492 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
493 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
494 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
495 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
496 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
497 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
498 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
499
500 -- IndexXXXArray
501
502 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
503 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
504 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
505 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
506 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
507 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
508 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
509 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
510 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
511 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
512 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
513 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
514 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
515 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
516 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
517 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
518
519 -- ReadXXXArray, identical to IndexXXXArray.
520
521 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
522 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
523 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
524 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
525 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
526 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
527 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
528 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
529 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
530 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
531 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
532 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
533 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
534 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
535 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
536 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
537
538 -- WriteXXXoffAddr
539
540 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
541 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
542 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
543 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
544 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
545 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
546 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
547 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
548 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
549 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
550 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
551 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
552 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
553 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
554 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
555 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
556
557 -- WriteXXXArray
558
559 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
560 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
561 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
562 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
563 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
564 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
565 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
566 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
567 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
568 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
569 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
570 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
571 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
572 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
573 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
574 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
575
576 -- Copying and setting byte arrays
577 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
578 doCopyByteArrayOp src src_off dst dst_off n
579 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
580 doCopyMutableByteArrayOp src src_off dst dst_off n
581 emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
582 doCopyByteArrayToAddrOp src src_off dst n
583 emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
584 doCopyMutableByteArrayToAddrOp src src_off dst n
585 emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
586 doCopyAddrToByteArrayOp src dst dst_off n
587 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
588 doSetByteArrayOp ba off len c
589
590 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
591 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
592 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
593 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
594
595 -- Population count
596 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
597 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
598 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
599 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
600 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
601
602 -- Unsigned int to floating point conversions
603 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
604 (MO_UF_Conv W32) [w]
605 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
606 (MO_UF_Conv W64) [w]
607
608 -- SIMD primops
609 emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
610 checkVecCompatibility dflags vcat n w
611 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
612 where
613 zeros :: CmmExpr
614 zeros = CmmLit $ CmmVec (replicate n zero)
615
616 zero :: CmmLit
617 zero = case vcat of
618 IntVec -> CmmInt 0 w
619 WordVec -> CmmInt 0 w
620 FloatVec -> CmmFloat 0 w
621
622 ty :: CmmType
623 ty = vecVmmType vcat n w
624
625 emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
626 checkVecCompatibility dflags vcat n w
627 when (length es /= n) $
628 panic "emitPrimOp: VecPackOp has wrong number of arguments"
629 doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
630 where
631 zeros :: CmmExpr
632 zeros = CmmLit $ CmmVec (replicate n zero)
633
634 zero :: CmmLit
635 zero = case vcat of
636 IntVec -> CmmInt 0 w
637 WordVec -> CmmInt 0 w
638 FloatVec -> CmmFloat 0 w
639
640 ty :: CmmType
641 ty = vecVmmType vcat n w
642
643 emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
644 checkVecCompatibility dflags vcat n w
645 when (length res /= n) $
646 panic "emitPrimOp: VecUnpackOp has wrong number of results"
647 doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
648 where
649 ty :: CmmType
650 ty = vecVmmType vcat n w
651
652 emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
653 checkVecCompatibility dflags vcat n w
654 doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
655 where
656 ty :: CmmType
657 ty = vecVmmType vcat n w
658
659 emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
660 checkVecCompatibility dflags vcat n w
661 doIndexByteArrayOp Nothing ty res args
662 where
663 ty :: CmmType
664 ty = vecVmmType vcat n w
665
666 emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
667 checkVecCompatibility dflags vcat n w
668 doIndexByteArrayOp Nothing ty res args
669 where
670 ty :: CmmType
671 ty = vecVmmType vcat n w
672
673 emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
674 checkVecCompatibility dflags vcat n w
675 doWriteByteArrayOp Nothing ty res args
676 where
677 ty :: CmmType
678 ty = vecVmmType vcat n w
679
680 emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
681 checkVecCompatibility dflags vcat n w
682 doIndexOffAddrOp Nothing ty res args
683 where
684 ty :: CmmType
685 ty = vecVmmType vcat n w
686
687 emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
688 checkVecCompatibility dflags vcat n w
689 doIndexOffAddrOp Nothing ty res args
690 where
691 ty :: CmmType
692 ty = vecVmmType vcat n w
693
694 emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
695 checkVecCompatibility dflags vcat n w
696 doWriteOffAddrOp Nothing ty res args
697 where
698 ty :: CmmType
699 ty = vecVmmType vcat n w
700
701 emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
702 checkVecCompatibility dflags vcat n w
703 doIndexByteArrayOpAs Nothing vecty ty res args
704 where
705 vecty :: CmmType
706 vecty = vecVmmType vcat n w
707
708 ty :: CmmType
709 ty = vecCmmCat vcat w
710
711 emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
712 checkVecCompatibility dflags vcat n w
713 doIndexByteArrayOpAs Nothing vecty ty res args
714 where
715 vecty :: CmmType
716 vecty = vecVmmType vcat n w
717
718 ty :: CmmType
719 ty = vecCmmCat vcat w
720
721 emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
722 checkVecCompatibility dflags vcat n w
723 doWriteByteArrayOp Nothing ty res args
724 where
725 ty :: CmmType
726 ty = vecCmmCat vcat w
727
728 emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
729 checkVecCompatibility dflags vcat n w
730 doIndexOffAddrOpAs Nothing vecty ty res args
731 where
732 vecty :: CmmType
733 vecty = vecVmmType vcat n w
734
735 ty :: CmmType
736 ty = vecCmmCat vcat w
737
738 emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
739 checkVecCompatibility dflags vcat n w
740 doIndexOffAddrOpAs Nothing vecty ty res args
741 where
742 vecty :: CmmType
743 vecty = vecVmmType vcat n w
744
745 ty :: CmmType
746 ty = vecCmmCat vcat w
747
748 emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
749 checkVecCompatibility dflags vcat n w
750 doWriteOffAddrOp Nothing ty res args
751 where
752 ty :: CmmType
753 ty = vecCmmCat vcat w
754
755 -- Prefetch
756 emitPrimOp _ res PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
757 emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
758 emitPrimOp _ res PrefetchAddrOp3 args = doPrefetchAddrOp 3 res args
759
760 emitPrimOp _ res PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
761 emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
762 emitPrimOp _ res PrefetchAddrOp2 args = doPrefetchAddrOp 2 res args
763
764 emitPrimOp _ res PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
765 emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
766 emitPrimOp _ res PrefetchAddrOp1 args = doPrefetchAddrOp 1 res args
767
768 emitPrimOp _ res PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
769 emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
770 emitPrimOp _ res PrefetchAddrOp0 args = doPrefetchAddrOp 0 res args
771
772
773 -- The rest just translate straightforwardly
774 emitPrimOp dflags [res] op [arg]
775 | nopOp op
776 = emitAssign (CmmLocal res) arg
777
778 | Just (mop,rep) <- narrowOp op
779 = emitAssign (CmmLocal res) $
780 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
781
782 emitPrimOp dflags r@[res] op args
783 | Just prim <- callishOp op
784 = do emitPrimCall r prim args
785
786 | Just mop <- translateOp dflags op
787 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
788 emit stmt
789
790 emitPrimOp dflags results op args
791 = case callishPrimOpSupported dflags op of
792 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
793 Right gen -> gen results args
794
795 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
796
797 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
798 callishPrimOpSupported dflags op
799 = case op of
800 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
801 | otherwise -> Right (genericIntQuotRemOp dflags)
802
803 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
804 | otherwise -> Right (genericWordQuotRemOp dflags)
805
806 WordQuotRem2Op | ncg && x86ish -> Left (MO_U_QuotRem2 (wordWidth dflags))
807 | otherwise -> Right (genericWordQuotRem2Op dflags)
808
809 WordAdd2Op | ncg && x86ish -> Left (MO_Add2 (wordWidth dflags))
810 | otherwise -> Right genericWordAdd2Op
811
812 WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags))
813 | otherwise -> Right genericWordMul2Op
814
815 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
816 where
817 ncg = case hscTarget dflags of
818 HscAsm -> True
819 _ -> False
820
821 x86ish = case platformArch (targetPlatform dflags) of
822 ArchX86 -> True
823 ArchX86_64 -> True
824 _ -> False
825
826 genericIntQuotRemOp :: DynFlags -> GenericOp
827 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
828 = emit $ mkAssign (CmmLocal res_q)
829 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
830 mkAssign (CmmLocal res_r)
831 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
832 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
833
834 genericWordQuotRemOp :: DynFlags -> GenericOp
835 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
836 = emit $ mkAssign (CmmLocal res_q)
837 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
838 mkAssign (CmmLocal res_r)
839 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
840 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
841
842 genericWordQuotRem2Op :: DynFlags -> GenericOp
843 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
844 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
845 where ty = cmmExprType dflags arg_x_high
846 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
847 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
848 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
849 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
850 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
851 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
852 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
853 zero = lit 0
854 one = lit 1
855 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
856 lit i = CmmLit (CmmInt i (wordWidth dflags))
857
858 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
859 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
860 mkAssign (CmmLocal res_r) high)
861 f i acc high low =
862 do roverflowedBit <- newTemp ty
863 rhigh' <- newTemp ty
864 rhigh'' <- newTemp ty
865 rlow' <- newTemp ty
866 risge <- newTemp ty
867 racc' <- newTemp ty
868 let high' = CmmReg (CmmLocal rhigh')
869 isge = CmmReg (CmmLocal risge)
870 overflowedBit = CmmReg (CmmLocal roverflowedBit)
871 let this = catAGraphs
872 [mkAssign (CmmLocal roverflowedBit)
873 (shr high negone),
874 mkAssign (CmmLocal rhigh')
875 (or (shl high one) (shr low negone)),
876 mkAssign (CmmLocal rlow')
877 (shl low one),
878 mkAssign (CmmLocal risge)
879 (or (overflowedBit `ne` zero)
880 (high' `ge` arg_y)),
881 mkAssign (CmmLocal rhigh'')
882 (high' `minus` (arg_y `times` isge)),
883 mkAssign (CmmLocal racc')
884 (or (shl acc one) isge)]
885 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
886 (CmmReg (CmmLocal rhigh''))
887 (CmmReg (CmmLocal rlow'))
888 return (this <*> rest)
889 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
890
891 genericWordAdd2Op :: GenericOp
892 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
893 = do dflags <- getDynFlags
894 r1 <- newTemp (cmmExprType dflags arg_x)
895 r2 <- newTemp (cmmExprType dflags arg_x)
896 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
897 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
898 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
899 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
900 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
901 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
902 (wordWidth dflags))
903 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
904 emit $ catAGraphs
905 [mkAssign (CmmLocal r1)
906 (add (bottomHalf arg_x) (bottomHalf arg_y)),
907 mkAssign (CmmLocal r2)
908 (add (topHalf (CmmReg (CmmLocal r1)))
909 (add (topHalf arg_x) (topHalf arg_y))),
910 mkAssign (CmmLocal res_h)
911 (topHalf (CmmReg (CmmLocal r2))),
912 mkAssign (CmmLocal res_l)
913 (or (toTopHalf (CmmReg (CmmLocal r2)))
914 (bottomHalf (CmmReg (CmmLocal r1))))]
915 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
916
917 genericWordMul2Op :: GenericOp
918 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
919 = do dflags <- getDynFlags
920 let t = cmmExprType dflags arg_x
921 xlyl <- liftM CmmLocal $ newTemp t
922 xlyh <- liftM CmmLocal $ newTemp t
923 xhyl <- liftM CmmLocal $ newTemp t
924 r <- liftM CmmLocal $ newTemp t
925 -- This generic implementation is very simple and slow. We might
926 -- well be able to do better, but for now this at least works.
927 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
928 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
929 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
930 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
931 sum = foldl1 add
932 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
933 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
934 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
935 (wordWidth dflags))
936 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
937 emit $ catAGraphs
938 [mkAssign xlyl
939 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
940 mkAssign xlyh
941 (mul (bottomHalf arg_x) (topHalf arg_y)),
942 mkAssign xhyl
943 (mul (topHalf arg_x) (bottomHalf arg_y)),
944 mkAssign r
945 (sum [topHalf (CmmReg xlyl),
946 bottomHalf (CmmReg xhyl),
947 bottomHalf (CmmReg xlyh)]),
948 mkAssign (CmmLocal res_l)
949 (or (bottomHalf (CmmReg xlyl))
950 (toTopHalf (CmmReg r))),
951 mkAssign (CmmLocal res_h)
952 (sum [mul (topHalf arg_x) (topHalf arg_y),
953 topHalf (CmmReg xhyl),
954 topHalf (CmmReg xlyh),
955 topHalf (CmmReg r)])]
956 genericWordMul2Op _ _ = panic "genericWordMul2Op"
957
958 -- These PrimOps are NOPs in Cmm
959
960 nopOp :: PrimOp -> Bool
961 nopOp Int2WordOp = True
962 nopOp Word2IntOp = True
963 nopOp Int2AddrOp = True
964 nopOp Addr2IntOp = True
965 nopOp ChrOp = True -- Int# and Char# are rep'd the same
966 nopOp OrdOp = True
967 nopOp _ = False
968
969 -- These PrimOps turn into double casts
970
971 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
972 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
973 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
974 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
975 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
976 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
977 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
978 narrowOp _ = Nothing
979
980 -- Native word signless ops
981
982 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
983 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
984 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
985 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
986 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
987 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
988 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
989
990 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
991 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
992 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
993 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
994 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
995 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
996
997 translateOp dflags AndOp = Just (mo_wordAnd dflags)
998 translateOp dflags OrOp = Just (mo_wordOr dflags)
999 translateOp dflags XorOp = Just (mo_wordXor dflags)
1000 translateOp dflags NotOp = Just (mo_wordNot dflags)
1001 translateOp dflags SllOp = Just (mo_wordShl dflags)
1002 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
1003
1004 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
1005
1006 -- Native word signed ops
1007
1008 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
1009 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
1010 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
1011 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
1012 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
1013
1014
1015 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
1016 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
1017 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
1018 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
1019
1020 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
1021 translateOp dflags OrIOp = Just (mo_wordOr dflags)
1022 translateOp dflags XorIOp = Just (mo_wordXor dflags)
1023 translateOp dflags NotIOp = Just (mo_wordNot dflags)
1024 translateOp dflags ISllOp = Just (mo_wordShl dflags)
1025 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
1026 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
1027
1028 -- Native word unsigned ops
1029
1030 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
1031 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
1032 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
1033 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
1034
1035 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
1036 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
1037 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
1038
1039 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
1040 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
1041 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
1042 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
1043
1044 -- Char# ops
1045
1046 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
1047 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
1048 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
1049 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
1050 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
1051 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
1052
1053 -- Double ops
1054
1055 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
1056 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
1057 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
1058 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
1059 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
1060 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
1061
1062 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
1063 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
1064 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
1065 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
1066 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
1067
1068 -- Float ops
1069
1070 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
1071 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
1072 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
1073 translateOp _ FloatLeOp = Just (MO_F_Le W32)
1074 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
1075 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
1076
1077 translateOp _ FloatAddOp = Just (MO_F_Add W32)
1078 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
1079 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
1080 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
1081 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
1082
1083 -- Vector ops
1084
1085 translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
1086 translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
1087 translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
1088 translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
1089 translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
1090
1091 translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
1092 translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
1093 translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
1094 translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
1095 translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
1096 translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
1097
1098 translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
1099 translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
1100 translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
1101 translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
1102 translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
1103
1104 -- Conversions
1105
1106 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
1107 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
1108
1109 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
1110 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
1111
1112 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
1113 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
1114
1115 -- Word comparisons masquerading as more exotic things.
1116
1117 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
1118 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
1119 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
1120 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
1121 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
1122 translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
1123 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
1124 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
1125
1126 translateOp _ _ = Nothing
1127
1128 -- These primops are implemented by CallishMachOps, because they sometimes
1129 -- turn into foreign calls depending on the backend.
1130
1131 callishOp :: PrimOp -> Maybe CallishMachOp
1132 callishOp DoublePowerOp = Just MO_F64_Pwr
1133 callishOp DoubleSinOp = Just MO_F64_Sin
1134 callishOp DoubleCosOp = Just MO_F64_Cos
1135 callishOp DoubleTanOp = Just MO_F64_Tan
1136 callishOp DoubleSinhOp = Just MO_F64_Sinh
1137 callishOp DoubleCoshOp = Just MO_F64_Cosh
1138 callishOp DoubleTanhOp = Just MO_F64_Tanh
1139 callishOp DoubleAsinOp = Just MO_F64_Asin
1140 callishOp DoubleAcosOp = Just MO_F64_Acos
1141 callishOp DoubleAtanOp = Just MO_F64_Atan
1142 callishOp DoubleLogOp = Just MO_F64_Log
1143 callishOp DoubleExpOp = Just MO_F64_Exp
1144 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1145
1146 callishOp FloatPowerOp = Just MO_F32_Pwr
1147 callishOp FloatSinOp = Just MO_F32_Sin
1148 callishOp FloatCosOp = Just MO_F32_Cos
1149 callishOp FloatTanOp = Just MO_F32_Tan
1150 callishOp FloatSinhOp = Just MO_F32_Sinh
1151 callishOp FloatCoshOp = Just MO_F32_Cosh
1152 callishOp FloatTanhOp = Just MO_F32_Tanh
1153 callishOp FloatAsinOp = Just MO_F32_Asin
1154 callishOp FloatAcosOp = Just MO_F32_Acos
1155 callishOp FloatAtanOp = Just MO_F32_Atan
1156 callishOp FloatLogOp = Just MO_F32_Log
1157 callishOp FloatExpOp = Just MO_F32_Exp
1158 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1159
1160 callishOp _ = Nothing
1161
1162 ------------------------------------------------------------------------------
1163 -- Helpers for translating various minor variants of array indexing.
1164
1165 doIndexOffAddrOp :: Maybe MachOp
1166 -> CmmType
1167 -> [LocalReg]
1168 -> [CmmExpr]
1169 -> FCode ()
1170 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1171 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1172 doIndexOffAddrOp _ _ _ _
1173 = panic "StgCmmPrim: doIndexOffAddrOp"
1174
1175 doIndexOffAddrOpAs :: Maybe MachOp
1176 -> CmmType
1177 -> CmmType
1178 -> [LocalReg]
1179 -> [CmmExpr]
1180 -> FCode ()
1181 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1182 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1183 doIndexOffAddrOpAs _ _ _ _ _
1184 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1185
1186 doIndexByteArrayOp :: Maybe MachOp
1187 -> CmmType
1188 -> [LocalReg]
1189 -> [CmmExpr]
1190 -> FCode ()
1191 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1192 = do dflags <- getDynFlags
1193 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1194 doIndexByteArrayOp _ _ _ _
1195 = panic "StgCmmPrim: doIndexByteArrayOp"
1196
1197 doIndexByteArrayOpAs :: Maybe MachOp
1198 -> CmmType
1199 -> CmmType
1200 -> [LocalReg]
1201 -> [CmmExpr]
1202 -> FCode ()
1203 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1204 = do dflags <- getDynFlags
1205 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1206 doIndexByteArrayOpAs _ _ _ _ _
1207 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1208
1209 doReadPtrArrayOp :: LocalReg
1210 -> CmmExpr
1211 -> CmmExpr
1212 -> FCode ()
1213 doReadPtrArrayOp res addr idx
1214 = do dflags <- getDynFlags
1215 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1216
1217 doWriteOffAddrOp :: Maybe MachOp
1218 -> CmmType
1219 -> [LocalReg]
1220 -> [CmmExpr]
1221 -> FCode ()
1222 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1223 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1224 doWriteOffAddrOp _ _ _ _
1225 = panic "StgCmmPrim: doWriteOffAddrOp"
1226
1227 doWriteByteArrayOp :: Maybe MachOp
1228 -> CmmType
1229 -> [LocalReg]
1230 -> [CmmExpr]
1231 -> FCode ()
1232 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1233 = do dflags <- getDynFlags
1234 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1235 doWriteByteArrayOp _ _ _ _
1236 = panic "StgCmmPrim: doWriteByteArrayOp"
1237
1238 doWritePtrArrayOp :: CmmExpr
1239 -> CmmExpr
1240 -> CmmExpr
1241 -> FCode ()
1242 doWritePtrArrayOp addr idx val
1243 = do dflags <- getDynFlags
1244 let ty = cmmExprType dflags val
1245 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1246 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1247 -- the write barrier. We must write a byte into the mark table:
1248 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1249 emit $ mkStore (
1250 cmmOffsetExpr dflags
1251 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1252 (loadArrPtrsSize dflags addr))
1253 (CmmMachOp (mo_wordUShr dflags) [idx,
1254 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1255 ) (CmmLit (CmmInt 1 W8))
1256
1257 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1258 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1259 where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1260
1261 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1262 -> Maybe MachOp -- Optional result cast
1263 -> CmmType -- Type of element we are accessing
1264 -> LocalReg -- Destination
1265 -> CmmExpr -- Base address
1266 -> CmmType -- Type of element by which we are indexing
1267 -> CmmExpr -- Index
1268 -> FCode ()
1269 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1270 = do dflags <- getDynFlags
1271 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1272 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1273 = do dflags <- getDynFlags
1274 emitAssign (CmmLocal res) (CmmMachOp cast [
1275 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1276
1277 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1278 -> Maybe MachOp -- Optional value cast
1279 -> CmmExpr -- Base address
1280 -> CmmType -- Type of element by which we are indexing
1281 -> CmmExpr -- Index
1282 -> CmmExpr -- Value to write
1283 -> FCode ()
1284 mkBasicIndexedWrite off Nothing base idx_ty idx val
1285 = do dflags <- getDynFlags
1286 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1287 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1288 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1289
1290 -- ----------------------------------------------------------------------------
1291 -- Misc utils
1292
1293 cmmIndexOffExpr :: DynFlags
1294 -> ByteOff -- Initial offset in bytes
1295 -> Width -- Width of element by which we are indexing
1296 -> CmmExpr -- Base address
1297 -> CmmExpr -- Index
1298 -> CmmExpr
1299 cmmIndexOffExpr dflags off width base idx
1300 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1301
1302 cmmLoadIndexOffExpr :: DynFlags
1303 -> ByteOff -- Initial offset in bytes
1304 -> CmmType -- Type of element we are accessing
1305 -> CmmExpr -- Base address
1306 -> CmmType -- Type of element by which we are indexing
1307 -> CmmExpr -- Index
1308 -> CmmExpr
1309 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1310 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1311
1312 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1313 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1314
1315 ------------------------------------------------------------------------------
1316 -- Helpers for translating vector primops.
1317
1318 vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
1319 vecVmmType pocat n w = vec n (vecCmmCat pocat w)
1320
1321 vecCmmCat :: PrimOpVecCat -> Width -> CmmType
1322 vecCmmCat IntVec = cmmBits
1323 vecCmmCat WordVec = cmmBits
1324 vecCmmCat FloatVec = cmmFloat
1325
1326 vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1327 vecElemInjectCast _ FloatVec _ = Nothing
1328 vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
1329 vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
1330 vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
1331 vecElemInjectCast _ IntVec W64 = Nothing
1332 vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
1333 vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
1334 vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
1335 vecElemInjectCast _ WordVec W64 = Nothing
1336 vecElemInjectCast _ _ _ = Nothing
1337
1338 vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
1339 vecElemProjectCast _ FloatVec _ = Nothing
1340 vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
1341 vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
1342 vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
1343 vecElemProjectCast _ IntVec W64 = Nothing
1344 vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
1345 vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
1346 vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
1347 vecElemProjectCast _ WordVec W64 = Nothing
1348 vecElemProjectCast _ _ _ = Nothing
1349
1350 -- Check to make sure that we can generate code for the specified vector type
1351 -- given the current set of dynamic flags.
1352 checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
1353 checkVecCompatibility dflags vcat l w = do
1354 when (hscTarget dflags /= HscLlvm) $ do
1355 sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
1356 ,"Please use -fllvm."]
1357 check vecWidth vcat l w
1358 where
1359 check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
1360 check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
1361 sorry $ "128-bit wide single-precision floating point " ++
1362 "SIMD vector instructions require at least -msse."
1363 check W128 _ _ _ | not (isSse2Enabled dflags) =
1364 sorry $ "128-bit wide integer and double precision " ++
1365 "SIMD vector instructions require at least -msse2."
1366 check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
1367 sorry $ "256-bit wide floating point " ++
1368 "SIMD vector instructions require at least -mavx."
1369 check W256 _ _ _ | not (isAvx2Enabled dflags) =
1370 sorry $ "256-bit wide integer " ++
1371 "SIMD vector instructions require at least -mavx2."
1372 check W512 _ _ _ | not (isAvx512fEnabled dflags) =
1373 sorry $ "512-bit wide " ++
1374 "SIMD vector instructions require -mavx512f."
1375 check _ _ _ _ = return ()
1376
1377 vecWidth = typeWidth (vecVmmType vcat l w)
1378
1379 ------------------------------------------------------------------------------
1380 -- Helpers for translating vector packing and unpacking.
1381
1382 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1383 -> CmmType -- Type of vector
1384 -> CmmExpr -- Initial vector
1385 -> [CmmExpr] -- Elements
1386 -> CmmFormal -- Destination for result
1387 -> FCode ()
1388 doVecPackOp maybe_pre_write_cast ty z es res = do
1389 dst <- newTemp ty
1390 emitAssign (CmmLocal dst) z
1391 vecPack dst es 0
1392 where
1393 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1394 vecPack src [] _ =
1395 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1396
1397 vecPack src (e : es) i = do
1398 dst <- newTemp ty
1399 if isFloatType (vecElemType ty)
1400 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1401 [CmmReg (CmmLocal src), cast e, iLit])
1402 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1403 [CmmReg (CmmLocal src), cast e, iLit])
1404 vecPack dst es (i + 1)
1405 where
1406 -- vector indices are always 32-bits
1407 iLit = CmmLit (CmmInt (toInteger i) W32)
1408
1409 cast :: CmmExpr -> CmmExpr
1410 cast val = case maybe_pre_write_cast of
1411 Nothing -> val
1412 Just cast -> CmmMachOp cast [val]
1413
1414 len :: Length
1415 len = vecLength ty
1416
1417 wid :: Width
1418 wid = typeWidth (vecElemType ty)
1419
1420 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1421 -> CmmType -- Type of vector
1422 -> CmmExpr -- Vector
1423 -> [CmmFormal] -- Element results
1424 -> FCode ()
1425 doVecUnpackOp maybe_post_read_cast ty e res =
1426 vecUnpack res 0
1427 where
1428 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1429 vecUnpack [] _ =
1430 return ()
1431
1432 vecUnpack (r : rs) i = do
1433 if isFloatType (vecElemType ty)
1434 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1435 [e, iLit]))
1436 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1437 [e, iLit]))
1438 vecUnpack rs (i + 1)
1439 where
1440 -- vector indices are always 32-bits
1441 iLit = CmmLit (CmmInt (toInteger i) W32)
1442
1443 cast :: CmmExpr -> CmmExpr
1444 cast val = case maybe_post_read_cast of
1445 Nothing -> val
1446 Just cast -> CmmMachOp cast [val]
1447
1448 len :: Length
1449 len = vecLength ty
1450
1451 wid :: Width
1452 wid = typeWidth (vecElemType ty)
1453
1454 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1455 -> CmmType -- Vector type
1456 -> CmmExpr -- Source vector
1457 -> CmmExpr -- Element
1458 -> CmmExpr -- Index at which to insert element
1459 -> CmmFormal -- Destination for result
1460 -> FCode ()
1461 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1462 dflags <- getDynFlags
1463 -- vector indices are always 32-bits
1464 let idx' :: CmmExpr
1465 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1466 if isFloatType (vecElemType ty)
1467 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1468 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1469 where
1470 cast :: CmmExpr -> CmmExpr
1471 cast val = case maybe_pre_write_cast of
1472 Nothing -> val
1473 Just cast -> CmmMachOp cast [val]
1474
1475 len :: Length
1476 len = vecLength ty
1477
1478 wid :: Width
1479 wid = typeWidth (vecElemType ty)
1480
1481 ------------------------------------------------------------------------------
1482 -- Helpers for translating prefetching.
1483
1484 doPrefetchByteArrayOp :: Int
1485 -> [LocalReg]
1486 -> [CmmExpr]
1487 -> FCode ()
1488 doPrefetchByteArrayOp locality res [addr,idx]
1489 = do dflags <- getDynFlags
1490 mkBasicPrefetch locality (arrWordsHdrSize dflags) res addr idx
1491 doPrefetchByteArrayOp _ _ _
1492 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1493
1494 doPrefetchAddrOp ::Int
1495 -> [LocalReg]
1496 -> [CmmExpr]
1497 -> FCode ()
1498 doPrefetchAddrOp locality res [addr,idx]
1499 = mkBasicPrefetch locality 0 res addr idx
1500 doPrefetchAddrOp _ _ _
1501 = panic "StgCmmPrim: doPrefetchAddrOp"
1502
1503 mkBasicPrefetch :: Int -- Locality level 0-3
1504 -> ByteOff -- Initial offset in bytes
1505 -> [LocalReg] -- Destination
1506 -> CmmExpr -- Base address
1507 -> CmmExpr -- Index
1508 -> FCode ()
1509 mkBasicPrefetch locality off res base idx
1510 = do dflags <- getDynFlags
1511 emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1512 case res of
1513 [] -> return ()
1514 [reg] -> emitAssign (CmmLocal reg) base
1515 _ -> panic "StgCmmPrim: mkBasicPrefetch"
1516
1517 -- ----------------------------------------------------------------------------
1518 -- Allocating byte arrays
1519
1520 -- | Takes a register to return the newly allocated array in and the
1521 -- size of the new array in bytes. Allocates a new
1522 -- 'MutableByteArray#'.
1523 doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
1524 doNewByteArrayOp res_r n = do
1525 dflags <- getDynFlags
1526
1527 let info_ptr = mkLblExpr mkArrWords_infoLabel
1528 rep = arrWordsRep dflags n
1529
1530 tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
1531 (mkIntExpr dflags (nonHdrSize dflags rep))
1532 (zeroExpr dflags)
1533
1534 let hdr_size = fixedHdrSize dflags
1535
1536 base <- allocHeapClosure rep info_ptr curCCS
1537 [ (mkIntExpr dflags n,
1538 hdr_size + oFFSET_StgArrWords_bytes dflags)
1539 ]
1540
1541 emit $ mkAssign (CmmLocal res_r) base
1542
1543 -- ----------------------------------------------------------------------------
1544 -- Copying byte arrays
1545
1546 -- | Takes a source 'ByteArray#', an offset in the source array, a
1547 -- destination 'MutableByteArray#', an offset into the destination
1548 -- array, and the number of bytes to copy. Copies the given number of
1549 -- bytes from the source array to the destination array.
1550 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1551 -> FCode ()
1552 doCopyByteArrayOp = emitCopyByteArray copy
1553 where
1554 -- Copy data (we assume the arrays aren't overlapping since
1555 -- they're of different types)
1556 copy _src _dst dst_p src_p bytes =
1557 do dflags <- getDynFlags
1558 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1559
1560 -- | Takes a source 'MutableByteArray#', an offset in the source
1561 -- array, a destination 'MutableByteArray#', an offset into the
1562 -- destination array, and the number of bytes to copy. Copies the
1563 -- given number of bytes from the source array to the destination
1564 -- array.
1565 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1566 -> FCode ()
1567 doCopyMutableByteArrayOp = emitCopyByteArray copy
1568 where
1569 -- The only time the memory might overlap is when the two arrays
1570 -- we were provided are the same array!
1571 -- TODO: Optimize branch for common case of no aliasing.
1572 copy src dst dst_p src_p bytes = do
1573 dflags <- getDynFlags
1574 [moveCall, cpyCall] <- forkAlts [
1575 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags 1),
1576 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1577 ]
1578 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1579
1580 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1581 -> FCode ())
1582 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1583 -> FCode ()
1584 emitCopyByteArray copy src src_off dst dst_off n = do
1585 dflags <- getDynFlags
1586 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1587 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1588 copy src dst dst_p src_p n
1589
1590 -- | Takes a source 'ByteArray#', an offset in the source array, a
1591 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1592 -- number of bytes from the source array to the destination memory region.
1593 doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1594 doCopyByteArrayToAddrOp src src_off dst_p bytes = do
1595 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1596 dflags <- getDynFlags
1597 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1598 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1599
1600 -- | Takes a source 'MutableByteArray#', an offset in the source array, a
1601 -- destination 'Addr#', and the number of bytes to copy. Copies the given
1602 -- number of bytes from the source array to the destination memory region.
1603 doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1604 -> FCode ()
1605 doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
1606
1607 -- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
1608 -- the destination array, and the number of bytes to copy. Copies the given
1609 -- number of bytes from the source memory region to the destination array.
1610 doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1611 doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
1612 -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
1613 dflags <- getDynFlags
1614 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1615 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1616
1617
1618 -- ----------------------------------------------------------------------------
1619 -- Setting byte arrays
1620
1621 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1622 -- and a byte, and sets each of the selected bytes in the array to the
1623 -- character.
1624 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1625 -> FCode ()
1626 doSetByteArrayOp ba off len c
1627 = do dflags <- getDynFlags
1628 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1629 emitMemsetCall p c len (mkIntExpr dflags 1)
1630
1631 -- ----------------------------------------------------------------------------
1632 -- Allocating arrays
1633
1634 -- | Allocate a new array.
1635 doNewArrayOp :: CmmFormal -- ^ return register
1636 -> SMRep -- ^ representation of the array
1637 -> CLabel -- ^ info pointer
1638 -> [(CmmExpr, ByteOff)] -- ^ header payload
1639 -> WordOff -- ^ array size
1640 -> CmmExpr -- ^ initial element
1641 -> FCode ()
1642 doNewArrayOp res_r rep info payload n init = do
1643 dflags <- getDynFlags
1644
1645 let info_ptr = mkLblExpr info
1646
1647 tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
1648 (mkIntExpr dflags (nonHdrSize dflags rep))
1649 (zeroExpr dflags)
1650
1651 base <- allocHeapClosure rep info_ptr curCCS payload
1652
1653 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1654 emit $ mkAssign arr base
1655
1656 -- Initialise all elements of the the array
1657 p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep)
1658 for <- newLabelC
1659 emitLabel for
1660 let loopBody =
1661 [ mkStore (CmmReg (CmmLocal p)) init
1662 , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
1663 , mkBranch for ]
1664 emit =<< mkCmmIfThen
1665 (cmmULtWord dflags (CmmReg (CmmLocal p))
1666 (cmmOffsetW dflags (CmmReg arr)
1667 (hdrSizeW dflags rep + n)))
1668 (catAGraphs loopBody)
1669
1670 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1671
1672 -- ----------------------------------------------------------------------------
1673 -- Copying pointer arrays
1674
1675 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1676 -- nowhere else in the code generator. This is mostly because these
1677 -- "primitive" ops result in a surprisingly large amount of code. It
1678 -- will likely be worthwhile to optimize what is emitted here, so that
1679 -- our optimization passes don't waste time repeatedly optimizing the
1680 -- same bits of code.
1681
1682 -- More closely imitates 'assignTemp' from the old code generator, which
1683 -- returns a CmmExpr rather than a LocalReg.
1684 assignTempE :: CmmExpr -> FCode CmmExpr
1685 assignTempE e = do
1686 t <- assignTemp e
1687 return (CmmReg (CmmLocal t))
1688
1689 -- | Takes a source 'Array#', an offset in the source array, a
1690 -- destination 'MutableArray#', an offset into the destination array,
1691 -- and the number of elements to copy. Copies the given number of
1692 -- elements from the source array to the destination array.
1693 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1694 -> FCode ()
1695 doCopyArrayOp = emitCopyArray copy
1696 where
1697 -- Copy data (we assume the arrays aren't overlapping since
1698 -- they're of different types)
1699 copy _src _dst dst_p src_p bytes =
1700 do dflags <- getDynFlags
1701 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1702 (mkIntExpr dflags (wORD_SIZE dflags))
1703
1704
1705 -- | Takes a source 'MutableArray#', an offset in the source array, a
1706 -- destination 'MutableArray#', an offset into the destination array,
1707 -- and the number of elements to copy. Copies the given number of
1708 -- elements from the source array to the destination array.
1709 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1710 -> FCode ()
1711 doCopyMutableArrayOp = emitCopyArray copy
1712 where
1713 -- The only time the memory might overlap is when the two arrays
1714 -- we were provided are the same array!
1715 -- TODO: Optimize branch for common case of no aliasing.
1716 copy src dst dst_p src_p bytes = do
1717 dflags <- getDynFlags
1718 [moveCall, cpyCall] <- forkAlts [
1719 getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1720 (mkIntExpr dflags (wORD_SIZE dflags)),
1721 getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1722 (mkIntExpr dflags (wORD_SIZE dflags))
1723 ]
1724 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1725
1726 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1727 -> FCode ()) -- ^ copy function
1728 -> CmmExpr -- ^ source array
1729 -> CmmExpr -- ^ offset in source array
1730 -> CmmExpr -- ^ destination array
1731 -> CmmExpr -- ^ offset in destination array
1732 -> WordOff -- ^ number of elements to copy
1733 -> FCode ()
1734 emitCopyArray copy src0 src_off dst0 dst_off0 n = do
1735 dflags <- getDynFlags
1736 when (n /= 0) $ do
1737 -- Passed as arguments (be careful)
1738 src <- assignTempE src0
1739 dst <- assignTempE dst0
1740 dst_off <- assignTempE dst_off0
1741
1742 -- Set the dirty bit in the header.
1743 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1744
1745 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
1746 (arrPtrsHdrSize dflags)
1747 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1748 src_p <- assignTempE $ cmmOffsetExprW dflags
1749 (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1750 let bytes = wordsToBytes dflags n
1751
1752 copy src dst dst_p src_p bytes
1753
1754 -- The base address of the destination card table
1755 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
1756 (loadArrPtrsSize dflags dst)
1757
1758 emitSetCards dst_off dst_cards_p n
1759
1760 doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1761 -> FCode ()
1762 doCopySmallArrayOp = emitCopySmallArray copy
1763 where
1764 -- Copy data (we assume the arrays aren't overlapping since
1765 -- they're of different types)
1766 copy _src _dst dst_p src_p bytes =
1767 do dflags <- getDynFlags
1768 emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1769 (mkIntExpr dflags (wORD_SIZE dflags))
1770
1771
1772 doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
1773 -> FCode ()
1774 doCopySmallMutableArrayOp = emitCopySmallArray copy
1775 where
1776 -- The only time the memory might overlap is when the two arrays
1777 -- we were provided are the same array!
1778 -- TODO: Optimize branch for common case of no aliasing.
1779 copy src dst dst_p src_p bytes = do
1780 dflags <- getDynFlags
1781 [moveCall, cpyCall] <- forkAlts
1782 [ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
1783 (mkIntExpr dflags (wORD_SIZE dflags))
1784 , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
1785 (mkIntExpr dflags (wORD_SIZE dflags))
1786 ]
1787 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1788
1789 emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
1790 -> FCode ()) -- ^ copy function
1791 -> CmmExpr -- ^ source array
1792 -> CmmExpr -- ^ offset in source array
1793 -> CmmExpr -- ^ destination array
1794 -> CmmExpr -- ^ offset in destination array
1795 -> WordOff -- ^ number of elements to copy
1796 -> FCode ()
1797 emitCopySmallArray copy src0 src_off dst0 dst_off n = do
1798 dflags <- getDynFlags
1799
1800 -- Passed as arguments (be careful)
1801 src <- assignTempE src0
1802 dst <- assignTempE dst0
1803
1804 -- Set the dirty bit in the header.
1805 emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
1806
1807 dst_p <- assignTempE $ cmmOffsetExprW dflags
1808 (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
1809 src_p <- assignTempE $ cmmOffsetExprW dflags
1810 (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
1811 let bytes = wordsToBytes dflags n
1812
1813 copy src dst dst_p src_p bytes
1814
1815 -- | Takes an info table label, a register to return the newly
1816 -- allocated array in, a source array, an offset in the source array,
1817 -- and the number of elements to copy. Allocates a new array and
1818 -- initializes it from the source array.
1819 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1820 -> FCode ()
1821 emitCloneArray info_p res_r src src_off n = do
1822 dflags <- getDynFlags
1823
1824 let info_ptr = mkLblExpr info_p
1825 rep = arrPtrsRep dflags n
1826
1827 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
1828 (mkIntExpr dflags (nonHdrSize dflags rep))
1829 (zeroExpr dflags)
1830
1831 let hdr_size = fixedHdrSize dflags
1832
1833 base <- allocHeapClosure rep info_ptr curCCS
1834 [ (mkIntExpr dflags n,
1835 hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
1836 , (mkIntExpr dflags (nonHdrSizeW rep),
1837 hdr_size + oFFSET_StgMutArrPtrs_size dflags)
1838 ]
1839
1840 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1841 emit $ mkAssign arr base
1842
1843 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
1844 (arrPtrsHdrSize dflags)
1845 src_p <- assignTempE $ cmmOffsetExprW dflags src
1846 (cmmAddWord dflags
1847 (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
1848
1849 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
1850 (mkIntExpr dflags (wORD_SIZE dflags))
1851
1852 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1853
1854 -- | Takes an info table label, a register to return the newly
1855 -- allocated array in, a source array, an offset in the source array,
1856 -- and the number of elements to copy. Allocates a new array and
1857 -- initializes it from the source array.
1858 emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
1859 -> FCode ()
1860 emitCloneSmallArray info_p res_r src src_off n = do
1861 dflags <- getDynFlags
1862
1863 let info_ptr = mkLblExpr info_p
1864 rep = smallArrPtrsRep n
1865
1866 tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
1867 (mkIntExpr dflags (nonHdrSize dflags rep))
1868 (zeroExpr dflags)
1869
1870 let hdr_size = fixedHdrSize dflags
1871
1872 base <- allocHeapClosure rep info_ptr curCCS
1873 [ (mkIntExpr dflags n,
1874 hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
1875 ]
1876
1877 arr <- CmmLocal `fmap` newTemp (bWord dflags)
1878 emit $ mkAssign arr base
1879
1880 dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
1881 (smallArrPtrsHdrSize dflags)
1882 src_p <- assignTempE $ cmmOffsetExprW dflags src
1883 (cmmAddWord dflags
1884 (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
1885
1886 emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
1887 (mkIntExpr dflags (wORD_SIZE dflags))
1888
1889 emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
1890
1891 -- | Takes and offset in the destination array, the base address of
1892 -- the card table, and the number of elements affected (*not* the
1893 -- number of cards). The number of elements may not be zero.
1894 -- Marks the relevant cards as dirty.
1895 emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
1896 emitSetCards dst_start dst_cards_start n = do
1897 dflags <- getDynFlags
1898 start_card <- assignTempE $ cardCmm dflags dst_start
1899 let end_card = cardCmm dflags
1900 (cmmSubWord dflags
1901 (cmmAddWord dflags dst_start (mkIntExpr dflags n))
1902 (mkIntExpr dflags 1))
1903 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
1904 (mkIntExpr dflags 1)
1905 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
1906 (mkIntExpr dflags 1) -- no alignment (1 byte)
1907
1908 -- Convert an element index to a card index
1909 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
1910 cardCmm dflags i =
1911 cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
1912
1913 ------------------------------------------------------------------------------
1914 -- SmallArray PrimOp implementations
1915
1916 doReadSmallPtrArrayOp :: LocalReg
1917 -> CmmExpr
1918 -> CmmExpr
1919 -> FCode ()
1920 doReadSmallPtrArrayOp res addr idx = do
1921 dflags <- getDynFlags
1922 mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
1923 (gcWord dflags) idx
1924
1925 doWriteSmallPtrArrayOp :: CmmExpr
1926 -> CmmExpr
1927 -> CmmExpr
1928 -> FCode ()
1929 doWriteSmallPtrArrayOp addr idx val = do
1930 dflags <- getDynFlags
1931 let ty = cmmExprType dflags val
1932 mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
1933 emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
1934
1935 ------------------------------------------------------------------------------
1936 -- Helpers for emitting function calls
1937
1938 -- | Emit a call to @memcpy@.
1939 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1940 emitMemcpyCall dst src n align = do
1941 emitPrimCall
1942 [ {-no results-} ]
1943 MO_Memcpy
1944 [ dst, src, n, align ]
1945
1946 -- | Emit a call to @memmove@.
1947 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1948 emitMemmoveCall dst src n align = do
1949 emitPrimCall
1950 [ {- no results -} ]
1951 MO_Memmove
1952 [ dst, src, n, align ]
1953
1954 -- | Emit a call to @memset@. The second argument must fit inside an
1955 -- unsigned char.
1956 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1957 emitMemsetCall dst c n align = do
1958 emitPrimCall
1959 [ {- no results -} ]
1960 MO_Memset
1961 [ dst, c, n, align ]
1962
1963 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1964 emitBSwapCall res x width = do
1965 emitPrimCall
1966 [ res ]
1967 (MO_BSwap width)
1968 [ x ]
1969
1970 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1971 emitPopCntCall res x width = do
1972 emitPrimCall
1973 [ res ]
1974 (MO_PopCnt width)
1975 [ x ]