Add support for passing SSE vectors in registers.
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C--: primitive operations
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmPrim (
10 cgOpApp,
11 cgPrimOp -- internal(ish), used by cgCase to get code for a
12 -- comparison without also turning it into a Bool.
13 ) where
14
15 #include "HsVersions.h"
16
17 import StgCmmLayout
18 import StgCmmForeign
19 import StgCmmEnv
20 import StgCmmMonad
21 import StgCmmUtils
22 import StgCmmTicky
23 import StgCmmHeap
24 import StgCmmProf
25
26 import DynFlags
27 import Platform
28 import BasicTypes
29 import MkGraph
30 import StgSyn
31 import Cmm
32 import CmmInfo
33 import Type ( Type, tyConAppTyCon )
34 import TyCon
35 import CLabel
36 import CmmUtils
37 import PrimOp
38 import SMRep
39 import Module
40 import FastString
41 import Outputable
42 import Util
43
44 import Control.Monad (liftM)
45 import Data.Bits
46
47 ------------------------------------------------------------------------
48 -- Primitive operations and foreign calls
49 ------------------------------------------------------------------------
50
51 {- Note [Foreign call results]
52 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
53 A foreign call always returns an unboxed tuple of results, one
54 of which is the state token. This seems to happen even for pure
55 calls.
56
57 Even if we returned a single result for pure calls, it'd still be
58 right to wrap it in a singleton unboxed tuple, because the result
59 might be a Haskell closure pointer, we don't want to evaluate it. -}
60
61 ----------------------------------
62 cgOpApp :: StgOp -- The op
63 -> [StgArg] -- Arguments
64 -> Type -- Result type (always an unboxed tuple)
65 -> FCode ReturnKind
66
67 -- Foreign calls
68 cgOpApp (StgFCallOp fcall _) stg_args res_ty
69 = cgForeignCall fcall stg_args res_ty
70 -- Note [Foreign call results]
71
72 -- tagToEnum# is special: we need to pull the constructor
73 -- out of the table, and perform an appropriate return.
74
75 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
76 = ASSERT(isEnumerationTyCon tycon)
77 do { dflags <- getDynFlags
78 ; args' <- getNonVoidArgAmodes [arg]
79 ; let amode = case args' of [amode] -> amode
80 _ -> panic "TagToEnumOp had void arg"
81 ; emitReturn [tagToClosure dflags tycon amode] }
82 where
83 -- If you're reading this code in the attempt to figure
84 -- out why the compiler panic'ed here, it is probably because
85 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
86 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
87 -- That won't work.
88 tycon = tyConAppTyCon res_ty
89
90 cgOpApp (StgPrimOp primop) args res_ty
91 | primOpOutOfLine primop
92 = do { cmm_args <- getNonVoidArgAmodes args
93 ; let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
94 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
95
96 | ReturnsPrim VoidRep <- result_info
97 = do cgPrimOp [] primop args
98 emitReturn []
99
100 | ReturnsPrim rep <- result_info
101 = do dflags <- getDynFlags
102 res <- newTemp (primRepCmmType dflags rep)
103 cgPrimOp [res] primop args
104 emitReturn [CmmReg (CmmLocal res)]
105
106 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
107 = do (regs, _hints) <- newUnboxedTupleRegs res_ty
108 cgPrimOp regs primop args
109 emitReturn (map (CmmReg . CmmLocal) regs)
110
111 | ReturnsAlg tycon <- result_info
112 , isEnumerationTyCon tycon
113 -- c.f. cgExpr (...TagToEnumOp...)
114 = do dflags <- getDynFlags
115 tag_reg <- newTemp (bWord dflags)
116 cgPrimOp [tag_reg] primop args
117 emitReturn [tagToClosure dflags tycon
118 (CmmReg (CmmLocal tag_reg))]
119
120 | otherwise = panic "cgPrimop"
121 where
122 result_info = getPrimOpResultInfo primop
123
124 cgOpApp (StgPrimCallOp primcall) args _res_ty
125 = do { cmm_args <- getNonVoidArgAmodes args
126 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
127 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
128
129 ---------------------------------------------------
130 cgPrimOp :: [LocalReg] -- where to put the results
131 -> PrimOp -- the op
132 -> [StgArg] -- arguments
133 -> FCode ()
134
135 cgPrimOp results op args
136 = do dflags <- getDynFlags
137 arg_exprs <- getNonVoidArgAmodes args
138 emitPrimOp dflags results op arg_exprs
139
140
141 ------------------------------------------------------------------------
142 -- Emitting code for a primop
143 ------------------------------------------------------------------------
144
145 emitPrimOp :: DynFlags
146 -> [LocalReg] -- where to put the results
147 -> PrimOp -- the op
148 -> [CmmExpr] -- arguments
149 -> FCode ()
150
151 -- First we handle various awkward cases specially. The remaining
152 -- easy cases are then handled by translateOp, defined below.
153
154 emitPrimOp dflags [res_r,res_c] IntAddCOp [aa,bb]
155 {-
156 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
157 C, and without needing any comparisons. This may not be the
158 fastest way to do it - if you have better code, please send it! --SDM
159
160 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
161
162 We currently don't make use of the r value if c is != 0 (i.e.
163 overflow), we just convert to big integers and try again. This
164 could be improved by making r and c the correct values for
165 plugging into a new J#.
166
167 { r = ((I_)(a)) + ((I_)(b)); \
168 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
169 >> (BITS_IN (I_) - 1); \
170 }
171 Wading through the mass of bracketry, it seems to reduce to:
172 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
173
174 -}
175 = emit $ catAGraphs [
176 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
177 mkAssign (CmmLocal res_c) $
178 CmmMachOp (mo_wordUShr dflags) [
179 CmmMachOp (mo_wordAnd dflags) [
180 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
181 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
182 ],
183 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
184 ]
185 ]
186
187
188 emitPrimOp dflags [res_r,res_c] IntSubCOp [aa,bb]
189 {- Similarly:
190 #define subIntCzh(r,c,a,b) \
191 { r = ((I_)(a)) - ((I_)(b)); \
192 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
193 >> (BITS_IN (I_) - 1); \
194 }
195
196 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
197 -}
198 = emit $ catAGraphs [
199 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
200 mkAssign (CmmLocal res_c) $
201 CmmMachOp (mo_wordUShr dflags) [
202 CmmMachOp (mo_wordAnd dflags) [
203 CmmMachOp (mo_wordXor dflags) [aa,bb],
204 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
205 ],
206 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
207 ]
208 ]
209
210
211 emitPrimOp _ [res] ParOp [arg]
212 =
213 -- for now, just implement this in a C function
214 -- later, we might want to inline it.
215 emitCCall
216 [(res,NoHint)]
217 (CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "newSpark"))))
218 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
219
220 emitPrimOp dflags [res] SparkOp [arg]
221 = do
222 -- returns the value of arg in res. We're going to therefore
223 -- refer to arg twice (once to pass to newSpark(), and once to
224 -- assign to res), so put it in a temporary.
225 tmp <- assignTemp arg
226 tmp2 <- newTemp (bWord dflags)
227 emitCCall
228 [(tmp2,NoHint)]
229 (CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "newSpark"))))
230 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
231 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
232
233 emitPrimOp dflags [res] GetCCSOfOp [arg]
234 = emitAssign (CmmLocal res) val
235 where
236 val
237 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
238 | otherwise = CmmLit (zeroCLit dflags)
239
240 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
241 = emitAssign (CmmLocal res) curCCS
242
243 emitPrimOp dflags [res] ReadMutVarOp [mutv]
244 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSize dflags) (gcWord dflags))
245
246 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
247 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSize dflags)) var
248 emitCCall
249 [{-no results-}]
250 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
251 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
252
253 -- #define sizzeofByteArrayzh(r,a) \
254 -- r = ((StgArrWords *)(a))->bytes
255 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
256 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
257
258 -- #define sizzeofMutableByteArrayzh(r,a) \
259 -- r = ((StgArrWords *)(a))->bytes
260 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
261 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
262
263
264 -- #define touchzh(o) /* nothing */
265 emitPrimOp _ res@[] TouchOp args@[_arg]
266 = do emitPrimCall res MO_Touch args
267
268 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
269 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
270 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
271
272 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
273 emitPrimOp dflags [res] StableNameToIntOp [arg]
274 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
275
276 -- #define eqStableNamezh(r,sn1,sn2) \
277 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
278 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
279 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
280 cmmLoadIndexW dflags arg1 (fixedHdrSize dflags) (bWord dflags),
281 cmmLoadIndexW dflags arg2 (fixedHdrSize dflags) (bWord dflags)
282 ])
283
284
285 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
286 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
287
288 -- #define addrToHValuezh(r,a) r=(P_)a
289 emitPrimOp _ [res] AddrToAnyOp [arg]
290 = emitAssign (CmmLocal res) arg
291
292 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
293 -- Note: argument may be tagged!
294 emitPrimOp dflags [res] DataToTagOp [arg]
295 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
296
297 {- Freezing arrays-of-ptrs requires changing an info table, for the
298 benefit of the generational collector. It needs to scavenge mutable
299 objects, even if they are in old space. When they become immutable,
300 they can be removed from this scavenge list. -}
301
302 -- #define unsafeFreezzeArrayzh(r,a)
303 -- {
304 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
305 -- r = a;
306 -- }
307 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
308 = emit $ catAGraphs
309 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
310 mkAssign (CmmLocal res) arg ]
311 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
312 = emit $ catAGraphs
313 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
314 mkAssign (CmmLocal res) arg ]
315
316 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
317 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
318 = emitAssign (CmmLocal res) arg
319
320 -- Copying pointer arrays
321
322 emitPrimOp _ [] CopyArrayOp [src,src_off,dst,dst_off,n] =
323 doCopyArrayOp src src_off dst dst_off n
324 emitPrimOp _ [] CopyMutableArrayOp [src,src_off,dst,dst_off,n] =
325 doCopyMutableArrayOp src src_off dst dst_off n
326 emitPrimOp _ [res] CloneArrayOp [src,src_off,n] =
327 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
328 emitPrimOp _ [res] CloneMutableArrayOp [src,src_off,n] =
329 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
330 emitPrimOp _ [res] FreezeArrayOp [src,src_off,n] =
331 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
332 emitPrimOp _ [res] ThawArrayOp [src,src_off,n] =
333 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
334
335 emitPrimOp _ [] CopyArrayArrayOp [src,src_off,dst,dst_off,n] =
336 doCopyArrayOp src src_off dst dst_off n
337 emitPrimOp _ [] CopyMutableArrayArrayOp [src,src_off,dst,dst_off,n] =
338 doCopyMutableArrayOp src src_off dst dst_off n
339
340 -- Reading/writing pointer arrays
341
342 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
343 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
344 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
345
346 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
347 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
348 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
349 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
350 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
351 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
352 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
353 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
354 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
355 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
356
357 emitPrimOp dflags [res] SizeofArrayOp [arg]
358 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags))
359 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
360 = emitPrimOp dflags [res] SizeofArrayOp [arg]
361 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
362 = emitPrimOp dflags [res] SizeofArrayOp [arg]
363 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
364 = emitPrimOp dflags [res] SizeofArrayOp [arg]
365
366 -- IndexXXXoffAddr
367
368 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
369 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
370 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
371 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
372 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
373 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
374 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
375 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
376 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
377 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
378 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
379 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
380 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
381 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
382 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
383 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
384 emitPrimOp _ res IndexOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
385 emitPrimOp _ res IndexOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 f32 res args
386 emitPrimOp _ res IndexOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
387 emitPrimOp _ res IndexOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 f64 res args
388 emitPrimOp _ res IndexOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
389 emitPrimOp _ res IndexOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
390 emitPrimOp _ res IndexOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
391 emitPrimOp _ res IndexOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
392
393 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
394
395 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
396 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
397 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
398 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
399 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
400 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
401 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
402 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
403 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
404 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
405 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
406 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
407 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
408 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
409 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
410 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
411 emitPrimOp _ res ReadOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
412 emitPrimOp _ res ReadOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 b32 res args
413 emitPrimOp _ res ReadOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
414 emitPrimOp _ res ReadOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 b64 res args
415 emitPrimOp _ res ReadOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
416 emitPrimOp _ res ReadOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
417 emitPrimOp _ res ReadOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
418 emitPrimOp _ res ReadOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
419
420 -- IndexXXXArray
421
422 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
423 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
424 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
425 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
426 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
427 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
428 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
429 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
430 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
431 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
432 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
433 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
434 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
435 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
436 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
437 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
438 emitPrimOp _ res IndexByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
439 emitPrimOp _ res IndexByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
440 emitPrimOp _ res IndexByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
441 emitPrimOp _ res IndexByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
442 emitPrimOp _ res IndexByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
443 emitPrimOp _ res IndexByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
444 emitPrimOp _ res IndexByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
445 emitPrimOp _ res IndexByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
446
447 -- ReadXXXArray, identical to IndexXXXArray.
448
449 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
450 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
451 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
452 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
453 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
454 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
455 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
456 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
457 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
458 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
459 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
460 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
461 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
462 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
463 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
464 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
465 emitPrimOp _ res ReadByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
466 emitPrimOp _ res ReadByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
467 emitPrimOp _ res ReadByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
468 emitPrimOp _ res ReadByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
469 emitPrimOp _ res ReadByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
470 emitPrimOp _ res ReadByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
471 emitPrimOp _ res ReadByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
472 emitPrimOp _ res ReadByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
473
474 -- WriteXXXoffAddr
475
476 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
477 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
478 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
479 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
480 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
481 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
482 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
483 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
484 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
485 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
486 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
487 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
488 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
489 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
490 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
491 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
492 emitPrimOp _ res WriteOffAddrOp_FloatX4 args = doWriteOffAddrOp Nothing vec4f32 res args
493 emitPrimOp _ res WriteOffAddrOp_FloatAsFloatX4 args = doWriteOffAddrOp Nothing f32 res args
494 emitPrimOp _ res WriteOffAddrOp_DoubleX2 args = doWriteOffAddrOp Nothing vec2f64 res args
495 emitPrimOp _ res WriteOffAddrOp_DoubleAsDoubleX2 args = doWriteOffAddrOp Nothing f64 res args
496 emitPrimOp _ res WriteOffAddrOp_Int32X4 args = doWriteOffAddrOp Nothing vec4b32 res args
497 emitPrimOp _ res WriteOffAddrOp_Int32AsInt32X4 args = doWriteOffAddrOp Nothing b32 res args
498 emitPrimOp _ res WriteOffAddrOp_Int64X2 args = doWriteOffAddrOp Nothing vec2b64 res args
499 emitPrimOp _ res WriteOffAddrOp_Int64AsInt64X2 args = doWriteOffAddrOp Nothing b64 res args
500
501 -- WriteXXXArray
502
503 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
504 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
505 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
506 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
507 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
508 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
509 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
510 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
511 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
512 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
513 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
514 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
515 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
516 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
517 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
518 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
519 emitPrimOp _ res WriteByteArrayOp_FloatX4 args = doWriteByteArrayOp Nothing vec4f32 res args
520 emitPrimOp _ res WriteByteArrayOp_FloatAsFloatX4 args = doWriteByteArrayOp Nothing f32 res args
521 emitPrimOp _ res WriteByteArrayOp_DoubleX2 args = doWriteByteArrayOp Nothing vec2f64 res args
522 emitPrimOp _ res WriteByteArrayOp_DoubleAsDoubleX2 args = doWriteByteArrayOp Nothing f64 res args
523 emitPrimOp _ res WriteByteArrayOp_Int32X4 args = doWriteByteArrayOp Nothing vec4b32 res args
524 emitPrimOp _ res WriteByteArrayOp_Int32AsInt32X4 args = doWriteByteArrayOp Nothing b32 res args
525 emitPrimOp _ res WriteByteArrayOp_Int64X2 args = doWriteByteArrayOp Nothing vec2b64 res args
526 emitPrimOp _ res WriteByteArrayOp_Int64AsInt64X2 args = doWriteByteArrayOp Nothing b64 res args
527
528 -- Copying and setting byte arrays
529 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
530 doCopyByteArrayOp src src_off dst dst_off n
531 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
532 doCopyMutableByteArrayOp src src_off dst dst_off n
533 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
534 doSetByteArrayOp ba off len c
535
536 -- Population count
537 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
538 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
539 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
540 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
541 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
542
543 -- Unsigned int to floating point conversions
544 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
545 (MO_UF_Conv W32) [w]
546 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
547 (MO_UF_Conv W64) [w]
548
549 -- SIMD vector packing and unpacking
550 emitPrimOp _ [res] FloatToFloatX4Op [e] =
551 doVecPackOp Nothing vec4f32 zero [e,e,e,e] res
552 where
553 zero :: CmmExpr
554 zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
555
556 emitPrimOp _ [res] FloatX4PackOp es@[_,_,_,_] =
557 doVecPackOp Nothing vec4f32 zero es res
558 where
559 zero :: CmmExpr
560 zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
561
562 emitPrimOp _ res@[_,_,_,_] FloatX4UnpackOp [arg] =
563 doVecUnpackOp Nothing vec4f32 arg res
564
565 emitPrimOp _ [res] FloatX4InsertOp [v,e,i] =
566 doVecInsertOp Nothing vec4f32 v e i res
567
568 emitPrimOp _ [res] DoubleToDoubleX2Op [e] =
569 doVecPackOp Nothing vec2f64 zero [e,e] res
570 where
571 zero :: CmmExpr
572 zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
573
574 emitPrimOp _ [res] DoubleX2PackOp es@[_,_] =
575 doVecPackOp Nothing vec2f64 zero es res
576 where
577 zero :: CmmExpr
578 zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
579
580 emitPrimOp _ res@[_,_] DoubleX2UnpackOp [arg] =
581 doVecUnpackOp Nothing vec2f64 arg res
582
583 emitPrimOp _ [res] DoubleX2InsertOp [v,e,i] =
584 doVecInsertOp Nothing vec2f64 v e i res
585
586 emitPrimOp dflags [res] Int32ToInt32X4Op [e] =
587 doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero [e,e,e,e] res
588 where
589 zero :: CmmExpr
590 zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
591
592 emitPrimOp dflags [res] Int32X4PackOp es@[_,_,_,_] =
593 doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero es res
594 where
595 zero :: CmmExpr
596 zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
597
598 emitPrimOp dflags res@[_,_,_,_] Int32X4UnpackOp [arg] =
599 doVecUnpackOp (Just (mo_s_32ToWord dflags)) vec4b32 arg res
600
601 emitPrimOp dflags [res] Int32X4InsertOp [v,e,i] =
602 doVecInsertOp (Just (mo_WordTo32 dflags)) vec4b32 v e i res
603
604 emitPrimOp _ [res] Int64ToInt64X2Op [e] =
605 doVecPackOp Nothing vec2b64 zero [e,e] res
606 where
607 zero :: CmmExpr
608 zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
609
610 emitPrimOp _ [res] Int64X2PackOp es@[_,_] =
611 doVecPackOp Nothing vec2b64 zero es res
612 where
613 zero :: CmmExpr
614 zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
615
616 emitPrimOp _ res@[_,_] Int64X2UnpackOp [arg] =
617 doVecUnpackOp Nothing vec2b64 arg res
618
619 emitPrimOp _ [res] Int64X2InsertOp [v,e,i] =
620 doVecInsertOp Nothing vec2b64 v e i res
621
622 -- The rest just translate straightforwardly
623 emitPrimOp dflags [res] op [arg]
624 | nopOp op
625 = emitAssign (CmmLocal res) arg
626
627 | Just (mop,rep) <- narrowOp op
628 = emitAssign (CmmLocal res) $
629 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
630
631 emitPrimOp dflags r@[res] op args
632 | Just prim <- callishOp op
633 = do emitPrimCall r prim args
634
635 | Just mop <- translateOp dflags op
636 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
637 emit stmt
638
639 emitPrimOp dflags results op args
640 = case callishPrimOpSupported dflags op of
641 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
642 Right gen -> gen results args
643
644 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
645
646 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
647 callishPrimOpSupported dflags op
648 = case op of
649 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
650 | otherwise -> Right (genericIntQuotRemOp dflags)
651
652 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
653 | otherwise -> Right (genericWordQuotRemOp dflags)
654
655 WordQuotRem2Op | ncg && x86ish -> Left (MO_U_QuotRem2 (wordWidth dflags))
656 | otherwise -> Right (genericWordQuotRem2Op dflags)
657
658 WordAdd2Op | ncg && x86ish -> Left (MO_Add2 (wordWidth dflags))
659 | otherwise -> Right genericWordAdd2Op
660
661 WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags))
662 | otherwise -> Right genericWordMul2Op
663
664 _ -> panic "emitPrimOp: can't translate PrimOp" (ppr op)
665 where
666 ncg = case hscTarget dflags of
667 HscAsm -> True
668 _ -> False
669
670 x86ish = case platformArch (targetPlatform dflags) of
671 ArchX86 -> True
672 ArchX86_64 -> True
673 _ -> False
674
675 genericIntQuotRemOp :: DynFlags -> GenericOp
676 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
677 = emit $ mkAssign (CmmLocal res_q)
678 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
679 mkAssign (CmmLocal res_r)
680 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
681 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
682
683 genericWordQuotRemOp :: DynFlags -> GenericOp
684 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
685 = emit $ mkAssign (CmmLocal res_q)
686 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
687 mkAssign (CmmLocal res_r)
688 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
689 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
690
691 genericWordQuotRem2Op :: DynFlags -> GenericOp
692 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
693 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
694 where ty = cmmExprType dflags arg_x_high
695 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
696 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
697 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
698 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
699 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
700 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
701 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
702 zero = lit 0
703 one = lit 1
704 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
705 lit i = CmmLit (CmmInt i (wordWidth dflags))
706
707 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
708 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
709 mkAssign (CmmLocal res_r) high)
710 f i acc high low =
711 do roverflowedBit <- newTemp ty
712 rhigh' <- newTemp ty
713 rhigh'' <- newTemp ty
714 rlow' <- newTemp ty
715 risge <- newTemp ty
716 racc' <- newTemp ty
717 let high' = CmmReg (CmmLocal rhigh')
718 isge = CmmReg (CmmLocal risge)
719 overflowedBit = CmmReg (CmmLocal roverflowedBit)
720 let this = catAGraphs
721 [mkAssign (CmmLocal roverflowedBit)
722 (shr high negone),
723 mkAssign (CmmLocal rhigh')
724 (or (shl high one) (shr low negone)),
725 mkAssign (CmmLocal rlow')
726 (shl low one),
727 mkAssign (CmmLocal risge)
728 (or (overflowedBit `ne` zero)
729 (high' `ge` arg_y)),
730 mkAssign (CmmLocal rhigh'')
731 (high' `minus` (arg_y `times` isge)),
732 mkAssign (CmmLocal racc')
733 (or (shl acc one) isge)]
734 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
735 (CmmReg (CmmLocal rhigh''))
736 (CmmReg (CmmLocal rlow'))
737 return (this <*> rest)
738 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
739
740 genericWordAdd2Op :: GenericOp
741 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
742 = do dflags <- getDynFlags
743 r1 <- newTemp (cmmExprType dflags arg_x)
744 r2 <- newTemp (cmmExprType dflags arg_x)
745 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
746 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
747 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
748 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
749 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
750 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
751 (wordWidth dflags))
752 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
753 emit $ catAGraphs
754 [mkAssign (CmmLocal r1)
755 (add (bottomHalf arg_x) (bottomHalf arg_y)),
756 mkAssign (CmmLocal r2)
757 (add (topHalf (CmmReg (CmmLocal r1)))
758 (add (topHalf arg_x) (topHalf arg_y))),
759 mkAssign (CmmLocal res_h)
760 (topHalf (CmmReg (CmmLocal r2))),
761 mkAssign (CmmLocal res_l)
762 (or (toTopHalf (CmmReg (CmmLocal r2)))
763 (bottomHalf (CmmReg (CmmLocal r1))))]
764 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
765
766 genericWordMul2Op :: GenericOp
767 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
768 = do dflags <- getDynFlags
769 let t = cmmExprType dflags arg_x
770 xlyl <- liftM CmmLocal $ newTemp t
771 xlyh <- liftM CmmLocal $ newTemp t
772 xhyl <- liftM CmmLocal $ newTemp t
773 r <- liftM CmmLocal $ newTemp t
774 -- This generic implementation is very simple and slow. We might
775 -- well be able to do better, but for now this at least works.
776 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
777 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
778 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
779 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
780 sum = foldl1 add
781 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
782 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
783 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
784 (wordWidth dflags))
785 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
786 emit $ catAGraphs
787 [mkAssign xlyl
788 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
789 mkAssign xlyh
790 (mul (bottomHalf arg_x) (topHalf arg_y)),
791 mkAssign xhyl
792 (mul (topHalf arg_x) (bottomHalf arg_y)),
793 mkAssign r
794 (sum [topHalf (CmmReg xlyl),
795 bottomHalf (CmmReg xhyl),
796 bottomHalf (CmmReg xlyh)]),
797 mkAssign (CmmLocal res_l)
798 (or (bottomHalf (CmmReg xlyl))
799 (toTopHalf (CmmReg r))),
800 mkAssign (CmmLocal res_h)
801 (sum [mul (topHalf arg_x) (topHalf arg_y),
802 topHalf (CmmReg xhyl),
803 topHalf (CmmReg xlyh),
804 topHalf (CmmReg r)])]
805 genericWordMul2Op _ _ = panic "genericWordMul2Op"
806
807 -- These PrimOps are NOPs in Cmm
808
809 nopOp :: PrimOp -> Bool
810 nopOp Int2WordOp = True
811 nopOp Word2IntOp = True
812 nopOp Int2AddrOp = True
813 nopOp Addr2IntOp = True
814 nopOp ChrOp = True -- Int# and Char# are rep'd the same
815 nopOp OrdOp = True
816 nopOp _ = False
817
818 -- These PrimOps turn into double casts
819
820 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
821 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
822 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
823 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
824 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
825 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
826 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
827 narrowOp _ = Nothing
828
829 -- Native word signless ops
830
831 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
832 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
833 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
834 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
835 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
836 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
837 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
838
839 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
840 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
841 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
842 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
843 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
844 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
845
846 translateOp dflags AndOp = Just (mo_wordAnd dflags)
847 translateOp dflags OrOp = Just (mo_wordOr dflags)
848 translateOp dflags XorOp = Just (mo_wordXor dflags)
849 translateOp dflags NotOp = Just (mo_wordNot dflags)
850 translateOp dflags SllOp = Just (mo_wordShl dflags)
851 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
852
853 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
854
855 -- Native word signed ops
856
857 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
858 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
859 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
860 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
861 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
862
863
864 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
865 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
866 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
867 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
868
869 translateOp dflags ISllOp = Just (mo_wordShl dflags)
870 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
871 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
872
873 -- Native word unsigned ops
874
875 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
876 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
877 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
878 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
879
880 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
881 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
882 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
883
884 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
885 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
886 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
887 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
888
889 -- Char# ops
890
891 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
892 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
893 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
894 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
895 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
896 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
897
898 -- Double ops
899
900 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
901 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
902 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
903 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
904 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
905 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
906
907 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
908 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
909 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
910 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
911 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
912
913 -- Float ops
914
915 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
916 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
917 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
918 translateOp _ FloatLeOp = Just (MO_F_Le W32)
919 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
920 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
921
922 translateOp _ FloatAddOp = Just (MO_F_Add W32)
923 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
924 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
925 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
926 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
927
928 -- Floating point vector ops
929
930 translateOp _ FloatX4AddOp = Just (MO_VF_Add 4 W32)
931 translateOp _ FloatX4SubOp = Just (MO_VF_Sub 4 W32)
932 translateOp _ FloatX4MulOp = Just (MO_VF_Mul 4 W32)
933 translateOp _ FloatX4DivOp = Just (MO_VF_Quot 4 W32)
934 translateOp _ FloatX4NegOp = Just (MO_VF_Neg 4 W32)
935
936 translateOp _ DoubleX2AddOp = Just (MO_VF_Add 2 W64)
937 translateOp _ DoubleX2SubOp = Just (MO_VF_Sub 2 W64)
938 translateOp _ DoubleX2MulOp = Just (MO_VF_Mul 2 W64)
939 translateOp _ DoubleX2DivOp = Just (MO_VF_Quot 2 W64)
940 translateOp _ DoubleX2NegOp = Just (MO_VF_Neg 2 W64)
941
942 translateOp _ Int32X4AddOp = Just (MO_V_Add 4 W32)
943 translateOp _ Int32X4SubOp = Just (MO_V_Sub 4 W32)
944 translateOp _ Int32X4MulOp = Just (MO_V_Mul 4 W32)
945 translateOp _ Int32X4QuotOp = Just (MO_VS_Quot 4 W32)
946 translateOp _ Int32X4RemOp = Just (MO_VS_Rem 4 W32)
947 translateOp _ Int32X4NegOp = Just (MO_VS_Neg 4 W32)
948
949 translateOp _ Int64X2AddOp = Just (MO_V_Add 2 W64)
950 translateOp _ Int64X2SubOp = Just (MO_V_Sub 2 W64)
951 translateOp _ Int64X2MulOp = Just (MO_V_Mul 2 W64)
952 translateOp _ Int64X2QuotOp = Just (MO_VS_Quot 2 W64)
953 translateOp _ Int64X2RemOp = Just (MO_VS_Rem 2 W64)
954 translateOp _ Int64X2NegOp = Just (MO_VS_Neg 2 W64)
955
956 -- Conversions
957
958 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
959 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
960
961 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
962 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
963
964 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
965 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
966
967 -- Word comparisons masquerading as more exotic things.
968
969 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
970 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
971 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
972 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
973 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
974 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
975 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
976
977 translateOp _ _ = Nothing
978
979 -- These primops are implemented by CallishMachOps, because they sometimes
980 -- turn into foreign calls depending on the backend.
981
982 callishOp :: PrimOp -> Maybe CallishMachOp
983 callishOp DoublePowerOp = Just MO_F64_Pwr
984 callishOp DoubleSinOp = Just MO_F64_Sin
985 callishOp DoubleCosOp = Just MO_F64_Cos
986 callishOp DoubleTanOp = Just MO_F64_Tan
987 callishOp DoubleSinhOp = Just MO_F64_Sinh
988 callishOp DoubleCoshOp = Just MO_F64_Cosh
989 callishOp DoubleTanhOp = Just MO_F64_Tanh
990 callishOp DoubleAsinOp = Just MO_F64_Asin
991 callishOp DoubleAcosOp = Just MO_F64_Acos
992 callishOp DoubleAtanOp = Just MO_F64_Atan
993 callishOp DoubleLogOp = Just MO_F64_Log
994 callishOp DoubleExpOp = Just MO_F64_Exp
995 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
996
997 callishOp FloatPowerOp = Just MO_F32_Pwr
998 callishOp FloatSinOp = Just MO_F32_Sin
999 callishOp FloatCosOp = Just MO_F32_Cos
1000 callishOp FloatTanOp = Just MO_F32_Tan
1001 callishOp FloatSinhOp = Just MO_F32_Sinh
1002 callishOp FloatCoshOp = Just MO_F32_Cosh
1003 callishOp FloatTanhOp = Just MO_F32_Tanh
1004 callishOp FloatAsinOp = Just MO_F32_Asin
1005 callishOp FloatAcosOp = Just MO_F32_Acos
1006 callishOp FloatAtanOp = Just MO_F32_Atan
1007 callishOp FloatLogOp = Just MO_F32_Log
1008 callishOp FloatExpOp = Just MO_F32_Exp
1009 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1010
1011 callishOp _ = Nothing
1012
1013 ------------------------------------------------------------------------------
1014 -- Helpers for translating various minor variants of array indexing.
1015
1016 doIndexOffAddrOp :: Maybe MachOp
1017 -> CmmType
1018 -> [LocalReg]
1019 -> [CmmExpr]
1020 -> FCode ()
1021 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1022 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1023 doIndexOffAddrOp _ _ _ _
1024 = panic "StgCmmPrim: doIndexOffAddrOp"
1025
1026 doIndexOffAddrOpAs :: Maybe MachOp
1027 -> CmmType
1028 -> CmmType
1029 -> [LocalReg]
1030 -> [CmmExpr]
1031 -> FCode ()
1032 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1033 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1034 doIndexOffAddrOpAs _ _ _ _ _
1035 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1036
1037 doIndexByteArrayOp :: Maybe MachOp
1038 -> CmmType
1039 -> [LocalReg]
1040 -> [CmmExpr]
1041 -> FCode ()
1042 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1043 = do dflags <- getDynFlags
1044 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1045 doIndexByteArrayOp _ _ _ _
1046 = panic "StgCmmPrim: doIndexByteArrayOp"
1047
1048 doIndexByteArrayOpAs :: Maybe MachOp
1049 -> CmmType
1050 -> CmmType
1051 -> [LocalReg]
1052 -> [CmmExpr]
1053 -> FCode ()
1054 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1055 = do dflags <- getDynFlags
1056 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1057 doIndexByteArrayOpAs _ _ _ _ _
1058 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1059
1060 doReadPtrArrayOp :: LocalReg
1061 -> CmmExpr
1062 -> CmmExpr
1063 -> FCode ()
1064 doReadPtrArrayOp res addr idx
1065 = do dflags <- getDynFlags
1066 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1067
1068 doWriteOffAddrOp :: Maybe MachOp
1069 -> CmmType
1070 -> [LocalReg]
1071 -> [CmmExpr]
1072 -> FCode ()
1073 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1074 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1075 doWriteOffAddrOp _ _ _ _
1076 = panic "StgCmmPrim: doWriteOffAddrOp"
1077
1078 doWriteByteArrayOp :: Maybe MachOp
1079 -> CmmType
1080 -> [LocalReg]
1081 -> [CmmExpr]
1082 -> FCode ()
1083 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1084 = do dflags <- getDynFlags
1085 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1086 doWriteByteArrayOp _ _ _ _
1087 = panic "StgCmmPrim: doWriteByteArrayOp"
1088
1089 doWritePtrArrayOp :: CmmExpr
1090 -> CmmExpr
1091 -> CmmExpr
1092 -> FCode ()
1093 doWritePtrArrayOp addr idx val
1094 = do dflags <- getDynFlags
1095 let ty = cmmExprType dflags val
1096 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1097 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1098 -- the write barrier. We must write a byte into the mark table:
1099 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1100 emit $ mkStore (
1101 cmmOffsetExpr dflags
1102 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1103 (loadArrPtrsSize dflags addr))
1104 (CmmMachOp (mo_wordUShr dflags) [idx,
1105 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1106 ) (CmmLit (CmmInt 1 W8))
1107
1108 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1109 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1110 where off = fixedHdrSize dflags * wORD_SIZE dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1111
1112 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1113 -> Maybe MachOp -- Optional result cast
1114 -> CmmType -- Type of element we are accessing
1115 -> LocalReg -- Destination
1116 -> CmmExpr -- Base address
1117 -> CmmType -- Type of element by which we are indexing
1118 -> CmmExpr -- Index
1119 -> FCode ()
1120 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1121 = do dflags <- getDynFlags
1122 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1123 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1124 = do dflags <- getDynFlags
1125 emitAssign (CmmLocal res) (CmmMachOp cast [
1126 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1127
1128 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1129 -> Maybe MachOp -- Optional value cast
1130 -> CmmExpr -- Base address
1131 -> CmmType -- Type of element by which we are indexing
1132 -> CmmExpr -- Index
1133 -> CmmExpr -- Value to write
1134 -> FCode ()
1135 mkBasicIndexedWrite off Nothing base idx_ty idx val
1136 = do dflags <- getDynFlags
1137 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1138 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1139 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1140
1141 -- ----------------------------------------------------------------------------
1142 -- Misc utils
1143
1144 cmmIndexOffExpr :: DynFlags
1145 -> ByteOff -- Initial offset in bytes
1146 -> Width -- Width of element by which we are indexing
1147 -> CmmExpr -- Base address
1148 -> CmmExpr -- Index
1149 -> CmmExpr
1150 cmmIndexOffExpr dflags off width base idx
1151 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1152
1153 cmmLoadIndexOffExpr :: DynFlags
1154 -> ByteOff -- Initial offset in bytes
1155 -> CmmType -- Type of element we are accessing
1156 -> CmmExpr -- Base address
1157 -> CmmType -- Type of element by which we are indexing
1158 -> CmmExpr -- Index
1159 -> CmmExpr
1160 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1161 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1162
1163 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1164 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1165
1166 ------------------------------------------------------------------------------
1167 -- Helpers for translating vector packing and unpacking.
1168
1169 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1170 -> CmmType -- Type of vector
1171 -> CmmExpr -- Initial vector
1172 -> [CmmExpr] -- Elements
1173 -> CmmFormal -- Destination for result
1174 -> FCode ()
1175 doVecPackOp maybe_pre_write_cast ty z es res = do
1176 dst <- newTemp ty
1177 emitAssign (CmmLocal dst) z
1178 vecPack dst es 0
1179 where
1180 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1181 vecPack src [] _ =
1182 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1183
1184 vecPack src (e : es) i = do
1185 dst <- newTemp ty
1186 if isFloatType (vecElemType ty)
1187 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1188 [CmmReg (CmmLocal src), cast e, iLit])
1189 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1190 [CmmReg (CmmLocal src), cast e, iLit])
1191 vecPack dst es (i + 1)
1192 where
1193 -- vector indices are always 32-bits
1194 iLit = CmmLit (CmmInt (toInteger i) W32)
1195
1196 cast :: CmmExpr -> CmmExpr
1197 cast val = case maybe_pre_write_cast of
1198 Nothing -> val
1199 Just cast -> CmmMachOp cast [val]
1200
1201 len :: Length
1202 len = vecLength ty
1203
1204 wid :: Width
1205 wid = typeWidth (vecElemType ty)
1206
1207 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1208 -> CmmType -- Type of vector
1209 -> CmmExpr -- Vector
1210 -> [CmmFormal] -- Element results
1211 -> FCode ()
1212 doVecUnpackOp maybe_post_read_cast ty e res =
1213 vecUnpack res 0
1214 where
1215 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1216 vecUnpack [] _ =
1217 return ()
1218
1219 vecUnpack (r : rs) i = do
1220 if isFloatType (vecElemType ty)
1221 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1222 [e, iLit]))
1223 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1224 [e, iLit]))
1225 vecUnpack rs (i + 1)
1226 where
1227 -- vector indices are always 32-bits
1228 iLit = CmmLit (CmmInt (toInteger i) W32)
1229
1230 cast :: CmmExpr -> CmmExpr
1231 cast val = case maybe_post_read_cast of
1232 Nothing -> val
1233 Just cast -> CmmMachOp cast [val]
1234
1235 len :: Length
1236 len = vecLength ty
1237
1238 wid :: Width
1239 wid = typeWidth (vecElemType ty)
1240
1241 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1242 -> CmmType -- Vector type
1243 -> CmmExpr -- Source vector
1244 -> CmmExpr -- Element
1245 -> CmmExpr -- Index at which to insert element
1246 -> CmmFormal -- Destination for result
1247 -> FCode ()
1248 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1249 dflags <- getDynFlags
1250 -- vector indices are always 32-bits
1251 let idx' :: CmmExpr
1252 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1253 if isFloatType (vecElemType ty)
1254 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1255 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1256 where
1257 cast :: CmmExpr -> CmmExpr
1258 cast val = case maybe_pre_write_cast of
1259 Nothing -> val
1260 Just cast -> CmmMachOp cast [val]
1261
1262 len :: Length
1263 len = vecLength ty
1264
1265 wid :: Width
1266 wid = typeWidth (vecElemType ty)
1267
1268 -- ----------------------------------------------------------------------------
1269 -- Copying byte arrays
1270
1271 -- | Takes a source 'ByteArray#', an offset in the source array, a
1272 -- destination 'MutableByteArray#', an offset into the destination
1273 -- array, and the number of bytes to copy. Copies the given number of
1274 -- bytes from the source array to the destination array.
1275 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1276 -> FCode ()
1277 doCopyByteArrayOp = emitCopyByteArray copy
1278 where
1279 -- Copy data (we assume the arrays aren't overlapping since
1280 -- they're of different types)
1281 copy _src _dst dst_p src_p bytes =
1282 do dflags <- getDynFlags
1283 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1284
1285 -- | Takes a source 'MutableByteArray#', an offset in the source
1286 -- array, a destination 'MutableByteArray#', an offset into the
1287 -- destination array, and the number of bytes to copy. Copies the
1288 -- given number of bytes from the source array to the destination
1289 -- array.
1290 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1291 -> FCode ()
1292 doCopyMutableByteArrayOp = emitCopyByteArray copy
1293 where
1294 -- The only time the memory might overlap is when the two arrays
1295 -- we were provided are the same array!
1296 -- TODO: Optimize branch for common case of no aliasing.
1297 copy src dst dst_p src_p bytes = do
1298 dflags <- getDynFlags
1299 [moveCall, cpyCall] <- forkAlts [
1300 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags 1),
1301 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1302 ]
1303 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1304
1305 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1306 -> FCode ())
1307 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1308 -> FCode ()
1309 emitCopyByteArray copy src src_off dst dst_off n = do
1310 dflags <- getDynFlags
1311 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1312 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1313 copy src dst dst_p src_p n
1314
1315 -- ----------------------------------------------------------------------------
1316 -- Setting byte arrays
1317
1318 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1319 -- and a byte, and sets each of the selected bytes in the array to the
1320 -- character.
1321 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1322 -> FCode ()
1323 doSetByteArrayOp ba off len c
1324 = do dflags <- getDynFlags
1325 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1326 emitMemsetCall p c len (mkIntExpr dflags 1)
1327
1328 -- ----------------------------------------------------------------------------
1329 -- Copying pointer arrays
1330
1331 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1332 -- nowhere else in the code generator. This is mostly because these
1333 -- "primitive" ops result in a surprisingly large amount of code. It
1334 -- will likely be worthwhile to optimize what is emitted here, so that
1335 -- our optimization passes don't waste time repeatedly optimizing the
1336 -- same bits of code.
1337
1338 -- More closely imitates 'assignTemp' from the old code generator, which
1339 -- returns a CmmExpr rather than a LocalReg.
1340 assignTempE :: CmmExpr -> FCode CmmExpr
1341 assignTempE e = do
1342 t <- assignTemp e
1343 return (CmmReg (CmmLocal t))
1344
1345 -- | Takes a source 'Array#', an offset in the source array, a
1346 -- destination 'MutableArray#', an offset into the destination array,
1347 -- and the number of elements to copy. Copies the given number of
1348 -- elements from the source array to the destination array.
1349 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1350 -> FCode ()
1351 doCopyArrayOp = emitCopyArray copy
1352 where
1353 -- Copy data (we assume the arrays aren't overlapping since
1354 -- they're of different types)
1355 copy _src _dst dst_p src_p bytes =
1356 do dflags <- getDynFlags
1357 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1358
1359
1360 -- | Takes a source 'MutableArray#', an offset in the source array, a
1361 -- destination 'MutableArray#', an offset into the destination array,
1362 -- and the number of elements to copy. Copies the given number of
1363 -- elements from the source array to the destination array.
1364 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1365 -> FCode ()
1366 doCopyMutableArrayOp = emitCopyArray copy
1367 where
1368 -- The only time the memory might overlap is when the two arrays
1369 -- we were provided are the same array!
1370 -- TODO: Optimize branch for common case of no aliasing.
1371 copy src dst dst_p src_p bytes = do
1372 dflags <- getDynFlags
1373 [moveCall, cpyCall] <- forkAlts [
1374 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags)),
1375 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1376 ]
1377 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1378
1379 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1380 -> FCode ())
1381 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1382 -> FCode ()
1383 emitCopyArray copy src0 src_off0 dst0 dst_off0 n0 = do
1384 dflags <- getDynFlags
1385 n <- assignTempE n0
1386 nonzero <- getCode $ do
1387 -- Passed as arguments (be careful)
1388 src <- assignTempE src0
1389 src_off <- assignTempE src_off0
1390 dst <- assignTempE dst0
1391 dst_off <- assignTempE dst_off0
1392
1393 -- Set the dirty bit in the header.
1394 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1395
1396 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst (arrPtrsHdrSize dflags)
1397 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1398 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1399 bytes <- assignTempE $ cmmMulWord dflags n (mkIntExpr dflags (wORD_SIZE dflags))
1400
1401 copy src dst dst_p src_p bytes
1402
1403 -- The base address of the destination card table
1404 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p (loadArrPtrsSize dflags dst)
1405
1406 emitSetCards dst_off dst_cards_p n
1407
1408 emit =<< mkCmmIfThen (cmmNeWord dflags n (mkIntExpr dflags 0)) nonzero
1409
1410 -- | Takes an info table label, a register to return the newly
1411 -- allocated array in, a source array, an offset in the source array,
1412 -- and the number of elements to copy. Allocates a new array and
1413 -- initializes it form the source array.
1414 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> CmmExpr
1415 -> FCode ()
1416 emitCloneArray info_p res_r src0 src_off0 n0 = do
1417 dflags <- getDynFlags
1418 let arrPtrsHdrSizeW dflags = mkIntExpr dflags (fixedHdrSize dflags +
1419 (sIZEOF_StgMutArrPtrs_NoHdr dflags `div` wORD_SIZE dflags))
1420 myCapability = cmmSubWord dflags (CmmReg baseReg) (mkIntExpr dflags (oFFSET_Capability_r dflags))
1421 -- Passed as arguments (be careful)
1422 src <- assignTempE src0
1423 src_off <- assignTempE src_off0
1424 n <- assignTempE n0
1425
1426 card_bytes <- assignTempE $ cardRoundUp dflags n
1427 size <- assignTempE $ cmmAddWord dflags n (bytesToWordsRoundUp dflags card_bytes)
1428 words <- assignTempE $ cmmAddWord dflags (arrPtrsHdrSizeW dflags) size
1429
1430 arr_r <- newTemp (bWord dflags)
1431 emitAllocateCall arr_r myCapability words
1432 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags)) (cmmMulWord dflags n (wordSize dflags))
1433 (zeroExpr dflags)
1434
1435 let arr = CmmReg (CmmLocal arr_r)
1436 emitSetDynHdr arr (CmmLit (CmmLabel info_p)) curCCS
1437 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1438 oFFSET_StgMutArrPtrs_ptrs dflags)) n
1439 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1440 oFFSET_StgMutArrPtrs_size dflags)) size
1441
1442 dst_p <- assignTempE $ cmmOffsetB dflags arr (arrPtrsHdrSize dflags)
1443 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags))
1444 src_off
1445
1446 emitMemcpyCall dst_p src_p (cmmMulWord dflags n (wordSize dflags)) (mkIntExpr dflags (wORD_SIZE dflags))
1447
1448 emitMemsetCall (cmmOffsetExprW dflags dst_p n)
1449 (mkIntExpr dflags 1)
1450 card_bytes
1451 (mkIntExpr dflags (wORD_SIZE dflags))
1452 emit $ mkAssign (CmmLocal res_r) arr
1453
1454 -- | Takes and offset in the destination array, the base address of
1455 -- the card table, and the number of elements affected (*not* the
1456 -- number of cards). The number of elements may not be zero.
1457 -- Marks the relevant cards as dirty.
1458 emitSetCards :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1459 emitSetCards dst_start dst_cards_start n = do
1460 dflags <- getDynFlags
1461 start_card <- assignTempE $ card dflags dst_start
1462 let end_card = card dflags (cmmSubWord dflags (cmmAddWord dflags dst_start n) (mkIntExpr dflags 1))
1463 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
1464 (mkIntExpr dflags 1)
1465 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
1466 (mkIntExpr dflags 1) -- no alignment (1 byte)
1467
1468 -- Convert an element index to a card index
1469 card :: DynFlags -> CmmExpr -> CmmExpr
1470 card dflags i = cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
1471
1472 -- Convert a number of elements to a number of cards, rounding up
1473 cardRoundUp :: DynFlags -> CmmExpr -> CmmExpr
1474 cardRoundUp dflags i = card dflags (cmmAddWord dflags i (mkIntExpr dflags ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1)))
1475
1476 bytesToWordsRoundUp :: DynFlags -> CmmExpr -> CmmExpr
1477 bytesToWordsRoundUp dflags e = cmmQuotWord dflags (cmmAddWord dflags e (mkIntExpr dflags (wORD_SIZE dflags - 1)))
1478 (wordSize dflags)
1479
1480 wordSize :: DynFlags -> CmmExpr
1481 wordSize dflags = mkIntExpr dflags (wORD_SIZE dflags)
1482
1483 -- | Emit a call to @memcpy@.
1484 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1485 emitMemcpyCall dst src n align = do
1486 emitPrimCall
1487 [ {-no results-} ]
1488 MO_Memcpy
1489 [ dst, src, n, align ]
1490
1491 -- | Emit a call to @memmove@.
1492 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1493 emitMemmoveCall dst src n align = do
1494 emitPrimCall
1495 [ {- no results -} ]
1496 MO_Memmove
1497 [ dst, src, n, align ]
1498
1499 -- | Emit a call to @memset@. The second argument must fit inside an
1500 -- unsigned char.
1501 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1502 emitMemsetCall dst c n align = do
1503 emitPrimCall
1504 [ {- no results -} ]
1505 MO_Memset
1506 [ dst, c, n, align ]
1507
1508 -- | Emit a call to @allocate@.
1509 emitAllocateCall :: LocalReg -> CmmExpr -> CmmExpr -> FCode ()
1510 emitAllocateCall res cap n = do
1511 emitCCall
1512 [ (res, AddrHint) ]
1513 allocate
1514 [ (cap, AddrHint)
1515 , (n, NoHint)
1516 ]
1517 where
1518 allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
1519 ForeignLabelInExternalPackage IsFunction))
1520
1521 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1522 emitPopCntCall res x width = do
1523 emitPrimCall
1524 [ res ]
1525 (MO_PopCnt width)
1526 [ x ]