In CMM, only allow foreign calls to labels, not arbitrary expressions
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C--: primitive operations
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmPrim (
10 cgOpApp,
11 cgPrimOp -- internal(ish), used by cgCase to get code for a
12 -- comparison without also turning it into a Bool.
13 ) where
14
15 #include "HsVersions.h"
16
17 import StgCmmLayout
18 import StgCmmForeign
19 import StgCmmEnv
20 import StgCmmMonad
21 import StgCmmUtils
22 import StgCmmTicky
23 import StgCmmHeap
24 import StgCmmProf
25
26 import DynFlags
27 import Platform
28 import BasicTypes
29 import MkGraph
30 import StgSyn
31 import Cmm
32 import CmmInfo
33 import Type ( Type, tyConAppTyCon )
34 import TyCon
35 import CLabel
36 import CmmUtils
37 import PrimOp
38 import SMRep
39 import FastString
40 import Outputable
41 import Util
42
43 import Control.Monad (liftM)
44 import Data.Bits
45
46 ------------------------------------------------------------------------
47 -- Primitive operations and foreign calls
48 ------------------------------------------------------------------------
49
50 {- Note [Foreign call results]
51 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
52 A foreign call always returns an unboxed tuple of results, one
53 of which is the state token. This seems to happen even for pure
54 calls.
55
56 Even if we returned a single result for pure calls, it'd still be
57 right to wrap it in a singleton unboxed tuple, because the result
58 might be a Haskell closure pointer, we don't want to evaluate it. -}
59
60 ----------------------------------
61 cgOpApp :: StgOp -- The op
62 -> [StgArg] -- Arguments
63 -> Type -- Result type (always an unboxed tuple)
64 -> FCode ReturnKind
65
66 -- Foreign calls
67 cgOpApp (StgFCallOp fcall _) stg_args res_ty
68 = cgForeignCall fcall stg_args res_ty
69 -- Note [Foreign call results]
70
71 -- tagToEnum# is special: we need to pull the constructor
72 -- out of the table, and perform an appropriate return.
73
74 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
75 = ASSERT(isEnumerationTyCon tycon)
76 do { dflags <- getDynFlags
77 ; args' <- getNonVoidArgAmodes [arg]
78 ; let amode = case args' of [amode] -> amode
79 _ -> panic "TagToEnumOp had void arg"
80 ; emitReturn [tagToClosure dflags tycon amode] }
81 where
82 -- If you're reading this code in the attempt to figure
83 -- out why the compiler panic'ed here, it is probably because
84 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
85 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
86 -- That won't work.
87 tycon = tyConAppTyCon res_ty
88
89 cgOpApp (StgPrimOp primop) args res_ty
90 | primOpOutOfLine primop
91 = do { cmm_args <- getNonVoidArgAmodes args
92 ; let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
93 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
94
95 | ReturnsPrim VoidRep <- result_info
96 = do cgPrimOp [] primop args
97 emitReturn []
98
99 | ReturnsPrim rep <- result_info
100 = do dflags <- getDynFlags
101 res <- newTemp (primRepCmmType dflags rep)
102 cgPrimOp [res] primop args
103 emitReturn [CmmReg (CmmLocal res)]
104
105 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
106 = do (regs, _hints) <- newUnboxedTupleRegs res_ty
107 cgPrimOp regs primop args
108 emitReturn (map (CmmReg . CmmLocal) regs)
109
110 | ReturnsAlg tycon <- result_info
111 , isEnumerationTyCon tycon
112 -- c.f. cgExpr (...TagToEnumOp...)
113 = do dflags <- getDynFlags
114 tag_reg <- newTemp (bWord dflags)
115 cgPrimOp [tag_reg] primop args
116 emitReturn [tagToClosure dflags tycon
117 (CmmReg (CmmLocal tag_reg))]
118
119 | otherwise = panic "cgPrimop"
120 where
121 result_info = getPrimOpResultInfo primop
122
123 cgOpApp (StgPrimCallOp primcall) args _res_ty
124 = do { cmm_args <- getNonVoidArgAmodes args
125 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
126 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
127
128 ---------------------------------------------------
129 cgPrimOp :: [LocalReg] -- where to put the results
130 -> PrimOp -- the op
131 -> [StgArg] -- arguments
132 -> FCode ()
133
134 cgPrimOp results op args
135 = do dflags <- getDynFlags
136 arg_exprs <- getNonVoidArgAmodes args
137 emitPrimOp dflags results op arg_exprs
138
139
140 ------------------------------------------------------------------------
141 -- Emitting code for a primop
142 ------------------------------------------------------------------------
143
144 emitPrimOp :: DynFlags
145 -> [LocalReg] -- where to put the results
146 -> PrimOp -- the op
147 -> [CmmExpr] -- arguments
148 -> FCode ()
149
150 -- First we handle various awkward cases specially. The remaining
151 -- easy cases are then handled by translateOp, defined below.
152
153 emitPrimOp dflags [res_r,res_c] IntAddCOp [aa,bb]
154 {-
155 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
156 C, and without needing any comparisons. This may not be the
157 fastest way to do it - if you have better code, please send it! --SDM
158
159 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
160
161 We currently don't make use of the r value if c is != 0 (i.e.
162 overflow), we just convert to big integers and try again. This
163 could be improved by making r and c the correct values for
164 plugging into a new J#.
165
166 { r = ((I_)(a)) + ((I_)(b)); \
167 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
168 >> (BITS_IN (I_) - 1); \
169 }
170 Wading through the mass of bracketry, it seems to reduce to:
171 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
172
173 -}
174 = emit $ catAGraphs [
175 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
176 mkAssign (CmmLocal res_c) $
177 CmmMachOp (mo_wordUShr dflags) [
178 CmmMachOp (mo_wordAnd dflags) [
179 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
180 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
181 ],
182 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
183 ]
184 ]
185
186
187 emitPrimOp dflags [res_r,res_c] IntSubCOp [aa,bb]
188 {- Similarly:
189 #define subIntCzh(r,c,a,b) \
190 { r = ((I_)(a)) - ((I_)(b)); \
191 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
192 >> (BITS_IN (I_) - 1); \
193 }
194
195 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
196 -}
197 = emit $ catAGraphs [
198 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
199 mkAssign (CmmLocal res_c) $
200 CmmMachOp (mo_wordUShr dflags) [
201 CmmMachOp (mo_wordAnd dflags) [
202 CmmMachOp (mo_wordXor dflags) [aa,bb],
203 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
204 ],
205 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
206 ]
207 ]
208
209
210 emitPrimOp _ [res] ParOp [arg]
211 =
212 -- for now, just implement this in a C function
213 -- later, we might want to inline it.
214 emitCCall
215 [(res,NoHint)]
216 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
217 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
218
219 emitPrimOp dflags [res] SparkOp [arg]
220 = do
221 -- returns the value of arg in res. We're going to therefore
222 -- refer to arg twice (once to pass to newSpark(), and once to
223 -- assign to res), so put it in a temporary.
224 tmp <- assignTemp arg
225 tmp2 <- newTemp (bWord dflags)
226 emitCCall
227 [(tmp2,NoHint)]
228 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
229 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
230 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
231
232 emitPrimOp dflags [res] GetCCSOfOp [arg]
233 = emitAssign (CmmLocal res) val
234 where
235 val
236 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
237 | otherwise = CmmLit (zeroCLit dflags)
238
239 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
240 = emitAssign (CmmLocal res) curCCS
241
242 emitPrimOp dflags [res] ReadMutVarOp [mutv]
243 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSize dflags) (gcWord dflags))
244
245 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
246 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSize dflags)) var
247 emitCCall
248 [{-no results-}]
249 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
250 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
251
252 -- #define sizzeofByteArrayzh(r,a) \
253 -- r = ((StgArrWords *)(a))->bytes
254 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
255 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
256
257 -- #define sizzeofMutableByteArrayzh(r,a) \
258 -- r = ((StgArrWords *)(a))->bytes
259 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
260 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
261
262
263 -- #define touchzh(o) /* nothing */
264 emitPrimOp _ res@[] TouchOp args@[_arg]
265 = do emitPrimCall res MO_Touch args
266
267 emitPrimOp _ res@[] PrefetchByteArrayOp args@[_arg]
268 = do emitPrimCall res MO_Prefetch_Data args
269
270 emitPrimOp _ res@[] PrefetchMutableByteArrayOp args@[_arg]
271 = do emitPrimCall res MO_Prefetch_Data args
272
273 emitPrimOp _ res@[] PrefetchAddrOp args@[_arg]
274 = do emitPrimCall res MO_Prefetch_Data args
275
276 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
277 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
278 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
279
280 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
281 emitPrimOp dflags [res] StableNameToIntOp [arg]
282 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
283
284 -- #define eqStableNamezh(r,sn1,sn2) \
285 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
286 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
287 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
288 cmmLoadIndexW dflags arg1 (fixedHdrSize dflags) (bWord dflags),
289 cmmLoadIndexW dflags arg2 (fixedHdrSize dflags) (bWord dflags)
290 ])
291
292
293 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
294 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
295
296 -- #define addrToHValuezh(r,a) r=(P_)a
297 emitPrimOp _ [res] AddrToAnyOp [arg]
298 = emitAssign (CmmLocal res) arg
299
300 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
301 -- Note: argument may be tagged!
302 emitPrimOp dflags [res] DataToTagOp [arg]
303 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
304
305 {- Freezing arrays-of-ptrs requires changing an info table, for the
306 benefit of the generational collector. It needs to scavenge mutable
307 objects, even if they are in old space. When they become immutable,
308 they can be removed from this scavenge list. -}
309
310 -- #define unsafeFreezzeArrayzh(r,a)
311 -- {
312 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
313 -- r = a;
314 -- }
315 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
316 = emit $ catAGraphs
317 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
318 mkAssign (CmmLocal res) arg ]
319 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
320 = emit $ catAGraphs
321 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
322 mkAssign (CmmLocal res) arg ]
323
324 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
325 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
326 = emitAssign (CmmLocal res) arg
327
328 -- Copying pointer arrays
329
330 emitPrimOp _ [] CopyArrayOp [src,src_off,dst,dst_off,n] =
331 doCopyArrayOp src src_off dst dst_off n
332 emitPrimOp _ [] CopyMutableArrayOp [src,src_off,dst,dst_off,n] =
333 doCopyMutableArrayOp src src_off dst dst_off n
334 emitPrimOp _ [res] CloneArrayOp [src,src_off,n] =
335 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
336 emitPrimOp _ [res] CloneMutableArrayOp [src,src_off,n] =
337 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
338 emitPrimOp _ [res] FreezeArrayOp [src,src_off,n] =
339 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
340 emitPrimOp _ [res] ThawArrayOp [src,src_off,n] =
341 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
342
343 emitPrimOp _ [] CopyArrayArrayOp [src,src_off,dst,dst_off,n] =
344 doCopyArrayOp src src_off dst dst_off n
345 emitPrimOp _ [] CopyMutableArrayArrayOp [src,src_off,dst,dst_off,n] =
346 doCopyMutableArrayOp src src_off dst dst_off n
347
348 -- Reading/writing pointer arrays
349
350 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
351 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
352 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
353
354 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
355 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
356 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
357 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
358 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
359 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
360 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
361 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
362 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
363 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
364
365 emitPrimOp dflags [res] SizeofArrayOp [arg]
366 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags))
367 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
368 = emitPrimOp dflags [res] SizeofArrayOp [arg]
369 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
370 = emitPrimOp dflags [res] SizeofArrayOp [arg]
371 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
372 = emitPrimOp dflags [res] SizeofArrayOp [arg]
373
374 -- IndexXXXoffAddr
375
376 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
377 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
378 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
379 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
380 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
381 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
382 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
383 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
384 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
385 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
386 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
387 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
388 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
389 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
390 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
391 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
392 emitPrimOp _ res IndexOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
393 emitPrimOp _ res IndexOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 f32 res args
394 emitPrimOp _ res IndexOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
395 emitPrimOp _ res IndexOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 f64 res args
396 emitPrimOp _ res IndexOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
397 emitPrimOp _ res IndexOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
398 emitPrimOp _ res IndexOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
399 emitPrimOp _ res IndexOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
400
401 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
402
403 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
404 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
405 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
406 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
407 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
408 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
409 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
410 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
411 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
412 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
413 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
414 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
415 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
416 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
417 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
418 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
419 emitPrimOp _ res ReadOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
420 emitPrimOp _ res ReadOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 b32 res args
421 emitPrimOp _ res ReadOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
422 emitPrimOp _ res ReadOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 b64 res args
423 emitPrimOp _ res ReadOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
424 emitPrimOp _ res ReadOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
425 emitPrimOp _ res ReadOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
426 emitPrimOp _ res ReadOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
427
428 -- IndexXXXArray
429
430 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
431 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
432 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
433 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
434 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
435 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
436 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
437 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
438 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
439 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
440 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
441 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
442 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
443 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
444 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
445 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
446 emitPrimOp _ res IndexByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
447 emitPrimOp _ res IndexByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
448 emitPrimOp _ res IndexByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
449 emitPrimOp _ res IndexByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
450 emitPrimOp _ res IndexByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
451 emitPrimOp _ res IndexByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
452 emitPrimOp _ res IndexByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
453 emitPrimOp _ res IndexByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
454
455 -- ReadXXXArray, identical to IndexXXXArray.
456
457 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
458 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
459 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
460 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
461 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
462 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
463 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
464 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
465 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
466 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
467 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
468 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
469 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
470 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
471 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
472 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
473 emitPrimOp _ res ReadByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
474 emitPrimOp _ res ReadByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
475 emitPrimOp _ res ReadByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
476 emitPrimOp _ res ReadByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
477 emitPrimOp _ res ReadByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
478 emitPrimOp _ res ReadByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
479 emitPrimOp _ res ReadByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
480 emitPrimOp _ res ReadByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
481
482 -- WriteXXXoffAddr
483
484 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
485 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
486 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
487 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
488 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
489 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
490 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
491 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
492 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
493 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
494 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
495 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
496 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
497 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
498 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
499 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
500 emitPrimOp _ res WriteOffAddrOp_FloatX4 args = doWriteOffAddrOp Nothing vec4f32 res args
501 emitPrimOp _ res WriteOffAddrOp_FloatAsFloatX4 args = doWriteOffAddrOp Nothing f32 res args
502 emitPrimOp _ res WriteOffAddrOp_DoubleX2 args = doWriteOffAddrOp Nothing vec2f64 res args
503 emitPrimOp _ res WriteOffAddrOp_DoubleAsDoubleX2 args = doWriteOffAddrOp Nothing f64 res args
504 emitPrimOp _ res WriteOffAddrOp_Int32X4 args = doWriteOffAddrOp Nothing vec4b32 res args
505 emitPrimOp _ res WriteOffAddrOp_Int32AsInt32X4 args = doWriteOffAddrOp Nothing b32 res args
506 emitPrimOp _ res WriteOffAddrOp_Int64X2 args = doWriteOffAddrOp Nothing vec2b64 res args
507 emitPrimOp _ res WriteOffAddrOp_Int64AsInt64X2 args = doWriteOffAddrOp Nothing b64 res args
508
509 -- WriteXXXArray
510
511 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
512 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
513 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
514 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
515 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
516 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
517 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
518 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
519 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
520 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
521 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
522 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
523 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
524 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
525 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
526 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
527 emitPrimOp _ res WriteByteArrayOp_FloatX4 args = doWriteByteArrayOp Nothing vec4f32 res args
528 emitPrimOp _ res WriteByteArrayOp_FloatAsFloatX4 args = doWriteByteArrayOp Nothing f32 res args
529 emitPrimOp _ res WriteByteArrayOp_DoubleX2 args = doWriteByteArrayOp Nothing vec2f64 res args
530 emitPrimOp _ res WriteByteArrayOp_DoubleAsDoubleX2 args = doWriteByteArrayOp Nothing f64 res args
531 emitPrimOp _ res WriteByteArrayOp_Int32X4 args = doWriteByteArrayOp Nothing vec4b32 res args
532 emitPrimOp _ res WriteByteArrayOp_Int32AsInt32X4 args = doWriteByteArrayOp Nothing b32 res args
533 emitPrimOp _ res WriteByteArrayOp_Int64X2 args = doWriteByteArrayOp Nothing vec2b64 res args
534 emitPrimOp _ res WriteByteArrayOp_Int64AsInt64X2 args = doWriteByteArrayOp Nothing b64 res args
535
536 -- Copying and setting byte arrays
537 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
538 doCopyByteArrayOp src src_off dst dst_off n
539 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
540 doCopyMutableByteArrayOp src src_off dst dst_off n
541 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
542 doSetByteArrayOp ba off len c
543
544 -- Population count
545 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
546 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
547 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
548 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
549 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
550
551 -- Unsigned int to floating point conversions
552 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
553 (MO_UF_Conv W32) [w]
554 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
555 (MO_UF_Conv W64) [w]
556
557 -- SIMD vector packing and unpacking
558 emitPrimOp _ [res] FloatToFloatX4Op [e] =
559 doVecPackOp Nothing vec4f32 zero [e,e,e,e] res
560 where
561 zero :: CmmExpr
562 zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
563
564 emitPrimOp _ [res] FloatX4PackOp es@[_,_,_,_] =
565 doVecPackOp Nothing vec4f32 zero es res
566 where
567 zero :: CmmExpr
568 zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
569
570 emitPrimOp _ res@[_,_,_,_] FloatX4UnpackOp [arg] =
571 doVecUnpackOp Nothing vec4f32 arg res
572
573 emitPrimOp _ [res] FloatX4InsertOp [v,e,i] =
574 doVecInsertOp Nothing vec4f32 v e i res
575
576 emitPrimOp _ [res] DoubleToDoubleX2Op [e] =
577 doVecPackOp Nothing vec2f64 zero [e,e] res
578 where
579 zero :: CmmExpr
580 zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
581
582 emitPrimOp _ [res] DoubleX2PackOp es@[_,_] =
583 doVecPackOp Nothing vec2f64 zero es res
584 where
585 zero :: CmmExpr
586 zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
587
588 emitPrimOp _ res@[_,_] DoubleX2UnpackOp [arg] =
589 doVecUnpackOp Nothing vec2f64 arg res
590
591 emitPrimOp _ [res] DoubleX2InsertOp [v,e,i] =
592 doVecInsertOp Nothing vec2f64 v e i res
593
594 emitPrimOp dflags [res] Int32ToInt32X4Op [e] =
595 doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero [e,e,e,e] res
596 where
597 zero :: CmmExpr
598 zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
599
600 emitPrimOp dflags [res] Int32X4PackOp es@[_,_,_,_] =
601 doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero es res
602 where
603 zero :: CmmExpr
604 zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
605
606 emitPrimOp dflags res@[_,_,_,_] Int32X4UnpackOp [arg] =
607 doVecUnpackOp (Just (mo_s_32ToWord dflags)) vec4b32 arg res
608
609 emitPrimOp dflags [res] Int32X4InsertOp [v,e,i] =
610 doVecInsertOp (Just (mo_WordTo32 dflags)) vec4b32 v e i res
611
612 emitPrimOp _ [res] Int64ToInt64X2Op [e] =
613 doVecPackOp Nothing vec2b64 zero [e,e] res
614 where
615 zero :: CmmExpr
616 zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
617
618 emitPrimOp _ [res] Int64X2PackOp es@[_,_] =
619 doVecPackOp Nothing vec2b64 zero es res
620 where
621 zero :: CmmExpr
622 zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
623
624 emitPrimOp _ res@[_,_] Int64X2UnpackOp [arg] =
625 doVecUnpackOp Nothing vec2b64 arg res
626
627 emitPrimOp _ [res] Int64X2InsertOp [v,e,i] =
628 doVecInsertOp Nothing vec2b64 v e i res
629
630 -- Prefetch
631 emitPrimOp _ res PrefetchByteArrayOp args = doPrefetchByteArrayOp res args
632 emitPrimOp _ res PrefetchMutableByteArrayOp args = doPrefetchByteArrayOp res args
633 emitPrimOp _ res PrefetchAddrOp args = doPrefetchAddrOp res args
634
635 -- The rest just translate straightforwardly
636 emitPrimOp dflags [res] op [arg]
637 | nopOp op
638 = emitAssign (CmmLocal res) arg
639
640 | Just (mop,rep) <- narrowOp op
641 = emitAssign (CmmLocal res) $
642 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
643
644 emitPrimOp dflags r@[res] op args
645 | Just prim <- callishOp op
646 = do emitPrimCall r prim args
647
648 | Just mop <- translateOp dflags op
649 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
650 emit stmt
651
652 emitPrimOp dflags results op args
653 = case callishPrimOpSupported dflags op of
654 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
655 Right gen -> gen results args
656
657 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
658
659 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
660 callishPrimOpSupported dflags op
661 = case op of
662 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
663 | otherwise -> Right (genericIntQuotRemOp dflags)
664
665 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
666 | otherwise -> Right (genericWordQuotRemOp dflags)
667
668 WordQuotRem2Op | ncg && x86ish -> Left (MO_U_QuotRem2 (wordWidth dflags))
669 | otherwise -> Right (genericWordQuotRem2Op dflags)
670
671 WordAdd2Op | ncg && x86ish -> Left (MO_Add2 (wordWidth dflags))
672 | otherwise -> Right genericWordAdd2Op
673
674 WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags))
675 | otherwise -> Right genericWordMul2Op
676
677 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
678 where
679 ncg = case hscTarget dflags of
680 HscAsm -> True
681 _ -> False
682
683 x86ish = case platformArch (targetPlatform dflags) of
684 ArchX86 -> True
685 ArchX86_64 -> True
686 _ -> False
687
688 genericIntQuotRemOp :: DynFlags -> GenericOp
689 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
690 = emit $ mkAssign (CmmLocal res_q)
691 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
692 mkAssign (CmmLocal res_r)
693 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
694 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
695
696 genericWordQuotRemOp :: DynFlags -> GenericOp
697 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
698 = emit $ mkAssign (CmmLocal res_q)
699 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
700 mkAssign (CmmLocal res_r)
701 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
702 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
703
704 genericWordQuotRem2Op :: DynFlags -> GenericOp
705 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
706 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
707 where ty = cmmExprType dflags arg_x_high
708 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
709 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
710 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
711 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
712 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
713 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
714 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
715 zero = lit 0
716 one = lit 1
717 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
718 lit i = CmmLit (CmmInt i (wordWidth dflags))
719
720 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
721 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
722 mkAssign (CmmLocal res_r) high)
723 f i acc high low =
724 do roverflowedBit <- newTemp ty
725 rhigh' <- newTemp ty
726 rhigh'' <- newTemp ty
727 rlow' <- newTemp ty
728 risge <- newTemp ty
729 racc' <- newTemp ty
730 let high' = CmmReg (CmmLocal rhigh')
731 isge = CmmReg (CmmLocal risge)
732 overflowedBit = CmmReg (CmmLocal roverflowedBit)
733 let this = catAGraphs
734 [mkAssign (CmmLocal roverflowedBit)
735 (shr high negone),
736 mkAssign (CmmLocal rhigh')
737 (or (shl high one) (shr low negone)),
738 mkAssign (CmmLocal rlow')
739 (shl low one),
740 mkAssign (CmmLocal risge)
741 (or (overflowedBit `ne` zero)
742 (high' `ge` arg_y)),
743 mkAssign (CmmLocal rhigh'')
744 (high' `minus` (arg_y `times` isge)),
745 mkAssign (CmmLocal racc')
746 (or (shl acc one) isge)]
747 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
748 (CmmReg (CmmLocal rhigh''))
749 (CmmReg (CmmLocal rlow'))
750 return (this <*> rest)
751 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
752
753 genericWordAdd2Op :: GenericOp
754 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
755 = do dflags <- getDynFlags
756 r1 <- newTemp (cmmExprType dflags arg_x)
757 r2 <- newTemp (cmmExprType dflags arg_x)
758 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
759 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
760 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
761 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
762 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
763 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
764 (wordWidth dflags))
765 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
766 emit $ catAGraphs
767 [mkAssign (CmmLocal r1)
768 (add (bottomHalf arg_x) (bottomHalf arg_y)),
769 mkAssign (CmmLocal r2)
770 (add (topHalf (CmmReg (CmmLocal r1)))
771 (add (topHalf arg_x) (topHalf arg_y))),
772 mkAssign (CmmLocal res_h)
773 (topHalf (CmmReg (CmmLocal r2))),
774 mkAssign (CmmLocal res_l)
775 (or (toTopHalf (CmmReg (CmmLocal r2)))
776 (bottomHalf (CmmReg (CmmLocal r1))))]
777 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
778
779 genericWordMul2Op :: GenericOp
780 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
781 = do dflags <- getDynFlags
782 let t = cmmExprType dflags arg_x
783 xlyl <- liftM CmmLocal $ newTemp t
784 xlyh <- liftM CmmLocal $ newTemp t
785 xhyl <- liftM CmmLocal $ newTemp t
786 r <- liftM CmmLocal $ newTemp t
787 -- This generic implementation is very simple and slow. We might
788 -- well be able to do better, but for now this at least works.
789 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
790 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
791 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
792 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
793 sum = foldl1 add
794 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
795 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
796 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
797 (wordWidth dflags))
798 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
799 emit $ catAGraphs
800 [mkAssign xlyl
801 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
802 mkAssign xlyh
803 (mul (bottomHalf arg_x) (topHalf arg_y)),
804 mkAssign xhyl
805 (mul (topHalf arg_x) (bottomHalf arg_y)),
806 mkAssign r
807 (sum [topHalf (CmmReg xlyl),
808 bottomHalf (CmmReg xhyl),
809 bottomHalf (CmmReg xlyh)]),
810 mkAssign (CmmLocal res_l)
811 (or (bottomHalf (CmmReg xlyl))
812 (toTopHalf (CmmReg r))),
813 mkAssign (CmmLocal res_h)
814 (sum [mul (topHalf arg_x) (topHalf arg_y),
815 topHalf (CmmReg xhyl),
816 topHalf (CmmReg xlyh),
817 topHalf (CmmReg r)])]
818 genericWordMul2Op _ _ = panic "genericWordMul2Op"
819
820 -- These PrimOps are NOPs in Cmm
821
822 nopOp :: PrimOp -> Bool
823 nopOp Int2WordOp = True
824 nopOp Word2IntOp = True
825 nopOp Int2AddrOp = True
826 nopOp Addr2IntOp = True
827 nopOp ChrOp = True -- Int# and Char# are rep'd the same
828 nopOp OrdOp = True
829 nopOp _ = False
830
831 -- These PrimOps turn into double casts
832
833 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
834 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
835 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
836 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
837 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
838 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
839 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
840 narrowOp _ = Nothing
841
842 -- Native word signless ops
843
844 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
845 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
846 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
847 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
848 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
849 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
850 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
851
852 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
853 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
854 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
855 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
856 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
857 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
858
859 translateOp dflags AndOp = Just (mo_wordAnd dflags)
860 translateOp dflags OrOp = Just (mo_wordOr dflags)
861 translateOp dflags XorOp = Just (mo_wordXor dflags)
862 translateOp dflags NotOp = Just (mo_wordNot dflags)
863 translateOp dflags SllOp = Just (mo_wordShl dflags)
864 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
865
866 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
867
868 -- Native word signed ops
869
870 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
871 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
872 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
873 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
874 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
875
876
877 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
878 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
879 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
880 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
881
882 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
883 translateOp dflags OrIOp = Just (mo_wordOr dflags)
884 translateOp dflags XorIOp = Just (mo_wordXor dflags)
885 translateOp dflags NotIOp = Just (mo_wordNot dflags)
886 translateOp dflags ISllOp = Just (mo_wordShl dflags)
887 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
888 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
889
890 -- Native word unsigned ops
891
892 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
893 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
894 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
895 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
896
897 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
898 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
899 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
900
901 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
902 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
903 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
904 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
905
906 -- Char# ops
907
908 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
909 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
910 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
911 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
912 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
913 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
914
915 -- Double ops
916
917 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
918 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
919 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
920 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
921 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
922 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
923
924 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
925 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
926 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
927 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
928 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
929
930 -- Float ops
931
932 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
933 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
934 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
935 translateOp _ FloatLeOp = Just (MO_F_Le W32)
936 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
937 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
938
939 translateOp _ FloatAddOp = Just (MO_F_Add W32)
940 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
941 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
942 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
943 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
944
945 -- Floating point vector ops
946
947 translateOp _ FloatX4AddOp = Just (MO_VF_Add 4 W32)
948 translateOp _ FloatX4SubOp = Just (MO_VF_Sub 4 W32)
949 translateOp _ FloatX4MulOp = Just (MO_VF_Mul 4 W32)
950 translateOp _ FloatX4DivOp = Just (MO_VF_Quot 4 W32)
951 translateOp _ FloatX4NegOp = Just (MO_VF_Neg 4 W32)
952
953 translateOp _ DoubleX2AddOp = Just (MO_VF_Add 2 W64)
954 translateOp _ DoubleX2SubOp = Just (MO_VF_Sub 2 W64)
955 translateOp _ DoubleX2MulOp = Just (MO_VF_Mul 2 W64)
956 translateOp _ DoubleX2DivOp = Just (MO_VF_Quot 2 W64)
957 translateOp _ DoubleX2NegOp = Just (MO_VF_Neg 2 W64)
958
959 translateOp _ Int32X4AddOp = Just (MO_V_Add 4 W32)
960 translateOp _ Int32X4SubOp = Just (MO_V_Sub 4 W32)
961 translateOp _ Int32X4MulOp = Just (MO_V_Mul 4 W32)
962 translateOp _ Int32X4QuotOp = Just (MO_VS_Quot 4 W32)
963 translateOp _ Int32X4RemOp = Just (MO_VS_Rem 4 W32)
964 translateOp _ Int32X4NegOp = Just (MO_VS_Neg 4 W32)
965
966 translateOp _ Int64X2AddOp = Just (MO_V_Add 2 W64)
967 translateOp _ Int64X2SubOp = Just (MO_V_Sub 2 W64)
968 translateOp _ Int64X2MulOp = Just (MO_V_Mul 2 W64)
969 translateOp _ Int64X2QuotOp = Just (MO_VS_Quot 2 W64)
970 translateOp _ Int64X2RemOp = Just (MO_VS_Rem 2 W64)
971 translateOp _ Int64X2NegOp = Just (MO_VS_Neg 2 W64)
972
973 -- Conversions
974
975 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
976 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
977
978 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
979 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
980
981 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
982 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
983
984 -- Word comparisons masquerading as more exotic things.
985
986 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
987 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
988 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
989 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
990 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
991 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
992 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
993
994 translateOp _ _ = Nothing
995
996 -- These primops are implemented by CallishMachOps, because they sometimes
997 -- turn into foreign calls depending on the backend.
998
999 callishOp :: PrimOp -> Maybe CallishMachOp
1000 callishOp DoublePowerOp = Just MO_F64_Pwr
1001 callishOp DoubleSinOp = Just MO_F64_Sin
1002 callishOp DoubleCosOp = Just MO_F64_Cos
1003 callishOp DoubleTanOp = Just MO_F64_Tan
1004 callishOp DoubleSinhOp = Just MO_F64_Sinh
1005 callishOp DoubleCoshOp = Just MO_F64_Cosh
1006 callishOp DoubleTanhOp = Just MO_F64_Tanh
1007 callishOp DoubleAsinOp = Just MO_F64_Asin
1008 callishOp DoubleAcosOp = Just MO_F64_Acos
1009 callishOp DoubleAtanOp = Just MO_F64_Atan
1010 callishOp DoubleLogOp = Just MO_F64_Log
1011 callishOp DoubleExpOp = Just MO_F64_Exp
1012 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1013
1014 callishOp FloatPowerOp = Just MO_F32_Pwr
1015 callishOp FloatSinOp = Just MO_F32_Sin
1016 callishOp FloatCosOp = Just MO_F32_Cos
1017 callishOp FloatTanOp = Just MO_F32_Tan
1018 callishOp FloatSinhOp = Just MO_F32_Sinh
1019 callishOp FloatCoshOp = Just MO_F32_Cosh
1020 callishOp FloatTanhOp = Just MO_F32_Tanh
1021 callishOp FloatAsinOp = Just MO_F32_Asin
1022 callishOp FloatAcosOp = Just MO_F32_Acos
1023 callishOp FloatAtanOp = Just MO_F32_Atan
1024 callishOp FloatLogOp = Just MO_F32_Log
1025 callishOp FloatExpOp = Just MO_F32_Exp
1026 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1027
1028 callishOp _ = Nothing
1029
1030 ------------------------------------------------------------------------------
1031 -- Helpers for translating various minor variants of array indexing.
1032
1033 doIndexOffAddrOp :: Maybe MachOp
1034 -> CmmType
1035 -> [LocalReg]
1036 -> [CmmExpr]
1037 -> FCode ()
1038 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1039 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1040 doIndexOffAddrOp _ _ _ _
1041 = panic "StgCmmPrim: doIndexOffAddrOp"
1042
1043 doIndexOffAddrOpAs :: Maybe MachOp
1044 -> CmmType
1045 -> CmmType
1046 -> [LocalReg]
1047 -> [CmmExpr]
1048 -> FCode ()
1049 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1050 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1051 doIndexOffAddrOpAs _ _ _ _ _
1052 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1053
1054 doIndexByteArrayOp :: Maybe MachOp
1055 -> CmmType
1056 -> [LocalReg]
1057 -> [CmmExpr]
1058 -> FCode ()
1059 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1060 = do dflags <- getDynFlags
1061 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1062 doIndexByteArrayOp _ _ _ _
1063 = panic "StgCmmPrim: doIndexByteArrayOp"
1064
1065 doIndexByteArrayOpAs :: Maybe MachOp
1066 -> CmmType
1067 -> CmmType
1068 -> [LocalReg]
1069 -> [CmmExpr]
1070 -> FCode ()
1071 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1072 = do dflags <- getDynFlags
1073 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1074 doIndexByteArrayOpAs _ _ _ _ _
1075 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1076
1077 doReadPtrArrayOp :: LocalReg
1078 -> CmmExpr
1079 -> CmmExpr
1080 -> FCode ()
1081 doReadPtrArrayOp res addr idx
1082 = do dflags <- getDynFlags
1083 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1084
1085 doWriteOffAddrOp :: Maybe MachOp
1086 -> CmmType
1087 -> [LocalReg]
1088 -> [CmmExpr]
1089 -> FCode ()
1090 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1091 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1092 doWriteOffAddrOp _ _ _ _
1093 = panic "StgCmmPrim: doWriteOffAddrOp"
1094
1095 doWriteByteArrayOp :: Maybe MachOp
1096 -> CmmType
1097 -> [LocalReg]
1098 -> [CmmExpr]
1099 -> FCode ()
1100 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1101 = do dflags <- getDynFlags
1102 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1103 doWriteByteArrayOp _ _ _ _
1104 = panic "StgCmmPrim: doWriteByteArrayOp"
1105
1106 doWritePtrArrayOp :: CmmExpr
1107 -> CmmExpr
1108 -> CmmExpr
1109 -> FCode ()
1110 doWritePtrArrayOp addr idx val
1111 = do dflags <- getDynFlags
1112 let ty = cmmExprType dflags val
1113 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1114 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1115 -- the write barrier. We must write a byte into the mark table:
1116 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1117 emit $ mkStore (
1118 cmmOffsetExpr dflags
1119 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1120 (loadArrPtrsSize dflags addr))
1121 (CmmMachOp (mo_wordUShr dflags) [idx,
1122 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1123 ) (CmmLit (CmmInt 1 W8))
1124
1125 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1126 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1127 where off = fixedHdrSize dflags * wORD_SIZE dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1128
1129 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1130 -> Maybe MachOp -- Optional result cast
1131 -> CmmType -- Type of element we are accessing
1132 -> LocalReg -- Destination
1133 -> CmmExpr -- Base address
1134 -> CmmType -- Type of element by which we are indexing
1135 -> CmmExpr -- Index
1136 -> FCode ()
1137 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1138 = do dflags <- getDynFlags
1139 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1140 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1141 = do dflags <- getDynFlags
1142 emitAssign (CmmLocal res) (CmmMachOp cast [
1143 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1144
1145 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1146 -> Maybe MachOp -- Optional value cast
1147 -> CmmExpr -- Base address
1148 -> CmmType -- Type of element by which we are indexing
1149 -> CmmExpr -- Index
1150 -> CmmExpr -- Value to write
1151 -> FCode ()
1152 mkBasicIndexedWrite off Nothing base idx_ty idx val
1153 = do dflags <- getDynFlags
1154 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1155 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1156 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1157
1158 -- ----------------------------------------------------------------------------
1159 -- Misc utils
1160
1161 cmmIndexOffExpr :: DynFlags
1162 -> ByteOff -- Initial offset in bytes
1163 -> Width -- Width of element by which we are indexing
1164 -> CmmExpr -- Base address
1165 -> CmmExpr -- Index
1166 -> CmmExpr
1167 cmmIndexOffExpr dflags off width base idx
1168 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1169
1170 cmmLoadIndexOffExpr :: DynFlags
1171 -> ByteOff -- Initial offset in bytes
1172 -> CmmType -- Type of element we are accessing
1173 -> CmmExpr -- Base address
1174 -> CmmType -- Type of element by which we are indexing
1175 -> CmmExpr -- Index
1176 -> CmmExpr
1177 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1178 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1179
1180 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1181 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1182
1183 ------------------------------------------------------------------------------
1184 -- Helpers for translating vector packing and unpacking.
1185
1186 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1187 -> CmmType -- Type of vector
1188 -> CmmExpr -- Initial vector
1189 -> [CmmExpr] -- Elements
1190 -> CmmFormal -- Destination for result
1191 -> FCode ()
1192 doVecPackOp maybe_pre_write_cast ty z es res = do
1193 dst <- newTemp ty
1194 emitAssign (CmmLocal dst) z
1195 vecPack dst es 0
1196 where
1197 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1198 vecPack src [] _ =
1199 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1200
1201 vecPack src (e : es) i = do
1202 dst <- newTemp ty
1203 if isFloatType (vecElemType ty)
1204 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1205 [CmmReg (CmmLocal src), cast e, iLit])
1206 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1207 [CmmReg (CmmLocal src), cast e, iLit])
1208 vecPack dst es (i + 1)
1209 where
1210 -- vector indices are always 32-bits
1211 iLit = CmmLit (CmmInt (toInteger i) W32)
1212
1213 cast :: CmmExpr -> CmmExpr
1214 cast val = case maybe_pre_write_cast of
1215 Nothing -> val
1216 Just cast -> CmmMachOp cast [val]
1217
1218 len :: Length
1219 len = vecLength ty
1220
1221 wid :: Width
1222 wid = typeWidth (vecElemType ty)
1223
1224 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1225 -> CmmType -- Type of vector
1226 -> CmmExpr -- Vector
1227 -> [CmmFormal] -- Element results
1228 -> FCode ()
1229 doVecUnpackOp maybe_post_read_cast ty e res =
1230 vecUnpack res 0
1231 where
1232 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1233 vecUnpack [] _ =
1234 return ()
1235
1236 vecUnpack (r : rs) i = do
1237 if isFloatType (vecElemType ty)
1238 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1239 [e, iLit]))
1240 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1241 [e, iLit]))
1242 vecUnpack rs (i + 1)
1243 where
1244 -- vector indices are always 32-bits
1245 iLit = CmmLit (CmmInt (toInteger i) W32)
1246
1247 cast :: CmmExpr -> CmmExpr
1248 cast val = case maybe_post_read_cast of
1249 Nothing -> val
1250 Just cast -> CmmMachOp cast [val]
1251
1252 len :: Length
1253 len = vecLength ty
1254
1255 wid :: Width
1256 wid = typeWidth (vecElemType ty)
1257
1258 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1259 -> CmmType -- Vector type
1260 -> CmmExpr -- Source vector
1261 -> CmmExpr -- Element
1262 -> CmmExpr -- Index at which to insert element
1263 -> CmmFormal -- Destination for result
1264 -> FCode ()
1265 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1266 dflags <- getDynFlags
1267 -- vector indices are always 32-bits
1268 let idx' :: CmmExpr
1269 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1270 if isFloatType (vecElemType ty)
1271 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1272 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1273 where
1274 cast :: CmmExpr -> CmmExpr
1275 cast val = case maybe_pre_write_cast of
1276 Nothing -> val
1277 Just cast -> CmmMachOp cast [val]
1278
1279 len :: Length
1280 len = vecLength ty
1281
1282 wid :: Width
1283 wid = typeWidth (vecElemType ty)
1284
1285 ------------------------------------------------------------------------------
1286 -- Helpers for translating prefetching.
1287
1288 doPrefetchByteArrayOp :: [LocalReg]
1289 -> [CmmExpr]
1290 -> FCode ()
1291 doPrefetchByteArrayOp res [addr,idx]
1292 = do dflags <- getDynFlags
1293 mkBasicPrefetch (arrWordsHdrSize dflags) res addr idx
1294 doPrefetchByteArrayOp _ _
1295 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1296
1297 doPrefetchAddrOp :: [LocalReg]
1298 -> [CmmExpr]
1299 -> FCode ()
1300 doPrefetchAddrOp res [addr,idx]
1301 = mkBasicPrefetch 0 res addr idx
1302 doPrefetchAddrOp _ _
1303 = panic "StgCmmPrim: doPrefetchAddrOp"
1304
1305 mkBasicPrefetch :: ByteOff -- Initial offset in bytes
1306 -> [LocalReg] -- Destination
1307 -> CmmExpr -- Base address
1308 -> CmmExpr -- Index
1309 -> FCode ()
1310 mkBasicPrefetch off res base idx
1311 = do dflags <- getDynFlags
1312 emitPrimCall [] MO_Prefetch_Data [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1313 case res of
1314 [] -> return ()
1315 [reg] -> emitAssign (CmmLocal reg) base
1316 _ -> panic "StgCmmPrim: mkBasicPrefetch"
1317
1318 -- ----------------------------------------------------------------------------
1319 -- Copying byte arrays
1320
1321 -- | Takes a source 'ByteArray#', an offset in the source array, a
1322 -- destination 'MutableByteArray#', an offset into the destination
1323 -- array, and the number of bytes to copy. Copies the given number of
1324 -- bytes from the source array to the destination array.
1325 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1326 -> FCode ()
1327 doCopyByteArrayOp = emitCopyByteArray copy
1328 where
1329 -- Copy data (we assume the arrays aren't overlapping since
1330 -- they're of different types)
1331 copy _src _dst dst_p src_p bytes =
1332 do dflags <- getDynFlags
1333 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1334
1335 -- | Takes a source 'MutableByteArray#', an offset in the source
1336 -- array, a destination 'MutableByteArray#', an offset into the
1337 -- destination array, and the number of bytes to copy. Copies the
1338 -- given number of bytes from the source array to the destination
1339 -- array.
1340 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1341 -> FCode ()
1342 doCopyMutableByteArrayOp = emitCopyByteArray copy
1343 where
1344 -- The only time the memory might overlap is when the two arrays
1345 -- we were provided are the same array!
1346 -- TODO: Optimize branch for common case of no aliasing.
1347 copy src dst dst_p src_p bytes = do
1348 dflags <- getDynFlags
1349 [moveCall, cpyCall] <- forkAlts [
1350 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags 1),
1351 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1352 ]
1353 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1354
1355 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1356 -> FCode ())
1357 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1358 -> FCode ()
1359 emitCopyByteArray copy src src_off dst dst_off n = do
1360 dflags <- getDynFlags
1361 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1362 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1363 copy src dst dst_p src_p n
1364
1365 -- ----------------------------------------------------------------------------
1366 -- Setting byte arrays
1367
1368 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1369 -- and a byte, and sets each of the selected bytes in the array to the
1370 -- character.
1371 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1372 -> FCode ()
1373 doSetByteArrayOp ba off len c
1374 = do dflags <- getDynFlags
1375 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1376 emitMemsetCall p c len (mkIntExpr dflags 1)
1377
1378 -- ----------------------------------------------------------------------------
1379 -- Copying pointer arrays
1380
1381 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1382 -- nowhere else in the code generator. This is mostly because these
1383 -- "primitive" ops result in a surprisingly large amount of code. It
1384 -- will likely be worthwhile to optimize what is emitted here, so that
1385 -- our optimization passes don't waste time repeatedly optimizing the
1386 -- same bits of code.
1387
1388 -- More closely imitates 'assignTemp' from the old code generator, which
1389 -- returns a CmmExpr rather than a LocalReg.
1390 assignTempE :: CmmExpr -> FCode CmmExpr
1391 assignTempE e = do
1392 t <- assignTemp e
1393 return (CmmReg (CmmLocal t))
1394
1395 -- | Takes a source 'Array#', an offset in the source array, a
1396 -- destination 'MutableArray#', an offset into the destination array,
1397 -- and the number of elements to copy. Copies the given number of
1398 -- elements from the source array to the destination array.
1399 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1400 -> FCode ()
1401 doCopyArrayOp = emitCopyArray copy
1402 where
1403 -- Copy data (we assume the arrays aren't overlapping since
1404 -- they're of different types)
1405 copy _src _dst dst_p src_p bytes =
1406 do dflags <- getDynFlags
1407 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1408
1409
1410 -- | Takes a source 'MutableArray#', an offset in the source array, a
1411 -- destination 'MutableArray#', an offset into the destination array,
1412 -- and the number of elements to copy. Copies the given number of
1413 -- elements from the source array to the destination array.
1414 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1415 -> FCode ()
1416 doCopyMutableArrayOp = emitCopyArray copy
1417 where
1418 -- The only time the memory might overlap is when the two arrays
1419 -- we were provided are the same array!
1420 -- TODO: Optimize branch for common case of no aliasing.
1421 copy src dst dst_p src_p bytes = do
1422 dflags <- getDynFlags
1423 [moveCall, cpyCall] <- forkAlts [
1424 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags)),
1425 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1426 ]
1427 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1428
1429 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1430 -> FCode ())
1431 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1432 -> FCode ()
1433 emitCopyArray copy src0 src_off0 dst0 dst_off0 n0 = do
1434 dflags <- getDynFlags
1435 n <- assignTempE n0
1436 nonzero <- getCode $ do
1437 -- Passed as arguments (be careful)
1438 src <- assignTempE src0
1439 src_off <- assignTempE src_off0
1440 dst <- assignTempE dst0
1441 dst_off <- assignTempE dst_off0
1442
1443 -- Set the dirty bit in the header.
1444 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1445
1446 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst (arrPtrsHdrSize dflags)
1447 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1448 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1449 bytes <- assignTempE $ cmmMulWord dflags n (mkIntExpr dflags (wORD_SIZE dflags))
1450
1451 copy src dst dst_p src_p bytes
1452
1453 -- The base address of the destination card table
1454 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p (loadArrPtrsSize dflags dst)
1455
1456 emitSetCards dst_off dst_cards_p n
1457
1458 emit =<< mkCmmIfThen (cmmNeWord dflags n (mkIntExpr dflags 0)) nonzero
1459
1460 -- | Takes an info table label, a register to return the newly
1461 -- allocated array in, a source array, an offset in the source array,
1462 -- and the number of elements to copy. Allocates a new array and
1463 -- initializes it form the source array.
1464 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> CmmExpr
1465 -> FCode ()
1466 emitCloneArray info_p res_r src0 src_off0 n0 = do
1467 dflags <- getDynFlags
1468 let arrPtrsHdrSizeW dflags = mkIntExpr dflags (fixedHdrSize dflags +
1469 (sIZEOF_StgMutArrPtrs_NoHdr dflags `div` wORD_SIZE dflags))
1470 myCapability = cmmSubWord dflags (CmmReg baseReg) (mkIntExpr dflags (oFFSET_Capability_r dflags))
1471 -- Passed as arguments (be careful)
1472 src <- assignTempE src0
1473 src_off <- assignTempE src_off0
1474 n <- assignTempE n0
1475
1476 card_bytes <- assignTempE $ cardRoundUp dflags n
1477 size <- assignTempE $ cmmAddWord dflags n (bytesToWordsRoundUp dflags card_bytes)
1478 words <- assignTempE $ cmmAddWord dflags (arrPtrsHdrSizeW dflags) size
1479
1480 arr_r <- newTemp (bWord dflags)
1481 emitAllocateCall arr_r myCapability words
1482 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags)) (cmmMulWord dflags n (wordSize dflags))
1483 (zeroExpr dflags)
1484
1485 let arr = CmmReg (CmmLocal arr_r)
1486 emitSetDynHdr arr (CmmLit (CmmLabel info_p)) curCCS
1487 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1488 oFFSET_StgMutArrPtrs_ptrs dflags)) n
1489 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1490 oFFSET_StgMutArrPtrs_size dflags)) size
1491
1492 dst_p <- assignTempE $ cmmOffsetB dflags arr (arrPtrsHdrSize dflags)
1493 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags))
1494 src_off
1495
1496 emitMemcpyCall dst_p src_p (cmmMulWord dflags n (wordSize dflags)) (mkIntExpr dflags (wORD_SIZE dflags))
1497
1498 emitMemsetCall (cmmOffsetExprW dflags dst_p n)
1499 (mkIntExpr dflags 1)
1500 card_bytes
1501 (mkIntExpr dflags (wORD_SIZE dflags))
1502 emit $ mkAssign (CmmLocal res_r) arr
1503
1504 -- | Takes and offset in the destination array, the base address of
1505 -- the card table, and the number of elements affected (*not* the
1506 -- number of cards). The number of elements may not be zero.
1507 -- Marks the relevant cards as dirty.
1508 emitSetCards :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1509 emitSetCards dst_start dst_cards_start n = do
1510 dflags <- getDynFlags
1511 start_card <- assignTempE $ card dflags dst_start
1512 let end_card = card dflags (cmmSubWord dflags (cmmAddWord dflags dst_start n) (mkIntExpr dflags 1))
1513 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
1514 (mkIntExpr dflags 1)
1515 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
1516 (mkIntExpr dflags 1) -- no alignment (1 byte)
1517
1518 -- Convert an element index to a card index
1519 card :: DynFlags -> CmmExpr -> CmmExpr
1520 card dflags i = cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
1521
1522 -- Convert a number of elements to a number of cards, rounding up
1523 cardRoundUp :: DynFlags -> CmmExpr -> CmmExpr
1524 cardRoundUp dflags i = card dflags (cmmAddWord dflags i (mkIntExpr dflags ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1)))
1525
1526 bytesToWordsRoundUp :: DynFlags -> CmmExpr -> CmmExpr
1527 bytesToWordsRoundUp dflags e = cmmQuotWord dflags (cmmAddWord dflags e (mkIntExpr dflags (wORD_SIZE dflags - 1)))
1528 (wordSize dflags)
1529
1530 wordSize :: DynFlags -> CmmExpr
1531 wordSize dflags = mkIntExpr dflags (wORD_SIZE dflags)
1532
1533 -- | Emit a call to @memcpy@.
1534 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1535 emitMemcpyCall dst src n align = do
1536 emitPrimCall
1537 [ {-no results-} ]
1538 MO_Memcpy
1539 [ dst, src, n, align ]
1540
1541 -- | Emit a call to @memmove@.
1542 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1543 emitMemmoveCall dst src n align = do
1544 emitPrimCall
1545 [ {- no results -} ]
1546 MO_Memmove
1547 [ dst, src, n, align ]
1548
1549 -- | Emit a call to @memset@. The second argument must fit inside an
1550 -- unsigned char.
1551 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1552 emitMemsetCall dst c n align = do
1553 emitPrimCall
1554 [ {- no results -} ]
1555 MO_Memset
1556 [ dst, c, n, align ]
1557
1558 -- | Emit a call to @allocate@.
1559 emitAllocateCall :: LocalReg -> CmmExpr -> CmmExpr -> FCode ()
1560 emitAllocateCall res cap n = do
1561 emitCCall
1562 [ (res, AddrHint) ]
1563 allocate
1564 [ (cap, AddrHint)
1565 , (n, NoHint)
1566 ]
1567 where
1568 allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
1569 ForeignLabelInExternalPackage IsFunction))
1570
1571 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1572 emitPopCntCall res x width = do
1573 emitPrimCall
1574 [ res ]
1575 (MO_PopCnt width)
1576 [ x ]