Comparison primops return Int# (Fixes #6135)
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C--: primitive operations
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmPrim (
10 cgOpApp,
11 cgPrimOp -- internal(ish), used by cgCase to get code for a
12 -- comparison without also turning it into a Bool.
13 ) where
14
15 #include "HsVersions.h"
16
17 import StgCmmLayout
18 import StgCmmForeign
19 import StgCmmEnv
20 import StgCmmMonad
21 import StgCmmUtils
22 import StgCmmTicky
23 import StgCmmHeap
24 import StgCmmProf
25
26 import DynFlags
27 import Platform
28 import BasicTypes
29 import MkGraph
30 import StgSyn
31 import Cmm
32 import CmmInfo
33 import Type ( Type, tyConAppTyCon )
34 import TyCon
35 import CLabel
36 import CmmUtils
37 import PrimOp
38 import SMRep
39 import FastString
40 import Outputable
41 import Util
42
43 import Control.Monad (liftM)
44 import Data.Bits
45
46 ------------------------------------------------------------------------
47 -- Primitive operations and foreign calls
48 ------------------------------------------------------------------------
49
50 {- Note [Foreign call results]
51 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
52 A foreign call always returns an unboxed tuple of results, one
53 of which is the state token. This seems to happen even for pure
54 calls.
55
56 Even if we returned a single result for pure calls, it'd still be
57 right to wrap it in a singleton unboxed tuple, because the result
58 might be a Haskell closure pointer, we don't want to evaluate it. -}
59
60 ----------------------------------
61 cgOpApp :: StgOp -- The op
62 -> [StgArg] -- Arguments
63 -> Type -- Result type (always an unboxed tuple)
64 -> FCode ReturnKind
65
66 -- Foreign calls
67 cgOpApp (StgFCallOp fcall _) stg_args res_ty
68 = cgForeignCall fcall stg_args res_ty
69 -- Note [Foreign call results]
70
71 -- tagToEnum# is special: we need to pull the constructor
72 -- out of the table, and perform an appropriate return.
73
74 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
75 = ASSERT(isEnumerationTyCon tycon)
76 do { dflags <- getDynFlags
77 ; args' <- getNonVoidArgAmodes [arg]
78 ; let amode = case args' of [amode] -> amode
79 _ -> panic "TagToEnumOp had void arg"
80 ; emitReturn [tagToClosure dflags tycon amode] }
81 where
82 -- If you're reading this code in the attempt to figure
83 -- out why the compiler panic'ed here, it is probably because
84 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
85 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
86 -- That won't work.
87 tycon = tyConAppTyCon res_ty
88
89 cgOpApp (StgPrimOp primop) args res_ty
90 | primOpOutOfLine primop
91 = do { cmm_args <- getNonVoidArgAmodes args
92 ; let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
93 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
94
95 | ReturnsPrim VoidRep <- result_info
96 = do cgPrimOp [] primop args
97 emitReturn []
98
99 | ReturnsPrim rep <- result_info
100 = do dflags <- getDynFlags
101 res <- newTemp (primRepCmmType dflags rep)
102 cgPrimOp [res] primop args
103 emitReturn [CmmReg (CmmLocal res)]
104
105 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
106 = do (regs, _hints) <- newUnboxedTupleRegs res_ty
107 cgPrimOp regs primop args
108 emitReturn (map (CmmReg . CmmLocal) regs)
109
110 | otherwise = panic "cgPrimop"
111 where
112 result_info = getPrimOpResultInfo primop
113
114 cgOpApp (StgPrimCallOp primcall) args _res_ty
115 = do { cmm_args <- getNonVoidArgAmodes args
116 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
117 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
118
119 ---------------------------------------------------
120 cgPrimOp :: [LocalReg] -- where to put the results
121 -> PrimOp -- the op
122 -> [StgArg] -- arguments
123 -> FCode ()
124
125 cgPrimOp results op args
126 = do dflags <- getDynFlags
127 arg_exprs <- getNonVoidArgAmodes args
128 emitPrimOp dflags results op arg_exprs
129
130
131 ------------------------------------------------------------------------
132 -- Emitting code for a primop
133 ------------------------------------------------------------------------
134
135 emitPrimOp :: DynFlags
136 -> [LocalReg] -- where to put the results
137 -> PrimOp -- the op
138 -> [CmmExpr] -- arguments
139 -> FCode ()
140
141 -- First we handle various awkward cases specially. The remaining
142 -- easy cases are then handled by translateOp, defined below.
143
144 emitPrimOp dflags [res_r,res_c] IntAddCOp [aa,bb]
145 {-
146 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
147 C, and without needing any comparisons. This may not be the
148 fastest way to do it - if you have better code, please send it! --SDM
149
150 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
151
152 We currently don't make use of the r value if c is != 0 (i.e.
153 overflow), we just convert to big integers and try again. This
154 could be improved by making r and c the correct values for
155 plugging into a new J#.
156
157 { r = ((I_)(a)) + ((I_)(b)); \
158 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
159 >> (BITS_IN (I_) - 1); \
160 }
161 Wading through the mass of bracketry, it seems to reduce to:
162 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
163
164 -}
165 = emit $ catAGraphs [
166 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
167 mkAssign (CmmLocal res_c) $
168 CmmMachOp (mo_wordUShr dflags) [
169 CmmMachOp (mo_wordAnd dflags) [
170 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
171 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
172 ],
173 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
174 ]
175 ]
176
177
178 emitPrimOp dflags [res_r,res_c] IntSubCOp [aa,bb]
179 {- Similarly:
180 #define subIntCzh(r,c,a,b) \
181 { r = ((I_)(a)) - ((I_)(b)); \
182 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
183 >> (BITS_IN (I_) - 1); \
184 }
185
186 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
187 -}
188 = emit $ catAGraphs [
189 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
190 mkAssign (CmmLocal res_c) $
191 CmmMachOp (mo_wordUShr dflags) [
192 CmmMachOp (mo_wordAnd dflags) [
193 CmmMachOp (mo_wordXor dflags) [aa,bb],
194 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
195 ],
196 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
197 ]
198 ]
199
200
201 emitPrimOp _ [res] ParOp [arg]
202 =
203 -- for now, just implement this in a C function
204 -- later, we might want to inline it.
205 emitCCall
206 [(res,NoHint)]
207 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
208 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
209
210 emitPrimOp dflags [res] SparkOp [arg]
211 = do
212 -- returns the value of arg in res. We're going to therefore
213 -- refer to arg twice (once to pass to newSpark(), and once to
214 -- assign to res), so put it in a temporary.
215 tmp <- assignTemp arg
216 tmp2 <- newTemp (bWord dflags)
217 emitCCall
218 [(tmp2,NoHint)]
219 (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
220 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
221 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
222
223 emitPrimOp dflags [res] GetCCSOfOp [arg]
224 = emitAssign (CmmLocal res) val
225 where
226 val
227 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
228 | otherwise = CmmLit (zeroCLit dflags)
229
230 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
231 = emitAssign (CmmLocal res) curCCS
232
233 emitPrimOp dflags [res] ReadMutVarOp [mutv]
234 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSize dflags) (gcWord dflags))
235
236 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
237 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSize dflags)) var
238 emitCCall
239 [{-no results-}]
240 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
241 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
242
243 -- #define sizzeofByteArrayzh(r,a) \
244 -- r = ((StgArrWords *)(a))->bytes
245 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
246 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
247
248 -- #define sizzeofMutableByteArrayzh(r,a) \
249 -- r = ((StgArrWords *)(a))->bytes
250 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
251 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
252
253
254 -- #define touchzh(o) /* nothing */
255 emitPrimOp _ res@[] TouchOp args@[_arg]
256 = do emitPrimCall res MO_Touch args
257
258 emitPrimOp _ res@[] PrefetchByteArrayOp args@[_arg]
259 = do emitPrimCall res MO_Prefetch_Data args
260
261 emitPrimOp _ res@[] PrefetchMutableByteArrayOp args@[_arg]
262 = do emitPrimCall res MO_Prefetch_Data args
263
264 emitPrimOp _ res@[] PrefetchAddrOp args@[_arg]
265 = do emitPrimCall res MO_Prefetch_Data args
266
267 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
268 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
269 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
270
271 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
272 emitPrimOp dflags [res] StableNameToIntOp [arg]
273 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
274
275 -- #define eqStableNamezh(r,sn1,sn2) \
276 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
277 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
278 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
279 cmmLoadIndexW dflags arg1 (fixedHdrSize dflags) (bWord dflags),
280 cmmLoadIndexW dflags arg2 (fixedHdrSize dflags) (bWord dflags)
281 ])
282
283
284 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
285 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
286
287 -- #define addrToHValuezh(r,a) r=(P_)a
288 emitPrimOp _ [res] AddrToAnyOp [arg]
289 = emitAssign (CmmLocal res) arg
290
291 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
292 -- Note: argument may be tagged!
293 emitPrimOp dflags [res] DataToTagOp [arg]
294 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
295
296 {- Freezing arrays-of-ptrs requires changing an info table, for the
297 benefit of the generational collector. It needs to scavenge mutable
298 objects, even if they are in old space. When they become immutable,
299 they can be removed from this scavenge list. -}
300
301 -- #define unsafeFreezzeArrayzh(r,a)
302 -- {
303 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
304 -- r = a;
305 -- }
306 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
307 = emit $ catAGraphs
308 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
309 mkAssign (CmmLocal res) arg ]
310 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
311 = emit $ catAGraphs
312 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
313 mkAssign (CmmLocal res) arg ]
314
315 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
316 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
317 = emitAssign (CmmLocal res) arg
318
319 -- Copying pointer arrays
320
321 emitPrimOp _ [] CopyArrayOp [src,src_off,dst,dst_off,n] =
322 doCopyArrayOp src src_off dst dst_off n
323 emitPrimOp _ [] CopyMutableArrayOp [src,src_off,dst,dst_off,n] =
324 doCopyMutableArrayOp src src_off dst dst_off n
325 emitPrimOp _ [res] CloneArrayOp [src,src_off,n] =
326 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
327 emitPrimOp _ [res] CloneMutableArrayOp [src,src_off,n] =
328 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
329 emitPrimOp _ [res] FreezeArrayOp [src,src_off,n] =
330 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
331 emitPrimOp _ [res] ThawArrayOp [src,src_off,n] =
332 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
333
334 emitPrimOp _ [] CopyArrayArrayOp [src,src_off,dst,dst_off,n] =
335 doCopyArrayOp src src_off dst dst_off n
336 emitPrimOp _ [] CopyMutableArrayArrayOp [src,src_off,dst,dst_off,n] =
337 doCopyMutableArrayOp src src_off dst dst_off n
338
339 -- Reading/writing pointer arrays
340
341 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
342 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
343 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
344
345 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
346 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
347 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
348 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
349 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
350 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
351 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
352 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
353 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
354 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
355
356 emitPrimOp dflags [res] SizeofArrayOp [arg]
357 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags))
358 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
359 = emitPrimOp dflags [res] SizeofArrayOp [arg]
360 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
361 = emitPrimOp dflags [res] SizeofArrayOp [arg]
362 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
363 = emitPrimOp dflags [res] SizeofArrayOp [arg]
364
365 -- IndexXXXoffAddr
366
367 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
368 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
369 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
370 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
371 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
372 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
373 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
374 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
375 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
376 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
377 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
378 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
379 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
380 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
381 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
382 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
383 emitPrimOp _ res IndexOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
384 emitPrimOp _ res IndexOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 f32 res args
385 emitPrimOp _ res IndexOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
386 emitPrimOp _ res IndexOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 f64 res args
387 emitPrimOp _ res IndexOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
388 emitPrimOp _ res IndexOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
389 emitPrimOp _ res IndexOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
390 emitPrimOp _ res IndexOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
391
392 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
393
394 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
395 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
396 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
397 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
398 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
399 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
400 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
401 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
402 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
403 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
404 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
405 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
406 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
407 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
408 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
409 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
410 emitPrimOp _ res ReadOffAddrOp_FloatX4 args = doIndexOffAddrOp Nothing vec4f32 res args
411 emitPrimOp _ res ReadOffAddrOp_FloatAsFloatX4 args = doIndexOffAddrOpAs Nothing vec4f32 b32 res args
412 emitPrimOp _ res ReadOffAddrOp_DoubleX2 args = doIndexOffAddrOp Nothing vec2f64 res args
413 emitPrimOp _ res ReadOffAddrOp_DoubleAsDoubleX2 args = doIndexOffAddrOpAs Nothing vec2f64 b64 res args
414 emitPrimOp _ res ReadOffAddrOp_Int32X4 args = doIndexOffAddrOp Nothing vec4b32 res args
415 emitPrimOp _ res ReadOffAddrOp_Int32AsInt32X4 args = doIndexOffAddrOpAs Nothing vec4b32 b32 res args
416 emitPrimOp _ res ReadOffAddrOp_Int64X2 args = doIndexOffAddrOp Nothing vec2b64 res args
417 emitPrimOp _ res ReadOffAddrOp_Int64AsInt64X2 args = doIndexOffAddrOpAs Nothing vec2b64 b64 res args
418
419 -- IndexXXXArray
420
421 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
422 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
423 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
424 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
425 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
426 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
427 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
428 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
429 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
430 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
431 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
432 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
433 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
434 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
435 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
436 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
437 emitPrimOp _ res IndexByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
438 emitPrimOp _ res IndexByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
439 emitPrimOp _ res IndexByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
440 emitPrimOp _ res IndexByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
441 emitPrimOp _ res IndexByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
442 emitPrimOp _ res IndexByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
443 emitPrimOp _ res IndexByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
444 emitPrimOp _ res IndexByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
445
446 -- ReadXXXArray, identical to IndexXXXArray.
447
448 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
449 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
450 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
451 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
452 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
453 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
454 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
455 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
456 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
457 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
458 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
459 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
460 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
461 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
462 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
463 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
464 emitPrimOp _ res ReadByteArrayOp_FloatX4 args = doIndexByteArrayOp Nothing vec4f32 res args
465 emitPrimOp _ res ReadByteArrayOp_FloatAsFloatX4 args = doIndexByteArrayOpAs Nothing vec4f32 f32 res args
466 emitPrimOp _ res ReadByteArrayOp_DoubleX2 args = doIndexByteArrayOp Nothing vec2f64 res args
467 emitPrimOp _ res ReadByteArrayOp_DoubleAsDoubleX2 args = doIndexByteArrayOpAs Nothing vec2f64 f64 res args
468 emitPrimOp _ res ReadByteArrayOp_Int32X4 args = doIndexByteArrayOp Nothing vec4b32 res args
469 emitPrimOp _ res ReadByteArrayOp_Int32AsInt32X4 args = doIndexByteArrayOpAs Nothing vec4b32 b32 res args
470 emitPrimOp _ res ReadByteArrayOp_Int64X2 args = doIndexByteArrayOp Nothing vec2b64 res args
471 emitPrimOp _ res ReadByteArrayOp_Int64AsInt64X2 args = doIndexByteArrayOpAs Nothing vec2b64 b64 res args
472
473 -- WriteXXXoffAddr
474
475 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
476 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
477 emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
478 emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
479 emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
480 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
481 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
482 emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
483 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
484 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
485 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
486 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
487 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
488 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
489 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
490 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
491 emitPrimOp _ res WriteOffAddrOp_FloatX4 args = doWriteOffAddrOp Nothing vec4f32 res args
492 emitPrimOp _ res WriteOffAddrOp_FloatAsFloatX4 args = doWriteOffAddrOp Nothing f32 res args
493 emitPrimOp _ res WriteOffAddrOp_DoubleX2 args = doWriteOffAddrOp Nothing vec2f64 res args
494 emitPrimOp _ res WriteOffAddrOp_DoubleAsDoubleX2 args = doWriteOffAddrOp Nothing f64 res args
495 emitPrimOp _ res WriteOffAddrOp_Int32X4 args = doWriteOffAddrOp Nothing vec4b32 res args
496 emitPrimOp _ res WriteOffAddrOp_Int32AsInt32X4 args = doWriteOffAddrOp Nothing b32 res args
497 emitPrimOp _ res WriteOffAddrOp_Int64X2 args = doWriteOffAddrOp Nothing vec2b64 res args
498 emitPrimOp _ res WriteOffAddrOp_Int64AsInt64X2 args = doWriteOffAddrOp Nothing b64 res args
499
500 -- WriteXXXArray
501
502 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
503 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
504 emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
505 emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
506 emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
507 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
508 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
509 emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
510 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
511 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
512 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
513 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
514 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
515 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
516 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
517 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
518 emitPrimOp _ res WriteByteArrayOp_FloatX4 args = doWriteByteArrayOp Nothing vec4f32 res args
519 emitPrimOp _ res WriteByteArrayOp_FloatAsFloatX4 args = doWriteByteArrayOp Nothing f32 res args
520 emitPrimOp _ res WriteByteArrayOp_DoubleX2 args = doWriteByteArrayOp Nothing vec2f64 res args
521 emitPrimOp _ res WriteByteArrayOp_DoubleAsDoubleX2 args = doWriteByteArrayOp Nothing f64 res args
522 emitPrimOp _ res WriteByteArrayOp_Int32X4 args = doWriteByteArrayOp Nothing vec4b32 res args
523 emitPrimOp _ res WriteByteArrayOp_Int32AsInt32X4 args = doWriteByteArrayOp Nothing b32 res args
524 emitPrimOp _ res WriteByteArrayOp_Int64X2 args = doWriteByteArrayOp Nothing vec2b64 res args
525 emitPrimOp _ res WriteByteArrayOp_Int64AsInt64X2 args = doWriteByteArrayOp Nothing b64 res args
526
527 -- Copying and setting byte arrays
528 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
529 doCopyByteArrayOp src src_off dst dst_off n
530 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
531 doCopyMutableByteArrayOp src src_off dst dst_off n
532 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
533 doSetByteArrayOp ba off len c
534
535 emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
536 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
537 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
538 emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
539
540 -- Population count
541 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
542 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
543 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
544 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
545 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
546
547 -- Unsigned int to floating point conversions
548 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
549 (MO_UF_Conv W32) [w]
550 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
551 (MO_UF_Conv W64) [w]
552
553 -- SIMD vector packing and unpacking
554 emitPrimOp _ [res] FloatToFloatX4Op [e] =
555 doVecPackOp Nothing vec4f32 zero [e,e,e,e] res
556 where
557 zero :: CmmExpr
558 zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
559
560 emitPrimOp _ [res] FloatX4PackOp es@[_,_,_,_] =
561 doVecPackOp Nothing vec4f32 zero es res
562 where
563 zero :: CmmExpr
564 zero = CmmLit $ CmmVec (replicate 4 (CmmFloat 0 W32))
565
566 emitPrimOp _ res@[_,_,_,_] FloatX4UnpackOp [arg] =
567 doVecUnpackOp Nothing vec4f32 arg res
568
569 emitPrimOp _ [res] FloatX4InsertOp [v,e,i] =
570 doVecInsertOp Nothing vec4f32 v e i res
571
572 emitPrimOp _ [res] DoubleToDoubleX2Op [e] =
573 doVecPackOp Nothing vec2f64 zero [e,e] res
574 where
575 zero :: CmmExpr
576 zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
577
578 emitPrimOp _ [res] DoubleX2PackOp es@[_,_] =
579 doVecPackOp Nothing vec2f64 zero es res
580 where
581 zero :: CmmExpr
582 zero = CmmLit $ CmmVec (replicate 2 (CmmFloat 0 W64))
583
584 emitPrimOp _ res@[_,_] DoubleX2UnpackOp [arg] =
585 doVecUnpackOp Nothing vec2f64 arg res
586
587 emitPrimOp _ [res] DoubleX2InsertOp [v,e,i] =
588 doVecInsertOp Nothing vec2f64 v e i res
589
590 emitPrimOp dflags [res] Int32ToInt32X4Op [e] =
591 doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero [e,e,e,e] res
592 where
593 zero :: CmmExpr
594 zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
595
596 emitPrimOp dflags [res] Int32X4PackOp es@[_,_,_,_] =
597 doVecPackOp (Just (mo_WordTo32 dflags)) vec4b32 zero es res
598 where
599 zero :: CmmExpr
600 zero = CmmLit $ CmmVec (replicate 4 (CmmInt 0 W32))
601
602 emitPrimOp dflags res@[_,_,_,_] Int32X4UnpackOp [arg] =
603 doVecUnpackOp (Just (mo_s_32ToWord dflags)) vec4b32 arg res
604
605 emitPrimOp dflags [res] Int32X4InsertOp [v,e,i] =
606 doVecInsertOp (Just (mo_WordTo32 dflags)) vec4b32 v e i res
607
608 emitPrimOp _ [res] Int64ToInt64X2Op [e] =
609 doVecPackOp Nothing vec2b64 zero [e,e] res
610 where
611 zero :: CmmExpr
612 zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
613
614 emitPrimOp _ [res] Int64X2PackOp es@[_,_] =
615 doVecPackOp Nothing vec2b64 zero es res
616 where
617 zero :: CmmExpr
618 zero = CmmLit $ CmmVec (replicate 2 (CmmInt 0 W64))
619
620 emitPrimOp _ res@[_,_] Int64X2UnpackOp [arg] =
621 doVecUnpackOp Nothing vec2b64 arg res
622
623 emitPrimOp _ [res] Int64X2InsertOp [v,e,i] =
624 doVecInsertOp Nothing vec2b64 v e i res
625
626 -- Prefetch
627 emitPrimOp _ res PrefetchByteArrayOp args = doPrefetchByteArrayOp res args
628 emitPrimOp _ res PrefetchMutableByteArrayOp args = doPrefetchByteArrayOp res args
629 emitPrimOp _ res PrefetchAddrOp args = doPrefetchAddrOp res args
630
631 -- The rest just translate straightforwardly
632 emitPrimOp dflags [res] op [arg]
633 | nopOp op
634 = emitAssign (CmmLocal res) arg
635
636 | Just (mop,rep) <- narrowOp op
637 = emitAssign (CmmLocal res) $
638 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
639
640 emitPrimOp dflags r@[res] op args
641 | Just prim <- callishOp op
642 = do emitPrimCall r prim args
643
644 | Just mop <- translateOp dflags op
645 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
646 emit stmt
647
648 emitPrimOp dflags results op args
649 = case callishPrimOpSupported dflags op of
650 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
651 Right gen -> gen results args
652
653 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
654
655 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
656 callishPrimOpSupported dflags op
657 = case op of
658 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
659 | otherwise -> Right (genericIntQuotRemOp dflags)
660
661 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
662 | otherwise -> Right (genericWordQuotRemOp dflags)
663
664 WordQuotRem2Op | ncg && x86ish -> Left (MO_U_QuotRem2 (wordWidth dflags))
665 | otherwise -> Right (genericWordQuotRem2Op dflags)
666
667 WordAdd2Op | ncg && x86ish -> Left (MO_Add2 (wordWidth dflags))
668 | otherwise -> Right genericWordAdd2Op
669
670 WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags))
671 | otherwise -> Right genericWordMul2Op
672
673 _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
674 where
675 ncg = case hscTarget dflags of
676 HscAsm -> True
677 _ -> False
678
679 x86ish = case platformArch (targetPlatform dflags) of
680 ArchX86 -> True
681 ArchX86_64 -> True
682 _ -> False
683
684 genericIntQuotRemOp :: DynFlags -> GenericOp
685 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
686 = emit $ mkAssign (CmmLocal res_q)
687 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
688 mkAssign (CmmLocal res_r)
689 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
690 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
691
692 genericWordQuotRemOp :: DynFlags -> GenericOp
693 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
694 = emit $ mkAssign (CmmLocal res_q)
695 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
696 mkAssign (CmmLocal res_r)
697 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
698 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
699
700 genericWordQuotRem2Op :: DynFlags -> GenericOp
701 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
702 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
703 where ty = cmmExprType dflags arg_x_high
704 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
705 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
706 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
707 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
708 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
709 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
710 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
711 zero = lit 0
712 one = lit 1
713 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
714 lit i = CmmLit (CmmInt i (wordWidth dflags))
715
716 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
717 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
718 mkAssign (CmmLocal res_r) high)
719 f i acc high low =
720 do roverflowedBit <- newTemp ty
721 rhigh' <- newTemp ty
722 rhigh'' <- newTemp ty
723 rlow' <- newTemp ty
724 risge <- newTemp ty
725 racc' <- newTemp ty
726 let high' = CmmReg (CmmLocal rhigh')
727 isge = CmmReg (CmmLocal risge)
728 overflowedBit = CmmReg (CmmLocal roverflowedBit)
729 let this = catAGraphs
730 [mkAssign (CmmLocal roverflowedBit)
731 (shr high negone),
732 mkAssign (CmmLocal rhigh')
733 (or (shl high one) (shr low negone)),
734 mkAssign (CmmLocal rlow')
735 (shl low one),
736 mkAssign (CmmLocal risge)
737 (or (overflowedBit `ne` zero)
738 (high' `ge` arg_y)),
739 mkAssign (CmmLocal rhigh'')
740 (high' `minus` (arg_y `times` isge)),
741 mkAssign (CmmLocal racc')
742 (or (shl acc one) isge)]
743 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
744 (CmmReg (CmmLocal rhigh''))
745 (CmmReg (CmmLocal rlow'))
746 return (this <*> rest)
747 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
748
749 genericWordAdd2Op :: GenericOp
750 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
751 = do dflags <- getDynFlags
752 r1 <- newTemp (cmmExprType dflags arg_x)
753 r2 <- newTemp (cmmExprType dflags arg_x)
754 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
755 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
756 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
757 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
758 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
759 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
760 (wordWidth dflags))
761 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
762 emit $ catAGraphs
763 [mkAssign (CmmLocal r1)
764 (add (bottomHalf arg_x) (bottomHalf arg_y)),
765 mkAssign (CmmLocal r2)
766 (add (topHalf (CmmReg (CmmLocal r1)))
767 (add (topHalf arg_x) (topHalf arg_y))),
768 mkAssign (CmmLocal res_h)
769 (topHalf (CmmReg (CmmLocal r2))),
770 mkAssign (CmmLocal res_l)
771 (or (toTopHalf (CmmReg (CmmLocal r2)))
772 (bottomHalf (CmmReg (CmmLocal r1))))]
773 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
774
775 genericWordMul2Op :: GenericOp
776 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
777 = do dflags <- getDynFlags
778 let t = cmmExprType dflags arg_x
779 xlyl <- liftM CmmLocal $ newTemp t
780 xlyh <- liftM CmmLocal $ newTemp t
781 xhyl <- liftM CmmLocal $ newTemp t
782 r <- liftM CmmLocal $ newTemp t
783 -- This generic implementation is very simple and slow. We might
784 -- well be able to do better, but for now this at least works.
785 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
786 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
787 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
788 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
789 sum = foldl1 add
790 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
791 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
792 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
793 (wordWidth dflags))
794 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
795 emit $ catAGraphs
796 [mkAssign xlyl
797 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
798 mkAssign xlyh
799 (mul (bottomHalf arg_x) (topHalf arg_y)),
800 mkAssign xhyl
801 (mul (topHalf arg_x) (bottomHalf arg_y)),
802 mkAssign r
803 (sum [topHalf (CmmReg xlyl),
804 bottomHalf (CmmReg xhyl),
805 bottomHalf (CmmReg xlyh)]),
806 mkAssign (CmmLocal res_l)
807 (or (bottomHalf (CmmReg xlyl))
808 (toTopHalf (CmmReg r))),
809 mkAssign (CmmLocal res_h)
810 (sum [mul (topHalf arg_x) (topHalf arg_y),
811 topHalf (CmmReg xhyl),
812 topHalf (CmmReg xlyh),
813 topHalf (CmmReg r)])]
814 genericWordMul2Op _ _ = panic "genericWordMul2Op"
815
816 -- These PrimOps are NOPs in Cmm
817
818 nopOp :: PrimOp -> Bool
819 nopOp Int2WordOp = True
820 nopOp Word2IntOp = True
821 nopOp Int2AddrOp = True
822 nopOp Addr2IntOp = True
823 nopOp ChrOp = True -- Int# and Char# are rep'd the same
824 nopOp OrdOp = True
825 nopOp _ = False
826
827 -- These PrimOps turn into double casts
828
829 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
830 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
831 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
832 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
833 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
834 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
835 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
836 narrowOp _ = Nothing
837
838 -- Native word signless ops
839
840 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
841 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
842 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
843 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
844 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
845 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
846 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
847
848 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
849 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
850 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
851 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
852 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
853 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
854
855 translateOp dflags AndOp = Just (mo_wordAnd dflags)
856 translateOp dflags OrOp = Just (mo_wordOr dflags)
857 translateOp dflags XorOp = Just (mo_wordXor dflags)
858 translateOp dflags NotOp = Just (mo_wordNot dflags)
859 translateOp dflags SllOp = Just (mo_wordShl dflags)
860 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
861
862 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
863
864 -- Native word signed ops
865
866 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
867 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
868 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
869 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
870 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
871
872
873 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
874 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
875 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
876 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
877
878 translateOp dflags AndIOp = Just (mo_wordAnd dflags)
879 translateOp dflags OrIOp = Just (mo_wordOr dflags)
880 translateOp dflags XorIOp = Just (mo_wordXor dflags)
881 translateOp dflags NotIOp = Just (mo_wordNot dflags)
882 translateOp dflags ISllOp = Just (mo_wordShl dflags)
883 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
884 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
885
886 -- Native word unsigned ops
887
888 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
889 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
890 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
891 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
892
893 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
894 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
895 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
896
897 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
898 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
899 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
900 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
901
902 -- Char# ops
903
904 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
905 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
906 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
907 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
908 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
909 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
910
911 -- Double ops
912
913 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
914 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
915 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
916 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
917 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
918 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
919
920 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
921 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
922 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
923 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
924 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
925
926 -- Float ops
927
928 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
929 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
930 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
931 translateOp _ FloatLeOp = Just (MO_F_Le W32)
932 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
933 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
934
935 translateOp _ FloatAddOp = Just (MO_F_Add W32)
936 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
937 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
938 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
939 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
940
941 -- Floating point vector ops
942
943 translateOp _ FloatX4AddOp = Just (MO_VF_Add 4 W32)
944 translateOp _ FloatX4SubOp = Just (MO_VF_Sub 4 W32)
945 translateOp _ FloatX4MulOp = Just (MO_VF_Mul 4 W32)
946 translateOp _ FloatX4DivOp = Just (MO_VF_Quot 4 W32)
947 translateOp _ FloatX4NegOp = Just (MO_VF_Neg 4 W32)
948
949 translateOp _ DoubleX2AddOp = Just (MO_VF_Add 2 W64)
950 translateOp _ DoubleX2SubOp = Just (MO_VF_Sub 2 W64)
951 translateOp _ DoubleX2MulOp = Just (MO_VF_Mul 2 W64)
952 translateOp _ DoubleX2DivOp = Just (MO_VF_Quot 2 W64)
953 translateOp _ DoubleX2NegOp = Just (MO_VF_Neg 2 W64)
954
955 translateOp _ Int32X4AddOp = Just (MO_V_Add 4 W32)
956 translateOp _ Int32X4SubOp = Just (MO_V_Sub 4 W32)
957 translateOp _ Int32X4MulOp = Just (MO_V_Mul 4 W32)
958 translateOp _ Int32X4QuotOp = Just (MO_VS_Quot 4 W32)
959 translateOp _ Int32X4RemOp = Just (MO_VS_Rem 4 W32)
960 translateOp _ Int32X4NegOp = Just (MO_VS_Neg 4 W32)
961
962 translateOp _ Int64X2AddOp = Just (MO_V_Add 2 W64)
963 translateOp _ Int64X2SubOp = Just (MO_V_Sub 2 W64)
964 translateOp _ Int64X2MulOp = Just (MO_V_Mul 2 W64)
965 translateOp _ Int64X2QuotOp = Just (MO_VS_Quot 2 W64)
966 translateOp _ Int64X2RemOp = Just (MO_VS_Rem 2 W64)
967 translateOp _ Int64X2NegOp = Just (MO_VS_Neg 2 W64)
968
969 -- Conversions
970
971 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
972 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
973
974 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
975 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
976
977 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
978 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
979
980 -- Word comparisons masquerading as more exotic things.
981
982 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
983 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
984 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
985 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
986 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
987 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
988 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
989
990 translateOp _ _ = Nothing
991
992 -- These primops are implemented by CallishMachOps, because they sometimes
993 -- turn into foreign calls depending on the backend.
994
995 callishOp :: PrimOp -> Maybe CallishMachOp
996 callishOp DoublePowerOp = Just MO_F64_Pwr
997 callishOp DoubleSinOp = Just MO_F64_Sin
998 callishOp DoubleCosOp = Just MO_F64_Cos
999 callishOp DoubleTanOp = Just MO_F64_Tan
1000 callishOp DoubleSinhOp = Just MO_F64_Sinh
1001 callishOp DoubleCoshOp = Just MO_F64_Cosh
1002 callishOp DoubleTanhOp = Just MO_F64_Tanh
1003 callishOp DoubleAsinOp = Just MO_F64_Asin
1004 callishOp DoubleAcosOp = Just MO_F64_Acos
1005 callishOp DoubleAtanOp = Just MO_F64_Atan
1006 callishOp DoubleLogOp = Just MO_F64_Log
1007 callishOp DoubleExpOp = Just MO_F64_Exp
1008 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
1009
1010 callishOp FloatPowerOp = Just MO_F32_Pwr
1011 callishOp FloatSinOp = Just MO_F32_Sin
1012 callishOp FloatCosOp = Just MO_F32_Cos
1013 callishOp FloatTanOp = Just MO_F32_Tan
1014 callishOp FloatSinhOp = Just MO_F32_Sinh
1015 callishOp FloatCoshOp = Just MO_F32_Cosh
1016 callishOp FloatTanhOp = Just MO_F32_Tanh
1017 callishOp FloatAsinOp = Just MO_F32_Asin
1018 callishOp FloatAcosOp = Just MO_F32_Acos
1019 callishOp FloatAtanOp = Just MO_F32_Atan
1020 callishOp FloatLogOp = Just MO_F32_Log
1021 callishOp FloatExpOp = Just MO_F32_Exp
1022 callishOp FloatSqrtOp = Just MO_F32_Sqrt
1023
1024 callishOp _ = Nothing
1025
1026 ------------------------------------------------------------------------------
1027 -- Helpers for translating various minor variants of array indexing.
1028
1029 doIndexOffAddrOp :: Maybe MachOp
1030 -> CmmType
1031 -> [LocalReg]
1032 -> [CmmExpr]
1033 -> FCode ()
1034 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
1035 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
1036 doIndexOffAddrOp _ _ _ _
1037 = panic "StgCmmPrim: doIndexOffAddrOp"
1038
1039 doIndexOffAddrOpAs :: Maybe MachOp
1040 -> CmmType
1041 -> CmmType
1042 -> [LocalReg]
1043 -> [CmmExpr]
1044 -> FCode ()
1045 doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1046 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
1047 doIndexOffAddrOpAs _ _ _ _ _
1048 = panic "StgCmmPrim: doIndexOffAddrOpAs"
1049
1050 doIndexByteArrayOp :: Maybe MachOp
1051 -> CmmType
1052 -> [LocalReg]
1053 -> [CmmExpr]
1054 -> FCode ()
1055 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
1056 = do dflags <- getDynFlags
1057 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
1058 doIndexByteArrayOp _ _ _ _
1059 = panic "StgCmmPrim: doIndexByteArrayOp"
1060
1061 doIndexByteArrayOpAs :: Maybe MachOp
1062 -> CmmType
1063 -> CmmType
1064 -> [LocalReg]
1065 -> [CmmExpr]
1066 -> FCode ()
1067 doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
1068 = do dflags <- getDynFlags
1069 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
1070 doIndexByteArrayOpAs _ _ _ _ _
1071 = panic "StgCmmPrim: doIndexByteArrayOpAs"
1072
1073 doReadPtrArrayOp :: LocalReg
1074 -> CmmExpr
1075 -> CmmExpr
1076 -> FCode ()
1077 doReadPtrArrayOp res addr idx
1078 = do dflags <- getDynFlags
1079 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
1080
1081 doWriteOffAddrOp :: Maybe MachOp
1082 -> CmmType
1083 -> [LocalReg]
1084 -> [CmmExpr]
1085 -> FCode ()
1086 doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1087 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
1088 doWriteOffAddrOp _ _ _ _
1089 = panic "StgCmmPrim: doWriteOffAddrOp"
1090
1091 doWriteByteArrayOp :: Maybe MachOp
1092 -> CmmType
1093 -> [LocalReg]
1094 -> [CmmExpr]
1095 -> FCode ()
1096 doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
1097 = do dflags <- getDynFlags
1098 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
1099 doWriteByteArrayOp _ _ _ _
1100 = panic "StgCmmPrim: doWriteByteArrayOp"
1101
1102 doWritePtrArrayOp :: CmmExpr
1103 -> CmmExpr
1104 -> CmmExpr
1105 -> FCode ()
1106 doWritePtrArrayOp addr idx val
1107 = do dflags <- getDynFlags
1108 let ty = cmmExprType dflags val
1109 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
1110 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1111 -- the write barrier. We must write a byte into the mark table:
1112 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
1113 emit $ mkStore (
1114 cmmOffsetExpr dflags
1115 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
1116 (loadArrPtrsSize dflags addr))
1117 (CmmMachOp (mo_wordUShr dflags) [idx,
1118 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
1119 ) (CmmLit (CmmInt 1 W8))
1120
1121 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
1122 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
1123 where off = fixedHdrSize dflags * wORD_SIZE dflags + oFFSET_StgMutArrPtrs_ptrs dflags
1124
1125 mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
1126 -> Maybe MachOp -- Optional result cast
1127 -> CmmType -- Type of element we are accessing
1128 -> LocalReg -- Destination
1129 -> CmmExpr -- Base address
1130 -> CmmType -- Type of element by which we are indexing
1131 -> CmmExpr -- Index
1132 -> FCode ()
1133 mkBasicIndexedRead off Nothing ty res base idx_ty idx
1134 = do dflags <- getDynFlags
1135 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
1136 mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
1137 = do dflags <- getDynFlags
1138 emitAssign (CmmLocal res) (CmmMachOp cast [
1139 cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
1140
1141 mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
1142 -> Maybe MachOp -- Optional value cast
1143 -> CmmExpr -- Base address
1144 -> CmmType -- Type of element by which we are indexing
1145 -> CmmExpr -- Index
1146 -> CmmExpr -- Value to write
1147 -> FCode ()
1148 mkBasicIndexedWrite off Nothing base idx_ty idx val
1149 = do dflags <- getDynFlags
1150 emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
1151 mkBasicIndexedWrite off (Just cast) base idx_ty idx val
1152 = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
1153
1154 -- ----------------------------------------------------------------------------
1155 -- Misc utils
1156
1157 cmmIndexOffExpr :: DynFlags
1158 -> ByteOff -- Initial offset in bytes
1159 -> Width -- Width of element by which we are indexing
1160 -> CmmExpr -- Base address
1161 -> CmmExpr -- Index
1162 -> CmmExpr
1163 cmmIndexOffExpr dflags off width base idx
1164 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
1165
1166 cmmLoadIndexOffExpr :: DynFlags
1167 -> ByteOff -- Initial offset in bytes
1168 -> CmmType -- Type of element we are accessing
1169 -> CmmExpr -- Base address
1170 -> CmmType -- Type of element by which we are indexing
1171 -> CmmExpr -- Index
1172 -> CmmExpr
1173 cmmLoadIndexOffExpr dflags off ty base idx_ty idx
1174 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
1175
1176 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
1177 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
1178
1179 ------------------------------------------------------------------------------
1180 -- Helpers for translating vector packing and unpacking.
1181
1182 doVecPackOp :: Maybe MachOp -- Cast from element to vector component
1183 -> CmmType -- Type of vector
1184 -> CmmExpr -- Initial vector
1185 -> [CmmExpr] -- Elements
1186 -> CmmFormal -- Destination for result
1187 -> FCode ()
1188 doVecPackOp maybe_pre_write_cast ty z es res = do
1189 dst <- newTemp ty
1190 emitAssign (CmmLocal dst) z
1191 vecPack dst es 0
1192 where
1193 vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
1194 vecPack src [] _ =
1195 emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
1196
1197 vecPack src (e : es) i = do
1198 dst <- newTemp ty
1199 if isFloatType (vecElemType ty)
1200 then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
1201 [CmmReg (CmmLocal src), cast e, iLit])
1202 else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
1203 [CmmReg (CmmLocal src), cast e, iLit])
1204 vecPack dst es (i + 1)
1205 where
1206 -- vector indices are always 32-bits
1207 iLit = CmmLit (CmmInt (toInteger i) W32)
1208
1209 cast :: CmmExpr -> CmmExpr
1210 cast val = case maybe_pre_write_cast of
1211 Nothing -> val
1212 Just cast -> CmmMachOp cast [val]
1213
1214 len :: Length
1215 len = vecLength ty
1216
1217 wid :: Width
1218 wid = typeWidth (vecElemType ty)
1219
1220 doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
1221 -> CmmType -- Type of vector
1222 -> CmmExpr -- Vector
1223 -> [CmmFormal] -- Element results
1224 -> FCode ()
1225 doVecUnpackOp maybe_post_read_cast ty e res =
1226 vecUnpack res 0
1227 where
1228 vecUnpack :: [CmmFormal] -> Int -> FCode ()
1229 vecUnpack [] _ =
1230 return ()
1231
1232 vecUnpack (r : rs) i = do
1233 if isFloatType (vecElemType ty)
1234 then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
1235 [e, iLit]))
1236 else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
1237 [e, iLit]))
1238 vecUnpack rs (i + 1)
1239 where
1240 -- vector indices are always 32-bits
1241 iLit = CmmLit (CmmInt (toInteger i) W32)
1242
1243 cast :: CmmExpr -> CmmExpr
1244 cast val = case maybe_post_read_cast of
1245 Nothing -> val
1246 Just cast -> CmmMachOp cast [val]
1247
1248 len :: Length
1249 len = vecLength ty
1250
1251 wid :: Width
1252 wid = typeWidth (vecElemType ty)
1253
1254 doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
1255 -> CmmType -- Vector type
1256 -> CmmExpr -- Source vector
1257 -> CmmExpr -- Element
1258 -> CmmExpr -- Index at which to insert element
1259 -> CmmFormal -- Destination for result
1260 -> FCode ()
1261 doVecInsertOp maybe_pre_write_cast ty src e idx res = do
1262 dflags <- getDynFlags
1263 -- vector indices are always 32-bits
1264 let idx' :: CmmExpr
1265 idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
1266 if isFloatType (vecElemType ty)
1267 then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
1268 else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
1269 where
1270 cast :: CmmExpr -> CmmExpr
1271 cast val = case maybe_pre_write_cast of
1272 Nothing -> val
1273 Just cast -> CmmMachOp cast [val]
1274
1275 len :: Length
1276 len = vecLength ty
1277
1278 wid :: Width
1279 wid = typeWidth (vecElemType ty)
1280
1281 ------------------------------------------------------------------------------
1282 -- Helpers for translating prefetching.
1283
1284 doPrefetchByteArrayOp :: [LocalReg]
1285 -> [CmmExpr]
1286 -> FCode ()
1287 doPrefetchByteArrayOp res [addr,idx]
1288 = do dflags <- getDynFlags
1289 mkBasicPrefetch (arrWordsHdrSize dflags) res addr idx
1290 doPrefetchByteArrayOp _ _
1291 = panic "StgCmmPrim: doPrefetchByteArrayOp"
1292
1293 doPrefetchAddrOp :: [LocalReg]
1294 -> [CmmExpr]
1295 -> FCode ()
1296 doPrefetchAddrOp res [addr,idx]
1297 = mkBasicPrefetch 0 res addr idx
1298 doPrefetchAddrOp _ _
1299 = panic "StgCmmPrim: doPrefetchAddrOp"
1300
1301 mkBasicPrefetch :: ByteOff -- Initial offset in bytes
1302 -> [LocalReg] -- Destination
1303 -> CmmExpr -- Base address
1304 -> CmmExpr -- Index
1305 -> FCode ()
1306 mkBasicPrefetch off res base idx
1307 = do dflags <- getDynFlags
1308 emitPrimCall [] MO_Prefetch_Data [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
1309 case res of
1310 [] -> return ()
1311 [reg] -> emitAssign (CmmLocal reg) base
1312 _ -> panic "StgCmmPrim: mkBasicPrefetch"
1313
1314 -- ----------------------------------------------------------------------------
1315 -- Copying byte arrays
1316
1317 -- | Takes a source 'ByteArray#', an offset in the source array, a
1318 -- destination 'MutableByteArray#', an offset into the destination
1319 -- array, and the number of bytes to copy. Copies the given number of
1320 -- bytes from the source array to the destination array.
1321 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1322 -> FCode ()
1323 doCopyByteArrayOp = emitCopyByteArray copy
1324 where
1325 -- Copy data (we assume the arrays aren't overlapping since
1326 -- they're of different types)
1327 copy _src _dst dst_p src_p bytes =
1328 do dflags <- getDynFlags
1329 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1330
1331 -- | Takes a source 'MutableByteArray#', an offset in the source
1332 -- array, a destination 'MutableByteArray#', an offset into the
1333 -- destination array, and the number of bytes to copy. Copies the
1334 -- given number of bytes from the source array to the destination
1335 -- array.
1336 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1337 -> FCode ()
1338 doCopyMutableByteArrayOp = emitCopyByteArray copy
1339 where
1340 -- The only time the memory might overlap is when the two arrays
1341 -- we were provided are the same array!
1342 -- TODO: Optimize branch for common case of no aliasing.
1343 copy src dst dst_p src_p bytes = do
1344 dflags <- getDynFlags
1345 [moveCall, cpyCall] <- forkAlts [
1346 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags 1),
1347 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
1348 ]
1349 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1350
1351 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1352 -> FCode ())
1353 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1354 -> FCode ()
1355 emitCopyByteArray copy src src_off dst dst_off n = do
1356 dflags <- getDynFlags
1357 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
1358 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1359 copy src dst dst_p src_p n
1360
1361 -- ----------------------------------------------------------------------------
1362 -- Setting byte arrays
1363
1364 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1365 -- and a byte, and sets each of the selected bytes in the array to the
1366 -- character.
1367 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1368 -> FCode ()
1369 doSetByteArrayOp ba off len c
1370 = do dflags <- getDynFlags
1371 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1372 emitMemsetCall p c len (mkIntExpr dflags 1)
1373
1374 -- ----------------------------------------------------------------------------
1375 -- Copying pointer arrays
1376
1377 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1378 -- nowhere else in the code generator. This is mostly because these
1379 -- "primitive" ops result in a surprisingly large amount of code. It
1380 -- will likely be worthwhile to optimize what is emitted here, so that
1381 -- our optimization passes don't waste time repeatedly optimizing the
1382 -- same bits of code.
1383
1384 -- More closely imitates 'assignTemp' from the old code generator, which
1385 -- returns a CmmExpr rather than a LocalReg.
1386 assignTempE :: CmmExpr -> FCode CmmExpr
1387 assignTempE e = do
1388 t <- assignTemp e
1389 return (CmmReg (CmmLocal t))
1390
1391 -- | Takes a source 'Array#', an offset in the source array, a
1392 -- destination 'MutableArray#', an offset into the destination array,
1393 -- and the number of elements to copy. Copies the given number of
1394 -- elements from the source array to the destination array.
1395 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1396 -> FCode ()
1397 doCopyArrayOp = emitCopyArray copy
1398 where
1399 -- Copy data (we assume the arrays aren't overlapping since
1400 -- they're of different types)
1401 copy _src _dst dst_p src_p bytes =
1402 do dflags <- getDynFlags
1403 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1404
1405
1406 -- | Takes a source 'MutableArray#', an offset in the source array, a
1407 -- destination 'MutableArray#', an offset into the destination array,
1408 -- and the number of elements to copy. Copies the given number of
1409 -- elements from the source array to the destination array.
1410 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1411 -> FCode ()
1412 doCopyMutableArrayOp = emitCopyArray copy
1413 where
1414 -- The only time the memory might overlap is when the two arrays
1415 -- we were provided are the same array!
1416 -- TODO: Optimize branch for common case of no aliasing.
1417 copy src dst dst_p src_p bytes = do
1418 dflags <- getDynFlags
1419 [moveCall, cpyCall] <- forkAlts [
1420 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags)),
1421 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1422 ]
1423 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1424
1425 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1426 -> FCode ())
1427 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1428 -> FCode ()
1429 emitCopyArray copy src0 src_off0 dst0 dst_off0 n0 = do
1430 dflags <- getDynFlags
1431 n <- assignTempE n0
1432 nonzero <- getCode $ do
1433 -- Passed as arguments (be careful)
1434 src <- assignTempE src0
1435 src_off <- assignTempE src_off0
1436 dst <- assignTempE dst0
1437 dst_off <- assignTempE dst_off0
1438
1439 -- Set the dirty bit in the header.
1440 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1441
1442 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst (arrPtrsHdrSize dflags)
1443 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1444 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1445 bytes <- assignTempE $ cmmMulWord dflags n (mkIntExpr dflags (wORD_SIZE dflags))
1446
1447 copy src dst dst_p src_p bytes
1448
1449 -- The base address of the destination card table
1450 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p (loadArrPtrsSize dflags dst)
1451
1452 emitSetCards dst_off dst_cards_p n
1453
1454 emit =<< mkCmmIfThen (cmmNeWord dflags n (mkIntExpr dflags 0)) nonzero
1455
1456 -- | Takes an info table label, a register to return the newly
1457 -- allocated array in, a source array, an offset in the source array,
1458 -- and the number of elements to copy. Allocates a new array and
1459 -- initializes it form the source array.
1460 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> CmmExpr
1461 -> FCode ()
1462 emitCloneArray info_p res_r src0 src_off0 n0 = do
1463 dflags <- getDynFlags
1464 let arrPtrsHdrSizeW dflags = mkIntExpr dflags (fixedHdrSize dflags +
1465 (sIZEOF_StgMutArrPtrs_NoHdr dflags `div` wORD_SIZE dflags))
1466 myCapability = cmmSubWord dflags (CmmReg baseReg) (mkIntExpr dflags (oFFSET_Capability_r dflags))
1467 -- Passed as arguments (be careful)
1468 src <- assignTempE src0
1469 src_off <- assignTempE src_off0
1470 n <- assignTempE n0
1471
1472 card_bytes <- assignTempE $ cardRoundUp dflags n
1473 size <- assignTempE $ cmmAddWord dflags n (bytesToWordsRoundUp dflags card_bytes)
1474 words <- assignTempE $ cmmAddWord dflags (arrPtrsHdrSizeW dflags) size
1475
1476 arr_r <- newTemp (bWord dflags)
1477 emitAllocateCall arr_r myCapability words
1478 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags)) (cmmMulWord dflags n (wordSize dflags))
1479 (zeroExpr dflags)
1480
1481 let arr = CmmReg (CmmLocal arr_r)
1482 emitSetDynHdr arr (CmmLit (CmmLabel info_p)) curCCS
1483 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1484 oFFSET_StgMutArrPtrs_ptrs dflags)) n
1485 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1486 oFFSET_StgMutArrPtrs_size dflags)) size
1487
1488 dst_p <- assignTempE $ cmmOffsetB dflags arr (arrPtrsHdrSize dflags)
1489 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags))
1490 src_off
1491
1492 emitMemcpyCall dst_p src_p (cmmMulWord dflags n (wordSize dflags)) (mkIntExpr dflags (wORD_SIZE dflags))
1493
1494 emitMemsetCall (cmmOffsetExprW dflags dst_p n)
1495 (mkIntExpr dflags 1)
1496 card_bytes
1497 (mkIntExpr dflags (wORD_SIZE dflags))
1498 emit $ mkAssign (CmmLocal res_r) arr
1499
1500 -- | Takes and offset in the destination array, the base address of
1501 -- the card table, and the number of elements affected (*not* the
1502 -- number of cards). The number of elements may not be zero.
1503 -- Marks the relevant cards as dirty.
1504 emitSetCards :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1505 emitSetCards dst_start dst_cards_start n = do
1506 dflags <- getDynFlags
1507 start_card <- assignTempE $ card dflags dst_start
1508 let end_card = card dflags (cmmSubWord dflags (cmmAddWord dflags dst_start n) (mkIntExpr dflags 1))
1509 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
1510 (mkIntExpr dflags 1)
1511 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
1512 (mkIntExpr dflags 1) -- no alignment (1 byte)
1513
1514 -- Convert an element index to a card index
1515 card :: DynFlags -> CmmExpr -> CmmExpr
1516 card dflags i = cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
1517
1518 -- Convert a number of elements to a number of cards, rounding up
1519 cardRoundUp :: DynFlags -> CmmExpr -> CmmExpr
1520 cardRoundUp dflags i = card dflags (cmmAddWord dflags i (mkIntExpr dflags ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1)))
1521
1522 bytesToWordsRoundUp :: DynFlags -> CmmExpr -> CmmExpr
1523 bytesToWordsRoundUp dflags e = cmmQuotWord dflags (cmmAddWord dflags e (mkIntExpr dflags (wORD_SIZE dflags - 1)))
1524 (wordSize dflags)
1525
1526 wordSize :: DynFlags -> CmmExpr
1527 wordSize dflags = mkIntExpr dflags (wORD_SIZE dflags)
1528
1529 -- | Emit a call to @memcpy@.
1530 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1531 emitMemcpyCall dst src n align = do
1532 emitPrimCall
1533 [ {-no results-} ]
1534 MO_Memcpy
1535 [ dst, src, n, align ]
1536
1537 -- | Emit a call to @memmove@.
1538 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1539 emitMemmoveCall dst src n align = do
1540 emitPrimCall
1541 [ {- no results -} ]
1542 MO_Memmove
1543 [ dst, src, n, align ]
1544
1545 -- | Emit a call to @memset@. The second argument must fit inside an
1546 -- unsigned char.
1547 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1548 emitMemsetCall dst c n align = do
1549 emitPrimCall
1550 [ {- no results -} ]
1551 MO_Memset
1552 [ dst, c, n, align ]
1553
1554 -- | Emit a call to @allocate@.
1555 emitAllocateCall :: LocalReg -> CmmExpr -> CmmExpr -> FCode ()
1556 emitAllocateCall res cap n = do
1557 emitCCall
1558 [ (res, AddrHint) ]
1559 allocate
1560 [ (cap, AddrHint)
1561 , (n, NoHint)
1562 ]
1563 where
1564 allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
1565 ForeignLabelInExternalPackage IsFunction))
1566
1567 emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1568 emitBSwapCall res x width = do
1569 emitPrimCall
1570 [ res ]
1571 (MO_BSwap width)
1572 [ x ]
1573
1574 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1575 emitPopCntCall res x width = do
1576 emitPrimCall
1577 [ res ]
1578 (MO_PopCnt width)
1579 [ x ]