Merge branch 'master' of darcs.haskell.org:/home/darcs/ghc
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C--: primitive operations
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmPrim (
10 cgOpApp,
11 cgPrimOp -- internal(ish), used by cgCase to get code for a
12 -- comparison without also turning it into a Bool.
13 ) where
14
15 #include "HsVersions.h"
16
17 import StgCmmLayout
18 import StgCmmForeign
19 import StgCmmEnv
20 import StgCmmMonad
21 import StgCmmUtils
22 import StgCmmTicky
23 import StgCmmHeap
24 import StgCmmProf
25
26 import DynFlags
27 import Platform
28 import BasicTypes
29 import MkGraph
30 import StgSyn
31 import Cmm
32 import Type ( Type, tyConAppTyCon )
33 import TyCon
34 import CLabel
35 import CmmUtils
36 import PrimOp
37 import SMRep
38 import Module
39 import FastString
40 import Outputable
41 import Util
42
43 import Control.Monad (liftM)
44 import Data.Bits
45
46 ------------------------------------------------------------------------
47 -- Primitive operations and foreign calls
48 ------------------------------------------------------------------------
49
50 {- Note [Foreign call results]
51 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
52 A foreign call always returns an unboxed tuple of results, one
53 of which is the state token. This seems to happen even for pure
54 calls.
55
56 Even if we returned a single result for pure calls, it'd still be
57 right to wrap it in a singleton unboxed tuple, because the result
58 might be a Haskell closure pointer, we don't want to evaluate it. -}
59
60 ----------------------------------
61 cgOpApp :: StgOp -- The op
62 -> [StgArg] -- Arguments
63 -> Type -- Result type (always an unboxed tuple)
64 -> FCode ReturnKind
65
66 -- Foreign calls
67 cgOpApp (StgFCallOp fcall _) stg_args res_ty
68 = cgForeignCall fcall stg_args res_ty
69 -- Note [Foreign call results]
70
71 -- tagToEnum# is special: we need to pull the constructor
72 -- out of the table, and perform an appropriate return.
73
74 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
75 = ASSERT(isEnumerationTyCon tycon)
76 do { dflags <- getDynFlags
77 ; args' <- getNonVoidArgAmodes [arg]
78 ; let amode = case args' of [amode] -> amode
79 _ -> panic "TagToEnumOp had void arg"
80 ; emitReturn [tagToClosure dflags tycon amode] }
81 where
82 -- If you're reading this code in the attempt to figure
83 -- out why the compiler panic'ed here, it is probably because
84 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
85 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
86 -- That won't work.
87 tycon = tyConAppTyCon res_ty
88
89 cgOpApp (StgPrimOp primop) args res_ty
90 | primOpOutOfLine primop
91 = do { cmm_args <- getNonVoidArgAmodes args
92 ; let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
93 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
94
95 | ReturnsPrim VoidRep <- result_info
96 = do cgPrimOp [] primop args
97 emitReturn []
98
99 | ReturnsPrim rep <- result_info
100 = do dflags <- getDynFlags
101 res <- newTemp (primRepCmmType dflags rep)
102 cgPrimOp [res] primop args
103 emitReturn [CmmReg (CmmLocal res)]
104
105 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
106 = do (regs, _hints) <- newUnboxedTupleRegs res_ty
107 cgPrimOp regs primop args
108 emitReturn (map (CmmReg . CmmLocal) regs)
109
110 | ReturnsAlg tycon <- result_info
111 , isEnumerationTyCon tycon
112 -- c.f. cgExpr (...TagToEnumOp...)
113 = do dflags <- getDynFlags
114 tag_reg <- newTemp (bWord dflags)
115 cgPrimOp [tag_reg] primop args
116 emitReturn [tagToClosure dflags tycon
117 (CmmReg (CmmLocal tag_reg))]
118
119 | otherwise = panic "cgPrimop"
120 where
121 result_info = getPrimOpResultInfo primop
122
123 cgOpApp (StgPrimCallOp primcall) args _res_ty
124 = do { cmm_args <- getNonVoidArgAmodes args
125 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
126 ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
127
128 ---------------------------------------------------
129 cgPrimOp :: [LocalReg] -- where to put the results
130 -> PrimOp -- the op
131 -> [StgArg] -- arguments
132 -> FCode ()
133
134 cgPrimOp results op args
135 = do dflags <- getDynFlags
136 arg_exprs <- getNonVoidArgAmodes args
137 emitPrimOp dflags results op arg_exprs
138
139
140 ------------------------------------------------------------------------
141 -- Emitting code for a primop
142 ------------------------------------------------------------------------
143
144 emitPrimOp :: DynFlags
145 -> [LocalReg] -- where to put the results
146 -> PrimOp -- the op
147 -> [CmmExpr] -- arguments
148 -> FCode ()
149
150 -- First we handle various awkward cases specially. The remaining
151 -- easy cases are then handled by translateOp, defined below.
152
153 emitPrimOp dflags [res_r,res_c] IntAddCOp [aa,bb]
154 {-
155 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
156 C, and without needing any comparisons. This may not be the
157 fastest way to do it - if you have better code, please send it! --SDM
158
159 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
160
161 We currently don't make use of the r value if c is != 0 (i.e.
162 overflow), we just convert to big integers and try again. This
163 could be improved by making r and c the correct values for
164 plugging into a new J#.
165
166 { r = ((I_)(a)) + ((I_)(b)); \
167 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
168 >> (BITS_IN (I_) - 1); \
169 }
170 Wading through the mass of bracketry, it seems to reduce to:
171 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
172
173 -}
174 = emit $ catAGraphs [
175 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
176 mkAssign (CmmLocal res_c) $
177 CmmMachOp (mo_wordUShr dflags) [
178 CmmMachOp (mo_wordAnd dflags) [
179 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
180 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
181 ],
182 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
183 ]
184 ]
185
186
187 emitPrimOp dflags [res_r,res_c] IntSubCOp [aa,bb]
188 {- Similarly:
189 #define subIntCzh(r,c,a,b) \
190 { r = ((I_)(a)) - ((I_)(b)); \
191 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
192 >> (BITS_IN (I_) - 1); \
193 }
194
195 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
196 -}
197 = emit $ catAGraphs [
198 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
199 mkAssign (CmmLocal res_c) $
200 CmmMachOp (mo_wordUShr dflags) [
201 CmmMachOp (mo_wordAnd dflags) [
202 CmmMachOp (mo_wordXor dflags) [aa,bb],
203 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
204 ],
205 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
206 ]
207 ]
208
209
210 emitPrimOp _ [res] ParOp [arg]
211 =
212 -- for now, just implement this in a C function
213 -- later, we might want to inline it.
214 emitCCall
215 [(res,NoHint)]
216 (CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "newSpark"))))
217 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
218
219 emitPrimOp dflags [res] SparkOp [arg]
220 = do
221 -- returns the value of arg in res. We're going to therefore
222 -- refer to arg twice (once to pass to newSpark(), and once to
223 -- assign to res), so put it in a temporary.
224 tmp <- assignTemp arg
225 tmp2 <- newTemp (bWord dflags)
226 emitCCall
227 [(tmp2,NoHint)]
228 (CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "newSpark"))))
229 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
230 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
231
232 emitPrimOp dflags [res] GetCCSOfOp [arg]
233 = emitAssign (CmmLocal res) val
234 where
235 val
236 | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
237 | otherwise = CmmLit (zeroCLit dflags)
238
239 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
240 = emitAssign (CmmLocal res) curCCS
241
242 emitPrimOp dflags [res] ReadMutVarOp [mutv]
243 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSize dflags) (gcWord dflags))
244
245 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
246 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSize dflags)) var
247 emitCCall
248 [{-no results-}]
249 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
250 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
251
252 -- #define sizzeofByteArrayzh(r,a) \
253 -- r = ((StgArrWords *)(a))->bytes
254 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
255 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
256
257 -- #define sizzeofMutableByteArrayzh(r,a) \
258 -- r = ((StgArrWords *)(a))->bytes
259 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
260 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
261
262
263 -- #define touchzh(o) /* nothing */
264 emitPrimOp _ res@[] TouchOp args@[_arg]
265 = do emitPrimCall res MO_Touch args
266
267 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
268 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
269 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
270
271 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
272 emitPrimOp dflags [res] StableNameToIntOp [arg]
273 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
274
275 -- #define eqStableNamezh(r,sn1,sn2) \
276 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
277 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
278 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
279 cmmLoadIndexW dflags arg1 (fixedHdrSize dflags) (bWord dflags),
280 cmmLoadIndexW dflags arg2 (fixedHdrSize dflags) (bWord dflags)
281 ])
282
283
284 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
285 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
286
287 -- #define addrToHValuezh(r,a) r=(P_)a
288 emitPrimOp _ [res] AddrToAnyOp [arg]
289 = emitAssign (CmmLocal res) arg
290
291 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
292 -- Note: argument may be tagged!
293 emitPrimOp dflags [res] DataToTagOp [arg]
294 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
295
296 {- Freezing arrays-of-ptrs requires changing an info table, for the
297 benefit of the generational collector. It needs to scavenge mutable
298 objects, even if they are in old space. When they become immutable,
299 they can be removed from this scavenge list. -}
300
301 -- #define unsafeFreezzeArrayzh(r,a)
302 -- {
303 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
304 -- r = a;
305 -- }
306 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
307 = emit $ catAGraphs
308 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
309 mkAssign (CmmLocal res) arg ]
310 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
311 = emit $ catAGraphs
312 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
313 mkAssign (CmmLocal res) arg ]
314
315 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
316 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
317 = emitAssign (CmmLocal res) arg
318
319 -- Copying pointer arrays
320
321 emitPrimOp _ [] CopyArrayOp [src,src_off,dst,dst_off,n] =
322 doCopyArrayOp src src_off dst dst_off n
323 emitPrimOp _ [] CopyMutableArrayOp [src,src_off,dst,dst_off,n] =
324 doCopyMutableArrayOp src src_off dst dst_off n
325 emitPrimOp _ [res] CloneArrayOp [src,src_off,n] =
326 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
327 emitPrimOp _ [res] CloneMutableArrayOp [src,src_off,n] =
328 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
329 emitPrimOp _ [res] FreezeArrayOp [src,src_off,n] =
330 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
331 emitPrimOp _ [res] ThawArrayOp [src,src_off,n] =
332 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
333
334 emitPrimOp _ [] CopyArrayArrayOp [src,src_off,dst,dst_off,n] =
335 doCopyArrayOp src src_off dst dst_off n
336 emitPrimOp _ [] CopyMutableArrayArrayOp [src,src_off,dst,dst_off,n] =
337 doCopyMutableArrayOp src src_off dst dst_off n
338
339 -- Reading/writing pointer arrays
340
341 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
342 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
343 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
344
345 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
346 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
347 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
348 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
349 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
350 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
351 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
352 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
353 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
354 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
355
356 emitPrimOp dflags [res] SizeofArrayOp [arg]
357 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags))
358 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
359 = emitPrimOp dflags [res] SizeofArrayOp [arg]
360 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
361 = emitPrimOp dflags [res] SizeofArrayOp [arg]
362 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
363 = emitPrimOp dflags [res] SizeofArrayOp [arg]
364
365 -- IndexXXXoffAddr
366
367 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
368 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
369 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
370 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
371 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
372 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
373 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
374 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
375 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
376 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
377 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
378 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
379 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
380 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
381 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
382 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
383
384 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
385
386 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
387 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
388 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
389 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
390 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
391 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
392 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
393 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
394 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
395 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
396 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
397 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
398 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
399 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
400 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
401 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
402
403 -- IndexXXXArray
404
405 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
406 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
407 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
408 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
409 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
410 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
411 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
412 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
413 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
414 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
415 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
416 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
417 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
418 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
419 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
420 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
421
422 -- ReadXXXArray, identical to IndexXXXArray.
423
424 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
425 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
426 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
427 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
428 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
429 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
430 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
431 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
432 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
433 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
434 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
435 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
436 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
437 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
438 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
439 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
440
441 -- WriteXXXoffAddr
442
443 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) res args
444 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) res args
445 emitPrimOp _ res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing res args
446 emitPrimOp _ res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing res args
447 emitPrimOp _ res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing res args
448 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing res args
449 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing res args
450 emitPrimOp _ res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing res args
451 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) res args
452 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) res args
453 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) res args
454 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing res args
455 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) res args
456 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) res args
457 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) res args
458 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing res args
459
460 -- WriteXXXArray
461
462 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) res args
463 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) res args
464 emitPrimOp _ res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing res args
465 emitPrimOp _ res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing res args
466 emitPrimOp _ res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing res args
467 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing res args
468 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing res args
469 emitPrimOp _ res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing res args
470 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) res args
471 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) res args
472 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) res args
473 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing res args
474 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) res args
475 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) res args
476 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) res args
477 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing res args
478
479 -- Copying and setting byte arrays
480 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
481 doCopyByteArrayOp src src_off dst dst_off n
482 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
483 doCopyMutableByteArrayOp src src_off dst dst_off n
484 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
485 doSetByteArrayOp ba off len c
486
487 -- Population count
488 emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
489 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
490 emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
491 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
492 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
493
494 -- Unsigned int to floating point conversions
495 emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
496 (MO_UF_Conv W32) [w]
497 emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
498 (MO_UF_Conv W64) [w]
499
500 -- The rest just translate straightforwardly
501 emitPrimOp dflags [res] op [arg]
502 | nopOp op
503 = emitAssign (CmmLocal res) arg
504
505 | Just (mop,rep) <- narrowOp op
506 = emitAssign (CmmLocal res) $
507 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
508
509 emitPrimOp dflags r@[res] op args
510 | Just prim <- callishOp op
511 = do emitPrimCall r prim args
512
513 | Just mop <- translateOp dflags op
514 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
515 emit stmt
516
517 emitPrimOp dflags results op args
518 = case callishPrimOpSupported dflags op of
519 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
520 Right gen -> gen results args
521
522 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
523
524 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
525 callishPrimOpSupported dflags op
526 = case op of
527 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
528 | otherwise -> Right (genericIntQuotRemOp dflags)
529
530 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
531 | otherwise -> Right (genericWordQuotRemOp dflags)
532
533 WordQuotRem2Op | ncg && x86ish -> Left (MO_U_QuotRem2 (wordWidth dflags))
534 | otherwise -> Right (genericWordQuotRem2Op dflags)
535
536 WordAdd2Op | ncg && x86ish -> Left (MO_Add2 (wordWidth dflags))
537 | otherwise -> Right genericWordAdd2Op
538
539 WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags))
540 | otherwise -> Right genericWordMul2Op
541
542 _ -> panic "emitPrimOp: can't translate PrimOp" (ppr op)
543 where
544 ncg = case hscTarget dflags of
545 HscAsm -> True
546 _ -> False
547
548 x86ish = case platformArch (targetPlatform dflags) of
549 ArchX86 -> True
550 ArchX86_64 -> True
551 _ -> False
552
553 genericIntQuotRemOp :: DynFlags -> GenericOp
554 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
555 = emit $ mkAssign (CmmLocal res_q)
556 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
557 mkAssign (CmmLocal res_r)
558 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
559 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
560
561 genericWordQuotRemOp :: DynFlags -> GenericOp
562 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
563 = emit $ mkAssign (CmmLocal res_q)
564 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
565 mkAssign (CmmLocal res_r)
566 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
567 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
568
569 genericWordQuotRem2Op :: DynFlags -> GenericOp
570 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
571 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
572 where ty = cmmExprType dflags arg_x_high
573 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
574 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
575 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
576 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
577 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
578 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
579 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
580 zero = lit 0
581 one = lit 1
582 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
583 lit i = CmmLit (CmmInt i (wordWidth dflags))
584
585 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
586 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
587 mkAssign (CmmLocal res_r) high)
588 f i acc high low =
589 do roverflowedBit <- newTemp ty
590 rhigh' <- newTemp ty
591 rhigh'' <- newTemp ty
592 rlow' <- newTemp ty
593 risge <- newTemp ty
594 racc' <- newTemp ty
595 let high' = CmmReg (CmmLocal rhigh')
596 isge = CmmReg (CmmLocal risge)
597 overflowedBit = CmmReg (CmmLocal roverflowedBit)
598 let this = catAGraphs
599 [mkAssign (CmmLocal roverflowedBit)
600 (shr high negone),
601 mkAssign (CmmLocal rhigh')
602 (or (shl high one) (shr low negone)),
603 mkAssign (CmmLocal rlow')
604 (shl low one),
605 mkAssign (CmmLocal risge)
606 (or (overflowedBit `ne` zero)
607 (high' `ge` arg_y)),
608 mkAssign (CmmLocal rhigh'')
609 (high' `minus` (arg_y `times` isge)),
610 mkAssign (CmmLocal racc')
611 (or (shl acc one) isge)]
612 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
613 (CmmReg (CmmLocal rhigh''))
614 (CmmReg (CmmLocal rlow'))
615 return (this <*> rest)
616 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
617
618 genericWordAdd2Op :: GenericOp
619 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
620 = do dflags <- getDynFlags
621 r1 <- newTemp (cmmExprType dflags arg_x)
622 r2 <- newTemp (cmmExprType dflags arg_x)
623 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
624 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
625 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
626 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
627 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
628 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
629 (wordWidth dflags))
630 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
631 emit $ catAGraphs
632 [mkAssign (CmmLocal r1)
633 (add (bottomHalf arg_x) (bottomHalf arg_y)),
634 mkAssign (CmmLocal r2)
635 (add (topHalf (CmmReg (CmmLocal r1)))
636 (add (topHalf arg_x) (topHalf arg_y))),
637 mkAssign (CmmLocal res_h)
638 (topHalf (CmmReg (CmmLocal r2))),
639 mkAssign (CmmLocal res_l)
640 (or (toTopHalf (CmmReg (CmmLocal r2)))
641 (bottomHalf (CmmReg (CmmLocal r1))))]
642 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
643
644 genericWordMul2Op :: GenericOp
645 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
646 = do dflags <- getDynFlags
647 let t = cmmExprType dflags arg_x
648 xlyl <- liftM CmmLocal $ newTemp t
649 xlyh <- liftM CmmLocal $ newTemp t
650 xhyl <- liftM CmmLocal $ newTemp t
651 r <- liftM CmmLocal $ newTemp t
652 -- This generic implementation is very simple and slow. We might
653 -- well be able to do better, but for now this at least works.
654 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
655 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
656 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
657 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
658 sum = foldl1 add
659 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
660 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
661 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
662 (wordWidth dflags))
663 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
664 emit $ catAGraphs
665 [mkAssign xlyl
666 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
667 mkAssign xlyh
668 (mul (bottomHalf arg_x) (topHalf arg_y)),
669 mkAssign xhyl
670 (mul (topHalf arg_x) (bottomHalf arg_y)),
671 mkAssign r
672 (sum [topHalf (CmmReg xlyl),
673 bottomHalf (CmmReg xhyl),
674 bottomHalf (CmmReg xlyh)]),
675 mkAssign (CmmLocal res_l)
676 (or (bottomHalf (CmmReg xlyl))
677 (toTopHalf (CmmReg r))),
678 mkAssign (CmmLocal res_h)
679 (sum [mul (topHalf arg_x) (topHalf arg_y),
680 topHalf (CmmReg xhyl),
681 topHalf (CmmReg xlyh),
682 topHalf (CmmReg r)])]
683 genericWordMul2Op _ _ = panic "genericWordMul2Op"
684
685 -- These PrimOps are NOPs in Cmm
686
687 nopOp :: PrimOp -> Bool
688 nopOp Int2WordOp = True
689 nopOp Word2IntOp = True
690 nopOp Int2AddrOp = True
691 nopOp Addr2IntOp = True
692 nopOp ChrOp = True -- Int# and Char# are rep'd the same
693 nopOp OrdOp = True
694 nopOp _ = False
695
696 -- These PrimOps turn into double casts
697
698 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
699 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
700 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
701 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
702 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
703 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
704 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
705 narrowOp _ = Nothing
706
707 -- Native word signless ops
708
709 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
710 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
711 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
712 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
713 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
714 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
715 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
716
717 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
718 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
719 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
720 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
721 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
722 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
723
724 translateOp dflags AndOp = Just (mo_wordAnd dflags)
725 translateOp dflags OrOp = Just (mo_wordOr dflags)
726 translateOp dflags XorOp = Just (mo_wordXor dflags)
727 translateOp dflags NotOp = Just (mo_wordNot dflags)
728 translateOp dflags SllOp = Just (mo_wordShl dflags)
729 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
730
731 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
732
733 -- Native word signed ops
734
735 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
736 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
737 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
738 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
739 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
740
741
742 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
743 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
744 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
745 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
746
747 translateOp dflags ISllOp = Just (mo_wordShl dflags)
748 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
749 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
750
751 -- Native word unsigned ops
752
753 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
754 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
755 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
756 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
757
758 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
759 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
760 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
761
762 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
763 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
764 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
765 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
766
767 -- Char# ops
768
769 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
770 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
771 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
772 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
773 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
774 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
775
776 -- Double ops
777
778 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
779 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
780 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
781 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
782 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
783 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
784
785 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
786 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
787 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
788 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
789 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
790
791 -- Float ops
792
793 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
794 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
795 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
796 translateOp _ FloatLeOp = Just (MO_F_Le W32)
797 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
798 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
799
800 translateOp _ FloatAddOp = Just (MO_F_Add W32)
801 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
802 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
803 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
804 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
805
806 -- Conversions
807
808 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
809 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
810
811 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
812 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
813
814 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
815 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
816
817 -- Word comparisons masquerading as more exotic things.
818
819 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
820 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
821 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
822 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
823 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
824 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
825 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
826
827 translateOp _ _ = Nothing
828
829 -- These primops are implemented by CallishMachOps, because they sometimes
830 -- turn into foreign calls depending on the backend.
831
832 callishOp :: PrimOp -> Maybe CallishMachOp
833 callishOp DoublePowerOp = Just MO_F64_Pwr
834 callishOp DoubleSinOp = Just MO_F64_Sin
835 callishOp DoubleCosOp = Just MO_F64_Cos
836 callishOp DoubleTanOp = Just MO_F64_Tan
837 callishOp DoubleSinhOp = Just MO_F64_Sinh
838 callishOp DoubleCoshOp = Just MO_F64_Cosh
839 callishOp DoubleTanhOp = Just MO_F64_Tanh
840 callishOp DoubleAsinOp = Just MO_F64_Asin
841 callishOp DoubleAcosOp = Just MO_F64_Acos
842 callishOp DoubleAtanOp = Just MO_F64_Atan
843 callishOp DoubleLogOp = Just MO_F64_Log
844 callishOp DoubleExpOp = Just MO_F64_Exp
845 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
846
847 callishOp FloatPowerOp = Just MO_F32_Pwr
848 callishOp FloatSinOp = Just MO_F32_Sin
849 callishOp FloatCosOp = Just MO_F32_Cos
850 callishOp FloatTanOp = Just MO_F32_Tan
851 callishOp FloatSinhOp = Just MO_F32_Sinh
852 callishOp FloatCoshOp = Just MO_F32_Cosh
853 callishOp FloatTanhOp = Just MO_F32_Tanh
854 callishOp FloatAsinOp = Just MO_F32_Asin
855 callishOp FloatAcosOp = Just MO_F32_Acos
856 callishOp FloatAtanOp = Just MO_F32_Atan
857 callishOp FloatLogOp = Just MO_F32_Log
858 callishOp FloatExpOp = Just MO_F32_Exp
859 callishOp FloatSqrtOp = Just MO_F32_Sqrt
860
861 callishOp _ = Nothing
862
863 ------------------------------------------------------------------------------
864 -- Helpers for translating various minor variants of array indexing.
865
866 doIndexOffAddrOp :: Maybe MachOp -> CmmType -> [LocalReg] -> [CmmExpr] -> FCode ()
867 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
868 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx
869 doIndexOffAddrOp _ _ _ _
870 = panic "CgPrimOp: doIndexOffAddrOp"
871
872 doIndexByteArrayOp :: Maybe MachOp -> CmmType -> [LocalReg] -> [CmmExpr] -> FCode ()
873 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
874 = do dflags <- getDynFlags
875 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx
876 doIndexByteArrayOp _ _ _ _
877 = panic "CgPrimOp: doIndexByteArrayOp"
878
879 doReadPtrArrayOp :: LocalReg -> CmmExpr -> CmmExpr -> FCode ()
880 doReadPtrArrayOp res addr idx
881 = do dflags <- getDynFlags
882 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr idx
883
884
885 doWriteOffAddrOp :: Maybe MachOp -> [LocalReg] -> [CmmExpr] -> FCode ()
886 doWriteOffAddrOp maybe_pre_write_cast [] [addr,idx,val]
887 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx val
888 doWriteOffAddrOp _ _ _
889 = panic "CgPrimOp: doWriteOffAddrOp"
890
891 doWriteByteArrayOp :: Maybe MachOp -> [LocalReg] -> [CmmExpr] -> FCode ()
892 doWriteByteArrayOp maybe_pre_write_cast [] [addr,idx,val]
893 = do dflags <- getDynFlags
894 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx val
895 doWriteByteArrayOp _ _ _
896 = panic "CgPrimOp: doWriteByteArrayOp"
897
898 doWritePtrArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
899 doWritePtrArrayOp addr idx val
900 = do dflags <- getDynFlags
901 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr idx val
902 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
903 -- the write barrier. We must write a byte into the mark table:
904 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
905 emit $ mkStore (
906 cmmOffsetExpr dflags
907 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
908 (loadArrPtrsSize dflags addr))
909 (CmmMachOp (mo_wordUShr dflags) [idx,
910 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
911 ) (CmmLit (CmmInt 1 W8))
912
913 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
914 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
915 where off = fixedHdrSize dflags * wORD_SIZE dflags + oFFSET_StgMutArrPtrs_ptrs dflags
916
917 mkBasicIndexedRead :: ByteOff -> Maybe MachOp -> CmmType
918 -> LocalReg -> CmmExpr -> CmmExpr -> FCode ()
919 mkBasicIndexedRead off Nothing read_rep res base idx
920 = do dflags <- getDynFlags
921 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off read_rep base idx)
922 mkBasicIndexedRead off (Just cast) read_rep res base idx
923 = do dflags <- getDynFlags
924 emitAssign (CmmLocal res) (CmmMachOp cast [
925 cmmLoadIndexOffExpr dflags off read_rep base idx])
926
927 mkBasicIndexedWrite :: ByteOff -> Maybe MachOp
928 -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
929 mkBasicIndexedWrite off Nothing base idx val
930 = do dflags <- getDynFlags
931 emitStore (cmmIndexOffExpr dflags off (typeWidth (cmmExprType dflags val)) base idx) val
932 mkBasicIndexedWrite off (Just cast) base idx val
933 = mkBasicIndexedWrite off Nothing base idx (CmmMachOp cast [val])
934
935 -- ----------------------------------------------------------------------------
936 -- Misc utils
937
938 cmmIndexOffExpr :: DynFlags -> ByteOff -> Width -> CmmExpr -> CmmExpr -> CmmExpr
939 cmmIndexOffExpr dflags off width base idx
940 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
941
942 cmmLoadIndexOffExpr :: DynFlags -> ByteOff -> CmmType -> CmmExpr -> CmmExpr -> CmmExpr
943 cmmLoadIndexOffExpr dflags off ty base idx
944 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth ty) base idx) ty
945
946 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
947 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
948
949 -- ----------------------------------------------------------------------------
950 -- Copying byte arrays
951
952 -- | Takes a source 'ByteArray#', an offset in the source array, a
953 -- destination 'MutableByteArray#', an offset into the destination
954 -- array, and the number of bytes to copy. Copies the given number of
955 -- bytes from the source array to the destination array.
956 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
957 -> FCode ()
958 doCopyByteArrayOp = emitCopyByteArray copy
959 where
960 -- Copy data (we assume the arrays aren't overlapping since
961 -- they're of different types)
962 copy _src _dst dst_p src_p bytes =
963 do dflags <- getDynFlags
964 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
965
966 -- | Takes a source 'MutableByteArray#', an offset in the source
967 -- array, a destination 'MutableByteArray#', an offset into the
968 -- destination array, and the number of bytes to copy. Copies the
969 -- given number of bytes from the source array to the destination
970 -- array.
971 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
972 -> FCode ()
973 doCopyMutableByteArrayOp = emitCopyByteArray copy
974 where
975 -- The only time the memory might overlap is when the two arrays
976 -- we were provided are the same array!
977 -- TODO: Optimize branch for common case of no aliasing.
978 copy src dst dst_p src_p bytes = do
979 dflags <- getDynFlags
980 [moveCall, cpyCall] <- forkAlts [
981 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags 1),
982 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
983 ]
984 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
985
986 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
987 -> FCode ())
988 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
989 -> FCode ()
990 emitCopyByteArray copy src src_off dst dst_off n = do
991 dflags <- getDynFlags
992 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
993 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
994 copy src dst dst_p src_p n
995
996 -- ----------------------------------------------------------------------------
997 -- Setting byte arrays
998
999 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1000 -- and a byte, and sets each of the selected bytes in the array to the
1001 -- character.
1002 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1003 -> FCode ()
1004 doSetByteArrayOp ba off len c
1005 = do dflags <- getDynFlags
1006 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1007 emitMemsetCall p c len (mkIntExpr dflags 1)
1008
1009 -- ----------------------------------------------------------------------------
1010 -- Copying pointer arrays
1011
1012 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1013 -- nowhere else in the code generator. This is mostly because these
1014 -- "primitive" ops result in a surprisingly large amount of code. It
1015 -- will likely be worthwhile to optimize what is emitted here, so that
1016 -- our optimization passes don't waste time repeatedly optimizing the
1017 -- same bits of code.
1018
1019 -- More closely imitates 'assignTemp' from the old code generator, which
1020 -- returns a CmmExpr rather than a LocalReg.
1021 assignTempE :: CmmExpr -> FCode CmmExpr
1022 assignTempE e = do
1023 t <- assignTemp e
1024 return (CmmReg (CmmLocal t))
1025
1026 -- | Takes a source 'Array#', an offset in the source array, a
1027 -- destination 'MutableArray#', an offset into the destination array,
1028 -- and the number of elements to copy. Copies the given number of
1029 -- elements from the source array to the destination array.
1030 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1031 -> FCode ()
1032 doCopyArrayOp = emitCopyArray copy
1033 where
1034 -- Copy data (we assume the arrays aren't overlapping since
1035 -- they're of different types)
1036 copy _src _dst dst_p src_p bytes =
1037 do dflags <- getDynFlags
1038 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1039
1040
1041 -- | Takes a source 'MutableArray#', an offset in the source array, a
1042 -- destination 'MutableArray#', an offset into the destination array,
1043 -- and the number of elements to copy. Copies the given number of
1044 -- elements from the source array to the destination array.
1045 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1046 -> FCode ()
1047 doCopyMutableArrayOp = emitCopyArray copy
1048 where
1049 -- The only time the memory might overlap is when the two arrays
1050 -- we were provided are the same array!
1051 -- TODO: Optimize branch for common case of no aliasing.
1052 copy src dst dst_p src_p bytes = do
1053 dflags <- getDynFlags
1054 [moveCall, cpyCall] <- forkAlts [
1055 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags)),
1056 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1057 ]
1058 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1059
1060 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1061 -> FCode ())
1062 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1063 -> FCode ()
1064 emitCopyArray copy src0 src_off0 dst0 dst_off0 n0 = do
1065 dflags <- getDynFlags
1066 n <- assignTempE n0
1067 nonzero <- getCode $ do
1068 -- Passed as arguments (be careful)
1069 src <- assignTempE src0
1070 src_off <- assignTempE src_off0
1071 dst <- assignTempE dst0
1072 dst_off <- assignTempE dst_off0
1073
1074 -- Set the dirty bit in the header.
1075 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1076
1077 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst (arrPtrsHdrSize dflags)
1078 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1079 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1080 bytes <- assignTempE $ cmmMulWord dflags n (mkIntExpr dflags (wORD_SIZE dflags))
1081
1082 copy src dst dst_p src_p bytes
1083
1084 -- The base address of the destination card table
1085 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p (loadArrPtrsSize dflags dst)
1086
1087 emitSetCards dst_off dst_cards_p n
1088
1089 emit =<< mkCmmIfThen (cmmNeWord dflags n (mkIntExpr dflags 0)) nonzero
1090
1091 -- | Takes an info table label, a register to return the newly
1092 -- allocated array in, a source array, an offset in the source array,
1093 -- and the number of elements to copy. Allocates a new array and
1094 -- initializes it form the source array.
1095 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> CmmExpr
1096 -> FCode ()
1097 emitCloneArray info_p res_r src0 src_off0 n0 = do
1098 dflags <- getDynFlags
1099 let arrPtrsHdrSizeW dflags = mkIntExpr dflags (fixedHdrSize dflags +
1100 (sIZEOF_StgMutArrPtrs_NoHdr dflags `div` wORD_SIZE dflags))
1101 myCapability = cmmSubWord dflags (CmmReg baseReg) (mkIntExpr dflags (oFFSET_Capability_r dflags))
1102 -- Passed as arguments (be careful)
1103 src <- assignTempE src0
1104 src_off <- assignTempE src_off0
1105 n <- assignTempE n0
1106
1107 card_bytes <- assignTempE $ cardRoundUp dflags n
1108 size <- assignTempE $ cmmAddWord dflags n (bytesToWordsRoundUp dflags card_bytes)
1109 words <- assignTempE $ cmmAddWord dflags (arrPtrsHdrSizeW dflags) size
1110
1111 arr_r <- newTemp (bWord dflags)
1112 emitAllocateCall arr_r myCapability words
1113 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags)) (cmmMulWord dflags n (wordSize dflags))
1114 (zeroExpr dflags)
1115
1116 let arr = CmmReg (CmmLocal arr_r)
1117 emitSetDynHdr arr (CmmLit (CmmLabel info_p)) curCCS
1118 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1119 oFFSET_StgMutArrPtrs_ptrs dflags)) n
1120 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1121 oFFSET_StgMutArrPtrs_size dflags)) size
1122
1123 dst_p <- assignTempE $ cmmOffsetB dflags arr (arrPtrsHdrSize dflags)
1124 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags))
1125 src_off
1126
1127 emitMemcpyCall dst_p src_p (cmmMulWord dflags n (wordSize dflags)) (mkIntExpr dflags (wORD_SIZE dflags))
1128
1129 emitMemsetCall (cmmOffsetExprW dflags dst_p n)
1130 (mkIntExpr dflags 1)
1131 card_bytes
1132 (mkIntExpr dflags (wORD_SIZE dflags))
1133 emit $ mkAssign (CmmLocal res_r) arr
1134
1135 -- | Takes and offset in the destination array, the base address of
1136 -- the card table, and the number of elements affected (*not* the
1137 -- number of cards). The number of elements may not be zero.
1138 -- Marks the relevant cards as dirty.
1139 emitSetCards :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1140 emitSetCards dst_start dst_cards_start n = do
1141 dflags <- getDynFlags
1142 start_card <- assignTempE $ card dflags dst_start
1143 let end_card = card dflags (cmmSubWord dflags (cmmAddWord dflags dst_start n) (mkIntExpr dflags 1))
1144 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
1145 (mkIntExpr dflags 1)
1146 (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
1147 (mkIntExpr dflags 1) -- no alignment (1 byte)
1148
1149 -- Convert an element index to a card index
1150 card :: DynFlags -> CmmExpr -> CmmExpr
1151 card dflags i = cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
1152
1153 -- Convert a number of elements to a number of cards, rounding up
1154 cardRoundUp :: DynFlags -> CmmExpr -> CmmExpr
1155 cardRoundUp dflags i = card dflags (cmmAddWord dflags i (mkIntExpr dflags ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1)))
1156
1157 bytesToWordsRoundUp :: DynFlags -> CmmExpr -> CmmExpr
1158 bytesToWordsRoundUp dflags e = cmmQuotWord dflags (cmmAddWord dflags e (mkIntExpr dflags (wORD_SIZE dflags - 1)))
1159 (wordSize dflags)
1160
1161 wordSize :: DynFlags -> CmmExpr
1162 wordSize dflags = mkIntExpr dflags (wORD_SIZE dflags)
1163
1164 -- | Emit a call to @memcpy@.
1165 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1166 emitMemcpyCall dst src n align = do
1167 emitPrimCall
1168 [ {-no results-} ]
1169 MO_Memcpy
1170 [ dst, src, n, align ]
1171
1172 -- | Emit a call to @memmove@.
1173 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1174 emitMemmoveCall dst src n align = do
1175 emitPrimCall
1176 [ {- no results -} ]
1177 MO_Memmove
1178 [ dst, src, n, align ]
1179
1180 -- | Emit a call to @memset@. The second argument must fit inside an
1181 -- unsigned char.
1182 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1183 emitMemsetCall dst c n align = do
1184 emitPrimCall
1185 [ {- no results -} ]
1186 MO_Memset
1187 [ dst, c, n, align ]
1188
1189 -- | Emit a call to @allocate@.
1190 emitAllocateCall :: LocalReg -> CmmExpr -> CmmExpr -> FCode ()
1191 emitAllocateCall res cap n = do
1192 emitCCall
1193 [ (res, AddrHint) ]
1194 allocate
1195 [ (cap, AddrHint)
1196 , (n, NoHint)
1197 ]
1198 where
1199 allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
1200 ForeignLabelInExternalPackage IsFunction))
1201
1202 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1203 emitPopCntCall res x width = do
1204 emitPrimCall
1205 [ res ]
1206 (MO_PopCnt width)
1207 [ x ]