cbb2aa70bd35139a5ac5e98cb5a65a3bb463766f
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C--: primitive operations
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 {-# OPTIONS -fno-warn-tabs #-}
10 -- The above warning supression flag is a temporary kludge.
11 -- While working on this module you are encouraged to remove it and
12 -- detab the module (please do the detabbing in a separate patch). See
13 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
14 -- for details
15
16 module StgCmmPrim (
17 cgOpApp,
18 cgPrimOp -- internal(ish), used by cgCase to get code for a
19 -- comparison without also turning it into a Bool.
20 ) where
21
22 #include "HsVersions.h"
23
24 import StgCmmLayout
25 import StgCmmForeign
26 import StgCmmEnv
27 import StgCmmMonad
28 import StgCmmUtils
29 import StgCmmTicky
30 import StgCmmHeap
31 import StgCmmProf
32
33 import DynFlags
34 import Platform
35 import BasicTypes
36 import MkGraph
37 import StgSyn
38 import Cmm
39 import Type ( Type, tyConAppTyCon )
40 import TyCon
41 import CLabel
42 import CmmUtils
43 import PrimOp
44 import SMRep
45 import Module
46 import FastString
47 import Outputable
48 import Util
49
50 import Control.Monad (liftM)
51 import Data.Bits
52
53 ------------------------------------------------------------------------
54 -- Primitive operations and foreign calls
55 ------------------------------------------------------------------------
56
57 {- Note [Foreign call results]
58 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
59 A foreign call always returns an unboxed tuple of results, one
60 of which is the state token. This seems to happen even for pure
61 calls.
62
63 Even if we returned a single result for pure calls, it'd still be
64 right to wrap it in a singleton unboxed tuple, because the result
65 might be a Haskell closure pointer, we don't want to evaluate it. -}
66
67 ----------------------------------
68 cgOpApp :: StgOp -- The op
69 -> [StgArg] -- Arguments
70 -> Type -- Result type (always an unboxed tuple)
71 -> FCode ReturnKind
72
73 -- Foreign calls
74 cgOpApp (StgFCallOp fcall _) stg_args res_ty
75 = cgForeignCall fcall stg_args res_ty
76 -- Note [Foreign call results]
77
78 -- tagToEnum# is special: we need to pull the constructor
79 -- out of the table, and perform an appropriate return.
80
81 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
82 = ASSERT(isEnumerationTyCon tycon)
83 do { dflags <- getDynFlags
84 ; args' <- getNonVoidArgAmodes [arg]
85 ; let amode = case args' of [amode] -> amode
86 _ -> panic "TagToEnumOp had void arg"
87 ; emitReturn [tagToClosure dflags tycon amode] }
88 where
89 -- If you're reading this code in the attempt to figure
90 -- out why the compiler panic'ed here, it is probably because
91 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
92 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
93 -- That won't work.
94 tycon = tyConAppTyCon res_ty
95
96 cgOpApp (StgPrimOp primop) args res_ty
97 | primOpOutOfLine primop
98 = do { cmm_args <- getNonVoidArgAmodes args
99 ; let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
100 ; emitCall (PrimOpCall, PrimOpReturn) fun cmm_args }
101
102 | ReturnsPrim VoidRep <- result_info
103 = do cgPrimOp [] primop args
104 emitReturn []
105
106 | ReturnsPrim rep <- result_info
107 = do dflags <- getDynFlags
108 res <- newTemp (primRepCmmType dflags rep)
109 cgPrimOp [res] primop args
110 emitReturn [CmmReg (CmmLocal res)]
111
112 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
113 = do (regs, _hints) <- newUnboxedTupleRegs res_ty
114 cgPrimOp regs primop args
115 emitReturn (map (CmmReg . CmmLocal) regs)
116
117 | ReturnsAlg tycon <- result_info
118 , isEnumerationTyCon tycon
119 -- c.f. cgExpr (...TagToEnumOp...)
120 = do dflags <- getDynFlags
121 tag_reg <- newTemp (bWord dflags)
122 cgPrimOp [tag_reg] primop args
123 emitReturn [tagToClosure dflags tycon
124 (CmmReg (CmmLocal tag_reg))]
125
126 | otherwise = panic "cgPrimop"
127 where
128 result_info = getPrimOpResultInfo primop
129
130 cgOpApp (StgPrimCallOp primcall) args _res_ty
131 = do { cmm_args <- getNonVoidArgAmodes args
132 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
133 ; emitCall (PrimOpCall, PrimOpReturn) fun cmm_args }
134
135 ---------------------------------------------------
136 cgPrimOp :: [LocalReg] -- where to put the results
137 -> PrimOp -- the op
138 -> [StgArg] -- arguments
139 -> FCode ()
140
141 cgPrimOp results op args
142 = do dflags <- getDynFlags
143 arg_exprs <- getNonVoidArgAmodes args
144 emitPrimOp dflags results op arg_exprs
145
146
147 ------------------------------------------------------------------------
148 -- Emitting code for a primop
149 ------------------------------------------------------------------------
150
151 emitPrimOp :: DynFlags
152 -> [LocalReg] -- where to put the results
153 -> PrimOp -- the op
154 -> [CmmExpr] -- arguments
155 -> FCode ()
156
157 -- First we handle various awkward cases specially. The remaining
158 -- easy cases are then handled by translateOp, defined below.
159
160 emitPrimOp dflags [res_r,res_c] IntAddCOp [aa,bb]
161 {-
162 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
163 C, and without needing any comparisons. This may not be the
164 fastest way to do it - if you have better code, please send it! --SDM
165
166 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
167
168 We currently don't make use of the r value if c is != 0 (i.e.
169 overflow), we just convert to big integers and try again. This
170 could be improved by making r and c the correct values for
171 plugging into a new J#.
172
173 { r = ((I_)(a)) + ((I_)(b)); \
174 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
175 >> (BITS_IN (I_) - 1); \
176 }
177 Wading through the mass of bracketry, it seems to reduce to:
178 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
179
180 -}
181 = emit $ catAGraphs [
182 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
183 mkAssign (CmmLocal res_c) $
184 CmmMachOp (mo_wordUShr dflags) [
185 CmmMachOp (mo_wordAnd dflags) [
186 CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
187 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
188 ],
189 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
190 ]
191 ]
192
193
194 emitPrimOp dflags [res_r,res_c] IntSubCOp [aa,bb]
195 {- Similarly:
196 #define subIntCzh(r,c,a,b) \
197 { r = ((I_)(a)) - ((I_)(b)); \
198 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
199 >> (BITS_IN (I_) - 1); \
200 }
201
202 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
203 -}
204 = emit $ catAGraphs [
205 mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
206 mkAssign (CmmLocal res_c) $
207 CmmMachOp (mo_wordUShr dflags) [
208 CmmMachOp (mo_wordAnd dflags) [
209 CmmMachOp (mo_wordXor dflags) [aa,bb],
210 CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
211 ],
212 mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
213 ]
214 ]
215
216
217 emitPrimOp _ [res] ParOp [arg]
218 =
219 -- for now, just implement this in a C function
220 -- later, we might want to inline it.
221 emitCCall
222 [(res,NoHint)]
223 (CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "newSpark"))))
224 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
225
226 emitPrimOp dflags [res] SparkOp [arg]
227 = do
228 -- returns the value of arg in res. We're going to therefore
229 -- refer to arg twice (once to pass to newSpark(), and once to
230 -- assign to res), so put it in a temporary.
231 tmp <- assignTemp arg
232 tmp2 <- newTemp (bWord dflags)
233 emitCCall
234 [(tmp2,NoHint)]
235 (CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "newSpark"))))
236 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
237 emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
238
239 emitPrimOp dflags [res] GetCCSOfOp [arg]
240 = emitAssign (CmmLocal res) val
241 where
242 val
243 | dopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
244 | otherwise = CmmLit (zeroCLit dflags)
245
246 emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
247 = emitAssign (CmmLocal res) curCCS
248
249 emitPrimOp dflags [res] ReadMutVarOp [mutv]
250 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSize dflags) (gcWord dflags))
251
252 emitPrimOp dflags [] WriteMutVarOp [mutv,var]
253 = do emitStore (cmmOffsetW dflags mutv (fixedHdrSize dflags)) var
254 emitCCall
255 [{-no results-}]
256 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
257 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
258
259 -- #define sizzeofByteArrayzh(r,a) \
260 -- r = ((StgArrWords *)(a))->bytes
261 emitPrimOp dflags [res] SizeofByteArrayOp [arg]
262 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
263
264 -- #define sizzeofMutableByteArrayzh(r,a) \
265 -- r = ((StgArrWords *)(a))->bytes
266 emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
267 = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
268
269
270 -- #define touchzh(o) /* nothing */
271 emitPrimOp _ res@[] TouchOp args@[_arg]
272 = do emitPrimCall res MO_Touch args
273
274 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
275 emitPrimOp dflags [res] ByteArrayContents_Char [arg]
276 = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
277
278 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
279 emitPrimOp dflags [res] StableNameToIntOp [arg]
280 = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags))
281
282 -- #define eqStableNamezh(r,sn1,sn2) \
283 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
284 emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
285 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
286 cmmLoadIndexW dflags arg1 (fixedHdrSize dflags) (bWord dflags),
287 cmmLoadIndexW dflags arg2 (fixedHdrSize dflags) (bWord dflags)
288 ])
289
290
291 emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
292 = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
293
294 -- #define addrToHValuezh(r,a) r=(P_)a
295 emitPrimOp _ [res] AddrToAnyOp [arg]
296 = emitAssign (CmmLocal res) arg
297
298 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
299 -- Note: argument may be tagged!
300 emitPrimOp dflags [res] DataToTagOp [arg]
301 = emitAssign (CmmLocal res) (getConstrTag dflags (cmmUntag dflags arg))
302
303 {- Freezing arrays-of-ptrs requires changing an info table, for the
304 benefit of the generational collector. It needs to scavenge mutable
305 objects, even if they are in old space. When they become immutable,
306 they can be removed from this scavenge list. -}
307
308 -- #define unsafeFreezzeArrayzh(r,a)
309 -- {
310 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
311 -- r = a;
312 -- }
313 emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
314 = emit $ catAGraphs
315 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
316 mkAssign (CmmLocal res) arg ]
317 emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
318 = emit $ catAGraphs
319 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
320 mkAssign (CmmLocal res) arg ]
321
322 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
323 emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
324 = emitAssign (CmmLocal res) arg
325
326 -- Copying pointer arrays
327
328 emitPrimOp _ [] CopyArrayOp [src,src_off,dst,dst_off,n] =
329 doCopyArrayOp src src_off dst dst_off n
330 emitPrimOp _ [] CopyMutableArrayOp [src,src_off,dst,dst_off,n] =
331 doCopyMutableArrayOp src src_off dst dst_off n
332 emitPrimOp _ [res] CloneArrayOp [src,src_off,n] =
333 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
334 emitPrimOp _ [res] CloneMutableArrayOp [src,src_off,n] =
335 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
336 emitPrimOp _ [res] FreezeArrayOp [src,src_off,n] =
337 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
338 emitPrimOp _ [res] ThawArrayOp [src,src_off,n] =
339 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
340
341 emitPrimOp _ [] CopyArrayArrayOp [src,src_off,dst,dst_off,n] =
342 doCopyArrayOp src src_off dst dst_off n
343 emitPrimOp _ [] CopyMutableArrayArrayOp [src,src_off,dst,dst_off,n] =
344 doCopyMutableArrayOp src src_off dst dst_off n
345
346 -- Reading/writing pointer arrays
347
348 emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
349 emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
350 emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
351
352 emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
353 emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
354 emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
355 emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
356 emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
357 emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
358 emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
359 emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
360 emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
361 emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
362
363 emitPrimOp dflags [res] SizeofArrayOp [arg]
364 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags))
365 emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
366 = emitPrimOp dflags [res] SizeofArrayOp [arg]
367 emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
368 = emitPrimOp dflags [res] SizeofArrayOp [arg]
369 emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
370 = emitPrimOp dflags [res] SizeofArrayOp [arg]
371
372 -- IndexXXXoffAddr
373
374 emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
375 emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
376 emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
377 emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
378 emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
379 emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
380 emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
381 emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
382 emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
383 emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
384 emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
385 emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
386 emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
387 emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
388 emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
389 emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
390
391 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
392
393 emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
394 emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
395 emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
396 emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
397 emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
398 emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
399 emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
400 emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
401 emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
402 emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
403 emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
404 emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
405 emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
406 emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
407 emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
408 emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
409
410 -- IndexXXXArray
411
412 emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
413 emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
414 emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
415 emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
416 emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
417 emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
418 emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
419 emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
420 emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
421 emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
422 emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
423 emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
424 emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
425 emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
426 emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
427 emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
428
429 -- ReadXXXArray, identical to IndexXXXArray.
430
431 emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
432 emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
433 emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
434 emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
435 emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
436 emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
437 emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
438 emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
439 emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
440 emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
441 emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
442 emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
443 emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
444 emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
445 emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
446 emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
447
448 -- WriteXXXoffAddr
449
450 emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) res args
451 emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) res args
452 emitPrimOp _ res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing res args
453 emitPrimOp _ res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing res args
454 emitPrimOp _ res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing res args
455 emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing res args
456 emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing res args
457 emitPrimOp _ res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing res args
458 emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) res args
459 emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) res args
460 emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) res args
461 emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing res args
462 emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) res args
463 emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) res args
464 emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) res args
465 emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing res args
466
467 -- WriteXXXArray
468
469 emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) res args
470 emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) res args
471 emitPrimOp _ res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing res args
472 emitPrimOp _ res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing res args
473 emitPrimOp _ res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing res args
474 emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing res args
475 emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing res args
476 emitPrimOp _ res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing res args
477 emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) res args
478 emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) res args
479 emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) res args
480 emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing res args
481 emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) res args
482 emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) res args
483 emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) res args
484 emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing res args
485
486 -- Copying and setting byte arrays
487 emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
488 doCopyByteArrayOp src src_off dst dst_off n
489 emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
490 doCopyMutableByteArrayOp src src_off dst dst_off n
491 emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
492 doSetByteArrayOp ba off len c
493
494 -- Population count
495 emitPrimOp dflags [res] PopCnt8Op [w] =
496 emitPopCntCall res (CmmMachOp (mo_WordTo8 dflags) [w]) W8
497 emitPrimOp dflags [res] PopCnt16Op [w] =
498 emitPopCntCall res (CmmMachOp (mo_WordTo16 dflags) [w]) W16
499 emitPrimOp dflags [res] PopCnt32Op [w] =
500 emitPopCntCall res (CmmMachOp (mo_WordTo32 dflags) [w]) W32
501 emitPrimOp _ [res] PopCnt64Op [w] =
502 emitPopCntCall res w W64 -- arg always has type W64, no need to narrow
503 emitPrimOp dflags [res] PopCntOp [w] =
504 emitPopCntCall res w (wordWidth dflags)
505
506 -- The rest just translate straightforwardly
507 emitPrimOp dflags [res] op [arg]
508 | nopOp op
509 = emitAssign (CmmLocal res) arg
510
511 | Just (mop,rep) <- narrowOp op
512 = emitAssign (CmmLocal res) $
513 CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
514
515 emitPrimOp dflags r@[res] op args
516 | Just prim <- callishOp op
517 = do emitPrimCall r prim args
518
519 | Just mop <- translateOp dflags op
520 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
521 emit stmt
522
523 emitPrimOp dflags results op args
524 = case callishPrimOpSupported dflags op of
525 Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
526 Right gen -> gen results args
527
528 type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
529
530 callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
531 callishPrimOpSupported dflags op
532 = case op of
533 IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
534 | otherwise -> Right (genericIntQuotRemOp dflags)
535
536 WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
537 | otherwise -> Right (genericWordQuotRemOp dflags)
538
539 WordQuotRem2Op | ncg && x86ish -> Left (MO_U_QuotRem2 (wordWidth dflags))
540 | otherwise -> Right (genericWordQuotRem2Op dflags)
541
542 WordAdd2Op | ncg && x86ish -> Left (MO_Add2 (wordWidth dflags))
543 | otherwise -> Right genericWordAdd2Op
544
545 WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags))
546 | otherwise -> Right genericWordMul2Op
547
548 _ -> panic "emitPrimOp: can't translate PrimOp" (ppr op)
549 where
550 ncg = case hscTarget dflags of
551 HscAsm -> True
552 _ -> False
553
554 x86ish = case platformArch (targetPlatform dflags) of
555 ArchX86 -> True
556 ArchX86_64 -> True
557 _ -> False
558
559 genericIntQuotRemOp :: DynFlags -> GenericOp
560 genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
561 = emit $ mkAssign (CmmLocal res_q)
562 (CmmMachOp (MO_S_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
563 mkAssign (CmmLocal res_r)
564 (CmmMachOp (MO_S_Rem (wordWidth dflags)) [arg_x, arg_y])
565 genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
566
567 genericWordQuotRemOp :: DynFlags -> GenericOp
568 genericWordQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
569 = emit $ mkAssign (CmmLocal res_q)
570 (CmmMachOp (MO_U_Quot (wordWidth dflags)) [arg_x, arg_y]) <*>
571 mkAssign (CmmLocal res_r)
572 (CmmMachOp (MO_U_Rem (wordWidth dflags)) [arg_x, arg_y])
573 genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
574
575 genericWordQuotRem2Op :: DynFlags -> GenericOp
576 genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
577 = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
578 where ty = cmmExprType dflags arg_x_high
579 shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
580 shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
581 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
582 ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
583 ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
584 minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
585 times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
586 zero = lit 0
587 one = lit 1
588 negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
589 lit i = CmmLit (CmmInt i (wordWidth dflags))
590
591 f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
592 f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
593 mkAssign (CmmLocal res_r) high)
594 f i acc high low =
595 do roverflowedBit <- newTemp ty
596 rhigh' <- newTemp ty
597 rhigh'' <- newTemp ty
598 rlow' <- newTemp ty
599 risge <- newTemp ty
600 racc' <- newTemp ty
601 let high' = CmmReg (CmmLocal rhigh')
602 isge = CmmReg (CmmLocal risge)
603 overflowedBit = CmmReg (CmmLocal roverflowedBit)
604 let this = catAGraphs
605 [mkAssign (CmmLocal roverflowedBit)
606 (shr high negone),
607 mkAssign (CmmLocal rhigh')
608 (or (shl high one) (shr low negone)),
609 mkAssign (CmmLocal rlow')
610 (shl low one),
611 mkAssign (CmmLocal risge)
612 (or (overflowedBit `ne` zero)
613 (high' `ge` arg_y)),
614 mkAssign (CmmLocal rhigh'')
615 (high' `minus` (arg_y `times` isge)),
616 mkAssign (CmmLocal racc')
617 (or (shl acc one) isge)]
618 rest <- f (i - 1) (CmmReg (CmmLocal racc'))
619 (CmmReg (CmmLocal rhigh''))
620 (CmmReg (CmmLocal rlow'))
621 return (this <*> rest)
622 genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
623
624 genericWordAdd2Op :: GenericOp
625 genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
626 = do dflags <- getDynFlags
627 r1 <- newTemp (cmmExprType dflags arg_x)
628 r2 <- newTemp (cmmExprType dflags arg_x)
629 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
630 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
631 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
632 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
633 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
634 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
635 (wordWidth dflags))
636 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
637 emit $ catAGraphs
638 [mkAssign (CmmLocal r1)
639 (add (bottomHalf arg_x) (bottomHalf arg_y)),
640 mkAssign (CmmLocal r2)
641 (add (topHalf (CmmReg (CmmLocal r1)))
642 (add (topHalf arg_x) (topHalf arg_y))),
643 mkAssign (CmmLocal res_h)
644 (topHalf (CmmReg (CmmLocal r2))),
645 mkAssign (CmmLocal res_l)
646 (or (toTopHalf (CmmReg (CmmLocal r2)))
647 (bottomHalf (CmmReg (CmmLocal r1))))]
648 genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
649
650 genericWordMul2Op :: GenericOp
651 genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
652 = do dflags <- getDynFlags
653 let t = cmmExprType dflags arg_x
654 xlyl <- liftM CmmLocal $ newTemp t
655 xlyh <- liftM CmmLocal $ newTemp t
656 xhyl <- liftM CmmLocal $ newTemp t
657 r <- liftM CmmLocal $ newTemp t
658 -- This generic implementation is very simple and slow. We might
659 -- well be able to do better, but for now this at least works.
660 let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
661 toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
662 bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
663 add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
664 sum = foldl1 add
665 mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
666 or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
667 hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
668 (wordWidth dflags))
669 hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
670 emit $ catAGraphs
671 [mkAssign xlyl
672 (mul (bottomHalf arg_x) (bottomHalf arg_y)),
673 mkAssign xlyh
674 (mul (bottomHalf arg_x) (topHalf arg_y)),
675 mkAssign xhyl
676 (mul (topHalf arg_x) (bottomHalf arg_y)),
677 mkAssign r
678 (sum [topHalf (CmmReg xlyl),
679 bottomHalf (CmmReg xhyl),
680 bottomHalf (CmmReg xlyh)]),
681 mkAssign (CmmLocal res_l)
682 (or (bottomHalf (CmmReg xlyl))
683 (toTopHalf (CmmReg r))),
684 mkAssign (CmmLocal res_h)
685 (sum [mul (topHalf arg_x) (topHalf arg_y),
686 topHalf (CmmReg xhyl),
687 topHalf (CmmReg xlyh),
688 topHalf (CmmReg r)])]
689 genericWordMul2Op _ _ = panic "genericWordMul2Op"
690
691 -- These PrimOps are NOPs in Cmm
692
693 nopOp :: PrimOp -> Bool
694 nopOp Int2WordOp = True
695 nopOp Word2IntOp = True
696 nopOp Int2AddrOp = True
697 nopOp Addr2IntOp = True
698 nopOp ChrOp = True -- Int# and Char# are rep'd the same
699 nopOp OrdOp = True
700 nopOp _ = False
701
702 -- These PrimOps turn into double casts
703
704 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
705 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
706 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
707 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
708 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
709 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
710 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
711 narrowOp _ = Nothing
712
713 -- Native word signless ops
714
715 translateOp :: DynFlags -> PrimOp -> Maybe MachOp
716 translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
717 translateOp dflags IntSubOp = Just (mo_wordSub dflags)
718 translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
719 translateOp dflags WordSubOp = Just (mo_wordSub dflags)
720 translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
721 translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
722
723 translateOp dflags IntEqOp = Just (mo_wordEq dflags)
724 translateOp dflags IntNeOp = Just (mo_wordNe dflags)
725 translateOp dflags WordEqOp = Just (mo_wordEq dflags)
726 translateOp dflags WordNeOp = Just (mo_wordNe dflags)
727 translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
728 translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
729
730 translateOp dflags AndOp = Just (mo_wordAnd dflags)
731 translateOp dflags OrOp = Just (mo_wordOr dflags)
732 translateOp dflags XorOp = Just (mo_wordXor dflags)
733 translateOp dflags NotOp = Just (mo_wordNot dflags)
734 translateOp dflags SllOp = Just (mo_wordShl dflags)
735 translateOp dflags SrlOp = Just (mo_wordUShr dflags)
736
737 translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
738
739 -- Native word signed ops
740
741 translateOp dflags IntMulOp = Just (mo_wordMul dflags)
742 translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
743 translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
744 translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
745 translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
746
747
748 translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
749 translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
750 translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
751 translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
752
753 translateOp dflags ISllOp = Just (mo_wordShl dflags)
754 translateOp dflags ISraOp = Just (mo_wordSShr dflags)
755 translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
756
757 -- Native word unsigned ops
758
759 translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
760 translateOp dflags WordLeOp = Just (mo_wordULe dflags)
761 translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
762 translateOp dflags WordLtOp = Just (mo_wordULt dflags)
763
764 translateOp dflags WordMulOp = Just (mo_wordMul dflags)
765 translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
766 translateOp dflags WordRemOp = Just (mo_wordURem dflags)
767
768 translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
769 translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
770 translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
771 translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
772
773 -- Char# ops
774
775 translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
776 translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
777 translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
778 translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
779 translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
780 translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
781
782 -- Double ops
783
784 translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
785 translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
786 translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
787 translateOp _ DoubleLeOp = Just (MO_F_Le W64)
788 translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
789 translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
790
791 translateOp _ DoubleAddOp = Just (MO_F_Add W64)
792 translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
793 translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
794 translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
795 translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
796
797 -- Float ops
798
799 translateOp _ FloatEqOp = Just (MO_F_Eq W32)
800 translateOp _ FloatNeOp = Just (MO_F_Ne W32)
801 translateOp _ FloatGeOp = Just (MO_F_Ge W32)
802 translateOp _ FloatLeOp = Just (MO_F_Le W32)
803 translateOp _ FloatGtOp = Just (MO_F_Gt W32)
804 translateOp _ FloatLtOp = Just (MO_F_Lt W32)
805
806 translateOp _ FloatAddOp = Just (MO_F_Add W32)
807 translateOp _ FloatSubOp = Just (MO_F_Sub W32)
808 translateOp _ FloatMulOp = Just (MO_F_Mul W32)
809 translateOp _ FloatDivOp = Just (MO_F_Quot W32)
810 translateOp _ FloatNegOp = Just (MO_F_Neg W32)
811
812 -- Conversions
813
814 translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
815 translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
816
817 translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
818 translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
819
820 translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
821 translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
822
823 -- Word comparisons masquerading as more exotic things.
824
825 translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
826 translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
827 translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
828 translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
829 translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
830 translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
831 translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
832
833 translateOp _ _ = Nothing
834
835 -- These primops are implemented by CallishMachOps, because they sometimes
836 -- turn into foreign calls depending on the backend.
837
838 callishOp :: PrimOp -> Maybe CallishMachOp
839 callishOp DoublePowerOp = Just MO_F64_Pwr
840 callishOp DoubleSinOp = Just MO_F64_Sin
841 callishOp DoubleCosOp = Just MO_F64_Cos
842 callishOp DoubleTanOp = Just MO_F64_Tan
843 callishOp DoubleSinhOp = Just MO_F64_Sinh
844 callishOp DoubleCoshOp = Just MO_F64_Cosh
845 callishOp DoubleTanhOp = Just MO_F64_Tanh
846 callishOp DoubleAsinOp = Just MO_F64_Asin
847 callishOp DoubleAcosOp = Just MO_F64_Acos
848 callishOp DoubleAtanOp = Just MO_F64_Atan
849 callishOp DoubleLogOp = Just MO_F64_Log
850 callishOp DoubleExpOp = Just MO_F64_Exp
851 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
852
853 callishOp FloatPowerOp = Just MO_F32_Pwr
854 callishOp FloatSinOp = Just MO_F32_Sin
855 callishOp FloatCosOp = Just MO_F32_Cos
856 callishOp FloatTanOp = Just MO_F32_Tan
857 callishOp FloatSinhOp = Just MO_F32_Sinh
858 callishOp FloatCoshOp = Just MO_F32_Cosh
859 callishOp FloatTanhOp = Just MO_F32_Tanh
860 callishOp FloatAsinOp = Just MO_F32_Asin
861 callishOp FloatAcosOp = Just MO_F32_Acos
862 callishOp FloatAtanOp = Just MO_F32_Atan
863 callishOp FloatLogOp = Just MO_F32_Log
864 callishOp FloatExpOp = Just MO_F32_Exp
865 callishOp FloatSqrtOp = Just MO_F32_Sqrt
866
867 callishOp _ = Nothing
868
869 ------------------------------------------------------------------------------
870 -- Helpers for translating various minor variants of array indexing.
871
872 doIndexOffAddrOp :: Maybe MachOp -> CmmType -> [LocalReg] -> [CmmExpr] -> FCode ()
873 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
874 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx
875 doIndexOffAddrOp _ _ _ _
876 = panic "CgPrimOp: doIndexOffAddrOp"
877
878 doIndexByteArrayOp :: Maybe MachOp -> CmmType -> [LocalReg] -> [CmmExpr] -> FCode ()
879 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
880 = do dflags <- getDynFlags
881 mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx
882 doIndexByteArrayOp _ _ _ _
883 = panic "CgPrimOp: doIndexByteArrayOp"
884
885 doReadPtrArrayOp :: LocalReg -> CmmExpr -> CmmExpr -> FCode ()
886 doReadPtrArrayOp res addr idx
887 = do dflags <- getDynFlags
888 mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr idx
889
890
891 doWriteOffAddrOp :: Maybe MachOp -> [LocalReg] -> [CmmExpr] -> FCode ()
892 doWriteOffAddrOp maybe_pre_write_cast [] [addr,idx,val]
893 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx val
894 doWriteOffAddrOp _ _ _
895 = panic "CgPrimOp: doWriteOffAddrOp"
896
897 doWriteByteArrayOp :: Maybe MachOp -> [LocalReg] -> [CmmExpr] -> FCode ()
898 doWriteByteArrayOp maybe_pre_write_cast [] [addr,idx,val]
899 = do dflags <- getDynFlags
900 mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx val
901 doWriteByteArrayOp _ _ _
902 = panic "CgPrimOp: doWriteByteArrayOp"
903
904 doWritePtrArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
905 doWritePtrArrayOp addr idx val
906 = do dflags <- getDynFlags
907 mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr idx val
908 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
909 -- the write barrier. We must write a byte into the mark table:
910 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
911 emit $ mkStore (
912 cmmOffsetExpr dflags
913 (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
914 (loadArrPtrsSize dflags addr))
915 (CmmMachOp (mo_wordUShr dflags) [idx,
916 mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
917 ) (CmmLit (CmmInt 1 W8))
918
919 loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
920 loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
921 where off = fixedHdrSize dflags * wORD_SIZE dflags + oFFSET_StgMutArrPtrs_ptrs dflags
922
923 mkBasicIndexedRead :: ByteOff -> Maybe MachOp -> CmmType
924 -> LocalReg -> CmmExpr -> CmmExpr -> FCode ()
925 mkBasicIndexedRead off Nothing read_rep res base idx
926 = do dflags <- getDynFlags
927 emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off read_rep base idx)
928 mkBasicIndexedRead off (Just cast) read_rep res base idx
929 = do dflags <- getDynFlags
930 emitAssign (CmmLocal res) (CmmMachOp cast [
931 cmmLoadIndexOffExpr dflags off read_rep base idx])
932
933 mkBasicIndexedWrite :: ByteOff -> Maybe MachOp
934 -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
935 mkBasicIndexedWrite off Nothing base idx val
936 = do dflags <- getDynFlags
937 emitStore (cmmIndexOffExpr dflags off (typeWidth (cmmExprType dflags val)) base idx) val
938 mkBasicIndexedWrite off (Just cast) base idx val
939 = mkBasicIndexedWrite off Nothing base idx (CmmMachOp cast [val])
940
941 -- ----------------------------------------------------------------------------
942 -- Misc utils
943
944 cmmIndexOffExpr :: DynFlags -> ByteOff -> Width -> CmmExpr -> CmmExpr -> CmmExpr
945 cmmIndexOffExpr dflags off width base idx
946 = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
947
948 cmmLoadIndexOffExpr :: DynFlags -> ByteOff -> CmmType -> CmmExpr -> CmmExpr -> CmmExpr
949 cmmLoadIndexOffExpr dflags off ty base idx
950 = CmmLoad (cmmIndexOffExpr dflags off (typeWidth ty) base idx) ty
951
952 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
953 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
954
955 -- ----------------------------------------------------------------------------
956 -- Copying byte arrays
957
958 -- | Takes a source 'ByteArray#', an offset in the source array, a
959 -- destination 'MutableByteArray#', an offset into the destination
960 -- array, and the number of bytes to copy. Copies the given number of
961 -- bytes from the source array to the destination array.
962 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
963 -> FCode ()
964 doCopyByteArrayOp = emitCopyByteArray copy
965 where
966 -- Copy data (we assume the arrays aren't overlapping since
967 -- they're of different types)
968 copy _src _dst dst_p src_p bytes =
969 do dflags <- getDynFlags
970 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
971
972 -- | Takes a source 'MutableByteArray#', an offset in the source
973 -- array, a destination 'MutableByteArray#', an offset into the
974 -- destination array, and the number of bytes to copy. Copies the
975 -- given number of bytes from the source array to the destination
976 -- array.
977 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
978 -> FCode ()
979 doCopyMutableByteArrayOp = emitCopyByteArray copy
980 where
981 -- The only time the memory might overlap is when the two arrays
982 -- we were provided are the same array!
983 -- TODO: Optimize branch for common case of no aliasing.
984 copy src dst dst_p src_p bytes = do
985 dflags <- getDynFlags
986 [moveCall, cpyCall] <- forkAlts [
987 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags 1),
988 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags 1)
989 ]
990 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
991
992 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
993 -> FCode ())
994 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
995 -> FCode ()
996 emitCopyByteArray copy src src_off dst dst_off n = do
997 dflags <- getDynFlags
998 dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
999 src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
1000 copy src dst dst_p src_p n
1001
1002 -- ----------------------------------------------------------------------------
1003 -- Setting byte arrays
1004
1005 -- | Takes a 'MutableByteArray#', an offset into the array, a length,
1006 -- and a byte, and sets each of the selected bytes in the array to the
1007 -- character.
1008 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1009 -> FCode ()
1010 doSetByteArrayOp ba off len c
1011 = do dflags <- getDynFlags
1012 p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
1013 emitMemsetCall p c len (mkIntExpr dflags 1)
1014
1015 -- ----------------------------------------------------------------------------
1016 -- Copying pointer arrays
1017
1018 -- EZY: This code has an unusually high amount of assignTemp calls, seen
1019 -- nowhere else in the code generator. This is mostly because these
1020 -- "primitive" ops result in a surprisingly large amount of code. It
1021 -- will likely be worthwhile to optimize what is emitted here, so that
1022 -- our optimization passes don't waste time repeatedly optimizing the
1023 -- same bits of code.
1024
1025 -- More closely imitates 'assignTemp' from the old code generator, which
1026 -- returns a CmmExpr rather than a LocalReg.
1027 assignTempE :: CmmExpr -> FCode CmmExpr
1028 assignTempE e = do
1029 t <- assignTemp e
1030 return (CmmReg (CmmLocal t))
1031
1032 -- | Takes a source 'Array#', an offset in the source array, a
1033 -- destination 'MutableArray#', an offset into the destination array,
1034 -- and the number of elements to copy. Copies the given number of
1035 -- elements from the source array to the destination array.
1036 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1037 -> FCode ()
1038 doCopyArrayOp = emitCopyArray copy
1039 where
1040 -- Copy data (we assume the arrays aren't overlapping since
1041 -- they're of different types)
1042 copy _src _dst dst_p src_p bytes =
1043 do dflags <- getDynFlags
1044 emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1045
1046
1047 -- | Takes a source 'MutableArray#', an offset in the source array, a
1048 -- destination 'MutableArray#', an offset into the destination array,
1049 -- and the number of elements to copy. Copies the given number of
1050 -- elements from the source array to the destination array.
1051 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1052 -> FCode ()
1053 doCopyMutableArrayOp = emitCopyArray copy
1054 where
1055 -- The only time the memory might overlap is when the two arrays
1056 -- we were provided are the same array!
1057 -- TODO: Optimize branch for common case of no aliasing.
1058 copy src dst dst_p src_p bytes = do
1059 dflags <- getDynFlags
1060 [moveCall, cpyCall] <- forkAlts [
1061 getCode $ emitMemmoveCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags)),
1062 getCode $ emitMemcpyCall dst_p src_p bytes (mkIntExpr dflags (wORD_SIZE dflags))
1063 ]
1064 emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
1065
1066 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1067 -> FCode ())
1068 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
1069 -> FCode ()
1070 emitCopyArray copy src0 src_off0 dst0 dst_off0 n0 = do
1071 dflags <- getDynFlags
1072 -- Passed as arguments (be careful)
1073 src <- assignTempE src0
1074 src_off <- assignTempE src_off0
1075 dst <- assignTempE dst0
1076 dst_off <- assignTempE dst_off0
1077 n <- assignTempE n0
1078
1079 -- Set the dirty bit in the header.
1080 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
1081
1082 dst_elems_p <- assignTempE $ cmmOffsetB dflags dst (arrPtrsHdrSize dflags)
1083 dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
1084 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
1085 bytes <- assignTempE $ cmmMulWord dflags n (mkIntExpr dflags (wORD_SIZE dflags))
1086
1087 copy src dst dst_p src_p bytes
1088
1089 -- The base address of the destination card table
1090 dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p (loadArrPtrsSize dflags dst)
1091
1092 emitSetCards dst_off dst_cards_p n
1093
1094 -- | Takes an info table label, a register to return the newly
1095 -- allocated array in, a source array, an offset in the source array,
1096 -- and the number of elements to copy. Allocates a new array and
1097 -- initializes it form the source array.
1098 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> CmmExpr
1099 -> FCode ()
1100 emitCloneArray info_p res_r src0 src_off0 n0 = do
1101 dflags <- getDynFlags
1102 let arrPtrsHdrSizeW dflags = mkIntExpr dflags (fixedHdrSize dflags +
1103 (sIZEOF_StgMutArrPtrs_NoHdr dflags `div` wORD_SIZE dflags))
1104 myCapability = cmmSubWord dflags (CmmReg baseReg) (mkIntExpr dflags (oFFSET_Capability_r dflags))
1105 -- Passed as arguments (be careful)
1106 src <- assignTempE src0
1107 src_off <- assignTempE src_off0
1108 n <- assignTempE n0
1109
1110 card_bytes <- assignTempE $ cardRoundUp dflags n
1111 size <- assignTempE $ cmmAddWord dflags n (bytesToWordsRoundUp dflags card_bytes)
1112 words <- assignTempE $ cmmAddWord dflags (arrPtrsHdrSizeW dflags) size
1113
1114 arr_r <- newTemp (bWord dflags)
1115 emitAllocateCall arr_r myCapability words
1116 tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags)) (cmmMulWord dflags n (wordSize dflags))
1117 (zeroExpr dflags)
1118
1119 let arr = CmmReg (CmmLocal arr_r)
1120 emitSetDynHdr arr (CmmLit (CmmLabel info_p)) curCCS
1121 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1122 oFFSET_StgMutArrPtrs_ptrs dflags)) n
1123 emit $ mkStore (cmmOffsetB dflags arr (fixedHdrSize dflags * wORD_SIZE dflags +
1124 oFFSET_StgMutArrPtrs_size dflags)) size
1125
1126 dst_p <- assignTempE $ cmmOffsetB dflags arr (arrPtrsHdrSize dflags)
1127 src_p <- assignTempE $ cmmOffsetExprW dflags (cmmOffsetB dflags src (arrPtrsHdrSize dflags))
1128 src_off
1129
1130 emitMemcpyCall dst_p src_p (cmmMulWord dflags n (wordSize dflags)) (mkIntExpr dflags (wORD_SIZE dflags))
1131
1132 emitMemsetCall (cmmOffsetExprW dflags dst_p n)
1133 (mkIntExpr dflags 1)
1134 card_bytes
1135 (mkIntExpr dflags (wORD_SIZE dflags))
1136 emit $ mkAssign (CmmLocal res_r) arr
1137
1138 -- | Takes and offset in the destination array, the base address of
1139 -- the card table, and the number of elements affected (*not* the
1140 -- number of cards). Marks the relevant cards as dirty.
1141 emitSetCards :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1142 emitSetCards dst_start dst_cards_start n = do
1143 dflags <- getDynFlags
1144 start_card <- assignTempE $ card dflags dst_start
1145 emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
1146 (mkIntExpr dflags 1)
1147 (cardRoundUp dflags n)
1148 (mkIntExpr dflags 1) -- no alignment (1 byte)
1149
1150 -- Convert an element index to a card index
1151 card :: DynFlags -> CmmExpr -> CmmExpr
1152 card dflags i = cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
1153
1154 -- Convert a number of elements to a number of cards, rounding up
1155 cardRoundUp :: DynFlags -> CmmExpr -> CmmExpr
1156 cardRoundUp dflags i = card dflags (cmmAddWord dflags i (mkIntExpr dflags ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1)))
1157
1158 bytesToWordsRoundUp :: DynFlags -> CmmExpr -> CmmExpr
1159 bytesToWordsRoundUp dflags e = cmmQuotWord dflags (cmmAddWord dflags e (mkIntExpr dflags (wORD_SIZE dflags - 1)))
1160 (wordSize dflags)
1161
1162 wordSize :: DynFlags -> CmmExpr
1163 wordSize dflags = mkIntExpr dflags (wORD_SIZE dflags)
1164
1165 -- | Emit a call to @memcpy@.
1166 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1167 emitMemcpyCall dst src n align = do
1168 emitPrimCall
1169 [ {-no results-} ]
1170 MO_Memcpy
1171 [ dst, src, n, align ]
1172
1173 -- | Emit a call to @memmove@.
1174 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1175 emitMemmoveCall dst src n align = do
1176 emitPrimCall
1177 [ {- no results -} ]
1178 MO_Memmove
1179 [ dst, src, n, align ]
1180
1181 -- | Emit a call to @memset@. The second argument must fit inside an
1182 -- unsigned char.
1183 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
1184 emitMemsetCall dst c n align = do
1185 emitPrimCall
1186 [ {- no results -} ]
1187 MO_Memset
1188 [ dst, c, n, align ]
1189
1190 -- | Emit a call to @allocate@.
1191 emitAllocateCall :: LocalReg -> CmmExpr -> CmmExpr -> FCode ()
1192 emitAllocateCall res cap n = do
1193 emitCCall
1194 [ (res, AddrHint) ]
1195 allocate
1196 [ (cap, AddrHint)
1197 , (n, NoHint)
1198 ]
1199 where
1200 allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
1201 ForeignLabelInExternalPackage IsFunction))
1202
1203 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
1204 emitPopCntCall res x width = do
1205 emitPrimCall
1206 [ res ]
1207 (MO_PopCnt width)
1208 [ x ]