1d5a5b3cda6d3f5ba8b48dd688eefd9ade9d4d2a
[ghc.git] / compiler / codeGen / StgCmmPrim.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C--: primitive operations
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 {-# OPTIONS -fno-warn-tabs #-}
10 -- The above warning supression flag is a temporary kludge.
11 -- While working on this module you are encouraged to remove it and
12 -- detab the module (please do the detabbing in a separate patch). See
13 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
14 -- for details
15
16 module StgCmmPrim (
17 cgOpApp
18 ) where
19
20 #include "HsVersions.h"
21
22 import StgCmmLayout
23 import StgCmmForeign
24 import StgCmmEnv
25 import StgCmmMonad
26 import StgCmmUtils
27 import StgCmmTicky
28 import StgCmmHeap
29 import StgCmmProf
30
31 import BasicTypes
32 import MkGraph
33 import StgSyn
34 import Cmm
35 import Type ( Type, tyConAppTyCon )
36 import TyCon
37 import CLabel
38 import CmmUtils
39 import PrimOp
40 import SMRep
41 import Constants
42 import Module
43 import FastString
44 import Outputable
45 import StaticFlags
46
47 ------------------------------------------------------------------------
48 -- Primitive operations and foreign calls
49 ------------------------------------------------------------------------
50
51 {- Note [Foreign call results]
52 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
53 A foreign call always returns an unboxed tuple of results, one
54 of which is the state token. This seems to happen even for pure
55 calls.
56
57 Even if we returned a single result for pure calls, it'd still be
58 right to wrap it in a singleton unboxed tuple, because the result
59 might be a Haskell closure pointer, we don't want to evaluate it. -}
60
61 ----------------------------------
62 cgOpApp :: StgOp -- The op
63 -> [StgArg] -- Arguments
64 -> Type -- Result type (always an unboxed tuple)
65 -> FCode ()
66
67 -- Foreign calls
68 cgOpApp (StgFCallOp fcall _) stg_args res_ty
69 = do { (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
70 -- Choose result regs r1, r2
71 -- Note [Foreign call results]
72 ; cgForeignCall res_regs res_hints fcall stg_args
73 -- r1, r2 = foo( x, y )
74 ; emitReturn (map (CmmReg . CmmLocal) res_regs) }
75 -- return (r1, r2)
76
77 -- tagToEnum# is special: we need to pull the constructor
78 -- out of the table, and perform an appropriate return.
79
80 cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
81 = ASSERT(isEnumerationTyCon tycon)
82 do { args' <- getNonVoidArgAmodes [arg]
83 ; let amode = case args' of [amode] -> amode
84 _ -> panic "TagToEnumOp had void arg"
85 ; emitReturn [tagToClosure tycon amode] }
86 where
87 -- If you're reading this code in the attempt to figure
88 -- out why the compiler panic'ed here, it is probably because
89 -- you used tagToEnum# in a non-monomorphic setting, e.g.,
90 -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
91 -- That won't work.
92 tycon = tyConAppTyCon res_ty
93
94 cgOpApp (StgPrimOp primop) args res_ty
95 | primOpOutOfLine primop
96 = do { cmm_args <- getNonVoidArgAmodes args
97 ; let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
98 ; emitCall (PrimOpCall, PrimOpReturn) fun cmm_args }
99
100 | ReturnsPrim VoidRep <- result_info
101 = do cgPrimOp [] primop args
102 emitReturn []
103
104 | ReturnsPrim rep <- result_info
105 = do res <- newTemp (primRepCmmType rep)
106 cgPrimOp [res] primop args
107 emitReturn [CmmReg (CmmLocal res)]
108
109 | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
110 = do (regs, _hints) <- newUnboxedTupleRegs res_ty
111 cgPrimOp regs primop args
112 emitReturn (map (CmmReg . CmmLocal) regs)
113
114 | ReturnsAlg tycon <- result_info
115 , isEnumerationTyCon tycon
116 -- c.f. cgExpr (...TagToEnumOp...)
117 = do tag_reg <- newTemp bWord
118 cgPrimOp [tag_reg] primop args
119 emitReturn [tagToClosure tycon
120 (CmmReg (CmmLocal tag_reg))]
121
122 | otherwise = panic "cgPrimop"
123 where
124 result_info = getPrimOpResultInfo primop
125
126 cgOpApp (StgPrimCallOp primcall) args _res_ty
127 = do { cmm_args <- getNonVoidArgAmodes args
128 ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
129 ; emitCall (PrimOpCall, PrimOpReturn) fun cmm_args }
130
131 ---------------------------------------------------
132 cgPrimOp :: [LocalReg] -- where to put the results
133 -> PrimOp -- the op
134 -> [StgArg] -- arguments
135 -> FCode ()
136
137 cgPrimOp results op args
138 = do arg_exprs <- getNonVoidArgAmodes args
139 emitPrimOp results op arg_exprs
140
141
142 ------------------------------------------------------------------------
143 -- Emitting code for a primop
144 ------------------------------------------------------------------------
145
146 emitPrimOp :: [LocalReg] -- where to put the results
147 -> PrimOp -- the op
148 -> [CmmExpr] -- arguments
149 -> FCode ()
150
151 -- First we handle various awkward cases specially. The remaining
152 -- easy cases are then handled by translateOp, defined below.
153
154 emitPrimOp [res_r,res_c] IntAddCOp [aa,bb]
155 {-
156 With some bit-twiddling, we can define int{Add,Sub}Czh portably in
157 C, and without needing any comparisons. This may not be the
158 fastest way to do it - if you have better code, please send it! --SDM
159
160 Return : r = a + b, c = 0 if no overflow, 1 on overflow.
161
162 We currently don't make use of the r value if c is != 0 (i.e.
163 overflow), we just convert to big integers and try again. This
164 could be improved by making r and c the correct values for
165 plugging into a new J#.
166
167 { r = ((I_)(a)) + ((I_)(b)); \
168 c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
169 >> (BITS_IN (I_) - 1); \
170 }
171 Wading through the mass of bracketry, it seems to reduce to:
172 c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
173
174 -}
175 = emit $ catAGraphs [
176 mkAssign (CmmLocal res_r) (CmmMachOp mo_wordAdd [aa,bb]),
177 mkAssign (CmmLocal res_c) $
178 CmmMachOp mo_wordUShr [
179 CmmMachOp mo_wordAnd [
180 CmmMachOp mo_wordNot [CmmMachOp mo_wordXor [aa,bb]],
181 CmmMachOp mo_wordXor [aa, CmmReg (CmmLocal res_r)]
182 ],
183 CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1))
184 ]
185 ]
186
187
188 emitPrimOp [res_r,res_c] IntSubCOp [aa,bb]
189 {- Similarly:
190 #define subIntCzh(r,c,a,b) \
191 { r = ((I_)(a)) - ((I_)(b)); \
192 c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
193 >> (BITS_IN (I_) - 1); \
194 }
195
196 c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
197 -}
198 = emit $ catAGraphs [
199 mkAssign (CmmLocal res_r) (CmmMachOp mo_wordSub [aa,bb]),
200 mkAssign (CmmLocal res_c) $
201 CmmMachOp mo_wordUShr [
202 CmmMachOp mo_wordAnd [
203 CmmMachOp mo_wordXor [aa,bb],
204 CmmMachOp mo_wordXor [aa, CmmReg (CmmLocal res_r)]
205 ],
206 CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1))
207 ]
208 ]
209
210
211 emitPrimOp [res] ParOp [arg]
212 =
213 -- for now, just implement this in a C function
214 -- later, we might want to inline it.
215 emitCCall
216 [(res,NoHint)]
217 (CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "newSpark"))))
218 [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
219
220 emitPrimOp [res] SparkOp [arg]
221 = do
222 -- returns the value of arg in res. We're going to therefore
223 -- refer to arg twice (once to pass to newSpark(), and once to
224 -- assign to res), so put it in a temporary.
225 tmp <- assignTemp arg
226 tmp2 <- newTemp bWord
227 emitCCall
228 [(tmp2,NoHint)]
229 (CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "newSpark"))))
230 [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
231 emit (mkAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
232
233 emitPrimOp [res] GetCCSOfOp [arg]
234 = emit (mkAssign (CmmLocal res) val)
235 where
236 val | opt_SccProfilingOn = costCentreFrom (cmmUntag arg)
237 | otherwise = CmmLit zeroCLit
238
239 emitPrimOp [res] GetCurrentCCSOp [_dummy_arg]
240 = emit (mkAssign (CmmLocal res) curCCS)
241
242 emitPrimOp [res] ReadMutVarOp [mutv]
243 = emit (mkAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord))
244
245 emitPrimOp [] WriteMutVarOp [mutv,var]
246 = do
247 emit (mkStore (cmmOffsetW mutv fixedHdrSize) var)
248 emitCCall
249 [{-no results-}]
250 (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
251 [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
252
253 -- #define sizzeofByteArrayzh(r,a) \
254 -- r = ((StgArrWords *)(a))->bytes
255 emitPrimOp [res] SizeofByteArrayOp [arg]
256 = emit $
257 mkAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord)
258
259 -- #define sizzeofMutableByteArrayzh(r,a) \
260 -- r = ((StgArrWords *)(a))->bytes
261 emitPrimOp [res] SizeofMutableByteArrayOp [arg]
262 = emitPrimOp [res] SizeofByteArrayOp [arg]
263
264
265 -- #define touchzh(o) /* nothing */
266 emitPrimOp res@[] TouchOp args@[_arg]
267 = do emitPrimCall res MO_Touch args
268
269 -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
270 emitPrimOp [res] ByteArrayContents_Char [arg]
271 = emit (mkAssign (CmmLocal res) (cmmOffsetB arg arrWordsHdrSize))
272
273 -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
274 emitPrimOp [res] StableNameToIntOp [arg]
275 = emit (mkAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord))
276
277 -- #define eqStableNamezh(r,sn1,sn2) \
278 -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
279 emitPrimOp [res] EqStableNameOp [arg1,arg2]
280 = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [
281 cmmLoadIndexW arg1 fixedHdrSize bWord,
282 cmmLoadIndexW arg2 fixedHdrSize bWord
283 ]))
284
285
286 emitPrimOp [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
287 = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [arg1,arg2]))
288
289 -- #define addrToHValuezh(r,a) r=(P_)a
290 emitPrimOp [res] AddrToAnyOp [arg]
291 = emit (mkAssign (CmmLocal res) arg)
292
293 -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
294 -- Note: argument may be tagged!
295 emitPrimOp [res] DataToTagOp [arg]
296 = emit (mkAssign (CmmLocal res) (getConstrTag (cmmUntag arg)))
297
298 {- Freezing arrays-of-ptrs requires changing an info table, for the
299 benefit of the generational collector. It needs to scavenge mutable
300 objects, even if they are in old space. When they become immutable,
301 they can be removed from this scavenge list. -}
302
303 -- #define unsafeFreezzeArrayzh(r,a)
304 -- {
305 -- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
306 -- r = a;
307 -- }
308 emitPrimOp [res] UnsafeFreezeArrayOp [arg]
309 = emit $ catAGraphs
310 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
311 mkAssign (CmmLocal res) arg ]
312 emitPrimOp [res] UnsafeFreezeArrayArrayOp [arg]
313 = emit $ catAGraphs
314 [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
315 mkAssign (CmmLocal res) arg ]
316
317 -- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
318 emitPrimOp [res] UnsafeFreezeByteArrayOp [arg]
319 = emit (mkAssign (CmmLocal res) arg)
320
321 -- Copying pointer arrays
322
323 emitPrimOp [] CopyArrayOp [src,src_off,dst,dst_off,n] =
324 doCopyArrayOp src src_off dst dst_off n
325 emitPrimOp [] CopyMutableArrayOp [src,src_off,dst,dst_off,n] =
326 doCopyMutableArrayOp src src_off dst dst_off n
327 emitPrimOp [res] CloneArrayOp [src,src_off,n] =
328 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
329 emitPrimOp [res] CloneMutableArrayOp [src,src_off,n] =
330 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
331 emitPrimOp [res] FreezeArrayOp [src,src_off,n] =
332 emitCloneArray mkMAP_FROZEN_infoLabel res src src_off n
333 emitPrimOp [res] ThawArrayOp [src,src_off,n] =
334 emitCloneArray mkMAP_DIRTY_infoLabel res src src_off n
335
336 -- Reading/writing pointer arrays
337
338 emitPrimOp [r] ReadArrayOp [obj,ix] = doReadPtrArrayOp r obj ix
339 emitPrimOp [r] IndexArrayOp [obj,ix] = doReadPtrArrayOp r obj ix
340 emitPrimOp [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
341
342 emitPrimOp [res] SizeofArrayOp [arg]
343 = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW arg (fixedHdrSize + oFFSET_StgMutArrPtrs_ptrs) bWord)
344 emitPrimOp [res] SizeofMutableArrayOp [arg]
345 = emitPrimOp [res] SizeofArrayOp [arg]
346
347 -- IndexXXXoffAddr
348
349 emitPrimOp res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args
350 emitPrimOp res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args
351 emitPrimOp res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing bWord res args
352 emitPrimOp res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing bWord res args
353 emitPrimOp res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing bWord res args
354 emitPrimOp res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
355 emitPrimOp res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
356 emitPrimOp res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing bWord res args
357 emitPrimOp res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just mo_s_8ToWord) b8 res args
358 emitPrimOp res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just mo_s_16ToWord) b16 res args
359 emitPrimOp res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just mo_s_32ToWord) b32 res args
360 emitPrimOp res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
361 emitPrimOp res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args
362 emitPrimOp res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just mo_u_16ToWord) b16 res args
363 emitPrimOp res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args
364 emitPrimOp res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
365
366 -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
367
368 emitPrimOp res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args
369 emitPrimOp res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args
370 emitPrimOp res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing bWord res args
371 emitPrimOp res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing bWord res args
372 emitPrimOp res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing bWord res args
373 emitPrimOp res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
374 emitPrimOp res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
375 emitPrimOp res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing bWord res args
376 emitPrimOp res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just mo_s_8ToWord) b8 res args
377 emitPrimOp res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just mo_s_16ToWord) b16 res args
378 emitPrimOp res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just mo_s_32ToWord) b32 res args
379 emitPrimOp res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
380 emitPrimOp res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args
381 emitPrimOp res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just mo_u_16ToWord) b16 res args
382 emitPrimOp res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args
383 emitPrimOp res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
384
385 -- IndexXXXArray
386
387 emitPrimOp res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args
388 emitPrimOp res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args
389 emitPrimOp res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing bWord res args
390 emitPrimOp res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing bWord res args
391 emitPrimOp res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing bWord res args
392 emitPrimOp res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
393 emitPrimOp res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
394 emitPrimOp res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing bWord res args
395 emitPrimOp res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just mo_s_8ToWord) b8 res args
396 emitPrimOp res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just mo_s_16ToWord) b16 res args
397 emitPrimOp res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just mo_s_32ToWord) b32 res args
398 emitPrimOp res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
399 emitPrimOp res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args
400 emitPrimOp res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just mo_u_16ToWord) b16 res args
401 emitPrimOp res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args
402 emitPrimOp res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
403
404 -- ReadXXXArray, identical to IndexXXXArray.
405
406 emitPrimOp res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args
407 emitPrimOp res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args
408 emitPrimOp res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing bWord res args
409 emitPrimOp res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing bWord res args
410 emitPrimOp res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing bWord res args
411 emitPrimOp res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
412 emitPrimOp res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
413 emitPrimOp res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing bWord res args
414 emitPrimOp res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just mo_s_8ToWord) b8 res args
415 emitPrimOp res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just mo_s_16ToWord) b16 res args
416 emitPrimOp res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just mo_s_32ToWord) b32 res args
417 emitPrimOp res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
418 emitPrimOp res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args
419 emitPrimOp res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just mo_u_16ToWord) b16 res args
420 emitPrimOp res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args
421 emitPrimOp res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
422
423 -- WriteXXXoffAddr
424
425 emitPrimOp res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just mo_WordTo8) res args
426 emitPrimOp res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just mo_WordTo32) res args
427 emitPrimOp res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing res args
428 emitPrimOp res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing res args
429 emitPrimOp res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing res args
430 emitPrimOp res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing res args
431 emitPrimOp res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing res args
432 emitPrimOp res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing res args
433 emitPrimOp res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just mo_WordTo8) res args
434 emitPrimOp res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just mo_WordTo16) res args
435 emitPrimOp res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just mo_WordTo32) res args
436 emitPrimOp res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing res args
437 emitPrimOp res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just mo_WordTo8) res args
438 emitPrimOp res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just mo_WordTo16) res args
439 emitPrimOp res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just mo_WordTo32) res args
440 emitPrimOp res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing res args
441
442 -- WriteXXXArray
443
444 emitPrimOp res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just mo_WordTo8) res args
445 emitPrimOp res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just mo_WordTo32) res args
446 emitPrimOp res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing res args
447 emitPrimOp res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing res args
448 emitPrimOp res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing res args
449 emitPrimOp res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing res args
450 emitPrimOp res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing res args
451 emitPrimOp res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing res args
452 emitPrimOp res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just mo_WordTo8) res args
453 emitPrimOp res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just mo_WordTo16) res args
454 emitPrimOp res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just mo_WordTo32) res args
455 emitPrimOp res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing res args
456 emitPrimOp res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just mo_WordTo8) res args
457 emitPrimOp res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just mo_WordTo16) res args
458 emitPrimOp res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just mo_WordTo32) res args
459 emitPrimOp res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing res args
460
461 -- Copying byte arrays
462 emitPrimOp [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
463 doCopyByteArrayOp src src_off dst dst_off n
464 emitPrimOp [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
465 doCopyMutableByteArrayOp src src_off dst dst_off n
466
467 -- Population count
468 emitPrimOp [res] PopCnt8Op [w] = emitPopCntCall res w W8
469 emitPrimOp [res] PopCnt16Op [w] = emitPopCntCall res w W16
470 emitPrimOp [res] PopCnt32Op [w] = emitPopCntCall res w W32
471 emitPrimOp [res] PopCnt64Op [w] = emitPopCntCall res w W64
472 emitPrimOp [res] PopCntOp [w] = emitPopCntCall res w wordWidth
473
474 -- The rest just translate straightforwardly
475 emitPrimOp [res] op [arg]
476 | nopOp op
477 = emit (mkAssign (CmmLocal res) arg)
478
479 | Just (mop,rep) <- narrowOp op
480 = emit (mkAssign (CmmLocal res) $
481 CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]])
482
483 emitPrimOp r@[res] op args
484 | Just prim <- callishOp op
485 = do emitPrimCall r prim args
486
487 | Just mop <- translateOp op
488 = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
489 emit stmt
490
491 emitPrimOp _ op _
492 = pprPanic "emitPrimOp: can't translate PrimOp" (ppr op)
493
494
495 -- These PrimOps are NOPs in Cmm
496
497 nopOp :: PrimOp -> Bool
498 nopOp Int2WordOp = True
499 nopOp Word2IntOp = True
500 nopOp Int2AddrOp = True
501 nopOp Addr2IntOp = True
502 nopOp ChrOp = True -- Int# and Char# are rep'd the same
503 nopOp OrdOp = True
504 nopOp _ = False
505
506 -- These PrimOps turn into double casts
507
508 narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
509 narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
510 narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
511 narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
512 narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
513 narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
514 narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
515 narrowOp _ = Nothing
516
517 -- Native word signless ops
518
519 translateOp :: PrimOp -> Maybe MachOp
520 translateOp IntAddOp = Just mo_wordAdd
521 translateOp IntSubOp = Just mo_wordSub
522 translateOp WordAddOp = Just mo_wordAdd
523 translateOp WordSubOp = Just mo_wordSub
524 translateOp AddrAddOp = Just mo_wordAdd
525 translateOp AddrSubOp = Just mo_wordSub
526
527 translateOp IntEqOp = Just mo_wordEq
528 translateOp IntNeOp = Just mo_wordNe
529 translateOp WordEqOp = Just mo_wordEq
530 translateOp WordNeOp = Just mo_wordNe
531 translateOp AddrEqOp = Just mo_wordEq
532 translateOp AddrNeOp = Just mo_wordNe
533
534 translateOp AndOp = Just mo_wordAnd
535 translateOp OrOp = Just mo_wordOr
536 translateOp XorOp = Just mo_wordXor
537 translateOp NotOp = Just mo_wordNot
538 translateOp SllOp = Just mo_wordShl
539 translateOp SrlOp = Just mo_wordUShr
540
541 translateOp AddrRemOp = Just mo_wordURem
542
543 -- Native word signed ops
544
545 translateOp IntMulOp = Just mo_wordMul
546 translateOp IntMulMayOfloOp = Just (MO_S_MulMayOflo wordWidth)
547 translateOp IntQuotOp = Just mo_wordSQuot
548 translateOp IntRemOp = Just mo_wordSRem
549 translateOp IntNegOp = Just mo_wordSNeg
550
551
552 translateOp IntGeOp = Just mo_wordSGe
553 translateOp IntLeOp = Just mo_wordSLe
554 translateOp IntGtOp = Just mo_wordSGt
555 translateOp IntLtOp = Just mo_wordSLt
556
557 translateOp ISllOp = Just mo_wordShl
558 translateOp ISraOp = Just mo_wordSShr
559 translateOp ISrlOp = Just mo_wordUShr
560
561 -- Native word unsigned ops
562
563 translateOp WordGeOp = Just mo_wordUGe
564 translateOp WordLeOp = Just mo_wordULe
565 translateOp WordGtOp = Just mo_wordUGt
566 translateOp WordLtOp = Just mo_wordULt
567
568 translateOp WordMulOp = Just mo_wordMul
569 translateOp WordQuotOp = Just mo_wordUQuot
570 translateOp WordRemOp = Just mo_wordURem
571
572 translateOp AddrGeOp = Just mo_wordUGe
573 translateOp AddrLeOp = Just mo_wordULe
574 translateOp AddrGtOp = Just mo_wordUGt
575 translateOp AddrLtOp = Just mo_wordULt
576
577 -- Char# ops
578
579 translateOp CharEqOp = Just (MO_Eq wordWidth)
580 translateOp CharNeOp = Just (MO_Ne wordWidth)
581 translateOp CharGeOp = Just (MO_U_Ge wordWidth)
582 translateOp CharLeOp = Just (MO_U_Le wordWidth)
583 translateOp CharGtOp = Just (MO_U_Gt wordWidth)
584 translateOp CharLtOp = Just (MO_U_Lt wordWidth)
585
586 -- Double ops
587
588 translateOp DoubleEqOp = Just (MO_F_Eq W64)
589 translateOp DoubleNeOp = Just (MO_F_Ne W64)
590 translateOp DoubleGeOp = Just (MO_F_Ge W64)
591 translateOp DoubleLeOp = Just (MO_F_Le W64)
592 translateOp DoubleGtOp = Just (MO_F_Gt W64)
593 translateOp DoubleLtOp = Just (MO_F_Lt W64)
594
595 translateOp DoubleAddOp = Just (MO_F_Add W64)
596 translateOp DoubleSubOp = Just (MO_F_Sub W64)
597 translateOp DoubleMulOp = Just (MO_F_Mul W64)
598 translateOp DoubleDivOp = Just (MO_F_Quot W64)
599 translateOp DoubleNegOp = Just (MO_F_Neg W64)
600
601 -- Float ops
602
603 translateOp FloatEqOp = Just (MO_F_Eq W32)
604 translateOp FloatNeOp = Just (MO_F_Ne W32)
605 translateOp FloatGeOp = Just (MO_F_Ge W32)
606 translateOp FloatLeOp = Just (MO_F_Le W32)
607 translateOp FloatGtOp = Just (MO_F_Gt W32)
608 translateOp FloatLtOp = Just (MO_F_Lt W32)
609
610 translateOp FloatAddOp = Just (MO_F_Add W32)
611 translateOp FloatSubOp = Just (MO_F_Sub W32)
612 translateOp FloatMulOp = Just (MO_F_Mul W32)
613 translateOp FloatDivOp = Just (MO_F_Quot W32)
614 translateOp FloatNegOp = Just (MO_F_Neg W32)
615
616 -- Conversions
617
618 translateOp Int2DoubleOp = Just (MO_SF_Conv wordWidth W64)
619 translateOp Double2IntOp = Just (MO_FS_Conv W64 wordWidth)
620
621 translateOp Int2FloatOp = Just (MO_SF_Conv wordWidth W32)
622 translateOp Float2IntOp = Just (MO_FS_Conv W32 wordWidth)
623
624 translateOp Float2DoubleOp = Just (MO_FF_Conv W32 W64)
625 translateOp Double2FloatOp = Just (MO_FF_Conv W64 W32)
626
627 -- Word comparisons masquerading as more exotic things.
628
629 translateOp SameMutVarOp = Just mo_wordEq
630 translateOp SameMVarOp = Just mo_wordEq
631 translateOp SameMutableArrayOp = Just mo_wordEq
632 translateOp SameMutableByteArrayOp = Just mo_wordEq
633 translateOp SameMutableArrayArrayOp= Just mo_wordEq
634 translateOp SameTVarOp = Just mo_wordEq
635 translateOp EqStablePtrOp = Just mo_wordEq
636
637 translateOp _ = Nothing
638
639 -- These primops are implemented by CallishMachOps, because they sometimes
640 -- turn into foreign calls depending on the backend.
641
642 callishOp :: PrimOp -> Maybe CallishMachOp
643 callishOp DoublePowerOp = Just MO_F64_Pwr
644 callishOp DoubleSinOp = Just MO_F64_Sin
645 callishOp DoubleCosOp = Just MO_F64_Cos
646 callishOp DoubleTanOp = Just MO_F64_Tan
647 callishOp DoubleSinhOp = Just MO_F64_Sinh
648 callishOp DoubleCoshOp = Just MO_F64_Cosh
649 callishOp DoubleTanhOp = Just MO_F64_Tanh
650 callishOp DoubleAsinOp = Just MO_F64_Asin
651 callishOp DoubleAcosOp = Just MO_F64_Acos
652 callishOp DoubleAtanOp = Just MO_F64_Atan
653 callishOp DoubleLogOp = Just MO_F64_Log
654 callishOp DoubleExpOp = Just MO_F64_Exp
655 callishOp DoubleSqrtOp = Just MO_F64_Sqrt
656
657 callishOp FloatPowerOp = Just MO_F32_Pwr
658 callishOp FloatSinOp = Just MO_F32_Sin
659 callishOp FloatCosOp = Just MO_F32_Cos
660 callishOp FloatTanOp = Just MO_F32_Tan
661 callishOp FloatSinhOp = Just MO_F32_Sinh
662 callishOp FloatCoshOp = Just MO_F32_Cosh
663 callishOp FloatTanhOp = Just MO_F32_Tanh
664 callishOp FloatAsinOp = Just MO_F32_Asin
665 callishOp FloatAcosOp = Just MO_F32_Acos
666 callishOp FloatAtanOp = Just MO_F32_Atan
667 callishOp FloatLogOp = Just MO_F32_Log
668 callishOp FloatExpOp = Just MO_F32_Exp
669 callishOp FloatSqrtOp = Just MO_F32_Sqrt
670
671 callishOp _ = Nothing
672
673 ------------------------------------------------------------------------------
674 -- Helpers for translating various minor variants of array indexing.
675
676 doIndexOffAddrOp :: Maybe MachOp -> CmmType -> [LocalReg] -> [CmmExpr] -> FCode ()
677 doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
678 = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx
679 doIndexOffAddrOp _ _ _ _
680 = panic "CgPrimOp: doIndexOffAddrOp"
681
682 doIndexByteArrayOp :: Maybe MachOp -> CmmType -> [LocalReg] -> [CmmExpr] -> FCode ()
683 doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
684 = mkBasicIndexedRead arrWordsHdrSize maybe_post_read_cast rep res addr idx
685 doIndexByteArrayOp _ _ _ _
686 = panic "CgPrimOp: doIndexByteArrayOp"
687
688 doReadPtrArrayOp :: LocalReg -> CmmExpr -> CmmExpr -> FCode ()
689 doReadPtrArrayOp res addr idx
690 = mkBasicIndexedRead arrPtrsHdrSize Nothing gcWord res addr idx
691
692
693 doWriteOffAddrOp :: Maybe MachOp -> [LocalReg] -> [CmmExpr] -> FCode ()
694 doWriteOffAddrOp maybe_pre_write_cast [] [addr,idx,val]
695 = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx val
696 doWriteOffAddrOp _ _ _
697 = panic "CgPrimOp: doWriteOffAddrOp"
698
699 doWriteByteArrayOp :: Maybe MachOp -> [LocalReg] -> [CmmExpr] -> FCode ()
700 doWriteByteArrayOp maybe_pre_write_cast [] [addr,idx,val]
701 = mkBasicIndexedWrite arrWordsHdrSize maybe_pre_write_cast addr idx val
702 doWriteByteArrayOp _ _ _
703 = panic "CgPrimOp: doWriteByteArrayOp"
704
705 doWritePtrArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
706 doWritePtrArrayOp addr idx val
707 = do mkBasicIndexedWrite arrPtrsHdrSize Nothing addr idx val
708 emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
709 -- the write barrier. We must write a byte into the mark table:
710 -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
711 emit $ mkStore (
712 cmmOffsetExpr
713 (cmmOffsetExprW (cmmOffsetB addr arrPtrsHdrSize)
714 (loadArrPtrsSize addr))
715 (CmmMachOp mo_wordUShr [idx,
716 CmmLit (mkIntCLit mUT_ARR_PTRS_CARD_BITS)])
717 ) (CmmLit (CmmInt 1 W8))
718
719 loadArrPtrsSize :: CmmExpr -> CmmExpr
720 loadArrPtrsSize addr = CmmLoad (cmmOffsetB addr off) bWord
721 where off = fixedHdrSize*wORD_SIZE + oFFSET_StgMutArrPtrs_ptrs
722
723 mkBasicIndexedRead :: ByteOff -> Maybe MachOp -> CmmType
724 -> LocalReg -> CmmExpr -> CmmExpr -> FCode ()
725 mkBasicIndexedRead off Nothing read_rep res base idx
726 = emit (mkAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx))
727 mkBasicIndexedRead off (Just cast) read_rep res base idx
728 = emit (mkAssign (CmmLocal res) (CmmMachOp cast [
729 cmmLoadIndexOffExpr off read_rep base idx]))
730
731 mkBasicIndexedWrite :: ByteOff -> Maybe MachOp
732 -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
733 mkBasicIndexedWrite off Nothing base idx val
734 = emit (mkStore (cmmIndexOffExpr off (typeWidth (cmmExprType val)) base idx) val)
735 mkBasicIndexedWrite off (Just cast) base idx val
736 = mkBasicIndexedWrite off Nothing base idx (CmmMachOp cast [val])
737
738 -- ----------------------------------------------------------------------------
739 -- Misc utils
740
741 cmmIndexOffExpr :: ByteOff -> Width -> CmmExpr -> CmmExpr -> CmmExpr
742 cmmIndexOffExpr off width base idx
743 = cmmIndexExpr width (cmmOffsetB base off) idx
744
745 cmmLoadIndexOffExpr :: ByteOff -> CmmType -> CmmExpr -> CmmExpr -> CmmExpr
746 cmmLoadIndexOffExpr off ty base idx
747 = CmmLoad (cmmIndexOffExpr off (typeWidth ty) base idx) ty
748
749 setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
750 setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
751
752 -- ----------------------------------------------------------------------------
753 -- Copying byte arrays
754
755 -- | Takes a source 'ByteArray#', an offset in the source array, a
756 -- destination 'MutableByteArray#', an offset into the destination
757 -- array, and the number of bytes to copy. Copies the given number of
758 -- bytes from the source array to the destination array.
759 doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
760 -> FCode ()
761 doCopyByteArrayOp = emitCopyByteArray copy
762 where
763 -- Copy data (we assume the arrays aren't overlapping since
764 -- they're of different types)
765 copy _src _dst dst_p src_p bytes =
766 emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit 1))
767
768 -- | Takes a source 'MutableByteArray#', an offset in the source
769 -- array, a destination 'MutableByteArray#', an offset into the
770 -- destination array, and the number of bytes to copy. Copies the
771 -- given number of bytes from the source array to the destination
772 -- array.
773 doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
774 -> FCode ()
775 doCopyMutableByteArrayOp = emitCopyByteArray copy
776 where
777 -- The only time the memory might overlap is when the two arrays
778 -- we were provided are the same array!
779 -- TODO: Optimize branch for common case of no aliasing.
780 copy src dst dst_p src_p bytes = do
781 [moveCall, cpyCall] <- forkAlts [
782 getCode $ emitMemmoveCall dst_p src_p bytes (CmmLit (mkIntCLit 1)),
783 getCode $ emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit 1))
784 ]
785 emit $ mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall
786
787 emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
788 -> FCode ())
789 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
790 -> FCode ()
791 emitCopyByteArray copy src src_off dst dst_off n = do
792 dst_p <- assignTempE $ cmmOffsetExpr (cmmOffsetB dst arrWordsHdrSize) dst_off
793 src_p <- assignTempE $ cmmOffsetExpr (cmmOffsetB src arrWordsHdrSize) src_off
794 copy src dst dst_p src_p n
795
796 -- ----------------------------------------------------------------------------
797 -- Copying pointer arrays
798
799 -- EZY: This code has an unusually high amount of assignTemp calls, seen
800 -- nowhere else in the code generator. This is mostly because these
801 -- "primitive" ops result in a surprisingly large amount of code. It
802 -- will likely be worthwhile to optimize what is emitted here, so that
803 -- our optimization passes don't waste time repeatedly optimizing the
804 -- same bits of code.
805
806 -- More closely imitates 'assignTemp' from the old code generator, which
807 -- returns a CmmExpr rather than a LocalReg.
808 assignTempE :: CmmExpr -> FCode CmmExpr
809 assignTempE e = do
810 t <- assignTemp e
811 return (CmmReg (CmmLocal t))
812
813 -- | Takes a source 'Array#', an offset in the source array, a
814 -- destination 'MutableArray#', an offset into the destination array,
815 -- and the number of elements to copy. Copies the given number of
816 -- elements from the source array to the destination array.
817 doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
818 -> FCode ()
819 doCopyArrayOp = emitCopyArray copy
820 where
821 -- Copy data (we assume the arrays aren't overlapping since
822 -- they're of different types)
823 copy _src _dst dst_p src_p bytes =
824 emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE))
825
826
827 -- | Takes a source 'MutableArray#', an offset in the source array, a
828 -- destination 'MutableArray#', an offset into the destination array,
829 -- and the number of elements to copy. Copies the given number of
830 -- elements from the source array to the destination array.
831 doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
832 -> FCode ()
833 doCopyMutableArrayOp = emitCopyArray copy
834 where
835 -- The only time the memory might overlap is when the two arrays
836 -- we were provided are the same array!
837 -- TODO: Optimize branch for common case of no aliasing.
838 copy src dst dst_p src_p bytes = do
839 [moveCall, cpyCall] <- forkAlts [
840 getCode $ emitMemmoveCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE)),
841 getCode $ emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE))
842 ]
843 emit $ mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall
844
845 emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
846 -> FCode ())
847 -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
848 -> FCode ()
849 emitCopyArray copy src0 src_off0 dst0 dst_off0 n0 = do
850 -- Passed as arguments (be careful)
851 src <- assignTempE src0
852 src_off <- assignTempE src_off0
853 dst <- assignTempE dst0
854 dst_off <- assignTempE dst_off0
855 n <- assignTempE n0
856
857 -- Set the dirty bit in the header.
858 emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
859
860 dst_elems_p <- assignTempE $ cmmOffsetB dst arrPtrsHdrSize
861 dst_p <- assignTempE $ cmmOffsetExprW dst_elems_p dst_off
862 src_p <- assignTempE $ cmmOffsetExprW (cmmOffsetB src arrPtrsHdrSize) src_off
863 bytes <- assignTempE $ cmmMulWord n (CmmLit (mkIntCLit wORD_SIZE))
864
865 copy src dst dst_p src_p bytes
866
867 -- The base address of the destination card table
868 dst_cards_p <- assignTempE $ cmmOffsetExprW dst_elems_p (loadArrPtrsSize dst)
869
870 emitSetCards dst_off dst_cards_p n
871
872 -- | Takes an info table label, a register to return the newly
873 -- allocated array in, a source array, an offset in the source array,
874 -- and the number of elements to copy. Allocates a new array and
875 -- initializes it form the source array.
876 emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> CmmExpr
877 -> FCode ()
878 emitCloneArray info_p res_r src0 src_off0 n0 = do
879 -- Passed as arguments (be careful)
880 src <- assignTempE src0
881 src_off <- assignTempE src_off0
882 n <- assignTempE n0
883
884 card_words <- assignTempE $ (n `cmmUShrWord`
885 (CmmLit (mkIntCLit mUT_ARR_PTRS_CARD_BITS)))
886 `cmmAddWord` CmmLit (mkIntCLit 1)
887 size <- assignTempE $ n `cmmAddWord` card_words
888 words <- assignTempE $ arrPtrsHdrSizeW `cmmAddWord` size
889
890 arr_r <- newTemp bWord
891 emitAllocateCall arr_r myCapability words
892 tickyAllocPrim (CmmLit (mkIntCLit arrPtrsHdrSize)) (n `cmmMulWord` wordSize)
893 (CmmLit $ mkIntCLit 0)
894
895 let arr = CmmReg (CmmLocal arr_r)
896 emitSetDynHdr arr (CmmLit (CmmLabel info_p)) curCCS
897 emit $ mkStore (cmmOffsetB arr (fixedHdrSize * wORD_SIZE +
898 oFFSET_StgMutArrPtrs_ptrs)) n
899 emit $ mkStore (cmmOffsetB arr (fixedHdrSize * wORD_SIZE +
900 oFFSET_StgMutArrPtrs_size)) size
901
902 dst_p <- assignTempE $ cmmOffsetB arr arrPtrsHdrSize
903 src_p <- assignTempE $ cmmOffsetExprW (cmmOffsetB src arrPtrsHdrSize)
904 src_off
905
906 emitMemcpyCall dst_p src_p (n `cmmMulWord` wordSize) (CmmLit (mkIntCLit wORD_SIZE))
907
908 emitMemsetCall (cmmOffsetExprW dst_p n)
909 (CmmLit (mkIntCLit 1))
910 (card_words `cmmMulWord` wordSize)
911 (CmmLit (mkIntCLit wORD_SIZE))
912 emit $ mkAssign (CmmLocal res_r) arr
913 where
914 arrPtrsHdrSizeW = CmmLit $ mkIntCLit $ fixedHdrSize +
915 (sIZEOF_StgMutArrPtrs_NoHdr `div` wORD_SIZE)
916 wordSize = CmmLit (mkIntCLit wORD_SIZE)
917 myCapability = CmmReg baseReg `cmmSubWord`
918 CmmLit (mkIntCLit oFFSET_Capability_r)
919
920 -- | Takes and offset in the destination array, the base address of
921 -- the card table, and the number of elements affected (*not* the
922 -- number of cards). Marks the relevant cards as dirty.
923 emitSetCards :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
924 emitSetCards dst_start dst_cards_start n = do
925 start_card <- assignTempE $ card dst_start
926 emitMemsetCall (dst_cards_start `cmmAddWord` start_card)
927 (CmmLit (mkIntCLit 1))
928 ((card (dst_start `cmmAddWord` n) `cmmSubWord` start_card)
929 `cmmAddWord` CmmLit (mkIntCLit 1))
930 (CmmLit (mkIntCLit wORD_SIZE))
931 where
932 -- Convert an element index to a card index
933 card i = i `cmmUShrWord` (CmmLit (mkIntCLit mUT_ARR_PTRS_CARD_BITS))
934
935 -- | Emit a call to @memcpy@.
936 emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
937 emitMemcpyCall dst src n align = do
938 emitPrimCall
939 [ {-no results-} ]
940 MO_Memcpy
941 [ dst, src, n, align ]
942
943 -- | Emit a call to @memmove@.
944 emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
945 emitMemmoveCall dst src n align = do
946 emitPrimCall
947 [ {- no results -} ]
948 MO_Memmove
949 [ dst, src, n, align ]
950
951 -- | Emit a call to @memset@. The second argument must fit inside an
952 -- unsigned char.
953 emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
954 emitMemsetCall dst c n align = do
955 emitPrimCall
956 [ {- no results -} ]
957 MO_Memset
958 [ dst, c, n, align ]
959
960 -- | Emit a call to @allocate@.
961 emitAllocateCall :: LocalReg -> CmmExpr -> CmmExpr -> FCode ()
962 emitAllocateCall res cap n = do
963 emitCCall
964 [ (res, AddrHint) ]
965 allocate
966 [ (cap, AddrHint)
967 , (n, NoHint)
968 ]
969 where
970 allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
971 ForeignLabelInExternalPackage IsFunction))
972
973 emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
974 emitPopCntCall res x width = do
975 emitPrimCall
976 [ res ]
977 (MO_PopCnt width)
978 [ x ]