Add SmallArray# and SmallMutableArray# types
[ghc.git] / compiler / codeGen / StgCmmForeign.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Code generation for foreign calls.
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmForeign (
10 cgForeignCall, loadThreadState, saveThreadState,
11 emitPrimCall, emitCCall,
12 emitForeignCall, -- For CmmParse
13 emitSaveThreadState, -- will be needed by the Cmm parser
14 emitLoadThreadState, -- ditto
15 emitCloseNursery, emitOpenNursery
16 ) where
17
18 #include "HsVersions.h"
19
20 import StgSyn
21 import StgCmmProf (storeCurCCS, ccsType, curCCS)
22 import StgCmmEnv
23 import StgCmmMonad
24 import StgCmmUtils
25 import StgCmmClosure
26 import StgCmmLayout
27
28 import Cmm
29 import CmmUtils
30 import MkGraph
31 import Type
32 import TysPrim
33 import CLabel
34 import SMRep
35 import ForeignCall
36 import DynFlags
37 import Maybes
38 import Outputable
39 import BasicTypes
40
41 import Control.Monad
42 import Prelude hiding( succ )
43
44 -----------------------------------------------------------------------------
45 -- Code generation for Foreign Calls
46 -----------------------------------------------------------------------------
47
48 -- | emit code for a foreign call, and return the results to the sequel.
49 --
50 cgForeignCall :: ForeignCall -- the op
51 -> [StgArg] -- x,y arguments
52 -> Type -- result type
53 -> FCode ReturnKind
54
55 cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
56 = do { dflags <- getDynFlags
57 ; let -- in the stdcall calling convention, the symbol needs @size appended
58 -- to it, where size is the total number of bytes of arguments. We
59 -- attach this info to the CLabel here, and the CLabel pretty printer
60 -- will generate the suffix when the label is printed.
61 call_size args
62 | StdCallConv <- cconv = Just (sum (map arg_size args))
63 | otherwise = Nothing
64
65 -- ToDo: this might not be correct for 64-bit API
66 arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType dflags arg)
67 (wORD_SIZE dflags)
68 ; cmm_args <- getFCallArgs stg_args
69 ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
70 ; let ((call_args, arg_hints), cmm_target)
71 = case target of
72 StaticTarget _ _ False ->
73 panic "cgForeignCall: unexpected FFI value import"
74 StaticTarget lbl mPkgId True
75 -> let labelSource
76 = case mPkgId of
77 Nothing -> ForeignLabelInThisPackage
78 Just pkgId -> ForeignLabelInPackage pkgId
79 size = call_size cmm_args
80 in ( unzip cmm_args
81 , CmmLit (CmmLabel
82 (mkForeignLabel lbl size labelSource IsFunction)))
83
84 DynamicTarget -> case cmm_args of
85 (fn,_):rest -> (unzip rest, fn)
86 [] -> panic "cgForeignCall []"
87 fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn
88 call_target = ForeignTarget cmm_target fc
89
90 -- we want to emit code for the call, and then emitReturn.
91 -- However, if the sequel is AssignTo, we shortcut a little
92 -- and generate a foreign call that assigns the results
93 -- directly. Otherwise we end up generating a bunch of
94 -- useless "r = r" assignments, which are not merely annoying:
95 -- they prevent the common block elimination from working correctly
96 -- in the case of a safe foreign call.
97 -- See Note [safe foreign call convention]
98 --
99 ; sequel <- getSequel
100 ; case sequel of
101 AssignTo assign_to_these _ ->
102 emitForeignCall safety assign_to_these call_target call_args
103
104 _something_else ->
105 do { _ <- emitForeignCall safety res_regs call_target call_args
106 ; emitReturn (map (CmmReg . CmmLocal) res_regs)
107 }
108 }
109
110 {- Note [safe foreign call convention]
111
112 The simple thing to do for a safe foreign call would be the same as an
113 unsafe one: just
114
115 emitForeignCall ...
116 emitReturn ...
117
118 but consider what happens in this case
119
120 case foo x y z of
121 (# s, r #) -> ...
122
123 The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r]
124 as the result reg, and we generate
125
126 r = foo(x,y,z) returns to L1 -- emitForeignCall
127 L1:
128 r = r -- emitReturn
129 goto L2
130 L2:
131 ...
132
133 Now L1 is a proc point (by definition, it is the continuation of the
134 safe foreign call). If L2 does a heap check, then L2 will also be a
135 proc point.
136
137 Furthermore, the stack layout algorithm has to arrange to save r
138 somewhere between the call and the jump to L1, which is annoying: we
139 would have to treat r differently from the other live variables, which
140 have to be saved *before* the call.
141
142 So we adopt a special convention for safe foreign calls: the results
143 are copied out according to the NativeReturn convention by the call,
144 and the continuation of the call should copyIn the results. (The
145 copyOut code is actually inserted when the safe foreign call is
146 lowered later). The result regs attached to the safe foreign call are
147 only used temporarily to hold the results before they are copied out.
148
149 We will now generate this:
150
151 r = foo(x,y,z) returns to L1
152 L1:
153 r = R1 -- copyIn, inserted by mkSafeCall
154 goto L2
155 L2:
156 ... r ...
157
158 And when the safe foreign call is lowered later (see Note [lower safe
159 foreign calls]) we get this:
160
161 suspendThread()
162 r = foo(x,y,z)
163 resumeThread()
164 R1 = r -- copyOut, inserted by lowerSafeForeignCall
165 jump L1
166 L1:
167 r = R1 -- copyIn, inserted by mkSafeCall
168 goto L2
169 L2:
170 ... r ...
171
172 Now consider what happens if L2 does a heap check: the Adams
173 optimisation kicks in and commons up L1 with the heap-check
174 continuation, resulting in just one proc point instead of two. Yay!
175 -}
176
177
178 emitCCall :: [(CmmFormal,ForeignHint)]
179 -> CmmExpr
180 -> [(CmmActual,ForeignHint)]
181 -> FCode ()
182 emitCCall hinted_results fn hinted_args
183 = void $ emitForeignCall PlayRisky results target args
184 where
185 (args, arg_hints) = unzip hinted_args
186 (results, result_hints) = unzip hinted_results
187 target = ForeignTarget fn fc
188 fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn
189
190
191 emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()
192 emitPrimCall res op args
193 = void $ emitForeignCall PlayRisky res (PrimTarget op) args
194
195 -- alternative entry point, used by CmmParse
196 emitForeignCall
197 :: Safety
198 -> [CmmFormal] -- where to put the results
199 -> ForeignTarget -- the op
200 -> [CmmActual] -- arguments
201 -> FCode ReturnKind
202 emitForeignCall safety results target args
203 | not (playSafe safety) = do
204 dflags <- getDynFlags
205 let (caller_save, caller_load) = callerSaveVolatileRegs dflags
206 emit caller_save
207 target' <- load_target_into_temp target
208 args' <- mapM maybe_assign_temp args
209 emit $ mkUnsafeCall target' results args'
210 emit caller_load
211 return AssignedDirectly
212
213 | otherwise = do
214 dflags <- getDynFlags
215 updfr_off <- getUpdFrameOff
216 target' <- load_target_into_temp target
217 args' <- mapM maybe_assign_temp args
218 k <- newLabelC
219 let (off, _, copyout) = copyInOflow dflags NativeReturn (Young k) results []
220 -- see Note [safe foreign call convention]
221 emit $
222 ( mkStore (CmmStackSlot (Young k) (widthInBytes (wordWidth dflags)))
223 (CmmLit (CmmBlock k))
224 <*> mkLast (CmmForeignCall { tgt = target'
225 , res = results
226 , args = args'
227 , succ = k
228 , ret_args = off
229 , ret_off = updfr_off
230 , intrbl = playInterruptible safety })
231 <*> mkLabel k
232 <*> copyout
233 )
234 return (ReturnedTo k off)
235
236 load_target_into_temp :: ForeignTarget -> FCode ForeignTarget
237 load_target_into_temp (ForeignTarget expr conv) = do
238 tmp <- maybe_assign_temp expr
239 return (ForeignTarget tmp conv)
240 load_target_into_temp other_target@(PrimTarget _) =
241 return other_target
242
243 -- What we want to do here is create a new temporary for the foreign
244 -- call argument if it is not safe to use the expression directly,
245 -- because the expression mentions caller-saves GlobalRegs (see
246 -- Note [Register Parameter Passing]).
247 --
248 -- However, we can't pattern-match on the expression here, because
249 -- this is used in a loop by CmmParse, and testing the expression
250 -- results in a black hole. So we always create a temporary, and rely
251 -- on CmmSink to clean it up later. (Yuck, ToDo). The generated code
252 -- ends up being the same, at least for the RTS .cmm code.
253 --
254 maybe_assign_temp :: CmmExpr -> FCode CmmExpr
255 maybe_assign_temp e = do
256 dflags <- getDynFlags
257 reg <- newTemp (cmmExprType dflags e)
258 emitAssign (CmmLocal reg) e
259 return (CmmReg (CmmLocal reg))
260
261 -- -----------------------------------------------------------------------------
262 -- Save/restore the thread state in the TSO
263
264 -- This stuff can't be done in suspendThread/resumeThread, because it
265 -- refers to global registers which aren't available in the C world.
266
267 saveThreadState :: DynFlags -> CmmAGraph
268 saveThreadState dflags =
269 -- CurrentTSO->stackobj->sp = Sp;
270 mkStore (cmmOffset dflags (CmmLoad (cmmOffset dflags stgCurrentTSO (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) stgSp
271 <*> closeNursery dflags
272 -- and save the current cost centre stack in the TSO when profiling:
273 <*> if gopt Opt_SccProfilingOn dflags then
274 mkStore (cmmOffset dflags stgCurrentTSO (tso_CCCS dflags)) curCCS
275 else mkNop
276
277 emitSaveThreadState :: FCode ()
278 emitSaveThreadState = do
279 dflags <- getDynFlags
280 emit (saveThreadState dflags)
281
282 emitCloseNursery :: FCode ()
283 emitCloseNursery = do
284 df <- getDynFlags
285 emit (closeNursery df)
286
287 -- CurrentNursery->free = Hp+1;
288 closeNursery :: DynFlags -> CmmAGraph
289 closeNursery dflags = mkStore (nursery_bdescr_free dflags) (cmmOffsetW dflags stgHp 1)
290
291 loadThreadState :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph
292 loadThreadState dflags tso stack = do
293 catAGraphs [
294 -- tso = CurrentTSO;
295 mkAssign (CmmLocal tso) stgCurrentTSO,
296 -- stack = tso->stackobj;
297 mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
298 -- Sp = stack->sp;
299 mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
300 -- SpLim = stack->stack + RESERVED_STACK_WORDS;
301 mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
302 (rESERVED_STACK_WORDS dflags)),
303 -- HpAlloc = 0;
304 -- HpAlloc is assumed to be set to non-zero only by a failed
305 -- a heap check, see HeapStackCheck.cmm:GC_GENERIC
306 mkAssign hpAlloc (zeroExpr dflags),
307
308 openNursery dflags,
309 -- and load the current cost centre stack from the TSO when profiling:
310 if gopt Opt_SccProfilingOn dflags then
311 storeCurCCS
312 (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) (ccsType dflags))
313 else mkNop]
314
315 emitLoadThreadState :: FCode ()
316 emitLoadThreadState = do
317 dflags <- getDynFlags
318 load_tso <- newTemp (gcWord dflags)
319 load_stack <- newTemp (gcWord dflags)
320 emit $ loadThreadState dflags load_tso load_stack
321
322 emitOpenNursery :: FCode ()
323 emitOpenNursery = do
324 df <- getDynFlags
325 emit (openNursery df)
326
327 openNursery :: DynFlags -> CmmAGraph
328 openNursery dflags = catAGraphs [
329 -- Hp = CurrentNursery->free - 1;
330 mkAssign hp (cmmOffsetW dflags (CmmLoad (nursery_bdescr_free dflags) (bWord dflags)) (-1)),
331
332 -- HpLim = CurrentNursery->start +
333 -- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
334 mkAssign hpLim
335 (cmmOffsetExpr dflags
336 (CmmLoad (nursery_bdescr_start dflags) (bWord dflags))
337 (cmmOffset dflags
338 (CmmMachOp (mo_wordMul dflags) [
339 CmmMachOp (MO_SS_Conv W32 (wordWidth dflags))
340 [CmmLoad (nursery_bdescr_blocks dflags) b32],
341 mkIntExpr dflags (bLOCK_SIZE dflags)
342 ])
343 (-1)
344 )
345 )
346 ]
347
348 nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks :: DynFlags -> CmmExpr
349 nursery_bdescr_free dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_free dflags)
350 nursery_bdescr_start dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_start dflags)
351 nursery_bdescr_blocks dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_blocks dflags)
352
353 tso_stackobj, tso_CCCS, stack_STACK, stack_SP :: DynFlags -> ByteOff
354 tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags)
355 tso_CCCS dflags = closureField dflags (oFFSET_StgTSO_cccs dflags)
356 stack_STACK dflags = closureField dflags (oFFSET_StgStack_stack dflags)
357 stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags)
358
359
360 closureField :: DynFlags -> ByteOff -> ByteOff
361 closureField dflags off = off + fixedHdrSize dflags
362
363 stgSp, stgHp, stgCurrentTSO, stgCurrentNursery :: CmmExpr
364 stgSp = CmmReg sp
365 stgHp = CmmReg hp
366 stgCurrentTSO = CmmReg currentTSO
367 stgCurrentNursery = CmmReg currentNursery
368
369 sp, spLim, hp, hpLim, currentTSO, currentNursery, hpAlloc :: CmmReg
370 sp = CmmGlobal Sp
371 spLim = CmmGlobal SpLim
372 hp = CmmGlobal Hp
373 hpLim = CmmGlobal HpLim
374 currentTSO = CmmGlobal CurrentTSO
375 currentNursery = CmmGlobal CurrentNursery
376 hpAlloc = CmmGlobal HpAlloc
377
378 -- -----------------------------------------------------------------------------
379 -- For certain types passed to foreign calls, we adjust the actual
380 -- value passed to the call. For ByteArray#/Array# we pass the
381 -- address of the actual array, not the address of the heap object.
382
383 getFCallArgs :: [StgArg] -> FCode [(CmmExpr, ForeignHint)]
384 -- (a) Drop void args
385 -- (b) Add foreign-call shim code
386 -- It's (b) that makes this differ from getNonVoidArgAmodes
387
388 getFCallArgs args
389 = do { mb_cmms <- mapM get args
390 ; return (catMaybes mb_cmms) }
391 where
392 get arg | isVoidRep arg_rep
393 = return Nothing
394 | otherwise
395 = do { cmm <- getArgAmode (NonVoid arg)
396 ; dflags <- getDynFlags
397 ; return (Just (add_shim dflags arg_ty cmm, hint)) }
398 where
399 arg_ty = stgArgType arg
400 arg_rep = typePrimRep arg_ty
401 hint = typeForeignHint arg_ty
402
403 add_shim :: DynFlags -> Type -> CmmExpr -> CmmExpr
404 add_shim dflags arg_ty expr
405 | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon
406 = cmmOffsetB dflags expr (arrPtrsHdrSize dflags)
407
408 | tycon == smallArrayPrimTyCon || tycon == smallMutableArrayPrimTyCon
409 = cmmOffsetB dflags expr (smallArrPtrsHdrSize dflags)
410
411 | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon
412 = cmmOffsetB dflags expr (arrWordsHdrSize dflags)
413
414 | otherwise = expr
415 where
416 UnaryRep rep_ty = repType arg_ty
417 tycon = tyConAppTyCon rep_ty
418 -- should be a tycon app, since this is a foreign call