Produce new-style Cmm from the Cmm parser
[ghc.git] / compiler / codeGen / StgCmmForeign.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Code generation for foreign calls.
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmForeign (
10 cgForeignCall, loadThreadState, saveThreadState,
11 emitPrimCall, emitCCall,
12 emitForeignCall, -- For CmmParse
13 emitSaveThreadState, -- will be needed by the Cmm parser
14 emitLoadThreadState, -- ditto
15 emitCloseNursery, emitOpenNursery
16 ) where
17
18 #include "HsVersions.h"
19
20 import StgSyn
21 import StgCmmProf
22 import StgCmmEnv
23 import StgCmmMonad
24 import StgCmmUtils
25 import StgCmmClosure
26 import StgCmmLayout
27
28 import Cmm
29 import CmmUtils
30 import MkGraph
31 import Type
32 import TysPrim
33 import CLabel
34 import SMRep
35 import ForeignCall
36 import DynFlags
37 import Maybes
38 import Outputable
39 import BasicTypes
40
41 import Control.Monad
42 import Prelude hiding( succ )
43
44 -----------------------------------------------------------------------------
45 -- Code generation for Foreign Calls
46 -----------------------------------------------------------------------------
47
48 -- | emit code for a foreign call, and return the results to the sequel.
49 --
50 cgForeignCall :: ForeignCall -- the op
51 -> [StgArg] -- x,y arguments
52 -> Type -- result type
53 -> FCode ReturnKind
54
55 cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
56 = do { dflags <- getDynFlags
57 ; let -- in the stdcall calling convention, the symbol needs @size appended
58 -- to it, where size is the total number of bytes of arguments. We
59 -- attach this info to the CLabel here, and the CLabel pretty printer
60 -- will generate the suffix when the label is printed.
61 call_size args
62 | StdCallConv <- cconv = Just (sum (map arg_size args))
63 | otherwise = Nothing
64
65 -- ToDo: this might not be correct for 64-bit API
66 arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType dflags arg)
67 (wORD_SIZE dflags)
68 ; cmm_args <- getFCallArgs stg_args
69 ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
70 ; let ((call_args, arg_hints), cmm_target)
71 = case target of
72 StaticTarget _ _ False ->
73 panic "cgForeignCall: unexpected FFI value import"
74 StaticTarget lbl mPkgId True
75 -> let labelSource
76 = case mPkgId of
77 Nothing -> ForeignLabelInThisPackage
78 Just pkgId -> ForeignLabelInPackage pkgId
79 size = call_size cmm_args
80 in ( unzip cmm_args
81 , CmmLit (CmmLabel
82 (mkForeignLabel lbl size labelSource IsFunction)))
83
84 DynamicTarget -> case cmm_args of
85 (fn,_):rest -> (unzip rest, fn)
86 [] -> panic "cgForeignCall []"
87 fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn
88 call_target = ForeignTarget cmm_target fc
89
90 -- we want to emit code for the call, and then emitReturn.
91 -- However, if the sequel is AssignTo, we shortcut a little
92 -- and generate a foreign call that assigns the results
93 -- directly. Otherwise we end up generating a bunch of
94 -- useless "r = r" assignments, which are not merely annoying:
95 -- they prevent the common block elimination from working correctly
96 -- in the case of a safe foreign call.
97 -- See Note [safe foreign call convention]
98 --
99 ; sequel <- getSequel
100 ; case sequel of
101 AssignTo assign_to_these _ ->
102 emitForeignCall safety assign_to_these call_target call_args
103
104 _something_else ->
105 do { _ <- emitForeignCall safety res_regs call_target call_args
106 ; emitReturn (map (CmmReg . CmmLocal) res_regs)
107 }
108 }
109
110 {- Note [safe foreign call convention]
111
112 The simple thing to do for a safe foreign call would be the same as an
113 unsafe one: just
114
115 emitForeignCall ...
116 emitReturn ...
117
118 but consider what happens in this case
119
120 case foo x y z of
121 (# s, r #) -> ...
122
123 The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r]
124 as the result reg, and we generate
125
126 r = foo(x,y,z) returns to L1 -- emitForeignCall
127 L1:
128 r = r -- emitReturn
129 goto L2
130 L2:
131 ...
132
133 Now L1 is a proc point (by definition, it is the continuation of the
134 safe foreign call). If L2 does a heap check, then L2 will also be a
135 proc point.
136
137 Furthermore, the stack layout algorithm has to arrange to save r
138 somewhere between the call and the jump to L1, which is annoying: we
139 would have to treat r differently from the other live variables, which
140 have to be saved *before* the call.
141
142 So we adopt a special convention for safe foreign calls: the results
143 are copied out according to the NativeReturn convention by the call,
144 and the continuation of the call should copyIn the results. (The
145 copyOut code is actually inserted when the safe foreign call is
146 lowered later). The result regs attached to the safe foreign call are
147 only used temporarily to hold the results before they are copied out.
148
149 We will now generate this:
150
151 r = foo(x,y,z) returns to L1
152 L1:
153 r = R1 -- copyIn, inserted by mkSafeCall
154 goto L2
155 L2:
156 ... r ...
157
158 And when the safe foreign call is lowered later (see Note [lower safe
159 foreign calls]) we get this:
160
161 suspendThread()
162 r = foo(x,y,z)
163 resumeThread()
164 R1 = r -- copyOut, inserted by lowerSafeForeignCall
165 jump L1
166 L1:
167 r = R1 -- copyIn, inserted by mkSafeCall
168 goto L2
169 L2:
170 ... r ...
171
172 Now consider what happens if L2 does a heap check: the Adams
173 optimisation kicks in and commons up L1 with the heap-check
174 continuation, resulting in just one proc point instead of two. Yay!
175 -}
176
177
178 emitCCall :: [(CmmFormal,ForeignHint)]
179 -> CmmExpr
180 -> [(CmmActual,ForeignHint)]
181 -> FCode ()
182 emitCCall hinted_results fn hinted_args
183 = void $ emitForeignCall PlayRisky results target args
184 where
185 (args, arg_hints) = unzip hinted_args
186 (results, result_hints) = unzip hinted_results
187 target = ForeignTarget fn fc
188 fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn
189
190
191 emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()
192 emitPrimCall res op args
193 = void $ emitForeignCall PlayRisky res (PrimTarget op) args
194
195 -- alternative entry point, used by CmmParse
196 emitForeignCall
197 :: Safety
198 -> [CmmFormal] -- where to put the results
199 -> ForeignTarget -- the op
200 -> [CmmActual] -- arguments
201 -> FCode ReturnKind
202 emitForeignCall safety results target args
203 | not (playSafe safety) = do
204 dflags <- getDynFlags
205 let (caller_save, caller_load) = callerSaveVolatileRegs dflags
206 emit caller_save
207 emit $ mkUnsafeCall target results args
208 emit caller_load
209 return AssignedDirectly
210
211 | otherwise = do
212 dflags <- getDynFlags
213 updfr_off <- getUpdFrameOff
214 temp_target <- load_target_into_temp target
215 k <- newLabelC
216 let (off, copyout) = copyInOflow dflags NativeReturn (Young k) results []
217 -- see Note [safe foreign call convention]
218 emit $
219 ( mkStore (CmmStackSlot (Young k) (widthInBytes (wordWidth dflags)))
220 (CmmLit (CmmBlock k))
221 <*> mkLast (CmmForeignCall { tgt = temp_target
222 , res = results
223 , args = args
224 , succ = k
225 , updfr = updfr_off
226 , intrbl = playInterruptible safety })
227 <*> mkLabel k
228 <*> copyout
229 )
230 return (ReturnedTo k off)
231
232
233 {-
234 -- THINK ABOUT THIS (used to happen)
235 -- we might need to load arguments into temporaries before
236 -- making the call, because certain global registers might
237 -- overlap with registers that the C calling convention uses
238 -- for passing arguments.
239 --
240 -- This is a HACK; really it should be done in the back end, but
241 -- it's easier to generate the temporaries here.
242 load_args_into_temps = mapM arg_assign_temp
243 where arg_assign_temp (e,hint) = do
244 tmp <- maybe_assign_temp e
245 return (tmp,hint)
246 -}
247
248 load_target_into_temp :: ForeignTarget -> FCode ForeignTarget
249 load_target_into_temp (ForeignTarget expr conv) = do
250 tmp <- maybe_assign_temp expr
251 return (ForeignTarget tmp conv)
252 load_target_into_temp other_target@(PrimTarget _) =
253 return other_target
254
255 maybe_assign_temp :: CmmExpr -> FCode CmmExpr
256 maybe_assign_temp e
257 | hasNoGlobalRegs e = return e
258 | otherwise = do
259 dflags <- getDynFlags
260 -- don't use assignTemp, it uses its own notion of "trivial"
261 -- expressions, which are wrong here.
262 -- this is a NonPtr because it only duplicates an existing
263 reg <- newTemp (cmmExprType dflags e) --TODO FIXME NOW
264 emitAssign (CmmLocal reg) e
265 return (CmmReg (CmmLocal reg))
266
267 -- -----------------------------------------------------------------------------
268 -- Save/restore the thread state in the TSO
269
270 -- This stuff can't be done in suspendThread/resumeThread, because it
271 -- refers to global registers which aren't available in the C world.
272
273 saveThreadState :: DynFlags -> CmmAGraph
274 saveThreadState dflags =
275 -- CurrentTSO->stackobj->sp = Sp;
276 mkStore (cmmOffset dflags (CmmLoad (cmmOffset dflags stgCurrentTSO (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) stgSp
277 <*> closeNursery dflags
278 -- and save the current cost centre stack in the TSO when profiling:
279 <*> if dopt Opt_SccProfilingOn dflags then
280 mkStore (cmmOffset dflags stgCurrentTSO (tso_CCCS dflags)) curCCS
281 else mkNop
282
283 emitSaveThreadState :: FCode ()
284 emitSaveThreadState = do
285 dflags <- getDynFlags
286 emit (saveThreadState dflags)
287
288 emitCloseNursery :: FCode ()
289 emitCloseNursery = do
290 df <- getDynFlags
291 emit (closeNursery df)
292
293 -- CurrentNursery->free = Hp+1;
294 closeNursery :: DynFlags -> CmmAGraph
295 closeNursery dflags = mkStore (nursery_bdescr_free dflags) (cmmOffsetW dflags stgHp 1)
296
297 loadThreadState :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph
298 loadThreadState dflags tso stack = do
299 catAGraphs [
300 -- tso = CurrentTSO;
301 mkAssign (CmmLocal tso) stgCurrentTSO,
302 -- stack = tso->stackobj;
303 mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
304 -- Sp = stack->sp;
305 mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
306 -- SpLim = stack->stack + RESERVED_STACK_WORDS;
307 mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
308 (rESERVED_STACK_WORDS dflags)),
309 openNursery dflags,
310 -- and load the current cost centre stack from the TSO when profiling:
311 if dopt Opt_SccProfilingOn dflags then
312 storeCurCCS
313 (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) (ccsType dflags))
314 else mkNop]
315
316 emitLoadThreadState :: FCode ()
317 emitLoadThreadState = do
318 dflags <- getDynFlags
319 load_tso <- newTemp (gcWord dflags)
320 load_stack <- newTemp (gcWord dflags)
321 emit $ loadThreadState dflags load_tso load_stack
322
323 emitOpenNursery :: FCode ()
324 emitOpenNursery = do
325 df <- getDynFlags
326 emit (openNursery df)
327
328 openNursery :: DynFlags -> CmmAGraph
329 openNursery dflags = catAGraphs [
330 -- Hp = CurrentNursery->free - 1;
331 mkAssign hp (cmmOffsetW dflags (CmmLoad (nursery_bdescr_free dflags) (bWord dflags)) (-1)),
332
333 -- HpLim = CurrentNursery->start +
334 -- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
335 mkAssign hpLim
336 (cmmOffsetExpr dflags
337 (CmmLoad (nursery_bdescr_start dflags) (bWord dflags))
338 (cmmOffset dflags
339 (CmmMachOp (mo_wordMul dflags) [
340 CmmMachOp (MO_SS_Conv W32 (wordWidth dflags))
341 [CmmLoad (nursery_bdescr_blocks dflags) b32],
342 mkIntExpr dflags (bLOCK_SIZE dflags)
343 ])
344 (-1)
345 )
346 )
347 ]
348
349 nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks :: DynFlags -> CmmExpr
350 nursery_bdescr_free dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_free dflags)
351 nursery_bdescr_start dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_start dflags)
352 nursery_bdescr_blocks dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_blocks dflags)
353
354 tso_stackobj, tso_CCCS, stack_STACK, stack_SP :: DynFlags -> ByteOff
355 tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags)
356 tso_CCCS dflags = closureField dflags (oFFSET_StgTSO_cccs dflags)
357 stack_STACK dflags = closureField dflags (oFFSET_StgStack_stack dflags)
358 stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags)
359
360
361 closureField :: DynFlags -> ByteOff -> ByteOff
362 closureField dflags off = off + fixedHdrSize dflags * wORD_SIZE dflags
363
364 stgSp, stgHp, stgCurrentTSO, stgCurrentNursery :: CmmExpr
365 stgSp = CmmReg sp
366 stgHp = CmmReg hp
367 stgCurrentTSO = CmmReg currentTSO
368 stgCurrentNursery = CmmReg currentNursery
369
370 sp, spLim, hp, hpLim, currentTSO, currentNursery :: CmmReg
371 sp = CmmGlobal Sp
372 spLim = CmmGlobal SpLim
373 hp = CmmGlobal Hp
374 hpLim = CmmGlobal HpLim
375 currentTSO = CmmGlobal CurrentTSO
376 currentNursery = CmmGlobal CurrentNursery
377
378 -- -----------------------------------------------------------------------------
379 -- For certain types passed to foreign calls, we adjust the actual
380 -- value passed to the call. For ByteArray#/Array# we pass the
381 -- address of the actual array, not the address of the heap object.
382
383 getFCallArgs :: [StgArg] -> FCode [(CmmExpr, ForeignHint)]
384 -- (a) Drop void args
385 -- (b) Add foreign-call shim code
386 -- It's (b) that makes this differ from getNonVoidArgAmodes
387
388 getFCallArgs args
389 = do { mb_cmms <- mapM get args
390 ; return (catMaybes mb_cmms) }
391 where
392 get arg | isVoidRep arg_rep
393 = return Nothing
394 | otherwise
395 = do { cmm <- getArgAmode (NonVoid arg)
396 ; dflags <- getDynFlags
397 ; return (Just (add_shim dflags arg_ty cmm, hint)) }
398 where
399 arg_ty = stgArgType arg
400 arg_rep = typePrimRep arg_ty
401 hint = typeForeignHint arg_ty
402
403 add_shim :: DynFlags -> Type -> CmmExpr -> CmmExpr
404 add_shim dflags arg_ty expr
405 | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon
406 = cmmOffsetB dflags expr (arrPtrsHdrSize dflags)
407
408 | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon
409 = cmmOffsetB dflags expr (arrWordsHdrSize dflags)
410
411 | otherwise = expr
412 where
413 UnaryRep rep_ty = repType arg_ty
414 tycon = tyConAppTyCon rep_ty
415 -- should be a tycon app, since this is a foreign call