c8db8644db268e1fbfbcd1a2a1b6003d5ec144f7
[ghc.git] / compiler / codeGen / StgCmmForeign.hs
1 {-# LANGUAGE CPP #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Code generation for foreign calls.
6 --
7 -- (c) The University of Glasgow 2004-2006
8 --
9 -----------------------------------------------------------------------------
10
11 module StgCmmForeign (
12 cgForeignCall,
13 emitPrimCall, emitCCall,
14 emitForeignCall, -- For CmmParse
15 emitSaveThreadState,
16 saveThreadState,
17 emitLoadThreadState,
18 loadThreadState,
19 emitOpenNursery,
20 emitCloseNursery,
21 ) where
22
23 #include "HsVersions.h"
24
25 import StgSyn
26 import StgCmmProf (storeCurCCS, ccsType, curCCS)
27 import StgCmmEnv
28 import StgCmmMonad
29 import StgCmmUtils
30 import StgCmmClosure
31 import StgCmmLayout
32
33 import Cmm
34 import CmmUtils
35 import MkGraph
36 import Type
37 import TysPrim
38 import CLabel
39 import SMRep
40 import ForeignCall
41 import DynFlags
42 import Maybes
43 import Outputable
44 import UniqSupply
45 import BasicTypes
46
47 import Control.Monad
48
49 import Prelude hiding( succ, (<*>) )
50
51 -----------------------------------------------------------------------------
52 -- Code generation for Foreign Calls
53 -----------------------------------------------------------------------------
54
55 -- | emit code for a foreign call, and return the results to the sequel.
56 --
57 cgForeignCall :: ForeignCall -- the op
58 -> [StgArg] -- x,y arguments
59 -> Type -- result type
60 -> FCode ReturnKind
61
62 cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
63 = do { dflags <- getDynFlags
64 ; let -- in the stdcall calling convention, the symbol needs @size appended
65 -- to it, where size is the total number of bytes of arguments. We
66 -- attach this info to the CLabel here, and the CLabel pretty printer
67 -- will generate the suffix when the label is printed.
68 call_size args
69 | StdCallConv <- cconv = Just (sum (map arg_size args))
70 | otherwise = Nothing
71
72 -- ToDo: this might not be correct for 64-bit API
73 arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType dflags arg)
74 (wORD_SIZE dflags)
75 ; cmm_args <- getFCallArgs stg_args
76 ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
77 ; let ((call_args, arg_hints), cmm_target)
78 = case target of
79 StaticTarget _ _ _ False ->
80 panic "cgForeignCall: unexpected FFI value import"
81 StaticTarget _ lbl mPkgId True
82 -> let labelSource
83 = case mPkgId of
84 Nothing -> ForeignLabelInThisPackage
85 Just pkgId -> ForeignLabelInPackage pkgId
86 size = call_size cmm_args
87 in ( unzip cmm_args
88 , CmmLit (CmmLabel
89 (mkForeignLabel lbl size labelSource IsFunction)))
90
91 DynamicTarget -> case cmm_args of
92 (fn,_):rest -> (unzip rest, fn)
93 [] -> panic "cgForeignCall []"
94 fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn
95 call_target = ForeignTarget cmm_target fc
96
97 -- we want to emit code for the call, and then emitReturn.
98 -- However, if the sequel is AssignTo, we shortcut a little
99 -- and generate a foreign call that assigns the results
100 -- directly. Otherwise we end up generating a bunch of
101 -- useless "r = r" assignments, which are not merely annoying:
102 -- they prevent the common block elimination from working correctly
103 -- in the case of a safe foreign call.
104 -- See Note [safe foreign call convention]
105 --
106 ; sequel <- getSequel
107 ; case sequel of
108 AssignTo assign_to_these _ ->
109 emitForeignCall safety assign_to_these call_target call_args
110
111 _something_else ->
112 do { _ <- emitForeignCall safety res_regs call_target call_args
113 ; emitReturn (map (CmmReg . CmmLocal) res_regs)
114 }
115 }
116
117 {- Note [safe foreign call convention]
118
119 The simple thing to do for a safe foreign call would be the same as an
120 unsafe one: just
121
122 emitForeignCall ...
123 emitReturn ...
124
125 but consider what happens in this case
126
127 case foo x y z of
128 (# s, r #) -> ...
129
130 The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r]
131 as the result reg, and we generate
132
133 r = foo(x,y,z) returns to L1 -- emitForeignCall
134 L1:
135 r = r -- emitReturn
136 goto L2
137 L2:
138 ...
139
140 Now L1 is a proc point (by definition, it is the continuation of the
141 safe foreign call). If L2 does a heap check, then L2 will also be a
142 proc point.
143
144 Furthermore, the stack layout algorithm has to arrange to save r
145 somewhere between the call and the jump to L1, which is annoying: we
146 would have to treat r differently from the other live variables, which
147 have to be saved *before* the call.
148
149 So we adopt a special convention for safe foreign calls: the results
150 are copied out according to the NativeReturn convention by the call,
151 and the continuation of the call should copyIn the results. (The
152 copyOut code is actually inserted when the safe foreign call is
153 lowered later). The result regs attached to the safe foreign call are
154 only used temporarily to hold the results before they are copied out.
155
156 We will now generate this:
157
158 r = foo(x,y,z) returns to L1
159 L1:
160 r = R1 -- copyIn, inserted by mkSafeCall
161 goto L2
162 L2:
163 ... r ...
164
165 And when the safe foreign call is lowered later (see Note [lower safe
166 foreign calls]) we get this:
167
168 suspendThread()
169 r = foo(x,y,z)
170 resumeThread()
171 R1 = r -- copyOut, inserted by lowerSafeForeignCall
172 jump L1
173 L1:
174 r = R1 -- copyIn, inserted by mkSafeCall
175 goto L2
176 L2:
177 ... r ...
178
179 Now consider what happens if L2 does a heap check: the Adams
180 optimisation kicks in and commons up L1 with the heap-check
181 continuation, resulting in just one proc point instead of two. Yay!
182 -}
183
184
185 emitCCall :: [(CmmFormal,ForeignHint)]
186 -> CmmExpr
187 -> [(CmmActual,ForeignHint)]
188 -> FCode ()
189 emitCCall hinted_results fn hinted_args
190 = void $ emitForeignCall PlayRisky results target args
191 where
192 (args, arg_hints) = unzip hinted_args
193 (results, result_hints) = unzip hinted_results
194 target = ForeignTarget fn fc
195 fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn
196
197
198 emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()
199 emitPrimCall res op args
200 = void $ emitForeignCall PlayRisky res (PrimTarget op) args
201
202 -- alternative entry point, used by CmmParse
203 emitForeignCall
204 :: Safety
205 -> [CmmFormal] -- where to put the results
206 -> ForeignTarget -- the op
207 -> [CmmActual] -- arguments
208 -> FCode ReturnKind
209 emitForeignCall safety results target args
210 | not (playSafe safety) = do
211 dflags <- getDynFlags
212 let (caller_save, caller_load) = callerSaveVolatileRegs dflags
213 emit caller_save
214 target' <- load_target_into_temp target
215 args' <- mapM maybe_assign_temp args
216 emit $ mkUnsafeCall target' results args'
217 emit caller_load
218 return AssignedDirectly
219
220 | otherwise = do
221 dflags <- getDynFlags
222 updfr_off <- getUpdFrameOff
223 target' <- load_target_into_temp target
224 args' <- mapM maybe_assign_temp args
225 k <- newLabelC
226 let (off, _, copyout) = copyInOflow dflags NativeReturn (Young k) results []
227 -- see Note [safe foreign call convention]
228 tscope <- getTickScope
229 emit $
230 ( mkStore (CmmStackSlot (Young k) (widthInBytes (wordWidth dflags)))
231 (CmmLit (CmmBlock k))
232 <*> mkLast (CmmForeignCall { tgt = target'
233 , res = results
234 , args = args'
235 , succ = k
236 , ret_args = off
237 , ret_off = updfr_off
238 , intrbl = playInterruptible safety })
239 <*> mkLabel k tscope
240 <*> copyout
241 )
242 return (ReturnedTo k off)
243
244 load_target_into_temp :: ForeignTarget -> FCode ForeignTarget
245 load_target_into_temp (ForeignTarget expr conv) = do
246 tmp <- maybe_assign_temp expr
247 return (ForeignTarget tmp conv)
248 load_target_into_temp other_target@(PrimTarget _) =
249 return other_target
250
251 -- What we want to do here is create a new temporary for the foreign
252 -- call argument if it is not safe to use the expression directly,
253 -- because the expression mentions caller-saves GlobalRegs (see
254 -- Note [Register Parameter Passing]).
255 --
256 -- However, we can't pattern-match on the expression here, because
257 -- this is used in a loop by CmmParse, and testing the expression
258 -- results in a black hole. So we always create a temporary, and rely
259 -- on CmmSink to clean it up later. (Yuck, ToDo). The generated code
260 -- ends up being the same, at least for the RTS .cmm code.
261 --
262 maybe_assign_temp :: CmmExpr -> FCode CmmExpr
263 maybe_assign_temp e = do
264 dflags <- getDynFlags
265 reg <- newTemp (cmmExprType dflags e)
266 emitAssign (CmmLocal reg) e
267 return (CmmReg (CmmLocal reg))
268
269 -- -----------------------------------------------------------------------------
270 -- Save/restore the thread state in the TSO
271
272 -- This stuff can't be done in suspendThread/resumeThread, because it
273 -- refers to global registers which aren't available in the C world.
274
275 emitSaveThreadState :: FCode ()
276 emitSaveThreadState = do
277 dflags <- getDynFlags
278 code <- saveThreadState dflags
279 emit code
280
281 -- | Produce code to save the current thread state to @CurrentTSO@
282 saveThreadState :: MonadUnique m => DynFlags -> m CmmAGraph
283 saveThreadState dflags = do
284 tso <- newTemp (gcWord dflags)
285 close_nursery <- closeNursery dflags tso
286 pure $ catAGraphs [
287 -- tso = CurrentTSO;
288 mkAssign (CmmLocal tso) stgCurrentTSO,
289 -- tso->stackobj->sp = Sp;
290 mkStore (cmmOffset dflags
291 (CmmLoad (cmmOffset dflags
292 (CmmReg (CmmLocal tso))
293 (tso_stackobj dflags))
294 (bWord dflags))
295 (stack_SP dflags))
296 stgSp,
297 close_nursery,
298 -- and save the current cost centre stack in the TSO when profiling:
299 if gopt Opt_SccProfilingOn dflags then
300 mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) curCCS
301 else mkNop
302 ]
303
304 emitCloseNursery :: FCode ()
305 emitCloseNursery = do
306 dflags <- getDynFlags
307 tso <- newTemp (bWord dflags)
308 code <- closeNursery dflags tso
309 emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*> code
310
311 {- |
312 @closeNursery dflags tso@ produces code to close the nursery.
313 A local register holding the value of @CurrentTSO@ is expected for
314 efficiency.
315
316 Closing the nursery corresponds to the following code:
317
318 @
319 tso = CurrentTSO;
320 cn = CurrentNuresry;
321
322 // Update the allocation limit for the current thread. We don't
323 // check to see whether it has overflowed at this point, that check is
324 // made when we run out of space in the current heap block (stg_gc_noregs)
325 // and in the scheduler when context switching (schedulePostRunThread).
326 tso->alloc_limit -= Hp + WDS(1) - cn->start;
327
328 // Set cn->free to the next unoccupied word in the block
329 cn->free = Hp + WDS(1);
330 @
331 -}
332 closeNursery :: MonadUnique m => DynFlags -> LocalReg -> m CmmAGraph
333 closeNursery df tso = do
334 let tsoreg = CmmLocal tso
335 cnreg <- CmmLocal <$> newTemp (bWord df)
336 pure $ catAGraphs [
337 mkAssign cnreg stgCurrentNursery,
338
339 -- CurrentNursery->free = Hp+1;
340 mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df stgHp 1),
341
342 let alloc =
343 CmmMachOp (mo_wordSub df)
344 [ cmmOffsetW df stgHp 1
345 , CmmLoad (nursery_bdescr_start df cnreg) (bWord df)
346 ]
347
348 alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
349 in
350
351 -- tso->alloc_limit += alloc
352 mkStore alloc_limit (CmmMachOp (MO_Sub W64)
353 [ CmmLoad alloc_limit b64
354 , CmmMachOp (mo_WordTo64 df) [alloc] ])
355 ]
356
357 emitLoadThreadState :: FCode ()
358 emitLoadThreadState = do
359 dflags <- getDynFlags
360 code <- loadThreadState dflags
361 emit code
362
363 -- | Produce code to load the current thread state from @CurrentTSO@
364 loadThreadState :: MonadUnique m => DynFlags -> m CmmAGraph
365 loadThreadState dflags = do
366 tso <- newTemp (gcWord dflags)
367 stack <- newTemp (gcWord dflags)
368 open_nursery <- openNursery dflags tso
369 pure $ catAGraphs [
370 -- tso = CurrentTSO;
371 mkAssign (CmmLocal tso) stgCurrentTSO,
372 -- stack = tso->stackobj;
373 mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
374 -- Sp = stack->sp;
375 mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
376 -- SpLim = stack->stack + RESERVED_STACK_WORDS;
377 mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
378 (rESERVED_STACK_WORDS dflags)),
379 -- HpAlloc = 0;
380 -- HpAlloc is assumed to be set to non-zero only by a failed
381 -- a heap check, see HeapStackCheck.cmm:GC_GENERIC
382 mkAssign hpAlloc (zeroExpr dflags),
383 open_nursery,
384 -- and load the current cost centre stack from the TSO when profiling:
385 if gopt Opt_SccProfilingOn dflags
386 then storeCurCCS
387 (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso))
388 (tso_CCCS dflags)) (ccsType dflags))
389 else mkNop
390 ]
391
392
393 emitOpenNursery :: FCode ()
394 emitOpenNursery = do
395 dflags <- getDynFlags
396 tso <- newTemp (bWord dflags)
397 code <- openNursery dflags tso
398 emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*> code
399
400 {- |
401 @openNursery dflags tso@ produces code to open the nursery. A local register
402 holding the value of @CurrentTSO@ is expected for efficiency.
403
404 Opening the nursery corresponds to the following code:
405
406 @
407 tso = CurrentTSO;
408 cn = CurrentNursery;
409 bdfree = CurrentNuresry->free;
410 bdstart = CurrentNuresry->start;
411
412 // We *add* the currently occupied portion of the nursery block to
413 // the allocation limit, because we will subtract it again in
414 // closeNursery.
415 tso->alloc_limit += bdfree - bdstart;
416
417 // Set Hp to the last occupied word of the heap block. Why not the
418 // next unocupied word? Doing it this way means that we get to use
419 // an offset of zero more often, which might lead to slightly smaller
420 // code on some architectures.
421 Hp = bdfree - WDS(1);
422
423 // Set HpLim to the end of the current nursery block (note that this block
424 // might be a block group, consisting of several adjacent blocks.
425 HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1;
426 @
427 -}
428 openNursery :: MonadUnique m => DynFlags -> LocalReg -> m CmmAGraph
429 openNursery df tso = do
430 let tsoreg = CmmLocal tso
431 cnreg <- CmmLocal <$> newTemp (bWord df)
432 bdfreereg <- CmmLocal <$> newTemp (bWord df)
433 bdstartreg <- CmmLocal <$> newTemp (bWord df)
434
435 -- These assignments are carefully ordered to reduce register
436 -- pressure and generate not completely awful code on x86. To see
437 -- what code we generate, look at the assembly for
438 -- stg_returnToStackTop in rts/StgStartup.cmm.
439 pure $ catAGraphs [
440 mkAssign cnreg stgCurrentNursery,
441 mkAssign bdfreereg (CmmLoad (nursery_bdescr_free df cnreg) (bWord df)),
442
443 -- Hp = CurrentNursery->free - 1;
444 mkAssign hp (cmmOffsetW df (CmmReg bdfreereg) (-1)),
445
446 mkAssign bdstartreg (CmmLoad (nursery_bdescr_start df cnreg) (bWord df)),
447
448 -- HpLim = CurrentNursery->start +
449 -- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
450 mkAssign hpLim
451 (cmmOffsetExpr df
452 (CmmReg bdstartreg)
453 (cmmOffset df
454 (CmmMachOp (mo_wordMul df) [
455 CmmMachOp (MO_SS_Conv W32 (wordWidth df))
456 [CmmLoad (nursery_bdescr_blocks df cnreg) b32],
457 mkIntExpr df (bLOCK_SIZE df)
458 ])
459 (-1)
460 )
461 ),
462
463 -- alloc = bd->free - bd->start
464 let alloc =
465 CmmMachOp (mo_wordSub df) [CmmReg bdfreereg, CmmReg bdstartreg]
466
467 alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
468 in
469
470 -- tso->alloc_limit += alloc
471 mkStore alloc_limit (CmmMachOp (MO_Add W64)
472 [ CmmLoad alloc_limit b64
473 , CmmMachOp (mo_WordTo64 df) [alloc] ])
474
475 ]
476
477 nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks
478 :: DynFlags -> CmmReg -> CmmExpr
479 nursery_bdescr_free dflags cn =
480 cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free dflags)
481 nursery_bdescr_start dflags cn =
482 cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_start dflags)
483 nursery_bdescr_blocks dflags cn =
484 cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_blocks dflags)
485
486 tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: DynFlags -> ByteOff
487 tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags)
488 tso_alloc_limit dflags = closureField dflags (oFFSET_StgTSO_alloc_limit dflags)
489 tso_CCCS dflags = closureField dflags (oFFSET_StgTSO_cccs dflags)
490 stack_STACK dflags = closureField dflags (oFFSET_StgStack_stack dflags)
491 stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags)
492
493
494 closureField :: DynFlags -> ByteOff -> ByteOff
495 closureField dflags off = off + fixedHdrSize dflags
496
497 stgSp, stgHp, stgCurrentTSO, stgCurrentNursery :: CmmExpr
498 stgSp = CmmReg sp
499 stgHp = CmmReg hp
500 stgCurrentTSO = CmmReg currentTSO
501 stgCurrentNursery = CmmReg currentNursery
502
503 sp, spLim, hp, hpLim, currentTSO, currentNursery, hpAlloc :: CmmReg
504 sp = CmmGlobal Sp
505 spLim = CmmGlobal SpLim
506 hp = CmmGlobal Hp
507 hpLim = CmmGlobal HpLim
508 currentTSO = CmmGlobal CurrentTSO
509 currentNursery = CmmGlobal CurrentNursery
510 hpAlloc = CmmGlobal HpAlloc
511
512 -- -----------------------------------------------------------------------------
513 -- For certain types passed to foreign calls, we adjust the actual
514 -- value passed to the call. For ByteArray#/Array# we pass the
515 -- address of the actual array, not the address of the heap object.
516
517 getFCallArgs :: [StgArg] -> FCode [(CmmExpr, ForeignHint)]
518 -- (a) Drop void args
519 -- (b) Add foreign-call shim code
520 -- It's (b) that makes this differ from getNonVoidArgAmodes
521
522 getFCallArgs args
523 = do { mb_cmms <- mapM get args
524 ; return (catMaybes mb_cmms) }
525 where
526 get arg | isVoidRep arg_rep
527 = return Nothing
528 | otherwise
529 = do { cmm <- getArgAmode (NonVoid arg)
530 ; dflags <- getDynFlags
531 ; return (Just (add_shim dflags arg_ty cmm, hint)) }
532 where
533 arg_ty = stgArgType arg
534 arg_rep = typePrimRep arg_ty
535 hint = typeForeignHint arg_ty
536
537 add_shim :: DynFlags -> Type -> CmmExpr -> CmmExpr
538 add_shim dflags arg_ty expr
539 | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon
540 = cmmOffsetB dflags expr (arrPtrsHdrSize dflags)
541
542 | tycon == smallArrayPrimTyCon || tycon == smallMutableArrayPrimTyCon
543 = cmmOffsetB dflags expr (smallArrPtrsHdrSize dflags)
544
545 | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon
546 = cmmOffsetB dflags expr (arrWordsHdrSize dflags)
547
548 | otherwise = expr
549 where
550 UnaryRep rep_ty = repType arg_ty
551 tycon = tyConAppTyCon rep_ty
552 -- should be a tycon app, since this is a foreign call