Remove StgRubbishArg and CmmArg
[ghc.git] / compiler / codeGen / StgCmmForeign.hs
1 {-# LANGUAGE CPP #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Code generation for foreign calls.
6 --
7 -- (c) The University of Glasgow 2004-2006
8 --
9 -----------------------------------------------------------------------------
10
11 module StgCmmForeign (
12 cgForeignCall,
13 emitPrimCall, emitCCall,
14 emitForeignCall, -- For CmmParse
15 emitSaveThreadState,
16 saveThreadState,
17 emitLoadThreadState,
18 loadThreadState,
19 emitOpenNursery,
20 emitCloseNursery,
21 ) where
22
23 #include "HsVersions.h"
24
25 import StgSyn
26 import StgCmmProf (storeCurCCS, ccsType, curCCS)
27 import StgCmmEnv
28 import StgCmmMonad
29 import StgCmmUtils
30 import StgCmmClosure
31 import StgCmmLayout
32
33 import Cmm
34 import CmmUtils
35 import MkGraph
36 import Type
37 import RepType
38 import TysPrim
39 import CLabel
40 import SMRep
41 import ForeignCall
42 import DynFlags
43 import Maybes
44 import Outputable
45 import UniqSupply
46 import BasicTypes
47
48 import Control.Monad
49
50 import Prelude hiding( succ, (<*>) )
51
52 -----------------------------------------------------------------------------
53 -- Code generation for Foreign Calls
54 -----------------------------------------------------------------------------
55
56 -- | emit code for a foreign call, and return the results to the sequel.
57 --
58 cgForeignCall :: ForeignCall -- the op
59 -> [StgArg] -- x,y arguments
60 -> Type -- result type
61 -> FCode ReturnKind
62
63 cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
64 = do { dflags <- getDynFlags
65 ; let -- in the stdcall calling convention, the symbol needs @size appended
66 -- to it, where size is the total number of bytes of arguments. We
67 -- attach this info to the CLabel here, and the CLabel pretty printer
68 -- will generate the suffix when the label is printed.
69 call_size args
70 | StdCallConv <- cconv = Just (sum (map arg_size args))
71 | otherwise = Nothing
72
73 -- ToDo: this might not be correct for 64-bit API
74 arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType dflags arg)
75 (wORD_SIZE dflags)
76 ; cmm_args <- getFCallArgs stg_args
77 ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
78 ; let ((call_args, arg_hints), cmm_target)
79 = case target of
80 StaticTarget _ _ _ False ->
81 panic "cgForeignCall: unexpected FFI value import"
82 StaticTarget _ lbl mPkgId True
83 -> let labelSource
84 = case mPkgId of
85 Nothing -> ForeignLabelInThisPackage
86 Just pkgId -> ForeignLabelInPackage pkgId
87 size = call_size cmm_args
88 in ( unzip cmm_args
89 , CmmLit (CmmLabel
90 (mkForeignLabel lbl size labelSource IsFunction)))
91
92 DynamicTarget -> case cmm_args of
93 (fn,_):rest -> (unzip rest, fn)
94 [] -> panic "cgForeignCall []"
95 fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn
96 call_target = ForeignTarget cmm_target fc
97
98 -- we want to emit code for the call, and then emitReturn.
99 -- However, if the sequel is AssignTo, we shortcut a little
100 -- and generate a foreign call that assigns the results
101 -- directly. Otherwise we end up generating a bunch of
102 -- useless "r = r" assignments, which are not merely annoying:
103 -- they prevent the common block elimination from working correctly
104 -- in the case of a safe foreign call.
105 -- See Note [safe foreign call convention]
106 --
107 ; sequel <- getSequel
108 ; case sequel of
109 AssignTo assign_to_these _ ->
110 emitForeignCall safety assign_to_these call_target call_args
111
112 _something_else ->
113 do { _ <- emitForeignCall safety res_regs call_target call_args
114 ; emitReturn (map (CmmReg . CmmLocal) res_regs)
115 }
116 }
117
118 {- Note [safe foreign call convention]
119
120 The simple thing to do for a safe foreign call would be the same as an
121 unsafe one: just
122
123 emitForeignCall ...
124 emitReturn ...
125
126 but consider what happens in this case
127
128 case foo x y z of
129 (# s, r #) -> ...
130
131 The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r]
132 as the result reg, and we generate
133
134 r = foo(x,y,z) returns to L1 -- emitForeignCall
135 L1:
136 r = r -- emitReturn
137 goto L2
138 L2:
139 ...
140
141 Now L1 is a proc point (by definition, it is the continuation of the
142 safe foreign call). If L2 does a heap check, then L2 will also be a
143 proc point.
144
145 Furthermore, the stack layout algorithm has to arrange to save r
146 somewhere between the call and the jump to L1, which is annoying: we
147 would have to treat r differently from the other live variables, which
148 have to be saved *before* the call.
149
150 So we adopt a special convention for safe foreign calls: the results
151 are copied out according to the NativeReturn convention by the call,
152 and the continuation of the call should copyIn the results. (The
153 copyOut code is actually inserted when the safe foreign call is
154 lowered later). The result regs attached to the safe foreign call are
155 only used temporarily to hold the results before they are copied out.
156
157 We will now generate this:
158
159 r = foo(x,y,z) returns to L1
160 L1:
161 r = R1 -- copyIn, inserted by mkSafeCall
162 goto L2
163 L2:
164 ... r ...
165
166 And when the safe foreign call is lowered later (see Note [lower safe
167 foreign calls]) we get this:
168
169 suspendThread()
170 r = foo(x,y,z)
171 resumeThread()
172 R1 = r -- copyOut, inserted by lowerSafeForeignCall
173 jump L1
174 L1:
175 r = R1 -- copyIn, inserted by mkSafeCall
176 goto L2
177 L2:
178 ... r ...
179
180 Now consider what happens if L2 does a heap check: the Adams
181 optimisation kicks in and commons up L1 with the heap-check
182 continuation, resulting in just one proc point instead of two. Yay!
183 -}
184
185
186 emitCCall :: [(CmmFormal,ForeignHint)]
187 -> CmmExpr
188 -> [(CmmActual,ForeignHint)]
189 -> FCode ()
190 emitCCall hinted_results fn hinted_args
191 = void $ emitForeignCall PlayRisky results target args
192 where
193 (args, arg_hints) = unzip hinted_args
194 (results, result_hints) = unzip hinted_results
195 target = ForeignTarget fn fc
196 fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn
197
198
199 emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()
200 emitPrimCall res op args
201 = void $ emitForeignCall PlayRisky res (PrimTarget op) args
202
203 -- alternative entry point, used by CmmParse
204 emitForeignCall
205 :: Safety
206 -> [CmmFormal] -- where to put the results
207 -> ForeignTarget -- the op
208 -> [CmmActual] -- arguments
209 -> FCode ReturnKind
210 emitForeignCall safety results target args
211 | not (playSafe safety) = do
212 dflags <- getDynFlags
213 let (caller_save, caller_load) = callerSaveVolatileRegs dflags
214 emit caller_save
215 target' <- load_target_into_temp target
216 args' <- mapM maybe_assign_temp args
217 emit $ mkUnsafeCall target' results args'
218 emit caller_load
219 return AssignedDirectly
220
221 | otherwise = do
222 dflags <- getDynFlags
223 updfr_off <- getUpdFrameOff
224 target' <- load_target_into_temp target
225 args' <- mapM maybe_assign_temp args
226 k <- newLabelC
227 let (off, _, copyout) = copyInOflow dflags NativeReturn (Young k) results []
228 -- see Note [safe foreign call convention]
229 tscope <- getTickScope
230 emit $
231 ( mkStore (CmmStackSlot (Young k) (widthInBytes (wordWidth dflags)))
232 (CmmLit (CmmBlock k))
233 <*> mkLast (CmmForeignCall { tgt = target'
234 , res = results
235 , args = args'
236 , succ = k
237 , ret_args = off
238 , ret_off = updfr_off
239 , intrbl = playInterruptible safety })
240 <*> mkLabel k tscope
241 <*> copyout
242 )
243 return (ReturnedTo k off)
244
245 load_target_into_temp :: ForeignTarget -> FCode ForeignTarget
246 load_target_into_temp (ForeignTarget expr conv) = do
247 tmp <- maybe_assign_temp expr
248 return (ForeignTarget tmp conv)
249 load_target_into_temp other_target@(PrimTarget _) =
250 return other_target
251
252 -- What we want to do here is create a new temporary for the foreign
253 -- call argument if it is not safe to use the expression directly,
254 -- because the expression mentions caller-saves GlobalRegs (see
255 -- Note [Register Parameter Passing]).
256 --
257 -- However, we can't pattern-match on the expression here, because
258 -- this is used in a loop by CmmParse, and testing the expression
259 -- results in a black hole. So we always create a temporary, and rely
260 -- on CmmSink to clean it up later. (Yuck, ToDo). The generated code
261 -- ends up being the same, at least for the RTS .cmm code.
262 --
263 maybe_assign_temp :: CmmExpr -> FCode CmmExpr
264 maybe_assign_temp e = do
265 dflags <- getDynFlags
266 reg <- newTemp (cmmExprType dflags e)
267 emitAssign (CmmLocal reg) e
268 return (CmmReg (CmmLocal reg))
269
270 -- -----------------------------------------------------------------------------
271 -- Save/restore the thread state in the TSO
272
273 -- This stuff can't be done in suspendThread/resumeThread, because it
274 -- refers to global registers which aren't available in the C world.
275
276 emitSaveThreadState :: FCode ()
277 emitSaveThreadState = do
278 dflags <- getDynFlags
279 code <- saveThreadState dflags
280 emit code
281
282 -- | Produce code to save the current thread state to @CurrentTSO@
283 saveThreadState :: MonadUnique m => DynFlags -> m CmmAGraph
284 saveThreadState dflags = do
285 tso <- newTemp (gcWord dflags)
286 close_nursery <- closeNursery dflags tso
287 pure $ catAGraphs [
288 -- tso = CurrentTSO;
289 mkAssign (CmmLocal tso) stgCurrentTSO,
290 -- tso->stackobj->sp = Sp;
291 mkStore (cmmOffset dflags
292 (CmmLoad (cmmOffset dflags
293 (CmmReg (CmmLocal tso))
294 (tso_stackobj dflags))
295 (bWord dflags))
296 (stack_SP dflags))
297 stgSp,
298 close_nursery,
299 -- and save the current cost centre stack in the TSO when profiling:
300 if gopt Opt_SccProfilingOn dflags then
301 mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) curCCS
302 else mkNop
303 ]
304
305 emitCloseNursery :: FCode ()
306 emitCloseNursery = do
307 dflags <- getDynFlags
308 tso <- newTemp (bWord dflags)
309 code <- closeNursery dflags tso
310 emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*> code
311
312 {- |
313 @closeNursery dflags tso@ produces code to close the nursery.
314 A local register holding the value of @CurrentTSO@ is expected for
315 efficiency.
316
317 Closing the nursery corresponds to the following code:
318
319 @
320 tso = CurrentTSO;
321 cn = CurrentNuresry;
322
323 // Update the allocation limit for the current thread. We don't
324 // check to see whether it has overflowed at this point, that check is
325 // made when we run out of space in the current heap block (stg_gc_noregs)
326 // and in the scheduler when context switching (schedulePostRunThread).
327 tso->alloc_limit -= Hp + WDS(1) - cn->start;
328
329 // Set cn->free to the next unoccupied word in the block
330 cn->free = Hp + WDS(1);
331 @
332 -}
333 closeNursery :: MonadUnique m => DynFlags -> LocalReg -> m CmmAGraph
334 closeNursery df tso = do
335 let tsoreg = CmmLocal tso
336 cnreg <- CmmLocal <$> newTemp (bWord df)
337 pure $ catAGraphs [
338 mkAssign cnreg stgCurrentNursery,
339
340 -- CurrentNursery->free = Hp+1;
341 mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df stgHp 1),
342
343 let alloc =
344 CmmMachOp (mo_wordSub df)
345 [ cmmOffsetW df stgHp 1
346 , CmmLoad (nursery_bdescr_start df cnreg) (bWord df)
347 ]
348
349 alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
350 in
351
352 -- tso->alloc_limit += alloc
353 mkStore alloc_limit (CmmMachOp (MO_Sub W64)
354 [ CmmLoad alloc_limit b64
355 , CmmMachOp (mo_WordTo64 df) [alloc] ])
356 ]
357
358 emitLoadThreadState :: FCode ()
359 emitLoadThreadState = do
360 dflags <- getDynFlags
361 code <- loadThreadState dflags
362 emit code
363
364 -- | Produce code to load the current thread state from @CurrentTSO@
365 loadThreadState :: MonadUnique m => DynFlags -> m CmmAGraph
366 loadThreadState dflags = do
367 tso <- newTemp (gcWord dflags)
368 stack <- newTemp (gcWord dflags)
369 open_nursery <- openNursery dflags tso
370 pure $ catAGraphs [
371 -- tso = CurrentTSO;
372 mkAssign (CmmLocal tso) stgCurrentTSO,
373 -- stack = tso->stackobj;
374 mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
375 -- Sp = stack->sp;
376 mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
377 -- SpLim = stack->stack + RESERVED_STACK_WORDS;
378 mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
379 (rESERVED_STACK_WORDS dflags)),
380 -- HpAlloc = 0;
381 -- HpAlloc is assumed to be set to non-zero only by a failed
382 -- a heap check, see HeapStackCheck.cmm:GC_GENERIC
383 mkAssign hpAlloc (zeroExpr dflags),
384 open_nursery,
385 -- and load the current cost centre stack from the TSO when profiling:
386 if gopt Opt_SccProfilingOn dflags
387 then storeCurCCS
388 (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso))
389 (tso_CCCS dflags)) (ccsType dflags))
390 else mkNop
391 ]
392
393
394 emitOpenNursery :: FCode ()
395 emitOpenNursery = do
396 dflags <- getDynFlags
397 tso <- newTemp (bWord dflags)
398 code <- openNursery dflags tso
399 emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*> code
400
401 {- |
402 @openNursery dflags tso@ produces code to open the nursery. A local register
403 holding the value of @CurrentTSO@ is expected for efficiency.
404
405 Opening the nursery corresponds to the following code:
406
407 @
408 tso = CurrentTSO;
409 cn = CurrentNursery;
410 bdfree = CurrentNuresry->free;
411 bdstart = CurrentNuresry->start;
412
413 // We *add* the currently occupied portion of the nursery block to
414 // the allocation limit, because we will subtract it again in
415 // closeNursery.
416 tso->alloc_limit += bdfree - bdstart;
417
418 // Set Hp to the last occupied word of the heap block. Why not the
419 // next unocupied word? Doing it this way means that we get to use
420 // an offset of zero more often, which might lead to slightly smaller
421 // code on some architectures.
422 Hp = bdfree - WDS(1);
423
424 // Set HpLim to the end of the current nursery block (note that this block
425 // might be a block group, consisting of several adjacent blocks.
426 HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1;
427 @
428 -}
429 openNursery :: MonadUnique m => DynFlags -> LocalReg -> m CmmAGraph
430 openNursery df tso = do
431 let tsoreg = CmmLocal tso
432 cnreg <- CmmLocal <$> newTemp (bWord df)
433 bdfreereg <- CmmLocal <$> newTemp (bWord df)
434 bdstartreg <- CmmLocal <$> newTemp (bWord df)
435
436 -- These assignments are carefully ordered to reduce register
437 -- pressure and generate not completely awful code on x86. To see
438 -- what code we generate, look at the assembly for
439 -- stg_returnToStackTop in rts/StgStartup.cmm.
440 pure $ catAGraphs [
441 mkAssign cnreg stgCurrentNursery,
442 mkAssign bdfreereg (CmmLoad (nursery_bdescr_free df cnreg) (bWord df)),
443
444 -- Hp = CurrentNursery->free - 1;
445 mkAssign hp (cmmOffsetW df (CmmReg bdfreereg) (-1)),
446
447 mkAssign bdstartreg (CmmLoad (nursery_bdescr_start df cnreg) (bWord df)),
448
449 -- HpLim = CurrentNursery->start +
450 -- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
451 mkAssign hpLim
452 (cmmOffsetExpr df
453 (CmmReg bdstartreg)
454 (cmmOffset df
455 (CmmMachOp (mo_wordMul df) [
456 CmmMachOp (MO_SS_Conv W32 (wordWidth df))
457 [CmmLoad (nursery_bdescr_blocks df cnreg) b32],
458 mkIntExpr df (bLOCK_SIZE df)
459 ])
460 (-1)
461 )
462 ),
463
464 -- alloc = bd->free - bd->start
465 let alloc =
466 CmmMachOp (mo_wordSub df) [CmmReg bdfreereg, CmmReg bdstartreg]
467
468 alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
469 in
470
471 -- tso->alloc_limit += alloc
472 mkStore alloc_limit (CmmMachOp (MO_Add W64)
473 [ CmmLoad alloc_limit b64
474 , CmmMachOp (mo_WordTo64 df) [alloc] ])
475
476 ]
477
478 nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks
479 :: DynFlags -> CmmReg -> CmmExpr
480 nursery_bdescr_free dflags cn =
481 cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free dflags)
482 nursery_bdescr_start dflags cn =
483 cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_start dflags)
484 nursery_bdescr_blocks dflags cn =
485 cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_blocks dflags)
486
487 tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: DynFlags -> ByteOff
488 tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags)
489 tso_alloc_limit dflags = closureField dflags (oFFSET_StgTSO_alloc_limit dflags)
490 tso_CCCS dflags = closureField dflags (oFFSET_StgTSO_cccs dflags)
491 stack_STACK dflags = closureField dflags (oFFSET_StgStack_stack dflags)
492 stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags)
493
494
495 closureField :: DynFlags -> ByteOff -> ByteOff
496 closureField dflags off = off + fixedHdrSize dflags
497
498 stgSp, stgHp, stgCurrentTSO, stgCurrentNursery :: CmmExpr
499 stgSp = CmmReg sp
500 stgHp = CmmReg hp
501 stgCurrentTSO = CmmReg currentTSO
502 stgCurrentNursery = CmmReg currentNursery
503
504 sp, spLim, hp, hpLim, currentTSO, currentNursery, hpAlloc :: CmmReg
505 sp = CmmGlobal Sp
506 spLim = CmmGlobal SpLim
507 hp = CmmGlobal Hp
508 hpLim = CmmGlobal HpLim
509 currentTSO = CmmGlobal CurrentTSO
510 currentNursery = CmmGlobal CurrentNursery
511 hpAlloc = CmmGlobal HpAlloc
512
513 -- -----------------------------------------------------------------------------
514 -- For certain types passed to foreign calls, we adjust the actual
515 -- value passed to the call. For ByteArray#/Array# we pass the
516 -- address of the actual array, not the address of the heap object.
517
518 getFCallArgs :: [StgArg] -> FCode [(CmmExpr, ForeignHint)]
519 -- (a) Drop void args
520 -- (b) Add foreign-call shim code
521 -- It's (b) that makes this differ from getNonVoidArgAmodes
522
523 getFCallArgs args
524 = do { mb_cmms <- mapM get args
525 ; return (catMaybes mb_cmms) }
526 where
527 get arg | isVoidRep arg_rep
528 = return Nothing
529 | otherwise
530 = do { cmm <- getArgAmode (NonVoid arg)
531 ; dflags <- getDynFlags
532 ; return (Just (add_shim dflags arg_ty cmm, hint)) }
533 where
534 arg_ty = stgArgType arg
535 arg_rep = typePrimRep arg_ty
536 hint = typeForeignHint arg_ty
537
538 add_shim :: DynFlags -> Type -> CmmExpr -> CmmExpr
539 add_shim dflags arg_ty expr
540 | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon
541 = cmmOffsetB dflags expr (arrPtrsHdrSize dflags)
542
543 | tycon == smallArrayPrimTyCon || tycon == smallMutableArrayPrimTyCon
544 = cmmOffsetB dflags expr (smallArrPtrsHdrSize dflags)
545
546 | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon
547 = cmmOffsetB dflags expr (arrWordsHdrSize dflags)
548
549 | otherwise = expr
550 where
551 UnaryRep rep_ty = repType arg_ty
552 tycon = tyConAppTyCon rep_ty
553 -- should be a tycon app, since this is a foreign call