Update levity polymorphism
[ghc.git] / compiler / codeGen / StgCmmForeign.hs
1 {-# LANGUAGE CPP #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Code generation for foreign calls.
6 --
7 -- (c) The University of Glasgow 2004-2006
8 --
9 -----------------------------------------------------------------------------
10
11 module StgCmmForeign (
12 cgForeignCall,
13 emitPrimCall, emitCCall,
14 emitForeignCall, -- For CmmParse
15 emitSaveThreadState,
16 saveThreadState,
17 emitLoadThreadState,
18 loadThreadState,
19 emitOpenNursery,
20 emitCloseNursery,
21 ) where
22
23 #include "HsVersions.h"
24
25 import StgSyn
26 import StgCmmProf (storeCurCCS, ccsType, curCCS)
27 import StgCmmEnv
28 import StgCmmMonad
29 import StgCmmUtils
30 import StgCmmClosure
31 import StgCmmLayout
32
33 import BlockId (newBlockId)
34 import Cmm
35 import CmmUtils
36 import MkGraph
37 import Type
38 import RepType
39 import TysPrim
40 import CLabel
41 import SMRep
42 import ForeignCall
43 import DynFlags
44 import Maybes
45 import Outputable
46 import UniqSupply
47 import BasicTypes
48
49 import Control.Monad
50
51 import Prelude hiding( succ, (<*>) )
52
53 -----------------------------------------------------------------------------
54 -- Code generation for Foreign Calls
55 -----------------------------------------------------------------------------
56
57 -- | emit code for a foreign call, and return the results to the sequel.
58 --
59 cgForeignCall :: ForeignCall -- the op
60 -> [StgArg] -- x,y arguments
61 -> Type -- result type
62 -> FCode ReturnKind
63
64 cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
65 = do { dflags <- getDynFlags
66 ; let -- in the stdcall calling convention, the symbol needs @size appended
67 -- to it, where size is the total number of bytes of arguments. We
68 -- attach this info to the CLabel here, and the CLabel pretty printer
69 -- will generate the suffix when the label is printed.
70 call_size args
71 | StdCallConv <- cconv = Just (sum (map arg_size args))
72 | otherwise = Nothing
73
74 -- ToDo: this might not be correct for 64-bit API
75 arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType dflags arg)
76 (wORD_SIZE dflags)
77 ; cmm_args <- getFCallArgs stg_args
78 ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
79 ; let ((call_args, arg_hints), cmm_target)
80 = case target of
81 StaticTarget _ _ _ False ->
82 panic "cgForeignCall: unexpected FFI value import"
83 StaticTarget _ lbl mPkgId True
84 -> let labelSource
85 = case mPkgId of
86 Nothing -> ForeignLabelInThisPackage
87 Just pkgId -> ForeignLabelInPackage pkgId
88 size = call_size cmm_args
89 in ( unzip cmm_args
90 , CmmLit (CmmLabel
91 (mkForeignLabel lbl size labelSource IsFunction)))
92
93 DynamicTarget -> case cmm_args of
94 (fn,_):rest -> (unzip rest, fn)
95 [] -> panic "cgForeignCall []"
96 fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn
97 call_target = ForeignTarget cmm_target fc
98
99 -- we want to emit code for the call, and then emitReturn.
100 -- However, if the sequel is AssignTo, we shortcut a little
101 -- and generate a foreign call that assigns the results
102 -- directly. Otherwise we end up generating a bunch of
103 -- useless "r = r" assignments, which are not merely annoying:
104 -- they prevent the common block elimination from working correctly
105 -- in the case of a safe foreign call.
106 -- See Note [safe foreign call convention]
107 --
108 ; sequel <- getSequel
109 ; case sequel of
110 AssignTo assign_to_these _ ->
111 emitForeignCall safety assign_to_these call_target call_args
112
113 _something_else ->
114 do { _ <- emitForeignCall safety res_regs call_target call_args
115 ; emitReturn (map (CmmReg . CmmLocal) res_regs)
116 }
117 }
118
119 {- Note [safe foreign call convention]
120
121 The simple thing to do for a safe foreign call would be the same as an
122 unsafe one: just
123
124 emitForeignCall ...
125 emitReturn ...
126
127 but consider what happens in this case
128
129 case foo x y z of
130 (# s, r #) -> ...
131
132 The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r]
133 as the result reg, and we generate
134
135 r = foo(x,y,z) returns to L1 -- emitForeignCall
136 L1:
137 r = r -- emitReturn
138 goto L2
139 L2:
140 ...
141
142 Now L1 is a proc point (by definition, it is the continuation of the
143 safe foreign call). If L2 does a heap check, then L2 will also be a
144 proc point.
145
146 Furthermore, the stack layout algorithm has to arrange to save r
147 somewhere between the call and the jump to L1, which is annoying: we
148 would have to treat r differently from the other live variables, which
149 have to be saved *before* the call.
150
151 So we adopt a special convention for safe foreign calls: the results
152 are copied out according to the NativeReturn convention by the call,
153 and the continuation of the call should copyIn the results. (The
154 copyOut code is actually inserted when the safe foreign call is
155 lowered later). The result regs attached to the safe foreign call are
156 only used temporarily to hold the results before they are copied out.
157
158 We will now generate this:
159
160 r = foo(x,y,z) returns to L1
161 L1:
162 r = R1 -- copyIn, inserted by mkSafeCall
163 goto L2
164 L2:
165 ... r ...
166
167 And when the safe foreign call is lowered later (see Note [lower safe
168 foreign calls]) we get this:
169
170 suspendThread()
171 r = foo(x,y,z)
172 resumeThread()
173 R1 = r -- copyOut, inserted by lowerSafeForeignCall
174 jump L1
175 L1:
176 r = R1 -- copyIn, inserted by mkSafeCall
177 goto L2
178 L2:
179 ... r ...
180
181 Now consider what happens if L2 does a heap check: the Adams
182 optimisation kicks in and commons up L1 with the heap-check
183 continuation, resulting in just one proc point instead of two. Yay!
184 -}
185
186
187 emitCCall :: [(CmmFormal,ForeignHint)]
188 -> CmmExpr
189 -> [(CmmActual,ForeignHint)]
190 -> FCode ()
191 emitCCall hinted_results fn hinted_args
192 = void $ emitForeignCall PlayRisky results target args
193 where
194 (args, arg_hints) = unzip hinted_args
195 (results, result_hints) = unzip hinted_results
196 target = ForeignTarget fn fc
197 fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn
198
199
200 emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()
201 emitPrimCall res op args
202 = void $ emitForeignCall PlayRisky res (PrimTarget op) args
203
204 -- alternative entry point, used by CmmParse
205 emitForeignCall
206 :: Safety
207 -> [CmmFormal] -- where to put the results
208 -> ForeignTarget -- the op
209 -> [CmmActual] -- arguments
210 -> FCode ReturnKind
211 emitForeignCall safety results target args
212 | not (playSafe safety) = do
213 dflags <- getDynFlags
214 let (caller_save, caller_load) = callerSaveVolatileRegs dflags
215 emit caller_save
216 target' <- load_target_into_temp target
217 args' <- mapM maybe_assign_temp args
218 emit $ mkUnsafeCall target' results args'
219 emit caller_load
220 return AssignedDirectly
221
222 | otherwise = do
223 dflags <- getDynFlags
224 updfr_off <- getUpdFrameOff
225 target' <- load_target_into_temp target
226 args' <- mapM maybe_assign_temp args
227 k <- newBlockId
228 let (off, _, copyout) = copyInOflow dflags NativeReturn (Young k) results []
229 -- see Note [safe foreign call convention]
230 tscope <- getTickScope
231 emit $
232 ( mkStore (CmmStackSlot (Young k) (widthInBytes (wordWidth dflags)))
233 (CmmLit (CmmBlock k))
234 <*> mkLast (CmmForeignCall { tgt = target'
235 , res = results
236 , args = args'
237 , succ = k
238 , ret_args = off
239 , ret_off = updfr_off
240 , intrbl = playInterruptible safety })
241 <*> mkLabel k tscope
242 <*> copyout
243 )
244 return (ReturnedTo k off)
245
246 load_target_into_temp :: ForeignTarget -> FCode ForeignTarget
247 load_target_into_temp (ForeignTarget expr conv) = do
248 tmp <- maybe_assign_temp expr
249 return (ForeignTarget tmp conv)
250 load_target_into_temp other_target@(PrimTarget _) =
251 return other_target
252
253 -- What we want to do here is create a new temporary for the foreign
254 -- call argument if it is not safe to use the expression directly,
255 -- because the expression mentions caller-saves GlobalRegs (see
256 -- Note [Register Parameter Passing]).
257 --
258 -- However, we can't pattern-match on the expression here, because
259 -- this is used in a loop by CmmParse, and testing the expression
260 -- results in a black hole. So we always create a temporary, and rely
261 -- on CmmSink to clean it up later. (Yuck, ToDo). The generated code
262 -- ends up being the same, at least for the RTS .cmm code.
263 --
264 maybe_assign_temp :: CmmExpr -> FCode CmmExpr
265 maybe_assign_temp e = do
266 dflags <- getDynFlags
267 reg <- newTemp (cmmExprType dflags e)
268 emitAssign (CmmLocal reg) e
269 return (CmmReg (CmmLocal reg))
270
271 -- -----------------------------------------------------------------------------
272 -- Save/restore the thread state in the TSO
273
274 -- This stuff can't be done in suspendThread/resumeThread, because it
275 -- refers to global registers which aren't available in the C world.
276
277 emitSaveThreadState :: FCode ()
278 emitSaveThreadState = do
279 dflags <- getDynFlags
280 code <- saveThreadState dflags
281 emit code
282
283 -- | Produce code to save the current thread state to @CurrentTSO@
284 saveThreadState :: MonadUnique m => DynFlags -> m CmmAGraph
285 saveThreadState dflags = do
286 tso <- newTemp (gcWord dflags)
287 close_nursery <- closeNursery dflags tso
288 pure $ catAGraphs [
289 -- tso = CurrentTSO;
290 mkAssign (CmmLocal tso) stgCurrentTSO,
291 -- tso->stackobj->sp = Sp;
292 mkStore (cmmOffset dflags
293 (CmmLoad (cmmOffset dflags
294 (CmmReg (CmmLocal tso))
295 (tso_stackobj dflags))
296 (bWord dflags))
297 (stack_SP dflags))
298 stgSp,
299 close_nursery,
300 -- and save the current cost centre stack in the TSO when profiling:
301 if gopt Opt_SccProfilingOn dflags then
302 mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) curCCS
303 else mkNop
304 ]
305
306 emitCloseNursery :: FCode ()
307 emitCloseNursery = do
308 dflags <- getDynFlags
309 tso <- newTemp (bWord dflags)
310 code <- closeNursery dflags tso
311 emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*> code
312
313 {- |
314 @closeNursery dflags tso@ produces code to close the nursery.
315 A local register holding the value of @CurrentTSO@ is expected for
316 efficiency.
317
318 Closing the nursery corresponds to the following code:
319
320 @
321 tso = CurrentTSO;
322 cn = CurrentNuresry;
323
324 // Update the allocation limit for the current thread. We don't
325 // check to see whether it has overflowed at this point, that check is
326 // made when we run out of space in the current heap block (stg_gc_noregs)
327 // and in the scheduler when context switching (schedulePostRunThread).
328 tso->alloc_limit -= Hp + WDS(1) - cn->start;
329
330 // Set cn->free to the next unoccupied word in the block
331 cn->free = Hp + WDS(1);
332 @
333 -}
334 closeNursery :: MonadUnique m => DynFlags -> LocalReg -> m CmmAGraph
335 closeNursery df tso = do
336 let tsoreg = CmmLocal tso
337 cnreg <- CmmLocal <$> newTemp (bWord df)
338 pure $ catAGraphs [
339 mkAssign cnreg stgCurrentNursery,
340
341 -- CurrentNursery->free = Hp+1;
342 mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df stgHp 1),
343
344 let alloc =
345 CmmMachOp (mo_wordSub df)
346 [ cmmOffsetW df stgHp 1
347 , CmmLoad (nursery_bdescr_start df cnreg) (bWord df)
348 ]
349
350 alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
351 in
352
353 -- tso->alloc_limit += alloc
354 mkStore alloc_limit (CmmMachOp (MO_Sub W64)
355 [ CmmLoad alloc_limit b64
356 , CmmMachOp (mo_WordTo64 df) [alloc] ])
357 ]
358
359 emitLoadThreadState :: FCode ()
360 emitLoadThreadState = do
361 dflags <- getDynFlags
362 code <- loadThreadState dflags
363 emit code
364
365 -- | Produce code to load the current thread state from @CurrentTSO@
366 loadThreadState :: MonadUnique m => DynFlags -> m CmmAGraph
367 loadThreadState dflags = do
368 tso <- newTemp (gcWord dflags)
369 stack <- newTemp (gcWord dflags)
370 open_nursery <- openNursery dflags tso
371 pure $ catAGraphs [
372 -- tso = CurrentTSO;
373 mkAssign (CmmLocal tso) stgCurrentTSO,
374 -- stack = tso->stackobj;
375 mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
376 -- Sp = stack->sp;
377 mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
378 -- SpLim = stack->stack + RESERVED_STACK_WORDS;
379 mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
380 (rESERVED_STACK_WORDS dflags)),
381 -- HpAlloc = 0;
382 -- HpAlloc is assumed to be set to non-zero only by a failed
383 -- a heap check, see HeapStackCheck.cmm:GC_GENERIC
384 mkAssign hpAlloc (zeroExpr dflags),
385 open_nursery,
386 -- and load the current cost centre stack from the TSO when profiling:
387 if gopt Opt_SccProfilingOn dflags
388 then storeCurCCS
389 (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso))
390 (tso_CCCS dflags)) (ccsType dflags))
391 else mkNop
392 ]
393
394
395 emitOpenNursery :: FCode ()
396 emitOpenNursery = do
397 dflags <- getDynFlags
398 tso <- newTemp (bWord dflags)
399 code <- openNursery dflags tso
400 emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*> code
401
402 {- |
403 @openNursery dflags tso@ produces code to open the nursery. A local register
404 holding the value of @CurrentTSO@ is expected for efficiency.
405
406 Opening the nursery corresponds to the following code:
407
408 @
409 tso = CurrentTSO;
410 cn = CurrentNursery;
411 bdfree = CurrentNuresry->free;
412 bdstart = CurrentNuresry->start;
413
414 // We *add* the currently occupied portion of the nursery block to
415 // the allocation limit, because we will subtract it again in
416 // closeNursery.
417 tso->alloc_limit += bdfree - bdstart;
418
419 // Set Hp to the last occupied word of the heap block. Why not the
420 // next unocupied word? Doing it this way means that we get to use
421 // an offset of zero more often, which might lead to slightly smaller
422 // code on some architectures.
423 Hp = bdfree - WDS(1);
424
425 // Set HpLim to the end of the current nursery block (note that this block
426 // might be a block group, consisting of several adjacent blocks.
427 HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1;
428 @
429 -}
430 openNursery :: MonadUnique m => DynFlags -> LocalReg -> m CmmAGraph
431 openNursery df tso = do
432 let tsoreg = CmmLocal tso
433 cnreg <- CmmLocal <$> newTemp (bWord df)
434 bdfreereg <- CmmLocal <$> newTemp (bWord df)
435 bdstartreg <- CmmLocal <$> newTemp (bWord df)
436
437 -- These assignments are carefully ordered to reduce register
438 -- pressure and generate not completely awful code on x86. To see
439 -- what code we generate, look at the assembly for
440 -- stg_returnToStackTop in rts/StgStartup.cmm.
441 pure $ catAGraphs [
442 mkAssign cnreg stgCurrentNursery,
443 mkAssign bdfreereg (CmmLoad (nursery_bdescr_free df cnreg) (bWord df)),
444
445 -- Hp = CurrentNursery->free - 1;
446 mkAssign hp (cmmOffsetW df (CmmReg bdfreereg) (-1)),
447
448 mkAssign bdstartreg (CmmLoad (nursery_bdescr_start df cnreg) (bWord df)),
449
450 -- HpLim = CurrentNursery->start +
451 -- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
452 mkAssign hpLim
453 (cmmOffsetExpr df
454 (CmmReg bdstartreg)
455 (cmmOffset df
456 (CmmMachOp (mo_wordMul df) [
457 CmmMachOp (MO_SS_Conv W32 (wordWidth df))
458 [CmmLoad (nursery_bdescr_blocks df cnreg) b32],
459 mkIntExpr df (bLOCK_SIZE df)
460 ])
461 (-1)
462 )
463 ),
464
465 -- alloc = bd->free - bd->start
466 let alloc =
467 CmmMachOp (mo_wordSub df) [CmmReg bdfreereg, CmmReg bdstartreg]
468
469 alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
470 in
471
472 -- tso->alloc_limit += alloc
473 mkStore alloc_limit (CmmMachOp (MO_Add W64)
474 [ CmmLoad alloc_limit b64
475 , CmmMachOp (mo_WordTo64 df) [alloc] ])
476
477 ]
478
479 nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks
480 :: DynFlags -> CmmReg -> CmmExpr
481 nursery_bdescr_free dflags cn =
482 cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free dflags)
483 nursery_bdescr_start dflags cn =
484 cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_start dflags)
485 nursery_bdescr_blocks dflags cn =
486 cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_blocks dflags)
487
488 tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: DynFlags -> ByteOff
489 tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags)
490 tso_alloc_limit dflags = closureField dflags (oFFSET_StgTSO_alloc_limit dflags)
491 tso_CCCS dflags = closureField dflags (oFFSET_StgTSO_cccs dflags)
492 stack_STACK dflags = closureField dflags (oFFSET_StgStack_stack dflags)
493 stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags)
494
495
496 closureField :: DynFlags -> ByteOff -> ByteOff
497 closureField dflags off = off + fixedHdrSize dflags
498
499 stgSp, stgHp, stgCurrentTSO, stgCurrentNursery :: CmmExpr
500 stgSp = CmmReg sp
501 stgHp = CmmReg hp
502 stgCurrentTSO = CmmReg currentTSO
503 stgCurrentNursery = CmmReg currentNursery
504
505 sp, spLim, hp, hpLim, currentTSO, currentNursery, hpAlloc :: CmmReg
506 sp = CmmGlobal Sp
507 spLim = CmmGlobal SpLim
508 hp = CmmGlobal Hp
509 hpLim = CmmGlobal HpLim
510 currentTSO = CmmGlobal CurrentTSO
511 currentNursery = CmmGlobal CurrentNursery
512 hpAlloc = CmmGlobal HpAlloc
513
514 -- -----------------------------------------------------------------------------
515 -- For certain types passed to foreign calls, we adjust the actual
516 -- value passed to the call. For ByteArray#/Array# we pass the
517 -- address of the actual array, not the address of the heap object.
518
519 getFCallArgs :: [StgArg] -> FCode [(CmmExpr, ForeignHint)]
520 -- (a) Drop void args
521 -- (b) Add foreign-call shim code
522 -- It's (b) that makes this differ from getNonVoidArgAmodes
523
524 getFCallArgs args
525 = do { mb_cmms <- mapM get args
526 ; return (catMaybes mb_cmms) }
527 where
528 get arg | null arg_reps
529 = return Nothing
530 | otherwise
531 = do { cmm <- getArgAmode (NonVoid arg)
532 ; dflags <- getDynFlags
533 ; return (Just (add_shim dflags arg_ty cmm, hint)) }
534 where
535 arg_ty = stgArgType arg
536 arg_reps = typePrimRep arg_ty
537 hint = typeForeignHint arg_ty
538
539 add_shim :: DynFlags -> Type -> CmmExpr -> CmmExpr
540 add_shim dflags arg_ty expr
541 | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon
542 = cmmOffsetB dflags expr (arrPtrsHdrSize dflags)
543
544 | tycon == smallArrayPrimTyCon || tycon == smallMutableArrayPrimTyCon
545 = cmmOffsetB dflags expr (smallArrPtrsHdrSize dflags)
546
547 | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon
548 = cmmOffsetB dflags expr (arrWordsHdrSize dflags)
549
550 | otherwise = expr
551 where
552 tycon = tyConAppTyCon (unwrapType arg_ty)
553 -- should be a tycon app, since this is a foreign call