Support MO_{Add,Sub}IntC and MO_Add2 in the LLVM backend
[ghc.git] / compiler / codeGen / StgCmmForeign.hs
1 {-# LANGUAGE CPP #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Code generation for foreign calls.
6 --
7 -- (c) The University of Glasgow 2004-2006
8 --
9 -----------------------------------------------------------------------------
10
11 module StgCmmForeign (
12 cgForeignCall,
13 emitPrimCall, emitCCall,
14 emitForeignCall, -- For CmmParse
15 emitSaveThreadState,
16 saveThreadState,
17 emitLoadThreadState,
18 loadThreadState,
19 emitOpenNursery,
20 emitCloseNursery,
21 ) where
22
23 #include "HsVersions.h"
24
25 import StgSyn
26 import StgCmmProf (storeCurCCS, ccsType, curCCS)
27 import StgCmmEnv
28 import StgCmmMonad
29 import StgCmmUtils
30 import StgCmmClosure
31 import StgCmmLayout
32
33 import Cmm
34 import CmmUtils
35 import MkGraph
36 import Type
37 import TysPrim
38 import CLabel
39 import SMRep
40 import ForeignCall
41 import DynFlags
42 import Maybes
43 import Outputable
44 import BasicTypes
45
46 import Control.Monad
47
48 #if __GLASGOW_HASKELL__ >= 709
49 import Prelude hiding( succ, (<*>) )
50 #else
51 import Prelude hiding( succ )
52 #endif
53
54 -----------------------------------------------------------------------------
55 -- Code generation for Foreign Calls
56 -----------------------------------------------------------------------------
57
58 -- | emit code for a foreign call, and return the results to the sequel.
59 --
60 cgForeignCall :: ForeignCall -- the op
61 -> [StgArg] -- x,y arguments
62 -> Type -- result type
63 -> FCode ReturnKind
64
65 cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
66 = do { dflags <- getDynFlags
67 ; let -- in the stdcall calling convention, the symbol needs @size appended
68 -- to it, where size is the total number of bytes of arguments. We
69 -- attach this info to the CLabel here, and the CLabel pretty printer
70 -- will generate the suffix when the label is printed.
71 call_size args
72 | StdCallConv <- cconv = Just (sum (map arg_size args))
73 | otherwise = Nothing
74
75 -- ToDo: this might not be correct for 64-bit API
76 arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType dflags arg)
77 (wORD_SIZE dflags)
78 ; cmm_args <- getFCallArgs stg_args
79 ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
80 ; let ((call_args, arg_hints), cmm_target)
81 = case target of
82 StaticTarget _ _ _ False ->
83 panic "cgForeignCall: unexpected FFI value import"
84 StaticTarget _ lbl mPkgId True
85 -> let labelSource
86 = case mPkgId of
87 Nothing -> ForeignLabelInThisPackage
88 Just pkgId -> ForeignLabelInPackage pkgId
89 size = call_size cmm_args
90 in ( unzip cmm_args
91 , CmmLit (CmmLabel
92 (mkForeignLabel lbl size labelSource IsFunction)))
93
94 DynamicTarget -> case cmm_args of
95 (fn,_):rest -> (unzip rest, fn)
96 [] -> panic "cgForeignCall []"
97 fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn
98 call_target = ForeignTarget cmm_target fc
99
100 -- we want to emit code for the call, and then emitReturn.
101 -- However, if the sequel is AssignTo, we shortcut a little
102 -- and generate a foreign call that assigns the results
103 -- directly. Otherwise we end up generating a bunch of
104 -- useless "r = r" assignments, which are not merely annoying:
105 -- they prevent the common block elimination from working correctly
106 -- in the case of a safe foreign call.
107 -- See Note [safe foreign call convention]
108 --
109 ; sequel <- getSequel
110 ; case sequel of
111 AssignTo assign_to_these _ ->
112 emitForeignCall safety assign_to_these call_target call_args
113
114 _something_else ->
115 do { _ <- emitForeignCall safety res_regs call_target call_args
116 ; emitReturn (map (CmmReg . CmmLocal) res_regs)
117 }
118 }
119
120 {- Note [safe foreign call convention]
121
122 The simple thing to do for a safe foreign call would be the same as an
123 unsafe one: just
124
125 emitForeignCall ...
126 emitReturn ...
127
128 but consider what happens in this case
129
130 case foo x y z of
131 (# s, r #) -> ...
132
133 The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r]
134 as the result reg, and we generate
135
136 r = foo(x,y,z) returns to L1 -- emitForeignCall
137 L1:
138 r = r -- emitReturn
139 goto L2
140 L2:
141 ...
142
143 Now L1 is a proc point (by definition, it is the continuation of the
144 safe foreign call). If L2 does a heap check, then L2 will also be a
145 proc point.
146
147 Furthermore, the stack layout algorithm has to arrange to save r
148 somewhere between the call and the jump to L1, which is annoying: we
149 would have to treat r differently from the other live variables, which
150 have to be saved *before* the call.
151
152 So we adopt a special convention for safe foreign calls: the results
153 are copied out according to the NativeReturn convention by the call,
154 and the continuation of the call should copyIn the results. (The
155 copyOut code is actually inserted when the safe foreign call is
156 lowered later). The result regs attached to the safe foreign call are
157 only used temporarily to hold the results before they are copied out.
158
159 We will now generate this:
160
161 r = foo(x,y,z) returns to L1
162 L1:
163 r = R1 -- copyIn, inserted by mkSafeCall
164 goto L2
165 L2:
166 ... r ...
167
168 And when the safe foreign call is lowered later (see Note [lower safe
169 foreign calls]) we get this:
170
171 suspendThread()
172 r = foo(x,y,z)
173 resumeThread()
174 R1 = r -- copyOut, inserted by lowerSafeForeignCall
175 jump L1
176 L1:
177 r = R1 -- copyIn, inserted by mkSafeCall
178 goto L2
179 L2:
180 ... r ...
181
182 Now consider what happens if L2 does a heap check: the Adams
183 optimisation kicks in and commons up L1 with the heap-check
184 continuation, resulting in just one proc point instead of two. Yay!
185 -}
186
187
188 emitCCall :: [(CmmFormal,ForeignHint)]
189 -> CmmExpr
190 -> [(CmmActual,ForeignHint)]
191 -> FCode ()
192 emitCCall hinted_results fn hinted_args
193 = void $ emitForeignCall PlayRisky results target args
194 where
195 (args, arg_hints) = unzip hinted_args
196 (results, result_hints) = unzip hinted_results
197 target = ForeignTarget fn fc
198 fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn
199
200
201 emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()
202 emitPrimCall res op args
203 = void $ emitForeignCall PlayRisky res (PrimTarget op) args
204
205 -- alternative entry point, used by CmmParse
206 emitForeignCall
207 :: Safety
208 -> [CmmFormal] -- where to put the results
209 -> ForeignTarget -- the op
210 -> [CmmActual] -- arguments
211 -> FCode ReturnKind
212 emitForeignCall safety results target args
213 | not (playSafe safety) = do
214 dflags <- getDynFlags
215 let (caller_save, caller_load) = callerSaveVolatileRegs dflags
216 emit caller_save
217 target' <- load_target_into_temp target
218 args' <- mapM maybe_assign_temp args
219 emit $ mkUnsafeCall target' results args'
220 emit caller_load
221 return AssignedDirectly
222
223 | otherwise = do
224 dflags <- getDynFlags
225 updfr_off <- getUpdFrameOff
226 target' <- load_target_into_temp target
227 args' <- mapM maybe_assign_temp args
228 k <- newLabelC
229 let (off, _, copyout) = copyInOflow dflags NativeReturn (Young k) results []
230 -- see Note [safe foreign call convention]
231 tscope <- getTickScope
232 emit $
233 ( mkStore (CmmStackSlot (Young k) (widthInBytes (wordWidth dflags)))
234 (CmmLit (CmmBlock k))
235 <*> mkLast (CmmForeignCall { tgt = target'
236 , res = results
237 , args = args'
238 , succ = k
239 , ret_args = off
240 , ret_off = updfr_off
241 , intrbl = playInterruptible safety })
242 <*> mkLabel k tscope
243 <*> copyout
244 )
245 return (ReturnedTo k off)
246
247 load_target_into_temp :: ForeignTarget -> FCode ForeignTarget
248 load_target_into_temp (ForeignTarget expr conv) = do
249 tmp <- maybe_assign_temp expr
250 return (ForeignTarget tmp conv)
251 load_target_into_temp other_target@(PrimTarget _) =
252 return other_target
253
254 -- What we want to do here is create a new temporary for the foreign
255 -- call argument if it is not safe to use the expression directly,
256 -- because the expression mentions caller-saves GlobalRegs (see
257 -- Note [Register Parameter Passing]).
258 --
259 -- However, we can't pattern-match on the expression here, because
260 -- this is used in a loop by CmmParse, and testing the expression
261 -- results in a black hole. So we always create a temporary, and rely
262 -- on CmmSink to clean it up later. (Yuck, ToDo). The generated code
263 -- ends up being the same, at least for the RTS .cmm code.
264 --
265 maybe_assign_temp :: CmmExpr -> FCode CmmExpr
266 maybe_assign_temp e = do
267 dflags <- getDynFlags
268 reg <- newTemp (cmmExprType dflags e)
269 emitAssign (CmmLocal reg) e
270 return (CmmReg (CmmLocal reg))
271
272 -- -----------------------------------------------------------------------------
273 -- Save/restore the thread state in the TSO
274
275 -- This stuff can't be done in suspendThread/resumeThread, because it
276 -- refers to global registers which aren't available in the C world.
277
278 emitSaveThreadState :: FCode ()
279 emitSaveThreadState = do
280 dflags <- getDynFlags
281 tso <- newTemp (gcWord dflags)
282 cn <- newTemp (bWord dflags)
283 emit $ saveThreadState dflags tso cn
284
285
286 -- saveThreadState must be usable from the stack layout pass, where we
287 -- don't have FCode. Therefore it takes LocalRegs as arguments, so
288 -- the caller can create these.
289 saveThreadState :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph
290 saveThreadState dflags tso cn =
291 catAGraphs [
292 -- tso = CurrentTSO;
293 mkAssign (CmmLocal tso) stgCurrentTSO,
294 -- tso->stackobj->sp = Sp;
295 mkStore (cmmOffset dflags (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) stgSp,
296 closeNursery dflags tso cn,
297 -- and save the current cost centre stack in the TSO when profiling:
298 if gopt Opt_SccProfilingOn dflags then
299 mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) curCCS
300 else mkNop
301 ]
302
303 emitCloseNursery :: FCode ()
304 emitCloseNursery = do
305 dflags <- getDynFlags
306 tso <- newTemp (gcWord dflags)
307 cn <- newTemp (bWord dflags)
308 emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*>
309 closeNursery dflags tso cn
310
311 {-
312 Closing the nursery corresponds to the following code:
313
314 tso = CurrentTSO;
315 cn = CurrentNuresry;
316
317 // Update the allocation limit for the current thread. We don't
318 // check to see whether it has overflowed at this point, that check is
319 // made when we run out of space in the current heap block (stg_gc_noregs)
320 // and in the scheduler when context switching (schedulePostRunThread).
321 tso->alloc_limit -= Hp + WDS(1) - cn->start;
322
323 // Set cn->free to the next unoccupied word in the block
324 cn->free = Hp + WDS(1);
325 -}
326
327 closeNursery :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph
328 closeNursery df tso cn =
329 let
330 tsoreg = CmmLocal tso
331 cnreg = CmmLocal cn
332 in
333 catAGraphs [
334 mkAssign cnreg stgCurrentNursery,
335
336 -- CurrentNursery->free = Hp+1;
337 mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df stgHp 1),
338
339 let alloc =
340 CmmMachOp (mo_wordSub df)
341 [ cmmOffsetW df stgHp 1
342 , CmmLoad (nursery_bdescr_start df cnreg) (bWord df)
343 ]
344
345 alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
346 in
347
348 -- tso->alloc_limit += alloc
349 mkStore alloc_limit (CmmMachOp (MO_Sub W64)
350 [ CmmLoad alloc_limit b64
351 , CmmMachOp (mo_WordTo64 df) [alloc] ])
352 ]
353
354 emitLoadThreadState :: FCode ()
355 emitLoadThreadState = do
356 dflags <- getDynFlags
357 tso <- newTemp (gcWord dflags)
358 stack <- newTemp (gcWord dflags)
359 cn <- newTemp (bWord dflags)
360 bdfree <- newTemp (bWord dflags)
361 bdstart <- newTemp (bWord dflags)
362 emit $ loadThreadState dflags tso stack cn bdfree bdstart
363
364 -- loadThreadState must be usable from the stack layout pass, where we
365 -- don't have FCode. Therefore it takes LocalRegs as arguments, so
366 -- the caller can create these.
367 loadThreadState :: DynFlags
368 -> LocalReg -> LocalReg -> LocalReg -> LocalReg -> LocalReg
369 -> CmmAGraph
370 loadThreadState dflags tso stack cn bdfree bdstart =
371 catAGraphs [
372 -- tso = CurrentTSO;
373 mkAssign (CmmLocal tso) stgCurrentTSO,
374 -- stack = tso->stackobj;
375 mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
376 -- Sp = stack->sp;
377 mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
378 -- SpLim = stack->stack + RESERVED_STACK_WORDS;
379 mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
380 (rESERVED_STACK_WORDS dflags)),
381 -- HpAlloc = 0;
382 -- HpAlloc is assumed to be set to non-zero only by a failed
383 -- a heap check, see HeapStackCheck.cmm:GC_GENERIC
384 mkAssign hpAlloc (zeroExpr dflags),
385 openNursery dflags tso cn bdfree bdstart,
386 -- and load the current cost centre stack from the TSO when profiling:
387 if gopt Opt_SccProfilingOn dflags
388 then storeCurCCS
389 (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso))
390 (tso_CCCS dflags)) (ccsType dflags))
391 else mkNop
392 ]
393
394
395 emitOpenNursery :: FCode ()
396 emitOpenNursery = do
397 dflags <- getDynFlags
398 tso <- newTemp (gcWord dflags)
399 cn <- newTemp (bWord dflags)
400 bdfree <- newTemp (bWord dflags)
401 bdstart <- newTemp (bWord dflags)
402 emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*>
403 openNursery dflags tso cn bdfree bdstart
404
405 {-
406 Opening the nursery corresponds to the following code:
407
408 tso = CurrentTSO;
409 cn = CurrentNursery;
410 bdfree = CurrentNuresry->free;
411 bdstart = CurrentNuresry->start;
412
413 // We *add* the currently occupied portion of the nursery block to
414 // the allocation limit, because we will subtract it again in
415 // closeNursery.
416 tso->alloc_limit += bdfree - bdstart;
417
418 // Set Hp to the last occupied word of the heap block. Why not the
419 // next unocupied word? Doing it this way means that we get to use
420 // an offset of zero more often, which might lead to slightly smaller
421 // code on some architectures.
422 Hp = bdfree - WDS(1);
423
424 // Set HpLim to the end of the current nursery block (note that this block
425 // might be a block group, consisting of several adjacent blocks.
426 HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1;
427 -}
428
429 openNursery :: DynFlags
430 -> LocalReg -> LocalReg -> LocalReg -> LocalReg
431 -> CmmAGraph
432 openNursery df tso cn bdfree bdstart =
433 let
434 tsoreg = CmmLocal tso
435 cnreg = CmmLocal cn
436 bdfreereg = CmmLocal bdfree
437 bdstartreg = CmmLocal bdstart
438 in
439 -- These assignments are carefully ordered to reduce register
440 -- pressure and generate not completely awful code on x86. To see
441 -- what code we generate, look at the assembly for
442 -- stg_returnToStackTop in rts/StgStartup.cmm.
443 catAGraphs [
444 mkAssign cnreg stgCurrentNursery,
445 mkAssign bdfreereg (CmmLoad (nursery_bdescr_free df cnreg) (bWord df)),
446
447 -- Hp = CurrentNursery->free - 1;
448 mkAssign hp (cmmOffsetW df (CmmReg bdfreereg) (-1)),
449
450 mkAssign bdstartreg (CmmLoad (nursery_bdescr_start df cnreg) (bWord df)),
451
452 -- HpLim = CurrentNursery->start +
453 -- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
454 mkAssign hpLim
455 (cmmOffsetExpr df
456 (CmmReg bdstartreg)
457 (cmmOffset df
458 (CmmMachOp (mo_wordMul df) [
459 CmmMachOp (MO_SS_Conv W32 (wordWidth df))
460 [CmmLoad (nursery_bdescr_blocks df cnreg) b32],
461 mkIntExpr df (bLOCK_SIZE df)
462 ])
463 (-1)
464 )
465 ),
466
467 -- alloc = bd->free - bd->start
468 let alloc =
469 CmmMachOp (mo_wordSub df) [CmmReg bdfreereg, CmmReg bdstartreg]
470
471 alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
472 in
473
474 -- tso->alloc_limit += alloc
475 mkStore alloc_limit (CmmMachOp (MO_Add W64)
476 [ CmmLoad alloc_limit b64
477 , CmmMachOp (mo_WordTo64 df) [alloc] ])
478
479 ]
480
481 nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks
482 :: DynFlags -> CmmReg -> CmmExpr
483 nursery_bdescr_free dflags cn =
484 cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free dflags)
485 nursery_bdescr_start dflags cn =
486 cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_start dflags)
487 nursery_bdescr_blocks dflags cn =
488 cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_blocks dflags)
489
490 tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: DynFlags -> ByteOff
491 tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags)
492 tso_alloc_limit dflags = closureField dflags (oFFSET_StgTSO_alloc_limit dflags)
493 tso_CCCS dflags = closureField dflags (oFFSET_StgTSO_cccs dflags)
494 stack_STACK dflags = closureField dflags (oFFSET_StgStack_stack dflags)
495 stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags)
496
497
498 closureField :: DynFlags -> ByteOff -> ByteOff
499 closureField dflags off = off + fixedHdrSize dflags
500
501 stgSp, stgHp, stgCurrentTSO, stgCurrentNursery :: CmmExpr
502 stgSp = CmmReg sp
503 stgHp = CmmReg hp
504 stgCurrentTSO = CmmReg currentTSO
505 stgCurrentNursery = CmmReg currentNursery
506
507 sp, spLim, hp, hpLim, currentTSO, currentNursery, hpAlloc :: CmmReg
508 sp = CmmGlobal Sp
509 spLim = CmmGlobal SpLim
510 hp = CmmGlobal Hp
511 hpLim = CmmGlobal HpLim
512 currentTSO = CmmGlobal CurrentTSO
513 currentNursery = CmmGlobal CurrentNursery
514 hpAlloc = CmmGlobal HpAlloc
515
516 -- -----------------------------------------------------------------------------
517 -- For certain types passed to foreign calls, we adjust the actual
518 -- value passed to the call. For ByteArray#/Array# we pass the
519 -- address of the actual array, not the address of the heap object.
520
521 getFCallArgs :: [StgArg] -> FCode [(CmmExpr, ForeignHint)]
522 -- (a) Drop void args
523 -- (b) Add foreign-call shim code
524 -- It's (b) that makes this differ from getNonVoidArgAmodes
525
526 getFCallArgs args
527 = do { mb_cmms <- mapM get args
528 ; return (catMaybes mb_cmms) }
529 where
530 get arg | isVoidRep arg_rep
531 = return Nothing
532 | otherwise
533 = do { cmm <- getArgAmode (NonVoid arg)
534 ; dflags <- getDynFlags
535 ; return (Just (add_shim dflags arg_ty cmm, hint)) }
536 where
537 arg_ty = stgArgType arg
538 arg_rep = typePrimRep arg_ty
539 hint = typeForeignHint arg_ty
540
541 add_shim :: DynFlags -> Type -> CmmExpr -> CmmExpr
542 add_shim dflags arg_ty expr
543 | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon
544 = cmmOffsetB dflags expr (arrPtrsHdrSize dflags)
545
546 | tycon == smallArrayPrimTyCon || tycon == smallMutableArrayPrimTyCon
547 = cmmOffsetB dflags expr (smallArrPtrsHdrSize dflags)
548
549 | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon
550 = cmmOffsetB dflags expr (arrWordsHdrSize dflags)
551
552 | otherwise = expr
553 where
554 UnaryRep rep_ty = repType arg_ty
555 tycon = tyConAppTyCon rep_ty
556 -- should be a tycon app, since this is a foreign call