3d60def4508270832323d322f9e4c7f2a5403bed
[ghc.git] / compiler / codeGen / StgCmmExpr.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C-- code generation: expressions
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmExpr ( cgExpr ) where
10
11 #define FAST_STRING_NOT_NEEDED
12 #include "HsVersions.h"
13
14 import {-# SOURCE #-} StgCmmBind ( cgBind )
15
16 import StgCmmMonad
17 import StgCmmHeap
18 import StgCmmEnv
19 import StgCmmCon
20 import StgCmmProf
21 import StgCmmLayout
22 import StgCmmPrim
23 import StgCmmHpc
24 import StgCmmTicky
25 import StgCmmUtils
26 import StgCmmClosure
27
28 import StgSyn
29
30 import MkGraph
31 import BlockId
32 import Cmm
33 import CmmInfo
34 import CoreSyn
35 import DataCon
36 import ForeignCall
37 import Id
38 import PrimOp
39 import TyCon
40 import Type
41 import CostCentre ( CostCentreStack, currentCCS )
42 import Maybes
43 import Util
44 import FastString
45 import Outputable
46
47 import Control.Monad (when,void)
48
49 ------------------------------------------------------------------------
50 -- cgExpr: the main function
51 ------------------------------------------------------------------------
52
-- | Generate Cmm code for an arbitrary STG expression, dispatching on its
-- shape.  The returned 'ReturnKind' tells the caller how the generated code
-- delivers its result (assigned directly, or via a return continuation).
cgExpr  :: StgExpr -> FCode ReturnKind

-- Application (including a bare variable): compile as a tail call
cgExpr  (StgApp fun args)     = cgIdApp fun args

{- seq# a s ==> a -}
-- seq# just evaluates its first argument; the state token is erased, so
-- this is code-generated exactly like an application of 'a' to no args.
cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
  cgIdApp a []

cgExpr (StgOpApp op args ty) = cgOpApp op args ty
cgExpr (StgConApp con args)  = cgConApp con args
-- Set the current cost centre, then compile the body under it
cgExpr (StgSCC cc tick push expr) = do { emitSetCCC cc tick push; cgExpr expr }
-- HPC tick box: emit the tick, then compile the body
cgExpr (StgTick m n expr) = do dflags <- getDynFlags
                               emit (mkTickBox dflags m n)
                               cgExpr expr
cgExpr (StgLit lit)       = do cmm_lit <- cgLit lit
                               emitReturn [CmmLit cmm_lit]

cgExpr (StgLet binds expr)             = do { cgBind binds;     cgExpr expr }
-- Let-no-escape: compile the join-point bindings, then the body, and place
-- the join label after the body so each LNE RHS can branch to it.
cgExpr (StgLetNoEscape _ _ binds expr) =
  do { u <- newUnique
     ; let join_id = mkBlockId u
     ; cgLneBinds join_id binds
     ; r <- cgExpr expr
     ; emitLabel join_id
     ; return r }

cgExpr (StgCase expr _live_vars _save_vars bndr _srt alt_type alts) =
  cgCase expr bndr alt_type alts

-- Lambdas must have been eliminated before code generation
cgExpr (StgLam {}) = panic "cgExpr: StgLam"
83
84 ------------------------------------------------------------------------
85 -- Let no escape
86 ------------------------------------------------------------------------
87
88 {- Generating code for a let-no-escape binding, aka join point is very
89 very similar to what we do for a case expression. The duality is
90 between
91 let-no-escape x = b
92 in e
93 and
94 case e of ... -> b
95
96 That is, the RHS of 'x' (ie 'b') will execute *later*, just like
97 the alternative of the case; it needs to be compiled in an environment
98 in which all volatile bindings are forgotten, and the free vars are
99 bound only to stable things like stack locations.. The 'e' part will
100 execute *next*, just like the scrutinee of a case. -}
101
102 -------------------------
-- | Compile the binding group of a let-no-escape, adding each binder's
-- 'CgIdInfo' (join-point block id and argument registers) to the environment.
cgLneBinds :: BlockId -> StgBinding -> FCode ()
cgLneBinds join_id (StgNonRec bndr rhs)
  = do  { local_cc <- saveCurrentCostCentre
                -- See Note [Saving the current cost centre]
        ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
        ; fcode
        ; addBindC (cg_id info) info }

cgLneBinds join_id (StgRec pairs)
  = do  { local_cc <- saveCurrentCostCentre
        ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
        ; let (infos, fcodes) = unzip r
        -- For a recursive group every binder must be in scope before any
        -- right-hand side is emitted, so bind them all first ...
        ; addBindsC infos
        -- ... and only then run the deferred code-emitting actions.
        ; sequence_ fcodes
        }
118
119 -------------------------
-- | Compile one let-no-escape right-hand side.  Returns the binder's
-- 'CgIdInfo' together with a deferred 'FCode' action that emits the actual
-- out-of-line code.  Deferring the emission lets 'cgLneBinds' bind all the
-- binders of a recursive group before any body is generated.
cgLetNoEscapeRhs
    :: BlockId          -- join point for successor of let-no-escape
    -> Maybe LocalReg   -- Saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeRhs join_id local_cc bndr rhs =
  do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
     ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
     -- Emit the RHS out of line, finishing with a branch to the join
     -- point that follows the whole let-no-escape expression.
     ; let code = do { body <- getCode rhs_code
                     ; emitOutOfLine bid (body <*> mkBranch join_id) }
     ; return (info, code)
     }
134
-- | Dispatch on the shape of a let-no-escape RHS: both closures and
-- constructor RHSs are compiled via 'cgLetNoEscapeClosure'.
cgLetNoEscapeRhsBody
    :: Maybe LocalReg   -- Saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())
cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd _ args body)
  = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
  = cgLetNoEscapeClosure bndr local_cc cc [] (StgConApp con args)
        -- For a constructor RHS we want to generate a single chunk of
        -- code which can be jumped to from many places, which will
        -- return the constructor. It's easy; just behave as if it
        -- was an StgRhsClosure with a ConApp inside!
148
149 -------------------------
-------------------------
-- | Build the 'CgIdInfo' for a let-no-escape binder together with the
-- deferred code for its body.  The body is generated in a forked proc
-- so it does not interfere with the code stream of the enclosing
-- expression.
cgLetNoEscapeClosure
    :: Id                   -- binder
    -> Maybe LocalReg       -- Slot for saved current cost centre
    -> CostCentreStack      -- XXX: *** NOT USED *** why not?
    -> [NonVoid Id]         -- Args (as in \ args -> body)
    -> StgExpr              -- Body (as in above)
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
  = do dflags <- getDynFlags
       return ( lneIdInfo dflags bndr args
              , code )
  where
   code = forkProc $ do {
            ; withNewTickyCounterLNE (idName bndr) args $ do
            ; restoreCurrentCostCentre cc_slot
            ; arg_regs <- bindArgsToRegs args
            -- noEscapeHeapCheck: heap check for a join point; result
            -- ('ReturnKind') is irrelevant here, hence the 'void'
            ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }
168
169
170 ------------------------------------------------------------------------
171 -- Case expressions
172 ------------------------------------------------------------------------
173
174 {- Note [Compiling case expressions]
175 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
176 It is quite interesting to decide whether to put a heap-check at the
177 start of each alternative. Of course we certainly have to do so if
178 the case forces an evaluation, or if there is a primitive op which can
179 trigger GC.
180
181 A more interesting situation is this (a Plan-B situation)
182
183 !P!;
184 ...P...
185 case x# of
186 0# -> !Q!; ...Q...
187 default -> !R!; ...R...
188
189 where !x! indicates a possible heap-check point. The heap checks
190 in the alternatives *can* be omitted, in which case the topmost
191 heapcheck will take their worst case into account.
192
193 In favour of omitting !Q!, !R!:
194
195 - *May* save a heap overflow test,
196 if ...P... allocates anything.
197
198 - We can use relative addressing from a single Hp to
199 get at all the closures so allocated.
200
201 - No need to save volatile vars etc across heap checks
202 in !Q!, !R!
203
204 Against omitting !Q!, !R!
205
206 - May put a heap-check into the inner loop. Suppose
207 the main loop is P -> R -> P -> R...
208 Q is the loop exit, and only it does allocation.
209 This only hurts us if P does no allocation. If P allocates,
210 then there is a heap check in the inner loop anyway.
211
212 - May do more allocation than reqd. This sometimes bites us
213 badly. For example, nfib (ha!) allocates about 30\% more space if the
214 worst-casing is done, because many many calls to nfib are leaf calls
215 which don't need to allocate anything.
216
217 We can un-allocate, but that costs an instruction
218
219 Neither problem hurts us if there is only one alternative.
220
221 Suppose the inner loop is P->R->P->R etc. Then here is
222 how many heap checks we get in the *inner loop* under various
223 conditions
224
                Alloc   Heap check in branches (!Q!, !R!)?
226 P Q R yes no (absorb to !P!)
227 --------------------------------------
228 n n n 0 0
229 n y n 0 1
230 n . y 1 1
231 y . y 2 1
232 y . n 1 1
233
234 Best choices: absorb heap checks from Q and R into !P! iff
235 a) P itself does some allocation
236 or
237 b) P does allocation, or there is exactly one alternative
238
239 We adopt (b) because that is more likely to put the heap check at the
240 entry to a function, when not many things are live. After a bunch of
241 single-branch cases, we may have lots of things live
242
243 Hence: two basic plans for
244
245 case e of r { alts }
246
247 ------ Plan A: the general case ---------
248
249 ...save current cost centre...
250
251 ...code for e,
252 with sequel (SetLocals r)
253
254 ...restore current cost centre...
255 ...code for alts...
256 ...alts do their own heap checks
257
258 ------ Plan B: special case when ---------
259 (i) e does not allocate or call GC
260 (ii) either upstream code performs allocation
261 or there is just one alternative
262
263 Then heap allocation in the (single) case branch
264 is absorbed by the upstream check.
265 Very common example: primops on unboxed values
266
267 ...code for e,
268 with sequel (SetLocals r)...
269
270 ...code for alts...
271 ...no heap check...
272 -}
273
274
275
276 -------------------------------------
-- | Where the heap check for a case expression should go.
-- See Note [Compiling case expressions] above for the trade-offs.
data GcPlan
  = GcInAlts            -- Put a GC check at the start the case alternatives,
        [LocalReg]      -- which binds these registers
  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a
                        -- primitive op which does no GC.  Absorb the allocation
                        -- of the case alternative(s) into the upstream check
283
284 -------------------------------------
-------------------------------------
-- | Generate code for a case expression: evaluate the scrutinee with an
-- appropriate sequel, then compile the alternatives, choosing between the
-- Plan A / Plan B heap-check strategies described in
-- Note [Compiling case expressions].
cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ReturnKind

-- Special case: a primop scrutinee returning an enumeration type.
-- Switch directly on the tag instead of building/indexing a closure.
cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
  | isEnumerationTyCon tycon -- Note [case on bool]
  = do { tag_expr <- do_enum_primop op args

       -- If the binder is not dead, convert the tag to a constructor
       -- and assign it.
       ; when (not (isDeadBinder bndr)) $ do
            { dflags <- getDynFlags
            ; tmp_reg <- bindArgToReg (NonVoid bndr)
            ; emitAssign (CmmLocal tmp_reg)
                         (tagToClosure dflags tycon tag_expr) }

       ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
                                              (NonVoid bndr) alts
       ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
       ; return AssignedDirectly
       }
  where
    do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
    do_enum_primop TagToEnumOp [arg]  -- No code!
      = getArgAmode (NonVoid arg)
    do_enum_primop primop args
      = do dflags <- getDynFlags
           tmp <- newTemp (bWord dflags)
           cgPrimOp [tmp] primop args
           return (CmmReg (CmmLocal tmp))

{-
Note [case on bool]

This special case handles code like

  case a <# b of
    True ->
    False ->

If we let the ordinary case code handle it, we'll get something like

 tmp1 = a < b
 tmp2 = Bool_closure_tbl[tmp1]
 if (tmp2 & 7 != 0) then ... // normal tagged case

but this junk won't optimise away.  What we really want is just an
inline comparison:

 if (a < b) then ...

So we add a special case to generate

 tmp1 = a < b
 if (tmp1 == 0) then ...

and later optimisations will further improve this.

We should really change all these primops to return Int# instead, that
would make this special case go away.
-}


-- Note [ticket #3132]: we might be looking at a case of a lifted Id
-- that was cast to an unlifted type.  The Id will always be bottom,
-- but we don't want the code generator to fall over here.  If we
-- just emit an assignment here, the assignment will be
-- type-incorrect Cmm.  Hence, we emit the usual enter/return code,
-- (and because bottom must be untagged, it will be entered and the
-- program will crash).
-- The Sequel is a type-correct assignment, albeit bogus.
-- The (dead) continuation loops; it would be better to invoke some kind
-- of panic function here.
--
-- However, we also want to allow an assignment to be generated
-- in the case when the types are compatible, because this allows
-- some slightly-dodgy but occasionally-useful casts to be used,
-- such as in RtClosureInspect where we cast an HValue to a MutVar#
-- so we can print out the contents of the MutVar#.  If we generate
-- code that enters the HValue, then we'll get a runtime panic, because
-- the HValue really is a MutVar#.  The types are compatible though,
-- so we can just generate an assignment.
cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
  | isUnLiftedType (idType v)
  || reps_compatible
  =  -- assignment suffices for unlifted types
    do { dflags <- getDynFlags
       -- Unlifted but rep-incompatible can only arise from a bad cast;
       -- refuse at compile time rather than emit ill-typed Cmm.
       ; when (not reps_compatible) $
           panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
       ; v_info <- getCgIdInfo v
       ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr))) (idInfoToAmode v_info)
       ; _ <- bindArgsToRegs [NonVoid bndr]
       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
  where
    reps_compatible = idPrimRep v == idPrimRep bndr

-- Fallback for the ticket #3132 situation above: generate the usual
-- enter/return code followed by a (deliberately unreachable) busy loop.
cgCase scrut@(StgApp v []) _ (PrimAlt _) _
  = -- fail at run-time, not compile-time
    do { dflags <- getDynFlags
       ; mb_cc <- maybeSaveCostCentre True
       ; _ <- withSequel (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; emitComment $ mkFastString "should be unreachable code"
       ; l <- newLabelC
       ; emitLabel l
       ; emit (mkBranch l)
       ; return AssignedDirectly
       }
{-
case seq# a s of v
  (# s', a' #) -> e

==>

case a of v
  (# s', a' #) -> e

(taking advantage of the fact that the return convention for (# State#, a #)
is the same as the return convention for just 'a')
-}

cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
  = -- handle seq#, same return convention as vanilla 'a'.
    cgCase (StgApp a []) bndr alt_type alts

cgCase scrut bndr alt_type alts
  = -- the general case
    do { dflags <- getDynFlags
       ; up_hp_usg <- getVirtHp        -- Upstream heap usage
       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
             alt_regs  = map (idToReg dflags) ret_bndrs
             simple_scrut = isSimpleScrut scrut alt_type
             -- Decide between Plan A (GC check in each alternative) and
             -- Plan B (absorb alternative allocation upstream):
             do_gc  | not simple_scrut = True
                    | isSingleton alts = False
                    | up_hp_usg > 0    = False
                    | otherwise        = True
               -- cf Note [Compiling case expressions]
             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts

       ; mb_cc <- maybeSaveCostCentre simple_scrut

       -- if do_gc then our sequel will be ReturnTo
       --   - generate code for the sequel now
       --   - pass info about the sequel to cgAlts for use in the heap check
       -- else sequel will be AssignTo

       ; ret_kind <- withSequel (AssignTo alt_regs False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; _ <- bindArgsToRegs ret_bndrs
       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
       }
434
435
436 -----------------
-- | Save the current cost centre into a fresh local register, unless the
-- scrutinee is simple — a simple scrutinee cannot disturb the current cost
-- centre, so there is nothing to save (and nothing to restore later).
maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
maybeSaveCostCentre simple_scrut =
    if simple_scrut
       then return Nothing
       else saveCurrentCostCentre
441
442
443 -----------------
isSimpleScrut :: StgExpr -> AltType -> Bool
-- Simple scrutinee, does not block or allocate; hence safe to amalgamate
-- heap usage from alternatives into the stuff before the case
-- NB: if you get this wrong, and claim that the expression doesn't allocate
--     when it does, you'll deeply mess up allocation
isSimpleScrut scrut alt_ty =
    case (scrut, alt_ty) of
      (StgOpApp op _ _, _)     -> isSimpleOp op
      (StgLit _,        _)     -> True   -- case 1# of { 0# -> ..; ... }
      (StgApp _ [], PrimAlt _) -> True   -- case x# of { 0# -> ..; ... }
      _                        -> False
453
isSimpleOp :: StgOp -> Bool
-- True iff the op cannot block or allocate
isSimpleOp stg_op =
    case stg_op of
      StgFCallOp (CCall (CCallSpec _ _ safety)) _ -> not (playSafe safety)
      StgPrimOp p                                 -> not (primOpOutOfLine p)
      StgPrimCallOp _                             -> False
459
460 -----------------
chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id]
-- These are the binders of a case that are assigned by the evaluation of
-- the scrutinee.  Only non-void ones come back.
chooseReturnBndrs bndr alt_type alts =
    case (alt_type, alts) of
      (PrimAlt _, _)                  -> nonVoidIds [bndr]
      -- Unboxed tuple: the field binders are assigned; 'bndr' is not.
      -- An UbxTupAlt must have exactly one alternative.
      (UbxTupAlt _, [(_, ids, _, _)]) -> nonVoidIds ids
      (AlgAlt _, _)                   -> nonVoidIds [bndr]  -- Only 'bndr' is assigned
      (PolyAlt, _)                    -> nonVoidIds [bndr]  -- Only 'bndr' is assigned
      _                               -> panic "chooseReturnBndrs"
479
480 -------------------------------------
-------------------------------------
-- | Compile the alternatives of a case.  By the time this runs, the result
-- of the scrutinee has already been assigned to the case binders.
cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt]
       -> FCode ReturnKind
-- At this point the result of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)

cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)
        -- Here bndrs are *already* in scope, so don't rebind them

cgAlts gc_plan bndr (PrimAlt _) alts
  = do  { dflags <- getDynFlags

        ; tagged_cmms <- cgAltRhss gc_plan bndr alts

        ; let bndr_reg = CmmLocal (idToReg dflags bndr)
              -- NB: partial match on 'head' is safe here because ...
              (DEFAULT,deflt) = head tagged_cmms
                -- PrimAlts always have a DEFAULT case
                -- and it always comes first

              tagged_cmms' = [(lit,code)
                             | (LitAlt lit, code) <- tagged_cmms]
        ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
        ; return AssignedDirectly }

cgAlts gc_plan bndr (AlgAlt tycon) alts
  = do  { dflags <- getDynFlags

        ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts

        ; let fam_sz   = tyConFamilySize tycon
              bndr_reg = CmmLocal (idToReg dflags bndr)

                    -- Is the constructor tag in the node reg?
        ; if isSmallFamily dflags fam_sz
          then do
                let   -- Yes, bndr_reg has constr. tag in ls bits
                   tag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg)
                   branches' = [(tag+1,branch) | (tag,branch) <- branches]
                   -- +1 because pointer tags are 1-based, whereas the
                   -- branches are indexed by zero-based ConTagZ
                emitSwitch tag_expr branches' mb_deflt 1 fam_sz
                return AssignedDirectly

           else -- No, get tag from info table
                do dflags <- getDynFlags
                   let -- Note that ptr _always_ has tag 1
                       -- when the family size is big enough
                       untagged_ptr = cmmRegOffB bndr_reg (-1)
                       tag_expr = getConstrTag dflags (untagged_ptr)
                   emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
                   return AssignedDirectly }

cgAlts _ _ _ _ = panic "cgAlts"
        -- UbxTupAlt and PolyAlt have only one alternative
534
535
536 -- Note [alg-alt heap check]
537 --
538 -- In an algebraic case with more than one alternative, we will have
539 -- code like
540 --
541 -- L0:
542 -- x = R1
543 -- goto L1
544 -- L1:
545 -- if (x & 7 >= 2) then goto L2 else goto L3
546 -- L2:
547 -- Hp = Hp + 16
548 -- if (Hp > HpLim) then goto L4
549 -- ...
550 -- L4:
551 -- call gc() returns to L5
552 -- L5:
553 -- x = R1
554 -- goto L1
555
556 -------------------
-------------------
-- | Compile the alternatives of an algebraic case, splitting the result
-- into an optional DEFAULT graph and the per-constructor branches keyed
-- by zero-based constructor tag.
cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
             -> FCode ( Maybe CmmAGraph
                      , [(ConTagZ, CmmAGraph)] )
cgAlgAltRhss gc_plan bndr alts = do
    tagged_cmms <- cgAltRhss gc_plan bndr alts

    let deflt = case tagged_cmms of
                  ((DEFAULT, rhs) : _) -> Just rhs
                  _other               -> Nothing
                -- DEFAULT is always first, if present

        -- The comprehension's pattern silently drops the DEFAULT entry,
        -- keeping only constructor alternatives.
        con_branches = [ (dataConTagZ con, cmm)
                       | (DataAlt con, cmm) <- tagged_cmms ]

    return (deflt, con_branches)
574
575
576 -------------------
-------------------
-- | Compile each alternative's RHS to a Cmm graph, tagged with its AltCon.
-- Each alternative is compiled via 'forkAlts' — presumably so the bindings
-- made in one alternative do not leak into another; confirm in StgCmmMonad.
cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
          -> FCode [(AltCon, CmmAGraph)]
cgAltRhss gc_plan bndr alts = do
  dflags <- getDynFlags
  let
    base_reg = idToReg dflags bndr
    cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph)
    cg_alt (con, bndrs, _uses, rhs)
      = getCodeR $                    -- capture the emitted graph
        maybeAltHeapCheck gc_plan $
        do { _ <- bindConArgs con base_reg bndrs
           ; _ <- cgExpr rhs
           ; return con }
  forkAlts (map cg_alt alts)
591
-- | Wrap an alternative's code in a heap check when the 'GcPlan' asks for
-- one; the check's flavour depends on how the scrutinee returned.
maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
maybeAltHeapCheck (plan, ret_kind) code =
    case plan of
      NoGcInAlts     -> code
      GcInAlts regs  ->
        case ret_kind of
          AssignedDirectly    -> altHeapCheck regs code
          ReturnedTo lret off -> altHeapCheckReturnsTo regs lret off code
598
599 -----------------------------------------------------------------------------
600 -- Tail calls
601 -----------------------------------------------------------------------------
602
-- | Generate code for a saturated constructor application in tail position.
cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
cgConApp con stg_args
  | isUnboxedTupleCon con       -- Unboxed tuple: assign and return
  = do { arg_exprs <- getNonVoidArgAmodes stg_args
       ; tickyUnboxedTupleReturn (length arg_exprs)
       ; emitReturn arg_exprs }

  | otherwise   --  Boxed constructors; allocate and return
  = ASSERT( stg_args `lengthIs` dataConRepRepArity con )
    do  { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
                                     currentCCS con stg_args
                -- The first "con" says that the name bound to this
                -- closure is is "con", which is a bit of a fudge, but
                -- it only affects profiling (hence the False)

        ; emit =<< fcode_init
        ; emitReturn [idInfoToAmode idinfo] }
620
621
-- | Generate code for applying an identifier to arguments in tail
-- position: either a jump to a let-no-escape join point, or a tail call.
cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
-- A void identifier produces no code; just return no results.
cgIdApp fun_id [] | isVoidId fun_id = emitReturn []
cgIdApp fun_id args
  = do  { fun_info <- getCgIdInfo fun_id
        ; case maybeLetNoEscape fun_info of
            Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args
            Nothing -> cgTailCall (cg_id fun_info) fun_info args }
  -- NB. use (cg_id fun_info) instead of fun_id, because the former
  -- may be externalised for -split-objs.
  -- See StgCmm.maybeExternaliseId.
632
-- | Jump to a let-no-escape join point: assign the argument amodes to the
-- join point's registers and branch to its block.  The current sequel is
-- discarded — control continues wherever the join point itself continues.
cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode ReturnKind
cgLneJump target regs call_args = do
    adjustHpBackwards               -- always do this before a tail-call
    arg_amodes <- getNonVoidArgAmodes call_args
    emitMultiAssign regs arg_amodes
    emit (mkBranch target)
    return AssignedDirectly
640
-- | Generate a tail call to a function, dispatching on the call method
-- chosen by 'getCallMethod' (return-in-WHNF, enter, slow call via the RTS
-- apply routines, or a direct call to a known entry point).
cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode ReturnKind
cgTailCall fun_id fun_info args = do
    dflags <- getDynFlags
    case (getCallMethod dflags fun_name (idCafInfo fun_id) lf_info (length args)) of

            -- A value in WHNF, so we can just return it.
        ReturnIt -> emitReturn [fun]    -- ToDo: does ReturnIt guarantee tagged?

        EnterIt -> ASSERT( null args )  -- Discarding arguments
                   emitEnter fun

        SlowCall -> do      -- A slow function call via the RTS apply routines
                { tickySlowCall lf_info args
                ; emitComment $ mkFastString "slowCall"
                ; slowCall fun args }

        -- A direct function call (possibly with some left-over arguments)
        DirectEntry lbl arity -> do
                { tickyDirectCall arity args
                -- When the closure must be in the node register, pass the
                -- function itself as the first (node) argument.
                ; if node_points dflags
                     then directCall NativeNodeCall   lbl arity (fun_arg:args)
                     else directCall NativeDirectCall lbl arity args }

        JumpToIt {} -> panic "cgTailCall"       -- ???

  where
    fun_arg     = StgVarArg fun_id
    fun_name    = idName            fun_id
    fun         = idInfoToAmode     fun_info
    lf_info     = cgIdInfoLF        fun_info
    node_points dflags = nodeMustPointToIt dflags lf_info
672
673
-- | Emit code to enter (evaluate) a closure.  The shape of the generated
-- code depends on the current sequel: a plain jump for 'Return', or a
-- tag-test plus call/return continuation for 'AssignTo'.
emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
  { dflags <- getDynFlags
  ; adjustHpBackwards
  ; sequel <- getSequel
  ; updfr_off <- getUpdFrameOff
  ; case sequel of
      -- For a return, we have the option of generating a tag-test or
      -- not.  If the value is tagged, we can return directly, which
      -- is quicker than entering the value.  This is a code
      -- size/speed trade-off: when optimising for speed rather than
      -- size we could generate the tag test.
      --
      -- Right now, we do what the old codegen did, and omit the tag
      -- test, just generating an enter.
      Return _ -> do
        { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg
        ; emit $ mkJump dflags NativeNodeCall entry
                        [cmmUntag dflags fun] updfr_off
        ; return AssignedDirectly
        }

      -- The result will be scrutinised in the sequel.  This is where
      -- we generate a tag-test to avoid entering the closure if
      -- possible.
      --
      -- The generated code will be something like this:
      --
      --    R1 = fun  -- copyout
      --    if (fun & 7 != 0) goto Lcall else goto Lret
      --  Lcall:
      --    call [fun] returns to Lret
      --  Lret:
      --    fun' = R1  -- copyin
      --    ...
      --
      -- Note in particular that the label Lret is used as a
      -- destination by both the tag-test and the call.  This is
      -- because Lret will necessarily be a proc-point, and we want to
      -- ensure that we generate only one proc-point for this
      -- sequence.
      --
      -- Furthermore, we tell the caller that we generated a native
      -- return continuation by returning (ReturnedTo Lret off), so
      -- that the continuation can be reused by the heap-check failure
      -- code in the enclosing case expression.
      --
      AssignTo res_regs _ -> do
       { lret <- newLabelC
       ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs []
       ; lcall <- newLabelC
       ; updfr_off <- getUpdFrameOff
       ; let area = Young lret
       ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
                                          [fun] updfr_off []
         -- refer to fun via nodeReg after the copyout, to avoid having
         -- both live simultaneously; this sometimes enables fun to be
         -- inlined in the RHS of the R1 assignment.
       ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg))
             the_call = toCall entry (Just lret) updfr_off off outArgs regs
       ; emit $
           copyout <*>
           mkCbranch (cmmIsTagged dflags (CmmReg nodeReg)) lret lcall <*>
           outOfLine lcall the_call <*>
           mkLabel lret <*>
           copyin
       ; return (ReturnedTo lret off)
       }
  }
742 }