-----------------------------------------------------------------------------
--
-- Stg to C-- code generation: expressions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

{-# OPTIONS -fno-warn-tabs #-}
-- The above warning suppression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and
-- detab the module (please do the detabbing in a separate patch). See
--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
-- for details

module StgCmmExpr ( cgExpr ) where

#define FAST_STRING_NOT_NEEDED
#include "HsVersions.h"

import {-# SOURCE #-} StgCmmBind ( cgBind )

import StgCmmMonad
import StgCmmHeap
import StgCmmEnv
import StgCmmCon
import StgCmmProf
import StgCmmLayout
import StgCmmPrim
import StgCmmHpc
import StgCmmTicky
import StgCmmUtils
import StgCmmClosure

import StgSyn

import MkGraph
import BlockId
import Cmm
import CoreSyn
import DataCon
import ForeignCall
import Id
import PrimOp
import TyCon
import Type
import CostCentre       ( CostCentreStack, currentCCS )
import Maybes
import Util
import FastString
import Outputable
import UniqSupply

import Control.Monad (when,void)

------------------------------------------------------------------------
-- cgExpr: the main function
------------------------------------------------------------------------

cgExpr  :: StgExpr -> FCode ReturnKind

cgExpr (StgApp fun args)     = cgIdApp fun args

{- seq# a s ==> a -}
cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
  cgIdApp a []

cgExpr (StgOpApp op args ty) = cgOpApp op args ty
cgExpr (StgConApp con args)  = cgConApp con args
cgExpr (StgSCC cc tick push expr) = do { emitSetCCC cc tick push; cgExpr expr }
cgExpr (StgTick m n expr) = do { emit (mkTickBox m n); cgExpr expr }
cgExpr (StgLit lit)       = do cmm_lit <- cgLit lit
                               emitReturn [CmmLit cmm_lit]

cgExpr (StgLet binds expr)             = do { cgBind binds;     cgExpr expr }
cgExpr (StgLetNoEscape _ _ binds expr) =
  do { us <- newUniqSupply
     ; let join_id = mkBlockId (uniqFromSupply us)
     ; cgLneBinds join_id binds
     ; r <- cgExpr expr
     ; emitLabel join_id
     ; return r }

cgExpr (StgCase expr _live_vars _save_vars bndr _srt alt_type alts) =
  cgCase expr bndr alt_type alts

cgExpr (StgLam {}) = panic "cgExpr: StgLam"
------------------------------------------------------------------------
-- Let no escape
------------------------------------------------------------------------

{- Generating code for a let-no-escape binding, a.k.a. a join point,
   is very similar to what we do for a case expression.  The duality is
   between
        let-no-escape x = b
        in e
   and
        case e of ... -> b

   That is, the RHS of 'x' (ie 'b') will execute *later*, just like
   the alternative of the case; it needs to be compiled in an environment
   in which all volatile bindings are forgotten, and the free vars are
   bound only to stable things like stack locations.  The 'e' part will
   execute *next*, just like the scrutinee of a case. -}
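
{- For example (an illustrative sketch, not taken from any particular
   program):

        let-no-escape j = \ x -> ...x...
        in case y of
             A -> j 1#
             B -> j 2#

   compiles the body of 'j' to an out-of-line block that ends in a
   branch to the join-point label placed after the whole expression,
   and each call of 'j' becomes an assignment to j's argument registers
   followed by a jump to that block (see cgLneBinds and cgLneJump
   below); no closure is allocated for 'j' and no return address is
   pushed. -}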

-------------------------
cgLneBinds :: BlockId -> StgBinding -> FCode ()
cgLneBinds join_id (StgNonRec bndr rhs)
  = do  { local_cc <- saveCurrentCostCentre
                -- See Note [Saving the current cost centre]
        ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
        ; fcode
        ; addBindC (cg_id info) info }

cgLneBinds join_id (StgRec pairs)
  = do  { local_cc <- saveCurrentCostCentre
        ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
        ; let (infos, fcodes) = unzip r
        ; addBindsC infos
        ; sequence_ fcodes
        }

-------------------------
cgLetNoEscapeRhs
    :: BlockId          -- join point for successor of let-no-escape
    -> Maybe LocalReg   -- Saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeRhs join_id local_cc bndr rhs =
  do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
     ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
     ; let code = do { body <- getCode rhs_code
                     ; emitOutOfLine bid (body <*> mkBranch join_id) }
     ; return (info, code)
     }

cgLetNoEscapeRhsBody
    :: Maybe LocalReg   -- Saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())
cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd _ args body)
  = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
  = cgLetNoEscapeClosure bndr local_cc cc [] (StgConApp con args)
        -- For a constructor RHS we want to generate a single chunk of
        -- code which can be jumped to from many places, which will
        -- return the constructor.  It's easy; just behave as if it
        -- were an StgRhsClosure with a ConApp inside!

-------------------------
cgLetNoEscapeClosure
        :: Id                   -- binder
        -> Maybe LocalReg       -- Slot for saved current cost centre
        -> CostCentreStack      -- XXX: *** NOT USED *** why not?
        -> [NonVoid Id]         -- Args (as in \ args -> body)
        -> StgExpr              -- Body (as in above)
        -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
  = return ( lneIdInfo bndr args
           , code )
  where
    code = forkProc $ do
             { restoreCurrentCostCentre cc_slot
             ; arg_regs <- bindArgsToRegs args
             ; void $ altHeapCheck arg_regs (cgExpr body) }
                 -- Using altHeapCheck just reduces
                 -- instructions to save on stack


------------------------------------------------------------------------
-- Case expressions
------------------------------------------------------------------------

{- Note [Compiling case expressions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It is quite interesting to decide whether to put a heap-check at the
start of each alternative.  Of course we certainly have to do so if
the case forces an evaluation, or if there is a primitive op which can
trigger GC.

A more interesting situation is this (a Plan-B situation)

        !P!;
        ...P...
        case x# of
          0#      -> !Q!; ...Q...
          default -> !R!; ...R...

where !x! indicates a possible heap-check point.  The heap checks
in the alternatives *can* be omitted, in which case the topmost
heap check will take their worst case into account.

In favour of omitting !Q!, !R!:

 - *May* save a heap overflow test,
   if ...P... allocates anything.

 - We can use relative addressing from a single Hp to
   get at all the closures so allocated.

 - No need to save volatile vars etc across heap checks
   in !Q!, !R!

Against omitting !Q!, !R!:

 - May put a heap-check into the inner loop.  Suppose
   the main loop is P -> R -> P -> R...
   Q is the loop exit, and only it does allocation.
   This only hurts us if P does no allocation.  If P allocates,
   then there is a heap check in the inner loop anyway.

 - May do more allocation than required.  This sometimes bites us
   badly.  For example, nfib (ha!) allocates about 30% more space if the
   worst-casing is done, because many many calls to nfib are leaf calls
   which don't need to allocate anything.

   We can un-allocate, but that costs an instruction.

Neither problem hurts us if there is only one alternative.

Suppose the inner loop is P->R->P->R etc.  Then here is
how many heap checks we get in the *inner loop* under various
conditions

    Alloc    Heap check in branches (!Q!, !R!)?
    P Q R       yes        no (absorb to !P!)
    -------------------------------------------
    n n n        0          0
    n y n        0          1
    n . y        1          1
    y . y        2          1
    y . n        1          1

Best choices: absorb heap checks from Q and R into !P! iff
  a) P itself does some allocation
or
  b) P does allocation, or there is exactly one alternative

We adopt (b) because that is more likely to put the heap check at the
entry to a function, when not many things are live.  After a bunch of
single-branch cases, we may have lots of things live.

Hence: two basic plans for

        case e of r { alts }

------ Plan A: the general case ---------

        ...save current cost centre...

        ...code for e,
           with sequel (SetLocals r)

        ...restore current cost centre...
        ...code for alts...
        ...alts do their own heap checks

------ Plan B: special case when ---------
  (i)  e does not allocate or call GC
  (ii) either upstream code performs allocation
       or there is just one alternative

  Then heap allocation in the (single) case branch
  is absorbed by the upstream check.
  Very common example: primops on unboxed values

        ...code for e,
           with sequel (SetLocals r)...

        ...code for alts...
        ...no heap check...
-}
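
{- A concrete Plan-B instance (an illustrative sketch only):

        case x# +# 1# of y#
          0# -> e1
          _  -> e2

   The scrutinee is an inline primop that neither allocates nor calls
   the GC, so provided the upstream code already allocates (or there is
   just one alternative), any allocation in e1/e2 is absorbed into the
   upstream heap check instead of being checked separately in each
   alternative; see the do_gc calculation in cgCase below. -}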


-------------------------------------
data GcPlan
  = GcInAlts            -- Put a GC check at the start of the case alternatives,
        [LocalReg]      -- which binds these registers
  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a
                        -- primitive op which does no GC.  Absorb the allocation
                        -- of the case alternative(s) into the upstream check

-------------------------------------
cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ReturnKind

cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
  | isEnumerationTyCon tycon -- Note [case on bool]
  = do { tag_expr <- do_enum_primop op args

       -- If the binder is not dead, convert the tag to a constructor
       -- and assign it.
       ; when (not (isDeadBinder bndr)) $ do
            { tmp_reg <- bindArgToReg (NonVoid bndr)
            ; emitAssign (CmmLocal tmp_reg)
                         (tagToClosure tycon tag_expr) }

       ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
                                              (NonVoid bndr) alts
       ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
       ; return AssignedDirectly
       }
  where
    do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
    do_enum_primop TagToEnumOp [arg]  -- No code!
      = getArgAmode (NonVoid arg)
    do_enum_primop primop args
      = do tmp <- newTemp bWord
           cgPrimOp [tmp] primop args
           return (CmmReg (CmmLocal tmp))

{-
Note [case on bool]
~~~~~~~~~~~~~~~~~~~
This special case handles code like

  case a <# b of
    True ->
    False ->

If we let the ordinary case code handle it, we'll get something like

 tmp1 = a < b
 tmp2 = Bool_closure_tbl[tmp1]
 if (tmp2 & 7 != 0) then ... // normal tagged case

but this junk won't optimise away.  What we really want is just an
inline comparison:

 if (a < b) then ...

So we add a special case to generate

 tmp1 = a < b
 if (tmp1 == 0) then ...

and later optimisations will further improve this.

We should really change all these primops to return Int# instead;
that would make this special case go away.
-}


-- Note [ticket #3132]: we might be looking at a case of a lifted Id
-- that was cast to an unlifted type.  The Id will always be bottom,
-- but we don't want the code generator to fall over here.  If we
-- just emit an assignment here, the assignment will be
-- type-incorrect Cmm.  Hence, we emit the usual enter/return code,
-- (and because bottom must be untagged, it will be entered and the
-- program will crash).
-- The Sequel is a type-correct assignment, albeit bogus.
-- The (dead) continuation loops; it would be better to invoke some kind
-- of panic function here.
--
-- However, we also want to allow an assignment to be generated
-- in the case when the types are compatible, because this allows
-- some slightly-dodgy but occasionally-useful casts to be used,
-- such as in RtClosureInspect where we cast an HValue to a MutVar#
-- so we can print out the contents of the MutVar#.  If we generate
-- code that enters the HValue, then we'll get a runtime panic, because
-- the HValue really is a MutVar#.  The types are compatible though,
-- so we can just generate an assignment.
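--
-- For illustration only (a hypothetical sketch, not the actual code in
-- RtClosureInspect): something along the lines of
--
--      case unsafeCoerce# (v :: HValue) :: MutVar# RealWorld Int of
--        mv -> ...read the contents of mv...
--
-- Here 'v' and the case binder have the same PrimRep (both are
-- pointers), so reps_compatible holds below and a plain register
-- assignment is both type-correct Cmm and exactly what we want.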
cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
  | isUnLiftedType (idType v)
  || reps_compatible
  = -- assignment suffices for unlifted types
    do { when (not reps_compatible) $
           panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
       ; v_info <- getCgIdInfo v
       ; emitAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info)
       ; _ <- bindArgsToRegs [NonVoid bndr]
       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
  where
    reps_compatible = idPrimRep v == idPrimRep bndr

cgCase scrut@(StgApp v []) _ (PrimAlt _) _
  = -- fail at run-time, not compile-time
    do { mb_cc <- maybeSaveCostCentre True
       ; _ <- withSequel (AssignTo [idToReg (NonVoid v)] False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; emitComment $ mkFastString "should be unreachable code"
       ; l <- newLabelC
       ; emitLabel l
       ; emit (mkBranch l)
       ; return AssignedDirectly
       }

{-
case seq# a s of v
  (# s', a' #) -> e

==>

case a of v
  (# s', a' #) -> e

(taking advantage of the fact that the return convention for (# State#, a #)
is the same as the return convention for just 'a')
-}

cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
  = -- handle seq#, same return convention as vanilla 'a'.
    cgCase (StgApp a []) bndr alt_type alts

cgCase scrut bndr alt_type alts
  = -- the general case
    do { up_hp_usg <- getVirtHp        -- Upstream heap usage
       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
             alt_regs  = map idToReg ret_bndrs
             simple_scrut = isSimpleScrut scrut alt_type
             do_gc  | not simple_scrut = True
                    | isSingleton alts = False
                    | up_hp_usg > 0    = False
                    | otherwise        = True
             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts

       ; mb_cc <- maybeSaveCostCentre simple_scrut

       -- if do_gc then our sequel will be ReturnTo
       --   - generate code for the sequel now
       --   - pass info about the sequel to cgAlts for use in the heap check
       -- else sequel will be AssignTo

       ; ret_kind <- withSequel (AssignTo alt_regs False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; _ <- bindArgsToRegs ret_bndrs
       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
       }


-----------------
maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
maybeSaveCostCentre simple_scrut
  | simple_scrut = return Nothing
  | otherwise    = saveCurrentCostCentre


-----------------
isSimpleScrut :: StgExpr -> AltType -> Bool
-- Simple scrutinee, does not block or allocate; hence safe to amalgamate
-- heap usage from alternatives into the stuff before the case
-- NB: if you get this wrong, and claim that the expression doesn't allocate
--     when it does, you'll deeply mess up allocation
isSimpleScrut (StgOpApp op _ _) _           = isSimpleOp op
isSimpleScrut (StgLit _)        _           = True  -- case 1# of { 0# -> ..; ... }
isSimpleScrut (StgApp _ [])     (PrimAlt _) = True  -- case x# of { 0# -> ..; ... }
isSimpleScrut _                 _           = False

isSimpleOp :: StgOp -> Bool
-- True iff the op cannot block or allocate
isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) = not (playSafe safe)
isSimpleOp (StgPrimOp op)                              = not (primOpOutOfLine op)
isSimpleOp (StgPrimCallOp _)                           = False
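-- For example (illustrative): a scrutinee such as 'x# +# 1#' or a
-- literal is simple, whereas a safe foreign call or an out-of-line
-- primop such as newArray# is not, because it may block, allocate or
-- trigger GC.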

-----------------
chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id]
-- These are the binders of a case that are assigned
-- by the evaluation of the scrutinee
-- Only non-void ones come back
chooseReturnBndrs bndr (PrimAlt _) _alts
  = nonVoidIds [bndr]

chooseReturnBndrs _bndr (UbxTupAlt _) [(_, ids, _, _)]
  = nonVoidIds ids      -- 'bndr' is not assigned!

chooseReturnBndrs bndr (AlgAlt _) _alts
  = nonVoidIds [bndr]   -- Only 'bndr' is assigned

chooseReturnBndrs bndr PolyAlt _alts
  = nonVoidIds [bndr]   -- Only 'bndr' is assigned

chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
        -- UbxTupAlt has only one alternative

-------------------------------------
cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt]
       -> FCode ReturnKind
-- At this point the results of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)

cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)
        -- Here bndrs are *already* in scope, so don't rebind them

cgAlts gc_plan bndr (PrimAlt _) alts
  = do  { tagged_cmms <- cgAltRhss gc_plan bndr alts

        ; let bndr_reg = CmmLocal (idToReg bndr)
              (DEFAULT,deflt) = head tagged_cmms
                -- PrimAlts always have a DEFAULT case
                -- and it always comes first

              tagged_cmms' = [(lit,code)
                             | (LitAlt lit, code) <- tagged_cmms]
        ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
        ; return AssignedDirectly }

cgAlts gc_plan bndr (AlgAlt tycon) alts
  = do  { (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts

        ; let fam_sz   = tyConFamilySize tycon
              bndr_reg = CmmLocal (idToReg bndr)

          -- Is the constructor tag in the node reg?
        ; if isSmallFamily fam_sz
          then do
                let     -- Yes, bndr_reg has the constructor tag in its
                        -- least-significant bits
                    tag_expr  = cmmConstrTag1 (CmmReg bndr_reg)
                    branches' = [(tag+1,branch) | (tag,branch) <- branches]
                emitSwitch tag_expr branches' mb_deflt 1 fam_sz
                return AssignedDirectly

          else -- No, get the tag from the info table
               do dflags <- getDynFlags
                  let -- Note that the ptr _always_ has tag 1
                      -- when the family size is big enough
                      untagged_ptr = cmmRegOffB bndr_reg (-1)
                      tag_expr     = getConstrTag dflags (untagged_ptr)
                  emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
                  return AssignedDirectly }

cgAlts _ _ _ _ = panic "cgAlts"
        -- UbxTupAlt and PolyAlt have only one alternative


-- Note [alg-alt heap check]
--
-- In an algebraic case with more than one alternative, we will have
-- code like
--
-- L0:
--   x = R1
--   goto L1
-- L1:
--   if (x & 7 >= 2) then goto L2 else goto L3
-- L2:
--   Hp = Hp + 16
--   if (Hp > HpLim) then goto L4
--   ...
-- L4:
--   call gc() returns to L5
-- L5:
--   x = R1
--   goto L1

-------------------
cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
             -> FCode ( Maybe CmmAGraph
                      , [(ConTagZ, CmmAGraph)] )
cgAlgAltRhss gc_plan bndr alts
  = do { tagged_cmms <- cgAltRhss gc_plan bndr alts

       ; let { mb_deflt = case tagged_cmms of
                            ((DEFAULT,rhs) : _) -> Just rhs
                            _other              -> Nothing
                             -- DEFAULT is always first, if present

             ; branches = [ (dataConTagZ con, cmm)
                          | (DataAlt con, cmm) <- tagged_cmms ]
             }

       ; return (mb_deflt, branches)
       }


-------------------
cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
          -> FCode [(AltCon, CmmAGraph)]
cgAltRhss gc_plan bndr alts
  = forkAlts (map cg_alt alts)
  where
    base_reg = idToReg bndr
    cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph)
    cg_alt (con, bndrs, _uses, rhs)
      = getCodeR          $
        maybeAltHeapCheck gc_plan $
        do { _ <- bindConArgs con base_reg bndrs
           ; _ <- cgExpr rhs
           ; return con }

maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
maybeAltHeapCheck (NoGcInAlts,_)  code = code
maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
  altHeapCheck regs code
maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
  altHeapCheckReturnsTo regs lret off code

-----------------------------------------------------------------------------
-- Tail calls
-----------------------------------------------------------------------------

cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
cgConApp con stg_args
  | isUnboxedTupleCon con       -- Unboxed tuple: assign and return
  = do { arg_exprs <- getNonVoidArgAmodes stg_args
       ; tickyUnboxedTupleReturn (length arg_exprs)
       ; emitReturn arg_exprs }

  | otherwise   -- Boxed constructors; allocate and return
  = ASSERT( stg_args `lengthIs` dataConRepRepArity con )
    do  { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con)
                                      currentCCS con stg_args
                -- The first "con" says that the name bound to this
                -- closure is "con", which is a bit of a fudge, but it
                -- only affects profiling

        ; emit =<< fcode_init
        ; emitReturn [idInfoToAmode idinfo] }


cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
cgIdApp fun_id [] | isVoidId fun_id = emitReturn []
cgIdApp fun_id args
  = do  { fun_info <- getCgIdInfo fun_id
        ; case maybeLetNoEscape fun_info of
            Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args
            Nothing                 -> cgTailCall fun_id fun_info args }

cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode ReturnKind
cgLneJump blk_id lne_regs args  -- Join point; discard sequel
  = do  { adjustHpBackwards  -- always do this before a tail-call
        ; cmm_args <- getNonVoidArgAmodes args
        ; emitMultiAssign lne_regs cmm_args
        ; emit (mkBranch blk_id)
        ; return AssignedDirectly }

cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode ReturnKind
cgTailCall fun_id fun_info args = do
    dflags <- getDynFlags
    case (getCallMethod dflags fun_name (idCafInfo fun_id) lf_info (length args)) of

            -- A value in WHNF, so we can just return it.
        ReturnIt -> emitReturn [fun]    -- ToDo: does ReturnIt guarantee tagged?

        EnterIt -> ASSERT( null args )  -- Discarding arguments
                   emitEnter fun

        SlowCall -> do      -- A slow function call via the RTS apply routines
                { tickySlowCall lf_info args
                ; emitComment $ mkFastString "slowCall"
                ; slowCall fun args }

        -- A direct function call (possibly with some left-over arguments)
        DirectEntry lbl arity -> do
                { tickyDirectCall arity args
                ; if node_points dflags
                     then directCall NativeNodeCall   lbl arity (fun_arg:args)
                     else directCall NativeDirectCall lbl arity args }

        JumpToIt {} -> panic "cgTailCall"       -- ???

  where
    fun_arg     = StgVarArg fun_id
    fun_name    = idName            fun_id
    fun         = idInfoToAmode     fun_info
    lf_info     = cgIdInfoLF        fun_info
    node_points dflags = nodeMustPointToIt dflags lf_info


emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
  { dflags <- getDynFlags
  ; adjustHpBackwards
  ; sequel <- getSequel
  ; updfr_off <- getUpdFrameOff
  ; case sequel of
      -- For a return, we have the option of generating a tag-test or
      -- not.  If the value is tagged, we can return directly, which
      -- is quicker than entering the value.  This is a code
      -- size/speed trade-off: when optimising for speed rather than
      -- size we could generate the tag test.
      --
      -- Right now, we do what the old codegen did, and omit the tag
      -- test, just generating an enter.
      Return _ -> do
        { let entry = entryCode dflags $ closureInfoPtr $ CmmReg nodeReg
        ; emit $ mkForeignJump dflags NativeNodeCall entry
                    [cmmUntag fun] updfr_off
        ; return AssignedDirectly
        }

      -- The result will be scrutinised in the sequel.  This is where
      -- we generate a tag-test to avoid entering the closure if
      -- possible.
      --
      -- The generated code will be something like this:
      --
      --    R1 = fun  -- copyout
      --    if (fun & 7 != 0) goto Lcall else goto Lret
      --  Lcall:
      --    call [fun] returns to Lret
      --  Lret:
      --    fun' = R1  -- copyin
      --    ...
      --
      -- Note in particular that the label Lret is used as a
      -- destination by both the tag-test and the call.  This is
      -- because Lret will necessarily be a proc-point, and we want to
      -- ensure that we generate only one proc-point for this
      -- sequence.
      --
      -- Furthermore, we tell the caller that we generated a native
      -- return continuation by returning (ReturnedTo Lret off), so
      -- that the continuation can be reused by the heap-check failure
      -- code in the enclosing case expression.
      --
      AssignTo res_regs _ -> do
       { lret <- newLabelC
       ; let (off, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs
       ; lcall <- newLabelC
       ; updfr_off <- getUpdFrameOff
       ; let area = Young lret
       ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
                                          [fun] updfr_off (0,[])
         -- refer to fun via nodeReg after the copyout, to avoid having
         -- both live simultaneously; this sometimes enables fun to be
         -- inlined in the RHS of the R1 assignment.
       ; let entry = entryCode dflags (closureInfoPtr (CmmReg nodeReg))
             the_call = toCall entry (Just lret) updfr_off off outArgs regs
       ; emit $
           copyout <*>
           mkCbranch (cmmIsTagged (CmmReg nodeReg)) lret lcall <*>
           outOfLine lcall the_call <*>
           mkLabel lret <*>
           copyin
       ; return (ReturnedTo lret off)
       }
  }