-----------------------------------------------------------------------------
--
-- Stg to C-- code generation: expressions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmExpr ( cgExpr ) where

#define FAST_STRING_NOT_NEEDED
#include "HsVersions.h"

import {-# SOURCE #-} StgCmmBind ( cgBind )

import StgCmmMonad
import StgCmmHeap
import StgCmmEnv
import StgCmmCon
import StgCmmProf (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
import StgCmmLayout
import StgCmmPrim
import StgCmmHpc
import StgCmmTicky
import StgCmmUtils
import StgCmmClosure

import StgSyn

import MkGraph
import BlockId
import Cmm
import CmmInfo
import CoreSyn
import DataCon
import ForeignCall
import Id
import PrimOp
import TyCon
import Type
import CostCentre ( CostCentreStack, currentCCS )
import Maybes
import Util
import FastString
import Outputable

import Control.Monad (when,void)

------------------------------------------------------------------------
-- cgExpr: the main function
------------------------------------------------------------------------

cgExpr :: StgExpr -> FCode ReturnKind

cgExpr (StgApp fun args)     = cgIdApp fun args

{- seq# a s ==> a -}
cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
  cgIdApp a []

cgExpr (StgOpApp op args ty) = cgOpApp op args ty
cgExpr (StgConApp con args)  = cgConApp con args
cgExpr (StgSCC cc tick push expr) = do { emitSetCCC cc tick push; cgExpr expr }
cgExpr (StgTick m n expr) = do dflags <- getDynFlags
                               emit (mkTickBox dflags m n)
                               cgExpr expr
cgExpr (StgLit lit)       = do cmm_lit <- cgLit lit
                               emitReturn [CmmLit cmm_lit]

cgExpr (StgLet binds expr)             = do { cgBind binds; cgExpr expr }
cgExpr (StgLetNoEscape _ _ binds expr) =
  do { u <- newUnique
     ; let join_id = mkBlockId u
     ; cgLneBinds join_id binds
     ; r <- cgExpr expr
     ; emitLabel join_id
     ; return r }

cgExpr (StgCase expr _live_vars _save_vars bndr _srt alt_type alts) =
  cgCase expr bndr alt_type alts

cgExpr (StgLam {}) = panic "cgExpr: StgLam"

------------------------------------------------------------------------
-- Let no escape
------------------------------------------------------------------------

{- Generating code for a let-no-escape binding, aka a join point, is
   very similar to what we do for a case expression.  The duality is
   between
        let-no-escape x = b
        in e
   and
        case e of ... -> b

   That is, the RHS of 'x' (ie 'b') will execute *later*, just like
   the alternative of the case; it needs to be compiled in an environment
   in which all volatile bindings are forgotten, and the free vars are
   bound only to stable things like stack locations.  The 'e' part will
   execute *next*, just like the scrutinee of a case. -}

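{- For instance, a sketch in STG-ish pseudocode (hypothetical, not code
   from this module):

        let-no-escape j = \ x -> <rhs>
        in case v of { A -> j 1#; B -> j 2# }

   Each call to 'j' compiles to assignments to j's argument registers
   followed by a branch to j's code block (the JumpToIt case of cgIdApp
   below), and the code for 'j' itself ends with a branch to the join_id
   label allocated in cgExpr for StgLetNoEscape above. -}
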
-------------------------
cgLneBinds :: BlockId -> StgBinding -> FCode ()
cgLneBinds join_id (StgNonRec bndr rhs)
  = do  { local_cc <- saveCurrentCostCentre
                -- See Note [Saving the current cost centre]
        ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
        ; fcode
        ; addBindC info }

cgLneBinds join_id (StgRec pairs)
  = do  { local_cc <- saveCurrentCostCentre
        ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
        ; let (infos, fcodes) = unzip r
        ; addBindsC infos
        ; sequence_ fcodes
        }

-------------------------
cgLetNoEscapeRhs
    :: BlockId          -- join point for successor of let-no-escape
    -> Maybe LocalReg   -- Saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeRhs join_id local_cc bndr rhs =
  do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
     ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
     ; let code = do { body <- getCode rhs_code
                     ; emitOutOfLine bid (body <*> mkBranch join_id) }
     ; return (info, code)
     }

cgLetNoEscapeRhsBody
    :: Maybe LocalReg   -- Saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())
cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd _ args body)
  = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
  = cgLetNoEscapeClosure bndr local_cc cc [] (StgConApp con args)
        -- For a constructor RHS we want to generate a single chunk of
        -- code which can be jumped to from many places, which will
        -- return the constructor.  It's easy; just behave as if it
        -- was an StgRhsClosure with a ConApp inside!

-------------------------
cgLetNoEscapeClosure
        :: Id                   -- binder
        -> Maybe LocalReg       -- Slot for saved current cost centre
        -> CostCentreStack      -- XXX: *** NOT USED *** why not?
        -> [NonVoid Id]         -- Args (as in \ args -> body)
        -> StgExpr              -- Body (as in above)
        -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
  = do dflags <- getDynFlags
       return ( lneIdInfo dflags bndr args
              , code )
  where
    code = forkLneBody $
           withNewTickyCounterLNE (idName bndr) args $
           do { restoreCurrentCostCentre cc_slot
              ; arg_regs <- bindArgsToRegs args
              ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }


------------------------------------------------------------------------
-- Case expressions
------------------------------------------------------------------------

{- Note [Compiling case expressions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It is quite interesting to decide whether to put a heap-check at the
start of each alternative.  Of course we certainly have to do so if
the case forces an evaluation, or if there is a primitive op which can
trigger GC.

A more interesting situation is this (a Plan-B situation)

        !P!;
        ...P...
        case x# of
          0#      -> !Q!; ...Q...
          default -> !R!; ...R...

where !x! indicates a possible heap-check point.  The heap checks
in the alternatives *can* be omitted, in which case the topmost
heap check will take their worst case into account.

In favour of omitting !Q!, !R!:

 - *May* save a heap overflow test,
   if ...P... allocates anything.

 - We can use relative addressing from a single Hp to
   get at all the closures so allocated.

 - No need to save volatile vars etc across heap checks
   in !Q!, !R!

Against omitting !Q!, !R!

 - May put a heap-check into the inner loop.  Suppose
   the main loop is P -> R -> P -> R...
   Q is the loop exit, and only it does allocation.
   This only hurts us if P does no allocation.  If P allocates,
   then there is a heap check in the inner loop anyway.

 - May do more allocation than required.  This sometimes bites us
   badly.  For example, nfib (ha!) allocates about 30% more space if the
   worst-casing is done, because many many calls to nfib are leaf calls
   which don't need to allocate anything.

   We can un-allocate, but that costs an instruction

Neither problem hurts us if there is only one alternative.

Suppose the inner loop is P->R->P->R etc.  Then here is
how many heap checks we get in the *inner loop* under various
conditions

  Alloc   Heap check in branches (!Q!, !R!)?
  P Q R      yes     no (absorb to !P!)
  --------------------------------------
  n n n      0          0
  n y n      0          1
  n . y      1          1
  y . y      2          1
  y . n      1          1

Best choices: absorb heap checks from Q and R into !P! iff
  a) P itself does some allocation
or
  b) P does allocation, or there is exactly one alternative

We adopt (b) because that is more likely to put the heap check at the
entry to a function, when not many things are live.  After a bunch of
single-branch cases, we may have lots of things live.

Hence: two basic plans for

        case e of r { alts }

------ Plan A: the general case ---------

        ...save current cost centre...

        ...code for e,
           with sequel (SetLocals r)

        ...restore current cost centre...
        ...code for alts...
        ...alts do their own heap checks

------ Plan B: special case when ---------
        (i)  e does not allocate or call GC
        (ii) either upstream code performs allocation
             or there is just one alternative

        Then heap allocation in the (single) case branch
        is absorbed by the upstream check.
        Very common example: primops on unboxed values

        ...code for e,
           with sequel (SetLocals r)...

        ...code for alts...
        ...no heap check...
-}
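
{- A concrete Plan-B example (a sketch, following the primop rules
   encoded in isSimpleScrut/isSimpleOp below):

        case x# +# 1# of r -> Just (I# r)

   '+#' can neither block nor allocate nor trigger GC, so the scrutinee
   is "simple", and the allocation of the Just and I# cells in the
   single alternative is absorbed into the upstream heap check instead
   of getting a check of its own. -}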


-------------------------------------
data GcPlan
  = GcInAlts            -- Put a GC check at the start of the case alternatives,
        [LocalReg]      -- which binds these registers
  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a
                        -- primitive op which does no GC.  Absorb the allocation
                        -- of the case alternative(s) into the upstream check

-------------------------------------
cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ReturnKind

cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
  | isEnumerationTyCon tycon -- Note [case on bool]
  = do { tag_expr <- do_enum_primop op args

       -- If the binder is not dead, convert the tag to a constructor
       -- and assign it.
       ; when (not (isDeadBinder bndr)) $ do
            { dflags <- getDynFlags
            ; tmp_reg <- bindArgToReg (NonVoid bndr)
            ; emitAssign (CmmLocal tmp_reg)
                         (tagToClosure dflags tycon tag_expr) }

       ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
                                              (NonVoid bndr) alts
       ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
       ; return AssignedDirectly
       }
  where
    do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
    do_enum_primop TagToEnumOp [arg]  -- No code!
      = getArgAmode (NonVoid arg)
    do_enum_primop primop args
      = do dflags <- getDynFlags
           tmp <- newTemp (bWord dflags)
           cgPrimOp [tmp] primop args
           return (CmmReg (CmmLocal tmp))

{-
Note [case on bool]
~~~~~~~~~~~~~~~~~~~
This special case handles code like

  case a <# b of
    True ->
    False ->

-->  case tagToEnum# (a <# b) of
        True -> .. ; False -> ...

-->  case (a <# b) of r ->
       case tagToEnum# r of
         True -> .. ; False -> ...

If we let the ordinary case code handle it, we'll get something like

 tmp1 = a < b
 tmp2 = Bool_closure_tbl[tmp1]
 if (tmp2 & 7 != 0) then ... // normal tagged case

but this junk won't optimise away.  What we really want is just an
inline comparison:

 if (a < b) then ...

So we add a special case to generate

 tmp1 = a < b
 if (tmp1 == 0) then ...

and later optimisations will further improve this.

Now that #6135 has been resolved it should be possible to remove this
special case.  The idea behind this special case and the pre-#6135
implementation of Bool-returning primops was that tagToEnum# was added
implicitly in the codegen and then optimised away.  Now the call to
tagToEnum# is explicit in the source code, which allows it to be
optimised away at an earlier stage of compilation (i.e. at the Core
level).
-}


-- Note [ticket #3132]: we might be looking at a case of a lifted Id
-- that was cast to an unlifted type.  The Id will always be bottom,
-- but we don't want the code generator to fall over here.  If we
-- just emit an assignment here, the assignment will be
-- type-incorrect Cmm.  Hence, we emit the usual enter/return code,
-- (and because bottom must be untagged, it will be entered and the
-- program will crash).
-- The Sequel is a type-correct assignment, albeit bogus.
-- The (dead) continuation loops; it would be better to invoke some kind
-- of panic function here.
--
-- However, we also want to allow an assignment to be generated
-- in the case when the types are compatible, because this allows
-- some slightly-dodgy but occasionally-useful casts to be used,
-- such as in RtClosureInspect where we cast an HValue to a MutVar#
-- so we can print out the contents of the MutVar#.  If we generate
-- code that enters the HValue, then we'll get a runtime panic, because
-- the HValue really is a MutVar#.  The types are compatible though,
-- so we can just generate an assignment.
cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
  | isUnLiftedType (idType v)
    || reps_compatible
  = -- assignment suffices for unlifted types
    do { dflags <- getDynFlags
       ; when (not reps_compatible) $
           panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
       ; v_info <- getCgIdInfo v
       ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr))) (idInfoToAmode v_info)
       ; _ <- bindArgsToRegs [NonVoid bndr]
       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
  where
    reps_compatible = idPrimRep v == idPrimRep bndr

cgCase scrut@(StgApp v []) _ (PrimAlt _) _
  = -- fail at run-time, not compile-time
    do { dflags <- getDynFlags
       ; mb_cc <- maybeSaveCostCentre True
       ; _ <- withSequel (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; emitComment $ mkFastString "should be unreachable code"
       ; l <- newLabelC
       ; emitLabel l
       ; emit (mkBranch l)
       ; return AssignedDirectly
       }

{-
case seq# a s of v
  (# s', a' #) -> e

==>

case a of v
  (# s', a' #) -> e

(taking advantage of the fact that the return convention for (# State#, a #)
is the same as the return convention for just 'a')
-}

cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
  = -- handle seq#, same return convention as vanilla 'a'.
    cgCase (StgApp a []) bndr alt_type alts

cgCase scrut bndr alt_type alts
  = -- the general case
    do { dflags <- getDynFlags
       ; up_hp_usg <- getVirtHp        -- Upstream heap usage
       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
             alt_regs  = map (idToReg dflags) ret_bndrs
       ; simple_scrut <- isSimpleScrut scrut alt_type
       ; let do_gc | not simple_scrut = True
                   | isSingleton alts = False
                   | up_hp_usg > 0    = False
                   | otherwise        = True
               -- cf Note [Compiling case expressions]
             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts

       ; mb_cc <- maybeSaveCostCentre simple_scrut

       ; let sequel = AssignTo alt_regs do_gc {- Note [scrut sequel] -}
       ; ret_kind <- withSequel sequel (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; _ <- bindArgsToRegs ret_bndrs
       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
       }


{-
Note [scrut sequel]
~~~~~~~~~~~~~~~~~~~
The job of the scrutinee is to assign its value(s) to alt_regs.
Additionally, if we plan to do a heap-check in the alternatives (see
Note [Compiling case expressions]), then we *must* retreat Hp to
recover any unused heap before passing control to the sequel.  If we
don't do this, then any unused heap will become slop because the heap
check will reset the heap usage.  Slop in the heap breaks LDV profiling
(+RTS -hb) which needs to do a linear sweep through the nursery.
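
For example (a sketch of the idea, not literal generated Cmm): suppose
the upstream heap check reserved 24 bytes but the code before the case
actually allocated only 16.  The scrutinee's sequel must then emit the
equivalent of

   Hp = Hp - 8   // retreat over the 8 unused bytes

before control reaches the alternatives; otherwise those bytes would be
left behind as slop.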

Note [Inlining out-of-line primops and heap checks]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If shouldInlinePrimOp returns True when called from StgCmmExpr for the
purpose of heap check placement, we *must* inline the primop later in
StgCmmPrim.  If we don't, things will go wrong.
-}

-----------------
maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
maybeSaveCostCentre simple_scrut
  | simple_scrut = return Nothing
  | otherwise    = saveCurrentCostCentre


-----------------
isSimpleScrut :: StgExpr -> AltType -> FCode Bool
-- Simple scrutinee, does not block or allocate; hence safe to amalgamate
-- heap usage from alternatives into the stuff before the case
-- NB: if you get this wrong, and claim that the expression doesn't allocate
--     when it does, you'll deeply mess up allocation
isSimpleScrut (StgOpApp op args _) _           = isSimpleOp op args
isSimpleScrut (StgLit _)           _           = return True  -- case 1# of { 0# -> ..; ... }
isSimpleScrut (StgApp _ [])        (PrimAlt _) = return True  -- case x# of { 0# -> ..; ... }
isSimpleScrut _                    _           = return False

isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
-- True iff the op cannot block or allocate
isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
isSimpleOp (StgPrimOp op) stg_args = do
    arg_exprs <- getNonVoidArgAmodes stg_args
    dflags <- getDynFlags
    -- See Note [Inlining out-of-line primops and heap checks]
    return $! isJust $ shouldInlinePrimOp dflags op arg_exprs
isSimpleOp (StgPrimCallOp _) _ = return False

491 chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id]
492 -- These are the binders of a case that are assigned
493 -- by the evaluation of the scrutinee
494 -- Only non-void ones come back
495 chooseReturnBndrs bndr (PrimAlt _) _alts
496 = nonVoidIds [bndr]
497
498 chooseReturnBndrs _bndr (UbxTupAlt _) [(_, ids, _, _)]
499 = nonVoidIds ids -- 'bndr' is not assigned!
500
501 chooseReturnBndrs bndr (AlgAlt _) _alts
502 = nonVoidIds [bndr] -- Only 'bndr' is assigned
503
504 chooseReturnBndrs bndr PolyAlt _alts
505 = nonVoidIds [bndr] -- Only 'bndr' is assigned
506
507 chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
508 -- UbxTupALt has only one alternative
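
-- For example (a hypothetical illustration, not code from this module):
-- in  case f x of b { (# y, z #) -> rhs }  the return from 'f' assigns
-- y and z directly (the UbxTupAlt case above); 'b' itself is never
-- bound to anything.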

-------------------------------------
cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt]
       -> FCode ReturnKind
-- At this point the results of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)

cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)
        -- Here bndrs are *already* in scope, so don't rebind them

cgAlts gc_plan bndr (PrimAlt _) alts
  = do  { dflags <- getDynFlags

        ; tagged_cmms <- cgAltRhss gc_plan bndr alts

        ; let bndr_reg = CmmLocal (idToReg dflags bndr)
              (DEFAULT,deflt) = head tagged_cmms
                -- PrimAlts always have a DEFAULT case
                -- and it always comes first

              tagged_cmms' = [(lit,code)
                             | (LitAlt lit, code) <- tagged_cmms]
        ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
        ; return AssignedDirectly }

cgAlts gc_plan bndr (AlgAlt tycon) alts
  = do  { dflags <- getDynFlags

        ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts

        ; let fam_sz   = tyConFamilySize tycon
              bndr_reg = CmmLocal (idToReg dflags bndr)

        -- Is the constructor tag in the node reg?
        ; if isSmallFamily dflags fam_sz
          then do -- Yes, bndr_reg has the constructor tag in its ls bits
            let tag_expr  = cmmConstrTag1 dflags (CmmReg bndr_reg)
                branches' = [(tag+1,branch) | (tag,branch) <- branches]
            emitSwitch tag_expr branches' mb_deflt 1 fam_sz
            return AssignedDirectly

          else do -- No, get the tag from the info table.  Note that the
                  -- ptr _always_ has tag 1 when the family size is big enough
            let untagged_ptr = cmmRegOffB bndr_reg (-1)
                tag_expr     = getConstrTag dflags untagged_ptr
            emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
            return AssignedDirectly }

cgAlts _ _ _ _ = panic "cgAlts"
        -- UbxTupAlt and PolyAlt have only one alternative


-- Note [alg-alt heap check]
--
-- In an algebraic case with more than one alternative, we will have
-- code like
--
-- L0:
--   x = R1
--   goto L1
-- L1:
--   if (x & 7 >= 2) then goto L2 else goto L3
-- L2:
--   Hp = Hp + 16
--   if (Hp > HpLim) then goto L4
--   ...
-- L4:
--   call gc() returns to L5
-- L5:
--   x = R1
--   goto L1

-------------------
cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
             -> FCode ( Maybe CmmAGraph
                      , [(ConTagZ, CmmAGraph)] )
cgAlgAltRhss gc_plan bndr alts
  = do { tagged_cmms <- cgAltRhss gc_plan bndr alts

       ; let { mb_deflt = case tagged_cmms of
                            ((DEFAULT,rhs) : _) -> Just rhs
                            _other              -> Nothing
                -- DEFAULT is always first, if present

             ; branches = [ (dataConTagZ con, cmm)
                          | (DataAlt con, cmm) <- tagged_cmms ]
             }

       ; return (mb_deflt, branches)
       }


-------------------
cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
          -> FCode [(AltCon, CmmAGraph)]
cgAltRhss gc_plan bndr alts = do
    dflags <- getDynFlags
    let
        base_reg = idToReg dflags bndr
        cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph)
        cg_alt (con, bndrs, _uses, rhs)
          = getCodeR $
            maybeAltHeapCheck gc_plan $
            do { _ <- bindConArgs con base_reg bndrs
               ; _ <- cgExpr rhs
               ; return con }
    forkAlts (map cg_alt alts)

maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
maybeAltHeapCheck (NoGcInAlts,_)  code = code
maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
    altHeapCheck regs code
maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
    altHeapCheckReturnsTo regs lret off code

-----------------------------------------------------------------------------
-- Tail calls
-----------------------------------------------------------------------------

cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
cgConApp con stg_args
  | isUnboxedTupleCon con       -- Unboxed tuple: assign and return
  = do { arg_exprs <- getNonVoidArgAmodes stg_args
       ; tickyUnboxedTupleReturn (length arg_exprs)
       ; emitReturn arg_exprs }

  | otherwise   -- Boxed constructors; allocate and return
  = ASSERT( stg_args `lengthIs` dataConRepRepArity con )
    do  { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
                                              currentCCS con stg_args
                -- The first "con" says that the name bound to this
                -- closure is "con", which is a bit of a fudge, but
                -- it only affects profiling (hence the False)

        ; emit =<< fcode_init
        ; emitReturn [idInfoToAmode idinfo] }

cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
cgIdApp fun_id [] | isVoidTy (idType fun_id) = emitReturn []
cgIdApp fun_id args = do
    dflags         <- getDynFlags
    fun_info       <- getCgIdInfo fun_id
    self_loop_info <- getSelfLoop
    let cg_fun_id = cg_id fun_info
           -- NB: use (cg_id fun_info) instead of fun_id, because
           -- the former may be externalised for -split-objs.
           -- See Note [Externalise when splitting] in StgCmmMonad

        fun_arg     = StgVarArg cg_fun_id
        fun_name    = idName    cg_fun_id
        fun         = idInfoToAmode fun_info
        lf_info     = cg_lf fun_info
        node_points dflags = nodeMustPointToIt dflags lf_info
    case (getCallMethod dflags fun_name cg_fun_id lf_info (length args) (cg_loc fun_info) self_loop_info) of

        -- A value in WHNF, so we can just return it.
        ReturnIt -> emitReturn [fun]    -- ToDo: does ReturnIt guarantee tagged?

        EnterIt -> ASSERT( null args )  -- Discarding arguments
                   emitEnter fun

        -- A slow function call via the RTS apply routines
        SlowCall -> do
            { tickySlowCall lf_info args
            ; emitComment $ mkFastString "slowCall"
            ; slowCall fun args }

        -- A direct function call (possibly with some left-over arguments)
        DirectEntry lbl arity -> do
            { tickyDirectCall arity args
            ; if node_points dflags
                 then directCall NativeNodeCall   lbl arity (fun_arg:args)
                 else directCall NativeDirectCall lbl arity args }

        -- Let-no-escape call or self-recursive tail-call
        JumpToIt blk_id lne_regs -> do
            { adjustHpBackwards -- always do this before a tail-call
            ; cmm_args <- getNonVoidArgAmodes args
            ; emitMultiAssign lne_regs cmm_args
            ; emit (mkBranch blk_id)
            ; return AssignedDirectly }

-- Note [Self-recursive tail calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Self-recursive tail calls can be optimized into a local jump in the same
-- way as let-no-escape bindings (see Note [What is a non-escaping let] in
-- stgSyn/CoreToStg.lhs). Consider this:
--
-- foo.info:
--     a = R1  // calling convention
--     b = R2
--     goto L1
-- L1: ...
--     ...
--     ...
-- L2: R1 = x
--     R2 = y
--     call foo(R1,R2)
--
-- Instead of putting x and y into registers (or other locations required by
-- the calling convention) and performing a call, we can put them into local
-- variables a and b and perform a jump to L1:
--
-- foo.info:
--     a = R1
--     b = R2
--     goto L1
-- L1: ...
--     ...
--     ...
-- L2: a = x
--     b = y
--     goto L1
--
-- This can be done only when a function is calling itself in a tail position,
-- and only if the call passes a number of parameters equal to the function's
-- arity. Note that this cannot be performed if a function calls itself with a
-- continuation.
--
-- This in fact implements the optimization known as "loopification". It was
-- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
-- by Krzysztof Woś, though we use a different approach. Krzysztof performed
-- his optimization at the Cmm level, whereas we perform ours during code
-- generation (the Stg-to-Cmm pass), essentially making sure that the
-- optimized Cmm code is generated in the first place.
--
-- The implementation is spread across a couple of places in the code:
--
-- * The FCode monad stores additional information in its reader environment
--   (the cgd_self_loop field). This information tells us which function can
--   tail call itself in an optimized way (it is the function currently being
--   compiled), what the label of its loop header is (L1 in the example
--   above), and the local registers in which we should place arguments
--   before making a call (a and b in the example above).
--
-- * Whenever we are compiling a function, we set that information to reflect
--   the fact that the function currently being compiled can be jumped to,
--   instead of called. This is done in closureCodeBody in StgCmmBind.
--
-- * We also have to emit a label to which we will be jumping. We make sure
--   that the label is placed after a stack check but before the heap
--   check. The reason is that making a recursive tail-call does not increase
--   the stack so we only need to check once. But it may grow the heap, so we
--   have to repeat the heap check in every self-call. This is done in
--   do_checks in StgCmmHeap.
--
-- * When we begin compilation of another closure we remove the additional
--   information from the environment. This is done by forkClosureBody
--   in StgCmmMonad. Other functions that duplicate the environment -
--   forkLneBody, forkAlts, codeOnly - duplicate that information. In other
--   words, we only need to clean the environment of the self-loop information
--   when compiling the right hand side of a closure (binding).
--
-- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
--   of call will be generated. getCallMethod decides to generate a self
--   recursive tail call when (a) the environment stores information about a
--   possible self tail-call; (b) that tail call is to the function currently
--   being compiled; (c) the number of passed arguments is equal to the
--   function's arity; and (d) loopification is turned on via the
--   -floopification command-line option.
--
-- * The command line option to turn loopification on and off is implemented
--   in DynFlags.
--
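-- As an illustration, here is a sketch of a source-level function that
-- loopification applies to (a hypothetical example, not code from this
-- module):
--
--   count :: Int# -> Int# -> Int#
--   count 0# acc = acc
--   count n  acc = count (n -# 1#) (acc +# n)
--
-- With -floopification, the self tail-call in the second equation compiles
-- to the equivalent of "a = n - 1; b = acc + n; goto L1" in the scheme
-- above, rather than to a full call through the calling convention.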


emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
  { dflags <- getDynFlags
  ; adjustHpBackwards
  ; sequel <- getSequel
  ; updfr_off <- getUpdFrameOff
  ; case sequel of
      -- For a return, we have the option of generating a tag-test or
      -- not.  If the value is tagged, we can return directly, which
      -- is quicker than entering the value.  This is a code
      -- size/speed trade-off: when optimising for speed rather than
      -- size we could generate the tag test.
      --
      -- Right now, we do what the old codegen did, and omit the tag
      -- test, just generating an enter.
      Return _ -> do
        { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg
        ; emit $ mkJump dflags NativeNodeCall entry
                        [cmmUntag dflags fun] updfr_off
        ; return AssignedDirectly
        }

      -- The result will be scrutinised in the sequel.  This is where
      -- we generate a tag-test to avoid entering the closure if
      -- possible.
      --
      -- The generated code will be something like this:
      --
      --    R1 = fun  -- copyout
      --    if (fun & 7 != 0) goto Lcall else goto Lret
      --  Lcall:
      --    call [fun] returns to Lret
      --  Lret:
      --    fun' = R1  -- copyin
      --    ...
      --
      -- Note in particular that the label Lret is used as a
      -- destination by both the tag-test and the call.  This is
      -- because Lret will necessarily be a proc-point, and we want to
      -- ensure that we generate only one proc-point for this
      -- sequence.
      --
      -- Furthermore, we tell the caller that we generated a native
      -- return continuation by returning (ReturnedTo Lret off), so
      -- that the continuation can be reused by the heap-check failure
      -- code in the enclosing case expression.
      --
      AssignTo res_regs _ -> do
        { lret  <- newLabelC
        ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs []
        ; lcall <- newLabelC
        ; let area = Young lret
        ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
                                                      [fun] updfr_off []
          -- refer to fun via nodeReg after the copyout, to avoid having
          -- both live simultaneously; this sometimes enables fun to be
          -- inlined in the RHS of the R1 assignment.
        ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg))
              the_call = toCall entry (Just lret) updfr_off off outArgs regs
        ; emit $
            copyout <*>
            mkCbranch (cmmIsTagged dflags (CmmReg nodeReg)) lret lcall <*>
            outOfLine lcall the_call <*>
            mkLabel lret <*>
            copyin
        ; return (ReturnedTo lret off)
        }
  }