{-# LANGUAGE CPP #-}

-----------------------------------------------------------------------------
--
-- Stg to C-- code generation: expressions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmExpr ( cgExpr ) where

#define FAST_STRING_NOT_NEEDED
#include "HsVersions.h"

import {-# SOURCE #-} StgCmmBind ( cgBind )

import StgCmmMonad
import StgCmmHeap
import StgCmmEnv
import StgCmmCon
import StgCmmProf (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
import StgCmmLayout
import StgCmmPrim
import StgCmmHpc
import StgCmmTicky
import StgCmmUtils
import StgCmmClosure

import StgSyn

import MkGraph
import BlockId
import Cmm
import CmmInfo
import CoreSyn
import DataCon
import ForeignCall
import Id
import PrimOp
import TyCon
import Type
import CostCentre       ( CostCentreStack, currentCCS )
import Maybes
import Util
import FastString
import Outputable

import Control.Monad (when,void)

#if __GLASGOW_HASKELL__ >= 709
import Prelude hiding ((<*>))
#endif

------------------------------------------------------------------------
-- cgExpr: the main function
------------------------------------------------------------------------

cgExpr :: StgExpr -> FCode ReturnKind

cgExpr (StgApp fun args)     = cgIdApp fun args

{- seq# a s ==> a -}
cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
  cgIdApp a []

cgExpr (StgOpApp op args ty) = cgOpApp op args ty
cgExpr (StgConApp con args)  = cgConApp con args
cgExpr (StgSCC cc tick push expr) = do { emitSetCCC cc tick push; cgExpr expr }
cgExpr (StgTick m n expr) = do dflags <- getDynFlags
                               emit (mkTickBox dflags m n)
                               cgExpr expr
cgExpr (StgLit lit)       = do cmm_lit <- cgLit lit
                               emitReturn [CmmLit cmm_lit]

cgExpr (StgLet binds expr)             = do { cgBind binds; cgExpr expr }
cgExpr (StgLetNoEscape _ _ binds expr) =
  do { u <- newUnique
     ; let join_id = mkBlockId u
     ; cgLneBinds join_id binds
     ; r <- cgExpr expr
     ; emitLabel join_id
     ; return r }

cgExpr (StgCase expr _live_vars _save_vars bndr _srt alt_type alts) =
  cgCase expr bndr alt_type alts

cgExpr (StgLam {}) = panic "cgExpr: StgLam"

------------------------------------------------------------------------
-- Let no escape
------------------------------------------------------------------------

{- Generating code for a let-no-escape binding, aka a join point, is
very similar to what we do for a case expression.  The duality is
between
        let-no-escape x = b
        in e
and
        case e of ... -> b

That is, the RHS of 'x' (ie 'b') will execute *later*, just like
the alternative of the case; it needs to be compiled in an environment
in which all volatile bindings are forgotten, and the free vars are
bound only to stable things like stack locations.  The 'e' part will
execute *next*, just like the scrutinee of a case. -}
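
{- For example (a hedged sketch, not taken from the compiler's output):

        let-no-escape j = b
        in case e0 of
             A -> j
             B -> j

   compiles 'b' to a single out-of-line block ending in a branch to the
   join label, and each use of 'j' to a branch to b's block (after
   assigning any argument registers), which is the shape that
   cgLneBinds and cgLetNoEscapeRhs below arrange. -}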

-------------------------
cgLneBinds :: BlockId -> StgBinding -> FCode ()
cgLneBinds join_id (StgNonRec bndr rhs)
  = do { local_cc <- saveCurrentCostCentre
                -- See Note [Saving the current cost centre]
       ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
       ; fcode
       ; addBindC info }

cgLneBinds join_id (StgRec pairs)
  = do { local_cc <- saveCurrentCostCentre
       ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
       ; let (infos, fcodes) = unzip r
       ; addBindsC infos
       ; sequence_ fcodes
       }

-------------------------
cgLetNoEscapeRhs
    :: BlockId          -- join point for successor of let-no-escape
    -> Maybe LocalReg   -- Saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeRhs join_id local_cc bndr rhs =
  do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
     ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
     ; let code = do { body <- getCode rhs_code
                     ; emitOutOfLine bid (body <*> mkBranch join_id) }
     ; return (info, code)
     }

cgLetNoEscapeRhsBody
    :: Maybe LocalReg   -- Saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())
cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd _ args body)
  = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
  = cgLetNoEscapeClosure bndr local_cc cc [] (StgConApp con args)
        -- For a constructor RHS we want to generate a single chunk of
        -- code which can be jumped to from many places, which will
        -- return the constructor. It's easy; just behave as if it
        -- was an StgRhsClosure with a ConApp inside!
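        -- For instance (a hedged sketch): a binding
        --      let-no-escape j = Just y in ... j ... j ...
        -- is compiled as though its RHS were the expression
        --      StgConApp Just [y]
        -- so every occurrence of 'j' jumps to one shared block that
        -- builds and returns the 'Just' cell.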

-------------------------
cgLetNoEscapeClosure
    :: Id                 -- binder
    -> Maybe LocalReg     -- Slot for saved current cost centre
    -> CostCentreStack    -- XXX: *** NOT USED *** why not?
    -> [NonVoid Id]       -- Args (as in \ args -> body)
    -> StgExpr            -- Body (as in above)
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
  = do dflags <- getDynFlags
       return ( lneIdInfo dflags bndr args
              , code )
  where
    code = forkLneBody $ do {
             ; withNewTickyCounterLNE (idName bndr) args $ do
             ; restoreCurrentCostCentre cc_slot
             ; arg_regs <- bindArgsToRegs args
             ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }


------------------------------------------------------------------------
-- Case expressions
------------------------------------------------------------------------

{- Note [Compiling case expressions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It is quite interesting to decide whether to put a heap-check at the
start of each alternative.  Of course we certainly have to do so if
the case forces an evaluation, or if there is a primitive op which can
trigger GC.

A more interesting situation is this (a Plan-B situation)

        !P!;
        ...P...
        case x# of
          0#      -> !Q!; ...Q...
          default -> !R!; ...R...

where !x! indicates a possible heap-check point.  The heap checks
in the alternatives *can* be omitted, in which case the topmost
heap check will take their worst case into account.

In favour of omitting !Q!, !R!:

 - *May* save a heap overflow test,
   if ...P... allocates anything.

 - We can use relative addressing from a single Hp to
   get at all the closures so allocated.

 - No need to save volatile vars etc across heap checks
   in !Q!, !R!

Against omitting !Q!, !R!:

 - May put a heap-check into the inner loop.  Suppose
   the main loop is P -> R -> P -> R...
   Q is the loop exit, and only it does allocation.
   This only hurts us if P does no allocation.  If P allocates,
   then there is a heap check in the inner loop anyway.

 - May do more allocation than reqd.  This sometimes bites us
   badly.  For example, nfib (ha!) allocates about 30% more space if the
   worst-casing is done, because many many calls to nfib are leaf calls
   which don't need to allocate anything.

   We can un-allocate, but that costs an instruction.

Neither problem hurts us if there is only one alternative.

Suppose the inner loop is P->R->P->R etc.  Then here is
how many heap checks we get in the *inner loop* under various
conditions

     Alloc   Heap check in branches (!Q!, !R!)?
     P Q R      yes     no (absorb to !P!)
   --------------------------------------
     n n n      0          0
     n y n      0          1
     n . y      1          1
     y . y      2          1
     y . n      1          1

Two candidate policies: absorb heap checks from Q and R into !P! iff

   a) P itself does some allocation
or
   b) P does allocation, or there is exactly one alternative

We adopt (b) because that is more likely to put the heap check at the
entry to a function, when not many things are live.  After a bunch of
single-branch cases, we may have lots of things live.

Hence: two basic plans for

        case e of r { alts }

------ Plan A: the general case ---------

        ...save current cost centre...

        ...code for e,
           with sequel (SetLocals r)

        ...restore current cost centre...
        ...code for alts...
        ...alts do their own heap checks

------ Plan B: special case when ---------
     (i)  e does not allocate or call GC
     (ii) either upstream code performs allocation
          or there is just one alternative

     Then heap allocation in the (single) case branch
     is absorbed by the upstream check.
     Very common example: primops on unboxed values

        ...code for e,
           with sequel (SetLocals r)...

        ...code for alts...
        ...no heap check...
-}
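
{- As a hedged, source-level illustration of the two plans (schematic;
   the names are made up and the exact code depends on flags and
   compiler version):

        f :: Int -> Int
        f x = case x of
                I# x# ->                -- Plan A: forces evaluation of x,
                                        -- so the alternative does its own
                                        -- heap check (GcInAlts)
                  case x# +# 1# of
                    y# -> I# y#         -- Plan B: primop scrutinee with one
                                        -- alternative (NoGcInAlts); the I#
                                        -- allocation is absorbed upstream
-}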



-------------------------------------
data GcPlan
  = GcInAlts            -- Put a GC check at the start of the case alternatives,
       [LocalReg]       --   which binds these registers
  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a
                        --   primitive op which does no GC.  Absorb the allocation
                        --   of the case alternative(s) into the upstream check

-------------------------------------
cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ReturnKind

cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
  | isEnumerationTyCon tycon -- Note [case on bool]
  = do { tag_expr <- do_enum_primop op args

       -- If the binder is not dead, convert the tag to a constructor
       -- and assign it.
       ; when (not (isDeadBinder bndr)) $ do
           { dflags <- getDynFlags
           ; tmp_reg <- bindArgToReg (NonVoid bndr)
           ; emitAssign (CmmLocal tmp_reg)
                        (tagToClosure dflags tycon tag_expr) }

       ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
                                              (NonVoid bndr) alts
       ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
       ; return AssignedDirectly
       }
  where
    do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
    do_enum_primop TagToEnumOp [arg]  -- No code!
      = getArgAmode (NonVoid arg)
    do_enum_primop primop args
      = do dflags <- getDynFlags
           tmp <- newTemp (bWord dflags)
           cgPrimOp [tmp] primop args
           return (CmmReg (CmmLocal tmp))

{-
Note [case on bool]
~~~~~~~~~~~~~~~~~~~
This special case handles code like

  case a <# b of
    True  -> ...
    False -> ...

-->  case tagToEnum# (a <# b) of
       True -> .. ; False -> ...

-->  case (a <# b) of r ->
     case tagToEnum# r of
       True -> .. ; False -> ...

If we let the ordinary case code handle it, we'll get something like

  tmp1 = a < b
  tmp2 = Bool_closure_tbl[tmp1]
  if (tmp2 & 7 != 0) then ... // normal tagged case

but this junk won't optimise away.  What we really want is just an
inline comparison:

  if (a < b) then ...

So we add a special case to generate

  tmp1 = a < b
  if (tmp1 == 0) then ...

and later optimisations will further improve this.

Now that #6135 has been resolved it should be possible to remove this
special case.  The idea behind this special case and the pre-6135
implementation of Bool-returning primops was that tagToEnum# was added
implicitly in the codegen and then optimized away.  Now the call to
tagToEnum# is explicit in the source code, which allows it to be
optimized away at earlier stages of compilation (i.e. at the Core level).
-}
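
-- As a hedged sketch, the STG shape matched by the enumeration special
-- case in cgCase above is (schematically):
--
--      case (x# <# y#) of r ->
--      case tagToEnum# r of b
--        { False -> e1; True -> e2 }
--
-- do_enum_primop emits no code at all for the tagToEnum# (it just uses
-- r's amode), and the switch is on the unboxed tag; the Bool closure
-- is only materialised, via tagToClosure, when the binder b is not dead.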


-- Note [ticket #3132]: we might be looking at a case of a lifted Id
-- that was cast to an unlifted type.  The Id will always be bottom,
-- but we don't want the code generator to fall over here.  If we
-- just emit an assignment here, the assignment will be
-- type-incorrect Cmm.  Hence, we emit the usual enter/return code
-- (and because bottom must be untagged, it will be entered and the
-- program will crash).
-- The Sequel is a type-correct assignment, albeit bogus.
-- The (dead) continuation loops; it would be better to invoke some kind
-- of panic function here.
--
-- However, we also want to allow an assignment to be generated
-- in the case when the types are compatible, because this allows
-- some slightly-dodgy but occasionally-useful casts to be used,
-- such as in RtClosureInspect where we cast an HValue to a MutVar#
-- so we can print out the contents of the MutVar#.  If we generate
-- code that enters the HValue, then we'll get a runtime panic, because
-- the HValue really is a MutVar#.  The types are compatible though,
-- so we can just generate an assignment.
cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
  | isUnLiftedType (idType v)
  || reps_compatible
  = -- assignment suffices for unlifted types
    do { dflags <- getDynFlags
       ; when (not reps_compatible) $
           panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
       ; v_info <- getCgIdInfo v
       ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr))) (idInfoToAmode v_info)
       ; _ <- bindArgsToRegs [NonVoid bndr]
       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
  where
    reps_compatible = idPrimRep v == idPrimRep bndr

cgCase scrut@(StgApp v []) _ (PrimAlt _) _
  = -- fail at run-time, not compile-time
    do { dflags <- getDynFlags
       ; mb_cc <- maybeSaveCostCentre True
       ; _ <- withSequel (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; emitComment $ mkFastString "should be unreachable code"
       ; l <- newLabelC
       ; emitLabel l
       ; emit (mkBranch l)
       ; return AssignedDirectly
       }

{-
case seq# a s of v
  (# s', a' #) -> e

==>

case a of v
  (# s', a' #) -> e

(taking advantage of the fact that the return convention for (# State#, a #)
is the same as the return convention for just 'a')
-}

cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
  = -- handle seq#, same return convention as vanilla 'a'.
    cgCase (StgApp a []) bndr alt_type alts

cgCase scrut bndr alt_type alts
  = -- the general case
    do { dflags <- getDynFlags
       ; up_hp_usg <- getVirtHp        -- Upstream heap usage
       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
             alt_regs  = map (idToReg dflags) ret_bndrs
       ; simple_scrut <- isSimpleScrut scrut alt_type
       ; let do_gc | not simple_scrut = True
                   | isSingleton alts = False
                   | up_hp_usg > 0    = False
                   | otherwise        = True
               -- cf Note [Compiling case expressions]
             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts

       ; mb_cc <- maybeSaveCostCentre simple_scrut

       ; let sequel = AssignTo alt_regs do_gc {- Note [scrut sequel] -}
       ; ret_kind <- withSequel sequel (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; _ <- bindArgsToRegs ret_bndrs
       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
       }


{-
Note [scrut sequel]
~~~~~~~~~~~~~~~~~~~
The job of the scrutinee is to assign its value(s) to alt_regs.
Additionally, if we plan to do a heap-check in the alternatives (see
Note [Compiling case expressions]), then we *must* retreat Hp to
recover any unused heap before passing control to the sequel.  If we
don't do this, then any unused heap will become slop because the heap
check will reset the heap usage.  Slop in the heap breaks LDV profiling
(+RTS -hb), which needs to do a linear sweep through the nursery.
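
(A hedged, concrete reading of the above: if the scrutinee code bumped
the virtual Hp by two words that the chosen alternative never uses, the
generated code must move Hp back, in the style of adjustHpBackwards,
before the alternatives' heap check runs; otherwise those two words
would be skipped by the next check and left behind as slop.)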


Note [Inlining out-of-line primops and heap checks]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If shouldInlinePrimOp returns True when called from StgCmmExpr for the
purpose of heap check placement, we *must* inline the primop later in
StgCmmPrim.  If we don't, things will go wrong.
-}

-----------------
maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
maybeSaveCostCentre simple_scrut
  | simple_scrut = return Nothing
  | otherwise    = saveCurrentCostCentre


-----------------
isSimpleScrut :: StgExpr -> AltType -> FCode Bool
-- Simple scrutinee, does not block or allocate; hence safe to amalgamate
-- heap usage from alternatives into the stuff before the case
-- NB: if you get this wrong, and claim that the expression doesn't allocate
--     when it does, you'll deeply mess up allocation
isSimpleScrut (StgOpApp op args _) _           = isSimpleOp op args
isSimpleScrut (StgLit _)           _           = return True  -- case 1# of { 0# -> ..; ... }
isSimpleScrut (StgApp _ [])        (PrimAlt _) = return True  -- case x# of { 0# -> ..; ... }
isSimpleScrut _                    _           = return False

isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
-- True iff the op cannot block or allocate
isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
isSimpleOp (StgPrimOp op) stg_args = do
    arg_exprs <- getNonVoidArgAmodes stg_args
    dflags <- getDynFlags
    -- See Note [Inlining out-of-line primops and heap checks]
    return $! isJust $ shouldInlinePrimOp dflags op arg_exprs
isSimpleOp (StgPrimCallOp _) _ = return False

-----------------
chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id]
-- These are the binders of a case that are assigned
-- by the evaluation of the scrutinee
-- Only non-void ones come back
chooseReturnBndrs bndr (PrimAlt _) _alts
  = nonVoidIds [bndr]

chooseReturnBndrs _bndr (UbxTupAlt _) [(_, ids, _, _)]
  = nonVoidIds ids      -- 'bndr' is not assigned!

chooseReturnBndrs bndr (AlgAlt _) _alts
  = nonVoidIds [bndr]   -- Only 'bndr' is assigned

chooseReturnBndrs bndr PolyAlt _alts
  = nonVoidIds [bndr]   -- Only 'bndr' is assigned

chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
  -- UbxTupAlt has only one alternative

-------------------------------------
cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt]
       -> FCode ReturnKind
-- At this point the results of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)

cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)
        -- Here bndrs are *already* in scope, so don't rebind them

cgAlts gc_plan bndr (PrimAlt _) alts
  = do { dflags <- getDynFlags

       ; tagged_cmms <- cgAltRhss gc_plan bndr alts

       ; let bndr_reg = CmmLocal (idToReg dflags bndr)
             (DEFAULT,deflt) = head tagged_cmms
                -- PrimAlts always have a DEFAULT case
                -- and it always comes first

             tagged_cmms' = [(lit,code)
                            | (LitAlt lit, code) <- tagged_cmms]
       ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
       ; return AssignedDirectly }

cgAlts gc_plan bndr (AlgAlt tycon) alts
  = do { dflags <- getDynFlags

       ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts

       ; let fam_sz   = tyConFamilySize tycon
             bndr_reg = CmmLocal (idToReg dflags bndr)

       -- Is the constructor tag in the node reg?
       ; if isSmallFamily dflags fam_sz
         then do
                let -- Yes, bndr_reg has constr. tag in ls bits
                    tag_expr  = cmmConstrTag1 dflags (CmmReg bndr_reg)
                    branches' = [(tag+1,branch) | (tag,branch) <- branches]
                emitSwitch tag_expr branches' mb_deflt 1 fam_sz
                return AssignedDirectly

         else -- No, get tag from info table
              do dflags <- getDynFlags
                 let -- Note that ptr _always_ has tag 1
                     -- when the family size is big enough
                     untagged_ptr = cmmRegOffB bndr_reg (-1)
                     tag_expr     = getConstrTag dflags untagged_ptr
                 emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
                 return AssignedDirectly }

cgAlts _ _ _ _ = panic "cgAlts"
        -- UbxTupAlt and PolyAlt have only one alternative


-- Note [alg-alt heap check]
--
-- In an algebraic case with more than one alternative, we will have
-- code like
--
-- L0:
--   x = R1
--   goto L1
-- L1:
--   if (x & 7 >= 2) then goto L2 else goto L3
-- L2:
--   Hp = Hp + 16
--   if (Hp > HpLim) then goto L4
--   ...
-- L4:
--   call gc() returns to L5
-- L5:
--   x = R1
--   goto L1
-------------------
cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
             -> FCode ( Maybe CmmAGraph
                      , [(ConTagZ, CmmAGraph)] )
cgAlgAltRhss gc_plan bndr alts
  = do { tagged_cmms <- cgAltRhss gc_plan bndr alts

       ; let { mb_deflt = case tagged_cmms of
                            ((DEFAULT,rhs) : _) -> Just rhs
                            _other              -> Nothing
                            -- DEFAULT is always first, if present

             ; branches = [ (dataConTagZ con, cmm)
                          | (DataAlt con, cmm) <- tagged_cmms ]
             }

       ; return (mb_deflt, branches)
       }


-------------------
cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
          -> FCode [(AltCon, CmmAGraph)]
cgAltRhss gc_plan bndr alts = do
  dflags <- getDynFlags
  let
    base_reg = idToReg dflags bndr
    cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph)
    cg_alt (con, bndrs, _uses, rhs)
      = getCodeR                $
        maybeAltHeapCheck gc_plan $
        do { _ <- bindConArgs con base_reg bndrs
           ; _ <- cgExpr rhs
           ; return con }
  forkAlts (map cg_alt alts)

maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
maybeAltHeapCheck (NoGcInAlts,_) code = code
maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
  altHeapCheck regs code
maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
  altHeapCheckReturnsTo regs lret off code

-----------------------------------------------------------------------------
-- Tail calls
-----------------------------------------------------------------------------

cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
cgConApp con stg_args
  | isUnboxedTupleCon con       -- Unboxed tuple: assign and return
  = do { arg_exprs <- getNonVoidArgAmodes stg_args
       ; tickyUnboxedTupleReturn (length arg_exprs)
       ; emitReturn arg_exprs }

  | otherwise   -- Boxed constructors; allocate and return
  = ASSERT( stg_args `lengthIs` dataConRepRepArity con )
    do { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
                                             currentCCS con stg_args
            -- The first "con" says that the name bound to this
            -- closure is "con", which is a bit of a fudge, but
            -- it only affects profiling (hence the False)

       ; emit =<< fcode_init
       ; emitReturn [idInfoToAmode idinfo] }

cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
cgIdApp fun_id [] | isVoidTy (idType fun_id) = emitReturn []
cgIdApp fun_id args = do
    dflags         <- getDynFlags
    fun_info       <- getCgIdInfo fun_id
    self_loop_info <- getSelfLoop
    let cg_fun_id = cg_id fun_info
           -- NB: use (cg_id fun_info) instead of fun_id, because
           -- the former may be externalised for -split-objs.
           -- See Note [Externalise when splitting] in StgCmmMonad

        fun_arg  = StgVarArg cg_fun_id
        fun_name = idName cg_fun_id
        fun      = idInfoToAmode fun_info
        lf_info  = cg_lf fun_info
        node_points dflags = nodeMustPointToIt dflags lf_info
    case getCallMethod dflags fun_name cg_fun_id lf_info
                       (length args) (cg_loc fun_info) self_loop_info of

        -- A value in WHNF, so we can just return it.
        ReturnIt -> emitReturn [fun]    -- ToDo: does ReturnIt guarantee tagged?

        EnterIt -> ASSERT( null args )  -- Discarding arguments
                   emitEnter fun

        SlowCall -> do  -- A slow function call via the RTS apply routines
            { tickySlowCall lf_info args
            ; emitComment $ mkFastString "slowCall"
            ; slowCall fun args }

        -- A direct function call (possibly with some left-over arguments)
        DirectEntry lbl arity -> do
            { tickyDirectCall arity args
            ; if node_points dflags
                 then directCall NativeNodeCall   lbl arity (fun_arg:args)
                 else directCall NativeDirectCall lbl arity args }

        -- Let-no-escape call or self-recursive tail-call
        JumpToIt blk_id lne_regs -> do
            { adjustHpBackwards -- always do this before a tail-call
            ; cmm_args <- getNonVoidArgAmodes args
            ; emitMultiAssign lne_regs cmm_args
            ; emit (mkBranch blk_id)
            ; return AssignedDirectly }

-- Note [Self-recursive tail calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Self-recursive tail calls can be optimized into a local jump in the same
-- way as let-no-escape bindings (see Note [What is a non-escaping let] in
-- stgSyn/CoreToStg.lhs). Consider this:
--
-- foo.info:
--     a = R1  // calling convention
--     b = R2
--     goto L1
-- L1: ...
--     ...
--     ...
-- L2: R1 = x
--     R2 = y
--     call foo(R1,R2)
--
-- Instead of putting x and y into registers (or other locations required by
-- the calling convention) and performing a call, we can put them into the
-- local variables a and b and perform a jump to L1:
--
-- foo.info:
--     a = R1
--     b = R2
--     goto L1
-- L1: ...
--     ...
--     ...
-- L2: a = x
--     b = y
--     goto L1
--
-- This can be done only when a function is calling itself in a tail position,
-- and only if the call passes a number of arguments equal to the function's
-- arity. Note that this cannot be performed if a function calls itself with a
-- continuation.
--
-- This in fact implements the optimization known as "loopification". It was
-- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
-- by Krzysztof Woś, though we use a different approach. Krzysztof performed
-- his optimization at the Cmm level, whereas we perform ours during code
-- generation (the Stg-to-Cmm pass), essentially making sure that the
-- optimized Cmm code is generated in the first place.
--
-- The implementation is spread across a couple of places in the code:
--
-- * The FCode monad stores additional information in its reader environment
--   (the cgd_self_loop field). This information tells us which function can
--   tail call itself in an optimized way (it is the function currently
--   being compiled), what the label of its loop header is (L1 in the example
--   above), and which local registers we should place the arguments in
--   before jumping (a and b in the example above).
--
-- * Whenever we are compiling a function, we set that information to reflect
--   the fact that the function currently being compiled can be jumped to,
--   instead of called. This is done in closureCodeBody in StgCmmBind.
--
-- * We also have to emit a label to which we will be jumping. We make sure
--   that the label is placed after a stack check but before the heap
--   check. The reason is that making a recursive tail-call does not increase
--   the stack so we only need to check once. But it may grow the heap, so we
--   have to repeat the heap check in every self-call. This is done in
--   do_checks in StgCmmHeap.
--
-- * When we begin compilation of another closure we remove the additional
--   information from the environment. This is done by forkClosureBody
--   in StgCmmMonad. Other functions that duplicate the environment -
--   forkLneBody, forkAlts, codeOnly - duplicate that information. In other
--   words, we only need to clean the environment of the self-loop information
--   when compiling the right hand side of a closure (binding).
--
-- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
--   of call will be generated. getCallMethod decides to generate a self
--   recursive tail call when (a) the environment stores information about a
--   possible self tail-call; (b) that tail call is to the function currently
--   being compiled; (c) the number of passed arguments is equal to the
--   function's arity; and (d) loopification is turned on via the
--   -floopification command-line option.
--
-- * The command-line option to turn loopification on and off is implemented
--   in DynFlags.
--
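-- A hedged example of a loopification candidate (illustrative only; the
-- names are made up):
--
--      sumTo :: Int# -> Int# -> Int#
--      sumTo acc 0# = acc
--      sumTo acc n  = sumTo (acc +# n) (n -# 1#)
--
-- The self-call is saturated and in tail position, so with
-- -floopification it should compile to two assignments to the argument
-- registers followed by a jump to the loop header, exactly as in the
-- foo/L1 example above.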


emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
  { dflags <- getDynFlags
  ; adjustHpBackwards
  ; sequel <- getSequel
  ; updfr_off <- getUpdFrameOff
  ; case sequel of
      -- For a return, we have the option of generating a tag-test or
      -- not.  If the value is tagged, we can return directly, which
      -- is quicker than entering the value.  This is a code
      -- size/speed trade-off: when optimising for speed rather than
      -- size we could generate the tag test.
      --
      -- Right now, we do what the old codegen did, and omit the tag
      -- test, just generating an enter.
      Return _ -> do
        { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg
        ; emit $ mkJump dflags NativeNodeCall entry
                        [cmmUntag dflags fun] updfr_off
        ; return AssignedDirectly
        }

      -- The result will be scrutinised in the sequel.  This is where
      -- we generate a tag-test to avoid entering the closure if
      -- possible.
      --
      -- The generated code will be something like this:
      --
      --    R1 = fun  -- copyout
      --    if (fun & 7 != 0) goto Lcall else goto Lret
      --  Lcall:
      --    call [fun] returns to Lret
      --  Lret:
      --    fun' = R1  -- copyin
      --    ...
      --
      -- Note in particular that the label Lret is used as a
      -- destination by both the tag-test and the call.  This is
      -- because Lret will necessarily be a proc-point, and we want to
      -- ensure that we generate only one proc-point for this
      -- sequence.
      --
      -- Furthermore, we tell the caller that we generated a native
      -- return continuation by returning (ReturnedTo Lret off), so
      -- that the continuation can be reused by the heap-check failure
      -- code in the enclosing case expression.
      --
      AssignTo res_regs _ -> do
        { lret  <- newLabelC
        ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs []
        ; lcall <- newLabelC
        ; updfr_off <- getUpdFrameOff
        ; let area = Young lret
        ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
                                                      [fun] updfr_off []
          -- refer to fun via nodeReg after the copyout, to avoid having
          -- both live simultaneously; this sometimes enables fun to be
          -- inlined in the RHS of the R1 assignment.
        ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg))
              the_call = toCall entry (Just lret) updfr_off off outArgs regs
        ; emit $
            copyout <*>
            mkCbranch (cmmIsTagged dflags (CmmReg nodeReg)) lret lcall <*>
            outOfLine lcall the_call <*>
            mkLabel lret <*>
            copyin
        ; return (ReturnedTo lret off)
        }
  }