1 {-# LANGUAGE CPP #-}
2 {-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
3
4 -----------------------------------------------------------------------------
5 --
6 -- Stg to C-- code generation: expressions
7 --
8 -- (c) The University of Glasgow 2004-2006
9 --
10 -----------------------------------------------------------------------------
11
12 module StgCmmExpr ( cgExpr ) where
13
14 #include "HsVersions.h"
15
16 import {-# SOURCE #-} StgCmmBind ( cgBind )
17
18 import StgCmmMonad
19 import StgCmmHeap
20 import StgCmmEnv
21 import StgCmmCon
22 import StgCmmProf (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
23 import StgCmmLayout
24 import StgCmmPrim
25 import StgCmmHpc
26 import StgCmmTicky
27 import StgCmmUtils
28 import StgCmmClosure
29
30 import StgSyn
31
32 import MkGraph
33 import BlockId
34 import Cmm
35 import CmmInfo
36 import CoreSyn
37 import DataCon
38 import ForeignCall
39 import Id
40 import PrimOp
41 import TyCon
42 import Type
43 import RepType ( isVoidTy, countConRepArgs )
44 import CostCentre ( CostCentreStack, currentCCS )
45 import Maybes
46 import Util
47 import FastString
48 import Outputable
49
50 import Control.Monad (unless,void)
51 import Control.Arrow (first)
52
53 import Prelude hiding ((<*>))
54
55 ------------------------------------------------------------------------
56 -- cgExpr: the main function
57 ------------------------------------------------------------------------
58
59 cgExpr :: StgExpr -> FCode ReturnKind
60
61 cgExpr (StgApp fun args) = cgIdApp fun args
62
63 {- seq# a s ==> a -}
64 cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
65 cgIdApp a []
66
67 cgExpr (StgOpApp op args ty) = cgOpApp op args ty
68 cgExpr (StgConApp con args _)= cgConApp con args
69 cgExpr (StgTick t e) = cgTick t >> cgExpr e
70 cgExpr (StgLit lit) = do cmm_lit <- cgLit lit
71 emitReturn [CmmLit cmm_lit]
72
73 cgExpr (StgLet binds expr) = do { cgBind binds; cgExpr expr }
74 cgExpr (StgLetNoEscape binds expr) =
75 do { u <- newUnique
76 ; let join_id = mkBlockId u
77 ; cgLneBinds join_id binds
78 ; r <- cgExpr expr
79 ; emitLabel join_id
80 ; return r }
81
82 cgExpr (StgCase expr bndr alt_type alts) =
83 cgCase expr bndr alt_type alts
84
85 cgExpr (StgLam {}) = panic "cgExpr: StgLam"
86
87 ------------------------------------------------------------------------
88 -- Let no escape
89 ------------------------------------------------------------------------
90
91 {- Generating code for a let-no-escape binding, aka join point, is very
92 very similar to what we do for a case expression. The duality is
93 between
94 let-no-escape x = b
95 in e
96 and
97 case e of ... -> b
98
99 That is, the RHS of 'x' (ie 'b') will execute *later*, just like
100 the alternative of the case; it needs to be compiled in an environment
101 in which all volatile bindings are forgotten, and the free vars are
102 bound only to stable things like stack locations. The 'e' part will
103 execute *next*, just like the scrutinee of a case. -}
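{- An illustrative sketch (names and labels invented, not verbatim output):
   for

        let-no-escape j = \ x -> <rhs>
        in case v of { A -> j a1 ; B -> j a2 }

   the RHS of 'j' is emitted out of line, ending in a branch to the join
   label, and each tail call to 'j' becomes an assignment to j's argument
   registers followed by a jump:

        Lj:    <code for rhs, with x in rx>
               goto Ljoin
        ...
        rx = a1; goto Lj          -- a call site, instead of a real call
        ...
        Ljoin: <code following the let-no-escape>

   See cgLneBinds / cgLetNoEscapeRhs below, and the JumpToIt case of
   cgIdApp for how the call sites are generated. -}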
104
105 -------------------------
106 cgLneBinds :: BlockId -> StgBinding -> FCode ()
107 cgLneBinds join_id (StgNonRec bndr rhs)
108 = do { local_cc <- saveCurrentCostCentre
109 -- See Note [Saving the current cost centre]
110 ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
111 ; fcode
112 ; addBindC info }
113
114 cgLneBinds join_id (StgRec pairs)
115 = do { local_cc <- saveCurrentCostCentre
116 ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
117 ; let (infos, fcodes) = unzip r
118 ; addBindsC infos
119 ; sequence_ fcodes
120 }
121
122 -------------------------
123 cgLetNoEscapeRhs
124 :: BlockId -- join point for successor of let-no-escape
125 -> Maybe LocalReg -- Saved cost centre
126 -> Id
127 -> StgRhs
128 -> FCode (CgIdInfo, FCode ())
129
130 cgLetNoEscapeRhs join_id local_cc bndr rhs =
131 do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
132 ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
133 ; let code = do { (_, body) <- getCodeScoped rhs_code
134 ; emitOutOfLine bid (first (<*> mkBranch join_id) body) }
135 ; return (info, code)
136 }
137
138 cgLetNoEscapeRhsBody
139 :: Maybe LocalReg -- Saved cost centre
140 -> Id
141 -> StgRhs
142 -> FCode (CgIdInfo, FCode ())
143 cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd args body)
144 = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
145 cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
146 = cgLetNoEscapeClosure bndr local_cc cc []
147 (StgConApp con args (pprPanic "cgLetNoEscapeRhsBody" $
148 text "StgRhsCon doesn't have type args"))
149 -- For a constructor RHS we want to generate a single chunk of
150 -- code which can be jumped to from many places, which will
151 -- return the constructor. It's easy; just behave as if it
152 -- was an StgRhsClosure with a ConApp inside!
153
154 -------------------------
155 cgLetNoEscapeClosure
156 :: Id -- binder
157 -> Maybe LocalReg -- Slot for saved current cost centre
158 -> CostCentreStack -- XXX: *** NOT USED *** why not?
159 -> [NonVoid Id] -- Args (as in \ args -> body)
160 -> StgExpr -- Body (as in above)
161 -> FCode (CgIdInfo, FCode ())
162
163 cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
164 = do dflags <- getDynFlags
165 return ( lneIdInfo dflags bndr args
166 , code )
167 where
168 code = forkLneBody $ do {
169 ; withNewTickyCounterLNE (idName bndr) args $ do
170 ; restoreCurrentCostCentre cc_slot
171 ; arg_regs <- bindArgsToRegs args
172 ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }
173
174
175 ------------------------------------------------------------------------
176 -- Case expressions
177 ------------------------------------------------------------------------
178
179 {- Note [Compiling case expressions]
180 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
181 It is quite interesting to decide whether to put a heap-check at the
182 start of each alternative. Of course we certainly have to do so if
183 the case forces an evaluation, or if there is a primitive op which can
184 trigger GC.
185
186 A more interesting situation is this (a Plan-B situation)
187
188 !P!;
189 ...P...
190 case x# of
191 0# -> !Q!; ...Q...
192 default -> !R!; ...R...
193
194 where !x! indicates a possible heap-check point. The heap checks
195 in the alternatives *can* be omitted, in which case the topmost
196 heapcheck will take their worst case into account.
197
198 In favour of omitting !Q!, !R!:
199
200 - *May* save a heap overflow test,
201 if ...P... allocates anything.
202
203 - We can use relative addressing from a single Hp to
204 get at all the closures so allocated.
205
206 - No need to save volatile vars etc across heap checks
207 in !Q!, !R!
208
209 Against omitting !Q!, !R!
210
211 - May put a heap-check into the inner loop. Suppose
212 the main loop is P -> R -> P -> R...
213 Q is the loop exit, and only it does allocation.
214 This only hurts us if P does no allocation. If P allocates,
215 then there is a heap check in the inner loop anyway.
216
217 - May do more allocation than required. This sometimes bites us
218 badly. For example, nfib (ha!) allocates about 30% more space if the
219 worst-casing is done, because many many calls to nfib are leaf calls
220 which don't need to allocate anything.
221
222 We can un-allocate, but that costs an instruction
223
224 Neither problem hurts us if there is only one alternative.
225
226 Suppose the inner loop is P->R->P->R etc. Then here is
227 how many heap checks we get in the *inner loop* under various
228 conditions
229
230 Alloc Heap check in branches (!Q!, !R!)?
231 P Q R yes no (absorb to !P!)
232 --------------------------------------
233 n n n 0 0
234 n y n 0 1
235 n . y 1 1
236 y . y 2 1
237 y . n 1 1
238
239 Best choices: absorb heap checks from Q and R into !P! iff
240 a) P itself does some allocation
241 or
242 b) P does allocation, or there is exactly one alternative
243
244 We adopt (b) because that is more likely to put the heap check at the
245 entry to a function, when not many things are live. After a bunch of
246 single-branch cases, we may have lots of things live
247
248 Hence: two basic plans for
249
250 case e of r { alts }
251
252 ------ Plan A: the general case ---------
253
254 ...save current cost centre...
255
256 ...code for e,
257 with sequel (SetLocals r)
258
259 ...restore current cost centre...
260 ...code for alts...
261 ...alts do their own heap checks
262
263 ------ Plan B: special case when ---------
264 (i) e does not allocate or call GC
265 (ii) either upstream code performs allocation
266 or there is just one alternative
267
268 Then heap allocation in the (single) case branch
269 is absorbed by the upstream check.
270 Very common example: primops on unboxed values
271
272 ...code for e,
273 with sequel (SetLocals r)...
274
275 ...code for alts...
276 ...no heap check...
277 -}
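{- A concrete illustration of the two plans (a sketch, not verbatim output):

     case x# +# y# of r# { DEFAULT -> K r# }

   has a simple scrutinee (an inline primop, see isSimpleScrut) and a single
   alternative, so we use Plan B (NoGcInAlts): the allocation of K is folded
   into whatever heap check the surrounding code already performs.  Whereas

     case f x of { Just y -> e1 ; Nothing -> e2 }

   forces an evaluation that may allocate or GC, so we use Plan A
   (GcInAlts): each alternative does its own heap check. -}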
278
279
280
281 -------------------------------------
282 data GcPlan
283 = GcInAlts -- Put a GC check at the start of the case alternatives,
284 [LocalReg] -- which binds these registers
285 | NoGcInAlts -- The scrutinee is a primitive value, or a call to a
286 -- primitive op which does no GC. Absorb the allocation
287 -- of the case alternative(s) into the upstream check
288
289 -------------------------------------
290 cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ReturnKind
291
292 cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
293 | isEnumerationTyCon tycon -- Note [case on bool]
294 = do { tag_expr <- do_enum_primop op args
295
296 -- If the binder is not dead, convert the tag to a constructor
297 -- and assign it.
298 ; unless (isDeadBinder bndr) $ do
299 { dflags <- getDynFlags
300 ; tmp_reg <- bindArgToReg (NonVoid bndr)
301 ; emitAssign (CmmLocal tmp_reg)
302 (tagToClosure dflags tycon tag_expr) }
303
304 ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
305 (NonVoid bndr) alts
306 ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
307 ; return AssignedDirectly
308 }
309 where
310 do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
311 do_enum_primop TagToEnumOp [arg] -- No code!
312 = getArgAmode (NonVoid arg)
313 do_enum_primop primop args
314 = do dflags <- getDynFlags
315 tmp <- newTemp (bWord dflags)
316 cgPrimOp [tmp] primop args
317 return (CmmReg (CmmLocal tmp))
318
319 {-
320 Note [case on bool]
321 ~~~~~~~~~~~~~~~~~~~
322 This special case handles code like
323
324 case a <# b of
325 True ->
326 False ->
327
328 --> case tagToEnum# (a <# b) of
329 True -> .. ; False -> ...
330
331 --> case (a <# b) of r ->
332 case tagToEnum# r of
333 True -> .. ; False -> ...
334
335 If we let the ordinary case code handle it, we'll get something like
336
337 tmp1 = a < b
338 tmp2 = Bool_closure_tbl[tmp1]
339 if (tmp2 & 7 != 0) then ... // normal tagged case
340
341 but this junk won't optimise away. What we really want is just an
342 inline comparison:
343
344 if (a < b) then ...
345
346 So we add a special case to generate
347
348 tmp1 = a < b
349 if (tmp1 == 0) then ...
350
351 and later optimisations will further improve this.
352
353 Now that #6135 has been resolved it should be possible to remove that
354 special case. The idea behind this special case and the pre-6135 implementation
355 of Bool-returning primops was that tagToEnum# was added implicitly in the
356 codegen and then optimized away. Now the call to tagToEnum# is explicit
357 in the source code, which allows it to be optimized away at earlier stages
358 of compilation (i.e. at the Core level).
359
360 Note [Scrutinising VoidRep]
361 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
362 Suppose we have this STG code:
363 f = \[s : State# RealWorld] ->
364 case s of _ -> blah
365 This is very odd. Why are we scrutinising a state token? But it
366 can arise with bizarre NOINLINE pragmas (Trac #9964)
367 crash :: IO ()
368 crash = IO (\s -> let {-# NOINLINE s' #-}
369 s' = s
370 in (# s', () #))
371
372 Now the trouble is that 's' has VoidRep, and we do not bind void
373 arguments in the environment; they don't live anywhere. See the
374 calls to nonVoidIds in various places. So we must not look up
375 's' in the environment. Instead, just evaluate the RHS! Simple.
376 -}
377
378 cgCase (StgApp v []) _ (PrimAlt _) alts
379 | isVoidRep (idPrimRep v) -- See Note [Scrutinising VoidRep]
380 , [(DEFAULT, _, rhs)] <- alts
381 = cgExpr rhs
382
383 {- Note [Dodgy unsafeCoerce 1]
384 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
385 Consider
386 case (x :: HValue) |> co of (y :: MutVar# Int)
387 DEFAULT -> ...
388 We want to generate an assignment
389 y := x
390 We want to allow this assignment to be generated in the case when the
391 types are compatible, because this allows some slightly-dodgy but
392 occasionally-useful casts to be used, such as in RtClosureInspect
393 where we cast an HValue to a MutVar# so we can print out the contents
394 of the MutVar#. If instead we generate code that enters the HValue,
395 then we'll get a runtime panic, because the HValue really is a
396 MutVar#. The types are compatible though, so we can just generate an
397 assignment.
398 -}
399 cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
400 | isUnliftedType (idType v) -- Note [Dodgy unsafeCoerce 1]
401 || reps_compatible
402 = -- assignment suffices for unlifted types
403 do { dflags <- getDynFlags
404 ; unless reps_compatible $
405 panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
406 ; v_info <- getCgIdInfo v
407 ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr)))
408 (idInfoToAmode v_info)
409 ; bindArgToReg (NonVoid bndr)
410 ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
411 where
412 reps_compatible = idPrimRep v == idPrimRep bndr
413
414 {- Note [Dodgy unsafeCoerce 2, #3132]
415 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
416 In all other cases of a lifted Id being cast to an unlifted type, the
417 Id should be bound to bottom, otherwise this is an unsafe use of
418 unsafeCoerce. We can generate code to enter the Id and assume that
419 it will never return. Hence, we emit the usual enter/return code, and
420 because bottom must be untagged, it will be entered. The Sequel is a
421 type-correct assignment, albeit bogus. The (dead) continuation loops;
422 it would be better to invoke some kind of panic function here.
423 -}
424 cgCase scrut@(StgApp v []) _ (PrimAlt _) _
425 = do { dflags <- getDynFlags
426 ; mb_cc <- maybeSaveCostCentre True
427 ; withSequel (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut)
428 ; restoreCurrentCostCentre mb_cc
429 ; emitComment $ mkFastString "should be unreachable code"
430 ; l <- newLabelC
431 ; emitLabel l
432 ; emit (mkBranch l) -- an infinite loop
433 ; return AssignedDirectly
434 }
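-- A sketch of the code the equation above generates (labels invented; the
-- exact enter sequence comes from emitEnter): 'v' is entered with a sequel
-- that assigns the never-delivered result back to v's own register, and the
-- continuation is a dead infinite loop:
--
--        call (enter v) returns to Lret
--   Lret:  v = R1                -- type-correct but bogus assignment
--   Lloop: goto Lloop            -- "should be unreachable code"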
435
436 {- Note [Handle seq#]
437 ~~~~~~~~~~~~~~~~~~~~~
438 case seq# a s of v
439 (# s', a' #) -> e
440
441 ==>
442
443 case a of v
444 (# s', a' #) -> e
445
446 (taking advantage of the fact that the return convention for (# State#, a #)
447 is the same as the return convention for just 'a')
448 -}
449
450 cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
451 = -- Note [Handle seq#]
452 -- Use the same return convention as vanilla 'a'.
453 cgCase (StgApp a []) bndr alt_type alts
454
455 cgCase scrut bndr alt_type alts
456 = -- the general case
457 do { dflags <- getDynFlags
458 ; up_hp_usg <- getVirtHp -- Upstream heap usage
459 ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
460 alt_regs = map (idToReg dflags) ret_bndrs
461 ; simple_scrut <- isSimpleScrut scrut alt_type
462 ; let do_gc | not simple_scrut = True
463 | isSingleton alts = False
464 | up_hp_usg > 0 = False
465 | otherwise = True
466 -- cf Note [Compiling case expressions]
467 gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts
468
469 ; mb_cc <- maybeSaveCostCentre simple_scrut
470
471 ; let sequel = AssignTo alt_regs do_gc{- Note [scrut sequel] -}
472 ; ret_kind <- withSequel sequel (cgExpr scrut)
473 ; restoreCurrentCostCentre mb_cc
474 ; _ <- bindArgsToRegs ret_bndrs
475 ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
476 }
477
478
479 {-
480 Note [scrut sequel]
481
482 The job of the scrutinee is to assign its value(s) to alt_regs.
483 Additionally, if we plan to do a heap-check in the alternatives (see
484 Note [Compiling case expressions]), then we *must* retreat Hp to
485 recover any unused heap before passing control to the sequel. If we
486 don't do this, then any unused heap will become slop because the heap
487 check will reset the heap usage. Slop in the heap breaks LDV profiling
488 (+RTS -hb) which needs to do a linear sweep through the nursery.
489
490
491 Note [Inlining out-of-line primops and heap checks]
492 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
493 If shouldInlinePrimOp returns True when called from StgCmmExpr for the
494 purpose of heap check placement, we *must* inline the primop later in
495 StgCmmPrim. If we don't, things will go wrong.
496 -}
497
498 -----------------
499 maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
500 maybeSaveCostCentre simple_scrut
501 | simple_scrut = return Nothing
502 | otherwise = saveCurrentCostCentre
503
504
505 -----------------
506 isSimpleScrut :: StgExpr -> AltType -> FCode Bool
507 -- Simple scrutinee, does not block or allocate; hence safe to amalgamate
508 -- heap usage from alternatives into the stuff before the case
509 -- NB: if you get this wrong, and claim that the expression doesn't allocate
510 -- when it does, you'll deeply mess up allocation
511 isSimpleScrut (StgOpApp op args _) _ = isSimpleOp op args
512 isSimpleScrut (StgLit _) _ = return True -- case 1# of { 0# -> ..; ... }
513 isSimpleScrut (StgApp _ []) (PrimAlt _) = return True -- case x# of { 0# -> ..; ... }
514 isSimpleScrut _ _ = return False
515
516 isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
517 -- True iff the op cannot block or allocate
518 isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
519 isSimpleOp (StgPrimOp op) stg_args = do
520 arg_exprs <- getNonVoidArgAmodes stg_args
521 dflags <- getDynFlags
522 -- See Note [Inlining out-of-line primops and heap checks]
523 return $! isJust $ shouldInlinePrimOp dflags op arg_exprs
524 isSimpleOp (StgPrimCallOp _) _ = return False
525
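-- Illustrative classification (a sketch; the precise answer for a primop
-- depends on shouldInlinePrimOp and the argument amodes):
--
--   case plusWord# x y of ...        -- simple: inline primop, no GC
--   case newArray# n init s of ...   -- typically not simple: out-of-line,
--                                    -- allocating primop
--   case <safe ccall> of ...         -- not simple: may block and GC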
526 -----------------
527 chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id]
528 -- These are the binders of a case that are assigned by the evaluation of the
529 -- scrutinee.
530 -- They're non-void, see Note [Post-unarisation invariants] in UnariseStg.
531 chooseReturnBndrs bndr (PrimAlt _) _alts
532 = assertNonVoidIds [bndr]
533
534 chooseReturnBndrs _bndr (MultiValAlt n) [(_, ids, _)]
535 = ASSERT2(n == length ids, ppr n $$ ppr ids $$ ppr _bndr)
536 assertNonVoidIds ids -- 'bndr' is not assigned!
537
538 chooseReturnBndrs bndr (AlgAlt _) _alts
539 = assertNonVoidIds [bndr] -- Only 'bndr' is assigned
540
541 chooseReturnBndrs bndr PolyAlt _alts
542 = assertNonVoidIds [bndr] -- Only 'bndr' is assigned
543
544 chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
545 -- MultiValAlt has only one alternative
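-- For example (a sketch): in
--      case e of v { (# a, b #) -> rhs }        -- MultiValAlt 2
-- the values returned by 'e' are bound to 'a' and 'b', not to 'v', so the
-- return binders are [a, b]; for PrimAlt, AlgAlt and PolyAlt cases it is
-- the case binder 'v' itself that receives the scrutinee's value.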
546
547 -------------------------------------
548 cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt]
549 -> FCode ReturnKind
550 -- At this point the results of the case are in the binders
551 cgAlts gc_plan _bndr PolyAlt [(_, _, rhs)]
552 = maybeAltHeapCheck gc_plan (cgExpr rhs)
553
554 cgAlts gc_plan _bndr (MultiValAlt _) [(_, _, rhs)]
555 = maybeAltHeapCheck gc_plan (cgExpr rhs)
556 -- Here bndrs are *already* in scope, so don't rebind them
557
558 cgAlts gc_plan bndr (PrimAlt _) alts
559 = do { dflags <- getDynFlags
560
561 ; tagged_cmms <- cgAltRhss gc_plan bndr alts
562
563 ; let bndr_reg = CmmLocal (idToReg dflags bndr)
564 (DEFAULT,deflt) = head tagged_cmms
565 -- PrimAlts always have a DEFAULT case
566 -- and it always comes first
567
568 tagged_cmms' = [(lit,code)
569 | (LitAlt lit, code) <- tagged_cmms]
570 ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
571 ; return AssignedDirectly }
572
573 cgAlts gc_plan bndr (AlgAlt tycon) alts
574 = do { dflags <- getDynFlags
575
576 ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts
577
578 ; let fam_sz = tyConFamilySize tycon
579 bndr_reg = CmmLocal (idToReg dflags bndr)
580
581 -- Is the constructor tag in the node reg?
582 ; if isSmallFamily dflags fam_sz
583 then do
584 let -- Yes, bndr_reg has constr. tag in ls bits
585 tag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg)
586 branches' = [(tag+1,branch) | (tag,branch) <- branches]
587 emitSwitch tag_expr branches' mb_deflt 1 fam_sz
588
589 else -- No, get tag from info table
590 do dflags <- getDynFlags
591 let -- Note that ptr _always_ has tag 1
592 -- when the family size is big enough
593 untagged_ptr = cmmRegOffB bndr_reg (-1)
594 tag_expr = getConstrTag dflags (untagged_ptr)
595 emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
596
597 ; return AssignedDirectly }
598
599 cgAlts _ _ _ _ = panic "cgAlts"
600 -- MultiValAlt and PolyAlt have only one alternative
601
602
603 -- Note [alg-alt heap check]
604 --
605 -- In an algebraic case with more than one alternative, we will have
606 -- code like
607 --
608 -- L0:
609 -- x = R1
610 -- goto L1
611 -- L1:
612 -- if (x & 7 >= 2) then goto L2 else goto L3
613 -- L2:
614 -- Hp = Hp + 16
615 -- if (Hp > HpLim) then goto L4
616 -- ...
617 -- L4:
618 -- call gc() returns to L5
619 -- L5:
620 -- x = R1
621 -- goto L1
622
623 -------------------
624 cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
625 -> FCode ( Maybe CmmAGraphScoped
626 , [(ConTagZ, CmmAGraphScoped)] )
627 cgAlgAltRhss gc_plan bndr alts
628 = do { tagged_cmms <- cgAltRhss gc_plan bndr alts
629
630 ; let { mb_deflt = case tagged_cmms of
631 ((DEFAULT,rhs) : _) -> Just rhs
632 _other -> Nothing
633 -- DEFAULT is always first, if present
634
635 ; branches = [ (dataConTagZ con, cmm)
636 | (DataAlt con, cmm) <- tagged_cmms ]
637 }
638
639 ; return (mb_deflt, branches)
640 }
641
642
643 -------------------
644 cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
645 -> FCode [(AltCon, CmmAGraphScoped)]
646 cgAltRhss gc_plan bndr alts = do
647 dflags <- getDynFlags
648 let
649 base_reg = idToReg dflags bndr
650 cg_alt :: StgAlt -> FCode (AltCon, CmmAGraphScoped)
651 cg_alt (con, bndrs, rhs)
652 = getCodeScoped $
653 maybeAltHeapCheck gc_plan $
654 do { _ <- bindConArgs con base_reg (assertNonVoidIds bndrs)
655 -- alt binders are always non-void,
656 -- see Note [Post-unarisation invariants] in UnariseStg
657 ; _ <- cgExpr rhs
658 ; return con }
659 forkAlts (map cg_alt alts)
660
661 maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
662 maybeAltHeapCheck (NoGcInAlts,_) code = code
663 maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
664 altHeapCheck regs code
665 maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
666 altHeapCheckReturnsTo regs lret off code
667
668 -----------------------------------------------------------------------------
669 -- Tail calls
670 -----------------------------------------------------------------------------
671
672 cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
673 cgConApp con stg_args
674 | isUnboxedTupleCon con -- Unboxed tuple: assign and return
675 = do { arg_exprs <- getNonVoidArgAmodes stg_args
676 ; tickyUnboxedTupleReturn (length arg_exprs)
677 ; emitReturn arg_exprs }
678
679 | otherwise -- Boxed constructors; allocate and return
680 = ASSERT2( stg_args `lengthIs` countConRepArgs con, ppr con <> parens (ppr (countConRepArgs con)) <+> ppr stg_args )
681 do { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
682 currentCCS con (assertNonVoidStgArgs stg_args)
683 -- con args are always non-void,
684 -- see Note [Post-unarisation invariants] in UnariseStg
685 -- The first "con" says that the name bound to this
686 -- closure is "con", which is a bit of a fudge, but
687 -- it only affects profiling (hence the False)
688
689 ; emit =<< fcode_init
690 ; tickyReturnNewCon (length stg_args)
691 ; emitReturn [idInfoToAmode idinfo] }
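-- An illustrative sketch of the boxed-constructor path (word offsets and
-- names simplified): compiling the application 'Just x' emits roughly
--
--      Hp = Hp + <size of a Just closure>
--      <info-table word of the new closure> = Just_con_info
--      <payload word of the new closure>    = x
--
-- and then returns a pointer to the new closure, tagged with Just's
-- constructor tag, to the sequel.  Unboxed tuples take the first branch
-- instead: no allocation, the non-void fields are simply returned.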
692
693 cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
694 cgIdApp fun_id [] | isVoidTy (idType fun_id) = emitReturn []
695 cgIdApp fun_id args = do
696 dflags <- getDynFlags
697 fun_info <- getCgIdInfo fun_id
698 self_loop_info <- getSelfLoop
699 let cg_fun_id = cg_id fun_info
700 -- NB: use (cg_id fun_info) instead of fun_id, because
701 -- the former may be externalised for -split-objs.
702 -- See Note [Externalise when splitting] in StgCmmMonad
703
704 fun_arg = StgVarArg cg_fun_id
705 fun_name = idName cg_fun_id
706 fun = idInfoToAmode fun_info
707 lf_info = cg_lf fun_info
708 n_args = length args
709 v_args = length $ filter (isVoidTy . stgArgType) args
710 node_points dflags = nodeMustPointToIt dflags lf_info
711 case getCallMethod dflags fun_name cg_fun_id lf_info n_args v_args (cg_loc fun_info) self_loop_info of
712
713 -- A value in WHNF, so we can just return it.
714 ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged?
715
716 EnterIt -> ASSERT( null args ) -- Discarding arguments
717 emitEnter fun
718
719 SlowCall -> do -- A slow function call via the RTS apply routines
720 { tickySlowCall lf_info args
721 ; emitComment $ mkFastString "slowCall"
722 ; slowCall fun args }
723
724 -- A direct function call (possibly with some left-over arguments)
725 DirectEntry lbl arity -> do
726 { tickyDirectCall arity args
727 ; if node_points dflags
728 then directCall NativeNodeCall lbl arity (fun_arg:args)
729 else directCall NativeDirectCall lbl arity args }
730
731 -- Let-no-escape call or self-recursive tail-call
732 JumpToIt blk_id lne_regs -> do
733 { adjustHpBackwards -- always do this before a tail-call
734 ; cmm_args <- getNonVoidArgAmodes args
735 ; emitMultiAssign lne_regs cmm_args
736 ; emit (mkBranch blk_id)
737 ; return AssignedDirectly }
738
739 -- Note [Self-recursive tail calls]
740 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
741 --
742 -- Self-recursive tail calls can be optimized into a local jump in the same
743 -- way as let-no-escape bindings (see Note [What is a non-escaping let] in
744 -- stgSyn/CoreToStg.hs). Consider this:
745 --
746 -- foo.info:
747 -- a = R1 // calling convention
748 -- b = R2
749 -- goto L1
750 -- L1: ...
751 -- ...
752 -- ...
753 -- L2: R1 = x
754 -- R2 = y
755 -- call foo(R1,R2)
756 --
757 -- Instead of putting x and y into registers (or other locations required by the
758 -- calling convention) and performing a call we can put them into local
759 -- variables a and b and perform jump to L1:
760 --
761 -- foo.info:
762 -- a = R1
763 -- b = R2
764 -- goto L1
765 -- L1: ...
766 -- ...
767 -- ...
768 -- L2: a = x
769 -- b = y
770 -- goto L1
771 --
772 -- This can be done only when a function is calling itself in a tail position
773 -- and only if the call passes a number of parameters equal to the function's arity.
774 -- Note that this cannot be performed if a function calls itself with a
775 -- continuation.
776 --
777 -- This in fact implements optimization known as "loopification". It was
778 -- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
779 -- by Krzysztof Woś, though we use a different approach. Krzysztof performed his
780 -- optimization at the Cmm level, whereas we perform ours during code generation
781 -- (Stg-to-Cmm pass), essentially making sure that optimized Cmm code is
782 -- generated in the first place.
783 --
784 -- Implementation is spread across a couple of places in the code:
785 --
786 -- * FCode monad stores additional information in its reader environment
787 -- (cgd_self_loop field). This information tells us which function can
788 -- tail call itself in an optimized way (it is the function currently
789 -- being compiled), what the label of the loop header is (L1 in the example
790 -- above), and which local registers we should place the arguments in
791 -- before making a call (these would be a and b in the example above).
792 --
793 -- * Whenever we are compiling a function, we set that information to reflect
794 -- the fact that the function currently being compiled can be jumped to, instead
795 -- of called. This is done in closureCodeBody in StgCmmBind.
796 --
797 -- * We also have to emit a label to which we will be jumping. We make sure
798 -- that the label is placed after a stack check but before the heap
799 -- check. The reason is that making a recursive tail-call does not increase
800 -- the stack so we only need to check once. But it may grow the heap, so we
801 -- have to repeat the heap check in every self-call. This is done in
802 -- do_checks in StgCmmHeap.
803 --
804 -- * When we begin compilation of another closure we remove the additional
805 -- information from the environment. This is done by forkClosureBody
806 -- in StgCmmMonad. Other functions that duplicate the environment -
807 -- forkLneBody, forkAlts, codeOnly - duplicate that information. In other
808 -- words, we only need to clean the environment of the self-loop information
809 -- when compiling the right-hand side of a closure (binding).
810 --
811 -- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
812 -- of call will be generated. getCallMethod decides to generate a self
813 -- recursive tail call when (a) environment stores information about
814 -- possible self tail-call; (b) that tail call is to a function currently
815 -- being compiled; (c) the number of passed non-void arguments is equal to
816 -- the function's arity; (d) loopification is turned on via the -floopification
817 -- command-line option.
818 --
819 -- * The command-line option to turn loopification on and off is implemented in
820 -- DynFlags.
821 --
822 --
823 -- Note [Void arguments in self-recursive tail calls]
824 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
825 --
826 -- State# tokens can get in the way of the loopification optimization as seen in
827 -- #11372. Consider this:
828 --
829 -- foo :: [a]
830 -- -> (a -> State# s -> (# State# s, Bool #))
831 -- -> State# s
832 -- -> (# State# s, Maybe a #)
833 -- foo [] f s = (# s, Nothing #)
834 -- foo (x:xs) f s = case f x s of
835 -- (# s', b #) -> case b of
836 -- True -> (# s', Just x #)
837 -- False -> foo xs f s'
838 --
839 -- We would like to compile the call to foo as a local jump instead of a call
840 -- (see Note [Self-recursive tail calls]). However, the generated function has
841 -- an arity of 2 while we apply it to 3 arguments, one of them being of void
842 -- type. Thus, we mustn't count arguments of void type when checking whether
843 -- we can turn a call into a self-recursive jump.
844 --
845
846 emitEnter :: CmmExpr -> FCode ReturnKind
847 emitEnter fun = do
848 { dflags <- getDynFlags
849 ; adjustHpBackwards
850 ; sequel <- getSequel
851 ; updfr_off <- getUpdFrameOff
852 ; case sequel of
853 -- For a return, we have the option of generating a tag-test or
854 -- not. If the value is tagged, we can return directly, which
855 -- is quicker than entering the value. This is a code
856 -- size/speed trade-off: when optimising for speed rather than
857 -- size we could generate the tag test.
858 --
859 -- Right now, we do what the old codegen did, and omit the tag
860 -- test, just generating an enter.
861 Return -> do
862 { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg
863 ; emit $ mkJump dflags NativeNodeCall entry
864 [cmmUntag dflags fun] updfr_off
865 ; return AssignedDirectly
866 }
867
868 -- The result will be scrutinised in the sequel. This is where
869 -- we generate a tag-test to avoid entering the closure if
870 -- possible.
871 --
872 -- The generated code will be something like this:
873 --
874 -- R1 = fun -- copyout
875 -- if (fun & 7 != 0) goto Lcall else goto Lret
876 -- Lcall:
877 -- call [fun] returns to Lret
878 -- Lret:
879 -- fun' = R1 -- copyin
880 -- ...
881 --
882 -- Note in particular that the label Lret is used as a
883 -- destination by both the tag-test and the call. This is
884 -- because Lret will necessarily be a proc-point, and we want to
885 -- ensure that we generate only one proc-point for this
886 -- sequence.
887 --
888 -- Furthermore, we tell the caller that we generated a native
889 -- return continuation by returning (ReturnedTo Lret off), so
890 -- that the continuation can be reused by the heap-check failure
891 -- code in the enclosing case expression.
892 --
893 AssignTo res_regs _ -> do
894 { lret <- newLabelC
895 ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs []
896 ; lcall <- newLabelC
897 ; updfr_off <- getUpdFrameOff
898 ; let area = Young lret
899 ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
900 [fun] updfr_off []
901 -- refer to fun via nodeReg after the copyout, to avoid having
902 -- both live simultaneously; this sometimes enables fun to be
903 -- inlined in the RHS of the R1 assignment.
904 ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg))
905 the_call = toCall entry (Just lret) updfr_off off outArgs regs
906 ; tscope <- getTickScope
907 ; emit $
908 copyout <*>
909 mkCbranch (cmmIsTagged dflags (CmmReg nodeReg))
910 lret lcall Nothing <*>
911 outOfLine lcall (the_call,tscope) <*>
912 mkLabel lret tscope <*>
913 copyin
914 ; return (ReturnedTo lret off)
915 }
916 }
917
918 ------------------------------------------------------------------------
919 -- Ticks
920 ------------------------------------------------------------------------
921
922 -- | Generate Cmm code for a tick. Depending on the type of Tickish,
923 -- this will either generate actual Cmm instrumentation code, or
924 -- simply pass on the annotation as a @CmmTickish@.
925 cgTick :: Tickish Id -> FCode ()
926 cgTick tick
927 = do { dflags <- getDynFlags
928 ; case tick of
929 ProfNote cc t p -> emitSetCCC cc t p
930 HpcTick m n -> emit (mkTickBox dflags m n)
931 SourceNote s n -> emitTick $ SourceNote s n
932 _other -> return () -- ignore
933 }