1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 \section[SimplUtils]{The simplifier utilities}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplUtils (
10 -- Rebuilding
11 mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
12
13 -- Inlining,
14 preInlineUnconditionally, postInlineUnconditionally,
15 activeUnfolding, activeRule,
16 getUnfoldingInRuleMatch,
17 simplEnvForGHCi, updModeForStableUnfoldings, updModeForRules,
18
19 -- The continuation type
20 SimplCont(..), DupFlag(..), StaticEnv,
21 isSimplified, contIsStop,
22 contIsDupable, contResultType, contHoleType,
23 contIsTrivial, contArgs,
24 countArgs,
25 mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
26 interestingCallContext,
27
28 -- ArgInfo
29 ArgInfo(..), ArgSpec(..), mkArgInfo,
30 addValArgTo, addCastTo, addTyArgTo,
31 argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
32
33 abstractFloats,
34
35 -- Utilities
36 isExitJoinId
37 ) where
38
39 #include "HsVersions.h"
40
41 import GhcPrelude
42
43 import SimplEnv
44 import CoreMonad ( SimplMode(..), Tick(..) )
45 import DynFlags
46 import CoreSyn
47 import qualified CoreSubst
48 import PprCore
49 import CoreFVs
50 import CoreUtils
51 import CoreArity
52 import CoreUnfold
53 import Name
54 import Id
55 import IdInfo
56 import Var
57 import Demand
58 import SimplMonad
59 import Type hiding( substTy )
60 import Coercion hiding( substCo )
61 import DataCon ( dataConWorkId, isNullaryRepDataCon )
62 import VarSet
63 import BasicTypes
64 import Util
65 import OrdList ( isNilOL )
66 import MonadUtils
67 import Outputable
68 import Pair
69 import PrelRules
70 import FastString ( fsLit )
71
72 import Control.Monad ( when )
73 import Data.List ( sortBy )
74
75 {-
76 ************************************************************************
77 * *
78 The SimplCont and DupFlag types
79 * *
80 ************************************************************************
81
82 A SimplCont allows the simplifier to traverse the expression in a
83 zipper-like fashion. The SimplCont represents the rest of the expression,
84 "above" the point of interest.
85
86 You can also think of a SimplCont as an "evaluation context", using
87 that term in the way it is used for operational semantics. This is the
88 way I usually think of it. For example, you'll often see a syntax for
89 evaluation contexts looking like
90 C ::= [] | C e | case C of alts | C `cast` co
91 That's the kind of thing we are doing here, and I use that syntax in
92 the comments.
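
As a purely illustrative sketch (not part of the implementation): suppose we
are simplifying the sub-expression f of
        case f x of alts
The rest of the expression is the evaluation context (case [] x of alts),
which would be represented, roughly, by the continuation
        ApplyToVal { sc_arg = x
                   , sc_cont = Select { sc_alts = alts
                                      , sc_cont = Stop ... } }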
93
94
95 Key points:
96 * A SimplCont describes a *strict* context (just like
97 evaluation contexts do). E.g. Just [] is not a SimplCont
98
99 * A SimplCont describes a context that *does not* bind
100 any variables. E.g. \x. [] is not a SimplCont
101 -}
102
103 data SimplCont
104 = Stop -- Stop[e] = e
105 OutType -- Type of the <hole>
106 CallCtxt -- Tells if there is something interesting about
107 -- the context, and hence the inliner
108 -- should be a bit keener (see interestingCallContext)
109 -- Specifically:
110 -- This is an argument of a function that has RULES
111 -- Inlining the call might allow the rule to fire
112 -- Never ValAppCxt (use ApplyToVal instead)
113 -- or CaseCtxt (use Select instead)
114
115 | CastIt -- (CastIt co K)[e] = K[ e `cast` co ]
116 OutCoercion -- The coercion simplified
117 -- Invariant: never an identity coercion
118 SimplCont
119
120 | ApplyToVal -- (ApplyToVal arg K)[e] = K[ e arg ]
121 { sc_dup :: DupFlag -- See Note [DupFlag invariants]
122 , sc_arg :: InExpr -- The argument,
123 , sc_env :: StaticEnv -- see Note [StaticEnv invariant]
124 , sc_cont :: SimplCont }
125
126 | ApplyToTy -- (ApplyToTy ty K)[e] = K[ e ty ]
127 { sc_arg_ty :: OutType -- Argument type
128 , sc_hole_ty :: OutType -- Type of the function, presumably (forall a. blah)
129 -- See Note [The hole type in ApplyToTy]
130 , sc_cont :: SimplCont }
131
132 | Select -- (Select alts K)[e] = K[ case e of alts ]
133 { sc_dup :: DupFlag -- See Note [DupFlag invariants]
134 , sc_bndr :: InId -- case binder
135 , sc_alts :: [InAlt] -- Alternatives
136 , sc_env :: StaticEnv -- See Note [StaticEnv invariant]
137 , sc_cont :: SimplCont }
138
139 -- The two strict forms; both carry a DupFlag (see Note [DupFlag invariants])
140 | StrictBind -- (StrictBind x xs b K)[e] = let x = e in K[\xs.b]
141 -- or, equivalently, = K[ (\x xs.b) e ]
142 { sc_dup :: DupFlag -- See Note [DupFlag invariants]
143 , sc_bndr :: InId
144 , sc_bndrs :: [InBndr]
145 , sc_body :: InExpr
146 , sc_env :: StaticEnv -- See Note [StaticEnv invariant]
147 , sc_cont :: SimplCont }
148
149 | StrictArg -- (StrictArg (f e1 ..en) K)[e] = K[ f e1 .. en e ]
150 { sc_dup :: DupFlag -- Always Simplified or OkToDup
151 , sc_fun :: ArgInfo -- Specifies f, e1..en, Whether f has rules, etc
152 -- plus strictness flags for *further* args
153 , sc_cci :: CallCtxt -- Whether *this* argument position is interesting
154 , sc_cont :: SimplCont }
155
156 | TickIt -- (TickIt t K)[e] = K[ tick t e ]
157 (Tickish Id) -- Tick tickish <hole>
158 SimplCont
159
160 type StaticEnv = SimplEnv -- Just the static part is relevant
161
162 data DupFlag = NoDup -- Unsimplified, might be big
163 | Simplified -- Simplified
164 | OkToDup -- Simplified and small
165
166 isSimplified :: DupFlag -> Bool
167 isSimplified NoDup = False
168 isSimplified _ = True -- Invariant: the subst-env is empty
169
170 perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
171 perhapsSubstTy dup env ty
172 | isSimplified dup = ty
173 | otherwise = substTy env ty
174
175 {- Note [StaticEnv invariant]
176 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
177 We pair up an InExpr or InAlts with a StaticEnv, which establishes the
178 lexical scope for that InExpr. When we simplify that InExpr/InAlts, we
179 use
180 - Its captured StaticEnv
181 - Overriding its InScopeSet with the larger one at the
182 simplification point.
183
184 Why override the InScopeSet? Example:
185 (let y = ey in f) ex
186 By the time we simplify ex, 'y' will be in scope.
187
188 However the InScopeSet in the StaticEnv is not irrelevant: it should
189 include all the free vars of applying the substitution to the InExpr.
190 Reason: contHoleType uses perhapsSubstTy to apply the substitution to
191 the expression, and that (rightly) gives ASSERT failures if the InScopeSet
192 isn't big enough.
193
194 Note [DupFlag invariants]
195 ~~~~~~~~~~~~~~~~~~~~~~~~~
196 In both (ApplyToVal dup _ env k)
197 and (Select dup _ _ env k)
198 the following invariants hold
199
200 (a) if dup = OkToDup, then continuation k is also ok-to-dup
201 (b) if dup = OkToDup or Simplified, the subst-env is empty
202 (and hence no need to re-simplify)
203 -}
204
205 instance Outputable DupFlag where
206 ppr OkToDup = text "ok"
207 ppr NoDup = text "nodup"
208 ppr Simplified = text "simpl"
209
210 instance Outputable SimplCont where
211 ppr (Stop ty interesting) = text "Stop" <> brackets (ppr interesting) <+> ppr ty
212 ppr (CastIt co cont ) = (text "CastIt" <+> pprOptCo co) $$ ppr cont
213 ppr (TickIt t cont) = (text "TickIt" <+> ppr t) $$ ppr cont
214 ppr (ApplyToTy { sc_arg_ty = ty, sc_cont = cont })
215 = (text "ApplyToTy" <+> pprParendType ty) $$ ppr cont
216 ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont })
217 = (text "ApplyToVal" <+> ppr dup <+> pprParendExpr arg)
218 $$ ppr cont
219 ppr (StrictBind { sc_bndr = b, sc_cont = cont })
220 = (text "StrictBind" <+> ppr b) $$ ppr cont
221 ppr (StrictArg { sc_fun = ai, sc_cont = cont })
222 = (text "StrictArg" <+> ppr (ai_fun ai)) $$ ppr cont
223 ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
224 = (text "Select" <+> ppr dup <+> ppr bndr) $$
225 whenPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
226
227
228 {- Note [The hole type in ApplyToTy]
229 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
230 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
231 continuation. It is absolutely necessary to compute contHoleType, but it is
232 not used for anything else (and hence may not be evaluated).
233
234 Why is it necessary for contHoleType? Consider the continuation
235 ApplyToTy Int (Stop Int)
236 corresponding to
237 (<hole> @Int) :: Int
238 What is the type of <hole>? It could be (forall a. Int) or (forall a. a),
239 and there is no way to know which, so we must record it.
240
241 In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType
242 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
243 doesn't matter because we'll never compute them all.
244
245 ************************************************************************
246 * *
247 ArgInfo and ArgSpec
248 * *
249 ************************************************************************
250 -}
251
252 data ArgInfo
253 = ArgInfo {
254 ai_fun :: OutId, -- The function
255 ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order)
256
257 ai_type :: OutType, -- Type of (f a1 ... an)
258
259 ai_rules :: FunRules, -- Rules for this function
260
261 ai_encl :: Bool, -- Flag saying whether this function
262 -- or an enclosing one has rules (recursively)
263 -- True => be keener to inline in all args
264
265 ai_strs :: [Bool], -- Strictness of remaining arguments
266 -- Usually infinite, but if it is finite it guarantees
267 -- that the function diverges after being given
268 -- that number of args
269 ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline
270 -- Always infinite
271 }
272
273 data ArgSpec
274 = ValArg OutExpr -- Apply to this (coercion or value); c.f. ApplyToVal
275 | TyArg { as_arg_ty :: OutType -- Apply to this type; c.f. ApplyToTy
276 , as_hole_ty :: OutType } -- Type of the function (presumably forall a. blah)
277 | CastBy OutCoercion -- Cast by this; c.f. CastIt
278
279 instance Outputable ArgSpec where
280 ppr (ValArg e) = text "ValArg" <+> ppr e
281 ppr (TyArg { as_arg_ty = ty }) = text "TyArg" <+> ppr ty
282 ppr (CastBy c) = text "CastBy" <+> ppr c
283
284 addValArgTo :: ArgInfo -> OutExpr -> ArgInfo
285 addValArgTo ai arg = ai { ai_args = ValArg arg : ai_args ai
286 , ai_type = applyTypeToArg (ai_type ai) arg
287 , ai_rules = decRules (ai_rules ai) }
288
289 addTyArgTo :: ArgInfo -> OutType -> ArgInfo
290 addTyArgTo ai arg_ty = ai { ai_args = arg_spec : ai_args ai
291 , ai_type = piResultTy poly_fun_ty arg_ty
292 , ai_rules = decRules (ai_rules ai) }
293 where
294 poly_fun_ty = ai_type ai
295 arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = poly_fun_ty }
296
297 addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
298 addCastTo ai co = ai { ai_args = CastBy co : ai_args ai
299 , ai_type = pSnd (coercionKind co) }
300
301 argInfoAppArgs :: [ArgSpec] -> [OutExpr]
302 argInfoAppArgs [] = []
303 argInfoAppArgs (CastBy {} : _) = [] -- Stop at a cast
304 argInfoAppArgs (ValArg e : as) = e : argInfoAppArgs as
305 argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as
306
307 pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
308 pushSimplifiedArgs _env [] k = k
309 pushSimplifiedArgs env (arg : args) k
310 = case arg of
311 TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
312 -> ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
313 ValArg e -> ApplyToVal { sc_arg = e, sc_env = env, sc_dup = Simplified, sc_cont = rest }
314 CastBy c -> CastIt c rest
315 where
316 rest = pushSimplifiedArgs env args k
317 -- The env has an empty SubstEnv
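-- An informal example (a sketch, not a spec): pushing two already-simplified
-- value arguments a1 and a2 onto a continuation k gives
--     pushSimplifiedArgs env [ValArg a1, ValArg a2] k
--       = ApplyToVal { sc_arg = a1, sc_dup = Simplified, sc_env = env
--                    , sc_cont = ApplyToVal { sc_arg = a2, sc_dup = Simplified
--                                           , sc_env = env, sc_cont = k } }
-- which, by the comments on SimplCont, means k[ <hole> a1 a2 ]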
318
319 argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
320 -- NB: the [ArgSpec] is reversed so that the first arg
321 -- in the list is the last one in the application
322 argInfoExpr fun rev_args
323 = go rev_args
324 where
325 go [] = Var fun
326 go (ValArg a : as) = go as `App` a
327 go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty
328 go (CastBy co : as) = mkCast (go as) co
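-- A small worked example (informal): because the ArgSpec list is reversed,
--     argInfoExpr f [ValArg e2, ValArg e1] = App (App (Var f) e1) e2
-- i.e. the application (f e1 e2)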
329
330
331 type FunRules = Maybe (Int, [CoreRule]) -- Remaining rules for this function
332 -- Nothing => No rules
333 -- Just (n, rules) => some rules, requiring at least n more type/value args
334
335 decRules :: FunRules -> FunRules
336 decRules (Just (n, rules)) = Just (n-1, rules)
337 decRules Nothing = Nothing
338
339 mkFunRules :: [CoreRule] -> FunRules
340 mkFunRules [] = Nothing
341 mkFunRules rs = Just (n_required, rs)
342 where
343 n_required = maximum (map ruleArity rs)
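-- For instance (sketch only): if rs contains rules of ruleArity 2 and 3,
-- then mkFunRules rs = Just (3, rs); decRules then counts down by one for
-- each type or value argument supplied (see addValArgTo and addTyArgTo)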
344
345 {-
346 ************************************************************************
347 * *
348 Functions on SimplCont
349 * *
350 ************************************************************************
351 -}
352
353 mkBoringStop :: OutType -> SimplCont
354 mkBoringStop ty = Stop ty BoringCtxt
355
356 mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in CoreUnfold
357 mkRhsStop ty = Stop ty RhsCtxt
358
359 mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
360 mkLazyArgStop ty cci = Stop ty cci
361
362 -------------------
363 contIsRhsOrArg :: SimplCont -> Bool
364 contIsRhsOrArg (Stop {}) = True
365 contIsRhsOrArg (StrictBind {}) = True
366 contIsRhsOrArg (StrictArg {}) = True
367 contIsRhsOrArg _ = False
368
369 contIsRhs :: SimplCont -> Bool
370 contIsRhs (Stop _ RhsCtxt) = True
371 contIsRhs _ = False
372
373 -------------------
374 contIsStop :: SimplCont -> Bool
375 contIsStop (Stop {}) = True
376 contIsStop _ = False
377
378 contIsDupable :: SimplCont -> Bool
379 contIsDupable (Stop {}) = True
380 contIsDupable (ApplyToTy { sc_cont = k }) = contIsDupable k
381 contIsDupable (ApplyToVal { sc_dup = OkToDup }) = True -- See Note [DupFlag invariants]
382 contIsDupable (Select { sc_dup = OkToDup }) = True -- ...ditto...
383 contIsDupable (StrictArg { sc_dup = OkToDup }) = True -- ...ditto...
384 contIsDupable (CastIt _ k) = contIsDupable k
385 contIsDupable _ = False
386
387 -------------------
388 contIsTrivial :: SimplCont -> Bool
389 contIsTrivial (Stop {}) = True
390 contIsTrivial (ApplyToTy { sc_cont = k }) = contIsTrivial k
391 contIsTrivial (ApplyToVal { sc_arg = Coercion _, sc_cont = k }) = contIsTrivial k
392 contIsTrivial (CastIt _ k) = contIsTrivial k
393 contIsTrivial _ = False
394
395 -------------------
396 contResultType :: SimplCont -> OutType
397 contResultType (Stop ty _) = ty
398 contResultType (CastIt _ k) = contResultType k
399 contResultType (StrictBind { sc_cont = k }) = contResultType k
400 contResultType (StrictArg { sc_cont = k }) = contResultType k
401 contResultType (Select { sc_cont = k }) = contResultType k
402 contResultType (ApplyToTy { sc_cont = k }) = contResultType k
403 contResultType (ApplyToVal { sc_cont = k }) = contResultType k
404 contResultType (TickIt _ k) = contResultType k
405
406 contHoleType :: SimplCont -> OutType
407 contHoleType (Stop ty _) = ty
408 contHoleType (TickIt _ k) = contHoleType k
409 contHoleType (CastIt co _) = pFst (coercionKind co)
410 contHoleType (StrictBind { sc_bndr = b, sc_dup = dup, sc_env = se })
411 = perhapsSubstTy dup se (idType b)
412 contHoleType (StrictArg { sc_fun = ai }) = funArgTy (ai_type ai)
413 contHoleType (ApplyToTy { sc_hole_ty = ty }) = ty -- See Note [The hole type in ApplyToTy]
414 contHoleType (ApplyToVal { sc_arg = e, sc_env = se, sc_dup = dup, sc_cont = k })
415 = mkVisFunTy (perhapsSubstTy dup se (exprType e))
416 (contHoleType k)
417 contHoleType (Select { sc_dup = d, sc_bndr = b, sc_env = se })
418 = perhapsSubstTy d se (idType b)
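-- An informal example contrasting the two functions above: for
--     K = ApplyToVal { sc_arg = e, sc_cont = Stop ty _ }   -- e :: t, already simplified
-- contResultType K = ty        (the type of the whole expression K[<hole>])
-- contHoleType   K = t -> ty   (the type the <hole> itself must have)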
419
420 -------------------
421 countArgs :: SimplCont -> Int
422 -- Count all arguments, including types, coercions, and other values
423 countArgs (ApplyToTy { sc_cont = cont }) = 1 + countArgs cont
424 countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont
425 countArgs _ = 0
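-- E.g. (informally):
--     countArgs (ApplyToTy { sc_cont = ApplyToVal { sc_cont = Stop ... } }) = 2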
426
427 contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
428 -- Summarises value args, discards type args and coercions
429 -- The returned continuation of the call is only used to
430 -- answer questions like "are you interesting?"
431 contArgs cont
432 | lone cont = (True, [], cont)
433 | otherwise = go [] cont
434 where
435 lone (ApplyToTy {}) = False -- See Note [Lone variables] in CoreUnfold
436 lone (ApplyToVal {}) = False
437 lone (CastIt {}) = False
438 lone _ = True
439
440 go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
441 = go (is_interesting arg se : args) k
442 go args (ApplyToTy { sc_cont = k }) = go args k
443 go args (CastIt _ k) = go args k
444 go args k = (False, reverse args, k)
445
446 is_interesting arg se = interestingArg se arg
447 -- Do *not* use short-cutting substitution here
448 -- because we want to get as much IdInfo as possible
449
450
451 -------------------
452 mkArgInfo :: SimplEnv
453 -> Id
454 -> [CoreRule] -- Rules for function
455 -> Int -- Number of value args
456 -> SimplCont -- Context of the call
457 -> ArgInfo
458
459 mkArgInfo env fun rules n_val_args call_cont
460 | n_val_args < idArity fun -- Note [Unsaturated functions]
461 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
462 , ai_rules = fun_rules
463 , ai_encl = False
464 , ai_strs = vanilla_stricts
465 , ai_discs = vanilla_discounts }
466 | otherwise
467 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
468 , ai_rules = fun_rules
469 , ai_encl = interestingArgContext rules call_cont
470 , ai_strs = arg_stricts
471 , ai_discs = arg_discounts }
472 where
473 fun_ty = idType fun
474
475 fun_rules = mkFunRules rules
476
477 vanilla_discounts, arg_discounts :: [Int]
478 vanilla_discounts = repeat 0
479 arg_discounts = case idUnfolding fun of
480 CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
481 -> discounts ++ vanilla_discounts
482 _ -> vanilla_discounts
483
484 vanilla_stricts, arg_stricts :: [Bool]
485 vanilla_stricts = repeat False
486
487 arg_stricts
488 | not (sm_inline (seMode env))
489 = vanilla_stricts -- See Note [Do not expose strictness if sm_inline=False]
490 | otherwise
491 = add_type_str fun_ty $
492 case splitStrictSig (idStrictness fun) of
493 (demands, result_info)
494 | not (demands `lengthExceeds` n_val_args)
495 -> -- Enough args, use the strictness given.
496 -- For bottoming functions we used to pretend that the arg
497 -- is lazy, so that we don't treat the arg as an
498 -- interesting context. This avoids substituting
499 -- top-level bindings for (say) strings into
500 -- calls to error. But now we are more careful about
501 -- inlining lone variables, so it's ok (see SimplUtils.analyseCont)
502 if isBotRes result_info then
503 map isStrictDmd demands -- Finite => result is bottom
504 else
505 map isStrictDmd demands ++ vanilla_stricts
506 | otherwise
507 -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
508 <+> ppr n_val_args <+> ppr demands )
509 vanilla_stricts -- Not enough args, or no strictness
510
511 add_type_str :: Type -> [Bool] -> [Bool]
512 -- If the function arg types are strict, record that in the 'strictness bits'
513 -- No need to instantiate because unboxed types (which dominate the strict
514 -- types) can't instantiate type variables.
515 -- add_type_str is done repeatedly (for each call);
516 -- might be better once-for-all in the function
517 -- But beware primops/datacons with no strictness
518
519 add_type_str _ [] = []
520 add_type_str fun_ty all_strs@(str:strs)
521 | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty -- Add strict-type info
522 = (str || Just False == isLiftedType_maybe arg_ty)
523 : add_type_str fun_ty' strs
524 -- If the type is levity-polymorphic, we can't know whether it's
525 -- strict. isLiftedType_maybe will return Just False only when
526 -- we're sure the type is unlifted.
527
528 | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty
529 = add_type_str fun_ty' all_strs -- Look through foralls
530
531 | otherwise
532 = all_strs
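-- A rough example: if fun_ty = Int# -> Bool -> Int and the incoming flags
-- are [False, False], the result is [True, False]: Int# is certainly unlifted
-- (isLiftedType_maybe returns Just False), whereas Bool is lifted.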
533
534 {- Note [Unsaturated functions]
535 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
536 Consider (test eyeball/inline4)
537 x = a:as
538 y = f x
539 where f has arity 2. Then we do not want to inline 'x', because
540 it'll just be floated out again. Even if f has lots of discounts
541 on its first argument -- it must be saturated for these to kick in
542
543 Note [Do not expose strictness if sm_inline=False]
544 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
545 #15163 showed a case in which we had
546
547 {-# INLINE [1] zip #-}
548 zip = undefined
549
550 {-# RULES "foo" forall as bs. stream (zip as bs) = ..blah... #-}
551
552 If we expose zip's bottoming nature when simplifying the LHS of the
553 RULE we get
554 {-# RULES "foo" forall as bs.
555 stream (case zip of {}) = ..blah... #-}
556 discarding the arguments to zip. Usually this is fine, but on the
557 LHS of a rule it's not, because 'as' and 'bs' are now not bound on
558 the LHS.
559
560 This is a pretty pathological example, so I'm not losing sleep over
561 it, but the simplest solution was to check sm_inline; if it is False,
562 which it is on the LHS of a rule (see updModeForRules), then don't
563 make use of the strictness info for the function.
564 -}
565
566
567 {-
568 ************************************************************************
569 * *
570 Interesting arguments
571 * *
572 ************************************************************************
573
574 Note [Interesting call context]
575 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
576 We want to avoid inlining an expression where there can't possibly be
577 any gain, such as in an argument position. Hence, if the continuation
578 is interesting (eg. a case scrutinee, application etc.) then we
579 inline, otherwise we don't.
580
581 Previously some_benefit used to return True only if the variable was
582 applied to some value arguments. This didn't work:
583
584 let x = _coerce_ (T Int) Int (I# 3) in
585 case _coerce_ Int (T Int) x of
586 I# y -> ....
587
588 we want to inline x, but can't see that it's a constructor in a case
589 scrutinee position, and some_benefit is False.
590
591 Another example:
592
593 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
594
595 .... case dMonadST _@_ x0 of (a,b,c) -> ....
596
597 we'd really like to inline dMonadST here, but we *don't* want to
598 inline if the case expression is just
599
600 case x of y { DEFAULT -> ... }
601
602 since we can just eliminate this case instead (x is in WHNF). Similar
603 applies when x is bound to a lambda expression. Hence
604 contIsInteresting looks for case expressions with just a single
605 default case.
606
607 Note [No case of case is boring]
608 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
609 If we see
610 case f x of <alts>
611
612 we'd usually treat the context as interesting, to encourage 'f' to
613 inline. But if case-of-case is off, it's really not so interesting
614 after all, because we are unlikely to be able to push the case
615 expression into the branches of any case in f's unfolding. So, to
616 reduce unnecessary code expansion, we just make the context look boring.
617 This made a small compile-time perf improvement in perf/compiler/T6048,
618 and it looks plausible to me.
619 -}
620
621 interestingCallContext :: SimplEnv -> SimplCont -> CallCtxt
622 -- See Note [Interesting call context]
623 interestingCallContext env cont
624 = interesting cont
625 where
626 interesting (Select {})
627 | sm_case_case (getMode env) = CaseCtxt
628 | otherwise = BoringCtxt
629 -- See Note [No case of case is boring]
630
631 interesting (ApplyToVal {}) = ValAppCtxt
632 -- Can happen if we have (f Int |> co) y
633 -- If f has an INLINE prag we need to give it some
634 -- motivation to inline. See Note [Cast then apply]
635 -- in CoreUnfold
636
637 interesting (StrictArg { sc_cci = cci }) = cci
638 interesting (StrictBind {}) = BoringCtxt
639 interesting (Stop _ cci) = cci
640 interesting (TickIt _ k) = interesting k
641 interesting (ApplyToTy { sc_cont = k }) = interesting k
642 interesting (CastIt _ k) = interesting k
643 -- If this call is the arg of a strict function, the context
644 -- is a bit interesting. If we inline here, we may get useful
645 -- evaluation information to avoid repeated evals: e.g.
646 -- x + (y * z)
647 -- Here the contIsInteresting makes the '*' keener to inline,
648 -- which in turn exposes a constructor which makes the '+' inline.
649 -- Assuming that +,* aren't small enough to inline regardless.
650 --
651 -- It's also very important to inline in a strict context for things
652 -- like
653 -- foldr k z (f x)
654 -- Here, the context of (f x) is strict, and if f's unfolding is
655 -- a build it's *great* to inline it here. So we must ensure that
656 -- the context for (f x) is not totally uninteresting.
657
658 interestingArgContext :: [CoreRule] -> SimplCont -> Bool
659 -- If the argument has form (f x y), where x,y are boring,
660 -- and f is marked INLINE, then we don't want to inline f.
661 -- But if the context of the argument is
662 -- g (f x y)
663 -- where g has rules, then we *do* want to inline f, in case it
664 -- exposes a rule that might fire. Similarly, if the context is
665 -- h (g (f x x))
666 -- where h has rules, then we do want to inline f; hence the
667 -- call_cont argument to interestingArgContext
668 --
669 -- The ai_encl flag makes this happen; if it's
670 -- set, the inliner gets just enough keener to inline f
671 -- regardless of how boring f's arguments are, if it's marked INLINE
672 --
673 -- The alternative would be to *always* inline an INLINE function,
674 -- regardless of how boring its context is; but that seems overkill
675 -- For example, it'd mean that wrapper functions were always inlined
676 --
677 -- The call_cont passed to interestingArgContext is the context of
678 -- the call itself, e.g. g <hole> in the example above
679 interestingArgContext rules call_cont
680 = notNull rules || enclosing_fn_has_rules
681 where
682 enclosing_fn_has_rules = go call_cont
683
684 go (Select {}) = False
685 go (ApplyToVal {}) = False -- Shouldn't really happen
686 go (ApplyToTy {}) = False -- Ditto
687 go (StrictArg { sc_cci = cci }) = interesting cci
688 go (StrictBind {}) = False -- ??
689 go (CastIt _ c) = go c
690 go (Stop _ cci) = interesting cci
691 go (TickIt _ c) = go c
692
693 interesting RuleArgCtxt = True
694 interesting _ = False
695
696
697 {- Note [Interesting arguments]
698 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
699 An argument is interesting if it deserves a discount for unfoldings
700 with a discount in that argument position. The idea is to avoid
701 unfolding a function that is applied only to variables that have no
702 unfolding (i.e. they are probably lambda bound):   f x y z
703 There is little point in inlining f here.
704
705 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
706 we must look through lets, eg (let x = e in C a b), because the let will
707 float, exposing the value, if we inline. That makes it different to
708 exprIsHNF.
709
710 Before 2009 we said it was interesting if the argument had *any* structure
711 at all; i.e. (hasSomeUnfolding v). But that did too much inlining; see #3016.
712
713 But we don't regard (f x y) as interesting, unless f is unsaturated.
714 If it's saturated and f hasn't inlined, then it's probably not going
715 to now!
716
717 Note [Conlike is interesting]
718 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
719 Consider
720 f d = ...((*) d x y)...
721 ... f (df d')...
722 where df is con-like. Then we'd really like to inline 'f' so that the
723 rule for (*) (df d) can fire. To do this
724 a) we give a discount for being an argument of a class-op (eg (*) d)
725 b) we say that a con-like argument (eg (df d)) is interesting
726 -}
727
728 interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
729 -- See Note [Interesting arguments]
730 interestingArg env e = go env 0 e
731 where
732 -- n is # value args to which the expression is applied
733 go env n (Var v)
734 = case substId env v of
735 DoneId v' -> go_var n v'
736 DoneEx e _ -> go (zapSubstEnv env) n e
737 ContEx tvs cvs ids e -> go (setSubstEnv env tvs cvs ids) n e
738
739 go _ _ (Lit {}) = ValueArg
740 go _ _ (Type _) = TrivArg
741 go _ _ (Coercion _) = TrivArg
742 go env n (App fn (Type _)) = go env n fn
743 go env n (App fn _) = go env (n+1) fn
744 go env n (Tick _ a) = go env n a
745 go env n (Cast e _) = go env n e
746 go env n (Lam v e)
747 | isTyVar v = go env n e
748 | n>0 = NonTrivArg -- (\x.b) e is NonTriv
749 | otherwise = ValueArg
750 go _ _ (Case {}) = NonTrivArg
751 go env n (Let b e) = case go env' n e of
752 ValueArg -> ValueArg
753 _ -> NonTrivArg
754 where
755 env' = env `addNewInScopeIds` bindersOf b
756
757 go_var n v
758 | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather than
759 -- data constructors here
760 | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
761 | n > 0 = NonTrivArg -- Saturated or unknown call
762 | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
763 -- See Note [Conlike is interesting]
764 | otherwise = TrivArg -- n==0, no useful unfolding
765 where
766 conlike_unfolding = isConLikeUnfolding (idUnfolding v)
767
768 {-
769 ************************************************************************
770 * *
771 SimplMode
772 * *
773 ************************************************************************
774
775 The SimplMode controls several switches; see its definition in
776 CoreMonad
777 sm_rules :: Bool -- Whether RULES are enabled
778 sm_inline :: Bool -- Whether inlining is enabled
779 sm_case_case :: Bool -- Whether case-of-case is enabled
780 sm_eta_expand :: Bool -- Whether eta-expansion is enabled
781 -}
782
783 simplEnvForGHCi :: DynFlags -> SimplEnv
784 simplEnvForGHCi dflags
785 = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
786 , sm_phase = InitialPhase
787 , sm_dflags = dflags
788 , sm_rules = rules_on
789 , sm_inline = False
790 , sm_eta_expand = eta_expand_on
791 , sm_case_case = True }
792 where
793 rules_on = gopt Opt_EnableRewriteRules dflags
794 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
795 -- Do not do any inlining, in case we expose some unboxed
796 -- tuple stuff that confuses the bytecode interpreter
797
798 updModeForStableUnfoldings :: Activation -> SimplMode -> SimplMode
799 -- See Note [Simplifying inside stable unfoldings]
800 updModeForStableUnfoldings inline_rule_act current_mode
801 = current_mode { sm_phase = phaseFromActivation inline_rule_act
802 , sm_inline = True
803 , sm_eta_expand = False }
804 -- sm_eta_expand: see Note [No eta expansion in stable unfoldings]
805 -- For sm_rules, just inherit; sm_rules might be "off"
806 -- because of -fno-enable-rewrite-rules
807 where
808 phaseFromActivation (ActiveAfter _ n) = Phase n
809 phaseFromActivation _ = InitialPhase
810
811 updModeForRules :: SimplMode -> SimplMode
812 -- See Note [Simplifying rules]
813 updModeForRules current_mode
814 = current_mode { sm_phase = InitialPhase
815 , sm_inline = False -- See Note [Do not expose strictness if sm_inline=False]
816 , sm_rules = False
817 , sm_eta_expand = False }
818
819 {- Note [Simplifying rules]
820 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
821 When simplifying a rule LHS, refrain from /any/ inlining or applying
822 of other RULES.
823
824 Doing anything to the LHS is plain confusing, because it means that what the
825 rule matches is not what the user wrote. c.f. #10595, and #10528.
826 Moreover, inlining (or applying rules) on rule LHSs risks introducing
827 Ticks into the LHS, which makes matching trickier. #10665, #10745.
828
829 Doing this to either side confounds tools like HERMIT, which seek to reason
830 about and apply the RULES as originally written. See #10829.
831
832 Note [No eta expansion in stable unfoldings]
833 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
834 If we have a stable unfolding
835
836 f :: Ord a => a -> IO ()
837 -- Unfolding template
838 -- = /\a \(d:Ord a) (x:a). bla
839
840 we do not want to eta-expand to
841
842 f :: Ord a => a -> IO ()
843 -- Unfolding template
844 -- = (/\a \(d:Ord a) (x:a) (eta:State#). bla eta) |> co
845
846 because then specialisation of the overloading doesn't work properly
847 (see Note [Specialisation shape] in Specialise), #9509.
848
849 So we disable eta-expansion in stable unfoldings.
850
851 Note [Inlining in gentle mode]
852 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
853 Something is inlined if
854 (i) the sm_inline flag is on, AND
855 (ii) the thing has an INLINE pragma, AND
856 (iii) the thing is inlinable in the earliest phase.
857
858 Example of why (iii) is important:
859 {-# INLINE [~1] g #-}
860 g = ...
861
862 {-# INLINE f #-}
863 f x = g (g x)
864
865 If we were to inline g into f's inlining, then an importing module would
866 never be able to do
867 f e --> g (g e) ---> RULE fires
868 because the stable unfolding for f has had g inlined into it.
869
870 On the other hand, it is bad not to do ANY inlining into a
871 stable unfolding, because then recursive knots in instance declarations
872 don't get unravelled.
873
874 However, *sometimes* SimplGently must do no call-site inlining at all
875 (hence sm_inline = False). Before full laziness we must be careful
876 not to inline wrappers, because doing so inhibits floating
877 e.g. ...(case f x of ...)...
878 ==> ...(case (case x of I# x# -> fw x#) of ...)...
879 ==> ...(case x of I# x# -> case fw x# of ...)...
880 and now the redex (f x) isn't floatable any more.
881
882 The no-inlining thing is also important for Template Haskell. You might be
883 compiling in one-shot mode with -O2; but when TH compiles a splice before
884 running it, we don't want to use -O2. Indeed, we don't want to inline
885 anything, because the byte-code interpreter might get confused about
886 unboxed tuples and suchlike.
887
888 Note [Simplifying inside stable unfoldings]
889 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
890 We must take care with simplification inside stable unfoldings (which come from
891 INLINE pragmas).
892
893 First, consider the following example
894 let f = \pq -> BIG
895 in
896 let g = \y -> f y y
897 {-# INLINE g #-}
898 in ...g...g...g...g...g...
899 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
900 and thence copied multiple times when g is inlined. HENCE we treat
901 any occurrence in a stable unfolding as a multiple occurrence, not a single
902 one; see OccurAnal.addRuleUsage.
903
904 Second, we *do* want to do some modest rules/inlining stuff in stable
905 unfoldings, partly to eliminate senseless crap, and partly to break
906 the recursive knots generated by instance declarations.
907
908 However, suppose we have
909 {-# INLINE <act> f #-}
910 f = <rhs>
911 meaning "inline f in phases p where activation <act>(p) holds".
912 Then what inlinings/rules can we apply to the copy of <rhs> captured in
913 f's stable unfolding? Our model is that literally <rhs> is substituted for
914 f when it is inlined. So our conservative plan (implemented by
915 updModeForStableUnfoldings) is this:
916
917 -------------------------------------------------------------
918 When simplifying the RHS of a stable unfolding, set the phase
919 to the phase in which the stable unfolding first becomes active
920 -------------------------------------------------------------
921
922 That ensures that
923
924 a) Rules/inlinings that *cease* being active before p will
925 not apply to the stable unfolding, consistent with it being
926 inlined in its *original* form in phase p.
927
928 b) Rules/inlinings that only become active *after* p will
929 not apply to the stable unfolding, again to be consistent with
930 inlining the *original* rhs in phase p.
931
932 For example,
933 {-# INLINE f #-}
934 f x = ...g...
935
936 {-# NOINLINE [1] g #-}
937 g y = ...
938
939 {-# RULE h g = ... #-}
940 Here we must not inline g into f's RHS, even when we get to phase 0,
941 because when f is later inlined into some other module we want the
942 rule for h to fire.
943
944 Similarly, consider
945 {-# INLINE f #-}
946 f x = ...g...
947
948 g y = ...
949 and suppose that there are auto-generated specialisations and a strictness
950 wrapper for g. The specialisations get activation AlwaysActive, and the
951 strictness wrapper gets activation (ActiveAfter 0). So the strictness
952 wrapper fails the test and won't be inlined into f's stable unfolding. That
953 means f can inline, expose the specialised call to g, so the specialisation
954 rules can fire.
955
956 A note about wrappers
957 ~~~~~~~~~~~~~~~~~~~~~
958 It's also important not to inline a worker back into a wrapper.
959 A wrapper looks like
960 wrapper = inline_me (\x -> ...worker... )
961 Normally, the inline_me prevents the worker getting inlined into
962 the wrapper (initially, the worker's only call site!). But,
963 if the wrapper is sure to be called, the strictness analyser will
964 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
965 continuation.
966 -}
967
968 activeUnfolding :: SimplMode -> Id -> Bool
969 activeUnfolding mode id
970 | isCompulsoryUnfolding (realIdUnfolding id)
971 = True -- Even sm_inline can't override compulsory unfoldings
972 | otherwise
973 = isActive (sm_phase mode) (idInlineActivation id)
974 && sm_inline mode
975 -- `or` isStableUnfolding (realIdUnfolding id)
976 -- Inline things when
977 -- (a) they are active
978 -- (b) sm_inline says so, except that for stable unfoldings
979 -- (ie pragmas) we inline anyway
980
981 getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
982 -- When matching in RULE, we want to "look through" an unfolding
983 -- (to see a constructor) if *rules* are on, even if *inlinings*
984 -- are not. A notable example is DFuns, which really we want to
985 -- match in rules like (op dfun) in gentle mode. Another example
986 -- is 'otherwise' which we want exprIsConApp_maybe to be able to
987 -- see very early on
988 getUnfoldingInRuleMatch env
989 = (in_scope, id_unf)
990 where
991 in_scope = seInScope env
992 mode = getMode env
993 id_unf id | unf_is_active id = idUnfolding id
994 | otherwise = NoUnfolding
995 unf_is_active id
996 | not (sm_rules mode) = -- active_unfolding_minimal id
997 isStableUnfolding (realIdUnfolding id)
998 -- Do we even need to test this? I think this InScopeEnv
999 -- is only consulted if activeRule returns True, which
1000 -- never happens if sm_rules is False
1001 | otherwise = isActive (sm_phase mode) (idInlineActivation id)
1002
1003 ----------------------
1004 activeRule :: SimplMode -> Activation -> Bool
1005 -- Is a rule with this Activation active in the current mode?
1006 activeRule mode
1007 | not (sm_rules mode) = \_ -> False -- Rewriting is off
1008 | otherwise = isActive (sm_phase mode)
1009
1010 {-
1011 ************************************************************************
1012 * *
1013 preInlineUnconditionally
1014 * *
1015 ************************************************************************
1016
1017 preInlineUnconditionally
1018 ~~~~~~~~~~~~~~~~~~~~~~~~
1019 @preInlineUnconditionally@ examines a bndr to see if it is used just
1020 once in a completely safe way, so that it is safe to discard the
1021 binding and inline its RHS at the (unique) usage site, REGARDLESS of how
1022 big the RHS might be. If this is the case we don't simplify the RHS
1023 first, but just inline it un-simplified.
1024
1025 This is much better than first simplifying a perhaps-huge RHS and then
1026 inlining and re-simplifying it. Indeed, it can be at least quadratically
1027 better. Consider
1028
1029 x1 = e1
1030 x2 = e2[x1]
1031 x3 = e3[x2]
1032 ...etc...
1033 xN = eN[xN-1]
1034
1035 We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times, etc.
1036 This can happen with cascades of functions too:
1037
1038 f1 = \x1.e1
1039 f2 = \xs.e2[f1]
1040 f3 = \xs.e3[f2]
1041 ...etc...
1042
1043 THE MAIN INVARIANT is this:
1044
1045 ---- preInlineUnconditionally invariant -----
1046 IF preInlineUnconditionally chooses to inline x = <rhs>
1047 THEN doing the inlining should not change the occurrence
1048 info for the free vars of <rhs>
1049 ----------------------------------------------
1050
1051 For example, it's tempting to look at trivial binding like
1052 x = y
1053 and inline it unconditionally. But suppose x is used many times,
1054 but this is the unique occurrence of y. Then inlining x would change
1055 y's occurrence info, which breaks the invariant. It matters: y
1056 might have a BIG rhs, which will now be dup'd at every occurrence of x.
1057
1058
1059 Even RHSs labelled InlineMe aren't caught here, because there might be
1060 no benefit from inlining at the call site.
1061
1062 [Sept 01] Don't unconditionally inline a top-level thing, because that
1063 can simply make a static thing into something built dynamically. E.g.
1064 x = (a,b)
1065 main = \s -> h x
1066
1067 [Remember that we treat \s as a one-shot lambda.] No point in
1068 inlining x unless there is something interesting about the call site.
1069
1070 But watch out: if you aren't careful, some useful foldr/build fusion
1071 can be lost (most notably in spectral/hartel/parstof) because the
1072 foldr didn't see the build. Doing the dynamic allocation isn't a big
1073 deal, in fact, but losing the fusion can be. But the right thing here
1074 seems to be to do a callSiteInline based on the fact that there is
1075 something interesting about the call site (it's strict). Hmm. That
1076 seems a bit fragile.
1077
1078 Conclusion: inline top level things gaily until Phase 0 (the last
1079 phase), at which point don't.
1080
1081 Note [pre/postInlineUnconditionally in gentle mode]
1082 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1083 Even in gentle mode we want to do preInlineUnconditionally. The
1084 reason is that too little clean-up happens if you don't inline
1085 use-once things. Also a bit of inlining is *good* for full laziness;
1086 it can expose constant sub-expressions. Example in
1087 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
1088 let-float if you inline windowToViewport
1089
1090 However, as usual for Gentle mode, do not inline things that are
1091 inactive in the initial stages. See Note [Gentle mode].
1092
1093 Note [Stable unfoldings and preInlineUnconditionally]
1094 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1095 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
1096 Example
1097
1098 {-# INLINE f #-}
1099 f :: Eq a => a -> a
1100 f x = ...
1101
1102 fInt :: Int -> Int
1103 fInt = f Int dEqInt
1104
1105 ...fInt...fInt...fInt...
1106
1107 Here f occurs just once, in the RHS of fInt. But if we inline it there
1108 it might make fInt look big, and we'll lose the opportunity to inline f
1109 at each of fInt's call sites. The INLINE pragma will only inline when
1110 the application is saturated for exactly this reason; and we don't
1111 want PreInlineUnconditionally to second-guess it. A live example is
1112 #3736.
1113 c.f. Note [Stable unfoldings and postInlineUnconditionally]
1114
1115 NB: if the pragma is INLINEABLE, then we don't want to behave in
1116 this special way -- an INLINEABLE pragma just says to GHC "inline this
1117 if you like". But if there is a unique occurrence, we want to inline
1118 the stable unfolding, not the RHS.
1119
1120 Note [Top-level bottoming Ids]
1121 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1122 Don't inline top-level Ids that are bottoming, even if they are used just
1123 once, because FloatOut has gone to some trouble to extract them out.
1124 Inlining them won't make the program run faster!
1125
1126 Note [Do not inline CoVars unconditionally]
1127 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1128 Coercion variables appear inside coercions, and the RHS of a let-binding
1129 is a term (not a coercion) so we can't necessarily inline the latter in
1130 the former.
1131 -}
1132
1133 preInlineUnconditionally
1134 :: SimplEnv -> TopLevelFlag -> InId
1135 -> InExpr -> StaticEnv -- These two go together
1136 -> Maybe SimplEnv -- Returned env has extended substitution
1137 -- Precondition: rhs satisfies the let/app invariant
1138 -- See Note [CoreSyn let/app invariant] in CoreSyn
1139 -- Reason: we don't want to inline single uses, or discard dead bindings,
1140 -- for unlifted, side-effect-ful bindings
1141 preInlineUnconditionally env top_lvl bndr rhs rhs_env
1142 | not pre_inline_unconditionally = Nothing
1143 | not active = Nothing
1144 | isTopLevel top_lvl && isBottomingId bndr = Nothing -- Note [Top-level bottoming Ids]
1145 | isCoVar bndr = Nothing -- Note [Do not inline CoVars unconditionally]
1146 | isExitJoinId bndr = Nothing -- Note [Do not inline exit join points]
1147 -- in module Exitify
1148 | not (one_occ (idOccInfo bndr)) = Nothing
1149 | not (isStableUnfolding unf) = Just (extend_subst_with rhs)
1150
1151 -- Note [Stable unfoldings and preInlineUnconditionally]
1152 | isInlinablePragma inline_prag
1153 , Just inl <- maybeUnfoldingTemplate unf = Just (extend_subst_with inl)
1154 | otherwise = Nothing
1155 where
1156 unf = idUnfolding bndr
1157 extend_subst_with inl_rhs = extendIdSubst env bndr (mkContEx rhs_env inl_rhs)
1158
1159 one_occ IAmDead = True -- Happens in ((\x.1) v)
1160 one_occ (OneOcc { occ_one_br = True -- One textual occurrence
1161 , occ_in_lam = in_lam
1162 , occ_int_cxt = int_cxt })
1163 | not in_lam = isNotTopLevel top_lvl || early_phase
1164 | otherwise = int_cxt && canInlineInLam rhs
1165 one_occ _ = False
1166
1167 pre_inline_unconditionally = gopt Opt_SimplPreInlining (seDynFlags env)
1168 mode = getMode env
1169 active = isActive (sm_phase mode) (inlinePragmaActivation inline_prag)
1170 -- See Note [pre/postInlineUnconditionally in gentle mode]
1171 inline_prag = idInlinePragma bndr
1172
1173 -- Be very careful before inlining inside a lambda, because (a) we must not
1174 -- invalidate occurrence information, and (b) we want to avoid pushing a
1175 -- single allocation (here) into multiple allocations (inside lambda).
1176 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
1177 -- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
1178 -- where
1179 -- is_cheap = exprIsCheap rhs
1180 -- ok = is_cheap && int_cxt
1181
1182 -- int_cxt The context isn't totally boring
1183 -- E.g. let f = \ab.BIG in \y. map f xs
1184 -- Don't want to substitute for f, because then we allocate
1185 -- its closure every time the \y is called
1186 -- But: let f = \ab.BIG in \y. map (f y) xs
1187 -- Now we do want to substitute for f, even though it's not
1188 -- saturated, because we're going to allocate a closure for
1189 -- (f y) every time round the loop anyhow.
1190
1191 -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
1192 -- so substituting rhs inside a lambda doesn't change the occ info.
1193 -- Sadly, not quite the same as exprIsHNF.
1194 canInlineInLam (Lit _) = True
1195 canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
1196 canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
1197 canInlineInLam _ = False
1198 -- not ticks. Counting ticks cannot be duplicated, and non-counting
1199 -- ticks around a Lam will disappear anyway.
1200
1201 early_phase = case sm_phase mode of
1202 Phase 0 -> False
1203 _ -> True
1204 -- If we don't have this early_phase test, consider
1205 -- x = length [1,2,3]
1206 -- The full laziness pass carefully floats all the cons cells to
1207 -- top level, and preInlineUnconditionally floats them all back in.
1208 -- Result is (a) static allocation replaced by dynamic allocation
1209 -- (b) many simplifier iterations because this tickles
1210 -- a related problem; only one inlining per pass
1211 --
1212 -- On the other hand, I have seen cases where top-level fusion is
1213 -- lost if we don't inline top level thing (e.g. string constants)
1214 -- Hence the test for phase zero (which is the phase for all the final
1215 -- simplifications). Until phase zero we take no special notice of
1216 -- top level things, but then we become more leery about inlining
1217 -- them.
1218
1219 {-
1220 ************************************************************************
1221 * *
1222 postInlineUnconditionally
1223 * *
1224 ************************************************************************
1225
1226 postInlineUnconditionally
1227 ~~~~~~~~~~~~~~~~~~~~~~~~~
1228 @postInlineUnconditionally@ decides whether to unconditionally inline
1229 a thing based on the form of its RHS; in particular if it has a
1230 trivial RHS. If so, we can inline and discard the binding altogether.
1231
1232 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
1233 only have *forward* references. Hence, it's safe to discard the binding
1234
1235 NOTE: This isn't our last opportunity to inline. We're at the binding
1236 site right now, and we'll get another opportunity when we get to the
1237 occurrence(s)
1238
1239 Note that we do this unconditional inlining only for trivial RHSs.
1240 Don't inline even WHNFs inside lambdas; doing so may simply increase
1241 allocation when the function is called. This isn't the last chance; see
1242 NOTE above.
1243
1244 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here. Why?
1245 Because we don't even want to inline them into the RHS of constructor
1246 arguments. See NOTE above
1247
1248 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
1249 it's best to inline it anyway. We often get a=E; b=a from desugaring,
1250 with both a and b marked NOINLINE. But that seems incompatible with
1251 our new view that inlining is like a RULE, so I'm sticking to the 'active'
1252 story for now.
1253 -}
1254
1255 postInlineUnconditionally
1256 :: SimplEnv -> TopLevelFlag
1257 -> OutId -- The binder (*not* a CoVar), including its unfolding
1258 -> OccInfo -- From the InId
1259 -> OutExpr
1260 -> Bool
1261 -- Precondition: rhs satisfies the let/app invariant
1262 -- See Note [CoreSyn let/app invariant] in CoreSyn
1263 -- Reason: we don't want to inline single uses, or discard dead bindings,
1264 -- for unlifted, side-effect-ful bindings
1265 postInlineUnconditionally env top_lvl bndr occ_info rhs
1266 | not active = False
1267 | isWeakLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline
1268 -- because it might be referred to "earlier"
1269 | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
1270 | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally]
1271 | exprIsTrivial rhs = True
1272 | otherwise
1273 = case occ_info of
1274 -- The point of examining occ_info here is that for *non-values*
1275 -- that occur outside a lambda, the call-site inliner won't have
1276 -- a chance (because it doesn't know that the thing
1277 -- only occurs once). The pre-inliner won't have gotten
1278 -- it either, if the thing occurs in more than one branch
1279 -- So the main target is things like
1280 -- let x = f y in
1281 -- case v of
1282 -- True -> case x of ...
1283 -- False -> case x of ...
1284 -- This is very important in practice; e.g. wheel-seive1 doubles
1285 -- in allocation if you miss this out
1286 OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt }
1287 -- OneOcc => no code-duplication issue
1288 -> smallEnoughToInline dflags unfolding -- Small enough to dup
1289 -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
1290 --
1291 -- NB: Do NOT inline arbitrarily big things, even if one_br is True
1292 -- Reason: doing so risks exponential behaviour. We simplify a big
1293 -- expression, inline it, and simplify it again. But if the
1294 -- very same thing happens in the big expression, we get
1295 -- exponential cost!
1296 -- PRINCIPLE: when we've already simplified an expression once,
1297 -- make sure that we only inline it if it's reasonably small.
1298
1299 && (not in_lam ||
1300 -- Outside a lambda, we want to be reasonably aggressive
1301 -- about inlining into multiple branches of case
1302 -- e.g. let x = <non-value>
1303 -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
1304 -- Inlining can be a big win if C3 is the hot-spot, even if
1305 -- the uses in C1, C2 are not 'interesting'
1306 -- An example that gets worse if you add int_cxt here is 'clausify'
1307
1308 (isCheapUnfolding unfolding && int_cxt))
1309 -- isCheap => acceptable work duplication; in_lam may be true
1310 -- int_cxt to prevent us inlining inside a lambda without some
1311 -- good reason. See the notes on int_cxt in preInlineUnconditionally
1312
1313 IAmDead -> True -- This happens; for example, the case_bndr during case of
1314 -- known constructor: case (a,b) of x { (p,q) -> ... }
1315 -- Here x isn't mentioned in the RHS, so we don't want to
1316 -- create the (dead) let-binding let x = (a,b) in ...
1317
1318 _ -> False
1319
1320 -- Here's an example that we don't handle well:
1321 -- let f = if b then Left (\x.BIG) else Right (\y.BIG)
1322 -- in \y. ....case f of {...} ....
1323 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
1324 -- But
1325 -- - We can't preInlineUnconditionally because that would invalidate
1326 -- the occ info for b.
1327 -- - We can't postInlineUnconditionally because the RHS is big, and
1328 -- that risks exponential behaviour
1329 -- - We can't call-site inline, because the rhs is big
1330 -- Alas!
1331
1332 where
1333 unfolding = idUnfolding bndr
1334 dflags = seDynFlags env
1335 active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
1336 -- See Note [pre/postInlineUnconditionally in gentle mode]
1337
1338 {-
1339 Note [Top level and postInlineUnconditionally]
1340 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1341 We don't do postInlineUnconditionally for top-level things (even for
1342 ones that are trivial):
1343
1344 * Doing so will inline top-level error expressions that have been
1345 carefully floated out by FloatOut. More generally, it might
1346 replace static allocation with dynamic.
1347
1348 * Even for trivial expressions there's a problem. Consider
1349 {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
1350 blah xs = reverse xs
1351 ruggle = sort
1352 In one simplifier pass we might fire the rule, getting
1353 blah xs = ruggle xs
1354 but in *that* simplifier pass we must not do postInlineUnconditionally
1355 on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
1356
1357 If the rhs is trivial it'll be inlined by callSiteInline, and then
1358 the binding will be dead and discarded by the next use of OccurAnal
1359
1360 * There is less point, because the main goal is to get rid of local
1361 bindings used in multiple case branches.
1362
1363 * The inliner should inline trivial things at call sites anyway.
1364
1365 * The Id might be exported. We could check for that separately,
1366 but since we aren't going to postInlineUnconditionally /any/
1367 top-level bindings, we don't need to test.
1368
1369 Note [Stable unfoldings and postInlineUnconditionally]
1370 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1371 Do not do postInlineUnconditionally if the Id has a stable unfolding,
1372 otherwise we lose the unfolding. Example
1373
1374 -- f has stable unfolding with rhs (e |> co)
1375 -- where 'e' is big
1376 f = e |> co
1377
1378 Then there's a danger we'll optimise to
1379
1380 f' = e
1381 f = f' |> co
1382
1383 and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
1384 won't inline because 'e' is too big.
1385
1386 c.f. Note [Stable unfoldings and preInlineUnconditionally]
1387
1388
1389 ************************************************************************
1390 * *
1391 Rebuilding a lambda
1392 * *
1393 ************************************************************************
1394 -}
1395
1396 mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
1397 -- mkLam tries three things
1398 -- a) eta reduction, if that gives a trivial expression
1399 -- b) eta expansion [only if there are some value lambdas]
-- c) floating a cast out of the lambda body (see Note [Casts and lambdas])
1400
1401 mkLam _env [] body _cont
1402 = return body
1403 mkLam env bndrs body cont
1404 = do { dflags <- getDynFlags
1405 ; mkLam' dflags bndrs body }
1406 where
1407 mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
1408 mkLam' dflags bndrs (Cast body co)
1409 | not (any bad bndrs)
1410 -- Note [Casts and lambdas]
1411 = do { lam <- mkLam' dflags bndrs body
1412 ; return (mkCast lam (mkPiCos Representational bndrs co)) }
1413 where
1414 co_vars = tyCoVarsOfCo co
1415 bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
1416
1417 mkLam' dflags bndrs body@(Lam {})
1418 = mkLam' dflags (bndrs ++ bndrs1) body1
1419 where
1420 (bndrs1, body1) = collectBinders body
1421
1422 mkLam' dflags bndrs (Tick t expr)
1423 | tickishFloatable t
1424 = mkTick t <$> mkLam' dflags bndrs expr
1425
1426 mkLam' dflags bndrs body
1427 | gopt Opt_DoEtaReduction dflags
1428 , Just etad_lam <- tryEtaReduce bndrs body
1429 = do { tick (EtaReduction (head bndrs))
1430 ; return etad_lam }
1431
1432 | not (contIsRhs cont) -- See Note [Eta expanding lambdas]
1433 , sm_eta_expand (getMode env)
1434 , any isRuntimeVar bndrs
1435 , let body_arity = exprEtaExpandArity dflags body
1436 , body_arity > 0
1437 = do { tick (EtaExpansion (head bndrs))
1438 ; let res = mkLams bndrs (etaExpand body_arity body)
1439 ; traceSmpl "eta expand" (vcat [text "before" <+> ppr (mkLams bndrs body)
1440 , text "after" <+> ppr res])
1441 ; return res }
1442
1443 | otherwise
1444 = return (mkLams bndrs body)
1445
1446 {-
1447 Note [Eta expanding lambdas]
1448 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1449 In general we *do* want to eta-expand lambdas. Consider
1450 f (\x -> case x of (a,b) -> \s -> blah)
1451 where 's' is a state token, and hence can be eta expanded. This
1452 showed up in the code for GHC.IO.Handle.Text.hPutChar, a rather
1453 important function!
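
Concretely, the expansion we are after is roughly this (a sketch, not
the exact simplifier output):
    f (\x -> case x of (a,b) -> \s -> blah)
      ==>  f (\x s -> case x of (a,b) -> blah)
so that the lambda for the state token 's' becomes manifest.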
1454
1455 The eta-expansion will never happen unless we do it now. (Well, it's
1456 possible that CorePrep will do it, but CorePrep only has a half-baked
1457 eta-expander that can't deal with casts. So it's much better to do it
1458 here.)
1459
1460 However, when the lambda is let-bound, as the RHS of a let, we have a
1461 better eta-expander (in the form of tryEtaExpandRhs), so we don't
1462 bother to try expansion in mkLam in that case; hence the contIsRhs
1463 guard.
1464
1465 NB: We check the SimplEnv (sm_eta_expand), not DynFlags.
1466 See Note [No eta expansion in stable unfoldings]
1467
1468 Note [Casts and lambdas]
1469 ~~~~~~~~~~~~~~~~~~~~~~~~
1470 Consider
1471 (\x. (\y. e) `cast` g1) `cast` g2
1472 There is a danger here that the two lambdas look separated, and the
1473 full laziness pass might float an expression to between the two.
1474
1475 So this equation in mkLam' floats the g1 out, thus:
1476 (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
1477 where x:tx.
1478
1479 In general, this floats casts outside lambdas, where (I hope) they
1480 might meet and cancel with some other cast:
1481 \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
1482 /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
1483 /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
1484 (if not (g `in` co))
1485
1486 Notice that it works regardless of 'e'. Originally it worked only
1487 if 'e' was itself a lambda, but in some cases that resulted in
1488 fruitless iteration in the simplifier. A good example was when
1489 compiling Text.ParserCombinators.ReadPrec, where we had a definition
1490 like (\x. Get `cast` g)
1491 where Get is a constructor with nonzero arity. Then mkLam eta-expanded
1492 the Get, and the next iteration eta-reduced it, and then eta-expanded
1493 it again.
1494
1495 Note also the side condition for the case of coercion binders.
1496 It does not make sense to transform
1497 /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
1498 because the latter is not well-kinded.
1499
1500 ************************************************************************
1501 * *
1502 Eta expansion
1503 * *
1504 ************************************************************************
1505 -}
1506
1507 tryEtaExpandRhs :: SimplMode -> OutId -> OutExpr
1508 -> SimplM (Arity, Bool, OutExpr)
1509 -- See Note [Eta-expanding at let bindings]
1510 -- If tryEtaExpandRhs rhs = (n, is_bot, rhs') then
1511 -- (a) rhs' has manifest arity
1512 -- (b) if is_bot is True then rhs' applied to n args is guaranteed bottom
1513 tryEtaExpandRhs mode bndr rhs
1514 | Just join_arity <- isJoinId_maybe bndr
1515 = do { let (join_bndrs, join_body) = collectNBinders join_arity rhs
1516 ; return (count isId join_bndrs, exprIsBottom join_body, rhs) }
1517 -- Note [Do not eta-expand join points]
1518 -- But do return the correct arity and bottom-ness, because
1519 -- these are used to set the bndr's IdInfo (#15517)
1520
1521 | otherwise
1522 = do { (new_arity, is_bot, new_rhs) <- try_expand
1523
1524 ; WARN( new_arity < old_id_arity,
1525 (text "Arity decrease:" <+> (ppr bndr <+> ppr old_id_arity
1526 <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
1527 -- Note [Arity decrease] in Simplify
1528 return (new_arity, is_bot, new_rhs) }
1529 where
1530 try_expand
1531 | exprIsTrivial rhs
1532 = return (exprArity rhs, False, rhs)
1533
1534 | sm_eta_expand mode -- Provided eta-expansion is on
1535 , new_arity > old_arity -- And the current manifest arity isn't enough
1536 = do { tick (EtaExpansion bndr)
1537 ; return (new_arity, is_bot, etaExpand new_arity rhs) }
1538
1539 | otherwise
1540 = return (old_arity, is_bot && new_arity == old_arity, rhs)
1541
1542 dflags = sm_dflags mode
1543 old_arity = exprArity rhs -- See Note [Do not eta-expand PAPs]
1544 old_id_arity = idArity bndr
1545
1546 (new_arity1, is_bot) = findRhsArity dflags bndr rhs old_arity
1547 new_arity2 = idCallArity bndr
1548 new_arity = max new_arity1 new_arity2
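    -- new_arity1 comes from analysing the RHS (findRhsArity), while
    -- new_arity2 is the arity already recorded on the binder, presumably
    -- by the Call Arity pass; we eta-expand to whichever is larger.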
1549
1550 {-
1551 Note [Eta-expanding at let bindings]
1552 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1553 We now eta expand at let-bindings, which is where the payoff comes.
1554 The most significant thing is that we can do a simple arity analysis
1555 (in CoreArity.findRhsArity), which we can't do for free-floating lambdas
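
For example (an illustrative sketch; whether the expansion actually
happens also depends on the flags in force):
    f = \x -> case <cheap> of
                True  -> \y -> x+y
                False -> \y -> x-y
findRhsArity can see that both branches have arity 1, so the binding
can be eta-expanded to
    f = \x y -> case <cheap> of
                  True  -> x+y
                  False -> x-y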
1556
1557 One useful consequence of not eta-expanding lambdas is this example:
1558 genMap :: C a => ...
1559 {-# INLINE genMap #-}
1560 genMap f xs = ...
1561
1562 myMap :: D a => ...
1563 {-# INLINE myMap #-}
1564 myMap = genMap
1565
1566 Notice that 'genMap' should only inline if applied to two arguments.
1567 In the stable unfolding for myMap we'll have the unfolding
1568 (\d -> genMap Int (..d..))
1569 We do not want to eta-expand to
1570 (\d f xs -> genMap Int (..d..) f xs)
1571 because then 'genMap' will inline, and it really shouldn't: at least
1572 as far as the programmer is concerned, it's not applied to two
1573 arguments!
1574
1575 Note [Do not eta-expand join points]
1576 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1577 Similarly to CPR (see Note [Don't CPR join points] in WorkWrap), a join point
1578 stands well to gain from its outer binding's eta-expansion, and eta-expanding a
1579 join point is fraught with issues like how to deal with a cast:
1580
1581 let join $j1 :: IO ()
1582 $j1 = ...
1583 $j2 :: Int -> IO ()
1584 $j2 n = if n > 0 then $j1
1585 else ...
1586
1587 =>
1588
1589 let join $j1 :: IO ()
1590 $j1 = (\eta -> ...)
1591 `cast` N:IO :: State# RealWorld -> (# State# RealWorld, ())
1592 ~ IO ()
1593 $j2 :: Int -> IO ()
1594 $j2 n = (\eta -> if n > 0 then $j1
1595 else ...)
1596 `cast` N:IO :: State# RealWorld -> (# State# RealWorld, ())
1597 ~ IO ()
1598
1599 The cast here can't be pushed inside the lambda (since it's not casting to a
1600 function type), so the lambda has to stay, but it can't because it contains a
1601 reference to a join point. In fact, $j2 can't be eta-expanded at all. Rather
1602 than try and detect this situation (and whatever other situations crop up!), we
1603 don't bother; again, any surrounding eta-expansion will improve these join
1604 points anyway, since an outer cast can *always* be pushed inside. By the time
1605 CorePrep comes around, the code is very likely to look more like this:
1606
1607 let join $j1 :: State# RealWorld -> (# State# RealWorld, ())
1608 $j1 = (...) eta
1609 $j2 :: Int -> State# RealWorld -> (# State# RealWorld, ())
1610 $j2 = if n > 0 then $j1
1611 else (...) eta
1612
1613 Note [Do not eta-expand PAPs]
1614 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1615 We used to have old_arity = manifestArity rhs, which meant that we
1616 would eta-expand even PAPs. But this gives no particular advantage,
1617 and can lead to a massive blow-up in code size, exhibited by #9020.
1618 Suppose we have a PAP
1619 foo :: IO ()
1620 foo = returnIO ()
1621 Then we can eta-expand it to
1622 foo = (\eta. (returnIO () |> sym g) eta) |> g
1623 where
1624 g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
1625
1626 But there is really no point in doing this, and it generates masses of
1627 coercions and whatnot that eventually disappear again. For T9020, GHC
1628 allocated 6.6G before, and 0.8G afterwards; and residency dropped from
1629 1.8G to 45M.
1630
1631 But note that this won't eta-expand, say
1632 f = \g -> map g
1633 Does it matter not eta-expanding such functions? I'm not sure. Perhaps
1634 strictness analysis will have less to bite on?
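(The expansion we forgo there would be, roughly,
    f = \g xs -> map g xs
with 'xs' an invented binder.)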
1635
1636
1637 ************************************************************************
1638 * *
1639 \subsection{Floating lets out of big lambdas}
1640 * *
1641 ************************************************************************
1642
1643 Note [Floating and type abstraction]
1644 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1645 Consider this:
1646 x = /\a. C e1 e2
1647 We'd like to float this to
1648 y1 = /\a. e1
1649 y2 = /\a. e2
1650 x = /\a. C (y1 a) (y2 a)
1651 for the usual reasons: we want to inline x rather vigorously.
1652
1653 You may think that this kind of thing is rare. But in some programs it is
1654 common. For example, if you do closure conversion you might get:
1655
1656 data a :-> b = forall e. (e -> a -> b) :$ e
1657
1658 f_cc :: forall a. a :-> a
1659 f_cc = /\a. (\e. id a) :$ ()
1660
1661 Now we really want to inline that f_cc thing so that the
1662 construction of the closure goes away.
1663
1664 So I have elaborated simplLazyBind to understand right-hand sides that look
1665 like
1666 /\ a1..an. body
1667
1668 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1669 but there is quite a bit of plumbing in simplLazyBind as well.
1670
1671 The same transformation is good when there are lets in the body:
1672
1673 /\abc -> let(rec) x = e in b
1674 ==>
1675 let(rec) x' = /\abc -> let x = x' a b c in e
1676 in
1677 /\abc -> let x = x' a b c in b
1678
1679 This is good because it can turn things like:
1680
1681 let f = /\a -> letrec g = ... g ... in g
1682 into
1683 letrec g' = /\a -> ... g' a ...
1684 in
1685 let f = /\ a -> g' a
1686
1687 which is better. In effect, it means that big lambdas don't impede
1688 let-floating.
1689
1690 This optimisation is CRUCIAL in eliminating the junk introduced by
1691 desugaring mutually recursive definitions. Don't eliminate it lightly!
1692
1693 [May 1999] If we do this transformation *regardless* then we can
1694 end up with some pretty silly stuff. For example,
1695
1696 let
1697 st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1698 in ..
1699 becomes
1700 let y1 = /\s -> r1
1701 y2 = /\s -> r2
1702 st = /\s -> ...[y1 s/x1, y2 s/x2]
1703 in ..
1704
1705 Unless the "..." is a WHNF there is really no point in doing this.
1706 Indeed it can make things worse. Suppose x1 is used strictly,
1707 and is of the form
1708
1709 x1* = case f y of { (a,b) -> e }
1710
1711 If we abstract this wrt the tyvar we then can't do the case inline
1712 as we would normally do.
1713
1714 That's why the whole transformation is part of the same process that
1715 floats let-bindings and constructor arguments out of RHSs. In particular,
1716 it is guarded by the doFloatFromRhs call in simplLazyBind.
1717
1718 Note [Which type variables to abstract over]
1719 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1720 Abstract only over the type variables free in the rhs wrt which the
1721 new binding is abstracted. Note that
1722
1723 * The naive approach of abstracting wrt the
1724 tyvars free in the Id's /type/ fails. Consider:
1725 /\ a b -> let t :: (a,b) = (e1, e2)
1726 x :: a = fst t
1727 in ...
1728 Here, b isn't free in x's type, but we must nevertheless
1729 abstract wrt b as well, because t's type mentions b.
1730 Since t is floated too, we'd end up with the bogus:
1731 poly_t = /\ a b -> (e1, e2)
1732 poly_x = /\ a -> fst (poly_t a *b*)
1733
1734 * We must do closeOverKinds. Example (#10934):
1735 f = /\k (f:k->*) (a:k). let t = AccFailure @ (f a) in ...
1736 Here we want to float 't', but we must remember to abstract over
1737 'k' as well, even though it is not explicitly mentioned in the RHS,
1738 otherwise we get
1739 t = /\ (f:k->*) (a:k). AccFailure @ (f a)
1740 which is obviously bogus.
1741 -}
1742
1743 abstractFloats :: DynFlags -> TopLevelFlag -> [OutTyVar] -> SimplFloats
1744 -> OutExpr -> SimplM ([OutBind], OutExpr)
1745 abstractFloats dflags top_lvl main_tvs floats body
1746 = ASSERT( notNull body_floats )
1747 ASSERT( isNilOL (sfJoinFloats floats) )
1748 do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
1749 ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
1750 where
1751 is_top_lvl = isTopLevel top_lvl
1752 main_tv_set = mkVarSet main_tvs
1753 body_floats = letFloatBinds (sfLetFloats floats)
1754 empty_subst = CoreSubst.mkEmptySubst (sfInScope floats)
1755
1756 abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
1757 abstract subst (NonRec id rhs)
1758 = do { (poly_id1, poly_app) <- mk_poly1 tvs_here id
1759 ; let (poly_id2, poly_rhs) = mk_poly2 poly_id1 tvs_here rhs'
1760 subst' = CoreSubst.extendIdSubst subst id poly_app
1761 ; return (subst', NonRec poly_id2 poly_rhs) }
1762 where
1763 rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
1764
1765 -- tvs_here: see Note [Which type variables to abstract over]
1766 tvs_here = scopedSort $
1767 filter (`elemVarSet` main_tv_set) $
1768 closeOverKindsList $
1769 exprSomeFreeVarsList isTyVar rhs'
1770
1771 abstract subst (Rec prs)
1772 = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly1 tvs_here) ids
1773 ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
1774 poly_pairs = [ mk_poly2 poly_id tvs_here rhs'
1775 | (poly_id, rhs) <- poly_ids `zip` rhss
1776 , let rhs' = CoreSubst.substExpr (text "abstract_floats")
1777 subst' rhs ]
1778 ; return (subst', Rec poly_pairs) }
1779 where
1780 (ids,rhss) = unzip prs
1781 -- For a recursive group, it's a bit of a pain to work out the minimal
1782 -- set of tyvars over which to abstract:
1783 -- /\ a b c. let x = ...a... in
1784 -- letrec { p = ...x...q...
1785 -- q = .....p...b... } in
1786 -- ...
1787 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
1788 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
1789 -- Since it's a pain, we just use the whole set, which is always safe
1790 --
1791 -- If you ever want to be more selective, remember this bizarre case too:
1792 -- x::a = x
1793 -- Here, we must abstract 'x' over 'a'.
1794 tvs_here = scopedSort main_tvs
1795
1796 mk_poly1 :: [TyVar] -> Id -> SimplM (Id, CoreExpr)
1797 mk_poly1 tvs_here var
1798 = do { uniq <- getUniqueM
1799 ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
1800 poly_ty = mkInvForAllTys tvs_here (idType var) -- But new type of course
1801 poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.hs
1802 mkLocalIdOrCoVar poly_name poly_ty
1803 ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
1804 -- In the olden days, it was crucial to copy the occInfo of the original var,
1805 -- because we were looking at occurrence-analysed but as yet unsimplified code!
1806 -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking
1807 -- at already simplified code, so it doesn't matter
1808 --
1809 -- It's even right to retain single-occurrence or dead-var info:
1810 -- Suppose we started with /\a -> let x = E in B
1811 -- where x occurs once in B. Then we transform to:
1812 -- let x' = /\a -> E in /\a -> let x* = x' a in B
1813 -- where x* has an INLINE prag on it. Now, once x* is inlined,
1814 -- the occurrences of x' will be just the occurrences originally
1815 -- pinned on x.
1816
1817 mk_poly2 :: Id -> [TyVar] -> CoreExpr -> (Id, CoreExpr)
1818 mk_poly2 poly_id tvs_here rhs
1819 = (poly_id `setIdUnfolding` unf, poly_rhs)
1820 where
1821 poly_rhs = mkLams tvs_here rhs
1822 unf = mkUnfolding dflags InlineRhs is_top_lvl False poly_rhs
1823
1824 -- We want the unfolding. Consider
1825 -- let
1826 -- x = /\a. let y = ... in Just y
1827 -- in body
1828 -- Then we float the y-binding out (via abstractFloats and addPolyBind)
1829 -- but 'x' may well then be inlined in 'body' in which case we'd like the
1830 -- opportunity to inline 'y' too.
1831
1832 {-
1833 Note [Abstract over coercions]
1834 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1835 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1836 type variable a. Rather than sort this mess out, we simply bale out and abstract
1837 wrt all the type variables if any of them are coercion variables.
1838
1839
1840 Historical note: if you use let-bindings instead of a substitution, beware of this:
1841
1842 -- Suppose we start with:
1843 --
1844 -- x = /\ a -> let g = G in E
1845 --
1846 -- Then we'll float to get
1847 --
1848 -- x = let poly_g = /\ a -> G
1849 -- in /\ a -> let g = poly_g a in E
1850 --
1851 -- But now the occurrence analyser will see just one occurrence
1852 -- of poly_g, not inside a lambda, so the simplifier will
1853 -- PreInlineUnconditionally poly_g back into g! Back to square 1!
1854 -- (I used to think that the "don't inline lone occurrences" stuff
1855 -- would stop this happening, but since it's the *only* occurrence,
1856 -- PreInlineUnconditionally kicks in first!)
1857 --
1858 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1859 -- to appear many times. (NB: mkInlineMe eliminates
1860 -- such notes on trivial RHSs, so do it manually.)
1861
1862 ************************************************************************
1863 * *
1864 prepareAlts
1865 * *
1866 ************************************************************************
1867
1868 prepareAlts tries these things:
1869
1870 1. Eliminate alternatives that cannot match, including the
1871 DEFAULT alternative.
1872
1873 2. If the DEFAULT alternative can match only one possible constructor,
1874 then make that constructor explicit.
1875 e.g.
1876 case e of x { DEFAULT -> rhs }
1877 ===>
1878 case e of x { (a,b) -> rhs }
1879 where the type is a single constructor type. This gives better code
1880 when rhs also scrutinises x or e.
1881
1882 3. Returns a list of the constructors that cannot match in the
1883 DEFAULT alternative (if there is one)
1884
1885 Here "cannot match" includes knowledge from GADTs
1886
1887 It's a good idea to do this stuff before simplifying the alternatives, to
1888 avoid simplifying alternatives we know can't happen, and to come up with
1889 the list of constructors that are handled, to put into the IdInfo of the
1890 case binder, for use when simplifying the alternatives.
1891
1892 Eliminating the default alternative in (1) isn't so obvious, but it can
1893 happen:
1894
1895 data Colour = Red | Green | Blue
1896
1897 f x = case x of
1898 Red -> ..
1899 Green -> ..
1900 DEFAULT -> h x
1901
1902 h y = case y of
1903 Blue -> ..
1904 DEFAULT -> [ case y of ... ]
1905
1906 If we inline h into f, the default case of the inlined h can't happen.
1907 If we don't notice this, we may end up filtering out *all* the cases
1908 of the inner case y, which give us nowhere to go!
1909 -}
1910
1911 prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
1912 -- The returned alternatives can be empty, if none are possible
1913 prepareAlts scrut case_bndr' alts
1914 | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
1915 -- Case binder is needed just for its type. Note that as an
1916 -- OutId, it has maximum information; this is important.
1917 -- Test simpl013 is an example
1918 = do { us <- getUniquesM
1919 ; let (idcs1, alts1) = filterAlts tc tys imposs_cons alts
1920 (yes2, alts2) = refineDefaultAlt us tc tys idcs1 alts1
1921 (yes3, idcs3, alts3) = combineIdenticalAlts idcs1 alts2
1922 -- "idcs" stands for "impossible default data constructors"
1923 -- i.e. the constructors that can't match the default case
1924 ; when yes2 $ tick (FillInCaseDefault case_bndr')
1925 ; when yes3 $ tick (AltMerge case_bndr')
1926 ; return (idcs3, alts3) }
1927
1928 | otherwise -- Not a data type, so nothing interesting happens
1929 = return ([], alts)
1930 where
1931 imposs_cons = case scrut of
1932 Var v -> otherCons (idUnfolding v)
1933 _ -> []
1934
1935
1936 {-
1937 ************************************************************************
1938 * *
1939 mkCase
1940 * *
1941 ************************************************************************
1942
1943 mkCase tries these things
1944
1945 * Note [Merge Nested Cases]
1946 * Note [Eliminate Identity Case]
1947 * Note [Scrutinee Constant Folding]
1948
1949 Note [Merge Nested Cases]
1950 ~~~~~~~~~~~~~~~~~~~~~~~~~
1951 case e of b { ==> case e of b {
1952 p1 -> rhs1 p1 -> rhs1
1953 ... ...
1954 pm -> rhsm pm -> rhsm
1955 _ -> case b of b' { pn -> let b'=b in rhsn
1956 pn -> rhsn ...
1957 ... po -> let b'=b in rhso
1958 po -> rhso _ -> let b'=b in rhsd
1959 _ -> rhsd
1960 }
1961
1962 which merges the two cases into one when the default alternative of
1963 the outer case is itself a case on the outer case binder. This
1964 transformation is called Case Merging. It avoids scrutinising the
1965 same variable multiple times.
1966
1967 Note [Eliminate Identity Case]
1968 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1969 case e of ===> e
1970 True -> True;
1971 False -> False
1972
1973 and similar friends.
1974
1975 Note [Scrutinee Constant Folding]
1976 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1977 case x op# k# of _ { ===> case x of _ {
1978 a1# -> e1 (a1# inv_op# k#) -> e1
1979 a2# -> e2 (a2# inv_op# k#) -> e2
1980 ... ...
1981 DEFAULT -> ed DEFAULT -> ed
1982
1983 where (x op# k#) inv_op# k# == x
1984
1985 And similarly for commuted arguments and for some unary operations.
1986
1987 The purpose of this transformation is not only to avoid an arithmetic
1988 operation at runtime but to allow other transformations to apply in cascade.
1989
1990 Example with the "Merge Nested Cases" optimization (from #12877):
1991
1992 main = case t of t0
1993 0## -> ...
1994 DEFAULT -> case t0 `minusWord#` 1## of t1
1995 0## -> ...
1996 DEFAULT -> case t1 `minusWord#` 1## of t2
1997 0## -> ...
1998 DEFAULT -> case t2 `minusWord#` 1## of _
1999 0## -> ...
2000 DEFAULT -> ...
2001
2002 becomes:
2003
2004 main = case t of _
2005 0## -> ...
2006 1## -> ...
2007 2## -> ...
2008 3## -> ...
2009 DEFAULT -> ...
2010
2011 There are some wrinkles
2012
2013 * Do not apply caseRules if there is just a single DEFAULT alternative
2014 case e +# 3# of b { DEFAULT -> rhs }
2015 If we applied the transformation here we would (stupidly) get
2016 case e of b' { DEFAULT -> let b = e +# 3# in rhs }
2017 and now the process may repeat, because that let will really
2018 be a case.
2019
2020 * The type of the scrutinee might change. E.g.
2021 case tagToEnum (x :: Int#) of (b::Bool)
2022 False -> e1
2023 True -> e2
2024 ==>
2025 case x of (b'::Int#)
2026 DEFAULT -> e1
2027 1# -> e2
2028
2029 * The case binder may be used in the right hand sides, so we need
2030 to make a local binding for it, if it is alive. e.g.
2031 case e +# 10# of b
2032 DEFAULT -> blah...b...
2033 44# -> blah2...b...
2034 ===>
2035 case e of b'
2036 DEFAULT -> let b = b' +# 10# in blah...b...
2037 34# -> let b = 44# in blah2...b...
2038
2039 Note that in the non-DEFAULT cases we know what to bind 'b' to,
2040 whereas in the DEFAULT case we must reconstruct the original value.
2041 But NB: we use b'; we do not duplicate 'e'.
2042
2043 * In dataToTag we might need to make up some fake binders;
2044 see Note [caseRules for dataToTag] in PrelRules
2045 -}
2046
2047 mkCase, mkCase1, mkCase2, mkCase3
2048 :: DynFlags
2049 -> OutExpr -> OutId
2050 -> OutType -> [OutAlt] -- Alternatives in standard (increasing) order
2051 -> SimplM OutExpr
2052
2053 --------------------------------------------------
2054 -- 1. Merge Nested Cases
2055 --------------------------------------------------
2056
2057 mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
2058 | gopt Opt_CaseMerge dflags
2059 , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
2060 <- stripTicksTop tickishFloatable deflt_rhs
2061 , inner_scrut_var == outer_bndr
2062 = do { tick (CaseMerge outer_bndr)
2063
2064 ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
2065 (con, args, wrap_rhs rhs)
2066 -- Simplifier's no-shadowing invariant should ensure
2067 -- that outer_bndr is not shadowed by the inner patterns
2068 wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
2069 -- The let is OK even for unboxed binders,
2070
2071 wrapped_alts | isDeadBinder inner_bndr = inner_alts
2072 | otherwise = map wrap_alt inner_alts
2073
2074 merged_alts = mergeAlts outer_alts wrapped_alts
2075 -- NB: mergeAlts gives priority to the left
2076 -- case x of
2077 -- A -> e1
2078 -- DEFAULT -> case x of
2079 -- A -> e2
2080 -- B -> e3
2081 -- When we merge, we must ensure that e1 takes
2082 -- precedence over e2 as the value for A!
2083
2084 ; fmap (mkTicks ticks) $
2085 mkCase1 dflags scrut outer_bndr alts_ty merged_alts
2086 }
2087 -- Warning: don't call mkCase recursively!
2088 -- Firstly, there's no point, because inner alts have already had
2089 -- mkCase applied to them, so they won't have a case in their default
2090 -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
2091 -- in munge_rhs may put a case into the DEFAULT branch!
2092
2093 mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
2094
2095 --------------------------------------------------
2096 -- 2. Eliminate Identity Case
2097 --------------------------------------------------
2098
2099 mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _) -- Identity case
2100 | all identity_alt alts
2101 = do { tick (CaseIdentity case_bndr)
2102 ; return (mkTicks ticks $ re_cast scrut rhs1) }
2103 where
2104 ticks = concatMap (stripTicksT tickishFloatable . thdOf3) (tail alts)
2105 identity_alt (con, args, rhs) = check_eq rhs con args
2106
2107 check_eq (Cast rhs co) con args -- See Note [RHS casts]
2108 = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
2109 check_eq (Tick t e) alt args
2110 = tickishFloatable t && check_eq e alt args
2111
2112 check_eq (Lit lit) (LitAlt lit') _ = lit == lit'
2113 check_eq (Var v) _ _ | v == case_bndr = True
2114 check_eq (Var v) (DataAlt con) args
2115 | null arg_tys, null args = v == dataConWorkId con
2116 -- Optimisation only
2117 check_eq rhs (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $
2118 mkConApp2 con arg_tys args
2119 check_eq _ _ _ = False
2120
2121 arg_tys = tyConAppArgs (idType case_bndr)
2122
2123 -- Note [RHS casts]
2124 -- ~~~~~~~~~~~~~~~~
2125 -- We've seen this:
2126 -- case e of x { _ -> x `cast` c }
2127 -- And we definitely want to eliminate this case, to give
2128 -- e `cast` c
2129 -- So we throw away the cast from the RHS, and reconstruct
2130 -- it at the other end. All the RHS casts must be the same
2131 -- if (all identity_alt alts) holds.
2132 --
2133 -- Don't worry about nested casts, because the simplifier combines them
2134
2135 re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
2136 re_cast scrut _ = scrut
2137
2138 mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
2139
2140 --------------------------------------------------
2141 -- 3. Scrutinee Constant Folding
2142 --------------------------------------------------
2143
2144 mkCase2 dflags scrut bndr alts_ty alts
2145 | -- See Note [Scrutinee Constant Folding]
2146 case alts of -- Not if there is just a DEFAULT alternative
2147 [(DEFAULT,_,_)] -> False
2148 _ -> True
2149 , gopt Opt_CaseFolding dflags
2150 , Just (scrut', tx_con, mk_orig) <- caseRules dflags scrut
2151 = do { bndr' <- newId (fsLit "lwild") (exprType scrut')
2152
2153 ; alts' <- mapMaybeM (tx_alt tx_con mk_orig bndr') alts
2154 -- mapMaybeM: discard unreachable alternatives
2155 -- See Note [Unreachable caseRules alternatives]
2156 -- in PrelRules
2157
2158 ; mkCase3 dflags scrut' bndr' alts_ty $
2159 add_default (re_sort alts')
2160 }
2161
2162 | otherwise
2163 = mkCase3 dflags scrut bndr alts_ty alts
2164 where
2165 -- We need to keep the correct association between the scrutinee and its
2166 -- binder if the latter isn't dead. Hence we wrap rhs of alternatives with
2167 -- "let bndr = ... in":
2168 --
2169 -- case v + 10 of y =====> case v of y
2170 -- 20 -> e1 10 -> let y = 20 in e1
2171 -- DEFAULT -> e2 DEFAULT -> let y = v + 10 in e2
2172 --
2173 -- Other transformations give: =====> case v of y'
2174 -- 10 -> let y = 20 in e1
2175 -- DEFAULT -> let y = y' + 10 in e2
2176 --
2177 -- This wrapping is done in tx_alt; we use mk_orig, returned by caseRules,
2178 -- to construct an expression equivalent to the original one, for use
2179 -- in the DEFAULT case
2180
2181 tx_alt :: (AltCon -> Maybe AltCon) -> (Id -> CoreExpr) -> Id
2182 -> CoreAlt -> SimplM (Maybe CoreAlt)
2183 tx_alt tx_con mk_orig new_bndr (con, bs, rhs)
2184 = case tx_con con of
2185 Nothing -> return Nothing
2186 Just con' -> do { bs' <- mk_new_bndrs new_bndr con'
2187 ; return (Just (con', bs', rhs')) }
2188 where
2189 rhs' | isDeadBinder bndr = rhs
2190 | otherwise = bindNonRec bndr orig_val rhs
2191
2192 orig_val = case con of
2193 DEFAULT -> mk_orig new_bndr
2194 LitAlt l -> Lit l
2195 DataAlt dc -> mkConApp2 dc (tyConAppArgs (idType bndr)) bs
2196
2197 mk_new_bndrs new_bndr (DataAlt dc)
2198 | not (isNullaryRepDataCon dc)
2199 = -- For non-nullary data cons we must invent some fake binders
2200 -- See Note [caseRules for dataToTag] in PrelRules
2201 do { us <- getUniquesM
2202 ; let (ex_tvs, arg_ids) = dataConRepInstPat us dc
2203 (tyConAppArgs (idType new_bndr))
2204 ; return (ex_tvs ++ arg_ids) }
2205 mk_new_bndrs _ _ = return []
2206
2207 re_sort :: [CoreAlt] -> [CoreAlt] -- Re-sort the alternatives to
2208 re_sort alts = sortBy cmpAlt alts -- preserve the #case_invariants#
2209
2210 add_default :: [CoreAlt] -> [CoreAlt]
2211 -- See Note [Literal cases]
2212 add_default ((LitAlt {}, bs, rhs) : alts) = (DEFAULT, bs, rhs) : alts
2213 add_default alts = alts
2214
2215 {- Note [Literal cases]
2216 ~~~~~~~~~~~~~~~~~~~~~~~
2217 If we have
2218 case tagToEnum (a ># b) of
2219 False -> e1
2220 True -> e2
2221
2222 then caseRules for TagToEnum will turn it into
2223 case (a ># b) of
2224 0# -> e1
2225 1# -> e2
2226
2227 Since the case is exhaustive (all cases are) we can convert it to
2228 case (a ># b) of
2229 DEFAULT -> e1
2230 1# -> e2
2231
2232 This may generate slightly better code (although it should not, since
2233 all cases are exhaustive) and/or optimise better. I'm not certain that
2234 it's necessary, but currently we do make this change. We do it here,
2235 NOT in the TagToEnum rules (see "Beware" in Note [caseRules for tagToEnum]
2236 in PrelRules)
2237 -}
2238
2239 --------------------------------------------------
2240 -- Catch-all
2241 --------------------------------------------------
2242 mkCase3 _dflags scrut bndr alts_ty alts
2243 = return (Case scrut bndr alts_ty alts)
2244
2245 -- See Note [Exitification] and Note [Do not inline exit join points] in Exitify.hs
2246 -- This lives here (and not in Id) because occurrence info is only valid on
2247 -- InIds, so it's crucial that isExitJoinId is only called on freshly
2248 -- occ-analysed code. It's not a generic function you can call anywhere.
2249 isExitJoinId :: Var -> Bool
2250 isExitJoinId id = isJoinId id && isOneOcc (idOccInfo id) && occ_in_lam (idOccInfo id)
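-- Roughly, the shape we are detecting is (made-up names; see Exitify
-- for the real story):
--     let join exit y = <big>                 -- occurs exactly once ...
--     in joinrec go x = ... jump exit y ...   -- ... inside the recursive 'go'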
2251
2252 {-
2253 Note [Dead binders]
2254 ~~~~~~~~~~~~~~~~~~~~
2255 Note that dead-ness is maintained by the simplifier, so that it is
2256 accurate after simplification as well as before.
2257
2258
2259 Note [Cascading case merge]
2260 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2261 Case merging should cascade in one sweep, because it
2262 happens bottom-up
2263
2264 case e of a {
2265 DEFAULT -> case a of b
2266 DEFAULT -> case b of c {
2267 DEFAULT -> e
2268 A -> ea
2269 B -> eb
2270 C -> ec
2271 ==>
2272 case e of a {
2273 DEFAULT -> case a of b
2274 DEFAULT -> let c = b in e
2275 A -> let c = b in ea
2276 B -> eb
2277 C -> ec
2278 ==>
2279 case e of a {
2280 DEFAULT -> let b = a in let c = b in e
2281 A -> let b = a in let c = b in ea
2282 B -> let b = a in eb
2283 C -> ec
2284
2285
2286 However here's a tricky case that we still don't catch, and I don't
2287 see how to catch it in one pass:
2288
2289 case x of c1 { I# a1 ->
2290 case a1 of c2 ->
2291 0 -> ...
2292 DEFAULT -> case x of c3 { I# a2 ->
2293 case a2 of ...
2294
2295 After occurrence analysis (and its binder-swap) we get this
2296
2297 case x of c1 { I# a1 ->
2298 let x = c1 in -- Binder-swap addition
2299 case a1 of c2 ->
2300 0 -> ...
2301 DEFAULT -> case x of c3 { I# a2 ->
2302 case a2 of ...
2303
2304 When we simplify the inner case x, we'll see that
2305 x=c1=I# a1. So we'll bind a2 to a1, and get
2306
2307 case x of c1 { I# a1 ->
2308 case a1 of c2 ->
2309 0 -> ...
2310 DEFAULT -> case a1 of ...
2311
2312 This is correct, but we can't do a case merge in this sweep
2313 because c2 /= a1. Reason: the binding c1=I# a1 went inwards
2314 without getting changed to c1=I# c2.
2315
2316 I don't think this is worth fixing, even if I knew how. It'll
2317 all come out in the next pass anyway.
2318 -}