1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 \section[SimplUtils]{The simplifier utilities}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplUtils (
10 -- Rebuilding
11 mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
12
13 -- Inlining,
14 preInlineUnconditionally, postInlineUnconditionally,
15 activeUnfolding, activeRule,
16 getUnfoldingInRuleMatch,
17 simplEnvForGHCi, updModeForStableUnfoldings, updModeForRuleLHS,
18
19 -- The continuation type
20 SimplCont(..), DupFlag(..),
21 isSimplified,
22 contIsDupable, contResultType, contHoleType,
23 contIsTrivial, contArgs,
24 countValArgs, countArgs,
25 mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
26 interestingCallContext,
27
28 -- ArgInfo
29 ArgInfo(..), ArgSpec(..), mkArgInfo,
30 addValArgTo, addCastTo, addTyArgTo,
31 argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
32
33 abstractFloats
34 ) where
35
36 #include "HsVersions.h"
37
38 import SimplEnv
39 import CoreMonad ( SimplifierMode(..), Tick(..) )
40 import MkCore ( sortQuantVars )
41 import DynFlags
42 import CoreSyn
43 import qualified CoreSubst
44 import PprCore
45 import CoreFVs
46 import CoreUtils
47 import CoreArity
48 import CoreUnfold
49 import Name
50 import Id
51 import Var
52 import Demand
53 import SimplMonad
54 import Type hiding( substTy )
55 import Coercion hiding( substCo, substTy )
56 import DataCon ( dataConWorkId )
57 import VarEnv
58 import VarSet
59 import BasicTypes
60 import Util
61 import MonadUtils
62 import Outputable
63 import FastString
64 import Pair
65
66 import Control.Monad ( when )
67
68 {-
69 ************************************************************************
70 * *
71 The SimplCont and DupFlag types
72 * *
73 ************************************************************************
74
75 A SimplCont allows the simplifier to traverse the expression in a
76 zipper-like fashion. The SimplCont represents the rest of the expression,
77 "above" the point of interest.
78
79 You can also think of a SimplCont as an "evaluation context", using
80 that term in the way it is used for operational semantics. This is the
81 way I usually think of it. For example, you'll often see a syntax for
82 evaluation contexts that looks like
83 C ::= [] | C e | case C of alts | C `cast` co
84 That's the kind of thing we are doing here, and I use that syntax in
85 the comments.
86
87
88 Key points:
89 * A SimplCont describes a *strict* context (just like
90 evaluation contexts do). E.g. Just [] is not a SimplCont
91
92 * A SimplCont describes a context that *does not* bind
93 any variables. E.g. \x. [] is not a SimplCont
94 -}
95
96 data SimplCont
97 = Stop -- An empty context, or <hole>
98 OutType -- Type of the <hole>
99 CallCtxt -- Tells if there is something interesting about
100 -- the context, and hence the inliner
101 -- should be a bit keener (see interestingCallContext)
102 -- Specifically:
103 -- This is an argument of a function that has RULES
104 -- Inlining the call might allow the rule to fire
105 -- Never ValAppCxt (use ApplyToVal instead)
106 -- or CaseCtxt (use Select instead)
107
108 | CastIt -- <hole> `cast` co
109 OutCoercion -- The coercion simplified
110 -- Invariant: never an identity coercion
111 SimplCont
112
113 | ApplyToVal { -- <hole> arg
114 sc_dup :: DupFlag, -- See Note [DupFlag invariants]
115 sc_arg :: InExpr, -- The argument,
116 sc_env :: StaticEnv, -- and its static env
117 sc_cont :: SimplCont }
118
119 | ApplyToTy { -- <hole> ty
120 sc_arg_ty :: OutType, -- Argument type
121 sc_hole_ty :: OutType, -- Type of the function, presumably (forall a. blah)
122 -- See Note [The hole type in ApplyToTy]
123 sc_cont :: SimplCont }
124
125 | Select { -- case <hole> of alts
126 sc_dup :: DupFlag, -- See Note [DupFlag invariants]
127 sc_bndr :: InId, -- case binder
128 sc_alts :: [InAlt], -- Alternatives
129 sc_env :: StaticEnv, -- and their static environment
130 sc_cont :: SimplCont }
131
132 -- The two strict forms have no DupFlag, because we never duplicate them
133 | StrictBind -- (\x* \xs. e) <hole>
134 InId [InBndr] -- let x* = <hole> in e
135 InExpr StaticEnv -- is a special case
136 SimplCont
137
138 | StrictArg -- f e1 ..en <hole>
139 ArgInfo -- Specifies f, e1..en, Whether f has rules, etc
140 -- plus strictness flags for *further* args
141 CallCtxt -- Whether *this* argument position is interesting
142 SimplCont
143
144 | TickIt
145 (Tickish Id) -- Tick tickish <hole>
146 SimplCont
147
148 data DupFlag = NoDup -- Unsimplified, might be big
149 | Simplified -- Simplified
150 | OkToDup -- Simplified and small
151
152 isSimplified :: DupFlag -> Bool
153 isSimplified NoDup = False
154 isSimplified _ = True -- Invariant: the subst-env is empty
155
156 perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
157 perhapsSubstTy dup env ty
158 | isSimplified dup = ty
159 | otherwise = substTy env ty
160
161 {-
162 Note [DupFlag invariants]
163 ~~~~~~~~~~~~~~~~~~~~~~~~~
164 In both (ApplyToVal dup _ env k)
165 and (Select dup _ _ env k)
166 the following invariants hold
167
168 (a) if dup = OkToDup, then continuation k is also ok-to-dup
169 (b) if dup = OkToDup or Simplified, the subst-env is empty
170 (and hence no need to re-simplify)
171 -}
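
{- An illustrative sketch (added for exposition; the names are placeholders,
and the sc_dup/sc_env fields are omitted). When the simplifier is looking at
the 'f' in
        case f x of b { alts }          -- overall result type res_ty
the surrounding context is represented, innermost frame first, as
        ApplyToVal { sc_arg = x
                   , sc_cont = Select { sc_bndr = b, sc_alts = alts
                                      , sc_cont = Stop res_ty BoringCtxt } }
which corresponds to the evaluation context (case [] x of alts).
-}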
172
173 instance Outputable DupFlag where
174 ppr OkToDup = ptext (sLit "ok")
175 ppr NoDup = ptext (sLit "nodup")
176 ppr Simplified = ptext (sLit "simpl")
177
178 instance Outputable SimplCont where
179 ppr (Stop ty interesting) = ptext (sLit "Stop") <> brackets (ppr interesting) <+> ppr ty
180 ppr (CastIt co cont ) = (ptext (sLit "CastIt") <+> ppr co) $$ ppr cont
181 ppr (TickIt t cont) = (ptext (sLit "TickIt") <+> ppr t) $$ ppr cont
182 ppr (ApplyToTy { sc_arg_ty = ty, sc_cont = cont })
183 = (ptext (sLit "ApplyToTy") <+> pprParendType ty) $$ ppr cont
184 ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont })
185 = (ptext (sLit "ApplyToVal") <+> ppr dup <+> pprParendExpr arg)
186 $$ ppr cont
187 ppr (StrictBind b _ _ _ cont) = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
188 ppr (StrictArg ai _ cont) = (ptext (sLit "StrictArg") <+> ppr (ai_fun ai)) $$ ppr cont
189 ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
190 = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$
191 ifPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
192
193
194 {- Note [The hole type in ApplyToTy]
195 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
196 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
197 continuation. It is absolutely necessary to compute contHoleType, but it is
198 not used for anything else (and hence may not be evaluated).
199
200 Why is it necessary for contHoleType? Consider the continuation
201 ApplyToType Int (Stop Int)
202 corresponding to
203 (<hole> @Int) :: Int
204 What is the type of <hole>? It could be (forall a. Int) or (forall a. a),
205 and there is no way to know which, so we must record it.
206
207 In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType
208 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
209 doesn't matter because we'll never compute them all.
210
211 ************************************************************************
212 * *
213 ArgInfo and ArgSpec
214 * *
215 ************************************************************************
216 -}
217
218 data ArgInfo
219 = ArgInfo {
220 ai_fun :: OutId, -- The function
221 ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order)
222 ai_type :: OutType, -- Type of (f a1 ... an)
223
224 ai_rules :: [CoreRule], -- Rules for this function
225
226 ai_encl :: Bool, -- Flag saying whether this function
227 -- or an enclosing one has rules (recursively)
228 -- True => be keener to inline in all args
229
230 ai_strs :: [Bool], -- Strictness of remaining arguments
231 -- Usually infinite, but if it is finite it guarantees
232 -- that the function diverges after being given
233 -- that number of args
234 ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline
235 -- Always infinite
236 }
237
238 data ArgSpec
239 = ValArg OutExpr -- Apply to this (coercion or value); c.f. ApplyToVal
240 | TyArg { as_arg_ty :: OutType -- Apply to this type; c.f. ApplyToTy
241 , as_hole_ty :: OutType } -- Type of the function (presumably forall a. blah)
242 | CastBy OutCoercion -- Cast by this; c.f. CastIt
243
244 instance Outputable ArgSpec where
245 ppr (ValArg e) = ptext (sLit "ValArg") <+> ppr e
246 ppr (TyArg { as_arg_ty = ty }) = ptext (sLit "TyArg") <+> ppr ty
247 ppr (CastBy c) = ptext (sLit "CastBy") <+> ppr c
248
249 addValArgTo :: ArgInfo -> OutExpr -> ArgInfo
250 addValArgTo ai arg = ai { ai_args = ValArg arg : ai_args ai
251 , ai_type = funResultTy (ai_type ai) }
252
253 addTyArgTo :: ArgInfo -> OutType -> ArgInfo
254 addTyArgTo ai arg_ty = ai { ai_args = arg_spec : ai_args ai
255 , ai_type = applyTy poly_fun_ty arg_ty }
256 where
257 poly_fun_ty = ai_type ai
258 arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = poly_fun_ty }
259
260 addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
261 addCastTo ai co = ai { ai_args = CastBy co : ai_args ai
262 , ai_type = pSnd (coercionKind co) }
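-- A small worked example (for exposition only; types written informally):
-- starting from ai_type = forall a. a -> a,
--   addTyArgTo with Int gives ai_type = Int -> Int, and
--   addValArgTo with some argument then gives ai_type = Int,
-- mirroring how the type of (f @Int x) is computed one argument at a time.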
263
264 argInfoAppArgs :: [ArgSpec] -> [OutExpr]
265 argInfoAppArgs [] = []
266 argInfoAppArgs (CastBy {} : _) = [] -- Stop at a cast
267 argInfoAppArgs (ValArg e : as) = e : argInfoAppArgs as
268 argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as
269
270 pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
271 pushSimplifiedArgs _env [] k = k
272 pushSimplifiedArgs env (arg : args) k
273 = case arg of
274 TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
275 -> ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
276 ValArg e -> ApplyToVal { sc_arg = e, sc_env = env, sc_dup = Simplified, sc_cont = rest }
277 CastBy c -> CastIt c rest
278 where
279 rest = pushSimplifiedArgs env args k
280 -- The env has an empty SubstEnv
281
282 argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
283 -- NB: the [ArgSpec] is reversed so that the first arg
284 -- in the list is the last one in the application
285 argInfoExpr fun rev_args
286 = go rev_args
287 where
288 go [] = Var fun
289 go (ValArg a : as) = go as `App` a
290 go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty
291 go (CastBy co : as) = mkCast (go as) co
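-- For example (a sketch; f, t, a1, a2 stand for arbitrary pieces): the
-- reversed list [ValArg a2, TyArg { as_arg_ty = t }, ValArg a1] rebuilds
--     ((f a1) @t) a2
-- i.e. the head of the list becomes the outermost (last) application.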
292
293
294 {-
295 ************************************************************************
296 * *
297 Functions on SimplCont
298 * *
299 ************************************************************************
300 -}
301
302 mkBoringStop :: OutType -> SimplCont
303 mkBoringStop ty = Stop ty BoringCtxt
304
305 mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in CoreUnfold
306 mkRhsStop ty = Stop ty RhsCtxt
307
308 mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
309 mkLazyArgStop ty cci = Stop ty cci
310
311 -------------------
312 contIsRhsOrArg :: SimplCont -> Bool
313 contIsRhsOrArg (Stop {}) = True
314 contIsRhsOrArg (StrictBind {}) = True
315 contIsRhsOrArg (StrictArg {}) = True
316 contIsRhsOrArg _ = False
317
318 contIsRhs :: SimplCont -> Bool
319 contIsRhs (Stop _ RhsCtxt) = True
320 contIsRhs _ = False
321
322 -------------------
323 contIsDupable :: SimplCont -> Bool
324 contIsDupable (Stop {}) = True
325 contIsDupable (ApplyToTy { sc_cont = k }) = contIsDupable k
326 contIsDupable (ApplyToVal { sc_dup = OkToDup }) = True -- See Note [DupFlag invariants]
327 contIsDupable (Select { sc_dup = OkToDup }) = True -- ...ditto...
328 contIsDupable (CastIt _ k) = contIsDupable k
329 contIsDupable _ = False
330
331 -------------------
332 contIsTrivial :: SimplCont -> Bool
333 contIsTrivial (Stop {}) = True
334 contIsTrivial (ApplyToTy { sc_cont = k }) = contIsTrivial k
335 contIsTrivial (ApplyToVal { sc_arg = Coercion _, sc_cont = k }) = contIsTrivial k
336 contIsTrivial (CastIt _ k) = contIsTrivial k
337 contIsTrivial _ = False
338
339 -------------------
340 contResultType :: SimplCont -> OutType
341 contResultType (Stop ty _) = ty
342 contResultType (CastIt _ k) = contResultType k
343 contResultType (StrictBind _ _ _ _ k) = contResultType k
344 contResultType (StrictArg _ _ k) = contResultType k
345 contResultType (Select { sc_cont = k }) = contResultType k
346 contResultType (ApplyToTy { sc_cont = k }) = contResultType k
347 contResultType (ApplyToVal { sc_cont = k }) = contResultType k
348 contResultType (TickIt _ k) = contResultType k
349
350 contHoleType :: SimplCont -> OutType
351 contHoleType (Stop ty _) = ty
352 contHoleType (TickIt _ k) = contHoleType k
353 contHoleType (CastIt co _) = pFst (coercionKind co)
354 contHoleType (StrictBind b _ _ se _) = substTy se (idType b)
355 contHoleType (StrictArg ai _ _) = funArgTy (ai_type ai)
356 contHoleType (ApplyToTy { sc_hole_ty = ty }) = ty -- See Note [The hole type in ApplyToTy]
357 contHoleType (ApplyToVal { sc_arg = e, sc_env = se, sc_dup = dup, sc_cont = k })
358 = mkFunTy (perhapsSubstTy dup se (exprType e))
359 (contHoleType k)
360 contHoleType (Select { sc_dup = d, sc_bndr = b, sc_env = se })
361 = perhapsSubstTy d se (idType b)
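
-- A small worked example (informal; names are placeholders): for the context
--     case (<hole> arg) of b { alts }      -- overall result type res_ty
-- i.e. ApplyToVal (arg) wrapping Select (b, alts) wrapping Stop res_ty,
-- we get, roughly,
--     contHoleType   = exprType arg -> idType b
--     contResultType = res_ty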
362
363 -------------------
364 countValArgs :: SimplCont -> Int
365 -- Count value arguments excluding coercions
366 countValArgs (ApplyToVal { sc_arg = arg, sc_cont = cont })
367 | Coercion {} <- arg = countValArgs cont
368 | otherwise = 1 + countValArgs cont
369 countValArgs _ = 0
370
371 countArgs :: SimplCont -> Int
372 -- Count all arguments, including types, coercions, and other values
373 countArgs (ApplyToTy { sc_cont = cont }) = 1 + countArgs cont
374 countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont
375 countArgs _ = 0
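
-- For example (informally): for a continuation representing the context
--     <hole> @ty x y
-- countArgs gives 3 and countValArgs gives 2. A coercion argument (an
-- ApplyToVal whose sc_arg is a Coercion) is counted by countArgs but
-- skipped by countValArgs.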
376
377 contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
378 -- Summarises value args, discards type args and coercions
379 -- The returned continuation of the call is only used to
380 -- answer questions like "are you interesting?"
381 contArgs cont
382 | lone cont = (True, [], cont)
383 | otherwise = go [] cont
384 where
385 lone (ApplyToTy {}) = False -- See Note [Lone variables] in CoreUnfold
386 lone (ApplyToVal {}) = False
387 lone (CastIt {}) = False
388 lone _ = True
389
390 go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
391 = go (is_interesting arg se : args) k
392 go args (ApplyToTy { sc_cont = k }) = go args k
393 go args (CastIt _ k) = go args k
394 go args k = (False, reverse args, k)
395
396 is_interesting arg se = interestingArg se arg
397 -- Do *not* use short-cutting substitution here
398 -- because we want to get as much IdInfo as possible
399
400
401 -------------------
402 mkArgInfo :: Id
403 -> [CoreRule] -- Rules for function
404 -> Int -- Number of value args
405 -> SimplCont -- Context of the call
406 -> ArgInfo
407
408 mkArgInfo fun rules n_val_args call_cont
409 | n_val_args < idArity fun -- Note [Unsaturated functions]
410 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
411 , ai_rules = rules, ai_encl = False
412 , ai_strs = vanilla_stricts
413 , ai_discs = vanilla_discounts }
414 | otherwise
415 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
416 , ai_rules = rules
417 , ai_encl = interestingArgContext rules call_cont
418 , ai_strs = add_type_str fun_ty arg_stricts
419 , ai_discs = arg_discounts }
420 where
421 fun_ty = idType fun
422
423 vanilla_discounts, arg_discounts :: [Int]
424 vanilla_discounts = repeat 0
425 arg_discounts = case idUnfolding fun of
426 CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
427 -> discounts ++ vanilla_discounts
428 _ -> vanilla_discounts
429
430 vanilla_stricts, arg_stricts :: [Bool]
431 vanilla_stricts = repeat False
432
433 arg_stricts
434 = case splitStrictSig (idStrictness fun) of
435 (demands, result_info)
436 | not (demands `lengthExceeds` n_val_args)
437 -> -- Enough args, use the strictness given.
438 -- For bottoming functions we used to pretend that the arg
439 -- is lazy, so that we don't treat the arg as an
440 -- interesting context. This avoids substituting
441 -- top-level bindings for (say) strings into
442 -- calls to error. But now we are more careful about
443 -- inlining lone variables, so it's ok (see SimplUtils.analyseCont)
444 if isBotRes result_info then
445 map isStrictDmd demands -- Finite => result is bottom
446 else
447 map isStrictDmd demands ++ vanilla_stricts
448 | otherwise
449 -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
450 <+> ppr n_val_args <+> ppr demands )
451 vanilla_stricts -- Not enough args, or no strictness
452
453 add_type_str :: Type -> [Bool] -> [Bool]
454 -- If the function arg types are strict, record that in the 'strictness bits'
455 -- No need to instantiate because unboxed types (which dominate the strict
456 -- types) can't instantiate type variables.
457 -- add_type_str is done repeatedly (for each call); might be better
458 -- once-for-all in the function
459 -- But beware primops/datacons with no strictness
460 add_type_str _ [] = []
461 add_type_str fun_ty strs -- Look through foralls
462 | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty -- Includes coercions
463 = add_type_str fun_ty' strs
464 add_type_str fun_ty (str:strs) -- Add strict-type info
465 | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
466 = (str || isStrictType arg_ty) : add_type_str fun_ty' strs
467 add_type_str _ strs
468 = strs
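    -- For example (a sketch): with fun_ty = forall a. Int# -> a -> Int and
    -- strs = [False, False], add_type_str looks through the forall and
    -- returns [True, False], because Int# is an unlifted (hence strict) type
    -- while the second argument type is not known to be strict.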
469
470 {- Note [Unsaturated functions]
471 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
472 Consider (test eyeball/inline4)
473 x = a:as
474 y = f x
475 where f has arity 2. Then we do not want to inline 'x', because
476 it'll just be floated out again. Even if f has lots of discounts
477 on its first argument -- it must be saturated for these to kick in
478 -}
479
480
481 {-
482 ************************************************************************
483 * *
484 Interesting arguments
485 * *
486 ************************************************************************
487
488 Note [Interesting call context]
489 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
490 We want to avoid inlining an expression where there can't possibly be
491 any gain, such as in an argument position. Hence, if the continuation
492 is interesting (eg. a case scrutinee, application etc.) then we
493 inline, otherwise we don't.
494
495 Previously some_benefit used to return True only if the variable was
496 applied to some value arguments. This didn't work:
497
498 let x = _coerce_ (T Int) Int (I# 3) in
499 case _coerce_ Int (T Int) x of
500 I# y -> ....
501
502 we want to inline x, but can't see that it's a constructor in a case
503 scrutinee position, and some_benefit is False.
504
505 Another example:
506
507 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
508
509 .... case dMonadST _@_ x0 of (a,b,c) -> ....
510
511 we'd really like to inline dMonadST here, but we *don't* want to
512 inline if the case expression is just
513
514 case x of y { DEFAULT -> ... }
515
516 since we can just eliminate this case instead (x is in WHNF). Similar
517 applies when x is bound to a lambda expression. Hence
518 contIsInteresting looks for case expressions with just a single
519 default case.
520 -}
521
522 interestingCallContext :: SimplCont -> CallCtxt
523 -- See Note [Interesting call context]
524 interestingCallContext cont
525 = interesting cont
526 where
527 interesting (Select {}) = CaseCtxt
528 interesting (ApplyToVal {}) = ValAppCtxt
529 -- Can happen if we have (f Int |> co) y
530 -- If f has an INLINE prag we need to give it some
531 -- motivation to inline. See Note [Cast then apply]
532 -- in CoreUnfold
533 interesting (StrictArg _ cci _) = cci
534 interesting (StrictBind {}) = BoringCtxt
535 interesting (Stop _ cci) = cci
536 interesting (TickIt _ k) = interesting k
537 interesting (ApplyToTy { sc_cont = k }) = interesting k
538 interesting (CastIt _ k) = interesting k
539 -- If this call is the arg of a strict function, the context
540 -- is a bit interesting. If we inline here, we may get useful
541 -- evaluation information to avoid repeated evals: e.g.
542 -- x + (y * z)
543 -- Here the contIsInteresting makes the '*' keener to inline,
544 -- which in turn exposes a constructor which makes the '+' inline.
545 -- Assuming that +,* aren't small enough to inline regardless.
546 --
547 -- It's also very important to inline in a strict context for things
548 -- like
549 -- foldr k z (f x)
550 -- Here, the context of (f x) is strict, and if f's unfolding is
551 -- a build it's *great* to inline it here. So we must ensure that
552 -- the context for (f x) is not totally uninteresting.
553
554 interestingArgContext :: [CoreRule] -> SimplCont -> Bool
555 -- If the argument has form (f x y), where x,y are boring,
556 -- and f is marked INLINE, then we don't want to inline f.
557 -- But if the context of the argument is
558 -- g (f x y)
559 -- where g has rules, then we *do* want to inline f, in case it
560 -- exposes a rule that might fire. Similarly, if the context is
561 -- h (g (f x x))
562 -- where h has rules, then we do want to inline f; hence the
563 -- call_cont argument to interestingArgContext
564 --
565 -- The ai_encl flag makes this happen; if it's
566 -- set, the inliner gets just enough keener to inline f
567 -- regardless of how boring f's arguments are, if it's marked INLINE
568 --
569 -- The alternative would be to *always* inline an INLINE function,
570 -- regardless of how boring its context is; but that seems overkill
571 -- For example, it'd mean that wrapper functions were always inlined
572 --
573 -- The call_cont passed to interestingArgContext is the context of
574 -- the call itself, e.g. g <hole> in the example above
575 interestingArgContext rules call_cont
576 = notNull rules || enclosing_fn_has_rules
577 where
578 enclosing_fn_has_rules = go call_cont
579
580 go (Select {}) = False
581 go (ApplyToVal {}) = False -- Shouldn't really happen
582 go (ApplyToTy {}) = False -- Ditto
583 go (StrictArg _ cci _) = interesting cci
584 go (StrictBind {}) = False -- ??
585 go (CastIt _ c) = go c
586 go (Stop _ cci) = interesting cci
587 go (TickIt _ c) = go c
588
589 interesting RuleArgCtxt = True
590 interesting _ = False
591
592
593 {- Note [Interesting arguments]
594 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
595 An argument is interesting if it deserves a discount for unfoldings
596 with a discount in that argument position. The idea is to avoid
597 unfolding a function that is applied only to variables that have no
598 unfolding (i.e. they are probably lambda bound): f x y z
599 There is little point in inlining f here.
600
601 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
602 we must look through lets, eg (let x = e in C a b), because the let will
603 float, exposing the value, if we inline. That makes it different to
604 exprIsHNF.
605
606 Before 2009 we said it was interesting if the argument had *any* structure
607 at all; i.e. (hasSomeUnfolding v). But that does too much inlining; see Trac #3016.
608
609 But we don't regard (f x y) as interesting, unless f is unsaturated.
610 If it's saturated and f hasn't inlined, then it's probably not going
611 to now!
612
613 Note [Conlike is interesting]
614 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
615 Consider
616 f d = ...((*) d x y)...
617 ... f (df d')...
618 where df is con-like. Then we'd really like to inline 'f' so that the
619 rule for (*) (df d) can fire. To do this
620 a) we give a discount for being an argument of a class-op (eg (*) d)
621 b) we say that a con-like argument (eg (df d)) is interesting
622 -}
623
624 interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
625 -- See Note [Interesting arguments]
626 interestingArg env e = go env 0 e
627 where
628 -- n is # value args to which the expression is applied
629 go env n (Var v)
630 | SimplEnv { seIdSubst = ids, seInScope = in_scope } <- env
631 = case lookupVarEnv ids v of
632 Nothing -> go_var n (refineFromInScope in_scope v)
633 Just (DoneId v') -> go_var n (refineFromInScope in_scope v')
634 Just (DoneEx e) -> go (zapSubstEnv env) n e
635 Just (ContEx tvs cvs ids e) -> go (setSubstEnv env tvs cvs ids) n e
636
637 go _ _ (Lit {}) = ValueArg
638 go _ _ (Type _) = TrivArg
639 go _ _ (Coercion _) = TrivArg
640 go env n (App fn (Type _)) = go env n fn
641 go env n (App fn (Coercion _)) = go env n fn
642 go env n (App fn _) = go env (n+1) fn
643 go env n (Tick _ a) = go env n a
644 go env n (Cast e _) = go env n e
645 go env n (Lam v e)
646 | isTyVar v = go env n e
647 | n>0 = go env (n-1) e
648 | otherwise = ValueArg
649 go env n (Let _ e) = case go env n e of { ValueArg -> ValueArg; _ -> NonTrivArg }
650 go _ _ (Case {}) = NonTrivArg
651
652 go_var n v
653 | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather than
654 -- data constructors here
655 | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
656 | n > 0 = NonTrivArg -- Saturated or unknown call
657 | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
658 -- See Note [Conlike is interesting]
659 | otherwise = TrivArg -- n==0, no useful unfolding
660 where
661 conlike_unfolding = isConLikeUnfolding (idUnfolding v)
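
-- Some illustrative classifications (a sketch, writing source syntax for
-- what are really CoreExprs; y is a lambda-bound variable with no unfolding,
-- f likewise with arity 0):
--   interestingArg env (Just y)   = ValueArg   -- constructor application
--   interestingArg env (\x -> e)  = ValueArg   -- a value lambda
--   interestingArg env (f y)      = NonTrivArg -- saturated or unknown call
--   interestingArg env y          = TrivArg    -- bare boring variable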
662
663 {-
664 ************************************************************************
665 * *
666 SimplifierMode
667 * *
668 ************************************************************************
669
670 The SimplifierMode controls several switches; see its definition in
671 CoreMonad
672 sm_rules :: Bool -- Whether RULES are enabled
673 sm_inline :: Bool -- Whether inlining is enabled
674 sm_case_case :: Bool -- Whether case-of-case is enabled
675 sm_eta_expand :: Bool -- Whether eta-expansion is enabled
676 -}
677
678 simplEnvForGHCi :: DynFlags -> SimplEnv
679 simplEnvForGHCi dflags
680 = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
681 , sm_phase = InitialPhase
682 , sm_rules = rules_on
683 , sm_inline = False
684 , sm_eta_expand = eta_expand_on
685 , sm_case_case = True }
686 where
687 rules_on = gopt Opt_EnableRewriteRules dflags
688 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
689 -- Do not do any inlining, in case we expose some unboxed
690 -- tuple stuff that confuses the bytecode interpreter
691
692 updModeForStableUnfoldings :: Activation -> SimplifierMode -> SimplifierMode
693 -- See Note [Simplifying inside stable unfoldings]
694 updModeForStableUnfoldings inline_rule_act current_mode
695 = current_mode { sm_phase = phaseFromActivation inline_rule_act
696 , sm_inline = True
697 , sm_eta_expand = False }
698 -- For sm_rules, just inherit; sm_rules might be "off"
699 -- because of -fno-enable-rewrite-rules
700 where
701 phaseFromActivation (ActiveAfter n) = Phase n
702 phaseFromActivation _ = InitialPhase
703
704 updModeForRuleLHS :: SimplifierMode -> SimplifierMode
705 -- See Note [Simplifying RULE LHSs]
706 updModeForRuleLHS current_mode
707 = current_mode { sm_phase = InitialPhase
708 , sm_inline = False
709 , sm_rules = False
710 , sm_eta_expand = False }
711
712 {- Note [Simplifying RULE LHSs]
713 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
714 When simplifying on the LHS of a rule, refrain from all inlining and
715 all RULES. Doing anything to the LHS is plain confusing, because it
716 means that what the rule matches is not what the user wrote.
717 c.f. Trac #10595, and #10528.
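
For example (an illustrative sketch; the names are made up), given

    {-# RULES "myrule" forall x. f (g x) = h x #-}

if inlining or rewriting were allowed to transform the (g x) on the LHS,
the rule's pattern would no longer be the (f (g x)) the user wrote, and
calls that textually match the original LHS could silently fail to fire.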
718
719 Note [Inlining in gentle mode]
720 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
721 Something is inlined if
722 (i) the sm_inline flag is on, AND
723 (ii) the thing has an INLINE pragma, AND
724 (iii) the thing is inlinable in the earliest phase.
725
726 Example of why (iii) is important:
727 {-# INLINE [~1] g #-}
728 g = ...
729
730 {-# INLINE f #-}
731 f x = g (g x)
732
733 If we were to inline g into f's inlining, then an importing module would
734 never be able to do
735 f e --> g (g e) ---> RULE fires
736 because the stable unfolding for f has had g inlined into it.
737
738 On the other hand, it is bad not to do ANY inlining into a
739 stable unfolding, because then recursive knots in instance declarations
740 don't get unravelled.
741
742 However, *sometimes* SimplGently must do no call-site inlining at all
743 (hence sm_inline = False). Before full laziness we must be careful
744 not to inline wrappers, because doing so inhibits floating
745 e.g. ...(case f x of ...)...
746 ==> ...(case (case x of I# x# -> fw x#) of ...)...
747 ==> ...(case x of I# x# -> case fw x# of ...)...
748 and now the redex (f x) isn't floatable any more.
749
750 The no-inlining thing is also important for Template Haskell. You might be
751 compiling in one-shot mode with -O2; but when TH compiles a splice before
752 running it, we don't want to use -O2. Indeed, we don't want to inline
753 anything, because the byte-code interpreter might get confused about
754 unboxed tuples and suchlike.
755
756 Note [Simplifying inside stable unfoldings]
757 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
758 We must take care with simplification inside stable unfoldings (which come from
759 INLINE pragmas).
760
761 First, consider the following example
762 let f = \pq -> BIG
763 in
764 let g = \y -> f y y
765 {-# INLINE g #-}
766 in ...g...g...g...g...g...
767 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
768 and thence copied multiple times when g is inlined. HENCE we treat
769 any occurrence in a stable unfolding as a multiple occurrence, not a single
770 one; see OccurAnal.addRuleUsage.
771
772 Second, we *do* want to do some modest rules/inlining stuff in stable
773 unfoldings, partly to eliminate senseless crap, and partly to break
774 the recursive knots generated by instance declarations.
775
776 However, suppose we have
777 {-# INLINE <act> f #-}
778 f = <rhs>
779 meaning "inline f in phases p where activation <act>(p) holds".
780 Then what inlinings/rules can we apply to the copy of <rhs> captured in
781 f's stable unfolding? Our model is that literally <rhs> is substituted for
782 f when it is inlined. So our conservative plan (implemented by
783 updModeForStableUnfoldings) is this:
784
785 -------------------------------------------------------------
786 When simplifying the RHS of a stable unfolding, set the phase
787 to the phase in which the stable unfolding first becomes active
788 -------------------------------------------------------------
789
790 That ensures that
791
792 a) Rules/inlinings that *cease* being active before p will
793 not apply to the stable unfolding, consistent with it being
794 inlined in its *original* form in phase p.
795
796 b) Rules/inlinings that only become active *after* p will
797 not apply to the stable unfolding, again to be consistent with
798 inlining the *original* rhs in phase p.
799
800 For example,
801 {-# INLINE f #-}
802 f x = ...g...
803
804 {-# NOINLINE [1] g #-}
805 g y = ...
806
807 {-# RULE h g = ... #-}
808 Here we must not inline g into f's RHS, even when we get to phase 0,
809 because when f is later inlined into some other module we want the
810 rule for h to fire.
811
812 Similarly, consider
813 {-# INLINE f #-}
814 f x = ...g...
815
816 g y = ...
817 and suppose that there are auto-generated specialisations and a strictness
818 wrapper for g. The specialisations get activation AlwaysActive, and the
819 strictness wrapper gets activation (ActiveAfter 0). So the strictness
820 wrapper fails the test and won't be inlined into f's stable unfolding. That
821 means f can inline, expose the specialised call to g, so the specialisation
822 rules can fire.
823
824 A note about wrappers
825 ~~~~~~~~~~~~~~~~~~~~~
826 It's also important not to inline a worker back into a wrapper.
827 A wrapper looks like
828 wrapper = inline_me (\x -> ...worker... )
829 Normally, the inline_me prevents the worker getting inlined into
830 the wrapper (initially, the worker's only call site!). But,
831 if the wrapper is sure to be called, the strictness analyser will
832 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
833 continuation.
834 -}
835
836 activeUnfolding :: SimplEnv -> Id -> Bool
837 activeUnfolding env
838 | not (sm_inline mode) = active_unfolding_minimal
839 | otherwise = case sm_phase mode of
840 InitialPhase -> active_unfolding_gentle
841 Phase n -> active_unfolding n
842 where
843 mode = getMode env
844
845 getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
846 -- When matching in RULE, we want to "look through" an unfolding
847 -- (to see a constructor) if *rules* are on, even if *inlinings*
848 -- are not. A notable example is DFuns, which really we want to
849 -- match in rules like (op dfun) in gentle mode. Another example
850 -- is 'otherwise' which we want exprIsConApp_maybe to be able to
851 -- see very early on
852 getUnfoldingInRuleMatch env
853 = (in_scope, id_unf)
854 where
855 in_scope = seInScope env
856 mode = getMode env
857 id_unf id | unf_is_active id = idUnfolding id
858 | otherwise = NoUnfolding
859 unf_is_active id
860 | not (sm_rules mode) = active_unfolding_minimal id
861 | otherwise = isActive (sm_phase mode) (idInlineActivation id)
862
863 active_unfolding_minimal :: Id -> Bool
864 -- Compulsory unfoldings only
865 -- Ignore SimplGently, because we want to inline regardless;
866 -- the Id has no top-level binding at all
867 --
868 -- NB: we used to have a second exception, for data con wrappers.
869 -- On the grounds that we use gentle mode for rule LHSs, and
870 -- they match better when data con wrappers are inlined.
871 -- But that only really applies to the trivial wrappers (like (:)),
872 -- and they are now constructed as Compulsory unfoldings (in MkId)
873 -- so they'll happen anyway.
874 active_unfolding_minimal id = isCompulsoryUnfolding (realIdUnfolding id)
875
876 active_unfolding :: PhaseNum -> Id -> Bool
877 active_unfolding n id = isActiveIn n (idInlineActivation id)
878
879 active_unfolding_gentle :: Id -> Bool
880 -- Anything that is early-active
881 -- See Note [Gentle mode]
882 active_unfolding_gentle id
883 = isInlinePragma prag
884 && isEarlyActive (inlinePragmaActivation prag)
885 -- NB: wrappers are not early-active
886 where
887 prag = idInlinePragma id
888
889 ----------------------
890 activeRule :: SimplEnv -> Activation -> Bool
891 -- Nothing => No rules at all
892 activeRule env
893 | not (sm_rules mode) = \_ -> False -- Rewriting is off
894 | otherwise = isActive (sm_phase mode)
895 where
896 mode = getMode env
897
898 {-
899 ************************************************************************
900 * *
901 preInlineUnconditionally
902 * *
903 ************************************************************************
904
905 preInlineUnconditionally
906 ~~~~~~~~~~~~~~~~~~~~~~~~
907 @preInlineUnconditionally@ examines a bndr to see if it is used just
908 once in a completely safe way, so that it is safe to discard the
909 binding and inline its RHS at the (unique) usage site, REGARDLESS of how
910 big the RHS might be. If this is the case we don't simplify the RHS
911 first, but just inline it un-simplified.
912
913 This is much better than first simplifying a perhaps-huge RHS and then
914 inlining and re-simplifying it. Indeed, it can be at least quadratically
915 better. Consider
916
917 x1 = e1
918 x2 = e2[x1]
919 x3 = e3[x2]
920 ...etc...
921 xN = eN[xN-1]
922
923 We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times etc.
924 This can happen with cascades of functions too:
925
926 f1 = \x1.e1
927 f2 = \xs.e2[f1]
928 f3 = \xs.e3[f2]
929 ...etc...
930
931 THE MAIN INVARIANT is this:
932
933 ---- preInlineUnconditionally invariant -----
934 IF preInlineUnconditionally chooses to inline x = <rhs>
935 THEN doing the inlining should not change the occurrence
936 info for the free vars of <rhs>
937 ----------------------------------------------
938
939 For example, it's tempting to look at trivial binding like
940 x = y
941 and inline it unconditionally. But suppose x is used many times,
942 but this is the unique occurrence of y. Then inlining x would change
943 y's occurrence info, which breaks the invariant. It matters: y
944 might have a BIG rhs, which will now be dup'd at every occurrence of x.
945
946
947 Even RHSs labelled InlineMe aren't caught here, because there might be
948 no benefit from inlining at the call site.
949
950 [Sept 01] Don't unconditionally inline a top-level thing, because that
951 can simply make a static thing into something built dynamically. E.g.
952 x = (a,b)
953 main = \s -> h x
954
955 [Remember that we treat \s as a one-shot lambda.] No point in
956 inlining x unless there is something interesting about the call site.
957
958 But watch out: if you aren't careful, some useful foldr/build fusion
959 can be lost (most notably in spectral/hartel/parstof) because the
960 foldr didn't see the build. Doing the dynamic allocation isn't a big
961 deal, in fact, but losing the fusion can be. But the right thing here
962 seems to be to do a callSiteInline based on the fact that there is
963 something interesting about the call site (it's strict). Hmm. That
964 seems a bit fragile.
965
966 Conclusion: inline top level things gaily until Phase 0 (the last
967 phase), at which point don't.
968
969 Note [pre/postInlineUnconditionally in gentle mode]
970 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
971 Even in gentle mode we want to do preInlineUnconditionally. The
972 reason is that too little clean-up happens if you don't inline
973 use-once things. Also a bit of inlining is *good* for full laziness;
974 it can expose constant sub-expressions. Example in
975 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
976 let-float if you inline windowToViewport
977
978 However, as usual for Gentle mode, do not inline things that are
979 inactive in the initial stages. See Note [Gentle mode].
980
981 Note [Stable unfoldings and preInlineUnconditionally]
982 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
983 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
984 Example
985
986 {-# INLINE f #-}
987 f :: Eq a => a -> a
988 f x = ...
989
990 fInt :: Int -> Int
991 fInt = f Int dEqInt
992
993 ...fInt...fInt...fInt...
994
995 Here f occurs just once, in the RHS of fInt. But if we inline it there
996 we'll lose the opportunity to inline at each of fInt's call sites.
997 The INLINE pragma will only inline when the application is saturated
998 for exactly this reason; and we don't want PreInlineUnconditionally
999 to second-guess it. A live example is Trac #3736.
1000 c.f. Note [Stable unfoldings and postInlineUnconditionally]
1001
1002 Note [Top-level bottoming Ids]
1003 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1004 Don't inline top-level Ids that are bottoming, even if they are used just
1005 once, because FloatOut has gone to some trouble to extract them out.
1006 Inlining them won't make the program run faster!
1007
1008 Note [Do not inline CoVars unconditionally]
1009 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1010 Coercion variables appear inside coercions, and the RHS of a let-binding
1011 is a term (not a coercion) so we can't necessarily inline the latter in
1012 the former.
1013 -}
1014
1015 preInlineUnconditionally :: DynFlags -> SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
1016 -- Precondition: rhs satisfies the let/app invariant
1017 -- See Note [CoreSyn let/app invariant] in CoreSyn
1018 -- Reason: we don't want to inline single uses, or discard dead bindings,
1019 -- for unlifted, side-effect-full bindings
1020 preInlineUnconditionally dflags env top_lvl bndr rhs
1021 | not active = False
1022 | isStableUnfolding (idUnfolding bndr) = False -- Note [Stable unfoldings and preInlineUnconditionally]
1023 | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
1024 | not (gopt Opt_SimplPreInlining dflags) = False
1025 | isCoVar bndr = False -- Note [Do not inline CoVars unconditionally]
1026 | otherwise = case idOccInfo bndr of
1027 IAmDead -> True -- Happens in ((\x.1) v)
1028 OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
1029 _ -> False
1030 where
1031 mode = getMode env
1032 active = isActive (sm_phase mode) act
1033 -- See Note [pre/postInlineUnconditionally in gentle mode]
1034 act = idInlineActivation bndr
1035 try_once in_lam int_cxt -- There's one textual occurrence
1036 | not in_lam = isNotTopLevel top_lvl || early_phase
1037 | otherwise = int_cxt && canInlineInLam rhs
1038
1039 -- Be very careful before inlining inside a lambda, because (a) we must not
1040 -- invalidate occurrence information, and (b) we want to avoid pushing a
1041 -- single allocation (here) into multiple allocations (inside lambda).
1042 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
1043 -- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
1044 -- where
1045 -- is_cheap = exprIsCheap rhs
1046 -- ok = is_cheap && int_cxt
1047
1048 -- int_cxt The context isn't totally boring
1049 -- E.g. let f = \ab.BIG in \y. map f xs
1050 -- Don't want to substitute for f, because then we allocate
1051 -- its closure every time the \y is called
1052 -- But: let f = \ab.BIG in \y. map (f y) xs
1053 -- Now we do want to substitute for f, even though it's not
1054 -- saturated, because we're going to allocate a closure for
1055 -- (f y) every time round the loop anyhow.
1056
1057 -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
1058 -- so substituting rhs inside a lambda doesn't change the occ info.
1059 -- Sadly, not quite the same as exprIsHNF.
1060 canInlineInLam (Lit _) = True
1061 canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
1062 canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
1063 canInlineInLam _ = False
1064 -- not ticks. Counting ticks cannot be duplicated, and non-counting
1065 -- ticks around a Lam will disappear anyway.
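    -- Illustrative results (a sketch):
    --   canInlineInLam (\x -> BIG)      = True   -- x is a runtime variable
    --   canInlineInLam (/\a -> \x -> e) = True   -- looks through type lambdas
    --   canInlineInLam (f y)            = False  -- an application may duplicate work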
1066
1067 early_phase = case sm_phase mode of
1068 Phase 0 -> False
1069 _ -> True
1070 -- If we don't have this early_phase test, consider
1071 -- x = length [1,2,3]
1072 -- The full laziness pass carefully floats all the cons cells to
1073 -- top level, and preInlineUnconditionally floats them all back in.
1074 -- Result is (a) static allocation replaced by dynamic allocation
1075 -- (b) many simplifier iterations because this tickles
1076 -- a related problem; only one inlining per pass
1077 --
1078 -- On the other hand, I have seen cases where top-level fusion is
1079 -- lost if we don't inline top level thing (e.g. string constants)
1080 -- Hence the test for phase zero (which is the phase for all the final
1081 -- simplifications). Until phase zero we take no special notice of
1082 -- top level things, but then we become more leery about inlining
1083 -- them.
1084
1085 {-
1086 ************************************************************************
1087 * *
1088 postInlineUnconditionally
1089 * *
1090 ************************************************************************
1091
1092 postInlineUnconditionally
1093 ~~~~~~~~~~~~~~~~~~~~~~~~~
1094 @postInlineUnconditionally@ decides whether to unconditionally inline
1095 a thing based on the form of its RHS; in particular if it has a
1096 trivial RHS. If so, we can inline and discard the binding altogether.
1097
1098 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
1099 only have *forward* references. Hence, it's safe to discard the binding
1100
1101 NOTE: This isn't our last opportunity to inline. We're at the binding
1102 site right now, and we'll get another opportunity when we get to the
1103 occurrence(s)
1104
1105 Note that we do this unconditional inlining only for trivial RHSs.
1106 Don't inline even WHNFs inside lambdas; doing so may simply increase
1107 allocation when the function is called. This isn't the last chance; see
1108 NOTE above.
1109
1110 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here. Why?
1111 Because we don't even want to inline them into the RHS of constructor
1112 arguments. See NOTE above
1113
1114 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
1115 it's best to inline it anyway. We often get a=E; b=a from desugaring,
1116 with both a and b marked NOINLINE. But that seems incompatible with
1117 our new view that inlining is like a RULE, so I'm sticking to the 'active'
1118 story for now.
1119 -}
1120
1121 postInlineUnconditionally
1122 :: DynFlags -> SimplEnv -> TopLevelFlag
1123 -> OutId -- The binder (an InId would be fine too)
1124 -- (*not* a CoVar)
1125 -> OccInfo -- From the InId
1126 -> OutExpr
1127 -> Unfolding
1128 -> Bool
1129 -- Precondition: rhs satisfies the let/app invariant
1130 -- See Note [CoreSyn let/app invariant] in CoreSyn
1131 -- Reason: we don't want to inline single uses, or discard dead bindings,
1132 -- for unlifted, side-effect-full bindings
1133 postInlineUnconditionally dflags env top_lvl bndr occ_info rhs unfolding
1134 | not active = False
1135 | isWeakLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline
1136 -- because it might be referred to "earlier"
1137 | isExportedId bndr = False
1138 | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
1139 | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally]
1140 | exprIsTrivial rhs = True
1141 | otherwise
1142 = case occ_info of
1143 -- The point of examining occ_info here is that for *non-values*
1144 -- that occur outside a lambda, the call-site inliner won't have
1145 -- a chance (because it doesn't know that the thing
1146 -- only occurs once). The pre-inliner won't have gotten
1147 -- it either, if the thing occurs in more than one branch
1148 -- So the main target is things like
1149 -- let x = f y in
1150 -- case v of
1151 -- True -> case x of ...
1152 -- False -> case x of ...
1153 -- This is very important in practice; e.g. wheel-seive1 doubles
1154 -- in allocation if you miss this out
1155 OneOcc in_lam _one_br int_cxt -- OneOcc => no code-duplication issue
1156 -> smallEnoughToInline dflags unfolding -- Small enough to dup
1157 -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
1158 --
1159 -- NB: Do NOT inline arbitrarily big things, even if one_br is True
1160 -- Reason: doing so risks exponential behaviour. We simplify a big
1161 -- expression, inline it, and simplify it again. But if the
1162 -- very same thing happens in the big expression, we get
1163 -- exponential cost!
1164 -- PRINCIPLE: when we've already simplified an expression once,
1165 -- make sure that we only inline it if it's reasonably small.
1166
1167 && (not in_lam ||
1168 -- Outside a lambda, we want to be reasonably aggressive
1169 -- about inlining into multiple branches of case
1170 -- e.g. let x = <non-value>
1171 -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
1172 -- Inlining can be a big win if C3 is the hot-spot, even if
1173 -- the uses in C1, C2 are not 'interesting'
1174 -- An example that gets worse if you add int_cxt here is 'clausify'
1175
1176 (isCheapUnfolding unfolding && int_cxt))
1177 -- isCheap => acceptable work duplication; in_lam may be true
1178 -- int_cxt to prevent us inlining inside a lambda without some
1179 -- good reason. See the notes on int_cxt in preInlineUnconditionally
1180
1181 IAmDead -> True -- This happens; for example, the case_bndr during case of
1182 -- known constructor: case (a,b) of x { (p,q) -> ... }
1183 -- Here x isn't mentioned in the RHS, so we don't want to
1184 -- create the (dead) let-binding let x = (a,b) in ...
1185
1186 _ -> False
1187
1188 -- Here's an example that we don't handle well:
1189 -- let f = if b then Left (\x.BIG) else Right (\y.BIG)
1190 -- in \y. ....case f of {...} ....
1191 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
1192 -- But
1193 -- - We can't preInlineUnconditionally because that would invalidate
1194 -- the occ info for b.
1195 -- - We can't postInlineUnconditionally because the RHS is big, and
1196 -- that risks exponential behaviour
1197 -- - We can't call-site inline, because the rhs is big
1198 -- Alas!
1199
1200 where
1201 active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
1202 -- See Note [pre/postInlineUnconditionally in gentle mode]
1203
1204 {-
1205 Note [Top level and postInlineUnconditionally]
1206 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1207 We don't do postInlineUnconditionally for top-level things (even for
1208 ones that are trivial):
1209
1210 * Doing so will inline top-level error expressions that have been
1211 carefully floated out by FloatOut. More generally, it might
1212 replace static allocation with dynamic.
1213
1214 * Even for trivial expressions there's a problem. Consider
1215 {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
1216 blah xs = reverse xs
1217 ruggle = sort
1218 In one simplifier pass we might fire the rule, getting
1219 blah xs = ruggle xs
1220 but in *that* simplifier pass we must not do postInlineUnconditionally
1221 on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
1222
1223 If the rhs is trivial it'll be inlined by callSiteInline, and then
1224 the binding will be dead and discarded by the next use of OccurAnal
1225
1226 * There is less point, because the main goal is to get rid of local
1227 bindings used in multiple case branches.
1228
1229 * The inliner should inline trivial things at call sites anyway.
1230
1231 Note [Stable unfoldings and postInlineUnconditionally]
1232 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1233 Do not do postInlineUnconditionally if the Id has a stable unfolding,
1234 otherwise we lose the unfolding. Example
1235
1236 -- f has stable unfolding with rhs (e |> co)
1237 -- where 'e' is big
1238 f = e |> co
1239
1240 Then there's a danger we'll optimise to
1241
1242 f' = e
1243 f = f' |> co
1244
1245 and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
1246 won't inline because 'e' is too big.
1247
1248 c.f. Note [Stable unfoldings and preInlineUnconditionally]
1249
1250
1251 ************************************************************************
1252 * *
1253 Rebuilding a lambda
1254 * *
1255 ************************************************************************
1256 -}
1257
1258 mkLam :: [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
1259 -- mkLam tries three things
1260 -- a) eta reduction, if that gives a trivial expression
1261 -- b) eta expansion [only if there are some value lambdas]
-- c) floating a cast outside the lambda body [see Note [Casts and lambdas]]
1262
1263 mkLam [] body _cont
1264 = return body
1265 mkLam bndrs body cont
1266 = do { dflags <- getDynFlags
1267 ; mkLam' dflags bndrs body }
1268 where
1269 mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
1270 mkLam' dflags bndrs (Cast body co)
1271 | not (any bad bndrs)
1272 -- Note [Casts and lambdas]
1273 = do { lam <- mkLam' dflags bndrs body
1274 ; return (mkCast lam (mkPiCos Representational bndrs co)) }
1275 where
1276 co_vars = tyCoVarsOfCo co
1277 bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
1278
1279 mkLam' dflags bndrs body@(Lam {})
1280 = mkLam' dflags (bndrs ++ bndrs1) body1
1281 where
1282 (bndrs1, body1) = collectBinders body
1283
1284 mkLam' dflags bndrs (Tick t expr)
1285 | tickishFloatable t
1286 = mkTick t <$> mkLam' dflags bndrs expr
1287
1288 mkLam' dflags bndrs body
1289 | gopt Opt_DoEtaReduction dflags
1290 , Just etad_lam <- tryEtaReduce bndrs body
1291 = do { tick (EtaReduction (head bndrs))
1292 ; return etad_lam }
1293
1294 | not (contIsRhs cont) -- See Note [Eta expanding lambdas]
1295 , gopt Opt_DoLambdaEtaExpansion dflags
1296 , any isRuntimeVar bndrs
1297 , let body_arity = exprEtaExpandArity dflags body
1298 , body_arity > 0
1299 = do { tick (EtaExpansion (head bndrs))
1300 ; return (mkLams bndrs (etaExpand body_arity body)) }
1301
1302 | otherwise
1303 = return (mkLams bndrs body)
1304
1305 {-
1306 Note [Eta expanding lambdas]
1307 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1308 In general we *do* want to eta-expand lambdas. Consider
1309 f (\x -> case x of (a,b) -> \s -> blah)
1310 where 's' is a state token, and hence can be eta expanded. This
1311 showed up in the code for GHC.IO.Handle.Text.hPutChar, a rather
1312 important function!
1313
1314 The eta-expansion will never happen unless we do it now. (Well, it's
1315 possible that CorePrep will do it, but CorePrep only has a half-baked
1316 eta-expander that can't deal with casts. So it's much better to do it
1317 here.)
1318
1319 However, when the lambda is let-bound, as the RHS of a let, we have a
1320 better eta-expander (in the form of tryEtaExpandRhs), so we don't
1321 bother to try expansion in mkLam in that case; hence the contIsRhs
1322 guard.
1323
1324 Note [Casts and lambdas]
1325 ~~~~~~~~~~~~~~~~~~~~~~~~
1326 Consider
1327 (\x. (\y. e) `cast` g1) `cast` g2
1328 There is a danger here that the two lambdas look separated, and the
1329 full laziness pass might float an expression to between the two.
1330
1331 So this equation in mkLam' floats the g1 out, thus:
1332 (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
1333 where x:tx.
1334
1335 In general, this floats casts outside lambdas, where (I hope) they
1336 might meet and cancel with some other cast:
1337 \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
1338 /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
1339 /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
1340 (if not (g `in` co))
1341
1342 Notice that it works regardless of 'e'. Originally it worked only
1343 if 'e' was itself a lambda, but in some cases that resulted in
1344 fruitless iteration in the simplifier. A good example was when
1345 compiling Text.ParserCombinators.ReadPrec, where we had a definition
1346 like (\x. Get `cast` g)
1347 where Get is a constructor with nonzero arity. Then mkLam eta-expanded
1348 the Get, and the next iteration eta-reduced it, and then eta-expanded
1349 it again.
1350
1351 Note also the side condition for the case of coercion binders.
1352 It does not make sense to transform
1353 /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
1354 because the latter is not well-kinded.
1355
1356 ************************************************************************
1357 * *
1358 Eta expansion
1359 * *
1360 ************************************************************************
1361 -}
1362
1363 tryEtaExpandRhs :: SimplEnv -> OutId -> OutExpr -> SimplM (Arity, OutExpr)
1364 -- See Note [Eta-expanding at let bindings]
1365 tryEtaExpandRhs env bndr rhs
1366 = do { dflags <- getDynFlags
1367 ; (new_arity, new_rhs) <- try_expand dflags
1368
1369 ; WARN( new_arity < old_id_arity,
1370 (ptext (sLit "Arity decrease:") <+> (ppr bndr <+> ppr old_id_arity
1371 <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
1372 -- Note [Arity decrease] in Simplify
1373 return (new_arity, new_rhs) }
1374 where
1375 try_expand dflags
1376 | exprIsTrivial rhs
1377 = return (exprArity rhs, rhs)
1378
1379 | sm_eta_expand (getMode env) -- Provided eta-expansion is on
1380 , let new_arity1 = findRhsArity dflags bndr rhs old_arity
1381 new_arity2 = idCallArity bndr
1382 new_arity = max new_arity1 new_arity2
1383 , new_arity > old_arity -- And the current manifest arity isn't enough
1384 = do { tick (EtaExpansion bndr)
1385 ; return (new_arity, etaExpand new_arity rhs) }
1386 | otherwise
1387 = return (old_arity, rhs)
1388
1389 old_arity = exprArity rhs -- See Note [Do not eta-expand PAPs]
1390 old_id_arity = idArity bndr
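    -- For example (a sketch): if exprArity of the RHS is 1 but findRhsArity
    -- or idCallArity says 2, and sm_eta_expand is on, try_expand applies
    -- etaExpand 2 to the RHS (adding one more value lambda) and returns 2.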
1391
1392 {-
1393 Note [Eta-expanding at let bindings]
1394 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1395 We now eta expand at let-bindings, which is where the payoff comes.
1396 The most significant thing is that we can do a simple arity analysis
1397 (in CoreArity.findRhsArity), which we can't do for free-floating lambdas
1398
1399 One useful consequence of not eta-expanding lambdas is this example:
1400 genMap :: C a => ...
1401 {-# INLINE genMap #-}
1402 genMap f xs = ...
1403
1404 myMap :: D a => ...
1405 {-# INLINE myMap #-}
1406 myMap = genMap
1407
1408 Notice that 'genMap' should only inline if applied to two arguments.
1409 In the stable unfolding for myMap we'll have the unfolding
1410 (\d -> genMap Int (..d..))
1411 We do not want to eta-expand to
1412 (\d f xs -> genMap Int (..d..) f xs)
1413 because then 'genMap' will inline, and it really shouldn't: at least
1414 as far as the programmer is concerned, it's not applied to two
1415 arguments!
1416
1417 Note [Do not eta-expand PAPs]
1418 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1419 We used to have old_arity = manifestArity rhs, which meant that we
1420 would eta-expand even PAPs. But this gives no particular advantage,
1421 and can lead to a massive blow-up in code size, exhibited by Trac #9020.
1422 Suppose we have a PAP
1423 foo :: IO ()
1424 foo = returnIO ()
1425 Then we can eta-expand it to
1426 foo = (\eta. (returnIO () |> sym g) eta) |> g
1427 where
1428 g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
1429
1430 But there is really no point in doing this, and it generates masses of
1431 coercions and whatnot that eventually disappear again. For T9020, GHC
1432 allocated 6.6G before, and 0.8G afterwards; and residency dropped from
1433 1.8G to 45M.
1434
1435 But note that this won't eta-expand, say
1436 f = \g -> map g
1437 Does it matter not eta-expanding such functions? I'm not sure. Perhaps
1438 strictness analysis will have less to bite on?
1439
1440
1441 ************************************************************************
1442 * *
1443 \subsection{Floating lets out of big lambdas}
1444 * *
1445 ************************************************************************
1446
1447 Note [Floating and type abstraction]
1448 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1449 Consider this:
1450 x = /\a. C e1 e2
1451 We'd like to float this to
1452 y1 = /\a. e1
1453 y2 = /\a. e2
1454 x = /\a. C (y1 a) (y2 a)
1455 for the usual reasons: we want to inline x rather vigorously.
1456
1457 You may think that this kind of thing is rare. But in some programs it is
1458 common. For example, if you do closure conversion you might get:
1459
1460 data a :-> b = forall e. (e -> a -> b) :$ e
1461
1462 f_cc :: forall a. a :-> a
1463 f_cc = /\a. (\e. id a) :$ ()
1464
1465 Now we really want to inline that f_cc thing so that the
1466 construction of the closure goes away.
1467
1468 So I have elaborated simplLazyBind to understand right-hand sides that look
1469 like
1470 /\ a1..an. body
1471
1472 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1473 but there is quite a bit of plumbing in simplLazyBind as well.
1474
1475 The same transformation is good when there are lets in the body:
1476
1477 /\abc -> let(rec) x = e in b
1478 ==>
1479 let(rec) x' = /\abc -> let x = x' a b c in e
1480 in
1481 /\abc -> let x = x' a b c in b
1482
1483 This is good because it can turn things like:
1484
1485 let f = /\a -> letrec g = ... g ... in g
1486 into
1487 letrec g' = /\a -> ... g' a ...
1488 in
1489 let f = /\ a -> g' a
1490
1491 which is better. In effect, it means that big lambdas don't impede
1492 let-floating.
1493
1494 This optimisation is CRUCIAL in eliminating the junk introduced by
1495 desugaring mutually recursive definitions. Don't eliminate it lightly!
1496
1497 [May 1999] If we do this transformation *regardless* then we can
1498 end up with some pretty silly stuff. For example,
1499
1500 let
1501 st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1502 in ..
1503 becomes
1504 let y1 = /\s -> r1
1505 y2 = /\s -> r2
1506 st = /\s -> ...[y1 s/x1, y2 s/x2]
1507 in ..
1508
1509 Unless the "..." is a WHNF there is really no point in doing this.
1510 Indeed it can make things worse. Suppose x1 is used strictly,
1511 and is of the form
1512
1513 x1* = case f y of { (a,b) -> e }
1514
1515 If we abstract this wrt the tyvar we then can't do the case inline
1516 as we normally would.
1517
1518 That's why the whole transformation is part of the same process that
1519 floats let-bindings and constructor arguments out of RHSs. In particular,
1520 it is guarded by the doFloatFromRhs call in simplLazyBind.
1521 -}
1522
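-- A sketch of the overall effect (illustrative, with made-up names):
--     abstractFloats [a] <env containing the float  v = e>  body
--       ==>  ( [poly_v = /\a -> e],  body[ (poly_v a) / v ] )
-- i.e. each float in the environment is abstracted over the given type
-- variables, and occurrences of the original binder in the body are
-- replaced by the abstracted binder applied to those variables.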
1523 abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
1524 abstractFloats main_tvs body_env body
1525 = ASSERT( notNull body_floats )
1526 do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
1527 ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
1528 where
1529 main_tv_set = mkVarSet main_tvs
1530 body_floats = getFloatBinds body_env
1531 empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
1532
1533 abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
1534 abstract subst (NonRec id rhs)
1535 = do { (poly_id, poly_app) <- mk_poly tvs_here id
1536 ; let poly_rhs = mkLams tvs_here rhs'
1537 subst' = CoreSubst.extendIdSubst subst id poly_app
1538 ; return (subst', (NonRec poly_id poly_rhs)) }
1539 where
1540 rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
1541 tvs_here = varSetElemsKvsFirst (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
1542
1543 -- Abstract only over the type variables free in the rhs
1544 -- wrt which the new binding is abstracted. But the naive
1545 -- approach of abstracting wrt the tyvars free in the Id's type
1546 -- fails. Consider:
1547 -- /\ a b -> let t :: (a,b) = (e1, e2)
1548 -- x :: a = fst t
1549 -- in ...
1550 -- Here, b isn't free in x's type, but we must nevertheless
1551 -- abstract wrt b as well, because t's type mentions b.
1552 -- Since t is floated too, we'd end up with the bogus:
1553 -- poly_t = /\ a b -> (e1, e2)
1554 -- poly_x = /\ a -> fst (poly_t a *b*)
1555 -- So for now we adopt the even more naive approach of
1556 -- abstracting wrt *all* the tyvars. We'll see if that
1557 -- gives rise to problems. SLPJ June 98
1558
1559 abstract subst (Rec prs)
1560 = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
1561 ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
1562 poly_rhss = [mkLams tvs_here (CoreSubst.substExpr (text "abstract_floats3") subst' rhs)
1563 | rhs <- rhss]
1564 ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
1565 where
1566 (ids,rhss) = unzip prs
1567 -- For a recursive group, it's a bit of a pain to work out the minimal
1568 -- set of tyvars over which to abstract:
1569 -- /\ a b c. let x = ...a... in
1570 -- letrec { p = ...x...q...
1571 -- q = .....p...b... } in
1572 -- ...
1573 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
1574 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
1575 -- Since it's a pain, we just use the whole set, which is always safe
1576 --
1577 -- If you ever want to be more selective, remember this bizarre case too:
1578 -- x::a = x
1579 -- Here, we must abstract 'x' over 'a'.
1580 tvs_here = sortQuantVars main_tvs
1581
1582 mk_poly tvs_here var
1583 = do { uniq <- getUniqueM
1584 ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
1585 poly_ty = mkForAllTys tvs_here (idType var) -- But new type of course
1586 poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.hs
1587 mkLocalId poly_name poly_ty
1588 ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
1589 -- In the olden days, it was crucial to copy the occInfo of the original var,
1590 -- because we were looking at occurrence-analysed but as yet unsimplified code!
1591 -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking
1592 -- at already simplified code, so it doesn't matter
1593 --
1594 -- It's even right to retain single-occurrence or dead-var info:
1595 -- Suppose we started with /\a -> let x = E in B
1596 -- where x occurs once in B. Then we transform to:
1597 -- let x' = /\a -> E in /\a -> let x* = x' a in B
1598 -- where x* has an INLINE prag on it. Now, once x* is inlined,
1599 -- the occurrences of x' will be just the occurrences originally
1600 -- pinned on x.
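--
-- Illustration (a sketch, with made-up names): if tvs_here = [a,b]
-- and we are abstracting a float    x :: a -> Int
-- then mk_poly returns a fresh binder
--     poly_x :: forall a b. a -> Int
-- together with the application (poly_x a b).  The caller uses the
-- application to substitute for x in the body, and pairs poly_x with
-- the abstracted right-hand side (/\a b -> <x's rhs>).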
1601
1602 {-
1603 Note [Abstract over coercions]
1604 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1605 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1606 type variable a. Rather than sort this mess out, we simply bale out and abstract
1607 wrt all the type variables if any of them are coercion variables.
1608
1609
1610 Historical note: if you use let-bindings instead of a substitution, beware of this:
1611
1612 -- Suppose we start with:
1613 --
1614 -- x = /\ a -> let g = G in E
1615 --
1616 -- Then we'll float to get
1617 --
1618 -- x = let poly_g = /\ a -> G
1619 -- in /\ a -> let g = poly_g a in E
1620 --
1621 -- But now the occurrence analyser will see just one occurrence
1622 -- of poly_g, not inside a lambda, so the simplifier will
1623 -- PreInlineUnconditionally poly_g back into g! Back to square 1!
1624 -- (I used to think that the "don't inline lone occurrences" stuff
1625 -- would stop this happening, but since it's the *only* occurrence,
1626 -- PreInlineUnconditionally kicks in first!)
1627 --
1628 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1629 -- to appear many times. (NB: mkInlineMe eliminates
1630 -- such notes on trivial RHSs, so do it manually.)
1631
1632 ************************************************************************
1633 * *
1634 prepareAlts
1635 * *
1636 ************************************************************************
1637
1638 prepareAlts tries these things:
1639
1640 1. Eliminate alternatives that cannot match, including the
1641 DEFAULT alternative.
1642
1643 2. If the DEFAULT alternative can match only one possible constructor,
1644 then make that constructor explicit.
1645 e.g.
1646 case e of x { DEFAULT -> rhs }
1647 ===>
1648 case e of x { (a,b) -> rhs }
1649 where the type is a single constructor type. This gives better code
1650 when rhs also scrutinises x or e.
1651
1652 3. Returns a list of the constructors that cannot hold in the
1653 DEFAULT alternative (if there is one)
1654
1655 Here "cannot match" includes knowledge from GADTs
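
For example (an illustrative sketch):

    data T a where
      TInt  :: T Int
      TBool :: T Bool

    g :: T a -> a -> a
    g t x = case t of { TInt -> x + 1; TBool -> not x }

If g is inlined at a call site where the scrutinee has type T Int, the
TBool alternative cannot match, so prepareAlts can discard it rather
than simplify it.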
1656
1657 It's a good idea to do this stuff before simplifying the alternatives, to
1658 avoid simplifying alternatives we know can't happen, and to come up with
1659 the list of constructors that are handled, to put into the IdInfo of the
1660 case binder, for use when simplifying the alternatives.
1661
1662 Eliminating the default alternative in (1) isn't so obvious, but it can
1663 happen:
1664
1665 data Colour = Red | Green | Blue
1666
1667 f x = case x of
1668 Red -> ..
1669 Green -> ..
1670 DEFAULT -> h x
1671
1672 h y = case y of
1673 Blue -> ..
1674 DEFAULT -> [ case y of ... ]
1675
1676 If we inline h into f, the default case of the inlined h can't happen.
1677 If we don't notice this, we may end up filtering out *all* the cases
1678 of the inner case y, leaving us nowhere to go!
1679 -}
1680
1681 prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
1682 -- The returned alternatives can be empty; if so, none are possible
1683 prepareAlts scrut case_bndr' alts
1684 | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
1685 -- Case binder is needed just for its type. Note that as an
1686 -- OutId, it has maximum information; this is important.
1687 -- Test simpl013 is an example
1688 = do { us <- getUniquesM
1689 ; let (idcs1, alts1) = filterAlts tc tys imposs_cons alts
1690 (yes2, alts2) = refineDefaultAlt us tc tys idcs1 alts1
1691 (yes3, idcs3, alts3) = combineIdenticalAlts idcs1 alts2
1692 -- "idcs" stands for "impossible default data constructors"
1693 -- i.e. the constructors that can't match the default case
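-- combineIdenticalAlts merges alternatives whose right-hand sides
-- are identical (and don't mention the pattern-bound variables)
-- into a single DEFAULT alternative; roughly
--     case x of { A -> e; B -> e; C -> e2 }
--     ===>  case x of { DEFAULT -> e; C -> e2 }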
1694 ; when yes2 $ tick (FillInCaseDefault case_bndr')
1695 ; when yes3 $ tick (AltMerge case_bndr')
1696 ; return (idcs3, alts3) }
1697
1698 | otherwise -- Not a data type, so nothing interesting happens
1699 = return ([], alts)
1700 where
1701 imposs_cons = case scrut of
1702 Var v -> otherCons (idUnfolding v)
1703 _ -> []
1704
1705
1706 {-
1707 ************************************************************************
1708 * *
1709 mkCase
1710 * *
1711 ************************************************************************
1712
1713 mkCase tries these things
1714
1715 1. Merge Nested Cases
1716
1717        case e of b {             ==>   case e of b {
1718          p1 -> rhs1                      p1 -> rhs1
1719          ...                             ...
1720          pm -> rhsm                      pm -> rhsm
1721          _  -> case b of b' {            pn -> let b'=b in rhsn
1722                  pn -> rhsn              ...
1723                  ...                     po -> let b'=b in rhso
1724                  po -> rhso              _  -> let b'=b in rhsd
1725                  _  -> rhsd
1726        }
1727
1728 which merges the two cases into one when the default alternative of
1729 the outer case scrutinises the same variable as the outer case. This
1730 transformation is called Case Merging. It avoids scrutinising the
1731 same variable multiple times.
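
Such nested cases typically arise after inlining.  For example (an
illustrative sketch):

    f x = case x of { A -> e1; _ -> g x }
    g y = case y of { B -> e2; C -> e3 }

Inlining g into f's default alternative (together with the occurrence
analyser's binder-swap, which makes the inner scrutinee the outer case
binder) gives exactly the shape above, and case merging then produces
a single three-way case on x.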
1732
1733 2. Eliminate Identity Case
1734
1735 case e of ===> e
1736 True -> True;
1737 False -> False
1738
1739 and similar friends.
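
For example (an illustrative sketch), this also counts as an identity
case, because every alternative rebuilds exactly the value scrutinised:

    case xs of          ===>  xs
      []     -> []
      (y:ys) -> y : ys

as does a case whose alternatives all return the case binder, possibly
under a common cast (see Note [RHS casts] below).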
1740 -}
1741
1742 mkCase, mkCase1, mkCase2
1743 :: DynFlags
1744 -> OutExpr -> OutId
1745 -> OutType -> [OutAlt] -- Alternatives in standard (increasing) order
1746 -> SimplM OutExpr
1747
1748 --------------------------------------------------
1749 -- 1. Merge Nested Cases
1750 --------------------------------------------------
1751
1752 mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
1753 | gopt Opt_CaseMerge dflags
1754 , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
1755 <- stripTicksTop tickishFloatable deflt_rhs
1756 , inner_scrut_var == outer_bndr
1757 = do { tick (CaseMerge outer_bndr)
1758
1759 ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
1760 (con, args, wrap_rhs rhs)
1761 -- Simplifier's no-shadowing invariant should ensure
1762 -- that outer_bndr is not shadowed by the inner patterns
1763 wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
1764 -- The let is OK even for unboxed binders,
1765
1766 wrapped_alts | isDeadBinder inner_bndr = inner_alts
1767 | otherwise = map wrap_alt inner_alts
1768
1769 merged_alts = mergeAlts outer_alts wrapped_alts
1770 -- NB: mergeAlts gives priority to the left
1771 -- case x of
1772 -- A -> e1
1773 -- DEFAULT -> case x of
1774 -- A -> e2
1775 -- B -> e3
1776 -- When we merge, we must ensure that e1 takes
1777 -- precedence over e2 as the value for A!
1778
1779 ; fmap (mkTicks ticks) $
1780 mkCase1 dflags scrut outer_bndr alts_ty merged_alts
1781 }
1782 -- Warning: don't call mkCase recursively!
1783 -- Firstly, there's no point, because inner alts have already had
1784 -- mkCase applied to them, so they won't have a case in their default
1785 -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
1786 -- in munge_rhs may put a case into the DEFAULT branch!
1787
1788 mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
1789
1790 --------------------------------------------------
1791 -- 2. Eliminate Identity Case
1792 --------------------------------------------------
1793
1794 mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _) -- Identity case
1795 | all identity_alt alts
1796 = do { tick (CaseIdentity case_bndr)
1797 ; return (mkTicks ticks $ re_cast scrut rhs1) }
1798 where
1799 ticks = concatMap (stripTicksT tickishFloatable . thirdOf3) (tail alts)
1800 identity_alt (con, args, rhs) = check_eq rhs con args
1801
1802 check_eq (Cast rhs co) con args
1803 = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
1804 -- See Note [RHS casts]
1805 check_eq (Lit lit) (LitAlt lit') _ = lit == lit'
1806 check_eq (Var v) _ _ | v == case_bndr = True
1807 check_eq (Var v) (DataAlt con) [] = v == dataConWorkId con
1808 -- Optimisation only
1809 check_eq (Tick t e) alt args = tickishFloatable t &&
1810 check_eq e alt args
1811 check_eq rhs (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $
1812 mkConApp con (arg_tys ++
1813 varsToCoreExprs args)
1814 check_eq _ _ _ = False
1815
1816 arg_tys = map Type (tyConAppArgs (idType case_bndr))
1817
1818 -- Note [RHS casts]
1819 -- ~~~~~~~~~~~~~~~~
1820 -- We've seen this:
1821 -- case e of x { _ -> x `cast` c }
1822 -- And we definitely want to eliminate this case, to give
1823 -- e `cast` c
1824 -- So we throw away the cast from the RHS, and reconstruct
1825 -- it at the other end. All the RHS casts must be the same
1826 -- if (all identity_alt alts) holds.
1827 --
1828 -- Don't worry about nested casts, because the simplifier combines them
1829
1830 re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
1831 re_cast scrut _ = scrut
1832
1833 mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
1834
1835 --------------------------------------------------
1836 -- Catch-all
1837 --------------------------------------------------
1838 mkCase2 _dflags scrut bndr alts_ty alts
1839 = return (Case scrut bndr alts_ty alts)
1840
1841 {-
1842 Note [Dead binders]
1843 ~~~~~~~~~~~~~~~~~~~~
1844 Note that dead-ness is maintained by the simplifier, so that it is
1845 accurate after simplification as well as before.
1846
1847
1848 Note [Cascading case merge]
1849 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1850 Case merging should cascade in one sweep, because it
1851 happens bottom-up
1852
1853 case e of a {
1854 DEFAULT -> case a of b
1855 DEFAULT -> case b of c {
1856 DEFAULT -> e
1857 A -> ea
1858 B -> eb
1859 C -> ec
1860 ==>
1861 case e of a {
1862 DEFAULT -> case a of b
1863 DEFAULT -> let c = b in e
1864 A -> let c = b in ea
1865 B -> eb
1866 C -> ec
1867 ==>
1868 case e of a {
1869 DEFAULT -> let b = a in let c = b in e
1870 A -> let b = a in let c = b in ea
1871 B -> let b = a in eb
1872 C -> ec
1873
1874
1875 However here's a tricky case that we still don't catch, and I don't
1876 see how to catch it in one pass:
1877
1878 case x of c1 { I# a1 ->
1879 case a1 of c2 ->
1880 0 -> ...
1881 DEFAULT -> case x of c3 { I# a2 ->
1882 case a2 of ...
1883
1884 After occurrence analysis (and its binder-swap) we get this
1885
1886 case x of c1 { I# a1 ->
1887 let x = c1 in -- Binder-swap addition
1888 case a1 of c2 ->
1889 0 -> ...
1890 DEFAULT -> case x of c3 { I# a2 ->
1891 case a2 of ...
1892
1893 When we simplify the inner case x, we'll see that
1894 x=c1=I# a1. So we'll bind a2 to a1, and get
1895
1896 case x of c1 { I# a1 ->
1897 case a1 of c2 ->
1898 0 -> ...
1899 DEFAULT -> case a1 of ...
1900
1901 This is correct, but we can't do a case merge in this sweep
1902 because c2 /= a1. Reason: the binding c1=I# a1 went inwards
1903 without getting changed to c1=I# c2.
1904
1905 I don't think this is worth fixing, even if I knew how. It'll
1906 all come out in the next pass anyway.
1907 -}