1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 \section[SimplUtils]{The simplifier utilities}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplUtils (
10 -- Rebuilding
11 mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
12
13 -- Inlining,
14 preInlineUnconditionally, postInlineUnconditionally,
15 activeUnfolding, activeRule,
16 getUnfoldingInRuleMatch,
17 simplEnvForGHCi, updModeForStableUnfoldings, updModeForRules,
18
19 -- The continuation type
20 SimplCont(..), DupFlag(..),
21 isSimplified,
22 contIsDupable, contResultType, contHoleType,
23 contIsTrivial, contArgs,
24 countValArgs, countArgs,
25 mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
26 interestingCallContext,
27
28 -- ArgInfo
29 ArgInfo(..), ArgSpec(..), mkArgInfo,
30 addValArgTo, addCastTo, addTyArgTo,
31 argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
32
33 abstractFloats
34 ) where
35
36 #include "HsVersions.h"
37
38 import SimplEnv
39 import CoreMonad ( SimplifierMode(..), Tick(..) )
40 import DynFlags
41 import CoreSyn
42 import qualified CoreSubst
43 import PprCore
44 import CoreFVs
45 import CoreUtils
46 import CoreArity
47 import CoreUnfold
48 import Name
49 import Id
50 import Var
51 import Demand
52 import SimplMonad
53 import Type hiding( substTy )
54 import Coercion hiding( substCo )
55 import DataCon ( dataConWorkId )
56 import VarEnv
57 import VarSet
58 import BasicTypes
59 import Util
60 import MonadUtils
61 import Outputable
62 import FastString
63 import Pair
64
65 import Control.Monad ( when )
66
67 {-
68 ************************************************************************
69 * *
70 The SimplCont and DupFlag types
71 * *
72 ************************************************************************
73
74 A SimplCont allows the simplifier to traverse the expression in a
75 zipper-like fashion. The SimplCont represents the rest of the expression,
76 "above" the point of interest.
77
78 You can also think of a SimplCont as an "evaluation context", using
79 that term in the way it is used for operational semantics. This is the
80 way I usually think of it. For example, you'll often see a syntax for
81 evaluation contexts looking like
82 C ::= [] | C e | case C of alts | C `cast` co
83 That's the kind of thing we are doing here, and I use that syntax in
84 the comments.
85
86
87 Key points:
88 * A SimplCont describes a *strict* context (just like
89 evaluation contexts do). E.g. Just [] is not a SimplCont
90
91 * A SimplCont describes a context that *does not* bind
92 any variables. E.g. \x. [] is not a SimplCont
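   As a concrete illustration (a sketch only; the DupFlag, static
   environments and case binder are omitted): when the simplifier is
   focused on f in

        case (f arg |> co) of alts

   the rest of the expression is described by the continuation

        ApplyToVal arg (CastIt co (Select alts (Stop ...)))

   that is, the innermost part of the context comes first.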
93 -}
94
95 data SimplCont
96 = Stop -- An empty context, or <hole>
97 OutType -- Type of the <hole>
98 CallCtxt -- Tells if there is something interesting about
99 -- the context, and hence the inliner
100 -- should be a bit keener (see interestingCallContext)
101 -- Specifically:
102 -- This is an argument of a function that has RULES
103 -- Inlining the call might allow the rule to fire
104 -- Never ValAppCxt (use ApplyToVal instead)
105 -- or CaseCtxt (use Select instead)
106
107 | CastIt -- <hole> `cast` co
108 OutCoercion -- The coercion simplified
109 -- Invariant: never an identity coercion
110 SimplCont
111
112 | ApplyToVal { -- <hole> arg
113 sc_dup :: DupFlag, -- See Note [DupFlag invariants]
114 sc_arg :: InExpr, -- The argument,
115 sc_env :: StaticEnv, -- and its static env
116 sc_cont :: SimplCont }
117
118 | ApplyToTy { -- <hole> ty
119 sc_arg_ty :: OutType, -- Argument type
120 sc_hole_ty :: OutType, -- Type of the function, presumably (forall a. blah)
121 -- See Note [The hole type in ApplyToTy]
122 sc_cont :: SimplCont }
123
124 | Select { -- case <hole> of alts
125 sc_dup :: DupFlag, -- See Note [DupFlag invariants]
126 sc_bndr :: InId, -- case binder
127 sc_alts :: [InAlt], -- Alternatives
128 sc_env :: StaticEnv, -- and their static environment
129 sc_cont :: SimplCont }
130
131 -- The two strict forms have no DupFlag, because we never duplicate them
132 | StrictBind -- (\x* \xs. e) <hole>
133 InId [InBndr] -- let x* = <hole> in e
134 InExpr StaticEnv -- is a special case
135 SimplCont
136
137 | StrictArg -- f e1 ..en <hole>
138 ArgInfo -- Specifies f, e1..en, Whether f has rules, etc
139 -- plus strictness flags for *further* args
140 CallCtxt -- Whether *this* argument position is interesting
141 SimplCont
142
143 | TickIt
144 (Tickish Id) -- Tick tickish <hole>
145 SimplCont
146
147 data DupFlag = NoDup -- Unsimplified, might be big
148 | Simplified -- Simplified
149 | OkToDup -- Simplified and small
150
151 isSimplified :: DupFlag -> Bool
152 isSimplified NoDup = False
153 isSimplified _ = True -- Invariant: the subst-env is empty
154
155 perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
156 perhapsSubstTy dup env ty
157 | isSimplified dup = ty
158 | otherwise = substTy env ty
159
160 {-
161 Note [DupFlag invariants]
162 ~~~~~~~~~~~~~~~~~~~~~~~~~
163 In both (ApplyToVal dup _ env k)
164 and (Select dup _ _ env k)
165 the following invariants hold
166
167 (a) if dup = OkToDup, then continuation k is also ok-to-dup
168 (b) if dup = OkToDup or Simplified, the subst-env is empty
169 (and hence no need to re-simplify)
170 -}
171
172 instance Outputable DupFlag where
173 ppr OkToDup = ptext (sLit "ok")
174 ppr NoDup = ptext (sLit "nodup")
175 ppr Simplified = ptext (sLit "simpl")
176
177 instance Outputable SimplCont where
178 ppr (Stop ty interesting) = ptext (sLit "Stop") <> brackets (ppr interesting) <+> ppr ty
179 ppr (CastIt co cont ) = (ptext (sLit "CastIt") <+> ppr co) $$ ppr cont
180 ppr (TickIt t cont) = (ptext (sLit "TickIt") <+> ppr t) $$ ppr cont
181 ppr (ApplyToTy { sc_arg_ty = ty, sc_cont = cont })
182 = (ptext (sLit "ApplyToTy") <+> pprParendType ty) $$ ppr cont
183 ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont })
184 = (ptext (sLit "ApplyToVal") <+> ppr dup <+> pprParendExpr arg)
185 $$ ppr cont
186 ppr (StrictBind b _ _ _ cont) = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
187 ppr (StrictArg ai _ cont) = (ptext (sLit "StrictArg") <+> ppr (ai_fun ai)) $$ ppr cont
188 ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
189 = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$
190 ifPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
191
192
193 {- Note [The hole type in ApplyToTy]
194 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
195 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
196 continuation. It is absolutely necessary to compute contHoleType, but it is
197 not used for anything else (and hence may not be evaluated).
198
199 Why is it necessary for contHoleType? Consider the continuation
200 ApplyToTy Int (Stop Int)
201 corresponding to
202 (<hole> @Int) :: Int
203 What is the type of <hole>? It could be (forall a. Int) or (forall a. a),
204 and there is no way to know which, so we must record it.
205
206 In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType
207 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
208 doesn't matter because we'll never compute them all.
209
210 ************************************************************************
211 * *
212 ArgInfo and ArgSpec
213 * *
214 ************************************************************************
215 -}
216
217 data ArgInfo
218 = ArgInfo {
219 ai_fun :: OutId, -- The function
220 ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order)
221 ai_type :: OutType, -- Type of (f a1 ... an)
222
223 ai_rules :: [CoreRule], -- Rules for this function
224
225 ai_encl :: Bool, -- Flag saying whether this function
226 -- or an enclosing one has rules (recursively)
227 -- True => be keener to inline in all args
228
229 ai_strs :: [Bool], -- Strictness of remaining arguments
230 -- Usually infinite, but if it is finite it guarantees
231 -- that the function diverges after being given
232 -- that number of args
233 ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline
234 -- Always infinite
235 }
236
237 data ArgSpec
238 = ValArg OutExpr -- Apply to this (coercion or value); c.f. ApplyToVal
239 | TyArg { as_arg_ty :: OutType -- Apply to this type; c.f. ApplyToTy
240 , as_hole_ty :: OutType } -- Type of the function (presumably forall a. blah)
241 | CastBy OutCoercion -- Cast by this; c.f. CastIt
242
243 instance Outputable ArgSpec where
244 ppr (ValArg e) = ptext (sLit "ValArg") <+> ppr e
245 ppr (TyArg { as_arg_ty = ty }) = ptext (sLit "TyArg") <+> ppr ty
246 ppr (CastBy c) = ptext (sLit "CastBy") <+> ppr c
247
248 addValArgTo :: ArgInfo -> OutExpr -> ArgInfo
249 addValArgTo ai arg = ai { ai_args = ValArg arg : ai_args ai
250 , ai_type = applyTypeToArg (ai_type ai) arg }
251
252 addTyArgTo :: ArgInfo -> OutType -> ArgInfo
253 addTyArgTo ai arg_ty = ai { ai_args = arg_spec : ai_args ai
254 , ai_type = piResultTy poly_fun_ty arg_ty }
255 where
256 poly_fun_ty = ai_type ai
257 arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = poly_fun_ty }
258
259 addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
260 addCastTo ai co = ai { ai_args = CastBy co : ai_args ai
261 , ai_type = pSnd (coercionKind co) }
262
263 argInfoAppArgs :: [ArgSpec] -> [OutExpr]
264 argInfoAppArgs [] = []
265 argInfoAppArgs (CastBy {} : _) = [] -- Stop at a cast
266 argInfoAppArgs (ValArg e : as) = e : argInfoAppArgs as
267 argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as
268
269 pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
270 pushSimplifiedArgs _env [] k = k
271 pushSimplifiedArgs env (arg : args) k
272 = case arg of
273 TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
274 -> ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
275 ValArg e -> ApplyToVal { sc_arg = e, sc_env = env, sc_dup = Simplified, sc_cont = rest }
276 CastBy c -> CastIt c rest
277 where
278 rest = pushSimplifiedArgs env args k
279 -- The env has an empty SubstEnv
280
281 argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
282 -- NB: the [ArgSpec] is reversed so that the first arg
283 -- in the list is the last one in the application
284 argInfoExpr fun rev_args
285 = go rev_args
286 where
287 go [] = Var fun
288 go (ValArg a : as) = go as `App` a
289 go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty
290 go (CastBy co : as) = mkCast (go as) co
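-- A small worked example (illustrative only): because the list is reversed,
--     argInfoExpr f [ValArg a2, ValArg a1]
-- rebuilds the application
--     (Var f `App` a1) `App` a2        i.e.  f a1 a2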
291
292
293 {-
294 ************************************************************************
295 * *
296 Functions on SimplCont
297 * *
298 ************************************************************************
299 -}
300
301 mkBoringStop :: OutType -> SimplCont
302 mkBoringStop ty = Stop ty BoringCtxt
303
304 mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in CoreUnfold
305 mkRhsStop ty = Stop ty RhsCtxt
306
307 mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
308 mkLazyArgStop ty cci = Stop ty cci
309
310 -------------------
311 contIsRhsOrArg :: SimplCont -> Bool
312 contIsRhsOrArg (Stop {}) = True
313 contIsRhsOrArg (StrictBind {}) = True
314 contIsRhsOrArg (StrictArg {}) = True
315 contIsRhsOrArg _ = False
316
317 contIsRhs :: SimplCont -> Bool
318 contIsRhs (Stop _ RhsCtxt) = True
319 contIsRhs _ = False
320
321 -------------------
322 contIsDupable :: SimplCont -> Bool
323 contIsDupable (Stop {}) = True
324 contIsDupable (ApplyToTy { sc_cont = k }) = contIsDupable k
325 contIsDupable (ApplyToVal { sc_dup = OkToDup }) = True -- See Note [DupFlag invariants]
326 contIsDupable (Select { sc_dup = OkToDup }) = True -- ...ditto...
327 contIsDupable (CastIt _ k) = contIsDupable k
328 contIsDupable _ = False
329
330 -------------------
331 contIsTrivial :: SimplCont -> Bool
332 contIsTrivial (Stop {}) = True
333 contIsTrivial (ApplyToTy { sc_cont = k }) = contIsTrivial k
334 contIsTrivial (ApplyToVal { sc_arg = Coercion _, sc_cont = k }) = contIsTrivial k
335 contIsTrivial (CastIt _ k) = contIsTrivial k
336 contIsTrivial _ = False
337
338 -------------------
339 contResultType :: SimplCont -> OutType
340 contResultType (Stop ty _) = ty
341 contResultType (CastIt _ k) = contResultType k
342 contResultType (StrictBind _ _ _ _ k) = contResultType k
343 contResultType (StrictArg _ _ k) = contResultType k
344 contResultType (Select { sc_cont = k }) = contResultType k
345 contResultType (ApplyToTy { sc_cont = k }) = contResultType k
346 contResultType (ApplyToVal { sc_cont = k }) = contResultType k
347 contResultType (TickIt _ k) = contResultType k
348
349 contHoleType :: SimplCont -> OutType
350 contHoleType (Stop ty _) = ty
351 contHoleType (TickIt _ k) = contHoleType k
352 contHoleType (CastIt co _) = pFst (coercionKind co)
353 contHoleType (StrictBind b _ _ se _) = substTy se (idType b)
354 contHoleType (StrictArg ai _ _) = funArgTy (ai_type ai)
355 contHoleType (ApplyToTy { sc_hole_ty = ty }) = ty -- See Note [The hole type in ApplyToTy]
356 contHoleType (ApplyToVal { sc_arg = e, sc_env = se, sc_dup = dup, sc_cont = k })
357 = mkFunTy (perhapsSubstTy dup se (exprType e))
358 (contHoleType k)
359 contHoleType (Select { sc_dup = d, sc_bndr = b, sc_env = se })
360 = perhapsSubstTy d se (idType b)
361
362 -------------------
363 countValArgs :: SimplCont -> Int
364 -- Count value arguments excluding coercions
365 countValArgs (ApplyToVal { sc_arg = arg, sc_cont = cont })
366 | Coercion {} <- arg = countValArgs cont
367 | otherwise = 1 + countValArgs cont
368 countValArgs _ = 0
369
370 countArgs :: SimplCont -> Int
371 -- Count all arguments, including types, coercions, and other values
372 countArgs (ApplyToTy { sc_cont = cont }) = 1 + countArgs cont
373 countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont
374 countArgs _ = 0
375
376 contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
377 -- Summarises value args, discards type args and coercions
378 -- The returned continuation of the call is only used to
379 -- answer questions like "are you interesting?"
380 contArgs cont
381 | lone cont = (True, [], cont)
382 | otherwise = go [] cont
383 where
384 lone (ApplyToTy {}) = False -- See Note [Lone variables] in CoreUnfold
385 lone (ApplyToVal {}) = False
386 lone (CastIt {}) = False
387 lone _ = True
388
389 go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
390 = go (is_interesting arg se : args) k
391 go args (ApplyToTy { sc_cont = k }) = go args k
392 go args (CastIt _ k) = go args k
393 go args k = (False, reverse args, k)
394
395 is_interesting arg se = interestingArg se arg
396 -- Do *not* use short-cutting substitution here
397 -- because we want to get as much IdInfo as possible
398
399
400 -------------------
401 mkArgInfo :: Id
402 -> [CoreRule] -- Rules for function
403 -> Int -- Number of value args
404 -> SimplCont -- Context of the call
405 -> ArgInfo
406
407 mkArgInfo fun rules n_val_args call_cont
408 | n_val_args < idArity fun -- Note [Unsaturated functions]
409 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
410 , ai_rules = rules, ai_encl = False
411 , ai_strs = vanilla_stricts
412 , ai_discs = vanilla_discounts }
413 | otherwise
414 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
415 , ai_rules = rules
416 , ai_encl = interestingArgContext rules call_cont
417 , ai_strs = add_type_str fun_ty arg_stricts
418 , ai_discs = arg_discounts }
419 where
420 fun_ty = idType fun
421
422 vanilla_discounts, arg_discounts :: [Int]
423 vanilla_discounts = repeat 0
424 arg_discounts = case idUnfolding fun of
425 CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
426 -> discounts ++ vanilla_discounts
427 _ -> vanilla_discounts
428
429 vanilla_stricts, arg_stricts :: [Bool]
430 vanilla_stricts = repeat False
431
432 arg_stricts
433 = case splitStrictSig (idStrictness fun) of
434 (demands, result_info)
435 | not (demands `lengthExceeds` n_val_args)
436 -> -- Enough args, use the strictness given.
437 -- For bottoming functions we used to pretend that the arg
438 -- is lazy, so that we don't treat the arg as an
439 -- interesting context. This avoids substituting
440 -- top-level bindings for (say) strings into
441 -- calls to error. But now we are more careful about
442 -- inlining lone variables, so it's ok (see SimplUtils.analyseCont)
443 if isBotRes result_info then
444 map isStrictDmd demands -- Finite => result is bottom
445 else
446 map isStrictDmd demands ++ vanilla_stricts
447 | otherwise
448 -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
449 <+> ppr n_val_args <+> ppr demands )
450 vanilla_stricts -- Not enough args, or no strictness
451
452 add_type_str :: Type -> [Bool] -> [Bool]
453 -- If the function arg types are strict, record that in the 'strictness bits'
454 -- No need to instantiate because unboxed types (which dominate the strict
455 -- types) can't instantiate type variables.
456 -- add_type_str is done repeatedly (for each call); might be better
457 -- once-for-all in the function
458 -- But beware primops/datacons with no strictness
459 add_type_str _ [] = []
460 add_type_str fun_ty strs -- Look through foralls
461 | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty -- Includes coercions
462 = add_type_str fun_ty' strs
463 add_type_str fun_ty (str:strs) -- Add strict-type info
464 | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
465 = (str || isStrictType arg_ty) : add_type_str fun_ty' strs
466 add_type_str _ strs
467 = strs
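    -- Illustrative example (not part of the original comment): for a function
    -- of type  Int# -> Bool -> Int  with incoming strictness bits [False, False]
    --     add_type_str (Int# -> Bool -> Int) [False, False] = [True, False]
    -- because Int# is an unlifted (hence strict) type, while Bool is not.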
468
469 {- Note [Unsaturated functions]
470 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
471 Consider (test eyeball/inline4)
472 x = a:as
473 y = f x
474 where f has arity 2. Then we do not want to inline 'x', because
475 it'll just be floated out again. Even if f has lots of discounts
476 on its first argument -- it must be saturated for these to kick in
477 -}
478
479
480 {-
481 ************************************************************************
482 * *
483 Interesting arguments
484 * *
485 ************************************************************************
486
487 Note [Interesting call context]
488 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
489 We want to avoid inlining an expression where there can't possibly be
490 any gain, such as in an argument position. Hence, if the continuation
491 is interesting (eg. a case scrutinee, application etc.) then we
492 inline, otherwise we don't.
493
494 Previously some_benefit used to return True only if the variable was
495 applied to some value arguments. This didn't work:
496
497 let x = _coerce_ (T Int) Int (I# 3) in
498 case _coerce_ Int (T Int) x of
499 I# y -> ....
500
501 we want to inline x, but can't see that it's a constructor in a case
502 scrutinee position, and some_benefit is False.
503
504 Another example:
505
506 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
507
508 .... case dMonadST _@_ x0 of (a,b,c) -> ....
509
510 we'd really like to inline dMonadST here, but we *don't* want to
511 inline if the case expression is just
512
513 case x of y { DEFAULT -> ... }
514
515 since we can just eliminate this case instead (x is in WHNF). The same
516 applies when x is bound to a lambda expression. Hence
517 contIsInteresting looks for case expressions with just a single
518 default case.
519 -}
520
521 interestingCallContext :: SimplCont -> CallCtxt
522 -- See Note [Interesting call context]
523 interestingCallContext cont
524 = interesting cont
525 where
526 interesting (Select {}) = CaseCtxt
527 interesting (ApplyToVal {}) = ValAppCtxt
528 -- Can happen if we have (f Int |> co) y
529 -- If f has an INLINE prag we need to give it some
530 -- motivation to inline. See Note [Cast then apply]
531 -- in CoreUnfold
532 interesting (StrictArg _ cci _) = cci
533 interesting (StrictBind {}) = BoringCtxt
534 interesting (Stop _ cci) = cci
535 interesting (TickIt _ k) = interesting k
536 interesting (ApplyToTy { sc_cont = k }) = interesting k
537 interesting (CastIt _ k) = interesting k
538 -- If this call is the arg of a strict function, the context
539 -- is a bit interesting. If we inline here, we may get useful
540 -- evaluation information to avoid repeated evals: e.g.
541 -- x + (y * z)
542 -- Here the contIsInteresting makes the '*' keener to inline,
543 -- which in turn exposes a constructor which makes the '+' inline.
544 -- Assuming that +,* aren't small enough to inline regardless.
545 --
546 -- It's also very important to inline in a strict context for things
547 -- like
548 -- foldr k z (f x)
549 -- Here, the context of (f x) is strict, and if f's unfolding is
550 -- a build it's *great* to inline it here. So we must ensure that
551 -- the context for (f x) is not totally uninteresting.
552
553 interestingArgContext :: [CoreRule] -> SimplCont -> Bool
554 -- If the argument has form (f x y), where x,y are boring,
555 -- and f is marked INLINE, then we don't want to inline f.
556 -- But if the context of the argument is
557 -- g (f x y)
558 -- where g has rules, then we *do* want to inline f, in case it
559 -- exposes a rule that might fire. Similarly, if the context is
560 -- h (g (f x x))
561 -- where h has rules, then we do want to inline f; hence the
562 -- call_cont argument to interestingArgContext
563 --
564 -- The ai_encl flag makes this happen; if it's
565 -- set, the inliner becomes just keen enough to inline f
566 -- regardless of how boring f's arguments are, if it's marked INLINE
567 --
568 -- The alternative would be to *always* inline an INLINE function,
569 -- regardless of how boring its context is; but that seems overkill
570 -- For example, it'd mean that wrapper functions were always inlined
571 --
572 -- The call_cont passed to interestingArgContext is the context of
573 -- the call itself, e.g. g <hole> in the example above
574 interestingArgContext rules call_cont
575 = notNull rules || enclosing_fn_has_rules
576 where
577 enclosing_fn_has_rules = go call_cont
578
579 go (Select {}) = False
580 go (ApplyToVal {}) = False -- Shouldn't really happen
581 go (ApplyToTy {}) = False -- Ditto
582 go (StrictArg _ cci _) = interesting cci
583 go (StrictBind {}) = False -- ??
584 go (CastIt _ c) = go c
585 go (Stop _ cci) = interesting cci
586 go (TickIt _ c) = go c
587
588 interesting RuleArgCtxt = True
589 interesting _ = False
590
591
592 {- Note [Interesting arguments]
593 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
594 An argument is interesting if it deserves a discount for unfoldings
595 with a discount in that argument position. The idea is to avoid
596 unfolding a function that is applied only to variables that have no
597 unfolding (i.e. they are probably lambda bound): f x y z. There is
598 little point in inlining f here.
599
600 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
601 we must look through lets, eg (let x = e in C a b), because the let will
602 float, exposing the value, if we inline. That makes it different to
603 exprIsHNF.
604
605 Before 2009 we said it was interesting if the argument had *any* structure
606 at all; i.e. (hasSomeUnfolding v). But that does too much inlining; see Trac #3016.
607
608 But we don't regard (f x y) as interesting, unless f is unsaturated.
609 If it's saturated and f hasn't inlined, then it's probably not going
610 to now!
611
612 Note [Conlike is interesting]
613 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
614 Consider
615 f d = ...((*) d x y)...
616 ... f (df d')...
617 where df is con-like. Then we'd really like to inline 'f' so that the
618 rule for (*) (df d) can fire. To do this
619 a) we give a discount for being an argument of a class-op (eg (*) d)
620 b) we say that a con-like argument (eg (df d)) is interesting
621 -}
622
623 interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
624 -- See Note [Interesting arguments]
625 interestingArg env e = go env 0 e
626 where
627 -- n is # value args to which the expression is applied
628 go env n (Var v)
629 | SimplEnv { seIdSubst = ids, seInScope = in_scope } <- env
630 = case lookupVarEnv ids v of
631 Nothing -> go_var n (refineFromInScope in_scope v)
632 Just (DoneId v') -> go_var n (refineFromInScope in_scope v')
633 Just (DoneEx e) -> go (zapSubstEnv env) n e
634 Just (ContEx tvs cvs ids e) -> go (setSubstEnv env tvs cvs ids) n e
635
636 go _ _ (Lit {}) = ValueArg
637 go _ _ (Type _) = TrivArg
638 go _ _ (Coercion _) = TrivArg
639 go env n (App fn (Type _)) = go env n fn
640 go env n (App fn (Coercion _)) = go env n fn
641 go env n (App fn _) = go env (n+1) fn
642 go env n (Tick _ a) = go env n a
643 go env n (Cast e _) = go env n e
644 go env n (Lam v e)
645 | isTyVar v = go env n e
646 | n>0 = go env (n-1) e
647 | otherwise = ValueArg
648 go env n (Let _ e) = case go env n e of { ValueArg -> ValueArg; _ -> NonTrivArg }
649 go _ _ (Case {}) = NonTrivArg
650
651 go_var n v
652 | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather than
653 -- data constructors here
654 | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
655 | n > 0 = NonTrivArg -- Saturated or unknown call
656 | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
657 -- See Note [Conlike is interesting]
658 | otherwise = TrivArg -- n==0, no useful unfolding
659 where
660 conlike_unfolding = isConLikeUnfolding (idUnfolding v)
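-- Some illustrative classifications (a sketch, not exhaustive):
--     a literal, a lambda, or a saturated constructor application  -->  ValueArg
--     a let whose body is classified as a value                    -->  ValueArg
--     an application (f x y) of a lambda-bound f                   -->  NonTrivArg
--     a bare lambda-bound variable with no unfolding               -->  TrivArg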
661
662 {-
663 ************************************************************************
664 * *
665 SimplifierMode
666 * *
667 ************************************************************************
668
669 The SimplifierMode controls several switches; see its definition in
670 CoreMonad
671 sm_rules :: Bool -- Whether RULES are enabled
672 sm_inline :: Bool -- Whether inlining is enabled
673 sm_case_case :: Bool -- Whether case-of-case is enabled
674 sm_eta_expand :: Bool -- Whether eta-expansion is enabled
675 -}
676
677 simplEnvForGHCi :: DynFlags -> SimplEnv
678 simplEnvForGHCi dflags
679 = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
680 , sm_phase = InitialPhase
681 , sm_rules = rules_on
682 , sm_inline = False
683 , sm_eta_expand = eta_expand_on
684 , sm_case_case = True }
685 where
686 rules_on = gopt Opt_EnableRewriteRules dflags
687 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
688 -- Do not do any inlining, in case we expose some unboxed
689 -- tuple stuff that confuses the bytecode interpreter
690
691 updModeForStableUnfoldings :: Activation -> SimplifierMode -> SimplifierMode
692 -- See Note [Simplifying inside stable unfoldings]
693 updModeForStableUnfoldings inline_rule_act current_mode
694 = current_mode { sm_phase = phaseFromActivation inline_rule_act
695 , sm_inline = True
696 , sm_eta_expand = False }
697 -- For sm_rules, just inherit; sm_rules might be "off"
698 -- because of -fno-enable-rewrite-rules
699 where
700 phaseFromActivation (ActiveAfter n) = Phase n
701 phaseFromActivation _ = InitialPhase
702
703 updModeForRules :: SimplifierMode -> SimplifierMode
704 -- See Note [Simplifying rules]
705 updModeForRules current_mode
706 = current_mode { sm_phase = InitialPhase
707 , sm_inline = False
708 , sm_rules = False
709 , sm_eta_expand = False }
710
711 {- Note [Simplifying rules]
712 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
713 When simplifying a rule, refrain from any inlining or applying of other RULES.
714
715 Doing anything to the LHS is plain confusing, because it means that what the
716 rule matches is not what the user wrote. c.f. Trac #10595, and #10528.
717 Moreover, inlining (or applying rules) on rule LHSs risks introducing
718 Ticks into the LHS, which makes matching trickier. Trac #10665, #10745.
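
For example (an illustrative sketch), given

    {-# RULES "f/g"  forall x.  f (g x) = h x #-}

inlining g (or rewriting (g x) with some other rule) while simplifying the
LHS would leave a rule that no longer matches the calls the user actually
wrote as (f (g x)).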
719
720 Doing this to either side confounds tools like HERMIT, which seek to reason
721 about and apply the RULES as originally written. See Trac #10829.
722
723 Note [Inlining in gentle mode]
724 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
725 Something is inlined if
726 (i) the sm_inline flag is on, AND
727 (ii) the thing has an INLINE pragma, AND
728 (iii) the thing is inlinable in the earliest phase.
729
730 Example of why (iii) is important:
731 {-# INLINE [~1] g #-}
732 g = ...
733
734 {-# INLINE f #-}
735 f x = g (g x)
736
737 If we were to inline g into f's inlining, then an importing module would
738 never be able to do
739 f e --> g (g e) ---> RULE fires
740 because the stable unfolding for f has had g inlined into it.
741
742 On the other hand, it is bad not to do ANY inlining into a
743 stable unfolding, because then recursive knots in instance declarations
744 don't get unravelled.
745
746 However, *sometimes* SimplGently must do no call-site inlining at all
747 (hence sm_inline = False). Before full laziness we must be careful
748 not to inline wrappers, because doing so inhibits floating
749 e.g. ...(case f x of ...)...
750 ==> ...(case (case x of I# x# -> fw x#) of ...)...
751 ==> ...(case x of I# x# -> case fw x# of ...)...
752 and now the redex (f x) isn't floatable any more.
753
754 The no-inlining thing is also important for Template Haskell. You might be
755 compiling in one-shot mode with -O2; but when TH compiles a splice before
756 running it, we don't want to use -O2. Indeed, we don't want to inline
757 anything, because the byte-code interpreter might get confused about
758 unboxed tuples and suchlike.
759
760 Note [Simplifying inside stable unfoldings]
761 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
762 We must take care with simplification inside stable unfoldings (which come from
763 INLINE pragmas).
764
765 First, consider the following example
766 let f = \pq -> BIG
767 in
768 let g = \y -> f y y
769 {-# INLINE g #-}
770 in ...g...g...g...g...g...
771 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
772 and thence copied multiple times when g is inlined. HENCE we treat
773 any occurrence in a stable unfolding as a multiple occurrence, not a single
774 one; see OccurAnal.addRuleUsage.
775
776 Second, we *do* want to do some modest rules/inlining stuff in stable
777 unfoldings, partly to eliminate senseless crap, and partly to break
778 the recursive knots generated by instance declarations.
779
780 However, suppose we have
781 {-# INLINE <act> f #-}
782 f = <rhs>
783 meaning "inline f in phases p where activation <act>(p) holds".
784 Then what inlinings/rules can we apply to the copy of <rhs> captured in
785 f's stable unfolding? Our model is that literally <rhs> is substituted for
786 f when it is inlined. So our conservative plan (implemented by
787 updModeForStableUnfoldings) is this:
788
789 -------------------------------------------------------------
790 When simplifying the RHS of a stable unfolding, set the phase
791 to the phase in which the stable unfolding first becomes active
792 -------------------------------------------------------------
793
794 That ensures that
795
796 a) Rules/inlinings that *cease* being active before p will
797 not apply to the stable unfolding, consistent with it being
798 inlined in its *original* form in phase p.
799
800 b) Rules/inlinings that only become active *after* p will
801 not apply to the stable unfolding, again to be consistent with
802 inlining the *original* rhs in phase p.
803
804 For example,
805 {-# INLINE f #-}
806 f x = ...g...
807
808 {-# NOINLINE [1] g #-}
809 g y = ...
810
811 {-# RULE h g = ... #-}
812 Here we must not inline g into f's RHS, even when we get to phase 0,
813 because when f is later inlined into some other module we want the
814 rule for h to fire.
815
816 Similarly, consider
817 {-# INLINE f #-}
818 f x = ...g...
819
820 g y = ...
821 and suppose that there are auto-generated specialisations and a strictness
822 wrapper for g. The specialisations get activation AlwaysActive, and the
823 strictness wrapper gets activation (ActiveAfter 0). So the strictness
824 wrapper fails the test and won't be inlined into f's stable unfolding. That
825 means f can inline, expose the specialised call to g, so the specialisation
826 rules can fire.
827
828 A note about wrappers
829 ~~~~~~~~~~~~~~~~~~~~~
830 It's also important not to inline a worker back into a wrapper.
831 A wrapper looks like
832 wrapper = inline_me (\x -> ...worker... )
833 Normally, the inline_me prevents the worker getting inlined into
834 the wrapper (initially, the worker's only call site!). But,
835 if the wrapper is sure to be called, the strictness analyser will
836 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
837 continuation.
838 -}
839
840 activeUnfolding :: SimplEnv -> Id -> Bool
841 activeUnfolding env
842 | not (sm_inline mode) = active_unfolding_minimal
843 | otherwise = case sm_phase mode of
844 InitialPhase -> active_unfolding_gentle
845 Phase n -> active_unfolding n
846 where
847 mode = getMode env
848
849 getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
850 -- When matching in RULE, we want to "look through" an unfolding
851 -- (to see a constructor) if *rules* are on, even if *inlinings*
852 -- are not. A notable example is DFuns, which really we want to
853 -- match in rules like (op dfun) in gentle mode. Another example
854 -- is 'otherwise' which we want exprIsConApp_maybe to be able to
855 -- see very early on
856 getUnfoldingInRuleMatch env
857 = (in_scope, id_unf)
858 where
859 in_scope = seInScope env
860 mode = getMode env
861 id_unf id | unf_is_active id = idUnfolding id
862 | otherwise = NoUnfolding
863 unf_is_active id
864 | not (sm_rules mode) = active_unfolding_minimal id
865 | otherwise = isActive (sm_phase mode) (idInlineActivation id)
866
867 active_unfolding_minimal :: Id -> Bool
868 -- Compulsory unfoldings only
869 -- Ignore SimplGently, because we want to inline regardless;
870 -- the Id has no top-level binding at all
871 --
872 -- NB: we used to have a second exception, for data con wrappers.
873 -- On the grounds that we use gentle mode for rule LHSs, and
874 -- they match better when data con wrappers are inlined.
875 -- But that only really applies to the trivial wrappers (like (:)),
876 -- and they are now constructed as Compulsory unfoldings (in MkId)
877 -- so they'll happen anyway.
878 active_unfolding_minimal id = isCompulsoryUnfolding (realIdUnfolding id)
879
880 active_unfolding :: PhaseNum -> Id -> Bool
881 active_unfolding n id = isActiveIn n (idInlineActivation id)
882
883 active_unfolding_gentle :: Id -> Bool
884 -- Anything that is early-active
885 -- See Note [Gentle mode]
886 active_unfolding_gentle id
887 = isInlinePragma prag
888 && isEarlyActive (inlinePragmaActivation prag)
889 -- NB: wrappers are not early-active
890 where
891 prag = idInlinePragma id
892
893 ----------------------
894 activeRule :: SimplEnv -> Activation -> Bool
895 -- Nothing => No rules at all
896 activeRule env
897 | not (sm_rules mode) = \_ -> False -- Rewriting is off
898 | otherwise = isActive (sm_phase mode)
899 where
900 mode = getMode env
901
902 {-
903 ************************************************************************
904 * *
905 preInlineUnconditionally
906 * *
907 ************************************************************************
908
909 preInlineUnconditionally
910 ~~~~~~~~~~~~~~~~~~~~~~~~
911 @preInlineUnconditionally@ examines a bndr to see if it is used just
912 once in a completely safe way, so that it is safe to discard the
913 binding and inline its RHS at the (unique) usage site, REGARDLESS of how
914 big the RHS might be. If this is the case we don't simplify the RHS
915 first, but just inline it un-simplified.
916
917 This is much better than first simplifying a perhaps-huge RHS and then
918 inlining and re-simplifying it. Indeed, it can be at least quadratically
919 better. Consider
920
921 x1 = e1
922 x2 = e2[x1]
923 x3 = e3[x2]
924 ...etc...
925 xN = eN[xN-1]
926
927 We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times, etc.
928 This can happen with cascades of functions too:
929
930 f1 = \x1.e1
931 f2 = \xs.e2[f1]
932 f3 = \xs.e3[f2]
933 ...etc...
934
935 THE MAIN INVARIANT is this:
936
937 ---- preInlineUnconditionally invariant -----
938 IF preInlineUnconditionally chooses to inline x = <rhs>
939 THEN doing the inlining should not change the occurrence
940 info for the free vars of <rhs>
941 ----------------------------------------------
942
943 For example, it's tempting to look at trivial binding like
944 x = y
945 and inline it unconditionally. But suppose x is used many times,
946 but this is the unique occurrence of y. Then inlining x would change
947 y's occurrence info, which breaks the invariant. It matters: y
948 might have a BIG rhs, which will now be dup'd at every occurrence of x.
949
950
951 Even RHSs labelled InlineMe aren't caught here, because there might be
952 no benefit from inlining at the call site.
953
954 [Sept 01] Don't unconditionally inline a top-level thing, because that
955 can simply make a static thing into something built dynamically. E.g.
956 x = (a,b)
957 main = \s -> h x
958
959 [Remember that we treat \s as a one-shot lambda.] No point in
960 inlining x unless there is something interesting about the call site.
961
962 But watch out: if you aren't careful, some useful foldr/build fusion
963 can be lost (most notably in spectral/hartel/parstof) because the
964 foldr didn't see the build. Doing the dynamic allocation isn't a big
965 deal, in fact, but losing the fusion can be. But the right thing here
966 seems to be to do a callSiteInline based on the fact that there is
967 something interesting about the call site (it's strict). Hmm. That
968 seems a bit fragile.
969
970 Conclusion: inline top level things gaily until Phase 0 (the last
971 phase), at which point don't.
972
973 Note [pre/postInlineUnconditionally in gentle mode]
974 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
975 Even in gentle mode we want to do preInlineUnconditionally. The
976 reason is that too little clean-up happens if you don't inline
977 use-once things. Also a bit of inlining is *good* for full laziness;
978 it can expose constant sub-expressions. Example in
979 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
980 let-float if you inline windowToViewport
981
982 However, as usual for Gentle mode, do not inline things that are
983 inactive in the initial stages. See Note [Gentle mode].
984
985 Note [Stable unfoldings and preInlineUnconditionally]
986 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
987 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
988 Example
989
990 {-# INLINE f #-}
991 f :: Eq a => a -> a
992 f x = ...
993
994 fInt :: Int -> Int
995 fInt = f Int dEqInt
996
997 ...fInt...fInt...fInt...
998
999 Here f occurs just once, in the RHS of fInt. But if we inline it there
1000 we'll lose the opportunity to inline at each of fInt's call sites.
1001 The INLINE pragma will only inline when the application is saturated
1002 for exactly this reason; and we don't want PreInlineUnconditionally
1003 to second-guess it. A live example is Trac #3736.
1004 c.f. Note [Stable unfoldings and postInlineUnconditionally]
1005
1006 Note [Top-level bottoming Ids]
1007 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1008 Don't inline top-level Ids that are bottoming, even if they are used just
1009 once, because FloatOut has gone to some trouble to extract them out.
1010 Inlining them won't make the program run faster!
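
For example (purely illustrative):
    lvl = error "urk"        -- floated to top level by FloatOut
    f x = ...lvl...
Inlining lvl back into f would simply undo FloatOut's work.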
1011
1012 Note [Do not inline CoVars unconditionally]
1013 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1014 Coercion variables appear inside coercions, and the RHS of a let-binding
1015 is a term (not a coercion) so we can't necessarily inline the latter in
1016 the former.
1017 -}
1018
1019 preInlineUnconditionally :: DynFlags -> SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
1020 -- Precondition: rhs satisfies the let/app invariant
1021 -- See Note [CoreSyn let/app invariant] in CoreSyn
1022 -- Reason: we don't want to inline single uses, or discard dead bindings,
1023 -- for unlifted, side-effect-full bindings
1024 preInlineUnconditionally dflags env top_lvl bndr rhs
1025 | not active = False
1026 | isStableUnfolding (idUnfolding bndr) = False -- Note [Stable unfoldings and preInlineUnconditionally]
1027 | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
1028 | not (gopt Opt_SimplPreInlining dflags) = False
1029 | isCoVar bndr = False -- Note [Do not inline CoVars unconditionally]
1030 | otherwise = case idOccInfo bndr of
1031 IAmDead -> True -- Happens in ((\x.1) v)
1032 OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
1033 _ -> False
1034 where
1035 mode = getMode env
1036 active = isActive (sm_phase mode) act
1037 -- See Note [pre/postInlineUnconditionally in gentle mode]
1038 act = idInlineActivation bndr
1039 try_once in_lam int_cxt -- There's one textual occurrence
1040 | not in_lam = isNotTopLevel top_lvl || early_phase
1041 | otherwise = int_cxt && canInlineInLam rhs
1042
1043 -- Be very careful before inlining inside a lambda, because (a) we must not
1044 -- invalidate occurrence information, and (b) we want to avoid pushing a
1045 -- single allocation (here) into multiple allocations (inside lambda).
1046 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
1047 -- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
1048 -- where
1049 -- is_cheap = exprIsCheap rhs
1050 -- ok = is_cheap && int_cxt
1051
1052 -- int_cxt The context isn't totally boring
1053 -- E.g. let f = \ab.BIG in \y. map f xs
1054 -- Don't want to substitute for f, because then we allocate
1055 -- its closure every time the \y is called
1056 -- But: let f = \ab.BIG in \y. map (f y) xs
1057 -- Now we do want to substitute for f, even though it's not
1058 -- saturated, because we're going to allocate a closure for
1059 -- (f y) every time round the loop anyhow.
1060
1061 -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
1062 -- so substituting rhs inside a lambda doesn't change the occ info.
1063 -- Sadly, not quite the same as exprIsHNF.
1064 canInlineInLam (Lit _) = True
1065 canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
1066 canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
1067 canInlineInLam _ = False
1068 -- not ticks. Counting ticks cannot be duplicated, and non-counting
1069 -- ticks around a Lam will disappear anyway.
1070
1071 early_phase = case sm_phase mode of
1072 Phase 0 -> False
1073 _ -> True
1074 -- If we don't have this early_phase test, consider
1075 -- x = length [1,2,3]
1076 -- The full laziness pass carefully floats all the cons cells to
1077 -- top level, and preInlineUnconditionally floats them all back in.
1078 -- Result is (a) static allocation replaced by dynamic allocation
1079 -- (b) many simplifier iterations because this tickles
1080 -- a related problem; only one inlining per pass
1081 --
1082 -- On the other hand, I have seen cases where top-level fusion is
1083 -- lost if we don't inline top level thing (e.g. string constants)
1084 -- Hence the test for phase zero (which is the phase for all the final
1085 -- simplifications). Until phase zero we take no special notice of
1086 -- top level things, but then we become more leery about inlining
1087 -- them.
1088
1089 {-
1090 ************************************************************************
1091 * *
1092 postInlineUnconditionally
1093 * *
1094 ************************************************************************
1095
1096 postInlineUnconditionally
1097 ~~~~~~~~~~~~~~~~~~~~~~~~~
1098 @postInlineUnconditionally@ decides whether to unconditionally inline
1099 a thing based on the form of its RHS; in particular if it has a
1100 trivial RHS. If so, we can inline and discard the binding altogether.
1101
1102 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
1103 only have *forward* references. Hence, it's safe to discard the binding
1104
1105 NOTE: This isn't our last opportunity to inline. We're at the binding
1106 site right now, and we'll get another opportunity when we get to the
1107 occurrence(s)
1108
1109 Note that we do this unconditional inlining only for trivial RHSs.
1110 Don't inline even WHNFs inside lambdas; doing so may simply increase
1111 allocation when the function is called. This isn't the last chance; see
1112 NOTE above.
1113
1114 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here. Why?
1115 Because we don't even want to inline them into the RHS of constructor
1116 arguments. See NOTE above
1117
1118 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
1119 it's best to inline it anyway. We often get a=E; b=a from desugaring,
1120 with both a and b marked NOINLINE. But that seems incompatible with
1121 our new view that inlining is like a RULE, so I'm sticking to the 'active'
1122 story for now.
1123 -}
1124
1125 postInlineUnconditionally
1126 :: DynFlags -> SimplEnv -> TopLevelFlag
1127 -> OutId -- The binder (an InId would be fine too)
1128 -- (*not* a CoVar)
1129 -> OccInfo -- From the InId
1130 -> OutExpr
1131 -> Unfolding
1132 -> Bool
1133 -- Precondition: rhs satisfies the let/app invariant
1134 -- See Note [CoreSyn let/app invariant] in CoreSyn
1135 -- Reason: we don't want to inline single uses, or discard dead bindings,
1136 -- for unlifted, side-effect-full bindings
1137 postInlineUnconditionally dflags env top_lvl bndr occ_info rhs unfolding
1138 | not active = False
1139 | isWeakLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline
1140 -- because it might be referred to "earlier"
1141 | isExportedId bndr = False
1142 | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
1143 | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally]
1144 | exprIsTrivial rhs = True
1145 | otherwise
1146 = case occ_info of
1147 -- The point of examining occ_info here is that for *non-values*
1148 -- that occur outside a lambda, the call-site inliner won't have
1149 -- a chance (because it doesn't know that the thing
1150 -- only occurs once). The pre-inliner won't have gotten
1151 -- it either, if the thing occurs in more than one branch
1152 -- So the main target is things like
1153 -- let x = f y in
1154 -- case v of
1155 -- True -> case x of ...
1156 -- False -> case x of ...
1157 -- This is very important in practice; e.g. wheel-sieve1 doubles
1158 -- in allocation if you miss this out
1159 OneOcc in_lam _one_br int_cxt -- OneOcc => no code-duplication issue
1160 -> smallEnoughToInline dflags unfolding -- Small enough to dup
1161 -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
1162 --
1163 -- NB: Do NOT inline arbitrarily big things, even if one_br is True
1164 -- Reason: doing so risks exponential behaviour. We simplify a big
1165 -- expression, inline it, and simplify it again. But if the
1166 -- very same thing happens in the big expression, we get
1167 -- exponential cost!
1168 -- PRINCIPLE: when we've already simplified an expression once,
1169 -- make sure that we only inline it if it's reasonably small.
1170
1171 && (not in_lam ||
1172 -- Outside a lambda, we want to be reasonably aggressive
1173 -- about inlining into multiple branches of case
1174 -- e.g. let x = <non-value>
1175 -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
1176 -- Inlining can be a big win if C3 is the hot-spot, even if
1177 -- the uses in C1, C2 are not 'interesting'
1178 -- An example that gets worse if you add int_cxt here is 'clausify'
1179
1180 (isCheapUnfolding unfolding && int_cxt))
1181 -- isCheap => acceptable work duplication; in_lam may be true
1182 -- int_cxt to prevent us inlining inside a lambda without some
1183 -- good reason. See the notes on int_cxt in preInlineUnconditionally
1184
1185 IAmDead -> True -- This happens; for example, the case_bndr during case of
1186 -- known constructor: case (a,b) of x { (p,q) -> ... }
1187 -- Here x isn't mentioned in the RHS, so we don't want to
1188 -- create the (dead) let-binding let x = (a,b) in ...
1189
1190 _ -> False
1191
1192 -- Here's an example that we don't handle well:
1193 -- let f = if b then Left (\x.BIG) else Right (\y.BIG)
1194 -- in \y. ....case f of {...} ....
1195 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
1196 -- But
1197 -- - We can't preInlineUnconditionally because that would invalidate
1198 -- the occ info for b.
1199 -- - We can't postInlineUnconditionally because the RHS is big, and
1200 -- that risks exponential behaviour
1201 -- - We can't call-site inline, because the rhs is big
1202 -- Alas!
1203
1204 where
1205 active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
1206 -- See Note [pre/postInlineUnconditionally in gentle mode]
1207
1208 {-
1209 Note [Top level and postInlineUnconditionally]
1210 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1211 We don't do postInlineUnconditionally for top-level things (even for
1212 ones that are trivial):
1213
1214 * Doing so will inline top-level error expressions that have been
1215 carefully floated out by FloatOut. More generally, it might
1216 replace static allocation with dynamic.
1217
1218 * Even for trivial expressions there's a problem. Consider
1219 {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
1220 blah xs = reverse xs
1221 ruggle = sort
1222 In one simplifier pass we might fire the rule, getting
1223 blah xs = ruggle xs
1224 but in *that* simplifier pass we must not do postInlineUnconditionally
1225 on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
1226
1227 If the rhs is trivial it'll be inlined by callSiteInline, and then
1228 the binding will be dead and discarded by the next use of OccurAnal
1229
1230 * There is less point, because the main goal is to get rid of local
1231 bindings used in multiple case branches.
1232
1233 * The inliner should inline trivial things at call sites anyway.
1234
1235 Note [Stable unfoldings and postInlineUnconditionally]
1236 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1237 Do not do postInlineUnconditionally if the Id has a stable unfolding,
1238 otherwise we lose the unfolding. Example
1239
1240 -- f has stable unfolding with rhs (e |> co)
1241 -- where 'e' is big
1242 f = e |> co
1243
1244 Then there's a danger we'll optimise to
1245
1246 f' = e
1247 f = f' |> co
1248
1249 and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
1250 won't inline because 'e' is too big.
1251
1252 c.f. Note [Stable unfoldings and preInlineUnconditionally]
1253
1254
1255 ************************************************************************
1256 * *
1257 Rebuilding a lambda
1258 * *
1259 ************************************************************************
1260 -}
1261
1262 mkLam :: [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
1263 -- mkLam tries three things
1264 -- a) eta reduction, if that gives a trivial expression
1265 -- b) eta expansion [only if there are some value lambdas]
-- c) floating a cast out of the lambda body (see Note [Casts and lambdas])
1266
1267 mkLam [] body _cont
1268 = return body
1269 mkLam bndrs body cont
1270 = do { dflags <- getDynFlags
1271 ; mkLam' dflags bndrs body }
1272 where
1273 mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
1274 mkLam' dflags bndrs (Cast body co)
1275 | not (any bad bndrs)
1276 -- Note [Casts and lambdas]
1277 = do { lam <- mkLam' dflags bndrs body
1278 ; return (mkCast lam (mkPiCos Representational bndrs co)) }
1279 where
1280 co_vars = tyCoVarsOfCo co
1281 bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
1282
1283 mkLam' dflags bndrs body@(Lam {})
1284 = mkLam' dflags (bndrs ++ bndrs1) body1
1285 where
1286 (bndrs1, body1) = collectBinders body
1287
1288 mkLam' dflags bndrs (Tick t expr)
1289 | tickishFloatable t
1290 = mkTick t <$> mkLam' dflags bndrs expr
1291
1292 mkLam' dflags bndrs body
1293 | gopt Opt_DoEtaReduction dflags
1294 , Just etad_lam <- tryEtaReduce bndrs body
1295 = do { tick (EtaReduction (head bndrs))
1296 ; return etad_lam }
1297
1298 | not (contIsRhs cont) -- See Note [Eta expanding lambdas]
1299 , gopt Opt_DoLambdaEtaExpansion dflags
1300 , any isRuntimeVar bndrs
1301 , let body_arity = exprEtaExpandArity dflags body
1302 , body_arity > 0
1303 = do { tick (EtaExpansion (head bndrs))
1304 ; let res = mkLams bndrs (etaExpand body_arity body)
1305 ; traceSmpl "eta expand" (vcat [text "before" <+> ppr (mkLams bndrs body)
1306 , text "after" <+> ppr res])
1307 ; return res }
1308
1309 | otherwise
1310 = return (mkLams bndrs body)
1311
1312 {-
1313 Note [Eta expanding lambdas]
1314 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1315 In general we *do* want to eta-expand lambdas. Consider
1316 f (\x -> case x of (a,b) -> \s -> blah)
1317 where 's' is a state token, and hence can be eta expanded. This
1318 showed up in the code for GHC.IO.Handle.Text.hPutChar, a rather
1319 important function!
1320
1321 The eta-expansion will never happen unless we do it now. (Well, it's
1322 possible that CorePrep will do it, but CorePrep only has a half-baked
1323 eta-expander that can't deal with casts. So it's much better to do it
1324 here.)
1325
1326 However, when the lambda is let-bound, as the RHS of a let, we have a
1327 better eta-expander (in the form of tryEtaExpandRhs), so we don't
1328 bother to try expansion in mkLam in that case; hence the contIsRhs
1329 guard.
1330
1331 Note [Casts and lambdas]
1332 ~~~~~~~~~~~~~~~~~~~~~~~~
1333 Consider
1334 (\x. (\y. e) `cast` g1) `cast` g2
1335 There is a danger here that the two lambdas look separated, and the
1336 full laziness pass might float an expression to between the two.
1337
1338 So this equation in mkLam' floats the g1 out, thus:
1339 (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
1340 where x:tx.
1341
1342 In general, this floats casts outside lambdas, where (I hope) they
1343 might meet and cancel with some other cast:
1344 \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
1345 /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
1346 /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
1347 (if not (g `in` co))
1348
1349 Notice that it works regardless of 'e'. Originally it worked only
1350 if 'e' was itself a lambda, but in some cases that resulted in
1351 fruitless iteration in the simplifier. A good example was when
1352 compiling Text.ParserCombinators.ReadPrec, where we had a definition
1353 like (\x. Get `cast` g)
1354 where Get is a constructor with nonzero arity. Then mkLam eta-expanded
1355 the Get, and the next iteration eta-reduced it, and then eta-expanded
1356 it again.
1357
1358 Note also the side condition for the case of coercion binders.
1359 It does not make sense to transform
1360 /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
1361 because the latter is not well-kinded.
1362
1363 ************************************************************************
1364 * *
1365 Eta expansion
1366 * *
1367 ************************************************************************
1368 -}
1369
1370 tryEtaExpandRhs :: SimplEnv -> OutId -> OutExpr -> SimplM (Arity, OutExpr)
1371 -- See Note [Eta-expanding at let bindings]
1372 tryEtaExpandRhs env bndr rhs
1373 = do { dflags <- getDynFlags
1374 ; (new_arity, new_rhs) <- try_expand dflags
1375
1376 ; WARN( new_arity < old_id_arity,
1377 (ptext (sLit "Arity decrease:") <+> (ppr bndr <+> ppr old_id_arity
1378 <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
1379 -- Note [Arity decrease] in Simplify
1380 return (new_arity, new_rhs) }
1381 where
1382 try_expand dflags
1383 | exprIsTrivial rhs
1384 = return (exprArity rhs, rhs)
1385
1386 | sm_eta_expand (getMode env) -- Provided eta-expansion is on
1387 , let new_arity1 = findRhsArity dflags bndr rhs old_arity
1388 new_arity2 = idCallArity bndr
1389 new_arity = max new_arity1 new_arity2
1390 , new_arity > old_arity -- And the current manifest arity isn't enough
1391 = do { tick (EtaExpansion bndr)
1392 ; return (new_arity, etaExpand new_arity rhs) }
1393 | otherwise
1394 = return (old_arity, rhs)
1395
1396 old_arity = exprArity rhs -- See Note [Do not eta-expand PAPs]
1397 old_id_arity = idArity bndr
1398
1399 {-
1400 Note [Eta-expanding at let bindings]
1401 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1402 We now eta expand at let-bindings, which is where the payoff comes.
1403 The most significant thing is that we can do a simple arity analysis
1404 (in CoreArity.findRhsArity), which we can't do for free-floating lambdas
1405
1406 One useful consequence of not eta-expanding lambdas is this example:
1407 genMap :: C a => ...
1408 {-# INLINE genMap #-}
1409 genMap f xs = ...
1410
1411 myMap :: D a => ...
1412 {-# INLINE myMap #-}
1413 myMap = genMap
1414
1415 Notice that 'genMap' should only inline if applied to two arguments.
1416 In the stable unfolding for myMap we'll have the unfolding
1417 (\d -> genMap Int (..d..))
1418 We do not want to eta-expand to
1419 (\d f xs -> genMap Int (..d..) f xs)
1420 because then 'genMap' will inline, and it really shouldn't: at least
1421 as far as the programmer is concerned, it's not applied to two
1422 arguments!
1423
1424 Note [Do not eta-expand PAPs]
1425 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1426 We used to have old_arity = manifestArity rhs, which meant that we
1427 would eta-expand even PAPs. But this gives no particular advantage,
1428 and can lead to a massive blow-up in code size, exhibited by Trac #9020.
1429 Suppose we have a PAP
1430 foo :: IO ()
1431 foo = returnIO ()
1432 Then we could eta-expand it to
1433 foo = (\eta. (returnIO () |> sym g) eta) |> g
1434 where
1435 g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
1436
1437 But there is really no point in doing this, and it generates masses of
1438 coercions and whatnot that eventually disappear again. For T9020, GHC
1439 allocated 6.6G before, and 0.8G afterwards; and residency dropped from
1440 1.8G to 45M.
1441
1442 But note that this won't eta-expand, say
1443 f = \g -> map g
1444 Does it matter not eta-expanding such functions? I'm not sure. Perhaps
1445 strictness analysis will have less to bite on?
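(For concreteness, the eta-expanded form would be something like
     f = \g xs -> map g xs
an illustrative sketch only; it is not clear whether forcing that form
would be a win.)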
1446
1447
1448 ************************************************************************
1449 * *
1450 \subsection{Floating lets out of big lambdas}
1451 * *
1452 ************************************************************************
1453
1454 Note [Floating and type abstraction]
1455 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1456 Consider this:
1457 x = /\a. C e1 e2
1458 We'd like to float this to
1459 y1 = /\a. e1
1460 y2 = /\a. e2
1461 x = /\a. C (y1 a) (y2 a)
1462 for the usual reasons: we want to inline x rather vigorously.
1463
1464 You may think that this kind of thing is rare. But in some programs it is
1465 common. For example, if you do closure conversion you might get:
1466
1467 data a :-> b = forall e. (e -> a -> b) :$ e
1468
1469 f_cc :: forall a. a :-> a
1470 f_cc = /\a. (\e. id a) :$ ()
1471
1472 Now we really want to inline that f_cc thing so that the
1473 construction of the closure goes away.
1474
1475 So I have elaborated simplLazyBind to understand right-hand sides that look
1476 like
1477 /\ a1..an. body
1478
1479 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1480 but there is quite a bit of plumbing in simplLazyBind as well.
1481
1482 The same transformation is good when there are lets in the body:
1483
1484 /\abc -> let(rec) x = e in b
1485 ==>
1486 let(rec) x' = /\abc -> let x = x' a b c in e
1487 in
1488 /\abc -> let x = x' a b c in b
1489
1490 This is good because it can turn things like:
1491
1492 let f = /\a -> letrec g = ... g ... in g
1493 into
1494 letrec g' = /\a -> ... g' a ...
1495 in
1496 let f = /\ a -> g' a
1497
1498 which is better. In effect, it means that big lambdas don't impede
1499 let-floating.
1500
1501 This optimisation is CRUCIAL in eliminating the junk introduced by
1502 desugaring mutually recursive definitions. Don't eliminate it lightly!
1503
1504 [May 1999] If we do this transformation *regardless* then we can
1505 end up with some pretty silly stuff. For example,
1506
1507 let
1508 st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1509 in ..
1510 becomes
1511 let y1 = /\s -> r1
1512 y2 = /\s -> r2
1513 st = /\s -> ...[y1 s/x1, y2 s/x2]
1514 in ..
1515
1516 Unless the "..." is a WHNF there is really no point in doing this.
1517 Indeed it can make things worse. Suppose x1 is used strictly,
1518 and is of the form
1519
1520 x1* = case f y of { (a,b) -> e }
1521
1522 If we abstract this wrt the tyvar we then can't do the case in place
1523 as we normally would.
1524
1525 That's why the whole transformation is part of the same process that
1526 floats let-bindings and constructor arguments out of RHSs. In particular,
1527 it is guarded by the doFloatFromRhs call in simplLazyBind.
1528
1529 Note [Which type variables to abstract over]
1530 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1531 Abstract only over the type variables that are free in the rhs of the
1532 binding we are abstracting.  Note that
1533
1534 * The naive approach of abstracting wrt the
1535 tyvars free in the Id's /type/ fails. Consider:
1536 /\ a b -> let t :: (a,b) = (e1, e2)
1537 x :: a = fst t
1538 in ...
1539 Here, b isn't free in x's type, but we must nevertheless
1540 abstract wrt b as well, because t's type mentions b.
1541 Since t is floated too, we'd end up with the bogus:
1542 poly_t = /\ a b -> (e1, e2)
1543 poly_x = /\ a -> fst (poly_t a *b*)
1544
1545 * We must do closeOverKinds. Example (Trac #10934):
1546 f = /\k (f:k->*) (a:k). let t = AccFailure @ (f a) in ...
1547 Here we want to float 't', but we must remember to abstract over
1548 'k' as well, even though it is not explicitly mentioned in the RHS,
1549 otherwise we get
1550 t = /\ (f:k->*) (a:k). AccFailure @ (f a)
1551 which is obviously bogus.
1552 -}
1553
1554 abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
1555 abstractFloats main_tvs body_env body
1556 = ASSERT( notNull body_floats )
1557 do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
1558 ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
1559 where
1560 main_tv_set = mkVarSet main_tvs
1561 body_floats = getFloatBinds body_env
1562 empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
1563
1564 abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
1565 abstract subst (NonRec id rhs)
1566 = do { (poly_id, poly_app) <- mk_poly tvs_here id
1567 ; let poly_rhs = mkLams tvs_here rhs'
1568 subst' = CoreSubst.extendIdSubst subst id poly_app
1569 ; return (subst', (NonRec poly_id poly_rhs)) }
1570 where
1571 rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
1572
1573 -- tvs_here: see Note [Which type variables to abstract over]
1574 tvs_here = varSetElemsWellScoped $
1575 intersectVarSet main_tv_set $
1576 closeOverKinds $
1577 exprSomeFreeVars isTyVar rhs'
1578
1579 abstract subst (Rec prs)
1580 = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
1581 ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
1582 poly_rhss = [mkLams tvs_here (CoreSubst.substExpr (text "abstract_floats3") subst' rhs)
1583 | rhs <- rhss]
1584 ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
1585 where
1586 (ids,rhss) = unzip prs
1587 -- For a recursive group, it's a bit of a pain to work out the minimal
1588 -- set of tyvars over which to abstract:
1589 -- /\ a b c. let x = ...a... in
1590 -- letrec { p = ...x...q...
1591 -- q = .....p...b... } in
1592 -- ...
1593 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
1594 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
1595 -- Since it's a pain, we just use the whole set, which is always safe
1596 --
1597 -- If you ever want to be more selective, remember this bizarre case too:
1598 -- x::a = x
1599 -- Here, we must abstract 'x' over 'a'.
1600 tvs_here = toposortTyVars main_tvs
1601
1602 mk_poly tvs_here var
1603 = do { uniq <- getUniqueM
1604 ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
1605 poly_ty = mkInvForAllTys tvs_here (idType var) -- But new type of course
1606 poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.hs
1607 mkLocalIdOrCoVar poly_name poly_ty
1608 ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
1609 -- In the olden days, it was crucial to copy the occInfo of the original var,
1610 -- because we were looking at occurrence-analysed but as yet unsimplified code!
1611 -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking
1612 -- at already simplified code, so it doesn't matter
1613 --
1614 -- It's even right to retain single-occurrence or dead-var info:
1615 -- Suppose we started with /\a -> let x = E in B
1616 -- where x occurs once in B. Then we transform to:
1617 -- let x' = /\a -> E in /\a -> let x* = x' a in B
1618 -- where x* has an INLINE prag on it. Now, once x* is inlined,
1619 -- the occurrences of x' will be just the occurrences originally
1620 -- pinned on x.
1621
1622 {-
1623 Note [Abstract over coercions]
1624 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1625 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1626 type variable a. Rather than sort this mess out, we simply bale out and abstract
1627 wrt all the type variables if any of them are coercion variables.
1628
1629
1630 Historical note: if you use let-bindings instead of a substitution, beware of this:
1631
1632 -- Suppose we start with:
1633 --
1634 -- x = /\ a -> let g = G in E
1635 --
1636 -- Then we'll float to get
1637 --
1638 -- x = let poly_g = /\ a -> G
1639 -- in /\ a -> let g = poly_g a in E
1640 --
1641 -- But now the occurrence analyser will see just one occurrence
1642 -- of poly_g, not inside a lambda, so the simplifier will
1643 -- PreInlineUnconditionally poly_g back into g! Badk to square 1!
1644 -- (I used to think that the "don't inline lone occurrences" stuff
1645 -- would stop this happening, but since it's the *only* occurrence,
1646 -- PreInlineUnconditionally kicks in first!)
1647 --
1648 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1649 -- to appear many times. (NB: mkInlineMe eliminates
1650 -- such notes on trivial RHSs, so do it manually.)
1651
1652 ************************************************************************
1653 * *
1654 prepareAlts
1655 * *
1656 ************************************************************************
1657
1658 prepareAlts tries these things:
1659
1660 1. Eliminate alternatives that cannot match, including the
1661 DEFAULT alternative.
1662
1663 2. If the DEFAULT alternative can match only one possible constructor,
1664 then make that constructor explicit.
1665 e.g.
1666 case e of x { DEFAULT -> rhs }
1667 ===>
1668 case e of x { (a,b) -> rhs }
1669 where the type is a single constructor type. This gives better code
1670 when rhs also scrutinises x or e.
1671
1672 3. Return a list of the constructors that cannot match in the
1673 DEFAULT alternative (if there is one)
1674
1675 Here "cannot match" includes knowledge from GADTs
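
For example (an illustrative sketch with a made-up GADT):

     data T a where
       TInt  :: T Int
       TBool :: T Bool

     f :: T Int -> Int
     f x = case x of
             TInt -> 1

Here a TBool alternative, or a DEFAULT standing in for it, cannot match,
because the scrutinee has type T Int.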
1676
1677 It's a good idea to do this stuff before simplifying the alternatives, to
1678 avoid simplifying alternatives we know can't happen, and to come up with
1679 the list of constructors that are handled, to put into the IdInfo of the
1680 case binder, for use when simplifying the alternatives.
1681
1682 Eliminating the default alternative in (1) isn't so obvious, but it can
1683 happen:
1684
1685 data Colour = Red | Green | Blue
1686
1687 f x = case x of
1688 Red -> ..
1689 Green -> ..
1690 DEFAULT -> h x
1691
1692 h y = case y of
1693 Blue -> ..
1694 DEFAULT -> [ case y of ... ]
1695
1696 If we inline h into f, the default case of the inlined h can't happen.
1697 If we don't notice this, we may end up filtering out *all* the cases
1698 of the inner case y, which leaves us with nowhere to go!
1699 -}
1700
1701 prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
1702 -- The returned alternatives can be empty, in which case no alternative can match
1703 prepareAlts scrut case_bndr' alts
1704 | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
1705 -- Case binder is needed just for its type. Note that as an
1706 -- OutId, it has maximum information; this is important.
1707 -- Test simpl013 is an example
1708 = do { us <- getUniquesM
1709 ; let (idcs1, alts1) = filterAlts tc tys imposs_cons alts
1710 (yes2, alts2) = refineDefaultAlt us tc tys idcs1 alts1
1711 (yes3, idcs3, alts3) = combineIdenticalAlts idcs1 alts2
1712 -- "idcs" stands for "impossible default data constructors"
1713 -- i.e. the constructors that can't match the default case
1714 ; when yes2 $ tick (FillInCaseDefault case_bndr')
1715 ; when yes3 $ tick (AltMerge case_bndr')
1716 ; return (idcs3, alts3) }
1717
1718 | otherwise -- Not a data type, so nothing interesting happens
1719 = return ([], alts)
1720 where
1721 imposs_cons = case scrut of
1722 Var v -> otherCons (idUnfolding v)
1723 _ -> []
1724
1725
1726 {-
1727 ************************************************************************
1728 * *
1729 mkCase
1730 * *
1731 ************************************************************************
1732
1733 mkCase tries these things
1734
1735 1. Merge Nested Cases
1736
1737 case e of b { ==> case e of b {
1738 p1 -> rhs1 p1 -> rhs1
1739 ... ...
1740 pm -> rhsm pm -> rhsm
1741 _ -> case b of b' { pn -> let b'=b in rhsn
1742 pn -> rhsn ...
1743 ... po -> let b'=b in rhso
1744 po -> rhso _ -> let b'=b in rhsd
1745 _ -> rhsd
1746 }
1747
1748 which merges two cases into one when the default alternative of
1749 the outer case scrutinises the binder of the outer case.  This
1750 transformation is called Case Merging.  It avoids scrutinising the
1751 same variable multiple times.
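
A small concrete instance (illustrative only), in which the inner case
scrutinises the outer case binder b:

     case e of b {
       A       -> e1
       DEFAULT -> case b of b' {
                    B       -> e2
                    DEFAULT -> e3 } }
 ==>
     case e of b {
       A       -> e1
       B       -> let b' = b in e2
       DEFAULT -> let b' = b in e3 }

(If b' is dead, the lets are dropped.)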
1752
1753 2. Eliminate Identity Case
1754
1755 case e of ===> e
1756 True -> True;
1757 False -> False
1758
1759 and similar friends.
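
Another instance (illustrative): rebuilding the scrutinee with the same
constructor and arguments also counts as an identity case:

     case e of (a, b) -> (a, b)     ===>   e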
1760 -}
1761
1762 mkCase, mkCase1, mkCase2
1763 :: DynFlags
1764 -> OutExpr -> OutId
1765 -> OutType -> [OutAlt] -- Alternatives in standard (increasing) order
1766 -> SimplM OutExpr
1767
1768 --------------------------------------------------
1769 -- 1. Merge Nested Cases
1770 --------------------------------------------------
1771
1772 mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
1773 | gopt Opt_CaseMerge dflags
1774 , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
1775 <- stripTicksTop tickishFloatable deflt_rhs
1776 , inner_scrut_var == outer_bndr
1777 = do { tick (CaseMerge outer_bndr)
1778
1779 ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
1780 (con, args, wrap_rhs rhs)
1781 -- Simplifier's no-shadowing invariant should ensure
1782 -- that outer_bndr is not shadowed by the inner patterns
1783 wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
1784 -- The let is OK even for unboxed binders,
1785
1786 wrapped_alts | isDeadBinder inner_bndr = inner_alts
1787 | otherwise = map wrap_alt inner_alts
1788
1789 merged_alts = mergeAlts outer_alts wrapped_alts
1790 -- NB: mergeAlts gives priority to the left
1791 -- case x of
1792 -- A -> e1
1793 -- DEFAULT -> case x of
1794 -- A -> e2
1795 -- B -> e3
1796 -- When we merge, we must ensure that e1 takes
1797 -- precedence over e2 as the value for A!
1798
1799 ; fmap (mkTicks ticks) $
1800 mkCase1 dflags scrut outer_bndr alts_ty merged_alts
1801 }
1802 -- Warning: don't call mkCase recursively!
1803 -- Firstly, there's no point, because inner alts have already had
1804 -- mkCase applied to them, so they won't have a case in their default
1805 -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
1806 -- in munge_rhs may put a case into the DEFAULT branch!
1807
1808 mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
1809
1810 --------------------------------------------------
1811 -- 2. Eliminate Identity Case
1812 --------------------------------------------------
1813
1814 mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _) -- Identity case
1815 | all identity_alt alts
1816 = do { tick (CaseIdentity case_bndr)
1817 ; return (mkTicks ticks $ re_cast scrut rhs1) }
1818 where
1819 ticks = concatMap (stripTicksT tickishFloatable . thdOf3) (tail alts)
1820 identity_alt (con, args, rhs) = check_eq rhs con args
1821
1822 check_eq (Cast rhs co) con args
1823 = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
1824 -- See Note [RHS casts]
1825 check_eq (Lit lit) (LitAlt lit') _ = lit == lit'
1826 check_eq (Var v) _ _ | v == case_bndr = True
1827 check_eq (Var v) (DataAlt con) [] = v == dataConWorkId con
1828 -- Optimisation only
1829 check_eq (Tick t e) alt args = tickishFloatable t &&
1830 check_eq e alt args
1831 check_eq rhs (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $
1832 mkConApp con (arg_tys ++
1833 varsToCoreExprs args)
1834 check_eq _ _ _ = False
1835
1836 arg_tys = map Type (tyConAppArgs (idType case_bndr))
1837
1838 -- Note [RHS casts]
1839 -- ~~~~~~~~~~~~~~~~
1840 -- We've seen this:
1841 -- case e of x { _ -> x `cast` c }
1842 -- And we definitely want to eliminate this case, to give
1843 -- e `cast` c
1844 -- So we throw away the cast from the RHS, and reconstruct
1845 -- it at the other end. All the RHS casts must be the same
1846 -- if (all identity_alt alts) holds.
1847 --
1848 -- Don't worry about nested casts, because the simplifier combines them
1849
1850 re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
1851 re_cast scrut _ = scrut
1852
1853 mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
1854
1855 --------------------------------------------------
1856 -- Catch-all
1857 --------------------------------------------------
1858 mkCase2 _dflags scrut bndr alts_ty alts
1859 = return (Case scrut bndr alts_ty alts)
1860
1861 {-
1862 Note [Dead binders]
1863 ~~~~~~~~~~~~~~~~~~~~
1864 Note that dead-ness is maintained by the simplifier, so that it is
1865 accurate after simplification as well as before.
1866
1867
1868 Note [Cascading case merge]
1869 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1870 Case merging should cascade in one sweep, because it
1871 happens bottom-up
1872
1873 case e of a {
1874 DEFAULT -> case a of b
1875 DEFAULT -> case b of c {
1876 DEFAULT -> e
1877 A -> ea
1878 B -> eb
1879 C -> ec
1880 ==>
1881 case e of a {
1882 DEFAULT -> case a of b
1883 DEFAULT -> let c = b in e
1884 A -> let c = b in ea
1885 B -> eb
1886 C -> ec
1887 ==>
1888 case e of a {
1889 DEFAULT -> let b = a in let c = b in e
1890 A -> let b = a in let c = b in ea
1891 B -> let b = a in eb
1892 C -> ec
1893
1894
1895 However here's a tricky case that we still don't catch, and I don't
1896 see how to catch it in one pass:
1897
1898 case x of c1 { I# a1 ->
1899 case a1 of c2 ->
1900 0 -> ...
1901 DEFAULT -> case x of c3 { I# a2 ->
1902 case a2 of ...
1903
1904 After occurrence analysis (and its binder-swap) we get this
1905
1906 case x of c1 { I# a1 ->
1907 let x = c1 in -- Binder-swap addition
1908 case a1 of c2 ->
1909 0 -> ...
1910 DEFAULT -> case x of c3 { I# a2 ->
1911 case a2 of ...
1912
1913 When we simplify the inner case x, we'll see that
1914 x=c1=I# a1. So we'll bind a2 to a1, and get
1915
1916 case x of c1 { I# a1 ->
1917 case a1 of c2 ->
1918 0 -> ...
1919 DEFAULT -> case a1 of ...
1920
1921 This is correct, but we can't do a case merge in this sweep
1922 because c2 /= a1. Reason: the binding c1=I# a1 went inwards
1923 without getting changed to c1=I# c2.
1924
1925 I don't think this is worth fixing, even if I knew how. It'll
1926 all come out in the next pass anyway.
1927 -}