The Early Inline Patch
[ghc.git] / compiler / simplCore / SimplUtils.hs
1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 \section[SimplUtils]{The simplifier utilities}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplUtils (
10 -- Rebuilding
11 mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
12
13 -- Inlining,
14 preInlineUnconditionally, postInlineUnconditionally,
15 activeUnfolding, activeRule,
16 getUnfoldingInRuleMatch,
17 simplEnvForGHCi, updModeForStableUnfoldings, updModeForRules,
18
19 -- The continuation type
20 SimplCont(..), DupFlag(..),
21 isSimplified,
22 contIsDupable, contResultType, contHoleType, applyContToJoinType,
23 contIsTrivial, contArgs,
24 countArgs,
25 mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
26 interestingCallContext,
27
28 -- ArgInfo
29 ArgInfo(..), ArgSpec(..), mkArgInfo,
30 addValArgTo, addCastTo, addTyArgTo,
31 argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
32
33 abstractFloats
34 ) where
35
36 #include "HsVersions.h"
37
38 import SimplEnv
39 import CoreMonad ( SimplifierMode(..), Tick(..) )
40 import DynFlags
41 import CoreSyn
42 import qualified CoreSubst
43 import PprCore
44 import CoreFVs
45 import CoreUtils
46 import CoreArity
47 import CoreUnfold
48 import Name
49 import Id
50 import IdInfo
51 import Var
52 import Demand
53 import SimplMonad
54 import Type hiding( substTy )
55 import Coercion hiding( substCo )
56 import DataCon ( dataConWorkId )
57 import VarEnv
58 import VarSet
59 import BasicTypes
60 import Util
61 import MonadUtils
62 import Outputable
63 import Pair
64 import PrelRules
65 import Literal
66
67 import Control.Monad ( when )
68 import Data.List ( sortBy )
69
70 {-
71 ************************************************************************
72 * *
73 The SimplCont and DupFlag types
74 * *
75 ************************************************************************
76
77 A SimplCont allows the simplifier to traverse the expression in a
78 zipper-like fashion. The SimplCont represents the rest of the expression,
79 "above" the point of interest.
80
81 You can also think of a SimplCont as an "evaluation context", using
82 that term in the way it is used for operational semantics. This is the
83 way I usually think of it. For example you'll often see a syntax for
84 evaluation context looking like
85 C ::= [] | C e | case C of alts | C `cast` co
86 That's the kind of thing we are doing here, and I use that syntax in
87 the comments.
88
89
90 Key points:
91 * A SimplCont describes a *strict* context (just like
92 evaluation contexts do). E.g. Just [] is not a SimplCont
93
94 * A SimplCont describes a context that *does not* bind
95 any variables. E.g. \x. [] is not a SimplCont
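
 * A rough illustration (an editorial sketch, not part of the design):
   when the simplifier works on e inside
        case (e |> co) of alts
   the rest of the expression is represented, innermost frame first, as
        CastIt co (Select { sc_alts = alts, sc_cont = Stop ... })
   i.e. the evaluation context   case ([] `cast` co) of alts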
96 -}
97
98 data SimplCont
99 = Stop -- An empty context, or <hole>
100 OutType -- Type of the <hole>
101 CallCtxt -- Tells if there is something interesting about
102 -- the context, and hence the inliner
103 -- should be a bit keener (see interestingCallContext)
104 -- Specifically:
105 -- This is an argument of a function that has RULES
106 -- Inlining the call might allow the rule to fire
107 -- Never ValAppCxt (use ApplyToVal instead)
108 -- or CaseCtxt (use Select instead)
109
110 | CastIt -- <hole> `cast` co
111 OutCoercion -- The coercion simplified
112 -- Invariant: never an identity coercion
113 SimplCont
114
115 | ApplyToVal { -- <hole> arg
116 sc_dup :: DupFlag, -- See Note [DupFlag invariants]
117 sc_arg :: InExpr, -- The argument,
118 sc_env :: StaticEnv, -- and its static env
119 sc_cont :: SimplCont }
120
121 | ApplyToTy { -- <hole> ty
122 sc_arg_ty :: OutType, -- Argument type
123 sc_hole_ty :: OutType, -- Type of the function, presumably (forall a. blah)
124 -- See Note [The hole type in ApplyToTy]
125 sc_cont :: SimplCont }
126
127 | Select { -- case <hole> of alts
128 sc_dup :: DupFlag, -- See Note [DupFlag invariants]
129 sc_bndr :: InId, -- case binder
130 sc_alts :: [InAlt], -- Alternatives
131 sc_env :: StaticEnv, -- and their static environment
132 sc_cont :: SimplCont }
133
134 -- The two strict forms have no DupFlag, because we never duplicate them
135 | StrictBind -- (\x* \xs. e) <hole>
136 InId [InBndr] -- let x* = <hole> in e
137 InExpr StaticEnv -- is a special case
138 SimplCont
139
140 | StrictArg -- f e1 ..en <hole>
141 ArgInfo -- Specifies f, e1..en, Whether f has rules, etc
142 -- plus strictness flags for *further* args
143 CallCtxt -- Whether *this* argument position is interesting
144 SimplCont
145
146 | TickIt
147 (Tickish Id) -- Tick tickish <hole>
148 SimplCont
149
150 data DupFlag = NoDup -- Unsimplified, might be big
151 | Simplified -- Simplified
152 | OkToDup -- Simplified and small
153
154 isSimplified :: DupFlag -> Bool
155 isSimplified NoDup = False
156 isSimplified _ = True -- Invariant: the subst-env is empty
157
158 perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
159 perhapsSubstTy dup env ty
160 | isSimplified dup = ty
161 | otherwise = substTy env ty
162
163 {-
164 Note [DupFlag invariants]
165 ~~~~~~~~~~~~~~~~~~~~~~~~~
166 In both (ApplyToVal dup _ env k)
167 and (Select dup _ _ env k)
168 the following invariants hold
169
170 (a) if dup = OkToDup, then continuation k is also ok-to-dup
171 (b) if dup = OkToDup or Simplified, the subst-env is empty
172 (and hence no need to re-simplify)
173 -}
174
175 instance Outputable DupFlag where
176 ppr OkToDup = text "ok"
177 ppr NoDup = text "nodup"
178 ppr Simplified = text "simpl"
179
180 instance Outputable SimplCont where
181 ppr (Stop ty interesting) = text "Stop" <> brackets (ppr interesting) <+> ppr ty
182 ppr (CastIt co cont ) = (text "CastIt" <+> pprOptCo co) $$ ppr cont
183 ppr (TickIt t cont) = (text "TickIt" <+> ppr t) $$ ppr cont
184 ppr (ApplyToTy { sc_arg_ty = ty, sc_cont = cont })
185 = (text "ApplyToTy" <+> pprParendType ty) $$ ppr cont
186 ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont })
187 = (text "ApplyToVal" <+> ppr dup <+> pprParendExpr arg)
188 $$ ppr cont
189 ppr (StrictBind b _ _ _ cont) = (text "StrictBind" <+> ppr b) $$ ppr cont
190 ppr (StrictArg ai _ cont) = (text "StrictArg" <+> ppr (ai_fun ai)) $$ ppr cont
191 ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
192 = (text "Select" <+> ppr dup <+> ppr bndr) $$
193 ifPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
194
195
196 {- Note [The hole type in ApplyToTy]
197 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
198 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
199 continuation. It is absolutely necessary to compute contHoleType, but it is
200 not used for anything else (and hence may not be evaluated).
201
202 Why is it necessary for contHoleType? Consider the continuation
203 ApplyToTy Int (Stop Int)
204 corresponding to
205 (<hole> @Int) :: Int
206 What is the type of <hole>? It could be (forall a. Int) or (forall a. a),
207 and there is no way to know which, so we must record it.
208
209 In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType
210 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
211 doesn't matter because we'll never compute them all.
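
A concrete sketch (editorial; the function type is invented for illustration):
if the hole is filled by f :: forall a. a -> a, then the frame for (<hole> @Int) is
    ApplyToTy { sc_arg_ty  = Int
              , sc_hole_ty = forall a. a -> a   -- the type of the function itself
              , sc_cont    = ... }
and contHoleType just returns sc_hole_ty, recovering the hole's polymorphic
type directly instead of trying to reconstruct it from the result type.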
212
213 ************************************************************************
214 * *
215 ArgInfo and ArgSpec
216 * *
217 ************************************************************************
218 -}
219
220 data ArgInfo
221 = ArgInfo {
222 ai_fun :: OutId, -- The function
223 ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order)
224 ai_type :: OutType, -- Type of (f a1 ... an)
225
226 ai_rules :: [CoreRule], -- Rules for this function
227
228 ai_encl :: Bool, -- Flag saying whether this function
229 -- or an enclosing one has rules (recursively)
230 -- True => be keener to inline in all args
231
232 ai_strs :: [Bool], -- Strictness of remaining arguments
233 -- Usually infinite, but if it is finite it guarantees
234 -- that the function diverges after being given
235 -- that number of args
236 ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline
237 -- Always infinite
238 }
239
240 data ArgSpec
241 = ValArg OutExpr -- Apply to this (coercion or value); c.f. ApplyToVal
242 | TyArg { as_arg_ty :: OutType -- Apply to this type; c.f. ApplyToTy
243 , as_hole_ty :: OutType } -- Type of the function (presumably forall a. blah)
244 | CastBy OutCoercion -- Cast by this; c.f. CastIt
245
246 instance Outputable ArgSpec where
247 ppr (ValArg e) = text "ValArg" <+> ppr e
248 ppr (TyArg { as_arg_ty = ty }) = text "TyArg" <+> ppr ty
249 ppr (CastBy c) = text "CastBy" <+> ppr c
250
251 addValArgTo :: ArgInfo -> OutExpr -> ArgInfo
252 addValArgTo ai arg = ai { ai_args = ValArg arg : ai_args ai
253 , ai_type = applyTypeToArg (ai_type ai) arg }
254
255 addTyArgTo :: ArgInfo -> OutType -> ArgInfo
256 addTyArgTo ai arg_ty = ai { ai_args = arg_spec : ai_args ai
257 , ai_type = piResultTy poly_fun_ty arg_ty }
258 where
259 poly_fun_ty = ai_type ai
260 arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = poly_fun_ty }
261
262 addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
263 addCastTo ai co = ai { ai_args = CastBy co : ai_args ai
264 , ai_type = pSnd (coercionKind co) }
265
266 argInfoAppArgs :: [ArgSpec] -> [OutExpr]
267 argInfoAppArgs [] = []
268 argInfoAppArgs (CastBy {} : _) = [] -- Stop at a cast
269 argInfoAppArgs (ValArg e : as) = e : argInfoAppArgs as
270 argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as
271
272 pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
273 pushSimplifiedArgs _env [] k = k
274 pushSimplifiedArgs env (arg : args) k
275 = case arg of
276 TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
277 -> ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
278 ValArg e -> ApplyToVal { sc_arg = e, sc_env = env, sc_dup = Simplified, sc_cont = rest }
279 CastBy c -> CastIt c rest
280 where
281 rest = pushSimplifiedArgs env args k
282 -- The env has an empty SubstEnv
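-- A small worked example (editorial sketch): given the already-simplified
-- specs [TyArg { as_arg_ty = Int, ... }, ValArg x],
--     pushSimplifiedArgs env [TyArg Int ..., ValArg x] k
--       = ApplyToTy Int (ApplyToVal x k)      -- written loosely
-- so the first ArgSpec in the list becomes the argument applied first,
-- i.e. the frame nearest the hole.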
283
284 argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
285 -- NB: the [ArgSpec] is reversed so that the first arg
286 -- in the list is the last one in the application
287 argInfoExpr fun rev_args
288 = go rev_args
289 where
290 go [] = Var fun
291 go (ValArg a : as) = go as `App` a
292 go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty
293 go (CastBy co : as) = mkCast (go as) co
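-- A worked example (editorial): since the [ArgSpec] is reversed, the call
-- (f @Int x |> co) is held as
--     [CastBy co, ValArg x, TyArg { as_arg_ty = Int, ... }]
-- and rebuilding gives
--     argInfoExpr f [CastBy co, ValArg x, TyArg Int ...]
--       = ((Var f `App` Type Int) `App` x) `mkCast` co
-- restoring the original left-to-right application order.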
294
295
296 {-
297 ************************************************************************
298 * *
299 Functions on SimplCont
300 * *
301 ************************************************************************
302 -}
303
304 mkBoringStop :: OutType -> SimplCont
305 mkBoringStop ty = Stop ty BoringCtxt
306
307 mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in CoreUnfold
308 mkRhsStop ty = Stop ty RhsCtxt
309
310 mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
311 mkLazyArgStop ty cci = Stop ty cci
312
313 -------------------
314 contIsRhsOrArg :: SimplCont -> Bool
315 contIsRhsOrArg (Stop {}) = True
316 contIsRhsOrArg (StrictBind {}) = True
317 contIsRhsOrArg (StrictArg {}) = True
318 contIsRhsOrArg _ = False
319
320 contIsRhs :: SimplCont -> Bool
321 contIsRhs (Stop _ RhsCtxt) = True
322 contIsRhs _ = False
323
324 -------------------
325 contIsDupable :: SimplCont -> Bool
326 contIsDupable (Stop {}) = True
327 contIsDupable (ApplyToTy { sc_cont = k }) = contIsDupable k
328 contIsDupable (ApplyToVal { sc_dup = OkToDup }) = True -- See Note [DupFlag invariants]
329 contIsDupable (Select { sc_dup = OkToDup }) = True -- ...ditto...
330 contIsDupable (CastIt _ k) = contIsDupable k
331 contIsDupable _ = False
332
333 -------------------
334 contIsTrivial :: SimplCont -> Bool
335 contIsTrivial (Stop {}) = True
336 contIsTrivial (ApplyToTy { sc_cont = k }) = contIsTrivial k
337 contIsTrivial (ApplyToVal { sc_arg = Coercion _, sc_cont = k }) = contIsTrivial k
338 contIsTrivial (CastIt _ k) = contIsTrivial k
339 contIsTrivial _ = False
340
341 -------------------
342 contResultType :: SimplCont -> OutType
343 contResultType (Stop ty _) = ty
344 contResultType (CastIt _ k) = contResultType k
345 contResultType (StrictBind _ _ _ _ k) = contResultType k
346 contResultType (StrictArg _ _ k) = contResultType k
347 contResultType (Select { sc_cont = k }) = contResultType k
348 contResultType (ApplyToTy { sc_cont = k }) = contResultType k
349 contResultType (ApplyToVal { sc_cont = k }) = contResultType k
350 contResultType (TickIt _ k) = contResultType k
351
352 contHoleType :: SimplCont -> OutType
353 contHoleType (Stop ty _) = ty
354 contHoleType (TickIt _ k) = contHoleType k
355 contHoleType (CastIt co _) = pFst (coercionKind co)
356 contHoleType (StrictBind b _ _ se _) = substTy se (idType b)
357 contHoleType (StrictArg ai _ _) = funArgTy (ai_type ai)
358 contHoleType (ApplyToTy { sc_hole_ty = ty }) = ty -- See Note [The hole type in ApplyToTy]
359 contHoleType (ApplyToVal { sc_arg = e, sc_env = se, sc_dup = dup, sc_cont = k })
360 = mkFunTy (perhapsSubstTy dup se (exprType e))
361 (contHoleType k)
362 contHoleType (Select { sc_dup = d, sc_bndr = b, sc_env = se })
363 = perhapsSubstTy d se (idType b)
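
-- A small example (editorial): for the continuation
--     CastIt (co :: T ~ S) (Stop S BoringCtxt)
-- contHoleType is T (the source type of the cast, via pFst of its kind),
-- whereas contResultType is S, the type of the whole rebuilt expression.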
364
365 applyContToJoinType :: JoinArity -> SimplCont -> OutType -> OutType
366 applyContToJoinType ar cont ty
367 = setJoinResTy ar (contResultType cont) ty
368
369 -------------------
370 countArgs :: SimplCont -> Int
371 -- Count all arguments, including types, coercions, and other values
372 countArgs (ApplyToTy { sc_cont = cont }) = 1 + countArgs cont
373 countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont
374 countArgs _ = 0
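
-- For instance (editorial note): in the context (<hole> @Int x y) the
-- continuation has one ApplyToTy frame and two ApplyToVal frames, so
-- countArgs returns 3; contArgs below would summarise only the two
-- value arguments x and y.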
375
376 contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
377 -- Summarises value args, discards type args and coercions
378 -- The returned continuation of the call is only used to
379 -- answer questions like "are you interesting?"
380 contArgs cont
381 | lone cont = (True, [], cont)
382 | otherwise = go [] cont
383 where
384 lone (ApplyToTy {}) = False -- See Note [Lone variables] in CoreUnfold
385 lone (ApplyToVal {}) = False
386 lone (CastIt {}) = False
387 lone _ = True
388
389 go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
390 = go (is_interesting arg se : args) k
391 go args (ApplyToTy { sc_cont = k }) = go args k
392 go args (CastIt _ k) = go args k
393 go args k = (False, reverse args, k)
394
395 is_interesting arg se = interestingArg se arg
396 -- Do *not* use short-cutting substitution here
397 -- because we want to get as much IdInfo as possible
398
399
400 -------------------
401 mkArgInfo :: Id
402 -> [CoreRule] -- Rules for function
403 -> Int -- Number of value args
404 -> SimplCont -- Context of the call
405 -> ArgInfo
406
407 mkArgInfo fun rules n_val_args call_cont
408 | n_val_args < idArity fun -- Note [Unsaturated functions]
409 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
410 , ai_rules = rules, ai_encl = False
411 , ai_strs = vanilla_stricts
412 , ai_discs = vanilla_discounts }
413 | otherwise
414 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
415 , ai_rules = rules
416 , ai_encl = interestingArgContext rules call_cont
417 , ai_strs = add_type_str fun_ty arg_stricts
418 , ai_discs = arg_discounts }
419 where
420 fun_ty = idType fun
421
422 vanilla_discounts, arg_discounts :: [Int]
423 vanilla_discounts = repeat 0
424 arg_discounts = case idUnfolding fun of
425 CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
426 -> discounts ++ vanilla_discounts
427 _ -> vanilla_discounts
428
429 vanilla_stricts, arg_stricts :: [Bool]
430 vanilla_stricts = repeat False
431
432 arg_stricts
433 = case splitStrictSig (idStrictness fun) of
434 (demands, result_info)
435 | not (demands `lengthExceeds` n_val_args)
436 -> -- Enough args, use the strictness given.
437 -- For bottoming functions we used to pretend that the arg
438 -- is lazy, so that we don't treat the arg as an
439 -- interesting context. This avoids substituting
440 -- top-level bindings for (say) strings into
441 -- calls to error. But now we are more careful about
442 -- inlining lone variables, so it's ok (see SimplUtils.analyseCont)
443 if isBotRes result_info then
444 map isStrictDmd demands -- Finite => result is bottom
445 else
446 map isStrictDmd demands ++ vanilla_stricts
447 | otherwise
448 -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
449 <+> ppr n_val_args <+> ppr demands )
450 vanilla_stricts -- Not enough args, or no strictness
451
452 add_type_str :: Type -> [Bool] -> [Bool]
453 -- If the function arg types are strict, record that in the 'strictness bits'
454 -- No need to instantiate because unboxed types (which dominate the strict
455 -- types) can't instantiate type variables.
456 -- add_type_str is done repeatedly (for each call); it might be better
457 -- to do it once-for-all in the function
458 -- But beware primops/datacons with no strictness
459
460 add_type_str
461 = go
462 where
463 go _ [] = []
464 go fun_ty strs -- Look through foralls
465 | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty -- Includes coercions
466 = go fun_ty' strs
467 go fun_ty (str:strs) -- Add strict-type info
468 | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
469 = (str || Just False == isLiftedType_maybe arg_ty) : go fun_ty' strs
470 -- If the type is levity-polymorphic, we can't know whether it's
471 -- strict. isLiftedType_maybe will return Just False only when
472 -- we're sure the type is unlifted.
473 go _ strs
474 = strs
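
    -- An illustrative case (editorial): if fun_ty were Int# -> Bool -> r then
    --     add_type_str fun_ty [False, False] = [True, False]
    -- because isLiftedType_maybe Int# is Just False (definitely unlifted), so
    -- that position is forced strict, while Bool is lifted and keeps whatever
    -- flag the demand analysis supplied.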
475
476 {- Note [Unsaturated functions]
477 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
478 Consider (test eyeball/inline4)
479 x = a:as
480 y = f x
481 where f has arity 2. Then we do not want to inline 'x', because
482 it'll just be floated out again. Even if f has lots of discounts
483 on its first argument -- it must be saturated for these to kick in
484 -}
485
486
487 {-
488 ************************************************************************
489 * *
490 Interesting arguments
491 * *
492 ************************************************************************
493
494 Note [Interesting call context]
495 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
496 We want to avoid inlining an expression where there can't possibly be
497 any gain, such as in an argument position. Hence, if the continuation
498 is interesting (e.g. a case scrutinee, application, etc.) then we
499 inline, otherwise we don't.
500
501 Previously some_benefit used to return True only if the variable was
502 applied to some value arguments. This didn't work:
503
504 let x = _coerce_ (T Int) Int (I# 3) in
505 case _coerce_ Int (T Int) x of
506 I# y -> ....
507
508 we want to inline x, but can't see that it's a constructor in a case
509 scrutinee position, and some_benefit is False.
510
511 Another example:
512
513 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
514
515 .... case dMonadST _@_ x0 of (a,b,c) -> ....
516
517 we'd really like to inline dMonadST here, but we *don't* want to
518 inline if the case expression is just
519
520 case x of y { DEFAULT -> ... }
521
522 since we can just eliminate this case instead (x is in WHNF). The same
523 applies when x is bound to a lambda expression. Hence
524 contIsInteresting looks for case expressions with just a single
525 default case.
526 -}
527
528 interestingCallContext :: SimplCont -> CallCtxt
529 -- See Note [Interesting call context]
530 interestingCallContext cont
531 = interesting cont
532 where
533 interesting (Select {}) = CaseCtxt
534 interesting (ApplyToVal {}) = ValAppCtxt
535 -- Can happen if we have (f Int |> co) y
536 -- If f has an INLINE prag we need to give it some
537 -- motivation to inline. See Note [Cast then apply]
538 -- in CoreUnfold
539 interesting (StrictArg _ cci _) = cci
540 interesting (StrictBind {}) = BoringCtxt
541 interesting (Stop _ cci) = cci
542 interesting (TickIt _ k) = interesting k
543 interesting (ApplyToTy { sc_cont = k }) = interesting k
544 interesting (CastIt _ k) = interesting k
545 -- If this call is the arg of a strict function, the context
546 -- is a bit interesting. If we inline here, we may get useful
547 -- evaluation information to avoid repeated evals: e.g.
548 -- x + (y * z)
549 -- Here the contIsInteresting makes the '*' keener to inline,
550 -- which in turn exposes a constructor which makes the '+' inline.
551 -- Assuming that +,* aren't small enough to inline regardless.
552 --
553 -- It's also very important to inline in a strict context for things
554 -- like
555 -- foldr k z (f x)
556 -- Here, the context of (f x) is strict, and if f's unfolding is
557 -- a build it's *great* to inline it here. So we must ensure that
558 -- the context for (f x) is not totally uninteresting.
559
560 interestingArgContext :: [CoreRule] -> SimplCont -> Bool
561 -- If the argument has form (f x y), where x,y are boring,
562 -- and f is marked INLINE, then we don't want to inline f.
563 -- But if the context of the argument is
564 -- g (f x y)
565 -- where g has rules, then we *do* want to inline f, in case it
566 -- exposes a rule that might fire. Similarly, if the context is
567 -- h (g (f x x))
568 -- where h has rules, then we do want to inline f; hence the
569 -- call_cont argument to interestingArgContext
570 --
571 -- The ai_encl flag makes this happen; if it's
572 -- set, the inliner gets just keen enough to inline f
573 -- regardless of how boring f's arguments are, if it's marked INLINE
574 --
575 -- The alternative would be to *always* inline an INLINE function,
576 -- regardless of how boring its context is; but that seems overkill
577 -- For example, it'd mean that wrapper functions were always inlined
578 --
579 -- The call_cont passed to interestingArgContext is the context of
580 -- the call itself, e.g. g <hole> in the example above
581 interestingArgContext rules call_cont
582 = notNull rules || enclosing_fn_has_rules
583 where
584 enclosing_fn_has_rules = go call_cont
585
586 go (Select {}) = False
587 go (ApplyToVal {}) = False -- Shouldn't really happen
588 go (ApplyToTy {}) = False -- Ditto
589 go (StrictArg _ cci _) = interesting cci
590 go (StrictBind {}) = False -- ??
591 go (CastIt _ c) = go c
592 go (Stop _ cci) = interesting cci
593 go (TickIt _ c) = go c
594
595 interesting RuleArgCtxt = True
596 interesting _ = False
597
598
599 {- Note [Interesting arguments]
600 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
601 An argument is interesting if it deserves a discount for unfoldings
602 with a discount in that argument position. The idea is to avoid
603 unfolding a function that is applied only to variables that have no
604 unfolding (i.e. they are probably lambda bound), e.g. (f x y z). There is
605 little point in inlining f here.
606
607 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
608 we must look through lets, eg (let x = e in C a b), because the let will
609 float, exposing the value, if we inline. That makes it different to
610 exprIsHNF.
611
612 Before 2009 we said it was interesting if the argument had *any* structure
613 at all; i.e. (hasSomeUnfolding v). But that does too much inlining; see Trac #3016.
614
615 But we don't regard (f x y) as interesting, unless f is unsaturated.
616 If it's saturated and f hasn't inlined, then it's probably not going
617 to now!
618
619 Note [Conlike is interesting]
620 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
621 Consider
622 f d = ...((*) d x y)...
623 ... f (df d')...
624 where df is con-like. Then we'd really like to inline 'f' so that the
625 rule for (*) (df d) can fire. To do this
626 a) we give a discount for being an argument of a class-op (eg (*) d)
627 b) we say that a con-like argument (eg (df d)) is interesting
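
As a rough guide (an editorial sketch of interestingArg below, assuming the
variables carry no unfoldings in the environment):
    interestingArg env (Just x)    ~ ValueArg    -- constructor application
    interestingArg env (\x -> e)   ~ ValueArg    -- value lambda
    interestingArg env y           ~ TrivArg     -- bare variable, nothing known
    interestingArg env (g y)       ~ NonTrivArg  -- saturated call of unknown g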
628 -}
629
630 interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
631 -- See Note [Interesting arguments]
632 interestingArg env e = go env 0 e
633 where
634 -- n is # value args to which the expression is applied
635 go env n (Var v)
636 | SimplEnv { seIdSubst = ids, seInScope = in_scope } <- env
637 = case snd <$> lookupVarEnv ids v of
638 Nothing -> go_var n (refineFromInScope in_scope v)
639 Just (DoneId v') -> go_var n (refineFromInScope in_scope v')
640 Just (DoneEx e) -> go (zapSubstEnv env) n e
641 Just (ContEx tvs cvs ids e) -> go (setSubstEnv env tvs cvs ids) n e
642
643 go _ _ (Lit {}) = ValueArg
644 go _ _ (Type _) = TrivArg
645 go _ _ (Coercion _) = TrivArg
646 go env n (App fn (Type _)) = go env n fn
647 go env n (App fn _) = go env (n+1) fn
648 go env n (Tick _ a) = go env n a
649 go env n (Cast e _) = go env n e
650 go env n (Lam v e)
651 | isTyVar v = go env n e
652 | n>0 = NonTrivArg -- (\x.b) e is NonTriv
653 | otherwise = ValueArg
654 go _ _ (Case {}) = NonTrivArg
655 go env n (Let b e) = case go env' n e of
656 ValueArg -> ValueArg
657 _ -> NonTrivArg
658 where
659 env' = env `addNewInScopeIds` bindersOf b
660
661 go_var n v
662 | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather than
663 -- data constructors here
664 | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
665 | n > 0 = NonTrivArg -- Saturated or unknown call
666 | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
667 -- See Note [Conlike is interesting]
668 | otherwise = TrivArg -- n==0, no useful unfolding
669 where
670 conlike_unfolding = isConLikeUnfolding (idUnfolding v)
671
672 {-
673 ************************************************************************
674 * *
675 SimplifierMode
676 * *
677 ************************************************************************
678
679 The SimplifierMode controls several switches; see its definition in
680 CoreMonad
681 sm_rules :: Bool -- Whether RULES are enabled
682 sm_inline :: Bool -- Whether inlining is enabled
683 sm_case_case :: Bool -- Whether case-of-case is enabled
684 sm_eta_expand :: Bool -- Whether eta-expansion is enabled
685 -}
686
687 simplEnvForGHCi :: DynFlags -> SimplEnv
688 simplEnvForGHCi dflags
689 = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
690 , sm_phase = InitialPhase
691 , sm_rules = rules_on
692 , sm_inline = False
693 , sm_eta_expand = eta_expand_on
694 , sm_case_case = True }
695 where
696 rules_on = gopt Opt_EnableRewriteRules dflags
697 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
698 -- Do not do any inlining, in case we expose some unboxed
699 -- tuple stuff that confuses the bytecode interpreter
700
701 updModeForStableUnfoldings :: Activation -> SimplifierMode -> SimplifierMode
702 -- See Note [Simplifying inside stable unfoldings]
703 updModeForStableUnfoldings inline_rule_act current_mode
704 = current_mode { sm_phase = phaseFromActivation inline_rule_act
705 , sm_inline = True
706 , sm_eta_expand = False }
707 -- sm_eta_expand: see Note [No eta expansion in stable unfoldings]
708 -- For sm_rules, just inherit; sm_rules might be "off"
709 -- because of -fno-enable-rewrite-rules
710 where
711 phaseFromActivation (ActiveAfter _ n) = Phase n
712 phaseFromActivation _ = InitialPhase
713
714 updModeForRules :: SimplifierMode -> SimplifierMode
715 -- See Note [Simplifying rules]
716 updModeForRules current_mode
717 = current_mode { sm_phase = InitialPhase
718 , sm_inline = False
719 , sm_rules = False
720 , sm_eta_expand = False }
721
722 {- Note [Simplifying rules]
723 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
724 When simplifying a rule LHS, refrain from /any/ inlining or applying
725 of other RULES.
726
727 Doing anything to the LHS is plain confusing, because it means that what the
728 rule matches is not what the user wrote. c.f. Trac #10595, and #10528.
729 Moreover, inlining (or applying rules) on rule LHSs risks introducing
730 Ticks into the LHS, which makes matching trickier. Trac #10665, #10745.
731
732 Doing this to either side confounds tools like HERMIT, which seek to reason
733 about and apply the RULES as originally written. See Trac #10829.
734
735 Note [No eta expansion in stable unfoldings]
736 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
737 If we have a stable unfolding
738
739 f :: Ord a => a -> IO ()
740 -- Unfolding template
741 -- = /\a \(d:Ord a) (x:a). bla
742
743 we do not want to eta-expand to
744
745 f :: Ord a => a -> IO ()
746 -- Unfolding template
747 -- = (/\a \(d:Ord a) (x:a) (eta:State#). bla eta) |> co
748
749 because then specialisation of the overloading doesn't work properly
750 (see Note [Specialisation shape] in Specialise), Trac #9509.
751
752 So we disable eta-expansion in stable unfoldings.
753
754 Note [Inlining in gentle mode]
755 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
756 Something is inlined if
757 (i) the sm_inline flag is on, AND
758 (ii) the thing has an INLINE pragma, AND
759 (iii) the thing is inlinable in the earliest phase.
760
761 Example of why (iii) is important:
762 {-# INLINE [~1] g #-}
763 g = ...
764
765 {-# INLINE f #-}
766 f x = g (g x)
767
768 If we were to inline g into f's inlining, then an importing module would
769 never be able to do
770 f e --> g (g e) ---> RULE fires
771 because the stable unfolding for f has had g inlined into it.
772
773 On the other hand, it is bad not to do ANY inlining into a
774 stable unfolding, because then recursive knots in instance declarations
775 don't get unravelled.
776
777 However, *sometimes* SimplGently must do no call-site inlining at all
778 (hence sm_inline = False). Before full laziness we must be careful
779 not to inline wrappers, because doing so inhibits floating
780 e.g. ...(case f x of ...)...
781 ==> ...(case (case x of I# x# -> fw x#) of ...)...
782 ==> ...(case x of I# x# -> case fw x# of ...)...
783 and now the redex (f x) isn't floatable any more.
784
785 The no-inlining thing is also important for Template Haskell. You might be
786 compiling in one-shot mode with -O2; but when TH compiles a splice before
787 running it, we don't want to use -O2. Indeed, we don't want to inline
788 anything, because the byte-code interpreter might get confused about
789 unboxed tuples and suchlike.
790
791 Note [Simplifying inside stable unfoldings]
792 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
793 We must take care with simplification inside stable unfoldings (which come from
794 INLINE pragmas).
795
796 First, consider the following example
797 let f = \pq -> BIG
798 in
799 let g = \y -> f y y
800 {-# INLINE g #-}
801 in ...g...g...g...g...g...
802 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
803 and thence copied multiple times when g is inlined. HENCE we treat
804 any occurrence in a stable unfolding as a multiple occurrence, not a single
805 one; see OccurAnal.addRuleUsage.
806
807 Second, we *do* want to do some modest rules/inlining stuff in stable
808 unfoldings, partly to eliminate senseless crap, and partly to break
809 the recursive knots generated by instance declarations.
810
811 However, suppose we have
812 {-# INLINE <act> f #-}
813 f = <rhs>
814 meaning "inline f in phases p where activation <act>(p) holds".
815 Then what inlinings/rules can we apply to the copy of <rhs> captured in
816 f's stable unfolding? Our model is that literally <rhs> is substituted for
817 f when it is inlined. So our conservative plan (implemented by
818 updModeForStableUnfoldings) is this:
819
820 -------------------------------------------------------------
821 When simplifying the RHS of a stable unfolding, set the phase
822 to the phase in which the stable unfolding first becomes active
823 -------------------------------------------------------------
824
825 That ensures that
826
827 a) Rules/inlinings that *cease* being active before p will
828 not apply to the stable unfolding, consistent with it being
829 inlined in its *original* form in phase p.
830
831 b) Rules/inlinings that only become active *after* p will
832 not apply to the stable unfolding, again to be consistent with
833 inlining the *original* rhs in phase p.
834
835 For example,
836 {-# INLINE f #-}
837 f x = ...g...
838
839 {-# NOINLINE [1] g #-}
840 g y = ...
841
842 {-# RULE h g = ... #-}
843 Here we must not inline g into f's RHS, even when we get to phase 0,
844 because when f is later inlined into some other module we want the
845 rule for h to fire.
846
847 Similarly, consider
848 {-# INLINE f #-}
849 f x = ...g...
850
851 g y = ...
852 and suppose that there are auto-generated specialisations and a strictness
853 wrapper for g. The specialisations get activation AlwaysActive, and the
854 strictness wrapper gets activation (ActiveAfter 0). So the strictness
855 wrapper fails the test and won't be inlined into f's stable unfolding. That
856 means f can inline, expose the specialised call to g, so the specialisation
857 rules can fire.
858
859 A note about wrappers
860 ~~~~~~~~~~~~~~~~~~~~~
861 It's also important not to inline a worker back into a wrapper.
862 A wrapper looks like
863 wrapper = inline_me (\x -> ...worker... )
864 Normally, the inline_me prevents the worker getting inlined into
865 the wrapper (initially, the worker's only call site!). But,
866 if the wrapper is sure to be called, the strictness analyser will
867 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
868 continuation.
869 -}
870
871 activeUnfolding :: SimplEnv -> Id -> Bool
872 activeUnfolding env id
873 | isCompulsoryUnfolding (realIdUnfolding id)
874 = True -- Even sm_inline can't override compulsory unfoldings
875 | otherwise
876 = isActive (sm_phase mode) (idInlineActivation id)
877 && sm_inline mode
878 -- `or` isStableUnfolding (realIdUnfolding id)
879 -- Inline things when
880 -- (a) they are active
881 -- (b) sm_inline says so, except that for stable unfoldings
882 -- (ie pragmas) we inline anyway
883 where
884 mode = getMode env
885
886 getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
887 -- When matching in RULE, we want to "look through" an unfolding
888 -- (to see a constructor) if *rules* are on, even if *inlinings*
889 -- are not. A notable example is DFuns, which really we want to
890 -- match in rules like (op dfun) in gentle mode. Another example
891 -- is 'otherwise' which we want exprIsConApp_maybe to be able to
892 -- see very early on
893 getUnfoldingInRuleMatch env
894 = (in_scope, id_unf)
895 where
896 in_scope = seInScope env
897 mode = getMode env
898 id_unf id | unf_is_active id = idUnfolding id
899 | otherwise = NoUnfolding
900 unf_is_active id
901 | not (sm_rules mode) = -- active_unfolding_minimal id
902 isStableUnfolding (realIdUnfolding id)
903 -- Do we even need to test this? I think this InScopeEnv
904 -- is only consulted if activeRule returns True, which
905 -- never happens if sm_rules is False
906 | otherwise = isActive (sm_phase mode) (idInlineActivation id)
907
908 ----------------------
909 activeRule :: SimplEnv -> Activation -> Bool
910 -- Nothing => No rules at all
911 activeRule env
912 | not (sm_rules mode) = \_ -> False -- Rewriting is off
913 | otherwise = isActive (sm_phase mode)
914 where
915 mode = getMode env
916
917 {-
918 ************************************************************************
919 * *
920 preInlineUnconditionally
921 * *
922 ************************************************************************
923
924 preInlineUnconditionally
925 ~~~~~~~~~~~~~~~~~~~~~~~~
926 @preInlineUnconditionally@ examines a bndr to see if it is used just
927 once in a completely safe way, so that it is safe to discard the
928 binding and inline its RHS at the (unique) usage site, REGARDLESS of how
929 big the RHS might be. If this is the case we don't simplify the RHS
930 first, but just inline it un-simplified.
931
932 This is much better than first simplifying a perhaps-huge RHS and then
933 inlining and re-simplifying it. Indeed, it can be at least quadratically
934 better. Consider
935
936 x1 = e1
937 x2 = e2[x1]
938 x3 = e3[x2]
939 ...etc...
940 xN = eN[xN-1]
941
942 We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times, etc.
943 This can happen with cascades of functions too:
944
945 f1 = \x1.e1
946 f2 = \xs.e2[f1]
947 f3 = \xs.e3[f2]
948 ...etc...
949
950 THE MAIN INVARIANT is this:
951
952 ---- preInlineUnconditionally invariant -----
953 IF preInlineUnconditionally chooses to inline x = <rhs>
954 THEN doing the inlining should not change the occurrence
955 info for the free vars of <rhs>
956 ----------------------------------------------
957
958 For example, it's tempting to look at a trivial binding like
959 x = y
960 and inline it unconditionally. But suppose x is used many times,
961 but this is the unique occurrence of y. Then inlining x would change
962 y's occurrence info, which breaks the invariant. It matters: y
963 might have a BIG rhs, which will now be dup'd at every occurrence of x.
964
965
966 Even RHSs labelled InlineMe aren't caught here, because there might be
967 no benefit from inlining at the call site.
968
969 [Sept 01] Don't unconditionally inline a top-level thing, because that
970 can simply make a static thing into something built dynamically. E.g.
971 x = (a,b)
972 main = \s -> h x
973
974 [Remember that we treat \s as a one-shot lambda.] No point in
975 inlining x unless there is something interesting about the call site.
976
977 But watch out: if you aren't careful, some useful foldr/build fusion
978 can be lost (most notably in spectral/hartel/parstof) because the
979 foldr didn't see the build. Doing the dynamic allocation isn't a big
980 deal, in fact, but losing the fusion can be. But the right thing here
981 seems to be to do a callSiteInline based on the fact that there is
982 something interesting about the call site (it's strict). Hmm. That
983 seems a bit fragile.
984
985 Conclusion: inline top level things gaily until Phase 0 (the last
986 phase), at which point don't.
987
988 Note [pre/postInlineUnconditionally in gentle mode]
989 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
990 Even in gentle mode we want to do preInlineUnconditionally. The
991 reason is that too little clean-up happens if you don't inline
992 use-once things. Also a bit of inlining is *good* for full laziness;
993 it can expose constant sub-expressions. Example in
994 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
995 let-float if you inline windowToViewport
996
997 However, as usual for Gentle mode, do not inline things that are
998 inactive in the initial stages. See Note [Gentle mode].
999
1000 Note [Stable unfoldings and preInlineUnconditionally]
1001 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1002 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
1003 Example
1004
1005 {-# INLINE f #-}
1006 f :: Eq a => a -> a
1007 f x = ...
1008
1009 fInt :: Int -> Int
1010 fInt = f Int dEqInt
1011
1012 ...fInt...fInt...fInt...
1013
1014 Here f occurs just once, in the RHS of fInt. But if we inline it there
1015 it might make fInt look big, and we'll lose the opportunity to inline f
1016 at each of fInt's call sites. The INLINE pragma will only inline when
1017 the application is saturated for exactly this reason; and we don't
1018 want PreInlineUnconditionally to second-guess it. A live example is
1019 Trac #3736.
1020 c.f. Note [Stable unfoldings and postInlineUnconditionally]
1021
1022 Note [Top-level bottoming Ids]
1023 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1024 Don't inline top-level Ids that are bottoming, even if they are used just
1025 once, because FloatOut has gone to some trouble to extract them out.
1026 Inlining them won't make the program run faster!
1027
1028 Note [Do not inline CoVars unconditionally]
1029 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1030 Coercion variables appear inside coercions, and the RHS of a let-binding
1031 is a term (not a coercion) so we can't necessarily inline the latter in
1032 the former.
1033 -}
1034
1035 preInlineUnconditionally :: DynFlags -> SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
1036 -- Precondition: rhs satisfies the let/app invariant
1037 -- See Note [CoreSyn let/app invariant] in CoreSyn
1038 -- Reason: we don't want to inline single uses, or discard dead bindings,
1039 -- for unlifted, side-effect-ful bindings
1040 preInlineUnconditionally dflags env top_lvl bndr rhs
1041 | not active = False
1042 | isStableUnfolding (idUnfolding bndr) = False -- Note [Stable unfoldings and preInlineUnconditionally]
1043 | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
1044 | not (gopt Opt_SimplPreInlining dflags) = False
1045 | isCoVar bndr = False -- Note [Do not inline CoVars unconditionally]
1046 | otherwise = case idOccInfo bndr of
1047 IAmDead -> True -- Happens in ((\x.1) v)
1048 occ@OneOcc { occ_one_br = True }
1049 -> try_once (occ_in_lam occ)
1050 (occ_int_cxt occ)
1051 _ -> False
1052 where
1053 mode = getMode env
1054 active = isActive (sm_phase mode) act
1055 -- See Note [pre/postInlineUnconditionally in gentle mode]
1056 act = idInlineActivation bndr
1057 try_once in_lam int_cxt -- There's one textual occurrence
1058 | not in_lam = isNotTopLevel top_lvl || early_phase
1059 | otherwise = int_cxt && canInlineInLam rhs
1060
1061 -- Be very careful before inlining inside a lambda, because (a) we must not
1062 -- invalidate occurrence information, and (b) we want to avoid pushing a
1063 -- single allocation (here) into multiple allocations (inside lambda).
1064 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
1065 -- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
1066 -- where
1067 -- is_cheap = exprIsCheap rhs
1068 -- ok = is_cheap && int_cxt
1069
1070 -- int_cxt The context isn't totally boring
1071 -- E.g. let f = \ab.BIG in \y. map f xs
1072 -- Don't want to substitute for f, because then we allocate
1073 -- its closure every time the \y is called
1074 -- But: let f = \ab.BIG in \y. map (f y) xs
1075 -- Now we do want to substitute for f, even though it's not
1076 -- saturated, because we're going to allocate a closure for
1077 -- (f y) every time round the loop anyhow.
1078
1079 -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
1080 -- so substituting rhs inside a lambda doesn't change the occ info.
1081 -- Sadly, not quite the same as exprIsHNF.
1082 canInlineInLam (Lit _) = True
1083 canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
1084 canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
1085 canInlineInLam _ = False
1086 -- not ticks. Counting ticks cannot be duplicated, and non-counting
1087 -- ticks around a Lam will disappear anyway.
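    -- Illustrative outcomes (editorial note), for ordinary term variables:
    --   canInlineInLam (\x -> BIG)  = True   -- a value lambda: pushing it under
    --                                        -- a lambda duplicates code, not work
    --   canInlineInLam (f y)        = False  -- an application: inlining it under
    --                                        -- a lambda risks repeating the work
    --                                        -- of (f y)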
1088
1089 early_phase = case sm_phase mode of
1090 Phase 0 -> False
1091 _ -> True
1092 -- If we don't have this early_phase test, consider
1093 -- x = length [1,2,3]
1094 -- The full laziness pass carefully floats all the cons cells to
1095 -- top level, and preInlineUnconditionally floats them all back in.
1096 -- Result is (a) static allocation replaced by dynamic allocation
1097 -- (b) many simplifier iterations because this tickles
1098 -- a related problem; only one inlining per pass
1099 --
1100 -- On the other hand, I have seen cases where top-level fusion is
1101 -- lost if we don't inline top level things (e.g. string constants)
1102 -- Hence the test for phase zero (which is the phase for all the final
1103 -- simplifications). Until phase zero we take no special notice of
1104 -- top level things, but then we become more leery about inlining
1105 -- them.
1106
1107 {-
1108 ************************************************************************
1109 * *
1110 postInlineUnconditionally
1111 * *
1112 ************************************************************************
1113
1114 postInlineUnconditionally
1115 ~~~~~~~~~~~~~~~~~~~~~~~~~
1116 @postInlineUnconditionally@ decides whether to unconditionally inline
1117 a thing based on the form of its RHS; in particular if it has a
1118 trivial RHS. If so, we can inline and discard the binding altogether.
1119
1120 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
1121 only have *forward* references. Hence, it's safe to discard the binding
1122
1123 NOTE: This isn't our last opportunity to inline. We're at the binding
1124 site right now, and we'll get another opportunity when we get to the
1125 occurrence(s)
1126
1127 Note that we do this unconditional inlining only for trivial RHSs.
1128 Don't inline even WHNFs inside lambdas; doing so may simply increase
1129 allocation when the function is called. This isn't the last chance; see
1130 NOTE above.
1131
1132 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here. Why?
1133 Because we don't even want to inline them into the RHS of constructor
1134 arguments. See NOTE above
1135
1136 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
1137 it's best to inline it anyway. We often get a=E; b=a from desugaring,
1138 with both a and b marked NOINLINE. But that seems incompatible with
1139 our new view that inlining is like a RULE, so I'm sticking to the 'active'
1140 story for now.
1141 -}
1142
1143 postInlineUnconditionally
1144 :: DynFlags -> SimplEnv -> TopLevelFlag
1145 -> OutId -- The binder (an InId would be fine too)
1146 -- (*not* a CoVar)
1147 -> OccInfo -- From the InId
1148 -> OutExpr
1149 -> Unfolding
1150 -> Bool
1151 -- Precondition: rhs satisfies the let/app invariant
1152 -- See Note [CoreSyn let/app invariant] in CoreSyn
1153 -- Reason: we don't want to inline single uses, or discard dead bindings,
1154 -- for unlifted, side-effect-ful bindings
1155 postInlineUnconditionally dflags env top_lvl bndr occ_info rhs unfolding
1156 | not active = False
1157 | isWeakLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline
1158 -- because it might be referred to "earlier"
1159 | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
1160 | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally]
1161 | exprIsTrivial rhs = True
1162 | otherwise
1163 = case occ_info of
1164 -- The point of examining occ_info here is that for *non-values*
1165 -- that occur outside a lambda, the call-site inliner won't have
1166 -- a chance (because it doesn't know that the thing
1167 -- only occurs once). The pre-inliner won't have gotten
1168 -- it either, if the thing occurs in more than one branch
1169 -- So the main target is things like
1170 -- let x = f y in
1171 -- case v of
1172 -- True -> case x of ...
1173 -- False -> case x of ...
1174 -- This is very important in practice; e.g. wheel-seive1 doubles
1175 -- in allocation if you miss this out
1176 OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt }
1177 -- OneOcc => no code-duplication issue
1178 -> smallEnoughToInline dflags unfolding -- Small enough to dup
1179 -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
1180 --
1181 -- NB: Do NOT inline arbitrarily big things, even if one_br is True
1182 -- Reason: doing so risks exponential behaviour. We simplify a big
1183 -- expression, inline it, and simplify it again. But if the
1184 -- very same thing happens in the big expression, we get
1185 -- exponential cost!
1186 -- PRINCIPLE: when we've already simplified an expression once,
1187 -- make sure that we only inline it if it's reasonably small.
1188
1189 && (not in_lam ||
1190 -- Outside a lambda, we want to be reasonably aggressive
1191 -- about inlining into multiple branches of case
1192 -- e.g. let x = <non-value>
1193 -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
1194 -- Inlining can be a big win if C3 is the hot-spot, even if
1195 -- the uses in C1, C2 are not 'interesting'
1196 -- An example that gets worse if you add int_cxt here is 'clausify'
1197
1198 (isCheapUnfolding unfolding && int_cxt))
1199 -- isCheap => acceptable work duplication; in_lam may be true
1200 -- int_cxt to prevent us inlining inside a lambda without some
1201 -- good reason. See the notes on int_cxt in preInlineUnconditionally
1202
1203 IAmDead -> True -- This happens; for example, the case_bndr during case of
1204 -- known constructor: case (a,b) of x { (p,q) -> ... }
1205 -- Here x isn't mentioned in the RHS, so we don't want to
1206 -- create the (dead) let-binding let x = (a,b) in ...
1207
1208 _ -> False
1209
1210 -- Here's an example that we don't handle well:
1211 -- let f = if b then Left (\x.BIG) else Right (\y.BIG)
1212 -- in \y. ....case f of {...} ....
1213 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
1214 -- But
1215 -- - We can't preInlineUnconditionally because that would invalidate
1216 -- the occ info for b.
1217 -- - We can't postInlineUnconditionally because the RHS is big, and
1218 -- that risks exponential behaviour
1219 -- - We can't call-site inline, because the rhs is big
1220 -- Alas!
1221
1222 where
1223 active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
1224 -- See Note [pre/postInlineUnconditionally in gentle mode]
1225
1226 {-
1227 Note [Top level and postInlineUnconditionally]
1228 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1229 We don't do postInlineUnconditionally for top-level things (even for
1230 ones that are trivial):
1231
1232 * Doing so will inline top-level error expressions that have been
1233 carefully floated out by FloatOut. More generally, it might
1234 replace static allocation with dynamic.
1235
1236 * Even for trivial expressions there's a problem. Consider
1237 {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
1238 blah xs = reverse xs
1239 ruggle = sort
1240 In one simplifier pass we might fire the rule, getting
1241 blah xs = ruggle xs
1242 but in *that* simplifier pass we must not do postInlineUnconditionally
1243 on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
1244
1245 If the rhs is trivial it'll be inlined by callSiteInline, and then
1246 the binding will be dead and discarded by the next use of OccurAnal
1247
1248 * There is less point, because the main goal is to get rid of local
1249 bindings used in multiple case branches.
1250
1251 * The inliner should inline trivial things at call sites anyway.
1252
1253 * The Id might be exported. We could check for that separately,
1254 but since we aren't going to postInlineUnconditionally /any/
1255 top-level bindings, we don't need to test.
1256
1257 Note [Stable unfoldings and postInlineUnconditionally]
1258 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1259 Do not do postInlineUnconditionally if the Id has a stable unfolding,
1260 otherwise we lose the unfolding. Example
1261
1262 -- f has stable unfolding with rhs (e |> co)
1263 -- where 'e' is big
1264 f = e |> co
1265
1266 Then there's a danger we'll optimise to
1267
1268 f' = e
1269 f = f' |> co
1270
1271 and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
1272 won't inline because 'e' is too big.
1273
1274 c.f. Note [Stable unfoldings and preInlineUnconditionally]
1275
1276
1277 ************************************************************************
1278 * *
1279 Rebuilding a lambda
1280 * *
1281 ************************************************************************
1282 -}
1283
1284 mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
1285 -- mkLam tries three things
1286 -- a) eta reduction, if that gives a trivial expression
1287 -- b) eta expansion [only if there are some value lambdas]
--    c) floating a cast out of the lambda body; see Note [Casts and lambdas]
1288
1289 mkLam _env [] body _cont
1290 = return body
1291 mkLam env bndrs body cont
1292 = do { dflags <- getDynFlags
1293 ; mkLam' dflags bndrs body }
1294 where
1295 mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
1296 mkLam' dflags bndrs (Cast body co)
1297 | not (any bad bndrs)
1298 -- Note [Casts and lambdas]
1299 = do { lam <- mkLam' dflags bndrs body
1300 ; return (mkCast lam (mkPiCos Representational bndrs co)) }
1301 where
1302 co_vars = tyCoVarsOfCo co
1303 bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
1304
1305 mkLam' dflags bndrs body@(Lam {})
1306 = mkLam' dflags (bndrs ++ bndrs1) body1
1307 where
1308 (bndrs1, body1) = collectBinders body
1309
1310 mkLam' dflags bndrs (Tick t expr)
1311 | tickishFloatable t
1312 = mkTick t <$> mkLam' dflags bndrs expr
1313
1314 mkLam' dflags bndrs body
1315 | gopt Opt_DoEtaReduction dflags
1316 , Just etad_lam <- tryEtaReduce bndrs body
1317 = do { tick (EtaReduction (head bndrs))
1318 ; return etad_lam }
1319
1320 | not (contIsRhs cont) -- See Note [Eta expanding lambdas]
1321 , sm_eta_expand (getMode env)
1322 , any isRuntimeVar bndrs
1323 , let body_arity = exprEtaExpandArity dflags body
1324 , body_arity > 0
1325 = do { tick (EtaExpansion (head bndrs))
1326 ; let res = mkLams bndrs (etaExpand body_arity body)
1327 ; traceSmpl "eta expand" (vcat [text "before" <+> ppr (mkLams bndrs body)
1328 , text "after" <+> ppr res])
1329 ; return res }
1330
1331 | otherwise
1332 = return (mkLams bndrs body)
1333
1334 {-
1335 Note [Eta expanding lambdas]
1336 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1337 In general we *do* want to eta-expand lambdas. Consider
1338 f (\x -> case x of (a,b) -> \s -> blah)
1339 where 's' is a state token, and hence can be eta expanded. This
1340 showed up in the code for GHC.IO.Handle.Text.hPutChar, a rather
1341 important function!
1342
1343 The eta-expansion will never happen unless we do it now. (Well, it's
1344 possible that CorePrep will do it, but CorePrep only has a half-baked
1345 eta-expander that can't deal with casts. So it's much better to do it
1346 here.)
1347
1348 However, when the lambda is let-bound, as the RHS of a let, we have a
1349 better eta-expander (in the form of tryEtaExpandRhs), so we don't
1350 bother to try expansion in mkLam in that case; hence the contIsRhs
1351 guard.
1352
1353 NB: We check the SimplEnv (sm_eta_expand), not DynFlags.
1354 See Note [No eta expansion in stable unfoldings]
1355
1356 Note [Casts and lambdas]
1357 ~~~~~~~~~~~~~~~~~~~~~~~~
1358 Consider
1359 (\x. (\y. e) `cast` g1) `cast` g2
1360 There is a danger here that the two lambdas look separated, and the
1361 full laziness pass might float an expression to between the two.
1362
1363 So this equation in mkLam' floats the g1 out, thus:
1364 (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
1365 where x:tx.
1366
1367 In general, this floats casts outside lambdas, where (I hope) they
1368 might meet and cancel with some other cast:
1369 \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
1370 /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
1371 /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
1372 (if not (g `in` co))
1373
1374 Notice that it works regardless of 'e'. Originally it worked only
1375 if 'e' was itself a lambda, but in some cases that resulted in
1376 fruitless iteration in the simplifier. A good example was when
1377 compiling Text.ParserCombinators.ReadPrec, where we had a definition
1378 like (\x. Get `cast` g)
1379 where Get is a constructor with nonzero arity. Then mkLam eta-expanded
1380 the Get, and the next iteration eta-reduced it, and then eta-expanded
1381 it again.
1382
1383 Note also the side condition for the case of coercion binders.
1384 It does not make sense to transform
1385 /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
1386 because the latter is not well-kinded.
1387
1388 ************************************************************************
1389 * *
1390 Eta expansion
1391 * *
1392 ************************************************************************
1393 -}
1394
1395 tryEtaExpandRhs :: SimplEnv -> RecFlag -> OutId -> OutExpr
1396 -> SimplM (Arity, OutExpr)
1397 -- See Note [Eta-expanding at let bindings]
1398 tryEtaExpandRhs env is_rec bndr rhs
1399 = do { dflags <- getDynFlags
1400 ; (new_arity, new_rhs) <- try_expand dflags
1401
1402 ; WARN( new_arity < old_id_arity,
1403 (text "Arity decrease:" <+> (ppr bndr <+> ppr old_id_arity
1404 <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
1405 -- Note [Arity decrease] in Simplify
1406 return (new_arity, new_rhs) }
1407 where
1408 try_expand dflags
1409 | exprIsTrivial rhs
1410 = return (exprArity rhs, rhs)
1411
1412 | sm_eta_expand (getMode env) -- Provided eta-expansion is on
1413 , let new_arity1 = findRhsArity dflags bndr rhs old_arity
1414 new_arity2 = idCallArity bndr
1415 new_arity = max new_arity1 new_arity2
1416 , new_arity > old_arity -- And the current manifest arity isn't enough
1417 = if is_rec == Recursive && isJoinId bndr
1418 then WARN(True, text "Can't eta-expand recursive join point:" <+>
1419 ppr bndr)
1420 return (old_arity, rhs)
1421 else do { tick (EtaExpansion bndr)
1422 ; return (new_arity, etaExpand new_arity rhs) }
1423 | otherwise
1424 = return (old_arity, rhs)
1425
1426 old_arity = exprArity rhs -- See Note [Do not eta-expand PAPs]
1427 old_id_arity = idArity bndr
1428
1429 {-
1430 Note [Eta-expanding at let bindings]
1431 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1432 We now eta expand at let-bindings, which is where the payoff comes.
1433 The most significant thing is that we can do a simple arity analysis
1434 (in CoreArity.findRhsArity), which we can't do for free-floating lambdas
1435
1436 One useful consequence of not eta-expanding lambdas is this example:
1437 genMap :: C a => ...
1438 {-# INLINE genMap #-}
1439 genMap f xs = ...
1440
1441 myMap :: D a => ...
1442 {-# INLINE myMap #-}
1443 myMap = genMap
1444
1445 Notice that 'genMap' should only inline if applied to two arguments.
1446 In the stable unfolding for myMap we'll have the unfolding
1447 (\d -> genMap Int (..d..))
1448 We do not want to eta-expand to
1449 (\d f xs -> genMap Int (..d..) f xs)
1450 because then 'genMap' will inline, and it really shouldn't: at least
1451 as far as the programmer is concerned, it's not applied to two
1452 arguments!
1453
1454 Note [Do not eta-expand PAPs]
1455 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1456 We used to have old_arity = manifestArity rhs, which meant that we
1457 would eta-expand even PAPs. But this gives no particular advantage,
1458 and can lead to a massive blow-up in code size, exhibited by Trac #9020.
1459 Suppose we have a PAP
1460 foo :: IO ()
1461 foo = returnIO ()
1462 Then we can eta-expand it to
1463 foo = (\eta. (returnIO () |> sym g) eta) |> g
1464 where
1465 g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
1466
1467 But there is really no point in doing this, and it generates masses of
1468 coercions and whatnot that eventually disappear again. For T9020, GHC
1469 allocated 6.6G before, and 0.8G afterwards; and residency dropped from
1470 1.8G to 45M.
1471
1472 But note that this won't eta-expand, say
1473 f = \g -> map g
1474 Does it matter not eta-expanding such functions? I'm not sure. Perhaps
1475 strictness analysis will have less to bite on?
1476
1477
1478 ************************************************************************
1479 * *
1480 \subsection{Floating lets out of big lambdas}
1481 * *
1482 ************************************************************************
1483
1484 Note [Floating and type abstraction]
1485 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1486 Consider this:
1487 x = /\a. C e1 e2
1488 We'd like to float this to
1489 y1 = /\a. e1
1490 y2 = /\a. e2
1491 x = /\a. C (y1 a) (y2 a)
1492 for the usual reasons: we want to inline x rather vigorously.
1493
1494 You may think that this kind of thing is rare. But in some programs it is
1495 common. For example, if you do closure conversion you might get:
1496
1497 data a :-> b = forall e. (e -> a -> b) :$ e
1498
1499 f_cc :: forall a. a :-> a
1500 f_cc = /\a. (\e. id a) :$ ()
1501
1502 Now we really want to inline that f_cc thing so that the
1503 construction of the closure goes away.
1504
1505 So I have elaborated simplLazyBind to understand right-hand sides that look
1506 like
1507 /\ a1..an. body
1508
1509 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1510 but there is quite a bit of plumbing in simplLazyBind as well.
1511
1512 The same transformation is good when there are lets in the body:
1513
1514 /\abc -> let(rec) x = e in b
1515 ==>
1516 let(rec) x' = /\abc -> let x = x' a b c in e
1517 in
1518 /\abc -> let x = x' a b c in b
1519
1520 This is good because it can turn things like:
1521
1522 let f = /\a -> letrec g = ... g ... in g
1523 into
1524 letrec g' = /\a -> ... g' a ...
1525 in
1526 let f = /\ a -> g' a
1527
1528 which is better. In effect, it means that big lambdas don't impede
1529 let-floating.
1530
1531 This optimisation is CRUCIAL in eliminating the junk introduced by
1532 desugaring mutually recursive definitions. Don't eliminate it lightly!
1533
1534 [May 1999] If we do this transformation *regardless* then we can
1535 end up with some pretty silly stuff. For example,
1536
1537 let
1538 st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1539 in ..
1540 becomes
1541 let y1 = /\s -> r1
1542 y2 = /\s -> r2
1543 st = /\s -> ...[y1 s/x1, y2 s/x2]
1544 in ..
1545
1546 Unless the "..." is a WHNF there is really no point in doing this.
1547 Indeed it can make things worse. Suppose x1 is used strictly,
1548 and is of the form
1549
1550 x1* = case f y of { (a,b) -> e }
1551
1552 If we abstract this wrt the tyvar we then can't do the case inline
1553 as we would normally do.
1554
1555 That's why the whole transformation is part of the same process that
1556 floats let-bindings and constructor arguments out of RHSs. In particular,
1557 it is guarded by the doFloatFromRhs call in simplLazyBind.
1558
1559 Note [Which type variables to abstract over]
1560 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1561 Abstract only over the type variables free in the rhs wrt which the
1562 new binding is abstracted. Note that
1563
1564 * The naive approach of abstracting wrt the
1565 tyvars free in the Id's /type/ fails. Consider:
1566 /\ a b -> let t :: (a,b) = (e1, e2)
1567 x :: a = fst t
1568 in ...
1569 Here, b isn't free in x's type, but we must nevertheless
1570 abstract wrt b as well, because t's type mentions b.
1571 Since t is floated too, we'd end up with the bogus:
1572 poly_t = /\ a b -> (e1, e2)
1573 poly_x = /\ a -> fst (poly_t a *b*)
1574
1575 * We must do closeOverKinds. Example (Trac #10934):
1576 f = /\k (f:k->*) (a:k). let t = AccFailure @ (f a) in ...
1577 Here we want to float 't', but we must remember to abstract over
1578 'k' as well, even though it is not explicitly mentioned in the RHS,
1579 otherwise we get
1580 t = /\ (f:k->*) (a:k). AccFailure @ (f a)
1581 which is obviously bogus.
1582 -}
1583
1584 abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
1585 abstractFloats main_tvs body_env body
1586 = ASSERT( notNull body_floats )
1587 do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
1588 ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
1589 where
1590 main_tv_set = mkVarSet main_tvs
1591 body_floats = getFloatBinds body_env
1592 empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
1593
1594 abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
1595 abstract subst (NonRec id rhs)
1596 = do { (poly_id, poly_app) <- mk_poly tvs_here id
1597 ; let poly_rhs = mkLams tvs_here rhs'
1598 subst' = CoreSubst.extendIdSubst subst id poly_app
1599 ; return (subst', (NonRec poly_id poly_rhs)) }
1600 where
1601 rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
1602
1603 -- tvs_here: see Note [Which type variables to abstract over]
1604 tvs_here = toposortTyVars $
1605 filter (`elemVarSet` main_tv_set) $
1606 closeOverKindsList $
1607 exprSomeFreeVarsList isTyVar rhs'
1608
1609 abstract subst (Rec prs)
1610 = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
1611 ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
1612 poly_rhss = [mkLams tvs_here (CoreSubst.substExpr (text "abstract_floats3") subst' rhs)
1613 | rhs <- rhss]
1614 ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
1615 where
1616 (ids,rhss) = unzip prs
1617 -- For a recursive group, it's a bit of a pain to work out the minimal
1618 -- set of tyvars over which to abstract:
1619 -- /\ a b c. let x = ...a... in
1620 -- letrec { p = ...x...q...
1621 -- q = .....p...b... } in
1622 -- ...
1623 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
1624 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
1625 -- Since it's a pain, we just use the whole set, which is always safe
1626 --
1627 -- If you ever want to be more selective, remember this bizarre case too:
1628 -- x::a = x
1629 -- Here, we must abstract 'x' over 'a'.
1630 tvs_here = toposortTyVars main_tvs
1631
1632 mk_poly tvs_here var
1633 = do { uniq <- getUniqueM
1634 ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
1635 poly_ty = mkInvForAllTys tvs_here (idType var) -- But new type of course
1636 poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.hs
1637 mkLocalIdOrCoVar poly_name poly_ty
1638 ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
1639 -- In the olden days, it was crucial to copy the occInfo of the original var,
1640 -- because we were looking at occurrence-analysed but as yet unsimplified code!
1641 -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking
1642 -- at already simplified code, so it doesn't matter
1643 --
1644 -- It's even right to retain single-occurrence or dead-var info:
1645 -- Suppose we started with /\a -> let x = E in B
1646 -- where x occurs once in B. Then we transform to:
1647 -- let x' = /\a -> E in /\a -> let x* = x' a in B
1648 -- where x* has an INLINE prag on it. Now, once x* is inlined,
1649 -- the occurrences of x' will be just the occurrences originally
1650 -- pinned on x.
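-- A sketch of the overall effect (illustrative only), using the bindings from
-- Note [Which type variables to abstract over]: abstracting the floats
--     t = (e1,e2)     and     x = fst t
-- over [a,b] yields the polymorphic bindings
--     poly_t = /\a b. (e1,e2)
--     poly_x = /\a b. fst (poly_t a b)
-- while the accumulated substitution maps  t :-> poly_t a b  and
-- x :-> poly_x a b,  which substExpr then applies to the body.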
1651
1652 {-
1653 Note [Abstract over coercions]
1654 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1655 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1656 type variable a. Rather than sort this mess out, we simply bale out and abstract
1657 wrt all the type variables if any of them are coercion variables.
1658
1659
1660 Historical note: if you use let-bindings instead of a substitution, beware of this:
1661
1662 -- Suppose we start with:
1663 --
1664 -- x = /\ a -> let g = G in E
1665 --
1666 -- Then we'll float to get
1667 --
1668 -- x = let poly_g = /\ a -> G
1669 -- in /\ a -> let g = poly_g a in E
1670 --
1671 -- But now the occurrence analyser will see just one occurrence
1672 -- of poly_g, not inside a lambda, so the simplifier will
1673 -- PreInlineUnconditionally poly_g back into g! Back to square 1!
1674 -- (I used to think that the "don't inline lone occurrences" stuff
1675 -- would stop this happening, but since it's the *only* occurrence,
1676 -- PreInlineUnconditionally kicks in first!)
1677 --
1678 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1679 -- to appear many times. (NB: mkInlineMe eliminates
1680 -- such notes on trivial RHSs, so do it manually.)
1681
1682 ************************************************************************
1683 * *
1684 prepareAlts
1685 * *
1686 ************************************************************************
1687
1688 prepareAlts tries these things:
1689
1690 1. Eliminate alternatives that cannot match, including the
1691 DEFAULT alternative.
1692
1693 2. If the DEFAULT alternative can match only one possible constructor,
1694 then make that constructor explicit.
1695 e.g.
1696 case e of x { DEFAULT -> rhs }
1697 ===>
1698 case e of x { (a,b) -> rhs }
1699 where the type is a single constructor type. This gives better code
1700 when rhs also scrutinises x or e.
1701
1702 3. Returns a list of the constructors that cannot match in the
1703 DEFAULT alternative (if there is one)
1704
1705 Here "cannot match" includes knowledge from GADTs
1706
1707 It's a good idea to do this stuff before simplifying the alternatives, to
1708 avoid simplifying alternatives we know can't happen, and to come up with
1709 the list of constructors that are handled, to put into the IdInfo of the
1710 case binder, for use when simplifying the alternatives.
1711
1712 Eliminating the default alternative in (1) isn't so obvious, but it can
1713 happen:
1714
1715 data Colour = Red | Green | Blue
1716
1717 f x = case x of
1718 Red -> ..
1719 Green -> ..
1720 DEFAULT -> h x
1721
1722 h y = case y of
1723 Blue -> ..
1724 DEFAULT -> [ case y of ... ]
1725
1726 If we inline h into f, the default case of the inlined h can't happen.
1727 If we don't notice this, we may end up filtering out *all* the cases
1728 of the inner case y, which gives us nowhere to go!
1729 -}
1730
1731 prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
1732 -- The returned alternatives can be empty, if none are possible
1733 prepareAlts scrut case_bndr' alts
1734 | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
1735 -- Case binder is needed just for its type. Note that as an
1736 -- OutId, it has maximum information; this is important.
1737 -- Test simpl013 is an example
1738 = do { us <- getUniquesM
1739 ; let (idcs1, alts1) = filterAlts tc tys imposs_cons alts
1740 (yes2, alts2) = refineDefaultAlt us tc tys idcs1 alts1
1741 (yes3, idcs3, alts3) = combineIdenticalAlts idcs1 alts2
1742 -- "idcs" stands for "impossible default data constructors"
1743 -- i.e. the constructors that can't match the default case
1744 ; when yes2 $ tick (FillInCaseDefault case_bndr')
1745 ; when yes3 $ tick (AltMerge case_bndr')
1746 ; return (idcs3, alts3) }
1747
1748 | otherwise -- Not a data type, so nothing interesting happens
1749 = return ([], alts)
1750 where
1751 imposs_cons = case scrut of
1752 Var v -> otherCons (idUnfolding v)
1753 _ -> []
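-- Example (illustrative), reusing the Colour type from the comment above:
-- if the scrutinee is a variable v whose unfolding is  OtherCon [DataAlt Red]
-- (an enclosing case has already ruled out Red), then
-- imposs_cons = [DataAlt Red], so filterAlts can discard a Red alternative,
-- and the returned [AltCon] records the constructors that cannot match in
-- the DEFAULT alternative.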
1754
1755
1756 {-
1757 ************************************************************************
1758 * *
1759 mkCase
1760 * *
1761 ************************************************************************
1762
1763 mkCase tries these things
1764
1765 1. Merge Nested Cases
1766
1767 case e of b { ==> case e of b {
1768 p1 -> rhs1 p1 -> rhs1
1769 ... ...
1770 pm -> rhsm pm -> rhsm
1771 _ -> case b of b' { pn -> let b'=b in rhsn
1772 pn -> rhsn ...
1773 ... po -> let b'=b in rhso
1774 po -> rhso _ -> let b'=b in rhsd
1775 _ -> rhsd
1776 }
1777
1778 which merges two cases into one when the default alternative of
1779 the outer case is itself a case that scrutinises the outer case binder. This
1780 transformation is called Case Merging. It avoids scrutinising the same
1781 variable multiple times.
1782
1783 2. Eliminate Identity Case
1784
1785 case e of ===> e
1786 True -> True;
1787 False -> False
1788
1789 and similar friends.
1790
1791 3. Scrutinee Constant Folding
1792
1793 case x op# k# of _ { ===> case x of _ {
1794 a1# -> e1 (a1# inv_op# k#) -> e1
1795 a2# -> e2 (a2# inv_op# k#) -> e2
1796 ... ...
1797 DEFAULT -> ed DEFAULT -> ed
1798
1799 where (x op# k#) inv_op# k# == x
1800
1801 And similarly for commuted arguments and for some unary operations.
1802
1803 The purpose of this transformation is not only to avoid an arithmetic
1804 operation at runtime but to allow other transformations to apply in cascade.
1805
1806 Example with the "Merge Nested Cases" optimization (from #12877):
1807
1808 main = case t of t0
1809 0## -> ...
1810 DEFAULT -> case t0 `minusWord#` 1## of t1
1811 0## -> ...
1812 DEFAULT -> case t1 `minusWord#` 1## of t2
1813 0## -> ...
1814 DEFAULT -> case t2 `minusWord#` 1## of _
1815 0## -> ...
1816 DEFAULT -> ...
1817
1818 becomes:
1819
1820 main = case t of _
1821 0## -> ...
1822 1## -> ...
1823 2## -> ...
1824 3## -> ...
1825 DEFAULT -> ...
1826
1827 -}
1828
1829 mkCase, mkCase1, mkCase2, mkCase3
1830 :: DynFlags
1831 -> OutExpr -> OutId
1832 -> OutType -> [OutAlt] -- Alternatives in standard (increasing) order
1833 -> SimplM OutExpr
1834
1835 --------------------------------------------------
1836 -- 1. Merge Nested Cases
1837 --------------------------------------------------
1838
1839 mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
1840 | gopt Opt_CaseMerge dflags
1841 , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
1842 <- stripTicksTop tickishFloatable deflt_rhs
1843 , inner_scrut_var == outer_bndr
1844 = do { tick (CaseMerge outer_bndr)
1845
1846 ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
1847 (con, args, wrap_rhs rhs)
1848 -- Simplifier's no-shadowing invariant should ensure
1849 -- that outer_bndr is not shadowed by the inner patterns
1850 wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
1851 -- The let is OK even for unboxed binders,
1852
1853 wrapped_alts | isDeadBinder inner_bndr = inner_alts
1854 | otherwise = map wrap_alt inner_alts
1855
1856 merged_alts = mergeAlts outer_alts wrapped_alts
1857 -- NB: mergeAlts gives priority to the left
1858 -- case x of
1859 -- A -> e1
1860 -- DEFAULT -> case x of
1861 -- A -> e2
1862 -- B -> e3
1863 -- When we merge, we must ensure that e1 takes
1864 -- precedence over e2 as the value for A!
1865
1866 ; fmap (mkTicks ticks) $
1867 mkCase1 dflags scrut outer_bndr alts_ty merged_alts
1868 }
1869 -- Warning: don't call mkCase recursively!
1870 -- Firstly, there's no point, because inner alts have already had
1871 -- mkCase applied to them, so they won't have a case in their default
1872 -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
1873 -- in munge_rhs may put a case into the DEFAULT branch!
1874
1875 mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
1876
1877 --------------------------------------------------
1878 -- 2. Eliminate Identity Case
1879 --------------------------------------------------
1880
1881 mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _) -- Identity case
1882 | all identity_alt alts
1883 = do { tick (CaseIdentity case_bndr)
1884 ; return (mkTicks ticks $ re_cast scrut rhs1) }
1885 where
1886 ticks = concatMap (stripTicksT tickishFloatable . thdOf3) (tail alts)
1887 identity_alt (con, args, rhs) = check_eq rhs con args
1888
1889 check_eq (Cast rhs co) con args
1890 = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
1891 -- See Note [RHS casts]
1892 check_eq (Lit lit) (LitAlt lit') _ = lit == lit'
1893 check_eq (Var v) _ _ | v == case_bndr = True
1894 check_eq (Var v) (DataAlt con) [] = v == dataConWorkId con
1895 -- Optimisation only
1896 check_eq (Tick t e) alt args = tickishFloatable t &&
1897 check_eq e alt args
1898 check_eq rhs (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $
1899 mkConApp con (arg_tys ++
1900 varsToCoreExprs args)
1901 check_eq _ _ _ = False
1902
1903 arg_tys = map Type (tyConAppArgs (idType case_bndr))
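-- An example of the DataAlt branch of check_eq (illustrative): in
--     case e of x { (a,b) -> (a,b) }
-- the RHS is  (,) @t1 @t2 a b,  which is exactly
--     mkConApp (,) (arg_tys ++ varsToCoreExprs [a,b])
-- so every alternative is an identity_alt and the whole case collapses to e.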
1904
1905 -- Note [RHS casts]
1906 -- ~~~~~~~~~~~~~~~~
1907 -- We've seen this:
1908 -- case e of x { _ -> x `cast` c }
1909 -- And we definitely want to eliminate this case, to give
1910 -- e `cast` c
1911 -- So we throw away the cast from the RHS, and reconstruct
1912 -- it at the other end. All the RHS casts must be the same
1913 -- if (all identity_alt alts) holds.
1914 --
1915 -- Don't worry about nested casts, because the simplifier combines them
1916
1917 re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
1918 re_cast scrut _ = scrut
1919
1920 mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
1921
1922 --------------------------------------------------
1923 -- 3. Scrutinee Constant Folding
1924 --------------------------------------------------
1925
1926 mkCase2 dflags scrut bndr alts_ty alts
1927 | gopt Opt_CaseFolding dflags
1928 , Just (scrut',f) <- caseRules dflags scrut
1929 = mkCase3 dflags scrut' bndr alts_ty (new_alts f)
1930 | otherwise
1931 = mkCase3 dflags scrut bndr alts_ty alts
1932 where
1933 -- We need to keep the correct association between the scrutinee and its
1934 -- binder if the latter isn't dead. Hence we wrap rhs of alternatives with
1935 -- "let bndr = ... in":
1936 --
1937 -- case v + 10 of y =====> case v of y
1938 -- 20 -> e1 10 -> let y = 20 in e1
1939 -- DEFAULT -> e2 DEFAULT -> let y = v + 10 in e2
1940 --
1941 -- Other transformations give: =====> case v of y'
1942 -- 10 -> let y = 20 in e1
1943 -- DEFAULT -> let y = y' + 10 in e2
1944 --
1945 wrap_rhs l rhs
1946 | isDeadBinder bndr = rhs
1947 | otherwise = Let (NonRec bndr l) rhs
1948
1949 -- We need to re-sort the alternatives to preserve the #case_invariants#
1950 new_alts f = sortBy cmpAlt (map (mapAlt f) alts)
1951
1952 mapAlt f alt@(c,bs,e) = case c of
1953 DEFAULT -> (c, bs, wrap_rhs scrut e)
1954 LitAlt l
1955 | isLitValue l -> (LitAlt (mapLitValue dflags f l),
1956 bs, wrap_rhs (Lit l) e)
1957 _ -> pprPanic "Unexpected alternative (mkCase2)" (ppr alt)
1958
1959 --------------------------------------------------
1960 -- Catch-all
1961 --------------------------------------------------
1962 mkCase3 _dflags scrut bndr alts_ty alts
1963 = return (Case scrut bndr alts_ty alts)
1964
1965 {-
1966 Note [Dead binders]
1967 ~~~~~~~~~~~~~~~~~~~~
1968 Note that dead-ness is maintained by the simplifier, so that it is
1969 accurate after simplification as well as before.
1970
1971
1972 Note [Cascading case merge]
1973 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1974 Case merging should cascade in one sweep, because it
1975 happens bottom-up
1976
1977 case e of a {
1978 DEFAULT -> case a of b
1979 DEFAULT -> case b of c {
1980 DEFAULT -> e
1981 A -> ea
1982 B -> eb
1983 C -> ec
1984 ==>
1985 case e of a {
1986 DEFAULT -> case a of b
1987 DEFAULT -> let c = b in e
1988 A -> let c = b in ea
1989 B -> eb
1990 C -> ec
1991 ==>
1992 case e of a {
1993 DEFAULT -> let b = a in let c = b in e
1994 A -> let b = a in let c = b in ea
1995 B -> let b = a in eb
1996 C -> ec
1997
1998
1999 However here's a tricky case that we still don't catch, and I don't
2000 see how to catch it in one pass:
2001
2002 case x of c1 { I# a1 ->
2003 case a1 of c2 ->
2004 0 -> ...
2005 DEFAULT -> case x of c3 { I# a2 ->
2006 case a2 of ...
2007
2008 After occurrence analysis (and its binder-swap) we get this
2009
2010 case x of c1 { I# a1 ->
2011 let x = c1 in -- Binder-swap addition
2012 case a1 of c2 ->
2013 0 -> ...
2014 DEFAULT -> case x of c3 { I# a2 ->
2015 case a2 of ...
2016
2017 When we simplify the inner case x, we'll see that
2018 x=c1=I# a1. So we'll bind a2 to a1, and get
2019
2020 case x of c1 { I# a1 ->
2021 case a1 of c2 ->
2022 0 -> ...
2023 DEFAULT -> case a1 of ...
2024
2025 This is correct, but we can't do a case merge in this sweep
2026 because c2 /= a1. Reason: the binding c1=I# a1 went inwards
2027 without getting changed to c1=I# c2.
2028
2029 I don't think this is worth fixing, even if I knew how. It'll
2030 all come out in the next pass anyway.
2031 -}