1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 \section[SimplUtils]{The simplifier utilities}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplUtils (
10 -- Rebuilding
11 mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
12
13 -- Inlining,
14 preInlineUnconditionally, postInlineUnconditionally,
15 activeUnfolding, activeRule,
16 getUnfoldingInRuleMatch,
17 simplEnvForGHCi, updModeForStableUnfoldings, updModeForRules,
18
19 -- The continuation type
20 SimplCont(..), DupFlag(..),
21 isSimplified,
22 contIsDupable, contResultType, contHoleType,
23 contIsTrivial, contArgs,
24 countArgs,
25 mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
26 interestingCallContext,
27
28 -- ArgInfo
29 ArgInfo(..), ArgSpec(..), mkArgInfo,
30 addValArgTo, addCastTo, addTyArgTo,
31 argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
32
33 abstractFloats
34 ) where
35
36 #include "HsVersions.h"
37
38 import SimplEnv
39 import CoreMonad ( SimplifierMode(..), Tick(..) )
40 import DynFlags
41 import CoreSyn
42 import qualified CoreSubst
43 import PprCore
44 import CoreFVs
45 import CoreUtils
46 import CoreArity
47 import CoreUnfold
48 import Name
49 import Id
50 import Var
51 import Demand
52 import SimplMonad
53 import Type hiding( substTy )
54 import Coercion hiding( substCo )
55 import DataCon ( dataConWorkId )
56 import VarEnv
57 import VarSet
58 import BasicTypes
59 import Util
60 import MonadUtils
61 import Outputable
62 import Pair
63 import PrelRules
64 import Literal
65
66 import Control.Monad ( when )
67
68 {-
69 ************************************************************************
70 * *
71 The SimplCont and DupFlag types
72 * *
73 ************************************************************************
74
75 A SimplCont allows the simplifier to traverse the expression in a
76 zipper-like fashion. The SimplCont represents the rest of the expression,
77 "above" the point of interest.
78
79 You can also think of a SimplCont as an "evaluation context", using
80 that term in the way it is used for operational semantics. This is the
81 way I usually think of it. For example, you'll often see a syntax for
82 evaluation contexts looking like
83 C ::= [] | C e | case C of alts | C `cast` co
84 That's the kind of thing we are doing here, and I use that syntax in
85 the comments.
86
87
88 Key points:
89 * A SimplCont describes a *strict* context (just like
90 evaluation contexts do). E.g. Just [] is not a SimplCont
91
92 * A SimplCont describes a context that *does not* bind
93 any variables. E.g. \x. [] is not a SimplCont
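
As a rough sketch (record fields elided, not a literal dump of the data
type below), when the simplifier is working on the 'f' inside
    case f x of alts
the continuation it carries is approximately
    ApplyToVal x (Select alts (Stop ...))
i.e. "apply the result to x, then case-analyse that, then stop".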
94 -}
95
96 data SimplCont
97 = Stop -- An empty context, or <hole>
98 OutType -- Type of the <hole>
99 CallCtxt -- Tells if there is something interesting about
100 -- the context, and hence the inliner
101 -- should be a bit keener (see interestingCallContext)
102 -- Specifically:
103 -- This is an argument of a function that has RULES
104 -- Inlining the call might allow the rule to fire
105 -- Never ValAppCtxt (use ApplyToVal instead)
106 -- or CaseCtxt (use Select instead)
107
108 | CastIt -- <hole> `cast` co
109 OutCoercion -- The coercion simplified
110 -- Invariant: never an identity coercion
111 SimplCont
112
113 | ApplyToVal { -- <hole> arg
114 sc_dup :: DupFlag, -- See Note [DupFlag invariants]
115 sc_arg :: InExpr, -- The argument,
116 sc_env :: StaticEnv, -- and its static env
117 sc_cont :: SimplCont }
118
119 | ApplyToTy { -- <hole> ty
120 sc_arg_ty :: OutType, -- Argument type
121 sc_hole_ty :: OutType, -- Type of the function, presumably (forall a. blah)
122 -- See Note [The hole type in ApplyToTy]
123 sc_cont :: SimplCont }
124
125 | Select { -- case <hole> of alts
126 sc_dup :: DupFlag, -- See Note [DupFlag invariants]
127 sc_bndr :: InId, -- case binder
128 sc_alts :: [InAlt], -- Alternatives
129 sc_env :: StaticEnv, -- and their static environment
130 sc_cont :: SimplCont }
131
132 -- The two strict forms have no DupFlag, because we never duplicate them
133 | StrictBind -- (\x* \xs. e) <hole>
134 InId [InBndr] -- let x* = <hole> in e
135 InExpr StaticEnv -- is a special case
136 SimplCont
137
138 | StrictArg -- f e1 ..en <hole>
139 ArgInfo -- Specifies f, e1..en, Whether f has rules, etc
140 -- plus strictness flags for *further* args
141 CallCtxt -- Whether *this* argument position is interesting
142 SimplCont
143
144 | TickIt
145 (Tickish Id) -- Tick tickish <hole>
146 SimplCont
147
148 data DupFlag = NoDup -- Unsimplified, might be big
149 | Simplified -- Simplified
150 | OkToDup -- Simplified and small
151
152 isSimplified :: DupFlag -> Bool
153 isSimplified NoDup = False
154 isSimplified _ = True -- Invariant: the subst-env is empty
155
156 perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
157 perhapsSubstTy dup env ty
158 | isSimplified dup = ty
159 | otherwise = substTy env ty
160
161 {-
162 Note [DupFlag invariants]
163 ~~~~~~~~~~~~~~~~~~~~~~~~~
164 In both (ApplyToVal dup _ env k)
165 and (Select dup _ _ env k)
166 the following invariants hold
167
168 (a) if dup = OkToDup, then continuation k is also ok-to-dup
169 (b) if dup = OkToDup or Simplified, the subst-env is empty
170 (and hence no need to re-simplify)
171 -}
172
173 instance Outputable DupFlag where
174 ppr OkToDup = text "ok"
175 ppr NoDup = text "nodup"
176 ppr Simplified = text "simpl"
177
178 instance Outputable SimplCont where
179 ppr (Stop ty interesting) = text "Stop" <> brackets (ppr interesting) <+> ppr ty
180 ppr (CastIt co cont ) = (text "CastIt" <+> ppr co) $$ ppr cont
181 ppr (TickIt t cont) = (text "TickIt" <+> ppr t) $$ ppr cont
182 ppr (ApplyToTy { sc_arg_ty = ty, sc_cont = cont })
183 = (text "ApplyToTy" <+> pprParendType ty) $$ ppr cont
184 ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont })
185 = (text "ApplyToVal" <+> ppr dup <+> pprParendExpr arg)
186 $$ ppr cont
187 ppr (StrictBind b _ _ _ cont) = (text "StrictBind" <+> ppr b) $$ ppr cont
188 ppr (StrictArg ai _ cont) = (text "StrictArg" <+> ppr (ai_fun ai)) $$ ppr cont
189 ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
190 = (text "Select" <+> ppr dup <+> ppr bndr) $$
191 ifPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
192
193
194 {- Note [The hole type in ApplyToTy]
195 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
196 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
197 continuation. It is absolutely necessary to compute contHoleType, but it is
198 not used for anything else (and hence may not be evaluated).
199
200 Why is it necessary for contHoleType? Consider the continuation
201 ApplyToTy Int (Stop Int)
202 corresponding to
203 (<hole> @Int) :: Int
204 What is the type of <hole>? It could be (forall a. Int) or (forall a. a),
205 and there is no way to know which, so we must record it.
206
207 In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType
208 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
209 doesn't matter because we'll never compute them all.
210
211 ************************************************************************
212 * *
213 ArgInfo and ArgSpec
214 * *
215 ************************************************************************
216 -}
217
218 data ArgInfo
219 = ArgInfo {
220 ai_fun :: OutId, -- The function
221 ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order)
222 ai_type :: OutType, -- Type of (f a1 ... an)
223
224 ai_rules :: [CoreRule], -- Rules for this function
225
226 ai_encl :: Bool, -- Flag saying whether this function
227 -- or an enclosing one has rules (recursively)
228 -- True => be keener to inline in all args
229
230 ai_strs :: [Bool], -- Strictness of remaining arguments
231 -- Usually infinite, but if it is finite it guarantees
232 -- that the function diverges after being given
233 -- that number of args
234 ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline
235 -- Always infinite
236 }
237
238 data ArgSpec
239 = ValArg OutExpr -- Apply to this (coercion or value); c.f. ApplyToVal
240 | TyArg { as_arg_ty :: OutType -- Apply to this type; c.f. ApplyToTy
241 , as_hole_ty :: OutType } -- Type of the function (presumably forall a. blah)
242 | CastBy OutCoercion -- Cast by this; c.f. CastIt
243
244 instance Outputable ArgSpec where
245 ppr (ValArg e) = text "ValArg" <+> ppr e
246 ppr (TyArg { as_arg_ty = ty }) = text "TyArg" <+> ppr ty
247 ppr (CastBy c) = text "CastBy" <+> ppr c
248
249 addValArgTo :: ArgInfo -> OutExpr -> ArgInfo
250 addValArgTo ai arg = ai { ai_args = ValArg arg : ai_args ai
251 , ai_type = applyTypeToArg (ai_type ai) arg }
252
253 addTyArgTo :: ArgInfo -> OutType -> ArgInfo
254 addTyArgTo ai arg_ty = ai { ai_args = arg_spec : ai_args ai
255 , ai_type = piResultTy poly_fun_ty arg_ty }
256 where
257 poly_fun_ty = ai_type ai
258 arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = poly_fun_ty }
259
260 addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
261 addCastTo ai co = ai { ai_args = CastBy co : ai_args ai
262 , ai_type = pSnd (coercionKind co) }
263
264 argInfoAppArgs :: [ArgSpec] -> [OutExpr]
265 argInfoAppArgs [] = []
266 argInfoAppArgs (CastBy {} : _) = [] -- Stop at a cast
267 argInfoAppArgs (ValArg e : as) = e : argInfoAppArgs as
268 argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as
269
270 pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
271 pushSimplifiedArgs _env [] k = k
272 pushSimplifiedArgs env (arg : args) k
273 = case arg of
274 TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
275 -> ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
276 ValArg e -> ApplyToVal { sc_arg = e, sc_env = env, sc_dup = Simplified, sc_cont = rest }
277 CastBy c -> CastIt c rest
278 where
279 rest = pushSimplifiedArgs env args k
280 -- The env has an empty SubstEnv
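-- A rough illustrative sketch (t, h, e, k are arbitrary placeholders):
--   pushSimplifiedArgs env [TyArg { as_arg_ty = t, as_hole_ty = h }, ValArg e] k
--     = ApplyToTy { sc_arg_ty = t, sc_hole_ty = h
--                 , sc_cont = ApplyToVal { sc_arg = e, sc_env = env
--                                        , sc_dup = Simplified
--                                        , sc_cont = k } }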
281
282 argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
283 -- NB: the [ArgSpec] is reversed so that the first arg
284 -- in the list is the last one in the application
285 argInfoExpr fun rev_args
286 = go rev_args
287 where
288 go [] = Var fun
289 go (ValArg a : as) = go as `App` a
290 go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty
291 go (CastBy co : as) = mkCast (go as) co
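-- A rough sketch of the reversal (f, x, t, y are placeholders):
--   argInfoExpr f [ValArg y, TyArg { as_arg_ty = t, as_hole_ty = ft }, ValArg x]
--     = ((Var f `App` x) `App` Type t) `App` y
-- i.e. the application is rebuilt as (f x @t y)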
292
293
294 {-
295 ************************************************************************
296 * *
297 Functions on SimplCont
298 * *
299 ************************************************************************
300 -}
301
302 mkBoringStop :: OutType -> SimplCont
303 mkBoringStop ty = Stop ty BoringCtxt
304
305 mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in CoreUnfold
306 mkRhsStop ty = Stop ty RhsCtxt
307
308 mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
309 mkLazyArgStop ty cci = Stop ty cci
310
311 -------------------
312 contIsRhsOrArg :: SimplCont -> Bool
313 contIsRhsOrArg (Stop {}) = True
314 contIsRhsOrArg (StrictBind {}) = True
315 contIsRhsOrArg (StrictArg {}) = True
316 contIsRhsOrArg _ = False
317
318 contIsRhs :: SimplCont -> Bool
319 contIsRhs (Stop _ RhsCtxt) = True
320 contIsRhs _ = False
321
322 -------------------
323 contIsDupable :: SimplCont -> Bool
324 contIsDupable (Stop {}) = True
325 contIsDupable (ApplyToTy { sc_cont = k }) = contIsDupable k
326 contIsDupable (ApplyToVal { sc_dup = OkToDup }) = True -- See Note [DupFlag invariants]
327 contIsDupable (Select { sc_dup = OkToDup }) = True -- ...ditto...
328 contIsDupable (CastIt _ k) = contIsDupable k
329 contIsDupable _ = False
330
331 -------------------
332 contIsTrivial :: SimplCont -> Bool
333 contIsTrivial (Stop {}) = True
334 contIsTrivial (ApplyToTy { sc_cont = k }) = contIsTrivial k
335 contIsTrivial (ApplyToVal { sc_arg = Coercion _, sc_cont = k }) = contIsTrivial k
336 contIsTrivial (CastIt _ k) = contIsTrivial k
337 contIsTrivial _ = False
338
339 -------------------
340 contResultType :: SimplCont -> OutType
341 contResultType (Stop ty _) = ty
342 contResultType (CastIt _ k) = contResultType k
343 contResultType (StrictBind _ _ _ _ k) = contResultType k
344 contResultType (StrictArg _ _ k) = contResultType k
345 contResultType (Select { sc_cont = k }) = contResultType k
346 contResultType (ApplyToTy { sc_cont = k }) = contResultType k
347 contResultType (ApplyToVal { sc_cont = k }) = contResultType k
348 contResultType (TickIt _ k) = contResultType k
349
350 contHoleType :: SimplCont -> OutType
351 contHoleType (Stop ty _) = ty
352 contHoleType (TickIt _ k) = contHoleType k
353 contHoleType (CastIt co _) = pFst (coercionKind co)
354 contHoleType (StrictBind b _ _ se _) = substTy se (idType b)
355 contHoleType (StrictArg ai _ _) = funArgTy (ai_type ai)
356 contHoleType (ApplyToTy { sc_hole_ty = ty }) = ty -- See Note [The hole type in ApplyToTy]
357 contHoleType (ApplyToVal { sc_arg = e, sc_env = se, sc_dup = dup, sc_cont = k })
358 = mkFunTy (perhapsSubstTy dup se (exprType e))
359 (contHoleType k)
360 contHoleType (Select { sc_dup = d, sc_bndr = b, sc_env = se })
361 = perhapsSubstTy d se (idType b)
362
363 -------------------
364 countArgs :: SimplCont -> Int
365 -- Count all arguments, including types, coercions, and other values
366 countArgs (ApplyToTy { sc_cont = cont }) = 1 + countArgs cont
367 countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont
368 countArgs _ = 0
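-- For example (a sketch): for a continuation representing
--     <hole> @ty x y
-- i.e. ApplyToTy (ApplyToVal (ApplyToVal (Stop ...))), countArgs gives 3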
369
370 contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
371 -- Summarises value args, discards type args and coercions
372 -- The returned continuation of the call is only used to
373 -- answer questions like "are you interesting?"
374 contArgs cont
375 | lone cont = (True, [], cont)
376 | otherwise = go [] cont
377 where
378 lone (ApplyToTy {}) = False -- See Note [Lone variables] in CoreUnfold
379 lone (ApplyToVal {}) = False
380 lone (CastIt {}) = False
381 lone _ = True
382
383 go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
384 = go (is_interesting arg se : args) k
385 go args (ApplyToTy { sc_cont = k }) = go args k
386 go args (CastIt _ k) = go args k
387 go args k = (False, reverse args, k)
388
389 is_interesting arg se = interestingArg se arg
390 -- Do *not* use short-cutting substitution here
391 -- because we want to get as much IdInfo as possible
392
393
394 -------------------
395 mkArgInfo :: Id
396 -> [CoreRule] -- Rules for function
397 -> Int -- Number of value args
398 -> SimplCont -- Context of the call
399 -> ArgInfo
400
401 mkArgInfo fun rules n_val_args call_cont
402 | n_val_args < idArity fun -- Note [Unsaturated functions]
403 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
404 , ai_rules = rules, ai_encl = False
405 , ai_strs = vanilla_stricts
406 , ai_discs = vanilla_discounts }
407 | otherwise
408 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
409 , ai_rules = rules
410 , ai_encl = interestingArgContext rules call_cont
411 , ai_strs = add_type_str fun_ty arg_stricts
412 , ai_discs = arg_discounts }
413 where
414 fun_ty = idType fun
415
416 vanilla_discounts, arg_discounts :: [Int]
417 vanilla_discounts = repeat 0
418 arg_discounts = case idUnfolding fun of
419 CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
420 -> discounts ++ vanilla_discounts
421 _ -> vanilla_discounts
422
423 vanilla_stricts, arg_stricts :: [Bool]
424 vanilla_stricts = repeat False
425
426 arg_stricts
427 = case splitStrictSig (idStrictness fun) of
428 (demands, result_info)
429 | not (demands `lengthExceeds` n_val_args)
430 -> -- Enough args, use the strictness given.
431 -- For bottoming functions we used to pretend that the arg
432 -- is lazy, so that we don't treat the arg as an
433 -- interesting context. This avoids substituting
434 -- top-level bindings for (say) strings into
435 -- calls to error. But now we are more careful about
436 -- inlining lone variables, so it's ok (see SimplUtils.analyseCont)
437 if isBotRes result_info then
438 map isStrictDmd demands -- Finite => result is bottom
439 else
440 map isStrictDmd demands ++ vanilla_stricts
441 | otherwise
442 -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
443 <+> ppr n_val_args <+> ppr demands )
444 vanilla_stricts -- Not enough args, or no strictness
445
446 add_type_str :: Type -> [Bool] -> [Bool]
447 -- If the function arg types are strict, record that in the 'strictness bits'
448 -- No need to instantiate because unboxed types (which dominate the strict
449 -- types) can't instantiate type variables.
450 -- add_type_str is done repeatedly (for each call); it might be better
451 -- to do it once-for-all in the function
452 -- But beware primops/datacons with no strictness
453 add_type_str _ [] = []
454 add_type_str fun_ty strs -- Look through foralls
455 | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty -- Includes coercions
456 = add_type_str fun_ty' strs
457 add_type_str fun_ty (str:strs) -- Add strict-type info
458 | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
459 = (str || isStrictType arg_ty) : add_type_str fun_ty' strs
460 add_type_str _ strs
461 = strs
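-- A rough worked example (a sketch): with fun_ty = Int# -> Bool -> Int
-- and strs = [False, False] we get [True, False], because Int# is an
-- unlifted (hence strict) type while Bool is not.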
462
463 {- Note [Unsaturated functions]
464 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
465 Consider (test eyeball/inline4)
466 x = a:as
467 y = f x
468 where f has arity 2. Then we do not want to inline 'x', because
469 it'll just be floated out again. Even if f has lots of discounts
470 on its first argument -- it must be saturated for these to kick in
471 -}
472
473
474 {-
475 ************************************************************************
476 * *
477 Interesting arguments
478 * *
479 ************************************************************************
480
481 Note [Interesting call context]
482 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
483 We want to avoid inlining an expression where there can't possibly be
484 any gain, such as in an argument position. Hence, if the continuation
485 is interesting (eg. a case scrutinee, application etc.) then we
486 inline, otherwise we don't.
487
488 Previously some_benefit used to return True only if the variable was
489 applied to some value arguments. This didn't work:
490
491 let x = _coerce_ (T Int) Int (I# 3) in
492 case _coerce_ Int (T Int) x of
493 I# y -> ....
494
495 we want to inline x, but can't see that it's a constructor in a case
496 scrutinee position, and some_benefit is False.
497
498 Another example:
499
500 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
501
502 .... case dMonadST _@_ x0 of (a,b,c) -> ....
503
504 we'd really like to inline dMonadST here, but we *don't* want to
505 inline if the case expression is just
506
507 case x of y { DEFAULT -> ... }
508
509 since we can just eliminate this case instead (x is in WHNF). The same
510 applies when x is bound to a lambda expression. Hence
511 contIsInteresting looks for case expressions with just a single
512 default case.
513 -}
514
515 interestingCallContext :: SimplCont -> CallCtxt
516 -- See Note [Interesting call context]
517 interestingCallContext cont
518 = interesting cont
519 where
520 interesting (Select {}) = CaseCtxt
521 interesting (ApplyToVal {}) = ValAppCtxt
522 -- Can happen if we have (f Int |> co) y
523 -- If f has an INLINE prag we need to give it some
524 -- motivation to inline. See Note [Cast then apply]
525 -- in CoreUnfold
526 interesting (StrictArg _ cci _) = cci
527 interesting (StrictBind {}) = BoringCtxt
528 interesting (Stop _ cci) = cci
529 interesting (TickIt _ k) = interesting k
530 interesting (ApplyToTy { sc_cont = k }) = interesting k
531 interesting (CastIt _ k) = interesting k
532 -- If this call is the arg of a strict function, the context
533 -- is a bit interesting. If we inline here, we may get useful
534 -- evaluation information to avoid repeated evals: e.g.
535 -- x + (y * z)
536 -- Here the contIsInteresting makes the '*' keener to inline,
537 -- which in turn exposes a constructor which makes the '+' inline.
538 -- Assuming that +,* aren't small enough to inline regardless.
539 --
540 -- It's also very important to inline in a strict context for things
541 -- like
542 -- foldr k z (f x)
543 -- Here, the context of (f x) is strict, and if f's unfolding is
544 -- a build it's *great* to inline it here. So we must ensure that
545 -- the context for (f x) is not totally uninteresting.
546
547 interestingArgContext :: [CoreRule] -> SimplCont -> Bool
548 -- If the argument has form (f x y), where x,y are boring,
549 -- and f is marked INLINE, then we don't want to inline f.
550 -- But if the context of the argument is
551 -- g (f x y)
552 -- where g has rules, then we *do* want to inline f, in case it
553 -- exposes a rule that might fire. Similarly, if the context is
554 -- h (g (f x x))
555 -- where h has rules, then we do want to inline f; hence the
556 -- call_cont argument to interestingArgContext
557 --
558 -- The ai_encl flag makes this happen; if it's
559 -- set, the inliner gets just enough keener to inline f
560 -- regardless of how boring f's arguments are, if it's marked INLINE
561 --
562 -- The alternative would be to *always* inline an INLINE function,
563 -- regardless of how boring its context is; but that seems overkill
564 -- For example, it'd mean that wrapper functions were always inlined
565 --
566 -- The call_cont passed to interestingArgContext is the context of
567 -- the call itself, e.g. g <hole> in the example above
568 interestingArgContext rules call_cont
569 = notNull rules || enclosing_fn_has_rules
570 where
571 enclosing_fn_has_rules = go call_cont
572
573 go (Select {}) = False
574 go (ApplyToVal {}) = False -- Shouldn't really happen
575 go (ApplyToTy {}) = False -- Ditto
576 go (StrictArg _ cci _) = interesting cci
577 go (StrictBind {}) = False -- ??
578 go (CastIt _ c) = go c
579 go (Stop _ cci) = interesting cci
580 go (TickIt _ c) = go c
581
582 interesting RuleArgCtxt = True
583 interesting _ = False
584
585
586 {- Note [Interesting arguments]
587 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
588 An argument is interesting if it deserves a discount for unfoldings
589 with a discount in that argument position. The idea is to avoid
590 unfolding a function that is applied only to variables that have no
591 unfolding (i.e. they are probably lambda bound), as in (f x y z).
592 There is little point in inlining f here.
593
594 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
595 we must look through lets, eg (let x = e in C a b), because the let will
596 float, exposing the value, if we inline. That makes it different to
597 exprIsHNF.
598
599 Before 2009 we said it was interesting if the argument had *any* structure
600 at all; i.e. (hasSomeUnfolding v). But that does too much inlining; see Trac #3016.
601
602 But we don't regard (f x y) as interesting, unless f is unsaturated.
603 If it's saturated and f hasn't inlined, then it's probably not going
604 to now!
605
606 Note [Conlike is interesting]
607 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
608 Consider
609 f d = ...((*) d x y)...
610 ... f (df d')...
611 where df is con-like. Then we'd really like to inline 'f' so that the
612 rule for (*) (df d) can fire. To do this
613 a) we give a discount for being an argument of a class-op (eg (*) d)
614 b) we say that a con-like argument (eg (df d)) is interesting
615 -}
616
617 interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
618 -- See Note [Interesting arguments]
619 interestingArg env e = go env 0 e
620 where
621 -- n is # value args to which the expression is applied
622 go env n (Var v)
623 | SimplEnv { seIdSubst = ids, seInScope = in_scope } <- env
624 = case lookupVarEnv ids v of
625 Nothing -> go_var n (refineFromInScope in_scope v)
626 Just (DoneId v') -> go_var n (refineFromInScope in_scope v')
627 Just (DoneEx e) -> go (zapSubstEnv env) n e
628 Just (ContEx tvs cvs ids e) -> go (setSubstEnv env tvs cvs ids) n e
629
630 go _ _ (Lit {}) = ValueArg
631 go _ _ (Type _) = TrivArg
632 go _ _ (Coercion _) = TrivArg
633 go env n (App fn (Type _)) = go env n fn
634 go env n (App fn _) = go env (n+1) fn
635 go env n (Tick _ a) = go env n a
636 go env n (Cast e _) = go env n e
637 go env n (Lam v e)
638 | isTyVar v = go env n e
639 | n>0 = NonTrivArg -- (\x.b) e is NonTriv
640 | otherwise = ValueArg
641 go _ _ (Case {}) = NonTrivArg
642 go env n (Let b e) = case go env' n e of
643 ValueArg -> ValueArg
644 _ -> NonTrivArg
645 where
646 env' = env `addNewInScopeIds` bindersOf b
647
648 go_var n v
649 | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather than
650 -- data constructors here
651 | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
652 | n > 0 = NonTrivArg -- Saturated or unknown call
653 | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
654 -- See Note [Conlike is interesting]
655 | otherwise = TrivArg -- n==0, no useful unfolding
656 where
657 conlike_unfolding = isConLikeUnfolding (idUnfolding v)
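-- Rough examples of the intended classification (a sketch, not verified
-- output): a literal is a ValueArg; a bare lambda-bound variable with no
-- unfolding is a TrivArg; a saturated call (f x) of an ordinary arity-1
-- function is a NonTrivArg.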
658
659 {-
660 ************************************************************************
661 * *
662 SimplifierMode
663 * *
664 ************************************************************************
665
666 The SimplifierMode controls several switches; see its definition in
667 CoreMonad
668 sm_rules :: Bool -- Whether RULES are enabled
669 sm_inline :: Bool -- Whether inlining is enabled
670 sm_case_case :: Bool -- Whether case-of-case is enabled
671 sm_eta_expand :: Bool -- Whether eta-expansion is enabled
672 -}
673
674 simplEnvForGHCi :: DynFlags -> SimplEnv
675 simplEnvForGHCi dflags
676 = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
677 , sm_phase = InitialPhase
678 , sm_rules = rules_on
679 , sm_inline = False
680 , sm_eta_expand = eta_expand_on
681 , sm_case_case = True }
682 where
683 rules_on = gopt Opt_EnableRewriteRules dflags
684 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
685 -- Do not do any inlining, in case we expose some unboxed
686 -- tuple stuff that confuses the bytecode interpreter
687
688 updModeForStableUnfoldings :: Activation -> SimplifierMode -> SimplifierMode
689 -- See Note [Simplifying inside stable unfoldings]
690 updModeForStableUnfoldings inline_rule_act current_mode
691 = current_mode { sm_phase = phaseFromActivation inline_rule_act
692 , sm_inline = True
693 , sm_eta_expand = False }
694 -- For sm_rules, just inherit; sm_rules might be "off"
695 -- because of -fno-enable-rewrite-rules
696 where
697 phaseFromActivation (ActiveAfter _ n) = Phase n
698 phaseFromActivation _ = InitialPhase
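-- For example (a sketch): {-# INLINE [2] f #-} has activation
-- (ActiveAfter _ 2), so inside f's stable unfolding we simplify in
-- Phase 2; a plain {-# INLINE f #-} (AlwaysActive) gives InitialPhase.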
699
700 updModeForRules :: SimplifierMode -> SimplifierMode
701 -- See Note [Simplifying rules]
702 updModeForRules current_mode
703 = current_mode { sm_phase = InitialPhase
704 , sm_inline = False
705 , sm_rules = False
706 , sm_eta_expand = False }
707
708 {- Note [Simplifying rules]
709 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
710 When simplifying a rule, refrain from any inlining or applying of other RULES.
711
712 Doing anything to the LHS is plain confusing, because it means that what the
713 rule matches is not what the user wrote. c.f. Trac #10595, and #10528.
714 Moreover, inlining (or applying rules) on rule LHSs risks introducing
715 Ticks into the LHS, which makes matching trickier. Trac #10665, #10745.
716
717 Doing this to either side confounds tools like HERMIT, which seek to reason
718 about and apply the RULES as originally written. See Trac #10829.
719
720 Note [Inlining in gentle mode]
721 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
722 Something is inlined if
723 (i) the sm_inline flag is on, AND
724 (ii) the thing has an INLINE pragma, AND
725 (iii) the thing is inlinable in the earliest phase.
726
727 Example of why (iii) is important:
728 {-# INLINE [~1] g #-}
729 g = ...
730
731 {-# INLINE f #-}
732 f x = g (g x)
733
734 If we were to inline g into f's inlining, then an importing module would
735 never be able to do
736 f e --> g (g e) ---> RULE fires
737 because the stable unfolding for f has had g inlined into it.
738
739 On the other hand, it is bad not to do ANY inlining into an
740 stable unfolding, because then recursive knots in instance declarations
741 don't get unravelled.
742
743 However, *sometimes* SimplGently must do no call-site inlining at all
744 (hence sm_inline = False). Before full laziness we must be careful
745 not to inline wrappers, because doing so inhibits floating
746 e.g. ...(case f x of ...)...
747 ==> ...(case (case x of I# x# -> fw x#) of ...)...
748 ==> ...(case x of I# x# -> case fw x# of ...)...
749 and now the redex (f x) isn't floatable any more.
750
751 The no-inlining thing is also important for Template Haskell. You might be
752 compiling in one-shot mode with -O2; but when TH compiles a splice before
753 running it, we don't want to use -O2. Indeed, we don't want to inline
754 anything, because the byte-code interpreter might get confused about
755 unboxed tuples and suchlike.
756
757 Note [Simplifying inside stable unfoldings]
758 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
759 We must take care with simplification inside stable unfoldings (which come from
760 INLINE pragmas).
761
762 First, consider the following example
763 let f = \pq -> BIG
764 in
765 let g = \y -> f y y
766 {-# INLINE g #-}
767 in ...g...g...g...g...g...
768 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
769 and thence copied multiple times when g is inlined. HENCE we treat
770 any occurrence in a stable unfolding as a multiple occurrence, not a single
771 one; see OccurAnal.addRuleUsage.
772
773 Second, we *do* want to do some modest rules/inlining stuff in stable
774 unfoldings, partly to eliminate senseless crap, and partly to break
775 the recursive knots generated by instance declarations.
776
777 However, suppose we have
778 {-# INLINE <act> f #-}
779 f = <rhs>
780 meaning "inline f in phases p where activation <act>(p) holds".
781 Then what inlinings/rules can we apply to the copy of <rhs> captured in
782 f's stable unfolding? Our model is that literally <rhs> is substituted for
783 f when it is inlined. So our conservative plan (implemented by
784 updModeForStableUnfoldings) is this:
785
786 -------------------------------------------------------------
787 When simplifying the RHS of a stable unfolding, set the phase
788 to the phase in which the stable unfolding first becomes active
789 -------------------------------------------------------------
790
791 That ensures that
792
793 a) Rules/inlinings that *cease* being active before p will
794 not apply to the stable unfolding, consistent with it being
795 inlined in its *original* form in phase p.
796
797 b) Rules/inlinings that only become active *after* p will
798 not apply to the stable unfolding, again to be consistent with
799 inlining the *original* rhs in phase p.
800
801 For example,
802 {-# INLINE f #-}
803 f x = ...g...
804
805 {-# NOINLINE [1] g #-}
806 g y = ...
807
808 {-# RULE h g = ... #-}
809 Here we must not inline g into f's RHS, even when we get to phase 0,
810 because when f is later inlined into some other module we want the
811 rule for h to fire.
812
813 Similarly, consider
814 {-# INLINE f #-}
815 f x = ...g...
816
817 g y = ...
818 and suppose that there are auto-generated specialisations and a strictness
819 wrapper for g. The specialisations get activation AlwaysActive, and the
820 strictness wrapper gets activation (ActiveAfter 0). So the strictness
821 wrapper fails the test and won't be inlined into f's stable unfolding. That
822 means f can inline, expose the specialised call to g, so the specialisation
823 rules can fire.
824
825 A note about wrappers
826 ~~~~~~~~~~~~~~~~~~~~~
827 It's also important not to inline a worker back into a wrapper.
828 A wrapper looks like
829 wrapper = inline_me (\x -> ...worker... )
830 Normally, the inline_me prevents the worker getting inlined into
831 the wrapper (initially, the worker's only call site!). But,
832 if the wrapper is sure to be called, the strictness analyser will
833 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
834 continuation.
835 -}
836
837 activeUnfolding :: SimplEnv -> Id -> Bool
838 activeUnfolding env
839 | not (sm_inline mode) = active_unfolding_minimal
840 | otherwise = case sm_phase mode of
841 InitialPhase -> active_unfolding_gentle
842 Phase n -> active_unfolding n
843 where
844 mode = getMode env
845
846 getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
847 -- When matching in RULE, we want to "look through" an unfolding
848 -- (to see a constructor) if *rules* are on, even if *inlinings*
849 -- are not. A notable example is DFuns, which really we want to
850 -- match in rules like (op dfun) in gentle mode. Another example
851 -- is 'otherwise' which we want exprIsConApp_maybe to be able to
852 -- see very early on
853 getUnfoldingInRuleMatch env
854 = (in_scope, id_unf)
855 where
856 in_scope = seInScope env
857 mode = getMode env
858 id_unf id | unf_is_active id = idUnfolding id
859 | otherwise = NoUnfolding
860 unf_is_active id
861 | not (sm_rules mode) = active_unfolding_minimal id
862 | otherwise = isActive (sm_phase mode) (idInlineActivation id)
863
864 active_unfolding_minimal :: Id -> Bool
865 -- Compulsory unfoldings only
866 -- Ignore SimplGently, because we want to inline regardless;
867 -- the Id has no top-level binding at all
868 --
869 -- NB: we used to have a second exception, for data con wrappers.
870 -- On the grounds that we use gentle mode for rule LHSs, and
871 -- they match better when data con wrappers are inlined.
872 -- But that only really applies to the trivial wrappers (like (:)),
873 -- and they are now constructed as Compulsory unfoldings (in MkId)
874 -- so they'll happen anyway.
875 active_unfolding_minimal id = isCompulsoryUnfolding (realIdUnfolding id)
876
877 active_unfolding :: PhaseNum -> Id -> Bool
878 active_unfolding n id = isActiveIn n (idInlineActivation id)
879
880 active_unfolding_gentle :: Id -> Bool
881 -- Anything that is early-active
882 -- See Note [Gentle mode]
883 active_unfolding_gentle id
884 = isInlinePragma prag
885 && isEarlyActive (inlinePragmaActivation prag)
886 -- NB: wrappers are not early-active
887 where
888 prag = idInlinePragma id
889
890 ----------------------
891 activeRule :: SimplEnv -> Activation -> Bool
892 -- If sm_rules is off, no rules are active at all
893 activeRule env
894 | not (sm_rules mode) = \_ -> False -- Rewriting is off
895 | otherwise = isActive (sm_phase mode)
896 where
897 mode = getMode env
898
899 {-
900 ************************************************************************
901 * *
902 preInlineUnconditionally
903 * *
904 ************************************************************************
905
906 preInlineUnconditionally
907 ~~~~~~~~~~~~~~~~~~~~~~~~
908 @preInlineUnconditionally@ examines a bndr to see if it is used just
909 once in a completely safe way, so that it is safe to discard the
910 binding inline its RHS at the (unique) usage site, REGARDLESS of how
911 big the RHS might be. If this is the case we don't simplify the RHS
912 first, but just inline it un-simplified.
913
914 This is much better than first simplifying a perhaps-huge RHS and then
915 inlining and re-simplifying it. Indeed, it can be at least quadratically
916 better. Consider
917
918 x1 = e1
919 x2 = e2[x1]
920 x3 = e3[x2]
921 ...etc...
922 xN = eN[xN-1]
923
924 We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times etc.
925 This can happen with cascades of functions too:
926
927 f1 = \x1.e1
928 f2 = \xs.e2[f1]
929 f3 = \xs.e3[f2]
930 ...etc...
931
932 THE MAIN INVARIANT is this:
933
934 ---- preInlineUnconditionally invariant -----
935 IF preInlineUnconditionally chooses to inline x = <rhs>
936 THEN doing the inlining should not change the occurrence
937 info for the free vars of <rhs>
938 ----------------------------------------------
939
940 For example, it's tempting to look at trivial binding like
941 x = y
942 and inline it unconditionally. But suppose x is used many times,
943 but this is the unique occurrence of y. Then inlining x would change
944 y's occurrence info, which breaks the invariant. It matters: y
945 might have a BIG rhs, which will now be dup'd at every occurrence of x.
946
947
948 Even RHSs labelled InlineMe aren't caught here, because there might be
949 no benefit from inlining at the call site.
950
951 [Sept 01] Don't unconditionally inline a top-level thing, because that
952 can simply make a static thing into something built dynamically. E.g.
953 x = (a,b)
954 main = \s -> h x
955
956 [Remember that we treat \s as a one-shot lambda.] No point in
957 inlining x unless there is something interesting about the call site.
958
959 But watch out: if you aren't careful, some useful foldr/build fusion
960 can be lost (most notably in spectral/hartel/parstof) because the
961 foldr didn't see the build. Doing the dynamic allocation isn't a big
962 deal, in fact, but losing the fusion can be. But the right thing here
963 seems to be to do a callSiteInline based on the fact that there is
964 something interesting about the call site (it's strict). Hmm. That
965 seems a bit fragile.
966
967 Conclusion: inline top level things gaily until Phase 0 (the last
968 phase), at which point don't.
969
970 Note [pre/postInlineUnconditionally in gentle mode]
971 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
972 Even in gentle mode we want to do preInlineUnconditionally. The
973 reason is that too little clean-up happens if you don't inline
974 use-once things. Also a bit of inlining is *good* for full laziness;
975 it can expose constant sub-expressions. Example in
976 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
977 let-float if you inline windowToViewport
978
979 However, as usual for Gentle mode, do not inline things that are
980 inactive in the initial stages. See Note [Gentle mode].
981
982 Note [Stable unfoldings and preInlineUnconditionally]
983 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
984 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
985 Example
986
987 {-# INLINE f #-}
988 f :: Eq a => a -> a
989 f x = ...
990
991 fInt :: Int -> Int
992 fInt = f Int dEqInt
993
994 ...fInt...fInt...fInt...
995
996 Here f occurs just once, in the RHS of fInt. But if we inline it there
997 we'll lose the opportunity to inline at each of fInt's call sites.
998 The INLINE pragma will only inline when the application is saturated
999 for exactly this reason; and we don't want PreInlineUnconditionally
1000 to second-guess it. A live example is Trac #3736.
1001 c.f. Note [Stable unfoldings and postInlineUnconditionally]
1002
1003 Note [Top-level bottoming Ids]
1004 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1005 Don't inline top-level Ids that are bottoming, even if they are used just
1006 once, because FloatOut has gone to some trouble to extract them out.
1007 Inlining them won't make the program run faster!
1008
1009 Note [Do not inline CoVars unconditionally]
1010 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1011 Coercion variables appear inside coercions, and the RHS of a let-binding
1012 is a term (not a coercion) so we can't necessarily inline the latter in
1013 the former.
1014 -}
1015
1016 preInlineUnconditionally :: DynFlags -> SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
1017 -- Precondition: rhs satisfies the let/app invariant
1018 -- See Note [CoreSyn let/app invariant] in CoreSyn
1019 -- Reason: we don't want to inline single uses, or discard dead bindings,
1020 -- for unlifted, side-effect-ful bindings
1021 preInlineUnconditionally dflags env top_lvl bndr rhs
1022 | not active = False
1023 | isStableUnfolding (idUnfolding bndr) = False -- Note [Stable unfoldings and preInlineUnconditionally]
1024 | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
1025 | not (gopt Opt_SimplPreInlining dflags) = False
1026 | isCoVar bndr = False -- Note [Do not inline CoVars unconditionally]
1027 | otherwise = case idOccInfo bndr of
1028 IAmDead -> True -- Happens in ((\x.1) v)
1029 OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
1030 _ -> False
1031 where
1032 mode = getMode env
1033 active = isActive (sm_phase mode) act
1034 -- See Note [pre/postInlineUnconditionally in gentle mode]
1035 act = idInlineActivation bndr
1036 try_once in_lam int_cxt -- There's one textual occurrence
1037 | not in_lam = isNotTopLevel top_lvl || early_phase
1038 | otherwise = int_cxt && canInlineInLam rhs
1039
1040 -- Be very careful before inlining inside a lambda, because (a) we must not
1041 -- invalidate occurrence information, and (b) we want to avoid pushing a
1042 -- single allocation (here) into multiple allocations (inside lambda).
1043 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
1044 -- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
1045 -- where
1046 -- is_cheap = exprIsCheap rhs
1047 -- ok = is_cheap && int_cxt
1048
1049 -- int_cxt The context isn't totally boring
1050 -- E.g. let f = \ab.BIG in \y. map f xs
1051 -- Don't want to substitute for f, because then we allocate
1052 -- its closure every time the \y is called
1053 -- But: let f = \ab.BIG in \y. map (f y) xs
1054 -- Now we do want to substitute for f, even though it's not
1055 -- saturated, because we're going to allocate a closure for
1056 -- (f y) every time round the loop anyhow.
1057
1058 -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
1059 -- so substituting rhs inside a lambda doesn't change the occ info.
1060 -- Sadly, not quite the same as exprIsHNF.
1061 canInlineInLam (Lit _) = True
1062 canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
1063 canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
1064 canInlineInLam _ = False
1065 -- not ticks. Counting ticks cannot be duplicated, and non-counting
1066 -- ticks around a Lam will disappear anyway.
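-- Rough examples (a sketch): a literal passes; a lambda whose binder is
-- a runtime variable, e.g. (\x -> f x y), passes via the Lam case; an
-- application such as (f y) falls through to the catch-all and fails.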
1067
1068 early_phase = case sm_phase mode of
1069 Phase 0 -> False
1070 _ -> True
1071 -- If we don't have this early_phase test, consider
1072 -- x = length [1,2,3]
1073 -- The full laziness pass carefully floats all the cons cells to
1074 -- top level, and preInlineUnconditionally floats them all back in.
1075 -- Result is (a) static allocation replaced by dynamic allocation
1076 -- (b) many simplifier iterations because this tickles
1077 -- a related problem; only one inlining per pass
1078 --
1079 -- On the other hand, I have seen cases where top-level fusion is
1080 -- lost if we don't inline top level thing (e.g. string constants)
1081 -- Hence the test for phase zero (which is the phase for all the final
1082 -- simplifications). Until phase zero we take no special notice of
1083 -- top level things, but then we become more leery about inlining
1084 -- them.
1085
1086 {-
1087 ************************************************************************
1088 * *
1089 postInlineUnconditionally
1090 * *
1091 ************************************************************************
1092
1093 postInlineUnconditionally
1094 ~~~~~~~~~~~~~~~~~~~~~~~~~
1095 @postInlineUnconditionally@ decides whether to unconditionally inline
1096 a thing based on the form of its RHS; in particular if it has a
1097 trivial RHS. If so, we can inline and discard the binding altogether.
1098
1099 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
1100 only have *forward* references. Hence, it's safe to discard the binding
1101
1102 NOTE: This isn't our last opportunity to inline. We're at the binding
1103 site right now, and we'll get another opportunity when we get to the
1104 occurrence(s)
1105
1106 Note that we do this unconditional inlining only for trivial RHSs.
1107 Don't inline even WHNFs inside lambdas; doing so may simply increase
1108 allocation when the function is called. This isn't the last chance; see
1109 NOTE above.
1110
1111 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here. Why?
1112 Because we don't even want to inline them into the RHS of constructor
1113 arguments. See NOTE above
1114
1115 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
1116 it's best to inline it anyway. We often get a=E; b=a from desugaring,
1117 with both a and b marked NOINLINE. But that seems incompatible with
1118 our new view that inlining is like a RULE, so I'm sticking to the 'active'
1119 story for now.
1120 -}
1121
1122 postInlineUnconditionally
1123 :: DynFlags -> SimplEnv -> TopLevelFlag
1124 -> OutId -- The binder (an InId would be fine too)
1125 -- (*not* a CoVar)
1126 -> OccInfo -- From the InId
1127 -> OutExpr
1128 -> Unfolding
1129 -> Bool
1130 -- Precondition: rhs satisfies the let/app invariant
1131 -- See Note [CoreSyn let/app invariant] in CoreSyn
1132 -- Reason: we don't want to inline single uses, or discard dead bindings,
1133 -- for unlifted, side-effect-ful bindings
1134 postInlineUnconditionally dflags env top_lvl bndr occ_info rhs unfolding
1135 | not active = False
1136 | isWeakLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline
1137 -- because it might be referred to "earlier"
1138 | isExportedId bndr = False
1139 | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
1140 | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally]
1141 | exprIsTrivial rhs = True
1142 | otherwise
1143 = case occ_info of
1144 -- The point of examining occ_info here is that for *non-values*
1145 -- that occur outside a lambda, the call-site inliner won't have
1146 -- a chance (because it doesn't know that the thing
1147 -- only occurs once). The pre-inliner won't have gotten
1148 -- it either, if the thing occurs in more than one branch
1149 -- So the main target is things like
1150 -- let x = f y in
1151 -- case v of
1152 -- True -> case x of ...
1153 -- False -> case x of ...
1154 -- This is very important in practice; e.g. wheel-seive1 doubles
1155 -- in allocation if you miss this out
1156 OneOcc in_lam _one_br int_cxt -- OneOcc => no code-duplication issue
1157 -> smallEnoughToInline dflags unfolding -- Small enough to dup
1158 -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
1159 --
1160 -- NB: Do NOT inline arbitrarily big things, even if one_br is True
1161 -- Reason: doing so risks exponential behaviour. We simplify a big
1162 -- expression, inline it, and simplify it again. But if the
1163 -- very same thing happens in the big expression, we get
1164 -- exponential cost!
1165 -- PRINCIPLE: when we've already simplified an expression once,
1166 -- make sure that we only inline it if it's reasonably small.
1167
1168 && (not in_lam ||
1169 -- Outside a lambda, we want to be reasonably aggressive
1170 -- about inlining into multiple branches of case
1171 -- e.g. let x = <non-value>
1172 -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
1173 -- Inlining can be a big win if C3 is the hot-spot, even if
1174 -- the uses in C1, C2 are not 'interesting'
1175 -- An example that gets worse if you add int_cxt here is 'clausify'
1176
1177 (isCheapUnfolding unfolding && int_cxt))
1178 -- isCheap => acceptable work duplication; in_lam may be true
1179 -- int_cxt to prevent us inlining inside a lambda without some
1180 -- good reason. See the notes on int_cxt in preInlineUnconditionally
1181
1182 IAmDead -> True -- This happens; for example, the case_bndr during case of
1183 -- known constructor: case (a,b) of x { (p,q) -> ... }
1184 -- Here x isn't mentioned in the RHS, so we don't want to
1185 -- create the (dead) let-binding let x = (a,b) in ...
1186
1187 _ -> False
1188
1189 -- Here's an example that we don't handle well:
1190 -- let f = if b then Left (\x.BIG) else Right (\y.BIG)
1191 -- in \y. ....case f of {...} ....
1192 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
1193 -- But
1194 -- - We can't preInlineUnconditionally because that would invalidate
1195 -- the occ info for b.
1196 -- - We can't postInlineUnconditionally because the RHS is big, and
1197 -- that risks exponential behaviour
1198 -- - We can't call-site inline, because the rhs is big
1199 -- Alas!
1200
1201 where
1202 active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
1203 -- See Note [pre/postInlineUnconditionally in gentle mode]
1204
1205 {-
1206 Note [Top level and postInlineUnconditionally]
1207 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1208 We don't do postInlineUnconditionally for top-level things (even for
1209 ones that are trivial):
1210
1211 * Doing so will inline top-level error expressions that have been
1212 carefully floated out by FloatOut. More generally, it might
1213 replace static allocation with dynamic.
1214
1215 * Even for trivial expressions there's a problem. Consider
1216 {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
1217 blah xs = reverse xs
1218 ruggle = sort
1219 In one simplifier pass we might fire the rule, getting
1220 blah xs = ruggle xs
1221 but in *that* simplifier pass we must not do postInlineUnconditionally
1222 on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
1223
1224 If the rhs is trivial it'll be inlined by callSiteInline, and then
1225 the binding will be dead and discarded by the next use of OccurAnal
1226
1227 * There is less point, because the main goal is to get rid of local
1228 bindings used in multiple case branches.
1229
1230 * The inliner should inline trivial things at call sites anyway.
1231
1232 Note [Stable unfoldings and postInlineUnconditionally]
1233 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1234 Do not do postInlineUnconditionally if the Id has a stable unfolding,
1235 otherwise we lose the unfolding. Example
1236
1237 -- f has stable unfolding with rhs (e |> co)
1238 -- where 'e' is big
1239 f = e |> co
1240
1241 Then there's a danger we'll optimise to
1242
1243 f' = e
1244 f = f' |> co
1245
1246 and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
1247 won't inline because 'e' is too big.
1248
1249 c.f. Note [Stable unfoldings and preInlineUnconditionally]
1250
1251
1252 ************************************************************************
1253 * *
1254 Rebuilding a lambda
1255 * *
1256 ************************************************************************
1257 -}
1258
1259 mkLam :: [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
1260 -- mkLam tries three things
1261 -- a) eta reduction, if that gives a trivial expression
1262 -- b) eta expansion [only if there are some value lambdas]
-- c) floating casts out through the lambdas; see Note [Casts and lambdas]
1263
1264 mkLam [] body _cont
1265 = return body
1266 mkLam bndrs body cont
1267 = do { dflags <- getDynFlags
1268 ; mkLam' dflags bndrs body }
1269 where
1270 mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
1271 mkLam' dflags bndrs (Cast body co)
1272 | not (any bad bndrs)
1273 -- Note [Casts and lambdas]
1274 = do { lam <- mkLam' dflags bndrs body
1275 ; return (mkCast lam (mkPiCos Representational bndrs co)) }
1276 where
1277 co_vars = tyCoVarsOfCo co
1278 bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
1279
1280 mkLam' dflags bndrs body@(Lam {})
1281 = mkLam' dflags (bndrs ++ bndrs1) body1
1282 where
1283 (bndrs1, body1) = collectBinders body
1284
1285 mkLam' dflags bndrs (Tick t expr)
1286 | tickishFloatable t
1287 = mkTick t <$> mkLam' dflags bndrs expr
1288
1289 mkLam' dflags bndrs body
1290 | gopt Opt_DoEtaReduction dflags
1291 , Just etad_lam <- tryEtaReduce bndrs body
1292 = do { tick (EtaReduction (head bndrs))
1293 ; return etad_lam }
1294
1295 | not (contIsRhs cont) -- See Note [Eta-expanding lambdas]
1296 , gopt Opt_DoLambdaEtaExpansion dflags
1297 , any isRuntimeVar bndrs
1298 , let body_arity = exprEtaExpandArity dflags body
1299 , body_arity > 0
1300 = do { tick (EtaExpansion (head bndrs))
1301 ; let res = mkLams bndrs (etaExpand body_arity body)
1302 ; traceSmpl "eta expand" (vcat [text "before" <+> ppr (mkLams bndrs body)
1303 , text "after" <+> ppr res])
1304 ; return res }
1305
1306 | otherwise
1307 = return (mkLams bndrs body)
1308
1309 {-
1310 Note [Eta-expanding lambdas]
1311 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1312 In general we *do* want to eta-expand lambdas. Consider
1313 f (\x -> case x of (a,b) -> \s -> blah)
1314 where 's' is a state token, and hence can be eta expanded. This
1315 showed up in the code for GHC.IO.Handle.Text.hPutChar, a rather
1316 important function!
1317
1318 The eta-expansion will never happen unless we do it now. (Well, it's
1319 possible that CorePrep will do it, but CorePrep only has a half-baked
1320 eta-expander that can't deal with casts. So it's much better to do it
1321 here.)
1322
1323 However, when the lambda is let-bound, as the RHS of a let, we have a
1324 better eta-expander (in the form of tryEtaExpandRhs), so we don't
1325 bother to try expansion in mkLam in that case; hence the contIsRhs
1326 guard.
1327
1328 Note [Casts and lambdas]
1329 ~~~~~~~~~~~~~~~~~~~~~~~~
1330 Consider
1331 (\x. (\y. e) `cast` g1) `cast` g2
1332 There is a danger here that the two lambdas look separated, and the
1333 full laziness pass might float an expression to between the two.
1334
1335 So this equation in mkLam' floats the g1 out, thus:
1336 (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
1337 where x:tx.
1338
1339 In general, this floats casts outside lambdas, where (I hope) they
1340 might meet and cancel with some other cast:
1341 \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
1342 /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
1343 /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
1344 (if not (g `in` co))
1345
1346 Notice that it works regardless of 'e'. Originally it worked only
1347 if 'e' was itself a lambda, but in some cases that resulted in
1348 fruitless iteration in the simplifier. A good example was when
1349 compiling Text.ParserCombinators.ReadPrec, where we had a definition
1350 like (\x. Get `cast` g)
1351 where Get is a constructor with nonzero arity. Then mkLam eta-expanded
1352 the Get, and the next iteration eta-reduced it, and then eta-expanded
1353 it again.
1354
1355 Note also the side condition for the case of coercion binders.
1356 It does not make sense to transform
1357 /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
1358 because the latter is not well-kinded.
1359
1360 ************************************************************************
1361 * *
1362 Eta expansion
1363 * *
1364 ************************************************************************
1365 -}
1366
1367 tryEtaExpandRhs :: SimplEnv -> OutId -> OutExpr -> SimplM (Arity, OutExpr)
1368 -- See Note [Eta-expanding at let bindings]
1369 tryEtaExpandRhs env bndr rhs
1370 = do { dflags <- getDynFlags
1371 ; (new_arity, new_rhs) <- try_expand dflags
1372
1373 ; WARN( new_arity < old_id_arity,
1374 (text "Arity decrease:" <+> (ppr bndr <+> ppr old_id_arity
1375 <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
1376 -- Note [Arity decrease] in Simplify
1377 return (new_arity, new_rhs) }
1378 where
1379 try_expand dflags
1380 | exprIsTrivial rhs
1381 = return (exprArity rhs, rhs)
1382
1383 | sm_eta_expand (getMode env) -- Provided eta-expansion is on
1384 , let new_arity1 = findRhsArity dflags bndr rhs old_arity
1385 new_arity2 = idCallArity bndr
1386 new_arity = max new_arity1 new_arity2
1387 , new_arity > old_arity -- And the current manifest arity isn't enough
1388 = do { tick (EtaExpansion bndr)
1389 ; return (new_arity, etaExpand new_arity rhs) }
1390 | otherwise
1391 = return (old_arity, rhs)
1392
1393 old_arity = exprArity rhs -- See Note [Do not eta-expand PAPs]
1394 old_id_arity = idArity bndr
1395
1396 {-
1397 Note [Eta-expanding at let bindings]
1398 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1399 We now eta expand at let-bindings, which is where the payoff comes.
1400 The most significant thing is that we can do a simple arity analysis
1401 (in CoreArity.findRhsArity), which we can't do for free-floating lambdas
1402
1403 One useful consequence of not eta-expanding lambdas is this example:
1404 genMap :: C a => ...
1405 {-# INLINE genMap #-}
1406 genMap f xs = ...
1407
1408 myMap :: D a => ...
1409 {-# INLINE myMap #-}
1410 myMap = genMap
1411
1412 Notice that 'genMap' should only inline if applied to two arguments.
1413 In the stable unfolding for myMap we'll have the unfolding
1414 (\d -> genMap Int (..d..))
1415 We do not want to eta-expand to
1416 (\d f xs -> genMap Int (..d..) f xs)
1417 because then 'genMap' will inline, and it really shouldn't: at least
1418 as far as the programmer is concerned, it's not applied to two
1419 arguments!
1420
1421 Note [Do not eta-expand PAPs]
1422 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1423 We used to have old_arity = manifestArity rhs, which meant that we
1424 would eta-expand even PAPs. But this gives no particular advantage,
1425 and can lead to a massive blow-up in code size, exhibited by Trac #9020.
1426 Suppose we have a PAP
1427 foo :: IO ()
1428 foo = returnIO ()
1429 Then we can eta-expand it to
1430 foo = (\eta. (returnIO () |> sym g) eta) |> g
1431 where
1432 g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
1433
1434 But there is really no point in doing this, and it generates masses of
1435 coercions and whatnot that eventually disappear again. For T9020, GHC
1436 allocated 6.6G before, and 0.8G afterwards; and residency dropped from
1437 1.8G to 45M.
1438
1439 But note that this won't eta-expand, say
1440 f = \g -> map g
1441 Does it matter not eta-expanding such functions? I'm not sure. Perhaps
1442 strictness analysis will have less to bite on?
1443
1444
1445 ************************************************************************
1446 * *
1447 \subsection{Floating lets out of big lambdas}
1448 * *
1449 ************************************************************************
1450
1451 Note [Floating and type abstraction]
1452 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1453 Consider this:
1454 x = /\a. C e1 e2
1455 We'd like to float this to
1456 y1 = /\a. e1
1457 y2 = /\a. e2
1458 x = /\a. C (y1 a) (y2 a)
1459 for the usual reasons: we want to inline x rather vigorously.
1460
1461 You may think that this kind of thing is rare. But in some programs it is
1462 common. For example, if you do closure conversion you might get:
1463
1464 data a :-> b = forall e. (e -> a -> b) :$ e
1465
1466 f_cc :: forall a. a :-> a
1467 f_cc = /\a. (\e. id a) :$ ()
1468
1469 Now we really want to inline that f_cc thing so that the
1470 construction of the closure goes away.
1471
1472 So I have elaborated simplLazyBind to understand right-hand sides that look
1473 like
1474 /\ a1..an. body
1475
1476 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1477 but there is quite a bit of plumbing in simplLazyBind as well.
1478
1479 The same transformation is good when there are lets in the body:
1480
1481 /\abc -> let(rec) x = e in b
1482 ==>
1483 let(rec) x' = /\abc -> let x = x' a b c in e
1484 in
1485 /\abc -> let x = x' a b c in b
1486
1487 This is good because it can turn things like:
1488
1489 let f = /\a -> letrec g = ... g ... in g
1490 into
1491 letrec g' = /\a -> ... g' a ...
1492 in
1493 let f = /\ a -> g' a
1494
1495 which is better. In effect, it means that big lambdas don't impede
1496 let-floating.
1497
1498 This optimisation is CRUCIAL in eliminating the junk introduced by
1499 desugaring mutually recursive definitions. Don't eliminate it lightly!
1500
1501 [May 1999] If we do this transformation *regardless* then we can
1502 end up with some pretty silly stuff. For example,
1503
1504 let
1505 st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1506 in ..
1507 becomes
1508 let y1 = /\s -> r1
1509 y2 = /\s -> r2
1510 st = /\s -> ...[y1 s/x1, y2 s/x2]
1511 in ..
1512
1513 Unless the "..." is a WHNF there is really no point in doing this.
1514 Indeed it can make things worse. Suppose x1 is used strictly,
1515 and is of the form
1516
1517 x1* = case f y of { (a,b) -> e }
1518
1519 If we abstract this wrt the tyvar, we can no longer do the case inline
1520 as we normally would.
1521
1522 That's why the whole transformation is part of the same process that
1523 floats let-bindings and constructor arguments out of RHSs. In particular,
1524 it is guarded by the doFloatFromRhs call in simplLazyBind.
1525
1526 Note [Which type variables to abstract over]
1527 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1528 Abstract each binding only over those of the candidate type variables
1529 that are actually free in its rhs. Note that
1530
1531 * The naive approach of abstracting wrt the
1532 tyvars free in the Id's /type/ fails. Consider:
1533 /\ a b -> let t :: (a,b) = (e1, e2)
1534 x :: a = fst t
1535 in ...
1536 Here, b isn't free in x's type, but we must nevertheless
1537 abstract wrt b as well, because t's type mentions b.
1538 Since t is floated too, we'd end up with the bogus:
1539 poly_t = /\ a b -> (e1, e2)
1540 poly_x = /\ a -> fst (poly_t a *b*)
1541
1542 * We must do closeOverKinds. Example (Trac #10934):
1543 f = /\k (f:k->*) (a:k). let t = AccFailure @ (f a) in ...
1544 Here we want to float 't', but we must remember to abstract over
1545 'k' as well, even though it is not explicitly mentioned in the RHS,
1546 otherwise we get
1547 t = /\ (f:k->*) (a:k). AccFailure @ (f a)
1548 which is obviously bogus.
1549 -}
1550
1551 abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
1552 abstractFloats main_tvs body_env body
1553 = ASSERT( notNull body_floats )
1554 do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
1555 ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
1556 where
1557 main_tv_set = mkVarSet main_tvs
1558 body_floats = getFloatBinds body_env
1559 empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
1560
1561 abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
1562 abstract subst (NonRec id rhs)
1563 = do { (poly_id, poly_app) <- mk_poly tvs_here id
1564 ; let poly_rhs = mkLams tvs_here rhs'
1565 subst' = CoreSubst.extendIdSubst subst id poly_app
1566 ; return (subst', (NonRec poly_id poly_rhs)) }
1567 where
1568 rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
1569
1570 -- tvs_here: see Note [Which type variables to abstract over]
1571 tvs_here = toposortTyVars $
1572 filter (`elemVarSet` main_tv_set) $
1573 closeOverKindsList $
1574 exprSomeFreeVarsList isTyVar rhs'
1575
1576 abstract subst (Rec prs)
1577 = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
1578 ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
1579 poly_rhss = [mkLams tvs_here (CoreSubst.substExpr (text "abstract_floats3") subst' rhs)
1580 | rhs <- rhss]
1581 ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
1582 where
1583 (ids,rhss) = unzip prs
1584 -- For a recursive group, it's a bit of a pain to work out the minimal
1585 -- set of tyvars over which to abstract:
1586 -- /\ a b c. let x = ...a... in
1587 -- letrec { p = ...x...q...
1588 -- q = .....p...b... } in
1589 -- ...
1590 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
1591 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
1592 -- Since it's a pain, we just use the whole set, which is always safe
1593 --
1594 -- If you ever want to be more selective, remember this bizarre case too:
1595 -- x::a = x
1596 -- Here, we must abstract 'x' over 'a'.
1597 tvs_here = toposortTyVars main_tvs
1598
1599 mk_poly tvs_here var
1600 = do { uniq <- getUniqueM
1601 ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
1602 poly_ty = mkInvForAllTys tvs_here (idType var) -- But new type of course
1603 poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.hs
1604 mkLocalIdOrCoVar poly_name poly_ty
1605 ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
1606 -- In the olden days, it was crucial to copy the occInfo of the original var,
1607 -- because we were looking at occurrence-analysed but as yet unsimplified code!
1608 -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking
1609 -- at already simplified code, so it doesn't matter
1610 --
1611 -- It's even right to retain single-occurrence or dead-var info:
1612 -- Suppose we started with /\a -> let x = E in B
1613 -- where x occurs once in B. Then we transform to:
1614 -- let x' = /\a -> E in /\a -> let x* = x' a in B
1615 -- where x* has an INLINE prag on it. Now, once x* is inlined,
1616 -- the occurrences of x' will be just the occurrences originally
1617 -- pinned on x.
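    --
    -- A worked sketch of the overall effect (illustrative only): abstracting
    -- over [a], with float   x = Just e   (where 'e' mentions 'a') and body   f x,
    -- produces
    --   float:  poly_x = /\a. Just e
    --   body :  f (poly_x a)
    -- Each float is lambda-abstracted over the type variables it needs, and
    -- every occurrence in the body (and in later floats, via the accumulated
    -- substitution) becomes an application of the new poly binder to those
    -- type variables.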
1618
1619 {-
1620 Note [Abstract over coercions]
1621 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1622 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1623 type variable a. Rather than sort this mess out, we simply bale out and abstract
1624 wrt all the type variables if any of them are coercion variables.
1625
1626
1627 Historical note: if you use let-bindings instead of a substitution, beware of this:
1628
1629 -- Suppose we start with:
1630 --
1631 -- x = /\ a -> let g = G in E
1632 --
1633 -- Then we'll float to get
1634 --
1635 -- x = let poly_g = /\ a -> G
1636 -- in /\ a -> let g = poly_g a in E
1637 --
1638 -- But now the occurrence analyser will see just one occurrence
1639 -- of poly_g, not inside a lambda, so the simplifier will
1640 -- PreInlineUnconditionally poly_g back into g! Back to square 1!
1641 -- (I used to think that the "don't inline lone occurrences" stuff
1642 -- would stop this happening, but since it's the *only* occurrence,
1643 -- PreInlineUnconditionally kicks in first!)
1644 --
1645 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1646 -- to appear many times. (NB: mkInlineMe eliminates
1647 -- such notes on trivial RHSs, so do it manually.)
1648
1649 ************************************************************************
1650 * *
1651 prepareAlts
1652 * *
1653 ************************************************************************
1654
1655 prepareAlts tries these things:
1656
1657 1. Eliminate alternatives that cannot match, including the
1658 DEFAULT alternative.
1659
1660 2. If the DEFAULT alternative can match only one possible constructor,
1661 then make that constructor explicit.
1662 e.g.
1663 case e of x { DEFAULT -> rhs }
1664 ===>
1665 case e of x { (a,b) -> rhs }
1666 where the type is a single constructor type. This gives better code
1667 when rhs also scrutinises x or e.
1668
1669 3. Returns a list of the constructors that cannot match in the
1670 DEFAULT alternative (if there is one)
1671
1672 Here "cannot match" includes knowledge from GADTs
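For example (a sketch, not code from this module): given

    data T a where
      TInt  :: T Int
      TBool :: T Bool

    g :: T a -> a -> a
    g t x = case t of
              TInt  -> x + 1
              TBool -> not x

if 'g' is inlined or specialised at a site where the scrutinee has type
T Int, the TBool alternative would require the insoluble equality
Int ~ Bool, so it cannot match and filterAlts drops it.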
1673
1674 It's a good idea to do this stuff before simplifying the alternatives, to
1675 avoid simplifying alternatives we know can't happen, and to come up with
1676 the list of constructors that are handled, to put into the IdInfo of the
1677 case binder, for use when simplifying the alternatives.
1678
1679 Eliminating the default alternative in (1) isn't so obvious, but it can
1680 happen:
1681
1682 data Colour = Red | Green | Blue
1683
1684 f x = case x of
1685 Red -> ..
1686 Green -> ..
1687 DEFAULT -> h x
1688
1689 h y = case y of
1690 Blue -> ..
1691 DEFAULT -> [ case y of ... ]
1692
1693 If we inline h into f, the default case of the inlined h can't happen.
1694 If we don't notice this, we may end up filtering out *all* the cases
1695 of the inner case y, which gives us nowhere to go!
1696 -}
1697
1698 prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
1699 -- The returned alternatives can be empty, if none are possible
1700 prepareAlts scrut case_bndr' alts
1701 | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
1702 -- Case binder is needed just for its type. Note that as an
1703 -- OutId, it has maximum information; this is important.
1704 -- Test simpl013 is an example
1705 = do { us <- getUniquesM
1706 ; let (idcs1, alts1) = filterAlts tc tys imposs_cons alts
1707 (yes2, alts2) = refineDefaultAlt us tc tys idcs1 alts1
1708 (yes3, idcs3, alts3) = combineIdenticalAlts idcs1 alts2
1709 -- "idcs" stands for "impossible default data constructors"
1710 -- i.e. the constructors that can't match the default case
1711 ; when yes2 $ tick (FillInCaseDefault case_bndr')
1712 ; when yes3 $ tick (AltMerge case_bndr')
1713 ; return (idcs3, alts3) }
1714
1715 | otherwise -- Not a data type, so nothing interesting happens
1716 = return ([], alts)
1717 where
1718 imposs_cons = case scrut of
1719 Var v -> otherCons (idUnfolding v)
1720 _ -> []
1721
1722
1723 {-
1724 ************************************************************************
1725 * *
1726 mkCase
1727 * *
1728 ************************************************************************
1729
1730 mkCase tries these things
1731
1732 1. Merge Nested Cases
1733
1734 case e of b { ==> case e of b {
1735 p1 -> rhs1 p1 -> rhs1
1736 ... ...
1737 pm -> rhsm pm -> rhsm
1738 _ -> case b of b' { pn -> let b'=b in rhsn
1739 pn -> rhsn ...
1740 ... po -> let b'=b in rhso
1741 po -> rhso _ -> let b'=b in rhsd
1742 _ -> rhsd
1743 }
1744
1745 which merges the two cases into one when the default alternative of
1746 the outer case scrutinises the outer case's own case binder.  This
1747 transformation is called Case Merging.  It avoids scrutinising the
1748 same variable multiple times.
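
Such nested cases typically arise from inlining.  A source-level sketch
(not from the testsuite):

    f x = case x of { Nothing -> 0; _      -> g x     }
    g y = case y of { Nothing -> 0; Just n -> n + 1   }

After 'g' is inlined into the DEFAULT branch of 'f', and the occurrence
analyser's binder-swap replaces 'x' there by the outer case binder, the
inner case scrutinises that binder and the two cases can be merged.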
1749
1750 2. Eliminate Identity Case
1751
1752 case e of ===> e
1753 True -> True;
1754 False -> False
1755
1756 and similar friends.
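
For instance, the check_eq function below also catches

        case e of x { (a,b)   -> (a,b) }   ===>  e
        case e of x { DEFAULT -> x     }   ===>  e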
1757
1758 3. Scrutinee Constant Folding
1759
1760 case x op# k# of _ { ===> case x of _ {
1761 a1# -> e1 (a1# inv_op# k#) -> e1
1762 a2# -> e2 (a2# inv_op# k#) -> e2
1763 ... ...
1764 DEFAULT -> ed DEFAULT -> ed
1765
1766 where (x op# k#) inv_op# k# == x
1767
1768 And similarly for commuted arguments and for some unary operations.
1769
1770 The purpose of this transformation is not only to avoid an arithmetic
1771 operation at runtime but to allow other transformations to apply in cascade.
1772
1773 Example with the "Merge Nested Cases" optimization (from #12877):
1774
1775 main = case t of t0
1776 0## -> ...
1777 DEFAULT -> case t0 `minusWord#` 1## of t1
1778 0## -> ...
1779 DEFAULT -> case t1 `minusWord#` 1## of t2
1780 0## -> ...
1781 DEFAULT -> case t2 `minusWord#` 1## of _
1782 0## -> ...
1783 DEFAULT -> ...
1784
1785 becomes:
1786
1787 main = case t of _
1788 0## -> ...
1789 1## -> ...
1790 2## -> ...
1791 3## -> ...
1792 DEFAULT -> ...
1793
1794 -}
1795
1796 mkCase, mkCase1, mkCase2, mkCase3
1797 :: DynFlags
1798 -> OutExpr -> OutId
1799 -> OutType -> [OutAlt] -- Alternatives in standard (increasing) order
1800 -> SimplM OutExpr
1801
1802 --------------------------------------------------
1803 -- 1. Merge Nested Cases
1804 --------------------------------------------------
1805
1806 mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
1807 | gopt Opt_CaseMerge dflags
1808 , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
1809 <- stripTicksTop tickishFloatable deflt_rhs
1810 , inner_scrut_var == outer_bndr
1811 = do { tick (CaseMerge outer_bndr)
1812
1813 ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
1814 (con, args, wrap_rhs rhs)
1815 -- Simplifier's no-shadowing invariant should ensure
1816 -- that outer_bndr is not shadowed by the inner patterns
1817 wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
1818 -- The let is OK even for unboxed binders,
1819
1820 wrapped_alts | isDeadBinder inner_bndr = inner_alts
1821 | otherwise = map wrap_alt inner_alts
1822
1823 merged_alts = mergeAlts outer_alts wrapped_alts
1824 -- NB: mergeAlts gives priority to the left
1825 -- case x of
1826 -- A -> e1
1827 -- DEFAULT -> case x of
1828 -- A -> e2
1829 -- B -> e3
1830 -- When we merge, we must ensure that e1 takes
1831 -- precedence over e2 as the value for A!
1832
1833 ; fmap (mkTicks ticks) $
1834 mkCase1 dflags scrut outer_bndr alts_ty merged_alts
1835 }
1836 -- Warning: don't call mkCase recursively!
1837 -- Firstly, there's no point, because inner alts have already had
1838 -- mkCase applied to them, so they won't have a case in their default
1839 -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
1840 -- in munge_rhs may put a case into the DEFAULT branch!
1841
1842 mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
1843
1844 --------------------------------------------------
1845 -- 2. Eliminate Identity Case
1846 --------------------------------------------------
1847
1848 mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _) -- Identity case
1849 | all identity_alt alts
1850 = do { tick (CaseIdentity case_bndr)
1851 ; return (mkTicks ticks $ re_cast scrut rhs1) }
1852 where
1853 ticks = concatMap (stripTicksT tickishFloatable . thdOf3) (tail alts)
1854 identity_alt (con, args, rhs) = check_eq rhs con args
1855
1856 check_eq (Cast rhs co) con args
1857 = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
1858 -- See Note [RHS casts]
1859 check_eq (Lit lit) (LitAlt lit') _ = lit == lit'
1860 check_eq (Var v) _ _ | v == case_bndr = True
1861 check_eq (Var v) (DataAlt con) [] = v == dataConWorkId con
1862 -- Optimisation only
1863 check_eq (Tick t e) alt args = tickishFloatable t &&
1864 check_eq e alt args
1865 check_eq rhs (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $
1866 mkConApp con (arg_tys ++
1867 varsToCoreExprs args)
1868 check_eq _ _ _ = False
1869
1870 arg_tys = map Type (tyConAppArgs (idType case_bndr))
1871
1872 -- Note [RHS casts]
1873 -- ~~~~~~~~~~~~~~~~
1874 -- We've seen this:
1875 -- case e of x { _ -> x `cast` c }
1876 -- And we definitely want to eliminate this case, to give
1877 -- e `cast` c
1878 -- So we throw away the cast from the RHS, and reconstruct
1879 -- it at the other end. All the RHS casts must be the same
1880 -- if (all identity_alt alts) holds.
1881 --
1882 -- Don't worry about nested casts, because the simplifier combines them
1883
1884 re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
1885 re_cast scrut _ = scrut
1886
1887 mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
1888
1889 --------------------------------------------------
1890 -- 3. Scrutinee Constant Folding
1891 --------------------------------------------------
1892
1893 mkCase2 dflags scrut bndr alts_ty alts
1894 | gopt Opt_CaseFolding dflags
1895 , Just (scrut',f) <- caseRules scrut
1896 = mkCase3 dflags scrut' bndr alts_ty (map (mapAlt f) alts)
1897 | otherwise
1898 = mkCase3 dflags scrut bndr alts_ty alts
1899 where
1900 -- We need to keep the correct association between the scrutinee and its
1901 -- binder if the latter isn't dead. Hence we wrap rhs of alternatives with
1902 -- "let bndr = ... in":
1903 --
1904 -- case v + 10 of y =====> case v of y
1905 -- 20 -> e1 10 -> let y = 20 in e1
1906 -- DEFAULT -> e2 DEFAULT -> let y = v + 10 in e2
1907 --
1908 -- Other transformations give: =====> case v of y'
1909 -- 10 -> let y = 20 in e1
1910 -- DEFAULT -> let y = y' + 10 in e2
1911 --
1912 wrap_rhs l rhs
1913 | isDeadBinder bndr = rhs
1914 | otherwise = Let (NonRec bndr l) rhs
1915
1916 mapAlt f alt@(c,bs,e) = case c of
1917 DEFAULT -> (c, bs, wrap_rhs scrut e)
1918 LitAlt l
1919 | isLitValue l -> (LitAlt (mapLitValue f l), bs, wrap_rhs (Lit l) e)
1920 _ -> pprPanic "Unexpected alternative (mkCase2)" (ppr alt)
1921
1922 --------------------------------------------------
1923 -- Catch-all
1924 --------------------------------------------------
1925 mkCase3 _dflags scrut bndr alts_ty alts
1926 = return (Case scrut bndr alts_ty alts)
1927
1928 {-
1929 Note [Dead binders]
1930 ~~~~~~~~~~~~~~~~~~~~
1931 Note that dead-ness is maintained by the simplifier, so that it is
1932 accurate after simplification as well as before.
1933
1934
1935 Note [Cascading case merge]
1936 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1937 Case merging should cascade in one sweep, because it
1938 happens bottom-up
1939
1940 case e of a {
1941 DEFAULT -> case a of b
1942 DEFAULT -> case b of c {
1943 DEFAULT -> e
1944 A -> ea
1945 B -> eb
1946 C -> ec
1947 ==>
1948 case e of a {
1949 DEFAULT -> case a of b
1950 DEFAULT -> let c = b in e
1951 A -> let c = b in ea
1952 B -> eb
1953 C -> ec
1954 ==>
1955 case e of a {
1956 DEFAULT -> let b = a in let c = b in e
1957 A -> let b = a in let c = b in ea
1958 B -> let b = a in eb
1959 C -> ec
1960
1961
1962 However here's a tricky case that we still don't catch, and I don't
1963 see how to catch it in one pass:
1964
1965 case x of c1 { I# a1 ->
1966 case a1 of c2 ->
1967 0 -> ...
1968 DEFAULT -> case x of c3 { I# a2 ->
1969 case a2 of ...
1970
1971 After occurrence analysis (and its binder-swap) we get this
1972
1973 case x of c1 { I# a1 ->
1974 let x = c1 in -- Binder-swap addition
1975 case a1 of c2 ->
1976 0 -> ...
1977 DEFAULT -> case x of c3 { I# a2 ->
1978 case a2 of ...
1979
1980 When we simplify the inner case x, we'll see that
1981 x=c1=I# a1. So we'll bind a2 to a1, and get
1982
1983 case x of c1 { I# a1 ->
1984 case a1 of c2 ->
1985 0 -> ...
1986 DEFAULT -> case a1 of ...
1987
1988 This is correct, but we can't do a case merge in this sweep
1989 because c2 /= a1. Reason: the binding c1=I# a1 went inwards
1990 without getting changed to c1=I# c2.
1991
1992 I don't think this is worth fixing, even if I knew how. It'll
1993 all come out in the next pass anyway.
1994 -}