1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 \section[SimplUtils]{The simplifier utilities}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplUtils (
10 -- Rebuilding
11 mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
12
13 -- Inlining,
14 preInlineUnconditionally, postInlineUnconditionally,
15 activeUnfolding, activeRule,
16 getUnfoldingInRuleMatch,
17 simplEnvForGHCi, updModeForStableUnfoldings, updModeForRuleLHS,
18
19 -- The continuation type
20 SimplCont(..), DupFlag(..),
21 isSimplified,
22 contIsDupable, contResultType, contHoleType,
23 contIsTrivial, contArgs,
24 countValArgs, countArgs,
25 mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
26 interestingCallContext,
27
28 -- ArgInfo
29 ArgInfo(..), ArgSpec(..), mkArgInfo,
30 addValArgTo, addCastTo, addTyArgTo,
31 argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
32
33 abstractFloats
34 ) where
35
36 #include "HsVersions.h"
37
38 import SimplEnv
39 import CoreMonad ( SimplifierMode(..), Tick(..) )
40 import MkCore ( sortQuantVars )
41 import DynFlags
42 import CoreSyn
43 import qualified CoreSubst
44 import PprCore
45 import CoreFVs
46 import CoreUtils
47 import CoreArity
48 import CoreUnfold
49 import Name
50 import Id
51 import Var
52 import Demand
53 import SimplMonad
54 import Type hiding( substTy )
55 import Coercion hiding( substCo, substTy )
56 import DataCon ( dataConWorkId )
57 import VarEnv
58 import VarSet
59 import BasicTypes
60 import Util
61 import MonadUtils
62 import Outputable
63 import FastString
64 import Pair
65
66 import Control.Monad ( when )
67
68 {-
69 ************************************************************************
70 * *
71 The SimplCont and DupFlag types
72 * *
73 ************************************************************************
74
75 A SimplCont allows the simplifier to traverse the expression in a
76 zipper-like fashion. The SimplCont represents the rest of the expression,
77 "above" the point of interest.
78
79 You can also think of a SimplCont as an "evaluation context", using
80 that term in the way it is used for operational semantics. This is the
81 way I usually think of it. For example, you'll often see a syntax for
82 evaluation contexts looking like
83 C ::= [] | C e | case C of alts | C `cast` co
84 That's the kind of thing we are doing here, and I use that syntax in
85 the comments.
86
87
88 Key points:
89 * A SimplCont describes a *strict* context (just like
90 evaluation contexts do). E.g. Just [] is not a SimplCont
91
92 * A SimplCont describes a context that *does not* bind
93 any variables. E.g. \x. [] is not a SimplCont
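
   For example (a sketch, with record fields elided): when the simplifier's
   focus is on f in
        case f x of alts
   the surrounding context is, roughly,
        ApplyToVal x (Select alts (Stop ...))
   Note that the layer nearest the focus (the application) comes first.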
94 -}
95
96 data SimplCont
97 = Stop -- An empty context, or <hole>
98 OutType -- Type of the <hole>
99 CallCtxt -- Tells if there is something interesting about
100 -- the context, and hence the inliner
101 -- should be a bit keener (see interestingCallContext)
102 -- Specifically:
103 -- This is an argument of a function that has RULES
104 -- Inlining the call might allow the rule to fire
105 -- Never ValAppCxt (use ApplyToVal instead)
106 -- or CaseCtxt (use Select instead)
107
108 | CastIt -- <hole> `cast` co
109 OutCoercion -- The coercion simplified
110 -- Invariant: never an identity coercion
111 SimplCont
112
113 | ApplyToVal { -- <hole> arg
114 sc_dup :: DupFlag, -- See Note [DupFlag invariants]
115 sc_arg :: InExpr, -- The argument,
116 sc_env :: StaticEnv, -- and its static env
117 sc_cont :: SimplCont }
118
119 | ApplyToTy { -- <hole> ty
120 sc_arg_ty :: OutType, -- Argument type
121 sc_hole_ty :: OutType, -- Type of the function, presumably (forall a. blah)
122 -- See Note [The hole type in ApplyToTy]
123 sc_cont :: SimplCont }
124
125 | Select { -- case <hole> of alts
126 sc_dup :: DupFlag, -- See Note [DupFlag invariants]
127 sc_bndr :: InId, -- case binder
128 sc_alts :: [InAlt], -- Alternatives
129 sc_env :: StaticEnv, -- and their static environment
130 sc_cont :: SimplCont }
131
132 -- The two strict forms have no DupFlag, because we never duplicate them
133 | StrictBind -- (\x* \xs. e) <hole>
134 InId [InBndr] -- let x* = <hole> in e
135 InExpr StaticEnv -- is a special case
136 SimplCont
137
138 | StrictArg -- f e1 ..en <hole>
139 ArgInfo -- Specifies f, e1..en, Whether f has rules, etc
140 -- plus strictness flags for *further* args
141 CallCtxt -- Whether *this* argument position is interesting
142 SimplCont
143
144 | TickIt
145 (Tickish Id) -- Tick tickish <hole>
146 SimplCont
147
148 data DupFlag = NoDup -- Unsimplified, might be big
149 | Simplified -- Simplified
150 | OkToDup -- Simplified and small
151
152 isSimplified :: DupFlag -> Bool
153 isSimplified NoDup = False
154 isSimplified _ = True -- Invariant: the subst-env is empty
155
156 perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
157 perhapsSubstTy dup env ty
158 | isSimplified dup = ty
159 | otherwise = substTy env ty
160
161 {-
162 Note [DupFlag invariants]
163 ~~~~~~~~~~~~~~~~~~~~~~~~~
164 In both (ApplyToVal dup _ env k)
165 and (Select dup _ _ env k)
166 the following invariants hold
167
168 (a) if dup = OkToDup, then continuation k is also ok-to-dup
169 (b) if dup = OkToDup or Simplified, the subst-env is empty
170 (and hence no need to re-simplify)
171 -}
172
173 instance Outputable DupFlag where
174 ppr OkToDup = ptext (sLit "ok")
175 ppr NoDup = ptext (sLit "nodup")
176 ppr Simplified = ptext (sLit "simpl")
177
178 instance Outputable SimplCont where
179 ppr (Stop ty interesting) = ptext (sLit "Stop") <> brackets (ppr interesting) <+> ppr ty
180 ppr (CastIt co cont ) = (ptext (sLit "CastIt") <+> ppr co) $$ ppr cont
181 ppr (TickIt t cont) = (ptext (sLit "TickIt") <+> ppr t) $$ ppr cont
182 ppr (ApplyToTy { sc_arg_ty = ty, sc_cont = cont })
183 = (ptext (sLit "ApplyToTy") <+> pprParendType ty) $$ ppr cont
184 ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont })
185 = (ptext (sLit "ApplyToVal") <+> ppr dup <+> pprParendExpr arg)
186 $$ ppr cont
187 ppr (StrictBind b _ _ _ cont) = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
188 ppr (StrictArg ai _ cont) = (ptext (sLit "StrictArg") <+> ppr (ai_fun ai)) $$ ppr cont
189 ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
190 = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$
191 ifPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
192
193
194 {- Note [The hole type in ApplyToTy]
195 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
196 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
197 continuation. It is absolutely necessary to compute contHoleType, but it is
198 not used for anything else (and hence may not be evaluated).
199
200 Why is it necessary for contHoleType? Consider the continuation
201 ApplyToType Int (Stop Int)
202 corresponding to
203 (<hole> @Int) :: Int
204 What is the type of <hole>? It could be (forall a. Int) or (forall a. a),
205 and there is no way to know which, so we must record it.
206
207 In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType
208 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
209 doesn't matter because we'll never compute them all.
210
211 ************************************************************************
212 * *
213 ArgInfo and ArgSpec
214 * *
215 ************************************************************************
216 -}
217
218 data ArgInfo
219 = ArgInfo {
220 ai_fun :: OutId, -- The function
221 ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order)
222 ai_type :: OutType, -- Type of (f a1 ... an)
223
224 ai_rules :: [CoreRule], -- Rules for this function
225
226 ai_encl :: Bool, -- Flag saying whether this function
227 -- or an enclosing one has rules (recursively)
228 -- True => be keener to inline in all args
229
230 ai_strs :: [Bool], -- Strictness of remaining arguments
231 -- Usually infinite, but if it is finite it guarantees
232 -- that the function diverges after being given
233 -- that number of args
234 ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline
235 -- Always infinite
236 }
237
238 data ArgSpec
239 = ValArg OutExpr -- Apply to this (coercion or value); c.f. ApplyToVal
240 | TyArg { as_arg_ty :: OutType -- Apply to this type; c.f. ApplyToTy
241 , as_hole_ty :: OutType } -- Type of the function (presumably forall a. blah)
242 | CastBy OutCoercion -- Cast by this; c.f. CastIt
243
244 instance Outputable ArgSpec where
245 ppr (ValArg e) = ptext (sLit "ValArg") <+> ppr e
246 ppr (TyArg { as_arg_ty = ty }) = ptext (sLit "TyArg") <+> ppr ty
247 ppr (CastBy c) = ptext (sLit "CastBy") <+> ppr c
248
249 addValArgTo :: ArgInfo -> OutExpr -> ArgInfo
250 addValArgTo ai arg = ai { ai_args = ValArg arg : ai_args ai
251 , ai_type = funResultTy (ai_type ai) }
252
253 addTyArgTo :: ArgInfo -> OutType -> ArgInfo
254 addTyArgTo ai arg_ty = ai { ai_args = arg_spec : ai_args ai
255 , ai_type = applyTy poly_fun_ty arg_ty }
256 where
257 poly_fun_ty = ai_type ai
258 arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = poly_fun_ty }
259
260 addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
261 addCastTo ai co = ai { ai_args = CastBy co : ai_args ai
262 , ai_type = pSnd (coercionKind co) }
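
-- A small worked example (a sketch): if ai_type is (Int -> Bool), then
-- addValArgTo records the argument and shrinks ai_type to Bool (via
-- funResultTy); and addCastTo with a coercion co :: t1 ~ t2 changes
-- ai_type from t1 to t2 (the second component of coercionKind co).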
263
264 argInfoAppArgs :: [ArgSpec] -> [OutExpr]
265 argInfoAppArgs [] = []
266 argInfoAppArgs (CastBy {} : _) = [] -- Stop at a cast
267 argInfoAppArgs (ValArg e : as) = e : argInfoAppArgs as
268 argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as
269
270 pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
271 pushSimplifiedArgs _env [] k = k
272 pushSimplifiedArgs env (arg : args) k
273 = case arg of
274 TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
275 -> ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
276 ValArg e -> ApplyToVal { sc_arg = e, sc_env = env, sc_dup = Simplified, sc_cont = rest }
277 CastBy c -> CastIt c rest
278 where
279 rest = pushSimplifiedArgs env args k
280 -- The env has an empty SubstEnv
281
282 argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
283 -- NB: the [ArgSpec] is reversed so that the first arg
284 -- in the list is the last one in the application
285 argInfoExpr fun rev_args
286 = go rev_args
287 where
288 go [] = Var fun
289 go (ValArg a : as) = go as `App` a
290 go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty
291 go (CastBy co : as) = mkCast (go as) co
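
-- For example (a sketch):
--      argInfoExpr f [ValArg a2, ValArg a1]  =  f a1 a2
-- because the ArgSpec list is kept in reverse order (see ai_args).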
292
293
294 {-
295 ************************************************************************
296 * *
297 Functions on SimplCont
298 * *
299 ************************************************************************
300 -}
301
302 mkBoringStop :: OutType -> SimplCont
303 mkBoringStop ty = Stop ty BoringCtxt
304
305 mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in CoreUnfold
306 mkRhsStop ty = Stop ty RhsCtxt
307
308 mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
309 mkLazyArgStop ty cci = Stop ty cci
310
311 -------------------
312 contIsRhsOrArg :: SimplCont -> Bool
313 contIsRhsOrArg (Stop {}) = True
314 contIsRhsOrArg (StrictBind {}) = True
315 contIsRhsOrArg (StrictArg {}) = True
316 contIsRhsOrArg _ = False
317
318 contIsRhs :: SimplCont -> Bool
319 contIsRhs (Stop _ RhsCtxt) = True
320 contIsRhs _ = False
321
322 -------------------
323 contIsDupable :: SimplCont -> Bool
324 contIsDupable (Stop {}) = True
325 contIsDupable (ApplyToTy { sc_cont = k }) = contIsDupable k
326 contIsDupable (ApplyToVal { sc_dup = OkToDup }) = True -- See Note [DupFlag invariants]
327 contIsDupable (Select { sc_dup = OkToDup }) = True -- ...ditto...
328 contIsDupable (CastIt _ k) = contIsDupable k
329 contIsDupable _ = False
330
331 -------------------
332 contIsTrivial :: SimplCont -> Bool
333 contIsTrivial (Stop {}) = True
334 contIsTrivial (ApplyToTy { sc_cont = k }) = contIsTrivial k
335 contIsTrivial (ApplyToVal { sc_arg = Coercion _, sc_cont = k }) = contIsTrivial k
336 contIsTrivial (CastIt _ k) = contIsTrivial k
337 contIsTrivial _ = False
338
339 -------------------
340 contResultType :: SimplCont -> OutType
341 contResultType (Stop ty _) = ty
342 contResultType (CastIt _ k) = contResultType k
343 contResultType (StrictBind _ _ _ _ k) = contResultType k
344 contResultType (StrictArg _ _ k) = contResultType k
345 contResultType (Select { sc_cont = k }) = contResultType k
346 contResultType (ApplyToTy { sc_cont = k }) = contResultType k
347 contResultType (ApplyToVal { sc_cont = k }) = contResultType k
348 contResultType (TickIt _ k) = contResultType k
349
350 contHoleType :: SimplCont -> OutType
351 contHoleType (Stop ty _) = ty
352 contHoleType (TickIt _ k) = contHoleType k
353 contHoleType (CastIt co _) = pFst (coercionKind co)
354 contHoleType (StrictBind b _ _ se _) = substTy se (idType b)
355 contHoleType (StrictArg ai _ _) = funArgTy (ai_type ai)
356 contHoleType (ApplyToTy { sc_hole_ty = ty }) = ty -- See Note [The hole type in ApplyToTy]
357 contHoleType (ApplyToVal { sc_arg = e, sc_env = se, sc_dup = dup, sc_cont = k })
358 = mkFunTy (perhapsSubstTy dup se (exprType e))
359 (contHoleType k)
360 contHoleType (Select { sc_dup = d, sc_bndr = b, sc_env = se })
361 = perhapsSubstTy d se (idType b)
362
363 -------------------
364 countValArgs :: SimplCont -> Int
365 -- Count value arguments excluding coercions
366 countValArgs (ApplyToVal { sc_arg = arg, sc_cont = cont })
367 | Coercion {} <- arg = countValArgs cont
368 | otherwise = 1 + countValArgs cont
369 countValArgs _ = 0
370
371 countArgs :: SimplCont -> Int
372 -- Count all arguments, including types, coercions, and other values
373 countArgs (ApplyToTy { sc_cont = cont }) = 1 + countArgs cont
374 countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont
375 countArgs _ = 0
376
377 contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
378 -- Summarises value args, discards type args and coercions
379 -- The returned continuation of the call is only used to
380 -- answer questions like "are you interesting?"
381 contArgs cont
382 | lone cont = (True, [], cont)
383 | otherwise = go [] cont
384 where
385 lone (ApplyToTy {}) = False -- See Note [Lone variables] in CoreUnfold
386 lone (ApplyToVal {}) = False
387 lone (CastIt {}) = False
388 lone _ = True
389
390 go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
391 = go (is_interesting arg se : args) k
392 go args (ApplyToTy { sc_cont = k }) = go args k
393 go args (CastIt _ k) = go args k
394 go args k = (False, reverse args, k)
395
396 is_interesting arg se = interestingArg se arg
397 -- Do *not* use short-cutting substitution here
398 -- because we want to get as much IdInfo as possible
399
400
401 -------------------
402 mkArgInfo :: Id
403 -> [CoreRule] -- Rules for function
404 -> Int -- Number of value args
405 -> SimplCont -- Context of the call
406 -> ArgInfo
407
408 mkArgInfo fun rules n_val_args call_cont
409 | n_val_args < idArity fun -- Note [Unsaturated functions]
410 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
411 , ai_rules = rules, ai_encl = False
412 , ai_strs = vanilla_stricts
413 , ai_discs = vanilla_discounts }
414 | otherwise
415 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
416 , ai_rules = rules
417 , ai_encl = interestingArgContext rules call_cont
418 , ai_strs = add_type_str fun_ty arg_stricts
419 , ai_discs = arg_discounts }
420 where
421 fun_ty = idType fun
422
423 vanilla_discounts, arg_discounts :: [Int]
424 vanilla_discounts = repeat 0
425 arg_discounts = case idUnfolding fun of
426 CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
427 -> discounts ++ vanilla_discounts
428 _ -> vanilla_discounts
429
430 vanilla_stricts, arg_stricts :: [Bool]
431 vanilla_stricts = repeat False
432
433 arg_stricts
434 = case splitStrictSig (idStrictness fun) of
435 (demands, result_info)
436 | not (demands `lengthExceeds` n_val_args)
437 -> -- Enough args, use the strictness given.
438 -- For bottoming functions we used to pretend that the arg
439 -- is lazy, so that we don't treat the arg as an
440 -- interesting context. This avoids substituting
441 -- top-level bindings for (say) strings into
442 -- calls to error. But now we are more careful about
443 -- inlining lone variables, so it's ok (see SimplUtils.analyseCont)
444 if isBotRes result_info then
445 map isStrictDmd demands -- Finite => result is bottom
446 else
447 map isStrictDmd demands ++ vanilla_stricts
448 | otherwise
449 -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
450 <+> ppr n_val_args <+> ppr demands )
451 vanilla_stricts -- Not enough args, or no strictness
452
453 add_type_str :: Type -> [Bool] -> [Bool]
454 -- If the function arg types are strict, record that in the 'strictness bits'
455 -- No need to instantiate because unboxed types (which dominate the strict
456 -- types) can't instantiate type variables.
457 -- add_type_str is done repeatedly (for each call); might be better
458 -- once-for-all in the function
459 -- But beware primops/datacons with no strictness
460 add_type_str _ [] = []
461 add_type_str fun_ty strs -- Look through foralls
462 | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty -- Includes coercions
463 = add_type_str fun_ty' strs
464 add_type_str fun_ty (str:strs) -- Add strict-type info
465 | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
466 = (str || isStrictType arg_ty) : add_type_str fun_ty' strs
467 add_type_str _ strs
468 = strs
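    -- For example (a sketch): for a function whose type is
    --     Int# -> Bool -> Int
    -- add_type_str forces the first flag to True, because Int# is an
    -- unlifted (hence strict) type, whatever the demand signature says.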
469
470 {- Note [Unsaturated functions]
471 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
472 Consider (test eyeball/inline4)
473 x = a:as
474 y = f x
475 where f has arity 2. Then we do not want to inline 'x', because
476 it'll just be floated out again. Even if f has lots of discounts
477 on its first argument -- it must be saturated for these to kick in
478 -}
479
480
481 {-
482 ************************************************************************
483 * *
484 Interesting arguments
485 * *
486 ************************************************************************
487
488 Note [Interesting call context]
489 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
490 We want to avoid inlining an expression where there can't possibly be
491 any gain, such as in an argument position. Hence, if the continuation
492 is interesting (eg. a case scrutinee, application etc.) then we
493 inline, otherwise we don't.
494
495 Previously some_benefit used to return True only if the variable was
496 applied to some value arguments. This didn't work:
497
498 let x = _coerce_ (T Int) Int (I# 3) in
499 case _coerce_ Int (T Int) x of
500 I# y -> ....
501
502 we want to inline x, but can't see that it's a constructor in a case
503 scrutinee position, and some_benefit is False.
504
505 Another example:
506
507 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
508
509 .... case dMonadST _@_ x0 of (a,b,c) -> ....
510
511 we'd really like to inline dMonadST here, but we *don't* want to
512 inline if the case expression is just
513
514 case x of y { DEFAULT -> ... }
515
516 since we can just eliminate this case instead (x is in WHNF). Similar
517 applies when x is bound to a lambda expression. Hence
518 contIsInteresting looks for case expressions with just a single
519 default case.
520 -}
521
522 interestingCallContext :: SimplCont -> CallCtxt
523 -- See Note [Interesting call context]
524 interestingCallContext cont
525 = interesting cont
526 where
527 interesting (Select {}) = CaseCtxt
528 interesting (ApplyToVal {}) = ValAppCtxt
529 -- Can happen if we have (f Int |> co) y
530 -- If f has an INLINE prag we need to give it some
531 -- motivation to inline. See Note [Cast then apply]
532 -- in CoreUnfold
533 interesting (StrictArg _ cci _) = cci
534 interesting (StrictBind {}) = BoringCtxt
535 interesting (Stop _ cci) = cci
536 interesting (TickIt _ k) = interesting k
537 interesting (ApplyToTy { sc_cont = k }) = interesting k
538 interesting (CastIt _ k) = interesting k
539 -- If this call is the arg of a strict function, the context
540 -- is a bit interesting. If we inline here, we may get useful
541 -- evaluation information to avoid repeated evals: e.g.
542 -- x + (y * z)
543 -- Here the contIsInteresting makes the '*' keener to inline,
544 -- which in turn exposes a constructor which makes the '+' inline.
545 -- Assuming that +,* aren't small enough to inline regardless.
546 --
547 -- It's also very important to inline in a strict context for things
548 -- like
549 -- foldr k z (f x)
550 -- Here, the context of (f x) is strict, and if f's unfolding is
551 -- a build it's *great* to inline it here. So we must ensure that
552 -- the context for (f x) is not totally uninteresting.
553
554 interestingArgContext :: [CoreRule] -> SimplCont -> Bool
555 -- If the argument has form (f x y), where x,y are boring,
556 -- and f is marked INLINE, then we don't want to inline f.
557 -- But if the context of the argument is
558 -- g (f x y)
559 -- where g has rules, then we *do* want to inline f, in case it
560 -- exposes a rule that might fire. Similarly, if the context is
561 -- h (g (f x x))
562 -- where h has rules, then we do want to inline f; hence the
563 -- call_cont argument to interestingArgContext
564 --
565 -- The ai_encl flag makes this happen; if it's
566 -- set, the inliner gets just enough keener to inline f
567 -- regardless of how boring f's arguments are, if it's marked INLINE
568 --
569 -- The alternative would be to *always* inline an INLINE function,
570 -- regardless of how boring its context is; but that seems overkill
571 -- For example, it'd mean that wrapper functions were always inlined
572 --
573 -- The call_cont passed to interestingArgContext is the context of
574 -- the call itself, e.g. g <hole> in the example above
575 interestingArgContext rules call_cont
576 = notNull rules || enclosing_fn_has_rules
577 where
578 enclosing_fn_has_rules = go call_cont
579
580 go (Select {}) = False
581 go (ApplyToVal {}) = False -- Shouldn't really happen
582 go (ApplyToTy {}) = False -- Ditto
583 go (StrictArg _ cci _) = interesting cci
584 go (StrictBind {}) = False -- ??
585 go (CastIt _ c) = go c
586 go (Stop _ cci) = interesting cci
587 go (TickIt _ c) = go c
588
589 interesting RuleArgCtxt = True
590 interesting _ = False
591
592
593 {- Note [Interesting arguments]
594 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
595 An argument is interesting if it deserves a discount for unfoldings
596 with a discount in that argument position. The idea is to avoid
597 unfolding a function that is applied only to variables that have no
598 unfolding (i.e. they are probably lambda bound), e.g. in (f x y z).
599 There is little point in inlining f here.
600
601 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
602 we must look through lets, eg (let x = e in C a b), because the let will
603 float, exposing the value, if we inline. That makes it different to
604 exprIsHNF.
605
606 Before 2009 we said it was interesting if the argument had *any* structure
607 at all; i.e. (hasSomeUnfolding v). But that does too much inlining; see Trac #3016.
608
609 But we don't regard (f x y) as interesting, unless f is unsaturated.
610 If it's saturated and f hasn't inlined, then it's probably not going
611 to now!
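
To illustrate, a sketch of the classification made by interestingArg below:
     Just v   is ValueArg    (constructor application)
     \x -> e  is ValueArg    (a manifest value lambda)
     g y      is NonTrivArg  (some structure, but no discount earned,
                              assuming g is just a lambda-bound variable)
     y        is TrivArg     (bare variable with no unfolding)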
612
613 Note [Conlike is interesting]
614 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
615 Consider
616 f d = ...((*) d x y)...
617 ... f (df d')...
618 where df is con-like. Then we'd really like to inline 'f' so that the
619 rule for (*) (df d) can fire. To do this
620 a) we give a discount for being an argument of a class-op (eg (*) d)
621 b) we say that a con-like argument (eg (df d)) is interesting
622 -}
623
624 interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
625 -- See Note [Interesting arguments]
626 interestingArg env e = go env 0 e
627 where
628 -- n is # value args to which the expression is applied
629 go env n (Var v)
630 | SimplEnv { seIdSubst = ids, seInScope = in_scope } <- env
631 = case lookupVarEnv ids v of
632 Nothing -> go_var n (refineFromInScope in_scope v)
633 Just (DoneId v') -> go_var n (refineFromInScope in_scope v')
634 Just (DoneEx e) -> go (zapSubstEnv env) n e
635 Just (ContEx tvs cvs ids e) -> go (setSubstEnv env tvs cvs ids) n e
636
637 go _ _ (Lit {}) = ValueArg
638 go _ _ (Type _) = TrivArg
639 go _ _ (Coercion _) = TrivArg
640 go env n (App fn (Type _)) = go env n fn
641 go env n (App fn (Coercion _)) = go env n fn
642 go env n (App fn _) = go env (n+1) fn
643 go env n (Tick _ a) = go env n a
644 go env n (Cast e _) = go env n e
645 go env n (Lam v e)
646 | isTyVar v = go env n e
647 | n>0 = go env (n-1) e
648 | otherwise = ValueArg
649 go env n (Let _ e) = case go env n e of { ValueArg -> ValueArg; _ -> NonTrivArg }
650 go _ _ (Case {}) = NonTrivArg
651
652 go_var n v
653 | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather than
654 -- data constructors here
655 | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
656 | n > 0 = NonTrivArg -- Saturated or unknown call
657 | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
658 -- See Note [Conlike is interesting]
659 | otherwise = TrivArg -- n==0, no useful unfolding
660 where
661 conlike_unfolding = isConLikeUnfolding (idUnfolding v)
662
663 {-
664 ************************************************************************
665 * *
666 SimplifierMode
667 * *
668 ************************************************************************
669
670 The SimplifierMode controls several switches; see its definition in
671 CoreMonad
672 sm_rules :: Bool -- Whether RULES are enabled
673 sm_inline :: Bool -- Whether inlining is enabled
674 sm_case_case :: Bool -- Whether case-of-case is enabled
675 sm_eta_expand :: Bool -- Whether eta-expansion is enabled
676 -}
677
678 simplEnvForGHCi :: DynFlags -> SimplEnv
679 simplEnvForGHCi dflags
680 = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
681 , sm_phase = InitialPhase
682 , sm_rules = rules_on
683 , sm_inline = False
684 , sm_eta_expand = eta_expand_on
685 , sm_case_case = True }
686 where
687 rules_on = gopt Opt_EnableRewriteRules dflags
688 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
689 -- Do not do any inlining, in case we expose some unboxed
690 -- tuple stuff that confuses the bytecode interpreter
691
692 updModeForStableUnfoldings :: Activation -> SimplifierMode -> SimplifierMode
693 -- See Note [Simplifying inside stable unfoldings]
694 updModeForStableUnfoldings inline_rule_act current_mode
695 = current_mode { sm_phase = phaseFromActivation inline_rule_act
696 , sm_inline = True
697 , sm_eta_expand = False }
698 -- For sm_rules, just inherit; sm_rules might be "off"
699 -- because of -fno-enable-rewrite-rules
700 where
701 phaseFromActivation (ActiveAfter n) = Phase n
702 phaseFromActivation _ = InitialPhase
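    -- For example (a sketch): a pragma {-# INLINE [2] f #-} has
    -- activation (ActiveAfter 2), so inside f's stable unfolding we
    -- simplify as if we were already in Phase 2.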
703
704 updModeForRuleLHS :: SimplifierMode -> SimplifierMode
705 -- See Note [Simplifying rule LHSs]
706 updModeForRuleLHS current_mode
707 = current_mode { sm_phase = InitialPhase
708 , sm_inline = False
709 , sm_rules = False
710 , sm_eta_expand = False }
711
712 {- Note [Simplifying rule LHSs]
713 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
714 When simplifying on the LHS of a rule, refrain from all inlining and
715 all RULES. Doing anything to the LHS is plain confusing, because it
716 means that what the rule matches is not what the user wrote.
717 c.f. Trac #10595, and #10528.
718
719 Moreover, inlining (or applying rules) on rule LHSs risks introducing
720 Ticks into the LHS, which makes matching trickier. Trac #10665, #10745.
721
722
723 Note [Inlining in gentle mode]
724 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
725 Something is inlined if
726 (i) the sm_inline flag is on, AND
727 (ii) the thing has an INLINE pragma, AND
728 (iii) the thing is inlinable in the earliest phase.
729
730 Example of why (iii) is important:
731 {-# INLINE [~1] g #-}
732 g = ...
733
734 {-# INLINE f #-}
735 f x = g (g x)
736
737 If we were to inline g into f's inlining, then an importing module would
738 never be able to do
739 f e --> g (g e) ---> RULE fires
740 because the stable unfolding for f has had g inlined into it.
741
742 On the other hand, it is bad not to do ANY inlining into a
743 stable unfolding, because then recursive knots in instance declarations
744 don't get unravelled.
745
746 However, *sometimes* SimplGently must do no call-site inlining at all
747 (hence sm_inline = False). Before full laziness we must be careful
748 not to inline wrappers, because doing so inhibits floating
749 e.g. ...(case f x of ...)...
750 ==> ...(case (case x of I# x# -> fw x#) of ...)...
751 ==> ...(case x of I# x# -> case fw x# of ...)...
752 and now the redex (f x) isn't floatable any more.
753
754 The no-inlining thing is also important for Template Haskell. You might be
755 compiling in one-shot mode with -O2; but when TH compiles a splice before
756 running it, we don't want to use -O2. Indeed, we don't want to inline
757 anything, because the byte-code interpreter might get confused about
758 unboxed tuples and suchlike.
759
760 Note [Simplifying inside stable unfoldings]
761 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
762 We must take care with simplification inside stable unfoldings (which come from
763 INLINE pragmas).
764
765 First, consider the following example
766 let f = \pq -> BIG
767 in
768 let g = \y -> f y y
769 {-# INLINE g #-}
770 in ...g...g...g...g...g...
771 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
772 and thence copied multiple times when g is inlined. HENCE we treat
773 any occurrence in a stable unfolding as a multiple occurrence, not a single
774 one; see OccurAnal.addRuleUsage.
775
776 Second, we *do* want to do some modest rules/inlining stuff in stable
777 unfoldings, partly to eliminate senseless crap, and partly to break
778 the recursive knots generated by instance declarations.
779
780 However, suppose we have
781 {-# INLINE <act> f #-}
782 f = <rhs>
783 meaning "inline f in phases p where activation <act>(p) holds".
784 Then what inlinings/rules can we apply to the copy of <rhs> captured in
785 f's stable unfolding? Our model is that literally <rhs> is substituted for
786 f when it is inlined. So our conservative plan (implemented by
787 updModeForStableUnfoldings) is this:
788
789 -------------------------------------------------------------
790 When simplifying the RHS of a stable unfolding, set the phase
791 to the phase in which the stable unfolding first becomes active
792 -------------------------------------------------------------
793
794 That ensures that
795
796 a) Rules/inlinings that *cease* being active before p will
797 not apply to the stable unfolding, consistent with it being
798 inlined in its *original* form in phase p.
799
800 b) Rules/inlinings that only become active *after* p will
801 not apply to the stable unfolding, again to be consistent with
802 inlining the *original* rhs in phase p.
803
804 For example,
805 {-# INLINE f #-}
806 f x = ...g...
807
808 {-# NOINLINE [1] g #-}
809 g y = ...
810
811 {-# RULE h g = ... #-}
812 Here we must not inline g into f's RHS, even when we get to phase 0,
813 because when f is later inlined into some other module we want the
814 rule for h to fire.
815
816 Similarly, consider
817 {-# INLINE f #-}
818 f x = ...g...
819
820 g y = ...
821 and suppose that there are auto-generated specialisations and a strictness
822 wrapper for g. The specialisations get activation AlwaysActive, and the
823 strictness wrapper gets activation (ActiveAfter 0). So the strictness
824 wrapper fails the test and won't be inlined into f's stable unfolding. That
825 means f can inline, expose the specialised call to g, so the specialisation
826 rules can fire.
827
828 A note about wrappers
829 ~~~~~~~~~~~~~~~~~~~~~
830 It's also important not to inline a worker back into a wrapper.
831 A wrapper looks like
832 wrapper = inline_me (\x -> ...worker... )
833 Normally, the inline_me prevents the worker getting inlined into
834 the wrapper (initially, the worker's only call site!). But,
835 if the wrapper is sure to be called, the strictness analyser will
836 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
837 continuation.
838 -}
839
840 activeUnfolding :: SimplEnv -> Id -> Bool
841 activeUnfolding env
842 | not (sm_inline mode) = active_unfolding_minimal
843 | otherwise = case sm_phase mode of
844 InitialPhase -> active_unfolding_gentle
845 Phase n -> active_unfolding n
846 where
847 mode = getMode env
848
849 getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
850 -- When matching in RULE, we want to "look through" an unfolding
851 -- (to see a constructor) if *rules* are on, even if *inlinings*
852 -- are not. A notable example is DFuns, which really we want to
853 -- match in rules like (op dfun) in gentle mode. Another example
854 -- is 'otherwise' which we want exprIsConApp_maybe to be able to
855 -- see very early on
856 getUnfoldingInRuleMatch env
857 = (in_scope, id_unf)
858 where
859 in_scope = seInScope env
860 mode = getMode env
861 id_unf id | unf_is_active id = idUnfolding id
862 | otherwise = NoUnfolding
863 unf_is_active id
864 | not (sm_rules mode) = active_unfolding_minimal id
865 | otherwise = isActive (sm_phase mode) (idInlineActivation id)
866
867 active_unfolding_minimal :: Id -> Bool
868 -- Compulsory unfoldings only
869 -- Ignore SimplGently, because we want to inline regardless;
870 -- the Id has no top-level binding at all
871 --
872 -- NB: we used to have a second exception, for data con wrappers.
873 -- On the grounds that we use gentle mode for rule LHSs, and
874 -- they match better when data con wrappers are inlined.
875 -- But that only really applies to the trivial wrappers (like (:)),
876 -- and they are now constructed as Compulsory unfoldings (in MkId)
877 -- so they'll happen anyway.
878 active_unfolding_minimal id = isCompulsoryUnfolding (realIdUnfolding id)
879
880 active_unfolding :: PhaseNum -> Id -> Bool
881 active_unfolding n id = isActiveIn n (idInlineActivation id)
882
883 active_unfolding_gentle :: Id -> Bool
884 -- Anything that is early-active
885 -- See Note [Gentle mode]
886 active_unfolding_gentle id
887 = isInlinePragma prag
888 && isEarlyActive (inlinePragmaActivation prag)
889 -- NB: wrappers are not early-active
890 where
891 prag = idInlinePragma id
892
893 ----------------------
894 activeRule :: SimplEnv -> Activation -> Bool
895 -- Nothing => No rules at all
896 activeRule env
897 | not (sm_rules mode) = \_ -> False -- Rewriting is off
898 | otherwise = isActive (sm_phase mode)
899 where
900 mode = getMode env
901
902 {-
903 ************************************************************************
904 * *
905 preInlineUnconditionally
906 * *
907 ************************************************************************
908
909 preInlineUnconditionally
910 ~~~~~~~~~~~~~~~~~~~~~~~~
911 @preInlineUnconditionally@ examines a bndr to see if it is used just
912 once in a completely safe way, so that it is safe to discard the
913 binding and inline its RHS at the (unique) usage site, REGARDLESS of how
914 big the RHS might be. If this is the case we don't simplify the RHS
915 first, but just inline it un-simplified.
916
917 This is much better than first simplifying a perhaps-huge RHS and then
918 inlining and re-simplifying it. Indeed, it can be at least quadratically
919 better. Consider
920
921 x1 = e1
922 x2 = e2[x1]
923 x3 = e3[x2]
924 ...etc...
925 xN = eN[xN-1]
926
927 We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times etc.
928 This can happen with cascades of functions too:
929
930 f1 = \x1.e1
931 f2 = \xs.e2[f1]
932 f3 = \xs.e3[f2]
933 ...etc...
934
935 THE MAIN INVARIANT is this:
936
937 ---- preInlineUnconditionally invariant -----
938 IF preInlineUnconditionally chooses to inline x = <rhs>
939 THEN doing the inlining should not change the occurrence
940 info for the free vars of <rhs>
941 ----------------------------------------------
942
943 For example, it's tempting to look at trivial binding like
944 x = y
945 and inline it unconditionally. But suppose x is used many times,
946 but this is the unique occurrence of y. Then inlining x would change
947 y's occurrence info, which breaks the invariant. It matters: y
948 might have a BIG rhs, which will now be dup'd at every occurrence of x.
949
950
951 Even RHSs labelled InlineMe aren't caught here, because there might be
952 no benefit from inlining at the call site.
953
954 [Sept 01] Don't unconditionally inline a top-level thing, because that
955 can simply make a static thing into something built dynamically. E.g.
956 x = (a,b)
957 main = \s -> h x
958
959 [Remember that we treat \s as a one-shot lambda.] No point in
960 inlining x unless there is something interesting about the call site.
961
962 But watch out: if you aren't careful, some useful foldr/build fusion
963 can be lost (most notably in spectral/hartel/parstof) because the
964 foldr didn't see the build. Doing the dynamic allocation isn't a big
965 deal, in fact, but losing the fusion can be. But the right thing here
966 seems to be to do a callSiteInline based on the fact that there is
967 something interesting about the call site (it's strict). Hmm. That
968 seems a bit fragile.
969
970 Conclusion: inline top level things gaily until Phase 0 (the last
971 phase), at which point don't.
972
973 Note [pre/postInlineUnconditionally in gentle mode]
974 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
975 Even in gentle mode we want to do preInlineUnconditionally. The
976 reason is that too little clean-up happens if you don't inline
977 use-once things. Also a bit of inlining is *good* for full laziness;
978 it can expose constant sub-expressions. Example in
979 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
980 let-float if you inline windowToViewport
981
982 However, as usual for Gentle mode, do not inline things that are
983 inactive in the initial stages. See Note [Gentle mode].
984
985 Note [Stable unfoldings and preInlineUnconditionally]
986 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
987 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
988 Example
989
990 {-# INLINE f #-}
991 f :: Eq a => a -> a
992 f x = ...
993
994 fInt :: Int -> Int
995 fInt = f Int dEqInt
996
997 ...fInt...fInt...fInt...
998
999 Here f occurs just once, in the RHS of fInt. But if we inline it there
1000 we'll lose the opportunity to inline at each of fInt's call sites.
1001 The INLINE pragma will only inline when the application is saturated
1002 for exactly this reason; and we don't want PreInlineUnconditionally
1003 to second-guess it. A live example is Trac #3736.
1004 c.f. Note [Stable unfoldings and postInlineUnconditionally]
1005
1006 Note [Top-level bottoming Ids]
1007 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1008 Don't inline top-level Ids that are bottoming, even if they are used just
1009 once, because FloatOut has gone to some trouble to extract them out.
1010 Inlining them won't make the program run faster!
1011
1012 Note [Do not inline CoVars unconditionally]
1013 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1014 Coercion variables appear inside coercions, and the RHS of a let-binding
1015 is a term (not a coercion) so we can't necessarily inline the latter in
1016 the former.
1017 -}
1018
1019 preInlineUnconditionally :: DynFlags -> SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
1020 -- Precondition: rhs satisfies the let/app invariant
1021 -- See Note [CoreSyn let/app invariant] in CoreSyn
1022 -- Reason: we don't want to inline single uses, or discard dead bindings,
1023 -- for unlifted, side-effect-full bindings
1024 preInlineUnconditionally dflags env top_lvl bndr rhs
1025 | not active = False
1026 | isStableUnfolding (idUnfolding bndr) = False -- Note [Stable unfoldings and preInlineUnconditionally]
1027 | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
1028 | not (gopt Opt_SimplPreInlining dflags) = False
1029 | isCoVar bndr = False -- Note [Do not inline CoVars unconditionally]
1030 | otherwise = case idOccInfo bndr of
1031 IAmDead -> True -- Happens in ((\x.1) v)
1032 OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
1033 _ -> False
1034 where
1035 mode = getMode env
1036 active = isActive (sm_phase mode) act
1037 -- See Note [pre/postInlineUnconditionally in gentle mode]
1038 act = idInlineActivation bndr
1039 try_once in_lam int_cxt -- There's one textual occurrence
1040 | not in_lam = isNotTopLevel top_lvl || early_phase
1041 | otherwise = int_cxt && canInlineInLam rhs
1042
1043 -- Be very careful before inlining inside a lambda, because (a) we must not
1044 -- invalidate occurrence information, and (b) we want to avoid pushing a
1045 -- single allocation (here) into multiple allocations (inside lambda).
1046 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
1047 -- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
1048 -- where
1049 -- is_cheap = exprIsCheap rhs
1050 -- ok = is_cheap && int_cxt
1051
1052 -- int_cxt The context isn't totally boring
1053 -- E.g. let f = \ab.BIG in \y. map f xs
1054 -- Don't want to substitute for f, because then we allocate
1055 -- its closure every time the \y is called
1056 -- But: let f = \ab.BIG in \y. map (f y) xs
1057 -- Now we do want to substitute for f, even though it's not
1058 -- saturated, because we're going to allocate a closure for
1059 -- (f y) every time round the loop anyhow.
1060
1061 -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
1062 -- so substituting rhs inside a lambda doesn't change the occ info.
1063 -- Sadly, not quite the same as exprIsHNF.
1064 canInlineInLam (Lit _) = True
1065 canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
1066 canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
1067 canInlineInLam _ = False
1068 -- not ticks. Counting ticks cannot be duplicated, and non-counting
1069 -- ticks around a Lam will disappear anyway.
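    -- For example (a sketch): a literal or a manifest value lambda
    -- (\x -> e) passes canInlineInLam, but an application like (f y)
    -- does not; so a binding whose RHS is (f y) is not pre-inlined
    -- when its single occurrence is under a lambda.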
1070
1071 early_phase = case sm_phase mode of
1072 Phase 0 -> False
1073 _ -> True
1074 -- If we don't have this early_phase test, consider
1075 -- x = length [1,2,3]
1076 -- The full laziness pass carefully floats all the cons cells to
1077 -- top level, and preInlineUnconditionally floats them all back in.
1078 -- Result is (a) static allocation replaced by dynamic allocation
1079 -- (b) many simplifier iterations because this tickles
1080 -- a related problem; only one inlining per pass
1081 --
1082 -- On the other hand, I have seen cases where top-level fusion is
1083 -- lost if we don't inline top level thing (e.g. string constants)
1084 -- Hence the test for phase zero (which is the phase for all the final
1085 -- simplifications). Until phase zero we take no special notice of
1086 -- top level things, but then we become more leery about inlining
1087 -- them.
1088
1089 {-
1090 ************************************************************************
1091 * *
1092 postInlineUnconditionally
1093 * *
1094 ************************************************************************
1095
1096 postInlineUnconditionally
1097 ~~~~~~~~~~~~~~~~~~~~~~~~~
1098 @postInlineUnconditionally@ decides whether to unconditionally inline
1099 a thing based on the form of its RHS; in particular if it has a
1100 trivial RHS. If so, we can inline and discard the binding altogether.
1101
1102 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
1103 only have *forward* references. Hence, it's safe to discard the binding
1104
1105 NOTE: This isn't our last opportunity to inline. We're at the binding
1106 site right now, and we'll get another opportunity when we get to the
1107 occurrence(s)
1108
1109 Note that we do this unconditional inlining only for trivial RHSs.
1110 Don't inline even WHNFs inside lambdas; doing so may simply increase
1111 allocation when the function is called. This isn't the last chance; see
1112 NOTE above.
1113
1114 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here. Why?
1115 Because we don't even want to inline them into the RHS of constructor
1116 arguments. See NOTE above
1117
1118 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
1119 it's best to inline it anyway. We often get a=E; b=a from desugaring,
1120 with both a and b marked NOINLINE. But that seems incompatible with
1121 our new view that inlining is like a RULE, so I'm sticking to the 'active'
1122 story for now.
1123 -}
1124
1125 postInlineUnconditionally
1126 :: DynFlags -> SimplEnv -> TopLevelFlag
1127 -> OutId -- The binder (an InId would be fine too)
1128 -- (*not* a CoVar)
1129 -> OccInfo -- From the InId
1130 -> OutExpr
1131 -> Unfolding
1132 -> Bool
1133 -- Precondition: rhs satisfies the let/app invariant
1134 -- See Note [CoreSyn let/app invariant] in CoreSyn
1135 -- Reason: we don't want to inline single uses, or discard dead bindings,
1136 -- for unlifted, side-effect-full bindings
1137 postInlineUnconditionally dflags env top_lvl bndr occ_info rhs unfolding
1138 | not active = False
1139 | isWeakLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline
1140 -- because it might be referred to "earlier"
1141 | isExportedId bndr = False
1142 | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
1143 | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally]
1144 | exprIsTrivial rhs = True
1145 | otherwise
1146 = case occ_info of
1147 -- The point of examining occ_info here is that for *non-values*
1148 -- that occur outside a lambda, the call-site inliner won't have
1149 -- a chance (because it doesn't know that the thing
1150 -- only occurs once). The pre-inliner won't have gotten
1151 -- it either, if the thing occurs in more than one branch
1152 -- So the main target is things like
1153 -- let x = f y in
1154 -- case v of
1155 -- True -> case x of ...
1156 -- False -> case x of ...
1157 -- This is very important in practice; e.g. wheel-seive1 doubles
1158 -- in allocation if you miss this out
1159 OneOcc in_lam _one_br int_cxt -- OneOcc => no code-duplication issue
1160 -> smallEnoughToInline dflags unfolding -- Small enough to dup
1161 -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
1162 --
1163 -- NB: Do NOT inline arbitrarily big things, even if one_br is True
1164 -- Reason: doing so risks exponential behaviour. We simplify a big
1165 -- expression, inline it, and simplify it again. But if the
1166 -- very same thing happens in the big expression, we get
1167 -- exponential cost!
1168 -- PRINCIPLE: when we've already simplified an expression once,
1169 -- make sure that we only inline it if it's reasonably small.
1170
1171 && (not in_lam ||
1172 -- Outside a lambda, we want to be reasonably aggressive
1173 -- about inlining into multiple branches of case
1174 -- e.g. let x = <non-value>
1175 -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
1176 -- Inlining can be a big win if C3 is the hot-spot, even if
1177 -- the uses in C1, C2 are not 'interesting'
1178 -- An example that gets worse if you add int_cxt here is 'clausify'
1179
1180 (isCheapUnfolding unfolding && int_cxt))
1181 -- isCheap => acceptable work duplication; in_lam may be true
1182 -- int_cxt to prevent us inlining inside a lambda without some
1183 -- good reason. See the notes on int_cxt in preInlineUnconditionally
1184
1185 IAmDead -> True -- This happens; for example, the case_bndr during case of
1186 -- known constructor: case (a,b) of x { (p,q) -> ... }
1187 -- Here x isn't mentioned in the RHS, so we don't want to
1188 -- create the (dead) let-binding let x = (a,b) in ...
1189
1190 _ -> False
1191
1192 -- Here's an example that we don't handle well:
1193 -- let f = if b then Left (\x.BIG) else Right (\y.BIG)
1194 -- in \y. ....case f of {...} ....
1195 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
1196 -- But
1197 -- - We can't preInlineUnconditionally because that would invalidate
1198 -- the occ info for b.
1199 -- - We can't postInlineUnconditionally because the RHS is big, and
1200 -- that risks exponential behaviour
1201 -- - We can't call-site inline, because the rhs is big
1202 -- Alas!
1203
1204 where
1205 active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
1206 -- See Note [pre/postInlineUnconditionally in gentle mode]
1207
1208 {-
1209 Note [Top level and postInlineUnconditionally]
1210 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1211 We don't do postInlineUnconditionally for top-level things (even for
1212 ones that are trivial):
1213
1214 * Doing so will inline top-level error expressions that have been
1215 carefully floated out by FloatOut. More generally, it might
1216 replace static allocation with dynamic.
1217
1218 * Even for trivial expressions there's a problem. Consider
1219 {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
1220 blah xs = reverse xs
1221 ruggle = sort
1222 In one simplifier pass we might fire the rule, getting
1223 blah xs = ruggle xs
1224 but in *that* simplifier pass we must not do postInlineUnconditionally
1225 on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
1226
1227 If the rhs is trivial it'll be inlined by callSiteInline, and then
1228 the binding will be dead and discarded by the next use of OccurAnal
1229
1230 * There is less point, because the main goal is to get rid of local
1231 bindings used in multiple case branches.
1232
1233 * The inliner should inline trivial things at call sites anyway.
1234
1235 Note [Stable unfoldings and postInlineUnconditionally]
1236 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1237 Do not do postInlineUnconditionally if the Id has an stable unfolding,
1238 otherwise we lose the unfolding. Example
1239
1240 -- f has stable unfolding with rhs (e |> co)
1241 -- where 'e' is big
1242 f = e |> co
1243
1244 Then there's a danger we'll optimise to
1245
1246 f' = e
1247 f = f' |> co
1248
1249 and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
1250 won't inline because 'e' is too big.
1251
1252 c.f. Note [Stable unfoldings and preInlineUnconditionally]
1253
1254
1255 ************************************************************************
1256 * *
1257 Rebuilding a lambda
1258 * *
1259 ************************************************************************
1260 -}
1261
1262 mkLam :: [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
1263 -- mkLam tries three things:
1264 -- a) floating casts out of the lambda (see Note [Casts and lambdas]);
1265 -- b) eta reduction, if that gives a trivial expression; and
     -- c) eta expansion [only if there are some value lambdas]
1266
1267 mkLam [] body _cont
1268 = return body
1269 mkLam bndrs body cont
1270 = do { dflags <- getDynFlags
1271 ; mkLam' dflags bndrs body }
1272 where
1273 mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
1274 mkLam' dflags bndrs (Cast body co)
1275 | not (any bad bndrs)
1276 -- Note [Casts and lambdas]
1277 = do { lam <- mkLam' dflags bndrs body
1278 ; return (mkCast lam (mkPiCos Representational bndrs co)) }
1279 where
1280 co_vars = tyCoVarsOfCo co
1281 bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
1282
1283 mkLam' dflags bndrs body@(Lam {})
1284 = mkLam' dflags (bndrs ++ bndrs1) body1
1285 where
1286 (bndrs1, body1) = collectBinders body
1287
1288 mkLam' dflags bndrs (Tick t expr)
1289 | tickishFloatable t
1290 = mkTick t <$> mkLam' dflags bndrs expr
1291
1292 mkLam' dflags bndrs body
1293 | gopt Opt_DoEtaReduction dflags
1294 , Just etad_lam <- tryEtaReduce bndrs body
1295 = do { tick (EtaReduction (head bndrs))
1296 ; return etad_lam }
1297
1298 | not (contIsRhs cont) -- See Note [Eta expanding lambdas]
1299 , gopt Opt_DoLambdaEtaExpansion dflags
1300 , any isRuntimeVar bndrs
1301 , let body_arity = exprEtaExpandArity dflags body
1302 , body_arity > 0
1303 = do { tick (EtaExpansion (head bndrs))
1304 ; return (mkLams bndrs (etaExpand body_arity body)) }
1305
1306 | otherwise
1307 = return (mkLams bndrs body)
1308
1309 {-
1310 Note [Eta expanding lambdas]
1311 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1312 In general we *do* want to eta-expand lambdas. Consider
1313 f (\x -> case x of (a,b) -> \s -> blah)
1314 where 's' is a state token, and hence can be eta expanded. This
1315 showed up in the code for GHC.IO.Handle.Text.hPutChar, a rather
1316 important function!
1317
1318 The eta-expansion will never happen unless we do it now. (Well, it's
1319 possible that CorePrep will do it, but CorePrep only has a half-baked
1320 eta-expander that can't deal with casts. So it's much better to do it
1321 here.)
1322
1323 However, when the lambda is let-bound, as the RHS of a let, we have a
1324 better eta-expander (in the form of tryEtaExpandRhs), so we don't
1325 bother to try expansion in mkLam in that case; hence the contIsRhs
1326 guard.
1327
1328 Note [Casts and lambdas]
1329 ~~~~~~~~~~~~~~~~~~~~~~~~
1330 Consider
1331 (\x. (\y. e) `cast` g1) `cast` g2
1332 There is a danger here that the two lambdas look separated, and the
1333 full laziness pass might float an expression to between the two.
1334
1335 So this equation in mkLam' floats the g1 out, thus:
1336 (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
1337 where x:tx.
1338
1339 In general, this floats casts outside lambdas, where (I hope) they
1340 might meet and cancel with some other cast:
1341 \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
1342 /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
1343 /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
1344 (if not (g `in` co))
1345
1346 Notice that it works regardless of 'e'. Originally it worked only
1347 if 'e' was itself a lambda, but in some cases that resulted in
1348 fruitless iteration in the simplifier. A good example was when
1349 compiling Text.ParserCombinators.ReadPrec, where we had a definition
1350 like (\x. Get `cast` g)
1351 where Get is a constructor with nonzero arity. Then mkLam eta-expanded
1352 the Get, and the next iteration eta-reduced it, and then eta-expanded
1353 it again.
1354
1355 Note also the side condition for the case of coercion binders.
1356 It does not make sense to transform
1357 /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
1358 because the latter is not well-kinded.
1359
1360 ************************************************************************
1361 * *
1362 Eta expansion
1363 * *
1364 ************************************************************************
1365 -}
1366
1367 tryEtaExpandRhs :: SimplEnv -> OutId -> OutExpr -> SimplM (Arity, OutExpr)
1368 -- See Note [Eta-expanding at let bindings]
1369 tryEtaExpandRhs env bndr rhs
1370 = do { dflags <- getDynFlags
1371 ; (new_arity, new_rhs) <- try_expand dflags
1372
1373 ; WARN( new_arity < old_id_arity,
1374 (ptext (sLit "Arity decrease:") <+> (ppr bndr <+> ppr old_id_arity
1375 <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
1376 -- Note [Arity decrease] in Simplify
1377 return (new_arity, new_rhs) }
1378 where
1379 try_expand dflags
1380 | exprIsTrivial rhs
1381 = return (exprArity rhs, rhs)
1382
1383 | sm_eta_expand (getMode env) -- Provided eta-expansion is on
1384 , let new_arity1 = findRhsArity dflags bndr rhs old_arity
1385 new_arity2 = idCallArity bndr
1386 new_arity = max new_arity1 new_arity2
1387 , new_arity > old_arity -- And the current manifest arity isn't enough
1388 = do { tick (EtaExpansion bndr)
1389 ; return (new_arity, etaExpand new_arity rhs) }
1390 | otherwise
1391 = return (old_arity, rhs)
1392
1393 old_arity = exprArity rhs -- See Note [Do not eta-expand PAPs]
1394 old_id_arity = idArity bndr
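
    -- For example (a sketch):
    --     f = \x -> case x of { A -> \y -> e1; B -> \y -> e2 }
    -- has exprArity 1, but findRhsArity can discover arity 2, in which
    -- case we eta-expand the RHS to \x y -> case x of { ... }.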
1395
1396 {-
1397 Note [Eta-expanding at let bindings]
1398 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1399 We now eta expand at let-bindings, which is where the payoff comes.
1400 The most significant thing is that we can do a simple arity analysis
1401 (in CoreArity.findRhsArity), which we can't do for free-floating lambdas
1402
1403 One useful consequence of not eta-expanding lambdas is this example:
1404 genMap :: C a => ...
1405 {-# INLINE genMap #-}
1406 genMap f xs = ...
1407
1408 myMap :: D a => ...
1409 {-# INLINE myMap #-}
1410 myMap = genMap
1411
1412 Notice that 'genMap' should only inline if applied to two arguments.
1413 In the stable unfolding for myMap we'll have the unfolding
1414 (\d -> genMap Int (..d..))
1415 We do not want to eta-expand to
1416 (\d f xs -> genMap Int (..d..) f xs)
1417 because then 'genMap' will inline, and it really shouldn't: at least
1418 as far as the programmer is concerned, it's not applied to two
1419 arguments!
1420
1421 Note [Do not eta-expand PAPs]
1422 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1423 We used to have old_arity = manifestArity rhs, which meant that we
1424 would eta-expand even PAPs. But this gives no particular advantage,
1425 and can lead to a massive blow-up in code size, exhibited by Trac #9020.
1426 Suppose we have a PAP
1427 foo :: IO ()
1428 foo = returnIO ()
1429 Then we can eta-expand it to
1430 foo = (\eta. (returnIO () |> sym g) eta) |> g
1431 where
1432 g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
1433
1434 But there is really no point in doing this, and it generates masses of
1435 coercions and whatnot that eventually disappear again. For T9020, GHC
1436 allocated 6.6G before, and 0.8G afterwards; and residency dropped from
1437 1.8G to 45M.
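
To spell out the arithmetic (a sketch; assume returnIO ends up with arity 2,
i.e. its value argument plus the state token):

   foo_rhs = returnIO ()
   manifestArity foo_rhs = 0    -- no leading lambdas
   exprArity     foo_rhs = 1    -- looks through the application
   findRhsArity  ...     = 1    -- the most we could eta-expand to

With old_arity = exprArity the guard (new_arity > old_arity) is 1 > 1, which
fails, so the PAP is left alone; with old_arity = manifestArity (the old
code) it was 1 > 0, and we eta-expanded as shown above.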
1438
1439 But note that this won't eta-expand, say
1440 f = \g -> map g
1441 Does it matter not eta-expanding such functions? I'm not sure. Perhaps
1442 strictness analysis will have less to bite on?
1443
1444
1445 ************************************************************************
1446 * *
1447 \subsection{Floating lets out of big lambdas}
1448 * *
1449 ************************************************************************
1450
1451 Note [Floating and type abstraction]
1452 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1453 Consider this:
1454 x = /\a. C e1 e2
1455 We'd like to float this to
1456 y1 = /\a. e1
1457 y2 = /\a. e2
1458 x = /\a. C (y1 a) (y2 a)
1459 for the usual reasons: we want to inline x rather vigorously.
1460
1461 You may think that this kind of thing is rare. But in some programs it is
1462 common. For example, if you do closure conversion you might get:
1463
1464 data a :-> b = forall e. (e -> a -> b) :$ e
1465
1466 f_cc :: forall a. a :-> a
1467 f_cc = /\a. (\e. id a) :$ ()
1468
1469 Now we really want to inline that f_cc thing so that the
1470 construction of the closure goes away.
1471
1472 So I have elaborated simplLazyBind to understand right-hand sides that look
1473 like
1474 /\ a1..an. body
1475
1476 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1477 but there is quite a bit of plumbing in simplLazyBind as well.
1478
1479 The same transformation is good when there are lets in the body:
1480
1481 /\abc -> let(rec) x = e in b
1482 ==>
1483 let(rec) x' = /\abc -> let x = x' a b c in e
1484 in
1485 /\abc -> let x = x' a b c in b
1486
1487 This is good because it can turn things like:
1488
1489 let f = /\a -> letrec g = ... g ... in g
1490 into
1491 letrec g' = /\a -> ... g' a ...
1492 in
1493 let f = /\ a -> g' a
1494
1495 which is better. In effect, it means that big lambdas don't impede
1496 let-floating.
1497
1498 This optimisation is CRUCIAL in eliminating the junk introduced by
1499 desugaring mutually recursive definitions. Don't eliminate it lightly!
1500
1501 [May 1999] If we do this transformation *regardless* then we can
1502 end up with some pretty silly stuff. For example,
1503
1504 let
1505 st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1506 in ..
1507 becomes
1508 let y1 = /\s -> r1
1509 y2 = /\s -> r2
1510 st = /\s -> ...[y1 s/x1, y2 s/x2]
1511 in ..
1512
1513 Unless the "..." is a WHNF there is really no point in doing this.
1514 Indeed it can make things worse. Suppose x1 is used strictly,
1515 and is of the form
1516
1517 x1* = case f y of { (a,b) -> e }
1518
1519 If we abstract this wrt the tyvar, we can then no longer do the case
1520 inline as we normally would.
1521
1522 That's why the whole transformation is part of the same process that
1523 floats let-bindings and constructor arguments out of RHSs. In particular,
1524 it is guarded by the doFloatFromRhs call in simplLazyBind.
1525
1526 Note [Which type variables to abstract over]
1527 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1528 Abstract only over those of the main type variables that are free
1529 in the rhs of the binding being floated. Note that
1530
1531 * The naive approach of abstracting wrt the
1532 tyvars free in the Id's /type/ fails. Consider:
1533 /\ a b -> let t :: (a,b) = (e1, e2)
1534 x :: a = fst t
1535 in ...
1536 Here, b isn't free in x's type, but we must nevertheless
1537 abstract wrt b as well, because t's type mentions b.
1538 Since t is floated too, we'd end up with the bogus:
1539 poly_t = /\ a b -> (e1, e2)
1540 poly_x = /\ a -> fst (poly_t a *b*)
1541
1542 * We must do closeOverKinds. Example (Trac #10934):
1543 f = /\k (f:k->*) (a:k). let t = AccFailure @ (f a) in ...
1544 Here we want to float 't', but we must remember to abstract over
1545 'k' as well, even though it is not explicitly mentioned in the RHS,
1546 otherwise we get
1547 t = /\ (f:k->*) (a:k). AccFailure @ (f a)
1548 which is obviously bogus.
1549 -}
1550
1551 abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
1552 abstractFloats main_tvs body_env body
1553 = ASSERT( notNull body_floats )
1554 do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
1555 ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
1556 where
1557 main_tv_set = mkVarSet main_tvs
1558 body_floats = getFloatBinds body_env
1559 empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
1560
1561 abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
1562 abstract subst (NonRec id rhs)
1563 = do { (poly_id, poly_app) <- mk_poly tvs_here id
1564 ; let poly_rhs = mkLams tvs_here rhs'
1565 subst' = CoreSubst.extendIdSubst subst id poly_app
1566 ; return (subst', (NonRec poly_id poly_rhs)) }
1567 where
1568 rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
1569
1570 -- tvs_here: see Note [Which type variables to abstract over]
1571 tvs_here = varSetElemsKvsFirst $
1572 intersectVarSet main_tv_set $
1573 closeOverKinds $
1574 exprSomeFreeVars isTyVar rhs'
1575
1576 abstract subst (Rec prs)
1577 = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
1578 ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
1579 poly_rhss = [mkLams tvs_here (CoreSubst.substExpr (text "abstract_floats3") subst' rhs)
1580 | rhs <- rhss]
1581 ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
1582 where
1583 (ids,rhss) = unzip prs
1584 -- For a recursive group, it's a bit of a pain to work out the minimal
1585 -- set of tyvars over which to abstract:
1586 -- /\ a b c. let x = ...a... in
1587 -- letrec { p = ...x...q...
1588 -- q = .....p...b... } in
1589 -- ...
1590 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
1591 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
1592 -- Since it's a pain, we just use the whole set, which is always safe
1593 --
1594 -- If you ever want to be more selective, remember this bizarre case too:
1595 -- x::a = x
1596 -- Here, we must abstract 'x' over 'a'.
1597 tvs_here = sortQuantVars main_tvs
1598
1599 mk_poly tvs_here var
1600 = do { uniq <- getUniqueM
1601 ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
1602 poly_ty = mkForAllTys tvs_here (idType var) -- But new type of course
1603 poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.hs
1604 mkLocalId poly_name poly_ty
1605 ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
1606 -- In the olden days, it was crucial to copy the occInfo of the original var,
1607 -- because we were looking at occurrence-analysed but as yet unsimplified code!
1608 -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking
1609 -- at already simplified code, so it doesn't matter
1610 --
1611 -- It's even right to retain single-occurrence or dead-var info:
1612 -- Suppose we started with /\a -> let x = E in B
1613 -- where x occurs once in B. Then we transform to:
1614 -- let x' = /\a -> E in /\a -> let x* = x' a in B
1615 -- where x* has an INLINE prag on it. Now, once x* is inlined,
1616 -- the occurrences of x' will be just the occurrences originally
1617 -- pinned on x.
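--
-- To summarise what mk_poly produces, here is a small illustrative sketch:
-- abstracting the float
--     x :: [a] = e
-- over tvs_here = [a] gives
--     poly_x :: forall a. [a]
--     poly_x = /\a. e
-- and the substitution maps x to the application (poly_x a), which is what
-- replaces x in later floats and in the body.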
1618
1619 {-
1620 Note [Abstract over coercions]
1621 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1622 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1623 type variable a. Rather than sort this mess out, we simply bale out and abstract
1624 wrt all the type variables if any of them are coercion variables.
1625
1626
1627 Historical note: if you use let-bindings instead of a substitution, beware of this:
1628
1629 -- Suppose we start with:
1630 --
1631 -- x = /\ a -> let g = G in E
1632 --
1633 -- Then we'll float to get
1634 --
1635 -- x = let poly_g = /\ a -> G
1636 -- in /\ a -> let g = poly_g a in E
1637 --
1638 -- But now the occurrence analyser will see just one occurrence
1639 -- of poly_g, not inside a lambda, so the simplifier will
1640 -- PreInlineUnconditionally poly_g back into g! Back to square 1!
1641 -- (I used to think that the "don't inline lone occurrences" stuff
1642 -- would stop this happening, but since it's the *only* occurrence,
1643 -- PreInlineUnconditionally kicks in first!)
1644 --
1645 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1646 -- to appear many times. (NB: mkInlineMe eliminates
1647 -- such notes on trivial RHSs, so do it manually.)
1648
1649 ************************************************************************
1650 * *
1651 prepareAlts
1652 * *
1653 ************************************************************************
1654
1655 prepareAlts tries these things:
1656
1657 1. Eliminate alternatives that cannot match, including the
1658 DEFAULT alternative.
1659
1660 2. If the DEFAULT alternative can match only one possible constructor,
1661 then make that constructor explicit.
1662 e.g.
1663 case e of x { DEFAULT -> rhs }
1664 ===>
1665 case e of x { (a,b) -> rhs }
1666 where the type is a single constructor type. This gives better code
1667 when rhs also scrutinises x or e.
1668
1669 3. Return a list of the constructors that cannot match in the
1670 DEFAULT alternative (if there is one)
1671
1672 Here "cannot match" includes knowledge from GADTs
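
For example (an illustrative sketch): given

   data T a where
     TInt  :: T Int
     TBool :: T Bool

   g :: T a -> Int
   g y = case y of { TInt -> 1; TBool -> 2 }

if g is inlined at a call site where its argument has type T Int, the
scrutinee of the resulting case has type T Int, so the TBool alternative
cannot match and is dropped.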
1673
1674 It's a good idea to do this stuff before simplifying the alternatives, to
1675 avoid simplifying alternatives we know can't happen, and to come up with
1676 the list of constructors that are handled, to put into the IdInfo of the
1677 case binder, for use when simplifying the alternatives.
1678
1679 Eliminating the default alternative in (1) isn't so obvious, but it can
1680 happen:
1681
1682 data Colour = Red | Green | Blue
1683
1684 f x = case x of
1685 Red -> ..
1686 Green -> ..
1687 DEFAULT -> h x
1688
1689 h y = case y of
1690 Blue -> ..
1691 DEFAULT -> [ case y of ... ]
1692
1693 If we inline h into f, the default case of the inlined h can't happen.
1694 If we don't notice this, we may end up filtering out *all* the cases
1695 of the inner case y, which leaves us nowhere to go!
1696 -}
1697
1698 prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
1699 -- The returned alternatives can be empty, meaning that none are possible
1700 prepareAlts scrut case_bndr' alts
1701 | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
1702 -- Case binder is needed just for its type. Note that as an
1703 -- OutId, it has maximum information; this is important.
1704 -- Test simpl013 is an example
1705 = do { us <- getUniquesM
1706 ; let (idcs1, alts1) = filterAlts tc tys imposs_cons alts
1707 (yes2, alts2) = refineDefaultAlt us tc tys idcs1 alts1
1708 (yes3, idcs3, alts3) = combineIdenticalAlts idcs1 alts2
1709 -- "idcs" stands for "impossible default data constructors"
1710 -- i.e. the constructors that can't match the default case
1711 ; when yes2 $ tick (FillInCaseDefault case_bndr')
1712 ; when yes3 $ tick (AltMerge case_bndr')
1713 ; return (idcs3, alts3) }
1714
1715 | otherwise -- Not a data type, so nothing interesting happens
1716 = return ([], alts)
1717 where
1718 imposs_cons = case scrut of
1719 Var v -> otherCons (idUnfolding v)
1720 _ -> []
1721
1722
1723 {-
1724 ************************************************************************
1725 * *
1726 mkCase
1727 * *
1728 ************************************************************************
1729
1730 mkCase tries these things
1731
1732 1. Merge Nested Cases
1733
1734 case e of b {              ==>   case e of b {
1735   p1 -> rhs1                       p1 -> rhs1
1736   ...                              ...
1737   pm -> rhsm                       pm -> rhsm
1738   _  -> case b of b' {             pn -> let b'=b in rhsn
1739               pn -> rhsn           ...
1740               ...                  po -> let b'=b in rhso
1741               po -> rhso           _  -> let b'=b in rhsd
1742               _  -> rhsd
1743 }
1744
1745 which merges the two cases into one when the default alternative of
1746 the outer case scrutinises the same variable as the outer case does
1747 (specifically, the outer case binder). This transformation is called
1748 Case Merging. It avoids scrutinising the same variable multiple times.
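
A source-level sketch of the situation (illustrative only; in Core the
inner case actually scrutinises the outer case binder, thanks to the
occurrence analyser's binder swap):

   f x = case x of
           Red -> e1
           _   -> case x of     -- e.g. left behind by inlining
                    Green -> e2
                    _     -> e3
 ===>
   f x = case x of
           Red   -> e1
           Green -> e2
           _     -> e3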
1749
1750 2. Eliminate Identity Case
1751
1752 case e of ===> e
1753 True -> True;
1754 False -> False
1755
1756 and similar friends.
1757 -}
1758
1759 mkCase, mkCase1, mkCase2
1760 :: DynFlags
1761 -> OutExpr -> OutId
1762 -> OutType -> [OutAlt] -- Alternatives in standard (increasing) order
1763 -> SimplM OutExpr
1764
1765 --------------------------------------------------
1766 -- 1. Merge Nested Cases
1767 --------------------------------------------------
1768
1769 mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
1770 | gopt Opt_CaseMerge dflags
1771 , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
1772 <- stripTicksTop tickishFloatable deflt_rhs
1773 , inner_scrut_var == outer_bndr
1774 = do { tick (CaseMerge outer_bndr)
1775
1776 ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
1777 (con, args, wrap_rhs rhs)
1778 -- Simplifier's no-shadowing invariant should ensure
1779 -- that outer_bndr is not shadowed by the inner patterns
1780 wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
1781 -- The let is OK even for unboxed binders, since its RHS is just a variable
1782
1783 wrapped_alts | isDeadBinder inner_bndr = inner_alts
1784 | otherwise = map wrap_alt inner_alts
1785
1786 merged_alts = mergeAlts outer_alts wrapped_alts
1787 -- NB: mergeAlts gives priority to the left
1788 -- case x of
1789 -- A -> e1
1790 -- DEFAULT -> case x of
1791 -- A -> e2
1792 -- B -> e3
1793 -- When we merge, we must ensure that e1 takes
1794 -- precedence over e2 as the value for A!
1795
1796 ; fmap (mkTicks ticks) $
1797 mkCase1 dflags scrut outer_bndr alts_ty merged_alts
1798 }
1799 -- Warning: don't call mkCase recursively!
1800 -- Firstly, there's no point, because inner alts have already had
1801 -- mkCase applied to them, so they won't have a case in their default
1802 -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
1803 -- in munge_rhs may put a case into the DEFAULT branch!
1804
1805 mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
1806
1807 --------------------------------------------------
1808 -- 2. Eliminate Identity Case
1809 --------------------------------------------------
1810
1811 mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _) -- Identity case
1812 | all identity_alt alts
1813 = do { tick (CaseIdentity case_bndr)
1814 ; return (mkTicks ticks $ re_cast scrut rhs1) }
1815 where
1816 ticks = concatMap (stripTicksT tickishFloatable . thirdOf3) (tail alts)
1817 identity_alt (con, args, rhs) = check_eq rhs con args
1818
1819 check_eq (Cast rhs co) con args
1820 = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
1821 -- See Note [RHS casts]
1822 check_eq (Lit lit) (LitAlt lit') _ = lit == lit'
1823 check_eq (Var v) _ _ | v == case_bndr = True
1824 check_eq (Var v) (DataAlt con) [] = v == dataConWorkId con
1825 -- Optimisation only
1826 check_eq (Tick t e) alt args = tickishFloatable t &&
1827 check_eq e alt args
1828 check_eq rhs (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $
1829 mkConApp con (arg_tys ++
1830 varsToCoreExprs args)
1831 check_eq _ _ _ = False
1832
1833 arg_tys = map Type (tyConAppArgs (idType case_bndr))
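
    -- For instance (an illustrative sketch), in
    --     case e of x { Nothing -> Nothing; Just y -> Just y }
    -- every alternative rebuilds exactly its own pattern (modulo the type
    -- arguments supplied from arg_tys), so identity_alt holds for all of
    -- them and the whole case collapses to e.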
1834
1835 -- Note [RHS casts]
1836 -- ~~~~~~~~~~~~~~~~
1837 -- We've seen this:
1838 -- case e of x { _ -> x `cast` c }
1839 -- And we definitely want to eliminate this case, to give
1840 -- e `cast` c
1841 -- So we throw away the cast from the RHS, and reconstruct
1842 -- it at the other end. All the RHS casts must be the same
1843 -- if (all identity_alt alts) holds.
1844 --
1845 -- Don't worry about nested casts, because the simplifier combines them
1846
1847 re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
1848 re_cast scrut _ = scrut
1849
1850 mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
1851
1852 --------------------------------------------------
1853 -- Catch-all
1854 --------------------------------------------------
1855 mkCase2 _dflags scrut bndr alts_ty alts
1856 = return (Case scrut bndr alts_ty alts)
1857
1858 {-
1859 Note [Dead binders]
1860 ~~~~~~~~~~~~~~~~~~~~
1861 Note that dead-ness is maintained by the simplifier, so that it is
1862 accurate after simplification as well as before.
1863
1864
1865 Note [Cascading case merge]
1866 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1867 Case merging should cascade in one sweep, because it
1868 happens bottom-up
1869
1870 case e of a {
1871 DEFAULT -> case a of b
1872 DEFAULT -> case b of c {
1873 DEFAULT -> e
1874 A -> ea
1875 B -> eb
1876 C -> ec
1877 ==>
1878 case e of a {
1879 DEFAULT -> case a of b
1880 DEFAULT -> let c = b in e
1881 A -> let c = b in ea
1882 B -> eb
1883 C -> ec
1884 ==>
1885 case e of a {
1886 DEFAULT -> let b = a in let c = b in e
1887 A -> let b = a in let c = b in ea
1888 B -> let b = a in eb
1889 C -> ec
1890
1891
1892 However here's a tricky case that we still don't catch, and I don't
1893 see how to catch it in one pass:
1894
1895 case x of c1 { I# a1 ->
1896 case a1 of c2 ->
1897 0 -> ...
1898 DEFAULT -> case x of c3 { I# a2 ->
1899 case a2 of ...
1900
1901 After occurrence analysis (and its binder-swap) we get this
1902
1903 case x of c1 { I# a1 ->
1904 let x = c1 in -- Binder-swap addition
1905 case a1 of c2 ->
1906 0 -> ...
1907 DEFAULT -> case x of c3 { I# a2 ->
1908 case a2 of ...
1909
1910 When we simplify the inner case x, we'll see that
1911 x=c1=I# a1. So we'll bind a2 to a1, and get
1912
1913 case x of c1 { I# a1 ->
1914 case a1 of c2 ->
1915 0 -> ...
1916 DEFAULT -> case a1 of ...
1917
1918 This is correct, but we can't do a case merge in this sweep
1919 because c2 /= a1. Reason: the binding c1=I# a1 went inwards
1920 without getting changed to c1=I# c2.
1921
1922 I don't think this is worth fixing, even if I knew how. It'll
1923 all come out in the next pass anyway.
1924 -}