Filter out BuiltinRules in occurrence analysis
[ghc.git] / compiler / simplCore / SimplUtils.hs
1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 \section[SimplUtils]{The simplifier utilities}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplUtils (
10 -- Rebuilding
11 mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
12
13 -- Inlining,
14 preInlineUnconditionally, postInlineUnconditionally,
15 activeUnfolding, activeRule,
16 getUnfoldingInRuleMatch,
17 simplEnvForGHCi, updModeForStableUnfoldings, updModeForRules,
18
19 -- The continuation type
20 SimplCont(..), DupFlag(..),
21 isSimplified,
22 contIsDupable, contResultType, contHoleType,
23 contIsTrivial, contArgs,
24 countValArgs, countArgs,
25 mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
26 interestingCallContext,
27
28 -- ArgInfo
29 ArgInfo(..), ArgSpec(..), mkArgInfo,
30 addValArgTo, addCastTo, addTyArgTo,
31 argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
32
33 abstractFloats
34 ) where
35
36 #include "HsVersions.h"
37
38 import SimplEnv
39 import CoreMonad ( SimplifierMode(..), Tick(..) )
40 import DynFlags
41 import CoreSyn
42 import qualified CoreSubst
43 import PprCore
44 import CoreFVs
45 import CoreUtils
46 import CoreArity
47 import CoreUnfold
48 import Name
49 import Id
50 import Var
51 import Demand
52 import SimplMonad
53 import Type hiding( substTy )
54 import Coercion hiding( substCo )
55 import DataCon ( dataConWorkId )
56 import VarEnv
57 import VarSet
58 import BasicTypes
59 import Util
60 import MonadUtils
61 import Outputable
62 import Pair
63
64 import Control.Monad ( when )
65
66 {-
67 ************************************************************************
68 * *
69 The SimplCont and DupFlag types
70 * *
71 ************************************************************************
72
73 A SimplCont allows the simplifier to traverse the expression in a
74 zipper-like fashion. The SimplCont represents the rest of the expression,
75 "above" the point of interest.
76
77 You can also think of a SimplCont as an "evaluation context", using
78 that term in the way it is used for operational semantics. This is the
79 way I usually think of it. For example you'll often see a syntax for
80 evaluation context looking like
81 C ::= [] | C e | case C of alts | C `cast` co
82 That's the kind of thing we are doing here, and I use that syntax in
83 the comments.
84
85
86 Key points:
87 * A SimplCont describes a *strict* context (just like
88 evaluation contexts do). E.g. Just [] is not a SimplCont
89
90 * A SimplCont describes a context that *does not* bind
91 any variables. E.g. \x. [] is not a SimplCont
92 -}
93
-- | A 'SimplCont' is a strict evaluation context: the part of the
-- expression "above" the point the simplifier is working on.
-- See the Note above for the two key invariants: the context is
-- strict, and it binds no variables.
data SimplCont
  = Stop                -- An empty context, or <hole>
        OutType         -- Type of the <hole>
        CallCtxt        -- Tells if there is something interesting about
                        --          the context, and hence the inliner
                        --          should be a bit keener (see interestingCallContext)
                        -- Specifically:
                        --     This is an argument of a function that has RULES
                        --     Inlining the call might allow the rule to fire
                        -- Never ValAppCxt (use ApplyToVal instead)
                        -- or CaseCtxt (use Select instead)

  | CastIt              -- <hole> `cast` co
        OutCoercion     -- The coercion simplified
                        -- Invariant: never an identity coercion
        SimplCont

  | ApplyToVal {        -- <hole> arg
        sc_dup  :: DupFlag,    -- See Note [DupFlag invariants]
        sc_arg  :: InExpr,     -- The argument,
        sc_env  :: StaticEnv,  --     and its static env
        sc_cont :: SimplCont }

  | ApplyToTy {         -- <hole> ty
        sc_arg_ty  :: OutType, -- Argument type
        sc_hole_ty :: OutType, -- Type of the function, presumably (forall a. blah)
                               -- See Note [The hole type in ApplyToTy]
        sc_cont    :: SimplCont }

  | Select {            -- case <hole> of alts
        sc_dup  :: DupFlag,    -- See Note [DupFlag invariants]
        sc_bndr :: InId,       -- case binder
        sc_alts :: [InAlt],    -- Alternatives
        sc_env  :: StaticEnv,  --     and their static environment
        sc_cont :: SimplCont }

  -- The two strict forms have no DupFlag, because we never duplicate them
  | StrictBind                 -- (\x* \xs. e) <hole>
        InId [InBndr]          -- let x* = <hole> in e
        InExpr StaticEnv       --     is a special case
        SimplCont

  | StrictArg           -- f e1 ..en <hole>
        ArgInfo         -- Specifies f, e1..en, Whether f has rules, etc
                        --     plus strictness flags for *further* args
        CallCtxt        -- Whether *this* argument position is interesting
        SimplCont

  | TickIt
        (Tickish Id)    -- Tick tickish <hole>
        SimplCont
145
-- | How safely a continuation (or a piece of one) may be duplicated.
-- See Note [DupFlag invariants].
data DupFlag = NoDup       -- Unsimplified, might be big
             | Simplified  -- Simplified
             | OkToDup     -- Simplified and small
149
-- | True for pieces that have already been simplified ('Simplified'
-- or 'OkToDup').  By the DupFlag invariants their subst-env is empty.
isSimplified :: DupFlag -> Bool
isSimplified dup = case dup of
  NoDup -> False
  _     -> True   -- Invariant: the subst-env is empty
153
-- | Substitute into the type only if the piece has not already been
-- simplified; a simplified piece has an empty subst-env, so the
-- substitution would be a no-op.
perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
perhapsSubstTy dup env ty =
  if isSimplified dup then ty else substTy env ty
158
159 {-
160 Note [DupFlag invariants]
161 ~~~~~~~~~~~~~~~~~~~~~~~~~
162 In both (ApplyToVal dup _ env k)
163 and (Select dup _ _ env k)
164 the following invariants hold
165
166 (a) if dup = OkToDup, then continuation k is also ok-to-dup
167 (b) if dup = OkToDup or Simplified, the subst-env is empty
168 (and hence no need to re-simplify)
169 -}
170
-- Debug printing of a DupFlag
instance Outputable DupFlag where
  ppr df = case df of
    OkToDup    -> text "ok"
    NoDup      -> text "nodup"
    Simplified -> text "simpl"
175
-- Debug printing only: shows the continuation stack, innermost frame
-- first, one frame per line
instance Outputable SimplCont where
  ppr (Stop ty interesting) = text "Stop" <> brackets (ppr interesting) <+> ppr ty
  ppr (CastIt co cont )     = (text "CastIt" <+> ppr co) $$ ppr cont
  ppr (TickIt t cont)       = (text "TickIt" <+> ppr t) $$ ppr cont
  ppr (ApplyToTy { sc_arg_ty = ty, sc_cont = cont })
    = (text "ApplyToTy" <+> pprParendType ty) $$ ppr cont
  ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont })
    = (text "ApplyToVal" <+> ppr dup <+> pprParendExpr arg)
      $$ ppr cont
  ppr (StrictBind b _ _ _ cont) = (text "StrictBind" <+> ppr b) $$ ppr cont
  ppr (StrictArg ai _ cont)     = (text "StrictArg" <+> ppr (ai_fun ai)) $$ ppr cont
  ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
    = (text "Select" <+> ppr dup <+> ppr bndr) $$
      ifPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
190
191
192 {- Note [The hole type in ApplyToTy]
193 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
194 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
195 continuation. It is absolutely necessary to compute contHoleType, but it is
196 not used for anything else (and hence may not be evaluated).
197
198 Why is it necessary for contHoleType? Consider the continuation
199 ApplyToType Int (Stop Int)
200 corresponding to
201 (<hole> @Int) :: Int
202 What is the type of <hole>? It could be (forall a. Int) or (forall a. a),
203 and there is no way to know which, so we must record it.
204
205 In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType
206 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
207 doesn't matter because we'll never compute them all.
208
209 ************************************************************************
210 * *
211 ArgInfo and ArgSpec
212 * *
213 ************************************************************************
214 -}
215
-- | Accumulated information about a call @f a1 .. an@, maintained as
-- the simplifier works through the arguments one by one.
data ArgInfo
  = ArgInfo {
        ai_fun   :: OutId,      -- The function
        ai_args  :: [ArgSpec],  -- ...applied to these args (which are in *reverse* order)
        ai_type  :: OutType,    -- Type of (f a1 ... an)

        ai_rules :: [CoreRule], -- Rules for this function

        ai_encl :: Bool,        -- Flag saying whether this function
                                -- or an enclosing one has rules (recursively)
                                --      True => be keener to inline in all args

        ai_strs :: [Bool],      -- Strictness of remaining arguments
                                --   Usually infinite, but if it is finite it guarantees
                                --   that the function diverges after being given
                                --   that number of args
        ai_discs :: [Int]       -- Discounts for remaining arguments; non-zero => be keener to inline
                                --   Always infinite
    }
235
-- | One already-processed argument (or interleaved cast) of the call
-- recorded in an 'ArgInfo'; mirrors the ApplyToVal/ApplyToTy/CastIt
-- forms of 'SimplCont'.
data ArgSpec
  = ValArg OutExpr                  -- Apply to this (coercion or value); c.f. ApplyToVal
  | TyArg { as_arg_ty  :: OutType   -- Apply to this type; c.f. ApplyToTy
          , as_hole_ty :: OutType } -- Type of the function (presumably forall a. blah)
  | CastBy OutCoercion              -- Cast by this; c.f. CastIt
241
-- Debug printing of an ArgSpec
instance Outputable ArgSpec where
  ppr spec = case spec of
    ValArg e                 -> text "ValArg" <+> ppr e
    TyArg { as_arg_ty = ty } -> text "TyArg" <+> ppr ty
    CastBy c                 -> text "CastBy" <+> ppr c
246
-- | Record one more value argument, updating the result type to match.
addValArgTo :: ArgInfo -> OutExpr -> ArgInfo
addValArgTo ai arg
  = ai { ai_args = ValArg arg : ai_args ai
       , ai_type = res_ty }
  where
    res_ty = applyTypeToArg (ai_type ai) arg
250
-- | Record one more type argument.  The TyArg remembers the current
-- (polymorphic) function type as its hole type; the result type is
-- that type instantiated at the argument.
addTyArgTo :: ArgInfo -> OutType -> ArgInfo
addTyArgTo ai arg_ty
  = ai { ai_args = TyArg { as_arg_ty  = arg_ty
                         , as_hole_ty = fun_ty } : ai_args ai
       , ai_type = piResultTy fun_ty arg_ty }
  where
    fun_ty = ai_type ai
257
-- | Record an interleaved cast; the new result type is the coercion's
-- right-hand-side type.
addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
addCastTo ai co
  = ai { ai_args = CastBy co : ai_args ai
       , ai_type = to_ty }
  where
    to_ty = pSnd (coercionKind co)
261
-- | The argument expressions from a prefix of ArgSpecs, stopping at
-- the first cast.
argInfoAppArgs :: [ArgSpec] -> [OutExpr]
argInfoAppArgs specs = case specs of
  []                              -> []
  CastBy {}                : _    -> []   -- Stop at a cast
  ValArg e                 : rest -> e       : argInfoAppArgs rest
  TyArg { as_arg_ty = ty } : rest -> Type ty : argInfoAppArgs rest
267
-- | Wrap a continuation in application/cast frames for a list of
-- already-simplified arguments (outermost spec first).  Each ValArg
-- is pushed with sc_dup = Simplified, which is sound because the
-- supplied env has an empty SubstEnv (see the DupFlag invariants).
pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
pushSimplifiedArgs _env []           k = k
pushSimplifiedArgs env  (arg : args) k
  = case arg of
      TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
               -> ApplyToTy  { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
      ValArg e -> ApplyToVal { sc_arg = e, sc_env = env, sc_dup = Simplified, sc_cont = rest }
      CastBy c -> CastIt c rest
  where
    rest = pushSimplifiedArgs env args k
           -- The env has an empty SubstEnv
279
-- | Rebuild the application expression from a function and its
-- argument specs.
-- NB: the [ArgSpec] is reversed, so the first spec in the list is the
-- last (outermost) one in the application; hence the right fold with
-- (Var fun) at the bottom.
argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
argInfoExpr fun rev_args = foldr add (Var fun) rev_args
  where
    add (ValArg a)                 inner = inner `App` a
    add (TyArg { as_arg_ty = ty }) inner = inner `App` Type ty
    add (CastBy co)                inner = mkCast inner co
290
291
292 {-
293 ************************************************************************
294 * *
295 Functions on SimplCont
296 * *
297 ************************************************************************
298 -}
299
-- | Terminal continuation with nothing interesting about the context.
mkBoringStop :: OutType -> SimplCont
mkBoringStop = flip Stop BoringCtxt
302
-- | Terminal continuation for the right-hand side of a let.
-- See Note [RHS of lets] in CoreUnfold
mkRhsStop :: OutType -> SimplCont
mkRhsStop = flip Stop RhsCtxt
305
-- | Terminal continuation for a lazy argument position, carrying the
-- caller-supplied call context.
mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
mkLazyArgStop = Stop
308
309 -------------------
-- | Is the hole the RHS of a let, or a (strict) argument position?
contIsRhsOrArg :: SimplCont -> Bool
contIsRhsOrArg cont = case cont of
  Stop {}       -> True
  StrictBind {} -> True
  StrictArg {}  -> True
  _             -> False
315
-- | Is the hole exactly the RHS of a let (a Stop in RhsCtxt)?
contIsRhs :: SimplCont -> Bool
contIsRhs cont = case cont of
  Stop _ RhsCtxt -> True
  _              -> False
319
320 -------------------
-- | Can this continuation be duplicated without duplicating work?
-- (ApplyToVal and Select defer to their DupFlag.)
contIsDupable :: SimplCont -> Bool
contIsDupable cont = case cont of
  Stop {}                         -> True
  ApplyToTy  { sc_cont = k }      -> contIsDupable k
  ApplyToVal { sc_dup = OkToDup } -> True  -- See Note [DupFlag invariants]
  Select     { sc_dup = OkToDup } -> True  -- ...ditto...
  CastIt _ k                      -> contIsDupable k
  _                               -> False
328
329 -------------------
-- | Is the continuation made only of "free" frames: casts, type
-- arguments and coercion arguments?
contIsTrivial :: SimplCont -> Bool
contIsTrivial cont = case cont of
  Stop {}                                         -> True
  ApplyToTy  { sc_cont = k }                      -> contIsTrivial k
  ApplyToVal { sc_arg = Coercion _, sc_cont = k } -> contIsTrivial k
  CastIt _ k                                      -> contIsTrivial k
  _                                               -> False
336
337 -------------------
-- | The type of the whole expression once the hole is plugged:
-- walk to the outermost Stop and return its recorded type.
contResultType :: SimplCont -> OutType
contResultType cont = case cont of
  Stop ty _                  -> ty
  CastIt _ k                 -> contResultType k
  StrictBind _ _ _ _ k       -> contResultType k
  StrictArg _ _ k            -> contResultType k
  Select     { sc_cont = k } -> contResultType k
  ApplyToTy  { sc_cont = k } -> contResultType k
  ApplyToVal { sc_cont = k } -> contResultType k
  TickIt _ k                 -> contResultType k
347
-- | The type expected of the expression that will be plugged into the
-- hole of this continuation.
contHoleType :: SimplCont -> OutType
contHoleType (Stop ty _)                      = ty
contHoleType (TickIt _ k)                     = contHoleType k
contHoleType (CastIt co _)                    = pFst (coercionKind co)
contHoleType (StrictBind b _ _ se _)          = substTy se (idType b)
contHoleType (StrictArg ai _ _)               = funArgTy (ai_type ai)
contHoleType (ApplyToTy { sc_hole_ty = ty })  = ty  -- See Note [The hole type in ApplyToTy]
contHoleType (ApplyToVal { sc_arg = e, sc_env = se, sc_dup = dup, sc_cont = k })
  = mkFunTy (perhapsSubstTy dup se (exprType e))
            (contHoleType k)
            -- Hole is a function from the (substituted) arg type
            -- to the hole type of the rest of the continuation
contHoleType (Select { sc_dup = d, sc_bndr = b, sc_env = se })
  = perhapsSubstTy d se (idType b)
360
361 -------------------
-- | Count value arguments in the continuation, excluding coercion
-- arguments.
countValArgs :: SimplCont -> Int
countValArgs (ApplyToVal { sc_arg = Coercion {}, sc_cont = k }) = countValArgs k
countValArgs (ApplyToVal { sc_cont = k })                       = 1 + countValArgs k
countValArgs _                                                  = 0
368
-- | Count all arguments: types, coercions and other values alike.
countArgs :: SimplCont -> Int
countArgs cont = case cont of
  ApplyToTy  { sc_cont = k } -> 1 + countArgs k
  ApplyToVal { sc_cont = k } -> 1 + countArgs k
  _                          -> 0
374
contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
-- Summarises value args, discards type args and coercions
-- The returned continuation of the call is only used to
-- answer questions like "are you interesting?"
-- First component is True <=> the function is a "lone" variable,
-- applied to nothing at all (not even a type arg or cast)
contArgs cont
  | lone cont = (True, [], cont)
  | otherwise = go [] cont
  where
    lone (ApplyToTy {})  = False  -- See Note [Lone variables] in CoreUnfold
    lone (ApplyToVal {}) = False
    lone (CastIt {})     = False
    lone _               = True

    -- Accumulate an ArgSummary per value arg; skip type args and casts
    go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
                                        = go (is_interesting arg se : args) k
    go args (ApplyToTy { sc_cont = k }) = go args k
    go args (CastIt _ k)                = go args k
    go args k                           = (False, reverse args, k)

    is_interesting arg se = interestingArg se arg
        -- Do *not* use short-cutting substitution here
        -- because we want to get as much IdInfo as possible
397
398
399 -------------------
mkArgInfo :: Id
          -> [CoreRule] -- Rules for function
          -> Int        -- Number of value args
          -> SimplCont  -- Context of the call
          -> ArgInfo
-- ^ Build the initial 'ArgInfo' for a call of the given function,
-- computing per-argument strictness and inline-discount streams.
-- For an unsaturated call we deliberately use vanilla (all-lazy,
-- zero-discount) info; see Note [Unsaturated functions].

mkArgInfo fun rules n_val_args call_cont
  | n_val_args < idArity fun            -- Note [Unsaturated functions]
  = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
            , ai_rules = rules, ai_encl = False
            , ai_strs = vanilla_stricts
            , ai_discs = vanilla_discounts }
  | otherwise
  = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
            , ai_rules = rules
            , ai_encl = interestingArgContext rules call_cont
            , ai_strs = add_type_str fun_ty arg_stricts
            , ai_discs = arg_discounts }
  where
    fun_ty = idType fun

    vanilla_discounts, arg_discounts :: [Int]
    vanilla_discounts = repeat 0
    -- Discounts come from the function's unfolding guidance, padded
    -- out to an infinite stream with zeros
    arg_discounts = case idUnfolding fun of
                        CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
                              -> discounts ++ vanilla_discounts
                        _     -> vanilla_discounts

    vanilla_stricts, arg_stricts :: [Bool]
    vanilla_stricts = repeat False

    arg_stricts
      = case splitStrictSig (idStrictness fun) of
          (demands, result_info)
                | not (demands `lengthExceeds` n_val_args)
                ->      -- Enough args, use the strictness given.
                        -- For bottoming functions we used to pretend that the arg
                        -- is lazy, so that we don't treat the arg as an
                        -- interesting context.  This avoids substituting
                        -- top-level bindings for (say) strings into
                        -- calls to error.  But now we are more careful about
                        -- inlining lone variables, so it's ok (see SimplUtils.analyseCont)
                   if isBotRes result_info then
                        map isStrictDmd demands         -- Finite => result is bottom
                   else
                        map isStrictDmd demands ++ vanilla_stricts
               | otherwise
               -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
                                <+> ppr n_val_args <+> ppr demands )
                   vanilla_stricts      -- Not enough args, or no strictness

    add_type_str :: Type -> [Bool] -> [Bool]
    -- If the function arg types are strict, record that in the 'strictness bits'
    -- No need to instantiate because unboxed types (which dominate the strict
    --   types) can't instantiate type variables.
    -- add_type_str is done repeatedly (for each call);
    --   might be better once-for-all in the function
    -- But beware primops/datacons with no strictness
    add_type_str _ [] = []
    add_type_str fun_ty strs            -- Look through foralls
        | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty       -- Includes coercions
        = add_type_str fun_ty' strs
    add_type_str fun_ty (str:strs)      -- Add strict-type info
        | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
        = (str || isStrictType arg_ty) : add_type_str fun_ty' strs
    add_type_str _ strs
        = strs
467
468 {- Note [Unsaturated functions]
469 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
470 Consider (test eyeball/inline4)
471 x = a:as
472 y = f x
473 where f has arity 2. Then we do not want to inline 'x', because
474 it'll just be floated out again. Even if f has lots of discounts
475 on its first argument -- it must be saturated for these to kick in
476 -}
477
478
479 {-
480 ************************************************************************
481 * *
482 Interesting arguments
483 * *
484 ************************************************************************
485
486 Note [Interesting call context]
487 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
488 We want to avoid inlining an expression where there can't possibly be
489 any gain, such as in an argument position. Hence, if the continuation
490 is interesting (eg. a case scrutinee, application etc.) then we
491 inline, otherwise we don't.
492
493 Previously some_benefit used to return True only if the variable was
494 applied to some value arguments. This didn't work:
495
496 let x = _coerce_ (T Int) Int (I# 3) in
497 case _coerce_ Int (T Int) x of
498 I# y -> ....
499
500 we want to inline x, but can't see that it's a constructor in a case
501 scrutinee position, and some_benefit is False.
502
503 Another example:
504
505 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
506
507 .... case dMonadST _@_ x0 of (a,b,c) -> ....
508
509 we'd really like to inline dMonadST here, but we *don't* want to
510 inline if the case expression is just
511
512 case x of y { DEFAULT -> ... }
513
514 since we can just eliminate this case instead (x is in WHNF). Similar
515 applies when x is bound to a lambda expression. Hence
516 contIsInteresting looks for case expressions with just a single
517 default case.
518 -}
519
-- | Classify the continuation of a call, so the inliner knows how
-- keen to be.  See Note [Interesting call context].
interestingCallContext :: SimplCont -> CallCtxt
interestingCallContext cont
  = interesting cont
  where
    interesting (Select {})     = CaseCtxt
    interesting (ApplyToVal {}) = ValAppCtxt
        -- Can happen if we have (f Int |> co) y
        -- If f has an INLINE prag we need to give it some
        -- motivation to inline. See Note [Cast then apply]
        -- in CoreUnfold
    interesting (StrictArg _ cci _)         = cci
    interesting (StrictBind {})             = BoringCtxt
    interesting (Stop _ cci)                = cci
    interesting (TickIt _ k)                = interesting k
    interesting (ApplyToTy { sc_cont = k }) = interesting k
    interesting (CastIt _ k)                = interesting k
        -- If this call is the arg of a strict function, the context
        -- is a bit interesting.  If we inline here, we may get useful
        -- evaluation information to avoid repeated evals: e.g.
        --      x + (y * z)
        -- Here the contIsInteresting makes the '*' keener to inline,
        -- which in turn exposes a constructor which makes the '+' inline.
        -- Assuming that +,* aren't small enough to inline regardless.
        --
        -- It's also very important to inline in a strict context for things
        -- like
        --      foldr k z (f x)
        -- Here, the context of (f x) is strict, and if f's unfolding is
        -- a build it's *great* to inline it here.  So we must ensure that
        -- the context for (f x) is not totally uninteresting.
551
interestingArgContext :: [CoreRule] -> SimplCont -> Bool
-- ^ True if either the function itself has rules, or some enclosing
-- call in the continuation is a rule-bearing argument position.
--
-- If the argument has form (f x y), where x,y are boring,
-- and f is marked INLINE, then we don't want to inline f.
-- But if the context of the argument is
--      g (f x y)
-- where g has rules, then we *do* want to inline f, in case it
-- exposes a rule that might fire.  Similarly, if the context is
--      h (g (f x x))
-- where h has rules, then we do want to inline f; hence the
-- call_cont argument to interestingArgContext
--
-- The ai-rules flag makes this happen; if it's
-- set, the inliner gets just enough keener to inline f
-- regardless of how boring f's arguments are, if it's marked INLINE
--
-- The alternative would be to *always* inline an INLINE function,
-- regardless of how boring its context is; but that seems overkill
-- For example, it'd mean that wrapper functions were always inlined
--
-- The call_cont passed to interestingArgContext is the context of
-- the call itself, e.g. g <hole> in the example above
interestingArgContext rules call_cont
  = notNull rules || enclosing_fn_has_rules
  where
    enclosing_fn_has_rules = go call_cont

    go (Select {})         = False
    go (ApplyToVal {})     = False  -- Shouldn't really happen
    go (ApplyToTy  {})     = False  -- Ditto
    go (StrictArg _ cci _) = interesting cci
    go (StrictBind {})     = False  -- ??
    go (CastIt _ c)        = go c
    go (Stop _ cci)        = interesting cci
    go (TickIt _ c)        = go c

    interesting RuleArgCtxt = True
    interesting _           = False
589
590
591 {- Note [Interesting arguments]
592 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
593 An argument is interesting if it deserves a discount for unfoldings
594 with a discount in that argument position. The idea is to avoid
595 unfolding a function that is applied only to variables that have no
596 unfolding (i.e. they are probably lambda bound): f x y z There is
597 little point in inlining f here.
598
599 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
600 we must look through lets, eg (let x = e in C a b), because the let will
601 float, exposing the value, if we inline. That makes it different to
602 exprIsHNF.
603
604 Before 2009 we said it was interesting if the argument had *any* structure
605 at all; i.e. (hasSomeUnfolding v). But that did too much inlining; see Trac #3016.
606
607 But we don't regard (f x y) as interesting, unless f is unsaturated.
608 If it's saturated and f hasn't inlined, then it's probably not going
609 to now!
610
611 Note [Conlike is interesting]
612 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
613 Consider
614 f d = ...((*) d x y)...
615 ... f (df d')...
616 where df is con-like. Then we'd really like to inline 'f' so that the
617 rule for (*) (df d) can fire. To do this
618 a) we give a discount for being an argument of a class-op (eg (*) d)
619 b) we say that a con-like argument (eg (df d)) is interesting
620 -}
621
-- | Summarise how "interesting" an argument expression is, i.e.
-- whether it deserves an inlining discount at the call site.
-- See Note [Interesting arguments].
interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
interestingArg env e = go env 0 e
  where
    -- n is # value args to which the expression is applied
    -- For a Var, first resolve it through the substitution
    go env n (Var v)
       | SimplEnv { seIdSubst = ids, seInScope = in_scope } <- env
       = case lookupVarEnv ids v of
           Nothing                     -> go_var n (refineFromInScope in_scope v)
           Just (DoneId v')            -> go_var n (refineFromInScope in_scope v')
           Just (DoneEx e)             -> go (zapSubstEnv env)             n e
           Just (ContEx tvs cvs ids e) -> go (setSubstEnv env tvs cvs ids) n e

    go _   _ (Lit {})          = ValueArg
    go _   _ (Type _)          = TrivArg
    go _   _ (Coercion _)      = TrivArg
    go env n (App fn (Type _)) = go env n fn     -- Type args don't count
    go env n (App fn _)        = go env (n+1) fn
    go env n (Tick _ a)        = go env n a
    go env n (Cast e _)        = go env n e
    go env n (Lam v e)
       | isTyVar v             = go env n e
       | n>0                   = NonTrivArg      -- (\x.b) e   is NonTriv
       | otherwise             = ValueArg
    go _ _ (Case {})           = NonTrivArg
    go env n (Let b e)         = case go env' n e of
                                   ValueArg -> ValueArg
                                        -- Lets float, exposing the value
                                   _        -> NonTrivArg
                               where
                                 env' = env `addNewInScopeIds` bindersOf b

    go_var n v
       | isConLikeId v     = ValueArg   -- Experimenting with 'conlike' rather than
                                        --    data constructors here
       | idArity v > n     = ValueArg   -- Catches (eg) primops with arity but no unfolding
       | n > 0             = NonTrivArg -- Saturated or unknown call
       | conlike_unfolding = ValueArg   -- n==0; look for an interesting unfolding
                                        -- See Note [Conlike is interesting]
       | otherwise         = TrivArg    -- n==0, no useful unfolding
       where
         conlike_unfolding = isConLikeUnfolding (idUnfolding v)
663
664 {-
665 ************************************************************************
666 * *
667 SimplifierMode
668 * *
669 ************************************************************************
670
671 The SimplifierMode controls several switches; see its definition in
672 CoreMonad
673 sm_rules :: Bool -- Whether RULES are enabled
674 sm_inline :: Bool -- Whether inlining is enabled
675 sm_case_case :: Bool -- Whether case-of-case is enabled
676 sm_eta_expand :: Bool -- Whether eta-expansion is enabled
677 -}
678
-- | The simplifier environment used for expressions typed into GHCi.
-- Inlining is switched off entirely (see the comment below); rules
-- and eta-expansion follow the corresponding dynamic flags.
simplEnvForGHCi :: DynFlags -> SimplEnv
simplEnvForGHCi dflags
  = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
                           , sm_phase = InitialPhase
                           , sm_rules = rules_on
                           , sm_inline = False
                           , sm_eta_expand = eta_expand_on
                           , sm_case_case = True }
  where
    rules_on      = gopt Opt_EnableRewriteRules   dflags
    eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
    -- Do not do any inlining, in case we expose some unboxed
    -- tuple stuff that confuses the bytecode interpreter
692
updModeForStableUnfoldings :: Activation -> SimplifierMode -> SimplifierMode
-- See Note [Simplifying inside stable unfoldings]
updModeForStableUnfoldings inline_rule_act current_mode
  = current_mode { sm_phase      = new_phase
                 , sm_inline     = True
                 , sm_eta_expand = False }
       -- For sm_rules, just inherit; sm_rules might be "off"
       -- because of -fno-enable-rewrite-rules
  where
    -- Set the phase to the one in which the unfolding first
    -- becomes active
    new_phase = case inline_rule_act of
                  ActiveAfter _ n -> Phase n
                  _               -> InitialPhase
704
-- | Simplifier mode for simplifying a RULE: no inlining, no rule
-- application, no eta-expansion, so the rule stays as written.
-- See Note [Simplifying rules]
updModeForRules :: SimplifierMode -> SimplifierMode
updModeForRules current_mode
  = current_mode { sm_phase      = InitialPhase
                 , sm_inline     = False
                 , sm_rules      = False
                 , sm_eta_expand = False }
712
713 {- Note [Simplifying rules]
714 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
715 When simplifying a rule, refrain from any inlining or applying of other RULES.
716
717 Doing anything to the LHS is plain confusing, because it means that what the
718 rule matches is not what the user wrote. c.f. Trac #10595, and #10528.
719 Moreover, inlining (or applying rules) on rule LHSs risks introducing
720 Ticks into the LHS, which makes matching trickier. Trac #10665, #10745.
721
722 Doing this to either side confounds tools like HERMIT, which seek to reason
723 about and apply the RULES as originally written. See Trac #10829.
724
725 Note [Inlining in gentle mode]
726 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
727 Something is inlined if
728 (i) the sm_inline flag is on, AND
729 (ii) the thing has an INLINE pragma, AND
730 (iii) the thing is inlinable in the earliest phase.
731
732 Example of why (iii) is important:
733 {-# INLINE [~1] g #-}
734 g = ...
735
736 {-# INLINE f #-}
737 f x = g (g x)
738
739 If we were to inline g into f's inlining, then an importing module would
740 never be able to do
741 f e --> g (g e) ---> RULE fires
742 because the stable unfolding for f has had g inlined into it.
743
744 On the other hand, it is bad not to do ANY inlining into an
745 stable unfolding, because then recursive knots in instance declarations
746 don't get unravelled.
747
748 However, *sometimes* SimplGently must do no call-site inlining at all
749 (hence sm_inline = False). Before full laziness we must be careful
750 not to inline wrappers, because doing so inhibits floating
751 e.g. ...(case f x of ...)...
752 ==> ...(case (case x of I# x# -> fw x#) of ...)...
753 ==> ...(case x of I# x# -> case fw x# of ...)...
754 and now the redex (f x) isn't floatable any more.
755
756 The no-inlining thing is also important for Template Haskell. You might be
757 compiling in one-shot mode with -O2; but when TH compiles a splice before
758 running it, we don't want to use -O2. Indeed, we don't want to inline
759 anything, because the byte-code interpreter might get confused about
760 unboxed tuples and suchlike.
761
762 Note [Simplifying inside stable unfoldings]
763 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
764 We must take care with simplification inside stable unfoldings (which come from
765 INLINE pragmas).
766
767 First, consider the following example
768 let f = \pq -> BIG
769 in
770 let g = \y -> f y y
771 {-# INLINE g #-}
772 in ...g...g...g...g...g...
773 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
774 and thence copied multiple times when g is inlined. HENCE we treat
775 any occurrence in a stable unfolding as a multiple occurrence, not a single
776 one; see OccurAnal.addRuleUsage.
777
778 Second, we *do* want to do some modest rules/inlining stuff in stable
779 unfoldings, partly to eliminate senseless crap, and partly to break
780 the recursive knots generated by instance declarations.
781
782 However, suppose we have
783 {-# INLINE <act> f #-}
784 f = <rhs>
785 meaning "inline f in phases p where activation <act>(p) holds".
786 Then what inlinings/rules can we apply to the copy of <rhs> captured in
787 f's stable unfolding? Our model is that literally <rhs> is substituted for
788 f when it is inlined. So our conservative plan (implemented by
789 updModeForStableUnfoldings) is this:
790
791 -------------------------------------------------------------
792 When simplifying the RHS of an stable unfolding, set the phase
793 to the phase in which the stable unfolding first becomes active
794 -------------------------------------------------------------
795
796 That ensures that
797
798 a) Rules/inlinings that *cease* being active before p will
799 not apply to the stable unfolding, consistent with it being
800 inlined in its *original* form in phase p.
801
802 b) Rules/inlinings that only become active *after* p will
803 not apply to the stable unfolding, again to be consistent with
804 inlining the *original* rhs in phase p.
805
806 For example,
807 {-# INLINE f #-}
808 f x = ...g...
809
810 {-# NOINLINE [1] g #-}
811 g y = ...
812
813 {-# RULE h g = ... #-}
814 Here we must not inline g into f's RHS, even when we get to phase 0,
815 because when f is later inlined into some other module we want the
816 rule for h to fire.
817
818 Similarly, consider
819 {-# INLINE f #-}
820 f x = ...g...
821
822 g y = ...
823 and suppose that there are auto-generated specialisations and a strictness
824 wrapper for g. The specialisations get activation AlwaysActive, and the
825 strictness wrapper get activation (ActiveAfter 0). So the strictness
826 wrapper fails the test and won't be inlined into f's stable unfolding. That
827 means f can inline, expose the specialised call to g, so the specialisation
828 rules can fire.
829
830 A note about wrappers
831 ~~~~~~~~~~~~~~~~~~~~~
832 It's also important not to inline a worker back into a wrapper.
833 A wrapper looks like
834 wrapper = inline_me (\x -> ...worker... )
835 Normally, the inline_me prevents the worker getting inlined into
836 the wrapper (initially, the worker's only call site!). But,
837 if the wrapper is sure to be called, the strictness analyser will
838 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
839 continuation.
840 -}
841
-- | May we use an Id's unfolding at this point in simplification?
-- With inlining switched off only compulsory unfoldings are allowed;
-- otherwise the current simplifier phase decides.
activeUnfolding :: SimplEnv -> Id -> Bool
activeUnfolding env =
    if not (sm_inline mode)
    then active_unfolding_minimal
    else case sm_phase mode of
           InitialPhase -> active_unfolding_gentle
           Phase n      -> active_unfolding n
  where
    mode = getMode env
850
getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
-- When matching in a RULE, we want to "look through" an unfolding
-- (to see a constructor) if *rules* are on, even if *inlinings*
-- are not.  A notable example is DFuns, which really we want to
-- match in rules like (op dfun) in gentle mode.  Another example
-- is 'otherwise', which we want exprIsConApp_maybe to be able to
-- see very early on.
getUnfoldingInRuleMatch env = (seInScope env, lookup_unf)
  where
    mode = getMode env

    -- Yield the unfolding only when its activation allows it here
    lookup_unf id
      | unf_active id = idUnfolding id
      | otherwise     = NoUnfolding

    unf_active id
      | sm_rules mode = isActive (sm_phase mode) (idInlineActivation id)
      | otherwise     = active_unfolding_minimal id
868
active_unfolding_minimal :: Id -> Bool
-- Compulsory unfoldings only.
-- We ignore SimplGently, because we want to inline regardless;
-- the Id has no top-level binding at all.
--
-- NB: we used to have a second exception, for data con wrappers,
-- on the grounds that we use gentle mode for rule LHSs, and
-- they match better when data con wrappers are inlined.
-- But that only really applies to the trivial wrappers (like (:)),
-- and they are now constructed as Compulsory unfoldings (in MkId)
-- so they'll happen anyway.
active_unfolding_minimal = isCompulsoryUnfolding . realIdUnfolding
881
active_unfolding :: PhaseNum -> Id -> Bool
-- Is the Id's inline activation switched on in phase n?
active_unfolding n = isActiveIn n . idInlineActivation
884
active_unfolding_gentle :: Id -> Bool
-- Anything that is early-active
-- See Note [Gentle mode]
active_unfolding_gentle id =
    let prag = idInlinePragma id
    in isInlinePragma prag
       && isEarlyActive (inlinePragmaActivation prag)
          -- NB: wrappers are not early-active
894
895 ----------------------
activeRule :: SimplEnv -> Activation -> Bool
-- Nothing => No rules at all
activeRule env
  | sm_rules mode = isActive (sm_phase mode)
  | otherwise     = \_ -> False         -- Rewriting is off
  where
    mode = getMode env
903
904 {-
905 ************************************************************************
906 * *
907 preInlineUnconditionally
908 * *
909 ************************************************************************
910
911 preInlineUnconditionally
912 ~~~~~~~~~~~~~~~~~~~~~~~~
913 @preInlineUnconditionally@ examines a bndr to see if it is used just
914 once in a completely safe way, so that it is safe to discard the
binding and inline its RHS at the (unique) usage site, REGARDLESS of how
916 big the RHS might be. If this is the case we don't simplify the RHS
917 first, but just inline it un-simplified.
918
919 This is much better than first simplifying a perhaps-huge RHS and then
920 inlining and re-simplifying it. Indeed, it can be at least quadratically
921 better. Consider
922
923 x1 = e1
924 x2 = e2[x1]
925 x3 = e3[x2]
926 ...etc...
927 xN = eN[xN-1]
928
We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times etc.
930 This can happen with cascades of functions too:
931
932 f1 = \x1.e1
933 f2 = \xs.e2[f1]
        f3 = \xs.e3[f2]
935 ...etc...
936
937 THE MAIN INVARIANT is this:
938
939 ---- preInlineUnconditionally invariant -----
940 IF preInlineUnconditionally chooses to inline x = <rhs>
941 THEN doing the inlining should not change the occurrence
942 info for the free vars of <rhs>
943 ----------------------------------------------
944
945 For example, it's tempting to look at trivial binding like
946 x = y
947 and inline it unconditionally. But suppose x is used many times,
948 but this is the unique occurrence of y. Then inlining x would change
949 y's occurrence info, which breaks the invariant. It matters: y
might have a BIG rhs, which will now be dup'd at every occurrence of x.
951
952
953 Even RHSs labelled InlineMe aren't caught here, because there might be
954 no benefit from inlining at the call site.
955
956 [Sept 01] Don't unconditionally inline a top-level thing, because that
957 can simply make a static thing into something built dynamically. E.g.
958 x = (a,b)
959 main = \s -> h x
960
961 [Remember that we treat \s as a one-shot lambda.] No point in
962 inlining x unless there is something interesting about the call site.
963
964 But watch out: if you aren't careful, some useful foldr/build fusion
965 can be lost (most notably in spectral/hartel/parstof) because the
966 foldr didn't see the build. Doing the dynamic allocation isn't a big
967 deal, in fact, but losing the fusion can be. But the right thing here
968 seems to be to do a callSiteInline based on the fact that there is
969 something interesting about the call site (it's strict). Hmm. That
970 seems a bit fragile.
971
972 Conclusion: inline top level things gaily until Phase 0 (the last
973 phase), at which point don't.
974
975 Note [pre/postInlineUnconditionally in gentle mode]
976 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
977 Even in gentle mode we want to do preInlineUnconditionally. The
978 reason is that too little clean-up happens if you don't inline
979 use-once things. Also a bit of inlining is *good* for full laziness;
980 it can expose constant sub-expressions. Example in
981 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
982 let-float if you inline windowToViewport
983
984 However, as usual for Gentle mode, do not inline things that are
inactive in the initial stages.  See Note [Gentle mode].
986
987 Note [Stable unfoldings and preInlineUnconditionally]
988 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
989 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
990 Example
991
992 {-# INLINE f #-}
993 f :: Eq a => a -> a
994 f x = ...
995
996 fInt :: Int -> Int
997 fInt = f Int dEqInt
998
999 ...fInt...fInt...fInt...
1000
1001 Here f occurs just once, in the RHS of fInt. But if we inline it there
1002 we'll lose the opportunity to inline at each of fInt's call sites.
1003 The INLINE pragma will only inline when the application is saturated
1004 for exactly this reason; and we don't want PreInlineUnconditionally
1005 to second-guess it. A live example is Trac #3736.
1006 c.f. Note [Stable unfoldings and postInlineUnconditionally]
1007
1008 Note [Top-level bottoming Ids]
1009 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1010 Don't inline top-level Ids that are bottoming, even if they are used just
1011 once, because FloatOut has gone to some trouble to extract them out.
1012 Inlining them won't make the program run faster!
1013
1014 Note [Do not inline CoVars unconditionally]
1015 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1016 Coercion variables appear inside coercions, and the RHS of a let-binding
1017 is a term (not a coercion) so we can't necessarily inline the latter in
1018 the former.
1019 -}
1020
-- | Decide whether to inline a binding unconditionally, *before* simplifying
-- its RHS: True when the binder is dead, or occurs exactly once in a way that
-- makes substituting the unsimplified RHS safe.  See the preInlineUnconditionally
-- invariant above: inlining must not change the occurrence info of the RHS's
-- free variables.
preInlineUnconditionally :: DynFlags -> SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
-- Precondition: rhs satisfies the let/app invariant
-- See Note [CoreSyn let/app invariant] in CoreSyn
-- Reason: we don't want to inline single uses, or discard dead bindings,
--         for unlifted, side-effect-full bindings
preInlineUnconditionally dflags env top_lvl bndr rhs
  | not active                               = False
  | isStableUnfolding (idUnfolding bndr)     = False -- Note [Stable unfoldings and preInlineUnconditionally]
  | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
  | not (gopt Opt_SimplPreInlining dflags)   = False
  | isCoVar bndr                             = False -- Note [Do not inline CoVars unconditionally]
  | otherwise = case idOccInfo bndr of
                  IAmDead                    -> True -- Happens in ((\x.1) v)
                  OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
                  _                          -> False
  where
    mode = getMode env
    active = isActive (sm_phase mode) act
             -- See Note [pre/postInlineUnconditionally in gentle mode]
    act = idInlineActivation bndr
    try_once in_lam int_cxt     -- There's one textual occurrence
      | not in_lam = isNotTopLevel top_lvl || early_phase
      | otherwise  = int_cxt && canInlineInLam rhs

-- Be very careful before inlining inside a lambda, because (a) we must not
-- invalidate occurrence information, and (b) we want to avoid pushing a
-- single allocation (here) into multiple allocations (inside lambda).
-- Inlining a *function* with a single *saturated* call would be ok, mind you.
--      || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
--      where
--              is_cheap = exprIsCheap rhs
--              ok = is_cheap && int_cxt

        --      int_cxt         The context isn't totally boring
        -- E.g. let f = \ab.BIG in \y. map f xs
        --      Don't want to substitute for f, because then we allocate
        --      its closure every time the \y is called
        -- But: let f = \ab.BIG in \y. map (f y) xs
        --      Now we do want to substitute for f, even though it's not
        --      saturated, because we're going to allocate a closure for
        --      (f y) every time round the loop anyhow.

        -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
        -- so substituting rhs inside a lambda doesn't change the occ info.
        -- Sadly, not quite the same as exprIsHNF.
    canInlineInLam (Lit _)    = True
    canInlineInLam (Lam b e)  = isRuntimeVar b || canInlineInLam e
    canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
    canInlineInLam _          = False
      -- not ticks.  Counting ticks cannot be duplicated, and non-counting
      -- ticks around a Lam will disappear anyway.

    early_phase = case sm_phase mode of
                    Phase 0 -> False
                    _       -> True
-- If we don't have this early_phase test, consider
--      x = length [1,2,3]
-- The full laziness pass carefully floats all the cons cells to
-- top level, and preInlineUnconditionally floats them all back in.
-- Result is (a) static allocation replaced by dynamic allocation
--           (b) many simplifier iterations because this tickles
--               a related problem; only one inlining per pass
--
-- On the other hand, I have seen cases where top-level fusion is
-- lost if we don't inline top level thing (e.g. string constants)
-- Hence the test for phase zero (which is the phase for all the final
-- simplifications).  Until phase zero we take no special notice of
-- top level things, but then we become more leery about inlining
-- them.
1090
1091 {-
1092 ************************************************************************
1093 * *
1094 postInlineUnconditionally
1095 * *
1096 ************************************************************************
1097
1098 postInlineUnconditionally
1099 ~~~~~~~~~~~~~~~~~~~~~~~~~
1100 @postInlineUnconditionally@ decides whether to unconditionally inline
1101 a thing based on the form of its RHS; in particular if it has a
1102 trivial RHS. If so, we can inline and discard the binding altogether.
1103
1104 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
1105 only have *forward* references. Hence, it's safe to discard the binding
1106
1107 NOTE: This isn't our last opportunity to inline. We're at the binding
1108 site right now, and we'll get another opportunity when we get to the
occurrence(s)
1110
Note that we do this unconditional inlining only for trivial RHSs.
1112 Don't inline even WHNFs inside lambdas; doing so may simply increase
1113 allocation when the function is called. This isn't the last chance; see
1114 NOTE above.
1115
1116 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here Why?
1117 Because we don't even want to inline them into the RHS of constructor
1118 arguments. See NOTE above
1119
1120 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
1121 it's best to inline it anyway. We often get a=E; b=a from desugaring,
1122 with both a and b marked NOINLINE. But that seems incompatible with
1123 our new view that inlining is like a RULE, so I'm sticking to the 'active'
1124 story for now.
1125 -}
1126
-- | Decide whether to inline a binding unconditionally *after* its RHS has
-- been simplified, based chiefly on the RHS's form: always for trivial RHSs
-- (unless top-level), and for small, once-used RHSs where duplication of
-- work/code is acceptable.  If True, the binding is discarded entirely.
postInlineUnconditionally
    :: DynFlags -> SimplEnv -> TopLevelFlag
    -> OutId            -- The binder (an InId would be fine too)
                        --            (*not* a CoVar)
    -> OccInfo          -- From the InId
    -> OutExpr
    -> Unfolding
    -> Bool
-- Precondition: rhs satisfies the let/app invariant
-- See Note [CoreSyn let/app invariant] in CoreSyn
-- Reason: we don't want to inline single uses, or discard dead bindings,
--         for unlifted, side-effect-full bindings
postInlineUnconditionally dflags env top_lvl bndr occ_info rhs unfolding
  | not active                  = False
  | isWeakLoopBreaker occ_info  = False -- If it's a loop-breaker of any kind, don't inline
                                        -- because it might be referred to "earlier"
  | isExportedId bndr           = False
  | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
  | isTopLevel top_lvl          = False -- Note [Top level and postInlineUnconditionally]
  | exprIsTrivial rhs           = True
  | otherwise
  = case occ_info of
        -- The point of examining occ_info here is that for *non-values*
        -- that occur outside a lambda, the call-site inliner won't have
        -- a chance (because it doesn't know that the thing
        -- only occurs once).   The pre-inliner won't have gotten
        -- it either, if the thing occurs in more than one branch
        -- So the main target is things like
        --     let x = f y in
        --     case v of
        --        True  -> case x of ...
        --        False -> case x of ...
        -- This is very important in practice; e.g. wheel-seive1 doubles
        -- in allocation if you miss this out
      OneOcc in_lam _one_br int_cxt     -- OneOcc => no code-duplication issue
        ->     smallEnoughToInline dflags unfolding     -- Small enough to dup
                        -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
                        --
                        -- NB: Do NOT inline arbitrarily big things, even if one_br is True
                        -- Reason: doing so risks exponential behaviour.  We simplify a big
                        --         expression, inline it, and simplify it again.  But if the
                        --         very same thing happens in the big expression, we get
                        --         exponential cost!
                        -- PRINCIPLE: when we've already simplified an expression once,
                        -- make sure that we only inline it if it's reasonably small.

           && (not in_lam ||
                        -- Outside a lambda, we want to be reasonably aggressive
                        -- about inlining into multiple branches of case
                        -- e.g. let x = <non-value>
                        --      in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
                        -- Inlining can be a big win if C3 is the hot-spot, even if
                        -- the uses in C1, C2 are not 'interesting'
                        -- An example that gets worse if you add int_cxt here is 'clausify'

                (isCheapUnfolding unfolding && int_cxt))
                        -- isCheap => acceptable work duplication; in_lam may be true
                        -- int_cxt to prevent us inlining inside a lambda without some
                        -- good reason.  See the notes on int_cxt in preInlineUnconditionally

      IAmDead -> True   -- This happens; for example, the case_bndr during case of
                        -- known constructor:  case (a,b) of x { (p,q) -> ... }
                        -- Here x isn't mentioned in the RHS, so we don't want to
                        -- create the (dead) let-binding  let x = (a,b) in ...

      _ -> False

-- Here's an example that we don't handle well:
--      let f = if b then Left (\x.BIG) else Right (\y.BIG)
--      in \y. ....case f of {...} ....
-- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
-- But
--  - We can't preInlineUnconditionally because that would invalidate
--    the occ info for b.
--  - We can't postInlineUnconditionally because the RHS is big, and
--    that risks exponential behaviour
--  - We can't call-site inline, because the rhs is big
-- Alas!

  where
    active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
        -- See Note [pre/postInlineUnconditionally in gentle mode]
1209
1210 {-
1211 Note [Top level and postInlineUnconditionally]
1212 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1213 We don't do postInlineUnconditionally for top-level things (even for
1214 ones that are trivial):
1215
1216 * Doing so will inline top-level error expressions that have been
1217 carefully floated out by FloatOut. More generally, it might
1218 replace static allocation with dynamic.
1219
1220 * Even for trivial expressions there's a problem. Consider
1221 {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
1222 blah xs = reverse xs
1223 ruggle = sort
1224 In one simplifier pass we might fire the rule, getting
1225 blah xs = ruggle xs
1226 but in *that* simplifier pass we must not do postInlineUnconditionally
1227 on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
1228
1229 If the rhs is trivial it'll be inlined by callSiteInline, and then
1230 the binding will be dead and discarded by the next use of OccurAnal
1231
1232 * There is less point, because the main goal is to get rid of local
1233 bindings used in multiple case branches.
1234
1235 * The inliner should inline trivial things at call sites anyway.
1236
1237 Note [Stable unfoldings and postInlineUnconditionally]
1238 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Do not do postInlineUnconditionally if the Id has a stable unfolding,
1240 otherwise we lose the unfolding. Example
1241
1242 -- f has stable unfolding with rhs (e |> co)
1243 -- where 'e' is big
1244 f = e |> co
1245
1246 Then there's a danger we'll optimise to
1247
1248 f' = e
1249 f = f' |> co
1250
1251 and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
1252 won't inline because 'e' is too big.
1253
1254 c.f. Note [Stable unfoldings and preInlineUnconditionally]
1255
1256
1257 ************************************************************************
1258 * *
1259 Rebuilding a lambda
1260 * *
1261 ************************************************************************
1262 -}
1263
mkLam :: [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
-- mkLam tries three things
--      a) eta reduction, if that gives a trivial expression
--      b) eta expansion [only if there are some value lambdas]
--      c) floating a cast out of the lambda body
--         (see Note [Casts and lambdas])

mkLam [] body _cont
  = return body
mkLam bndrs body cont
  = do  { dflags <- getDynFlags
        ; mkLam' dflags bndrs body }
  where
    mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
    -- Float the cast outside the lambdas, hoping it will meet and
    -- cancel with another cast
    mkLam' dflags bndrs (Cast body co)
      | not (any bad bndrs)
        -- Note [Casts and lambdas]
      = do { lam <- mkLam' dflags bndrs body
           ; return (mkCast lam (mkPiCos Representational bndrs co)) }
      where
        co_vars  = tyCoVarsOfCo co
        -- A coercion binder mentioned in the coercion itself cannot be
        -- floated past; see the side condition in Note [Casts and lambdas]
        bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars

    -- Gather up any further lambdas in the body
    mkLam' dflags bndrs body@(Lam {})
      = mkLam' dflags (bndrs ++ bndrs1) body1
      where
        (bndrs1, body1) = collectBinders body

    -- Floatable ticks can move outside the lambdas
    mkLam' dflags bndrs (Tick t expr)
      | tickishFloatable t
      = mkTick t <$> mkLam' dflags bndrs expr

    mkLam' dflags bndrs body
      | gopt Opt_DoEtaReduction dflags
      , Just etad_lam <- tryEtaReduce bndrs body
      = do { tick (EtaReduction (head bndrs))
           ; return etad_lam }

      | not (contIsRhs cont)   -- See Note [Eta expanding lambdas]
      , gopt Opt_DoLambdaEtaExpansion dflags
      , any isRuntimeVar bndrs
      , let body_arity = exprEtaExpandArity dflags body
      , body_arity > 0
      = do { tick (EtaExpansion (head bndrs))
           ; let res = mkLams bndrs (etaExpand body_arity body)
           ; traceSmpl "eta expand" (vcat [text "before" <+> ppr (mkLams bndrs body)
                                          , text "after" <+> ppr res])
           ; return res }

      | otherwise
      = return (mkLams bndrs body)
1313
1314 {-
1315 Note [Eta expanding lambdas]
1316 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1317 In general we *do* want to eta-expand lambdas. Consider
1318 f (\x -> case x of (a,b) -> \s -> blah)
1319 where 's' is a state token, and hence can be eta expanded. This
showed up in the code for GHC.IO.Handle.Text.hPutChar, a rather
1321 important function!
1322
1323 The eta-expansion will never happen unless we do it now. (Well, it's
1324 possible that CorePrep will do it, but CorePrep only has a half-baked
1325 eta-expander that can't deal with casts. So it's much better to do it
1326 here.)
1327
1328 However, when the lambda is let-bound, as the RHS of a let, we have a
1329 better eta-expander (in the form of tryEtaExpandRhs), so we don't
1330 bother to try expansion in mkLam in that case; hence the contIsRhs
1331 guard.
1332
1333 Note [Casts and lambdas]
1334 ~~~~~~~~~~~~~~~~~~~~~~~~
1335 Consider
1336 (\x. (\y. e) `cast` g1) `cast` g2
1337 There is a danger here that the two lambdas look separated, and the
1338 full laziness pass might float an expression to between the two.
1339
1340 So this equation in mkLam' floats the g1 out, thus:
1341 (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
1342 where x:tx.
1343
1344 In general, this floats casts outside lambdas, where (I hope) they
1345 might meet and cancel with some other cast:
1346 \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
1347 /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
1348 /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
1349 (if not (g `in` co))
1350
1351 Notice that it works regardless of 'e'. Originally it worked only
1352 if 'e' was itself a lambda, but in some cases that resulted in
1353 fruitless iteration in the simplifier. A good example was when
1354 compiling Text.ParserCombinators.ReadPrec, where we had a definition
1355 like (\x. Get `cast` g)
1356 where Get is a constructor with nonzero arity. Then mkLam eta-expanded
1357 the Get, and the next iteration eta-reduced it, and then eta-expanded
1358 it again.
1359
1360 Note also the side condition for the case of coercion binders.
1361 It does not make sense to transform
1362 /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
1363 because the latter is not well-kinded.
1364
1365 ************************************************************************
1366 * *
1367 Eta expansion
1368 * *
1369 ************************************************************************
1370 -}
1371
-- | Try to eta-expand the RHS of a let binding, returning the (possibly
-- increased) arity together with the (possibly expanded) RHS.
tryEtaExpandRhs :: SimplEnv -> OutId -> OutExpr -> SimplM (Arity, OutExpr)
-- See Note [Eta-expanding at let bindings]
tryEtaExpandRhs env bndr rhs
  = do { dflags <- getDynFlags
       ; (new_arity, new_rhs) <- try_expand dflags

       ; WARN( new_arity < old_id_arity,
               (text "Arity decrease:" <+> (ppr bndr <+> ppr old_id_arity
                <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
                        -- Note [Arity decrease] in Simplify
         return (new_arity, new_rhs) }
  where
    try_expand dflags
      | exprIsTrivial rhs
      = return (exprArity rhs, rhs)

      | sm_eta_expand (getMode env)      -- Provided eta-expansion is on
      , let new_arity1 = findRhsArity dflags bndr rhs old_arity
            new_arity2 = idCallArity bndr
            new_arity  = max new_arity1 new_arity2
      , new_arity > old_arity      -- And the current manifest arity isn't enough
      = do { tick (EtaExpansion bndr)
           ; return (new_arity, etaExpand new_arity rhs) }
      | otherwise
      = return (old_arity, rhs)

    old_arity    = exprArity rhs -- See Note [Do not eta-expand PAPs]
    old_id_arity = idArity bndr
1400
1401 {-
1402 Note [Eta-expanding at let bindings]
1403 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1404 We now eta expand at let-bindings, which is where the payoff comes.
1405 The most significant thing is that we can do a simple arity analysis
1406 (in CoreArity.findRhsArity), which we can't do for free-floating lambdas
1407
1408 One useful consequence of not eta-expanding lambdas is this example:
1409 genMap :: C a => ...
1410 {-# INLINE genMap #-}
1411 genMap f xs = ...
1412
1413 myMap :: D a => ...
1414 {-# INLINE myMap #-}
1415 myMap = genMap
1416
1417 Notice that 'genMap' should only inline if applied to two arguments.
1418 In the stable unfolding for myMap we'll have the unfolding
1419 (\d -> genMap Int (..d..))
1420 We do not want to eta-expand to
1421 (\d f xs -> genMap Int (..d..) f xs)
1422 because then 'genMap' will inline, and it really shouldn't: at least
1423 as far as the programmer is concerned, it's not applied to two
1424 arguments!
1425
1426 Note [Do not eta-expand PAPs]
1427 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1428 We used to have old_arity = manifestArity rhs, which meant that we
1429 would eta-expand even PAPs. But this gives no particular advantage,
1430 and can lead to a massive blow-up in code size, exhibited by Trac #9020.
1431 Suppose we have a PAP
1432 foo :: IO ()
1433 foo = returnIO ()
Then we can eta-expand it to
1435 foo = (\eta. (returnIO () |> sym g) eta) |> g
1436 where
1437 g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
1438
1439 But there is really no point in doing this, and it generates masses of
1440 coercions and whatnot that eventually disappear again. For T9020, GHC
allocated 6.6G before, and 0.8G afterwards; and residency dropped from
1442 1.8G to 45M.
1443
1444 But note that this won't eta-expand, say
1445 f = \g -> map g
1446 Does it matter not eta-expanding such functions? I'm not sure. Perhaps
1447 strictness analysis will have less to bite on?
1448
1449
1450 ************************************************************************
1451 * *
1452 \subsection{Floating lets out of big lambdas}
1453 * *
1454 ************************************************************************
1455
1456 Note [Floating and type abstraction]
1457 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1458 Consider this:
1459 x = /\a. C e1 e2
1460 We'd like to float this to
1461 y1 = /\a. e1
1462 y2 = /\a. e2
1463 x = /\a. C (y1 a) (y2 a)
1464 for the usual reasons: we want to inline x rather vigorously.
1465
1466 You may think that this kind of thing is rare. But in some programs it is
1467 common. For example, if you do closure conversion you might get:
1468
1469 data a :-> b = forall e. (e -> a -> b) :$ e
1470
1471 f_cc :: forall a. a :-> a
1472 f_cc = /\a. (\e. id a) :$ ()
1473
1474 Now we really want to inline that f_cc thing so that the
1475 construction of the closure goes away.
1476
1477 So I have elaborated simplLazyBind to understand right-hand sides that look
1478 like
1479 /\ a1..an. body
1480
1481 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1482 but there is quite a bit of plumbing in simplLazyBind as well.
1483
1484 The same transformation is good when there are lets in the body:
1485
1486 /\abc -> let(rec) x = e in b
1487 ==>
1488 let(rec) x' = /\abc -> let x = x' a b c in e
1489 in
1490 /\abc -> let x = x' a b c in b
1491
1492 This is good because it can turn things like:
1493
1494 let f = /\a -> letrec g = ... g ... in g
1495 into
1496 letrec g' = /\a -> ... g' a ...
1497 in
1498 let f = /\ a -> g' a
1499
1500 which is better. In effect, it means that big lambdas don't impede
1501 let-floating.
1502
1503 This optimisation is CRUCIAL in eliminating the junk introduced by
1504 desugaring mutually recursive definitions. Don't eliminate it lightly!
1505
1506 [May 1999] If we do this transformation *regardless* then we can
1507 end up with some pretty silly stuff. For example,
1508
1509 let
1510 st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1511 in ..
1512 becomes
1513 let y1 = /\s -> r1
1514 y2 = /\s -> r2
1515 st = /\s -> ...[y1 s/x1, y2 s/x2]
1516 in ..
1517
1518 Unless the "..." is a WHNF there is really no point in doing this.
1519 Indeed it can make things worse. Suppose x1 is used strictly,
1520 and is of the form
1521
1522 x1* = case f y of { (a,b) -> e }
1523
1524 If we abstract this wrt the tyvar we then can't do the case inline
1525 as we would normally do.
1526
1527 That's why the whole transformation is part of the same process that
1528 floats let-bindings and constructor arguments out of RHSs. In particular,
1529 it is guarded by the doFloatFromRhs call in simplLazyBind.
1530
1531 Note [Which type variables to abstract over]
1532 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1533 Abstract only over the type variables free in the rhs wrt which the
1534 new binding is abstracted. Note that
1535
1536 * The naive approach of abstracting wrt the
1537 tyvars free in the Id's /type/ fails. Consider:
1538 /\ a b -> let t :: (a,b) = (e1, e2)
1539 x :: a = fst t
1540 in ...
1541 Here, b isn't free in x's type, but we must nevertheless
1542 abstract wrt b as well, because t's type mentions b.
1543 Since t is floated too, we'd end up with the bogus:
1544 poly_t = /\ a b -> (e1, e2)
1545 poly_x = /\ a -> fst (poly_t a *b*)
1546
1547 * We must do closeOverKinds. Example (Trac #10934):
1548 f = /\k (f:k->*) (a:k). let t = AccFailure @ (f a) in ...
1549 Here we want to float 't', but we must remember to abstract over
1550 'k' as well, even though it is not explicitly mentioned in the RHS,
1551 otherwise we get
1552 t = /\ (f:k->*) (a:k). AccFailure @ (f a)
1553 which is obviously bogus.
1554 -}
1555
-- | Abstract the floated bindings of @body_env@ over the type variables
-- @main_tvs@, so they can be floated out past a big lambda.  Each float
-- x = e becomes poly_x = /\tvs. e, and occurrences of x in the body are
-- substituted by (poly_x tvs).  See Note [Floating and type abstraction].
abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
abstractFloats main_tvs body_env body
  = ASSERT( notNull body_floats )
    do  { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
        ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
  where
    main_tv_set = mkVarSet main_tvs
    body_floats = getFloatBinds body_env
    empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)

    -- Abstract one float, threading through the substitution that maps
    -- each original binder to its type-applied polymorphic replacement
    abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
    abstract subst (NonRec id rhs)
      = do { (poly_id, poly_app) <- mk_poly tvs_here id
           ; let poly_rhs = mkLams tvs_here rhs'
                 subst'   = CoreSubst.extendIdSubst subst id poly_app
           ; return (subst', (NonRec poly_id poly_rhs)) }
      where
        rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs

        -- tvs_here: see Note [Which type variables to abstract over]
        tvs_here = varSetElemsWellScoped $
                   intersectVarSet main_tv_set $
                   closeOverKinds $
                   exprSomeFreeVars isTyVar rhs'

    abstract subst (Rec prs)
      = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
           ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
                 poly_rhss = [mkLams tvs_here (CoreSubst.substExpr (text "abstract_floats3") subst' rhs)
                             | rhs <- rhss]
           ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
      where
        (ids,rhss) = unzip prs
        -- For a recursive group, it's a bit of a pain to work out the minimal
        -- set of tyvars over which to abstract:
        --      /\ a b c.  let x = ...a... in
        --                 letrec { p = ...x...q...
        --                          q = .....p...b... } in
        --                 ...
        -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
        -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
        -- Since it's a pain, we just use the whole set, which is always safe
        --
        -- If you ever want to be more selective, remember this bizarre case too:
        --      x::a = x
        -- Here, we must abstract 'x' over 'a'.
        tvs_here = toposortTyVars main_tvs

    -- Make the polymorphic replacement Id (with a fresh unique but the same
    -- name) and the application of it to the abstracted type variables
    mk_poly tvs_here var
      = do { uniq <- getUniqueM
           ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
                 poly_ty   = mkInvForAllTys tvs_here (idType var)  -- But new type of course
                 poly_id   = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.hs
                             mkLocalIdOrCoVar poly_name poly_ty
           ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
                -- In the olden days, it was crucial to copy the occInfo of the original var,
                -- because we were looking at occurrence-analysed but as yet unsimplified code!
                -- In particular, we mustn't lose the loop breakers.  BUT NOW we are looking
                -- at already simplified code, so it doesn't matter
                --
                -- It's even right to retain single-occurrence or dead-var info:
                -- Suppose we started with  /\a -> let x = E in B
                -- where x occurs once in B. Then we transform to:
                --      let x' = /\a -> E in /\a -> let x* = x' a in B
                -- where x* has an INLINE prag on it.  Now, once x* is inlined,
                -- the occurrences of x' will be just the occurrences originally
                -- pinned on x.
1623
1624 {-
1625 Note [Abstract over coercions]
1626 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1627 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1628 type variable a. Rather than sort this mess out, we simply bale out and abstract
1629 wrt all the type variables if any of them are coercion variables.
1630
1631
1632 Historical note: if you use let-bindings instead of a substitution, beware of this:
1633
1634 -- Suppose we start with:
1635 --
1636 -- x = /\ a -> let g = G in E
1637 --
1638 -- Then we'll float to get
1639 --
1640 -- x = let poly_g = /\ a -> G
1641 -- in /\ a -> let g = poly_g a in E
1642 --
1643 -- But now the occurrence analyser will see just one occurrence
1644 -- of poly_g, not inside a lambda, so the simplifier will
-- PreInlineUnconditionally poly_g back into g!  Back to square 1!
1646 -- (I used to think that the "don't inline lone occurrences" stuff
1647 -- would stop this happening, but since it's the *only* occurrence,
1648 -- PreInlineUnconditionally kicks in first!)
1649 --
1650 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1651 -- to appear many times. (NB: mkInlineMe eliminates
1652 -- such notes on trivial RHSs, so do it manually.)
1653
1654 ************************************************************************
1655 * *
1656 prepareAlts
1657 * *
1658 ************************************************************************
1659
1660 prepareAlts tries these things:
1661
1662 1. Eliminate alternatives that cannot match, including the
1663 DEFAULT alternative.
1664
1665 2. If the DEFAULT alternative can match only one possible constructor,
1666 then make that constructor explicit.
1667 e.g.
1668 case e of x { DEFAULT -> rhs }
1669 ===>
1670 case e of x { (a,b) -> rhs }
1671 where the type is a single constructor type. This gives better code
1672 when rhs also scrutinises x or e.
1673
3. Returns a list of the constructors that cannot occur in the
   DEFAULT alternative (if there is one)
1676
1677 Here "cannot match" includes knowledge from GADTs
1678
1679 It's a good idea to do this stuff before simplifying the alternatives, to
1680 avoid simplifying alternatives we know can't happen, and to come up with
1681 the list of constructors that are handled, to put into the IdInfo of the
1682 case binder, for use when simplifying the alternatives.
1683
1684 Eliminating the default alternative in (1) isn't so obvious, but it can
1685 happen:
1686
1687 data Colour = Red | Green | Blue
1688
1689 f x = case x of
1690 Red -> ..
1691 Green -> ..
1692 DEFAULT -> h x
1693
1694 h y = case y of
1695 Blue -> ..
1696 DEFAULT -> [ case y of ... ]
1697
1698 If we inline h into f, the default case of the inlined h can't happen.
1699 If we don't notice this, we may end up filtering out *all* the cases
1700 of the inner case y, which give us nowhere to go!
1701 -}
1702
-- | Tidy up the alternatives of a case before simplifying them:
-- drop alternatives that cannot match, make a lone possible
-- constructor explicit in place of DEFAULT, and merge identical
-- alternatives.  Also returns the constructors that the DEFAULT
-- alternative (if any) cannot be.
prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
-- The returned list of alternatives may be empty: then none can match
prepareAlts scrut case_bndr' alts
  | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
      -- The case binder is needed only for its type.  As an OutId it
      -- carries maximum information, which is important here
      -- (test simpl013 is an example)
  = do { uniqs <- getUniquesM
       ; let (imposs1, alts1)          = filterAlts tc tys imposs_cons alts
             (filled_in, alts2)        = refineDefaultAlt uniqs tc tys imposs1 alts1
             (merged, imposs3, alts3)  = combineIdenticalAlts imposs1 alts2
             -- impossN: the data constructors that cannot match the
             -- DEFAULT alternative after each stage
       ; when filled_in $ tick (FillInCaseDefault case_bndr')
       ; when merged    $ tick (AltMerge case_bndr')
       ; return (imposs3, alts3) }

  | otherwise
  = return ([], alts)    -- Scrutinee type is not a data type: nothing to do
  where
    -- Constructors the scrutinee is already known not to be, taken
    -- from the unfolding of a variable scrutinee
    imposs_cons | Var v <- scrut = otherCons (idUnfolding v)
                | otherwise      = []
1726
1727
1728 {-
1729 ************************************************************************
1730 * *
1731 mkCase
1732 * *
1733 ************************************************************************
1734
1735 mkCase tries these things
1736
1737 1. Merge Nested Cases
1738
1739 case e of b { ==> case e of b {
1740 p1 -> rhs1 p1 -> rhs1
1741 ... ...
1742 pm -> rhsm pm -> rhsm
1743 _ -> case b of b' { pn -> let b'=b in rhsn
1744 pn -> rhsn ...
1745 ... po -> let b'=b in rhso
1746 po -> rhso _ -> let b'=b in rhsd
1747 _ -> rhsd
1748 }
1749
which merges two cases into one when the default alternative of
the outer case scrutinises the same variable as the outer case
binder.  This transformation is called Case Merging.  It avoids
scrutinising the same variable multiple times.
1754
1755 2. Eliminate Identity Case
1756
1757 case e of ===> e
1758 True -> True;
1759 False -> False
1760
1761 and similar friends.
1762 -}
1763
-- mkCase tries a cascade of case transformations (case merging,
-- identity-case elimination); each falls through to the next, ending
-- in mkCase2 which just rebuilds the Case.  All three share this type.
mkCase, mkCase1, mkCase2
   :: DynFlags
   -> OutExpr -> OutId
   -> OutType -> [OutAlt]             -- Alternatives in standard (increasing) order
   -> SimplM OutExpr

--------------------------------------------------
--      1. Merge Nested Cases
--------------------------------------------------

-- Guarded by Opt_CaseMerge: if the DEFAULT alternative of the outer
-- case is (modulo floatable ticks) another case on the outer case
-- binder itself, fold the inner alternatives into the outer case.
mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
  | gopt Opt_CaseMerge dflags
  , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
       <- stripTicksTop tickishFloatable deflt_rhs
  , inner_scrut_var == outer_bndr
  = do  { tick (CaseMerge outer_bndr)

        ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
                                          (con, args, wrap_rhs rhs)
                 -- Simplifier's no-shadowing invariant should ensure
                 -- that outer_bndr is not shadowed by the inner patterns

              -- Re-bind the inner case binder to the outer one in each
              -- merged alternative, so references to it still work
              wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
                 -- The let is OK even for unboxed binders,

              -- No need to wrap if the inner binder is never used
              wrapped_alts | isDeadBinder inner_bndr = inner_alts
                           | otherwise               = map wrap_alt inner_alts

              merged_alts = mergeAlts outer_alts wrapped_alts
                -- NB: mergeAlts gives priority to the left
                --      case x of
                --        A -> e1
                --        DEFAULT -> case x of
                --                      A -> e2
                --                      B -> e3
                -- When we merge, we must ensure that e1 takes
                -- precedence over e2 as the value for A!

        -- Ticks stripped from the default RHS are re-attached outside
        ; fmap (mkTicks ticks) $
          mkCase1 dflags scrut outer_bndr alts_ty merged_alts
        }
        -- Warning: don't call mkCase recursively!
        -- Firstly, there's no point, because inner alts have already had
        -- mkCase applied to them, so they won't have a case in their default
        -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
        -- in munge_rhs may put a case into the DEFAULT branch!

-- No merge applies: fall through to the next transformation
mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
1811
1812 --------------------------------------------------
1813 -- 2. Eliminate Identity Case
1814 --------------------------------------------------
1815
-- If every alternative simply returns the scrutinee (possibly under a
-- cast and/or floatable ticks), the whole case is an identity and can
-- be replaced by the scrutinee itself (re-applying the cast and ticks).
mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _)      -- Identity case
  | all identity_alt alts
  = do { tick (CaseIdentity case_bndr)
       ; return (mkTicks ticks $ re_cast scrut rhs1) }
  where
    -- Floatable ticks from the discarded RHSs, preserved on the result
    ticks = concatMap (stripTicksT tickishFloatable . thdOf3) (tail alts)
    identity_alt (con, args, rhs) = check_eq rhs con args

    -- check_eq rhs con args: does this RHS just rebuild the scrutinee?
    check_eq (Cast rhs co) con args
      = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
        -- See Note [RHS casts]
    check_eq (Lit lit)  (LitAlt lit') _    = lit == lit'
    check_eq (Var v)    _             _    | v == case_bndr = True
    check_eq (Var v)    (DataAlt con) []   = v == dataConWorkId con
                                             -- Optimisation only
    check_eq (Tick t e) alt           args = tickishFloatable t &&
                                             check_eq e alt args
    -- RHS explicitly rebuilds the constructor applied to the pattern vars
    check_eq rhs        (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $
                                             mkConApp con (arg_tys ++
                                                           varsToCoreExprs args)
    check_eq _          _             _    = False

    arg_tys = map Type (tyConAppArgs (idType case_bndr))

        -- Note [RHS casts]
        -- ~~~~~~~~~~~~~~~~
        -- We've seen this:
        --      case e of x { _ -> x `cast` c }
        -- And we definitely want to eliminate this case, to give
        --      e `cast` c
        -- So we throw away the cast from the RHS, and reconstruct
        -- it at the other end.  All the RHS casts must be the same
        -- if (all identity_alt alts) holds.
        --
        -- Don't worry about nested casts, because the simplifier combines them

    -- Copy the cast structure of an RHS onto the scrutinee
    re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
    re_cast scrut _             = scrut

-- Not an identity case: fall through to the catch-all
mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
1856
--------------------------------------------------
--      Catch-all
--------------------------------------------------

-- No transformation applied: just rebuild the case expression as-is.
mkCase2 _dflags scrut case_bndr alts_ty alts
  = return (Case scrut case_bndr alts_ty alts)
1862
1863 {-
1864 Note [Dead binders]
1865 ~~~~~~~~~~~~~~~~~~~~
1866 Note that dead-ness is maintained by the simplifier, so that it is
1867 accurate after simplification as well as before.
1868
1869
1870 Note [Cascading case merge]
1871 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1872 Case merging should cascade in one sweep, because it
1873 happens bottom-up
1874
1875 case e of a {
1876 DEFAULT -> case a of b
1877 DEFAULT -> case b of c {
1878 DEFAULT -> e
1879 A -> ea
1880 B -> eb
1881 C -> ec
1882 ==>
1883 case e of a {
1884 DEFAULT -> case a of b
1885 DEFAULT -> let c = b in e
1886 A -> let c = b in ea
1887 B -> eb
1888 C -> ec
1889 ==>
1890 case e of a {
1891 DEFAULT -> let b = a in let c = b in e
1892 A -> let b = a in let c = b in ea
1893 B -> let b = a in eb
1894 C -> ec
1895
1896
1897 However here's a tricky case that we still don't catch, and I don't
1898 see how to catch it in one pass:
1899
1900 case x of c1 { I# a1 ->
1901 case a1 of c2 ->
1902 0 -> ...
1903 DEFAULT -> case x of c3 { I# a2 ->
1904 case a2 of ...
1905
1906 After occurrence analysis (and its binder-swap) we get this
1907
1908 case x of c1 { I# a1 ->
1909 let x = c1 in -- Binder-swap addition
1910 case a1 of c2 ->
1911 0 -> ...
1912 DEFAULT -> case x of c3 { I# a2 ->
1913 case a2 of ...
1914
1915 When we simplify the inner case x, we'll see that
1916 x=c1=I# a1. So we'll bind a2 to a1, and get
1917
1918 case x of c1 { I# a1 ->
1919 case a1 of c2 ->
1920 0 -> ...
1921 DEFAULT -> case a1 of ...
1922
This is correct, but we can't do a case merge in this sweep
1924 because c2 /= a1. Reason: the binding c1=I# a1 went inwards
1925 without getting changed to c1=I# c2.
1926
1927 I don't think this is worth fixing, even if I knew how. It'll
1928 all come out in the next pass anyway.
1929 -}