1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 \section[SimplUtils]{The simplifier utilities}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplUtils (
10 -- Rebuilding
11 mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
12
13 -- Inlining,
14 preInlineUnconditionally, postInlineUnconditionally,
15 activeUnfolding, activeRule,
16 getUnfoldingInRuleMatch,
17 simplEnvForGHCi, updModeForStableUnfoldings,
18
19 -- The continuation type
20 SimplCont(..), DupFlag(..),
21 isSimplified,
22 contIsDupable, contResultType, contHoleType,
23 contIsTrivial, contArgs,
24 countValArgs, countArgs,
25 mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
26 interestingCallContext, interestingArg,
27
28 -- ArgInfo
29 ArgInfo(..), ArgSpec(..), mkArgInfo,
30 addValArgTo, addCastTo, addTyArgTo,
31 argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
32
33 abstractFloats
34 ) where
35
36 #include "HsVersions.h"
37
38 import SimplEnv
39 import CoreMonad ( SimplifierMode(..), Tick(..) )
40 import MkCore ( sortQuantVars )
41 import DynFlags
42 import CoreSyn
43 import qualified CoreSubst
44 import PprCore
45 import CoreFVs
46 import CoreUtils
47 import CoreArity
48 import CoreUnfold
49 import Name
50 import Id
51 import Var
52 import Demand
53 import SimplMonad
54 import Type hiding( substTy )
55 import Coercion hiding( substCo, substTy )
56 import DataCon ( dataConWorkId )
57 import VarSet
58 import BasicTypes
59 import Util
60 import MonadUtils
61 import Outputable
62 import FastString
63 import Pair
64
65 import Control.Monad ( when )
66 import Data.List ( partition )
67
68 {-
69 ************************************************************************
70 * *
71 The SimplCont and DupFlag types
72 * *
73 ************************************************************************
74
75 A SimplCont allows the simplifier to traverse the expression in a
76 zipper-like fashion. The SimplCont represents the rest of the expression,
77 "above" the point of interest.
78
79 You can also think of a SimplCont as an "evaluation context", using
80 that term in the way it is used for operational semantics. This is the
81 way I usually think of it. For example, you'll often see a syntax for
82 evaluation contexts looking like
83 C ::= [] | C e | case C of alts | C `cast` co
84 That's the kind of thing we are doing here, and I use that syntax in
85 the comments.
86
87
88 Key points:
89 * A SimplCont describes a *strict* context (just like
90 evaluation contexts do). E.g. Just [] is not a SimplCont
91
92 * A SimplCont describes a context that *does not* bind
93 any variables. E.g. \x. [] is not a SimplCont
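As a purely illustrative sketch (arg, env, case_bndr, alts and res_ty are
invented names), the evaluation context
        case (<hole> arg) of alts
is represented with the frame nearest the hole outermost:
        ApplyToVal { sc_dup = NoDup, sc_arg = arg, sc_env = env
                   , sc_cont = Select NoDup case_bndr alts env
                                      (Stop res_ty BoringCtxt) }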
94 -}
95
96 data SimplCont
97 = Stop -- An empty context, or <hole>
98 OutType -- Type of the <hole>
99 CallCtxt -- Tells if there is something interesting about
100 -- the context, and hence the inliner
101 -- should be a bit keener (see interestingCallContext)
102 -- Specifically:
103 -- This is an argument of a function that has RULES
104 -- Inlining the call might allow the rule to fire
105 -- Never ValAppCtxt (use ApplyToVal instead)
106 -- or CaseCtxt (use Select instead)
107
108 | CastIt -- <hole> `cast` co
109 OutCoercion -- The coercion simplified
110 -- Invariant: never an identity coercion
111 SimplCont
112
113 | ApplyToVal { -- <hole> arg
114 sc_dup :: DupFlag, -- See Note [DupFlag invariants]
115 sc_arg :: InExpr, -- The argument,
116 sc_env :: StaticEnv, -- and its static env
117 sc_cont :: SimplCont }
118
119 | ApplyToTy { -- <hole> ty
120 sc_arg_ty :: OutType, -- Argument type
121 sc_hole_ty :: OutType, -- Type of the function, presumably (forall a. blah)
122 -- See Note [The hole type in ApplyToTy]
123 sc_cont :: SimplCont }
124
125 | Select -- case <hole> of alts
126 DupFlag -- See Note [DupFlag invariants]
127 InId [InAlt] StaticEnv -- The case binder, alts, and subst-env
128 SimplCont
129
130 -- The two strict forms have no DupFlag, because we never duplicate them
131 | StrictBind -- (\x* \xs. e) <hole>
132 InId [InBndr] -- let x* = <hole> in e
133 InExpr StaticEnv -- is a special case
134 SimplCont
135
136 | StrictArg -- f e1 ..en <hole>
137 ArgInfo -- Specifies f, e1..en, Whether f has rules, etc
138 -- plus strictness flags for *further* args
139 CallCtxt -- Whether *this* argument position is interesting
140 SimplCont
141
142 | TickIt
143 (Tickish Id) -- Tick tickish <hole>
144 SimplCont
145
146 data DupFlag = NoDup -- Unsimplified, might be big
147 | Simplified -- Simplified
148 | OkToDup -- Simplified and small
149
150 isSimplified :: DupFlag -> Bool
151 isSimplified NoDup = False
152 isSimplified _ = True -- Invariant: the subst-env is empty
153
154 perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
155 perhapsSubstTy dup env ty
156 | isSimplified dup = ty
157 | otherwise = substTy env ty
158
159 {-
160 Note [DupFlag invariants]
161 ~~~~~~~~~~~~~~~~~~~~~~~~~
162 In both (ApplyToVal dup _ env k)
163 and (Select dup _ _ env k)
164 the following invariants hold
165
166 (a) if dup = OkToDup, then continuation k is also ok-to-dup
167 (b) if dup = OkToDup or Simplified, the subst-env is empty
168 (and hence no need to re-simplify)
169 -}
170
171 instance Outputable DupFlag where
172 ppr OkToDup = ptext (sLit "ok")
173 ppr NoDup = ptext (sLit "nodup")
174 ppr Simplified = ptext (sLit "simpl")
175
176 instance Outputable SimplCont where
177 ppr (Stop ty interesting) = ptext (sLit "Stop") <> brackets (ppr interesting) <+> ppr ty
178 ppr (ApplyToTy { sc_arg_ty = ty
179 , sc_cont = cont }) = (ptext (sLit "ApplyToTy") <+> pprParendType ty) $$ ppr cont
180 ppr (ApplyToVal { sc_arg = arg
181 , sc_dup = dup
182 , sc_cont = cont }) = (ptext (sLit "ApplyToVal") <+> ppr dup <+> pprParendExpr arg)
183 $$ ppr cont
184 ppr (StrictBind b _ _ _ cont) = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
185 ppr (StrictArg ai _ cont) = (ptext (sLit "StrictArg") <+> ppr (ai_fun ai)) $$ ppr cont
186 ppr (Select dup bndr alts se cont) = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$
187 ifPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
188 ppr (CastIt co cont ) = (ptext (sLit "CastIt") <+> ppr co) $$ ppr cont
189 ppr (TickIt t cont) = (ptext (sLit "TickIt") <+> ppr t) $$ ppr cont
190
191
192 {- Note [The hole type in ApplyToTy]
193 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
194 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
195 continuation. It is absolutely necessary to compute contHoleType, but it is
196 not used for anything else (and hence may not be evaluated).
197
198 Why is it necessary for contHoleType? Consider the continuation
199 ApplyToTy Int (Stop Int)
200 corresponding to
201 (<hole> @Int) :: Int
202 What is the type of <hole>? It could be (forall a. Int) or (forall a. a),
203 and there is no way to know which, so we must record it.
204
205 In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType
206 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
207 doesn't matter because we'll never compute them all.
208
209 ************************************************************************
210 * *
211 ArgInfo and ArgSpec
212 * *
213 ************************************************************************
214 -}
215
216 data ArgInfo
217 = ArgInfo {
218 ai_fun :: OutId, -- The function
219 ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order)
220 ai_type :: OutType, -- Type of (f a1 ... an)
221
222 ai_rules :: [CoreRule], -- Rules for this function
223
224 ai_encl :: Bool, -- Flag saying whether this function
225 -- or an enclosing one has rules (recursively)
226 -- True => be keener to inline in all args
227
228 ai_strs :: [Bool], -- Strictness of remaining arguments
229 -- Usually infinite, but if it is finite it guarantees
230 -- that the function diverges after being given
231 -- that number of args
232 ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline
233 -- Always infinite
234 }
235
236 data ArgSpec
237 = ValArg OutExpr -- Apply to this (coercion or value); c.f. ApplyToVal
238 | TyArg { as_arg_ty :: OutType -- Apply to this type; c.f. ApplyToTy
239 , as_hole_ty :: OutType } -- Type of the function (presumably forall a. blah)
240 | CastBy OutCoercion -- Cast by this; c.f. CastIt
241
242 instance Outputable ArgSpec where
243 ppr (ValArg e) = ptext (sLit "ValArg") <+> ppr e
244 ppr (TyArg { as_arg_ty = ty }) = ptext (sLit "TyArg") <+> ppr ty
245 ppr (CastBy c) = ptext (sLit "CastBy") <+> ppr c
246
247 addValArgTo :: ArgInfo -> OutExpr -> ArgInfo
248 addValArgTo ai arg = ai { ai_args = ValArg arg : ai_args ai
249 , ai_type = funResultTy (ai_type ai) }
250
251 addTyArgTo :: ArgInfo -> OutType -> ArgInfo
252 addTyArgTo ai arg_ty = ai { ai_args = arg_spec : ai_args ai
253 , ai_type = applyTy poly_fun_ty arg_ty }
254 where
255 poly_fun_ty = ai_type ai
256 arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = poly_fun_ty }
257
258 addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
259 addCastTo ai co = ai { ai_args = CastBy co : ai_args ai
260 , ai_type = pSnd (coercionKind co) }
261
262 argInfoAppArgs :: [ArgSpec] -> [OutExpr]
263 argInfoAppArgs [] = []
264 argInfoAppArgs (CastBy {} : _) = [] -- Stop at a cast
265 argInfoAppArgs (ValArg e : as) = e : argInfoAppArgs as
266 argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as
267
268 pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
269 pushSimplifiedArgs _env [] k = k
270 pushSimplifiedArgs env (arg : args) k
271 = case arg of
272 TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
273 -> ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
274 ValArg e -> ApplyToVal { sc_arg = e, sc_env = env, sc_dup = Simplified, sc_cont = rest }
275 CastBy c -> CastIt c rest
276 where
277 rest = pushSimplifiedArgs env args k
278 -- The env has an empty SubstEnv
279
280 argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
281 -- NB: the [ArgSpec] is reversed so that the first arg
282 -- in the list is the last one in the application
283 argInfoExpr fun rev_args
284 = go rev_args
285 where
286 go [] = Var fun
287 go (ValArg a : as) = go as `App` a
288 go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty
289 go (CastBy co : as) = mkCast (go as) co
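-- A worked example, purely for illustration (f, a1, a2, co are arbitrary
-- names); remember that the ArgSpec list is reversed:
--   argInfoExpr f [ValArg a2, CastBy co, ValArg a1]
--     = App (mkCast (App (Var f) a1) co) a2
--   i.e.  ((f a1) `cast` co) a2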
290
291
292 {-
293 ************************************************************************
294 * *
295 Functions on SimplCont
296 * *
297 ************************************************************************
298 -}
299
300 mkBoringStop :: OutType -> SimplCont
301 mkBoringStop ty = Stop ty BoringCtxt
302
303 mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in CoreUnfold
304 mkRhsStop ty = Stop ty RhsCtxt
305
306 mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
307 mkLazyArgStop ty cci = Stop ty cci
308
309 -------------------
310 contIsRhsOrArg :: SimplCont -> Bool
311 contIsRhsOrArg (Stop {}) = True
312 contIsRhsOrArg (StrictBind {}) = True
313 contIsRhsOrArg (StrictArg {}) = True
314 contIsRhsOrArg _ = False
315
316 contIsRhs :: SimplCont -> Bool
317 contIsRhs (Stop _ RhsCtxt) = True
318 contIsRhs _ = False
319
320 -------------------
321 contIsDupable :: SimplCont -> Bool
322 contIsDupable (Stop {}) = True
323 contIsDupable (ApplyToTy { sc_cont = k }) = contIsDupable k
324 contIsDupable (ApplyToVal { sc_dup = OkToDup }) = True -- See Note [DupFlag invariants]
325 contIsDupable (Select OkToDup _ _ _ _) = True -- ...ditto...
326 contIsDupable (CastIt _ k) = contIsDupable k
327 contIsDupable _ = False
328
329 -------------------
330 contIsTrivial :: SimplCont -> Bool
331 contIsTrivial (Stop {}) = True
332 contIsTrivial (ApplyToTy { sc_cont = k }) = contIsTrivial k
333 contIsTrivial (ApplyToVal { sc_arg = Coercion _, sc_cont = k }) = contIsTrivial k
334 contIsTrivial (CastIt _ k) = contIsTrivial k
335 contIsTrivial _ = False
336
337 -------------------
338 contResultType :: SimplCont -> OutType
339 contResultType (Stop ty _) = ty
340 contResultType (CastIt _ k) = contResultType k
341 contResultType (StrictBind _ _ _ _ k) = contResultType k
342 contResultType (StrictArg _ _ k) = contResultType k
343 contResultType (Select _ _ _ _ k) = contResultType k
344 contResultType (ApplyToTy { sc_cont = k }) = contResultType k
345 contResultType (ApplyToVal { sc_cont = k }) = contResultType k
346 contResultType (TickIt _ k) = contResultType k
347
348 contHoleType :: SimplCont -> OutType
349 contHoleType (Stop ty _) = ty
350 contHoleType (TickIt _ k) = contHoleType k
351 contHoleType (CastIt co _) = pFst (coercionKind co)
352 contHoleType (Select d b _ se _) = perhapsSubstTy d se (idType b)
353 contHoleType (StrictBind b _ _ se _) = substTy se (idType b)
354 contHoleType (StrictArg ai _ _) = funArgTy (ai_type ai)
355 contHoleType (ApplyToTy { sc_hole_ty = ty }) = ty -- See Note [The hole type in ApplyToTy]
356 contHoleType (ApplyToVal { sc_arg = e, sc_env = se, sc_dup = dup, sc_cont = k })
357 = mkFunTy (perhapsSubstTy dup se (exprType e))
358 (contHoleType k)
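-- A small illustration (arg, se, res_ty are invented names): for
--   k = ApplyToVal { sc_dup = Simplified, sc_arg = arg, sc_env = se
--                  , sc_cont = Stop res_ty BoringCtxt }
-- we get contHoleType k = mkFunTy (exprType arg) res_ty
-- and contResultType k = res_ty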
359
360 -------------------
361 countValArgs :: SimplCont -> Int
362 -- Count value arguments excluding coercions
363 countValArgs (ApplyToVal { sc_arg = arg, sc_cont = cont })
364 | Coercion {} <- arg = countValArgs cont
365 | otherwise = 1 + countValArgs cont
366 countValArgs _ = 0
367
368 countArgs :: SimplCont -> Int
369 -- Count all arguments, including types, coercions, and other values
370 countArgs (ApplyToTy { sc_cont = cont }) = 1 + countArgs cont
371 countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont
372 countArgs _ = 0
373
374 contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
375 -- Summarises value args, discards type args and coercions
376 -- The returned continuation of the call is only used to
377 -- answer questions like "are you interesting?"
378 contArgs cont
379 | lone cont = (True, [], cont)
380 | otherwise = go [] cont
381 where
382 lone (ApplyToTy {}) = False -- See Note [Lone variables] in CoreUnfold
383 lone (ApplyToVal {}) = False
384 lone (CastIt {}) = False
385 lone _ = True
386
387 go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
388 = go (is_interesting arg se : args) k
389 go args (ApplyToTy { sc_cont = k }) = go args k
390 go args (CastIt _ k) = go args k
391 go args k = (False, reverse args, k)
392
393 is_interesting arg se = interestingArg (substExpr (text "contArgs") se arg)
394 -- Do *not* use short-cutting substitution here
395 -- because we want to get as much IdInfo as possible
396
397 {-
398 Note [Interesting call context]
399 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
400 We want to avoid inlining an expression where there can't possibly be
401 any gain, such as in an argument position. Hence, if the continuation
402 is interesting (eg. a case scrutinee, application etc.) then we
403 inline, otherwise we don't.
404
405 Previously some_benefit used to return True only if the variable was
406 applied to some value arguments. This didn't work:
407
408 let x = _coerce_ (T Int) Int (I# 3) in
409 case _coerce_ Int (T Int) x of
410 I# y -> ....
411
412 we want to inline x, but can't see that it's a constructor in a case
413 scrutinee position, and some_benefit is False.
414
415 Another example:
416
417 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
418
419 .... case dMonadST _@_ x0 of (a,b,c) -> ....
420
421 we'd really like to inline dMonadST here, but we *don't* want to
422 inline if the case expression is just
423
424 case x of y { DEFAULT -> ... }
425
426 since we can just eliminate this case instead (x is in WHNF). Similar
427 applies when x is bound to a lambda expression. Hence
428 contIsInteresting looks for case expressions with just a single
429 default case.
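
To illustrate how interestingCallContext (below) classifies things -- an
informal summary only:
     case <hole> of alts      -- Select     => CaseCtxt
     <hole> arg               -- ApplyToVal => ValAppCtxt
     f e1 .. <hole> ..        -- StrictArg  => the CallCtxt recorded for that
                              --               argument position
     <hole> `cast` co         -- CastIt     => classified by the enclosing context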
430 -}
431
432 interestingCallContext :: SimplCont -> CallCtxt
433 -- See Note [Interesting call context]
434 interestingCallContext cont
435 = interesting cont
436 where
437 interesting (Select _ _bndr _ _ _) = CaseCtxt
438 interesting (ApplyToVal {}) = ValAppCtxt
439 -- Can happen if we have (f Int |> co) y
440 -- If f has an INLINE prag we need to give it some
441 -- motivation to inline. See Note [Cast then apply]
442 -- in CoreUnfold
443 interesting (StrictArg _ cci _) = cci
444 interesting (StrictBind {}) = BoringCtxt
445 interesting (Stop _ cci) = cci
446 interesting (TickIt _ k) = interesting k
447 interesting (ApplyToTy { sc_cont = k }) = interesting k
448 interesting (CastIt _ k) = interesting k
449 -- If this call is the arg of a strict function, the context
450 -- is a bit interesting. If we inline here, we may get useful
451 -- evaluation information to avoid repeated evals: e.g.
452 -- x + (y * z)
453 -- Here the contIsInteresting makes the '*' keener to inline,
454 -- which in turn exposes a constructor which makes the '+' inline.
455 -- Assuming that +,* aren't small enough to inline regardless.
456 --
457 -- It's also very important to inline in a strict context for things
458 -- like
459 -- foldr k z (f x)
460 -- Here, the context of (f x) is strict, and if f's unfolding is
461 -- a build it's *great* to inline it here. So we must ensure that
462 -- the context for (f x) is not totally uninteresting.
463
464
465 -------------------
466 mkArgInfo :: Id
467 -> [CoreRule] -- Rules for function
468 -> Int -- Number of value args
469 -> SimplCont -- Context of the call
470 -> ArgInfo
471
472 mkArgInfo fun rules n_val_args call_cont
473 | n_val_args < idArity fun -- Note [Unsaturated functions]
474 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
475 , ai_rules = rules, ai_encl = False
476 , ai_strs = vanilla_stricts
477 , ai_discs = vanilla_discounts }
478 | otherwise
479 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
480 , ai_rules = rules
481 , ai_encl = interestingArgContext rules call_cont
482 , ai_strs = add_type_str fun_ty arg_stricts
483 , ai_discs = arg_discounts }
484 where
485 fun_ty = idType fun
486
487 vanilla_discounts, arg_discounts :: [Int]
488 vanilla_discounts = repeat 0
489 arg_discounts = case idUnfolding fun of
490 CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
491 -> discounts ++ vanilla_discounts
492 _ -> vanilla_discounts
493
494 vanilla_stricts, arg_stricts :: [Bool]
495 vanilla_stricts = repeat False
496
497 arg_stricts
498 = case splitStrictSig (idStrictness fun) of
499 (demands, result_info)
500 | not (demands `lengthExceeds` n_val_args)
501 -> -- Enough args, use the strictness given.
502 -- For bottoming functions we used to pretend that the arg
503 -- is lazy, so that we don't treat the arg as an
504 -- interesting context. This avoids substituting
505 -- top-level bindings for (say) strings into
506 -- calls to error. But now we are more careful about
507 -- inlining lone variables, so it's ok (see SimplUtils.analyseCont)
508 if isBotRes result_info then
509 map isStrictDmd demands -- Finite => result is bottom
510 else
511 map isStrictDmd demands ++ vanilla_stricts
512 | otherwise
513 -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
514 <+> ppr n_val_args <+> ppr demands )
515 vanilla_stricts -- Not enough args, or no strictness
516
517 add_type_str :: Type -> [Bool] -> [Bool]
518 -- If the function arg types are strict, record that in the 'strictness bits'
519 -- No need to instantiate because unboxed types (which dominate the strict
520 -- types) can't instantiate type variables.
521 -- add_type_str is done repeatedly (for each call); it might be better
522 -- to do it once-for-all in the function
523 -- But beware primops/datacons with no strictness
524 add_type_str _ [] = []
525 add_type_str fun_ty strs -- Look through foralls
526 | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty -- Includes coercions
527 = add_type_str fun_ty' strs
528 add_type_str fun_ty (str:strs) -- Add strict-type info
529 | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
530 = (str || isStrictType arg_ty) : add_type_str fun_ty' strs
531 add_type_str _ strs
532 = strs
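    -- A worked example (illustration only): with
    --   fun_ty = Int# -> Bool -> Int   and   strs = [False, False]
    -- add_type_str returns [True, False], because Int# is unlifted (hence a
    -- strict type) while Bool is not.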
533
534 {- Note [Unsaturated functions]
535 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
536 Consider (test eyeball/inline4)
537 x = a:as
538 y = f x
539 where f has arity 2. Then we do not want to inline 'x', because
540 it'll just be floated out again. Even if f has lots of discounts
541 on its first argument -- it must be saturated for these to kick in
542 -}
543
544 interestingArgContext :: [CoreRule] -> SimplCont -> Bool
545 -- If the argument has form (f x y), where x,y are boring,
546 -- and f is marked INLINE, then we don't want to inline f.
547 -- But if the context of the argument is
548 -- g (f x y)
549 -- where g has rules, then we *do* want to inline f, in case it
550 -- exposes a rule that might fire. Similarly, if the context is
551 -- h (g (f x x))
552 -- where h has rules, then we do want to inline f; hence the
553 -- call_cont argument to interestingArgContext
554 --
555 -- The ai_encl flag makes this happen; if it's
556 -- set, the inliner gets just keen enough to inline f
557 -- regardless of how boring f's arguments are, if it's marked INLINE
558 --
559 -- The alternative would be to *always* inline an INLINE function,
560 -- regardless of how boring its context is; but that seems overkill
561 -- For example, it'd mean that wrapper functions were always inlined
562 --
563 -- The call_cont passed to interestingArgContext is the context of
564 -- the call itself, e.g. g <hole> in the example above
565 interestingArgContext rules call_cont
566 = notNull rules || enclosing_fn_has_rules
567 where
568 enclosing_fn_has_rules = go call_cont
569
570 go (Select {}) = False
571 go (ApplyToVal {}) = False -- Shouldn't really happen
572 go (ApplyToTy {}) = False -- Ditto
573 go (StrictArg _ cci _) = interesting cci
574 go (StrictBind {}) = False -- ??
575 go (CastIt _ c) = go c
576 go (Stop _ cci) = interesting cci
577 go (TickIt _ c) = go c
578
579 interesting RuleArgCtxt = True
580 interesting _ = False
581
582 {-
583 ************************************************************************
584 * *
585 SimplifierMode
586 * *
587 ************************************************************************
588
589 The SimplifierMode controls several switches; see its definition in
590 CoreMonad
591 sm_rules :: Bool -- Whether RULES are enabled
592 sm_inline :: Bool -- Whether inlining is enabled
593 sm_case_case :: Bool -- Whether case-of-case is enabled
594 sm_eta_expand :: Bool -- Whether eta-expansion is enabled
595 -}
596
597 simplEnvForGHCi :: DynFlags -> SimplEnv
598 simplEnvForGHCi dflags
599 = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
600 , sm_phase = InitialPhase
601 , sm_rules = rules_on
602 , sm_inline = False
603 , sm_eta_expand = eta_expand_on
604 , sm_case_case = True }
605 where
606 rules_on = gopt Opt_EnableRewriteRules dflags
607 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
608 -- Do not do any inlining, in case we expose some unboxed
609 -- tuple stuff that confuses the bytecode interpreter
610
611 updModeForStableUnfoldings :: Activation -> SimplifierMode -> SimplifierMode
612 -- See Note [Simplifying inside stable unfoldings]
613 updModeForStableUnfoldings inline_rule_act current_mode
614 = current_mode { sm_phase = phaseFromActivation inline_rule_act
615 , sm_inline = True
616 , sm_eta_expand = False }
617 -- For sm_rules, just inherit; sm_rules might be "off"
618 -- because of -fno-enable-rewrite-rules
619 where
620 phaseFromActivation (ActiveAfter n) = Phase n
621 phaseFromActivation _ = InitialPhase
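-- A concrete illustration (no new behaviour): for an unfolding whose
-- activation is (ActiveAfter 2), e.g. from an INLINE [2] pragma, the copy of
-- the RHS kept in the stable unfolding is simplified with sm_phase = Phase 2,
-- sm_inline = True and sm_eta_expand = False; sm_rules is simply inherited
-- from the current mode.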
622
623 {-
624 Note [Inlining in gentle mode]
625 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
626 Something is inlined if
627 (i) the sm_inline flag is on, AND
628 (ii) the thing has an INLINE pragma, AND
629 (iii) the thing is inlinable in the earliest phase.
630
631 Example of why (iii) is important:
632 {-# INLINE [~1] g #-}
633 g = ...
634
635 {-# INLINE f #-}
636 f x = g (g x)
637
638 If we were to inline g into f's inlining, then an importing module would
639 never be able to do
640 f e --> g (g e) ---> RULE fires
641 because the stable unfolding for f has had g inlined into it.
642
643 On the other hand, it is bad not to do ANY inlining into a
644 stable unfolding, because then recursive knots in instance declarations
645 don't get unravelled.
646
647 However, *sometimes* SimplGently must do no call-site inlining at all
648 (hence sm_inline = False). Before full laziness we must be careful
649 not to inline wrappers, because doing so inhibits floating
650 e.g. ...(case f x of ...)...
651 ==> ...(case (case x of I# x# -> fw x#) of ...)...
652 ==> ...(case x of I# x# -> case fw x# of ...)...
653 and now the redex (f x) isn't floatable any more.
654
655 The no-inlining thing is also important for Template Haskell. You might be
656 compiling in one-shot mode with -O2; but when TH compiles a splice before
657 running it, we don't want to use -O2. Indeed, we don't want to inline
658 anything, because the byte-code interpreter might get confused about
659 unboxed tuples and suchlike.
660
661 Note [Simplifying inside stable unfoldings]
662 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
663 We must take care with simplification inside stable unfoldings (which come from
664 INLINE pragmas).
665
666 First, consider the following example
667 let f = \pq -> BIG
668 in
669 let g = \y -> f y y
670 {-# INLINE g #-}
671 in ...g...g...g...g...g...
672 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
673 and thence copied multiple times when g is inlined. HENCE we treat
674 any occurrence in a stable unfolding as a multiple occurrence, not a single
675 one; see OccurAnal.addRuleUsage.
676
677 Second, we *do* want to do some modest rules/inlining stuff in stable
678 unfoldings, partly to eliminate senseless crap, and partly to break
679 the recursive knots generated by instance declarations.
680
681 However, suppose we have
682 {-# INLINE <act> f #-}
683 f = <rhs>
684 meaning "inline f in phases p where activation <act>(p) holds".
685 Then what inlinings/rules can we apply to the copy of <rhs> captured in
686 f's stable unfolding? Our model is that literally <rhs> is substituted for
687 f when it is inlined. So our conservative plan (implemented by
688 updModeForStableUnfoldings) is this:
689
690 -------------------------------------------------------------
691 When simplifying the RHS of a stable unfolding, set the phase
692 to the phase in which the stable unfolding first becomes active
693 -------------------------------------------------------------
694
695 That ensures that
696
697 a) Rules/inlinings that *cease* being active before p will
698 not apply to the stable unfolding, consistent with it being
699 inlined in its *original* form in phase p.
700
701 b) Rules/inlinings that only become active *after* p will
702 not apply to the stable unfolding, again to be consistent with
703 inlining the *original* rhs in phase p.
704
705 For example,
706 {-# INLINE f #-}
707 f x = ...g...
708
709 {-# NOINLINE [1] g #-}
710 g y = ...
711
712 {-# RULE h g = ... #-}
713 Here we must not inline g into f's RHS, even when we get to phase 0,
714 because when f is later inlined into some other module we want the
715 rule for h to fire.
716
717 Similarly, consider
718 {-# INLINE f #-}
719 f x = ...g...
720
721 g y = ...
722 and suppose that there are auto-generated specialisations and a strictness
723 wrapper for g. The specialisations get activation AlwaysActive, and the
724 strictness wrapper gets activation (ActiveAfter 0). So the strictness
725 wrapper fails the test and won't be inlined into f's stable unfolding. That
726 means f can inline, exposing the specialised call to g, so the specialisation
727 rules can fire.
728
729 A note about wrappers
730 ~~~~~~~~~~~~~~~~~~~~~
731 It's also important not to inline a worker back into a wrapper.
732 A wrapper looks like
733 wrapper = inline_me (\x -> ...worker... )
734 Normally, the inline_me prevents the worker getting inlined into
735 the wrapper (initially, the worker's only call site!). But,
736 if the wrapper is sure to be called, the strictness analyser will
737 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
738 continuation.
739 -}
740
741 activeUnfolding :: SimplEnv -> Id -> Bool
742 activeUnfolding env
743 | not (sm_inline mode) = active_unfolding_minimal
744 | otherwise = case sm_phase mode of
745 InitialPhase -> active_unfolding_gentle
746 Phase n -> active_unfolding n
747 where
748 mode = getMode env
749
750 getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
751 -- When matching in RULE, we want to "look through" an unfolding
752 -- (to see a constructor) if *rules* are on, even if *inlinings*
753 -- are not. A notable example is DFuns, which really we want to
754 -- match in rules like (op dfun) in gentle mode. Another example
755 -- is 'otherwise' which we want exprIsConApp_maybe to be able to
756 -- see very early on
757 getUnfoldingInRuleMatch env
758 = (in_scope, id_unf)
759 where
760 in_scope = seInScope env
761 mode = getMode env
762 id_unf id | unf_is_active id = idUnfolding id
763 | otherwise = NoUnfolding
764 unf_is_active id
765 | not (sm_rules mode) = active_unfolding_minimal id
766 | otherwise = isActive (sm_phase mode) (idInlineActivation id)
767
768 active_unfolding_minimal :: Id -> Bool
769 -- Compulsory unfoldings only
770 -- Ignore SimplGently, because we want to inline regardless;
771 -- the Id has no top-level binding at all
772 --
773 -- NB: we used to have a second exception, for data con wrappers.
774 -- On the grounds that we use gentle mode for rule LHSs, and
775 -- they match better when data con wrappers are inlined.
776 -- But that only really applies to the trivial wrappers (like (:)),
777 -- and they are now constructed as Compulsory unfoldings (in MkId)
778 -- so they'll happen anyway.
779 active_unfolding_minimal id = isCompulsoryUnfolding (realIdUnfolding id)
780
781 active_unfolding :: PhaseNum -> Id -> Bool
782 active_unfolding n id = isActiveIn n (idInlineActivation id)
783
784 active_unfolding_gentle :: Id -> Bool
785 -- Anything that is early-active
786 -- See Note [Gentle mode]
787 active_unfolding_gentle id
788 = isInlinePragma prag
789 && isEarlyActive (inlinePragmaActivation prag)
790 -- NB: wrappers are not early-active
791 where
792 prag = idInlinePragma id
793
794 ----------------------
795 activeRule :: SimplEnv -> Activation -> Bool
796 -- Nothing => No rules at all
797 activeRule env
798 | not (sm_rules mode) = \_ -> False -- Rewriting is off
799 | otherwise = isActive (sm_phase mode)
800 where
801 mode = getMode env
802
803 {-
804 ************************************************************************
805 * *
806 preInlineUnconditionally
807 * *
808 ************************************************************************
809
810 preInlineUnconditionally
811 ~~~~~~~~~~~~~~~~~~~~~~~~
812 @preInlineUnconditionally@ examines a bndr to see if it is used just
813 once in a completely safe way, so that it is safe to discard the
814 binding and inline its RHS at the (unique) usage site, REGARDLESS of how
815 big the RHS might be. If this is the case we don't simplify the RHS
816 first, but just inline it un-simplified.
817
818 This is much better than first simplifying a perhaps-huge RHS and then
819 inlining and re-simplifying it. Indeed, it can be at least quadratically
820 better. Consider
821
822 x1 = e1
823 x2 = e2[x1]
824 x3 = e3[x2]
825 ...etc...
826 xN = eN[xN-1]
827
828 We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times, etc.
829 This can happen with cascades of functions too:
830
831 f1 = \x1.e1
832 f2 = \xs.e2[f1]
833 f3 = \xs.e3[f2]
834 ...etc...
835
836 THE MAIN INVARIANT is this:
837
838 ---- preInlineUnconditionally invariant -----
839 IF preInlineUnconditionally chooses to inline x = <rhs>
840 THEN doing the inlining should not change the occurrence
841 info for the free vars of <rhs>
842 ----------------------------------------------
843
844 For example, it's tempting to look at trivial binding like
845 x = y
846 and inline it unconditionally. But suppose x is used many times,
847 but this is the unique occurrence of y. Then inlining x would change
848 y's occurrence info, which breaks the invariant. It matters: y
849 might have a BIG rhs, which will now be dup'd at every occurrence of x.
850
851
852 Even RHSs labelled InlineMe aren't caught here, because there might be
853 no benefit from inlining at the call site.
854
855 [Sept 01] Don't unconditionally inline a top-level thing, because that
856 can simply make a static thing into something built dynamically. E.g.
857 x = (a,b)
858 main = \s -> h x
859
860 [Remember that we treat \s as a one-shot lambda.] No point in
861 inlining x unless there is something interesting about the call site.
862
863 But watch out: if you aren't careful, some useful foldr/build fusion
864 can be lost (most notably in spectral/hartel/parstof) because the
865 foldr didn't see the build. Doing the dynamic allocation isn't a big
866 deal, in fact, but losing the fusion can be. But the right thing here
867 seems to be to do a callSiteInline based on the fact that there is
868 something interesting about the call site (it's strict). Hmm. That
869 seems a bit fragile.
870
871 Conclusion: inline top level things gaily until Phase 0 (the last
872 phase), at which point don't.
873
874 Note [pre/postInlineUnconditionally in gentle mode]
875 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
876 Even in gentle mode we want to do preInlineUnconditionally. The
877 reason is that too little clean-up happens if you don't inline
878 use-once things. Also a bit of inlining is *good* for full laziness;
879 it can expose constant sub-expressions. Example in
880 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
881 let-float if you inline windowToViewport
882
883 However, as usual for Gentle mode, do not inline things that are
884 inactive in the initial stages. See Note [Gentle mode].
885
886 Note [Stable unfoldings and preInlineUnconditionally]
887 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
888 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
889 Example
890
891 {-# INLINE f #-}
892 f :: Eq a => a -> a
893 f x = ...
894
895 fInt :: Int -> Int
896 fInt = f Int dEqInt
897
898 ...fInt...fInt...fInt...
899
900 Here f occurs just once, in the RHS of fInt. But if we inline it there
901 we'll lose the opportunity to inline at each of fInt's call sites.
902 The INLINE pragma will only inline when the application is saturated
903 for exactly this reason; and we don't want PreInlineUnconditionally
904 to second-guess it. A live example is Trac #3736.
905 c.f. Note [Stable unfoldings and postInlineUnconditionally]
906
907 Note [Top-level bottoming Ids]
908 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
909 Don't inline top-level Ids that are bottoming, even if they are used just
910 once, because FloatOut has gone to some trouble to extract them out.
911 Inlining them won't make the program run faster!
912
913 Note [Do not inline CoVars unconditionally]
914 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
915 Coercion variables appear inside coercions, and the RHS of a let-binding
916 is a term (not a coercion) so we can't necessarily inline the latter in
917 the former.
918 -}
919
920 preInlineUnconditionally :: DynFlags -> SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
921 -- Precondition: rhs satisfies the let/app invariant
922 -- See Note [CoreSyn let/app invariant] in CoreSyn
923 -- Reason: we don't want to inline single uses, or discard dead bindings,
924 -- for unlifted, side-effect-full bindings
925 preInlineUnconditionally dflags env top_lvl bndr rhs
926 | not active = False
927 | isStableUnfolding (idUnfolding bndr) = False -- Note [Stable unfoldings and preInlineUnconditionally]
928 | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
929 | not (gopt Opt_SimplPreInlining dflags) = False
930 | isCoVar bndr = False -- Note [Do not inline CoVars unconditionally]
931 | otherwise = case idOccInfo bndr of
932 IAmDead -> True -- Happens in ((\x.1) v)
933 OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
934 _ -> False
935 where
936 mode = getMode env
937 active = isActive (sm_phase mode) act
938 -- See Note [pre/postInlineUnconditionally in gentle mode]
939 act = idInlineActivation bndr
940 try_once in_lam int_cxt -- There's one textual occurrence
941 | not in_lam = isNotTopLevel top_lvl || early_phase
942 | otherwise = int_cxt && canInlineInLam rhs
943
944 -- Be very careful before inlining inside a lambda, because (a) we must not
945 -- invalidate occurrence information, and (b) we want to avoid pushing a
946 -- single allocation (here) into multiple allocations (inside lambda).
947 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
948 -- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
949 -- where
950 -- is_cheap = exprIsCheap rhs
951 -- ok = is_cheap && int_cxt
952
953 -- int_cxt The context isn't totally boring
954 -- E.g. let f = \ab.BIG in \y. map f xs
955 -- Don't want to substitute for f, because then we allocate
956 -- its closure every time the \y is called
957 -- But: let f = \ab.BIG in \y. map (f y) xs
958 -- Now we do want to substitute for f, even though it's not
959 -- saturated, because we're going to allocate a closure for
960 -- (f y) every time round the loop anyhow.
961
962 -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
963 -- so substituting rhs inside a lambda doesn't change the occ info.
964 -- Sadly, not quite the same as exprIsHNF.
965 canInlineInLam (Lit _) = True
966 canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
967 canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
968 canInlineInLam _ = False
969 -- not ticks. Counting ticks cannot be duplicated, and non-counting
970 -- ticks around a Lam will disappear anyway.
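    -- Worked examples, purely for illustration (y is a value binder; f, BIG
    -- are arbitrary expressions):
    --   canInlineInLam (Lam y BIG) = True   -- value lambda: inlining it
    --                                       -- duplicates no work
    --   canInlineInLam (App f y)   = False  -- a redex: inlining it under a
    --                                       -- lambda would redo the work on
    --                                       -- every call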
971
972 early_phase = case sm_phase mode of
973 Phase 0 -> False
974 _ -> True
975 -- If we don't have this early_phase test, consider
976 -- x = length [1,2,3]
977 -- The full laziness pass carefully floats all the cons cells to
978 -- top level, and preInlineUnconditionally floats them all back in.
979 -- Result is (a) static allocation replaced by dynamic allocation
980 -- (b) many simplifier iterations because this tickles
981 -- a related problem; only one inlining per pass
982 --
983 -- On the other hand, I have seen cases where top-level fusion is
984 -- lost if we don't inline top level thing (e.g. string constants)
985 -- Hence the test for phase zero (which is the phase for all the final
986 -- simplifications). Until phase zero we take no special notice of
987 -- top level things, but then we become more leery about inlining
988 -- them.
989
990 {-
991 ************************************************************************
992 * *
993 postInlineUnconditionally
994 * *
995 ************************************************************************
996
997 postInlineUnconditionally
998 ~~~~~~~~~~~~~~~~~~~~~~~~~
999 @postInlineUnconditionally@ decides whether to unconditionally inline
1000 a thing based on the form of its RHS; in particular if it has a
1001 trivial RHS. If so, we can inline and discard the binding altogether.
1002
1003 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
1004 only have *forward* references. Hence, it's safe to discard the binding
1005
1006 NOTE: This isn't our last opportunity to inline. We're at the binding
1007 site right now, and we'll get another opportunity when we get to the
1008 occurrence(s)
1009
1010 Note that we do this unconditional inlining only for trivial RHSs.
1011 Don't inline even WHNFs inside lambdas; doing so may simply increase
1012 allocation when the function is called. This isn't the last chance; see
1013 NOTE above.
1014
1015 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here Why?
1016 Because we don't even want to inline them into the RHS of constructor
1017 arguments. See NOTE above
1018
1019 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
1020 it's best to inline it anyway. We often get a=E; b=a from desugaring,
1021 with both a and b marked NOINLINE. But that seems incompatible with
1022 our new view that inlining is like a RULE, so I'm sticking to the 'active'
1023 story for now.
1024 -}
1025
1026 postInlineUnconditionally
1027 :: DynFlags -> SimplEnv -> TopLevelFlag
1028 -> OutId -- The binder (an InId would be fine too)
1029 -- (*not* a CoVar)
1030 -> OccInfo -- From the InId
1031 -> OutExpr
1032 -> Unfolding
1033 -> Bool
1034 -- Precondition: rhs satisfies the let/app invariant
1035 -- See Note [CoreSyn let/app invariant] in CoreSyn
1036 -- Reason: we don't want to inline single uses, or discard dead bindings,
1037 -- for unlifted, side-effect-full bindings
1038 postInlineUnconditionally dflags env top_lvl bndr occ_info rhs unfolding
1039 | not active = False
1040 | isWeakLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline
1041 -- because it might be referred to "earlier"
1042 | isExportedId bndr = False
1043 | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
1044 | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally]
1045 | exprIsTrivial rhs = True
1046 | otherwise
1047 = case occ_info of
1048 -- The point of examining occ_info here is that for *non-values*
1049 -- that occur outside a lambda, the call-site inliner won't have
1050 -- a chance (because it doesn't know that the thing
1051 -- only occurs once). The pre-inliner won't have gotten
1052 -- it either, if the thing occurs in more than one branch
1053 -- So the main target is things like
1054 -- let x = f y in
1055 -- case v of
1056 -- True -> case x of ...
1057 -- False -> case x of ...
1058 -- This is very important in practice; e.g. wheel-seive1 doubles
1059 -- in allocation if you miss this out
1060 OneOcc in_lam _one_br int_cxt -- OneOcc => no code-duplication issue
1061 -> smallEnoughToInline dflags unfolding -- Small enough to dup
1062 -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
1063 --
1064 -- NB: Do NOT inline arbitrarily big things, even if one_br is True
1065 -- Reason: doing so risks exponential behaviour. We simplify a big
1066 -- expression, inline it, and simplify it again. But if the
1067 -- very same thing happens in the big expression, we get
1068 -- exponential cost!
1069 -- PRINCIPLE: when we've already simplified an expression once,
1070 -- make sure that we only inline it if it's reasonably small.
1071
1072 && (not in_lam ||
1073 -- Outside a lambda, we want to be reasonably aggressive
1074 -- about inlining into multiple branches of case
1075 -- e.g. let x = <non-value>
1076 -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
1077 -- Inlining can be a big win if C3 is the hot-spot, even if
1078 -- the uses in C1, C2 are not 'interesting'
1079 -- An example that gets worse if you add int_cxt here is 'clausify'
1080
1081 (isCheapUnfolding unfolding && int_cxt))
1082 -- isCheap => acceptable work duplication; in_lam may be true
1083 -- int_cxt to prevent us inlining inside a lambda without some
1084 -- good reason. See the notes on int_cxt in preInlineUnconditionally
1085
1086 IAmDead -> True -- This happens; for example, the case_bndr during case of
1087 -- known constructor: case (a,b) of x { (p,q) -> ... }
1088 -- Here x isn't mentioned in the RHS, so we don't want to
1089 -- create the (dead) let-binding let x = (a,b) in ...
1090
1091 _ -> False
1092
1093 -- Here's an example that we don't handle well:
1094 -- let f = if b then Left (\x.BIG) else Right (\y.BIG)
1095 -- in \y. ....case f of {...} ....
1096 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
1097 -- But
1098 -- - We can't preInlineUnconditionally because that would invalidate
1099 -- the occ info for b.
1100 -- - We can't postInlineUnconditionally because the RHS is big, and
1101 -- that risks exponential behaviour
1102 -- - We can't call-site inline, because the rhs is big
1103 -- Alas!
1104
1105 where
1106 active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
1107 -- See Note [pre/postInlineUnconditionally in gentle mode]
1108
1109 {-
1110 Note [Top level and postInlineUnconditionally]
1111 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1112 We don't do postInlineUnconditionally for top-level things (even for
1113 ones that are trivial):
1114
1115 * Doing so will inline top-level error expressions that have been
1116 carefully floated out by FloatOut. More generally, it might
1117 replace static allocation with dynamic.
1118
1119 * Even for trivial expressions there's a problem. Consider
1120 {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
1121 blah xs = reverse xs
1122 ruggle = sort
1123 In one simplifier pass we might fire the rule, getting
1124 blah xs = ruggle xs
1125 but in *that* simplifier pass we must not do postInlineUnconditionally
1126 on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
1127
1128 If the rhs is trivial it'll be inlined by callSiteInline, and then
1129 the binding will be dead and discarded by the next use of OccurAnal
1130
1131 * There is less point, because the main goal is to get rid of local
1132 bindings used in multiple case branches.
1133
1134 * The inliner should inline trivial things at call sites anyway.
1135
1136 Note [Stable unfoldings and postInlineUnconditionally]
1137 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1138 Do not do postInlineUnconditionally if the Id has a stable unfolding,
1139 otherwise we lose the unfolding. Example
1140
1141 -- f has stable unfolding with rhs (e |> co)
1142 -- where 'e' is big
1143 f = e |> co
1144
1145 Then there's a danger we'll optimise to
1146
1147 f' = e
1148 f = f' |> co
1149
1150 and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
1151 won't inline because 'e' is too big.
1152
1153 c.f. Note [Stable unfoldings and preInlineUnconditionally]
1154
1155
1156 ************************************************************************
1157 * *
1158 Rebuilding a lambda
1159 * *
1160 ************************************************************************
1161 -}
1162
1163 mkLam :: [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
1164 -- mkLam tries two things
1165 -- a) eta reduction, if that gives a trivial expression
1166 -- b) eta expansion [only if there are some value lambdas]
1167
1168 mkLam [] body _cont
1169 = return body
1170 mkLam bndrs body cont
1171 = do { dflags <- getDynFlags
1172 ; mkLam' dflags bndrs body }
1173 where
1174 mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
1175 mkLam' dflags bndrs (Cast body co)
1176 | not (any bad bndrs)
1177 -- Note [Casts and lambdas]
1178 = do { lam <- mkLam' dflags bndrs body
1179 ; return (mkCast lam (mkPiCos Representational bndrs co)) }
1180 where
1181 co_vars = tyCoVarsOfCo co
1182 bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
1183
1184 mkLam' dflags bndrs body@(Lam {})
1185 = mkLam' dflags (bndrs ++ bndrs1) body1
1186 where
1187 (bndrs1, body1) = collectBinders body
1188
1189 mkLam' dflags bndrs (Tick t expr)
1190 | tickishFloatable t
1191 = mkTick t <$> mkLam' dflags bndrs expr
1192
1193 mkLam' dflags bndrs body
1194 | gopt Opt_DoEtaReduction dflags
1195 , Just etad_lam <- tryEtaReduce bndrs body
1196 = do { tick (EtaReduction (head bndrs))
1197 ; return etad_lam }
1198
1199 | not (contIsRhs cont) -- See Note [Eta expanding lambdas]
1200 , gopt Opt_DoLambdaEtaExpansion dflags
1201 , any isRuntimeVar bndrs
1202 , let body_arity = exprEtaExpandArity dflags body
1203 , body_arity > 0
1204 = do { tick (EtaExpansion (head bndrs))
1205 ; return (mkLams bndrs (etaExpand body_arity body)) }
1206
1207 | otherwise
1208 = return (mkLams bndrs body)
1209
1210 {-
1211 Note [Eta expanding lambdas]
1212 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1213 In general we *do* want to eta-expand lambdas. Consider
1214 f (\x -> case x of (a,b) -> \s -> blah)
1215 where 's' is a state token, and hence can be eta expanded. This
1216 showed up in the code for GHC.IO.Handle.Text.hPutChar, a rather
1217 important function!
1218
1219 The eta-expansion will never happen unless we do it now. (Well, it's
1220 possible that CorePrep will do it, but CorePrep only has a half-baked
1221 eta-expander that can't deal with casts. So it's much better to do it
1222 here.)
1223
1224 However, when the lambda is let-bound, as the RHS of a let, we have a
1225 better eta-expander (in the form of tryEtaExpandRhs), so we don't
1226 bother to try expansion in mkLam in that case; hence the contIsRhs
1227 guard.
1228
1229 Note [Casts and lambdas]
1230 ~~~~~~~~~~~~~~~~~~~~~~~~
1231 Consider
1232 (\x. (\y. e) `cast` g1) `cast` g2
1233 There is a danger here that the two lambdas look separated, and the
1234 full laziness pass might float an expression to between the two.
1235
1236 So this equation in mkLam' floats the g1 out, thus:
1237 (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
1238 where x:tx.
1239
1240 In general, this floats casts outside lambdas, where (I hope) they
1241 might meet and cancel with some other cast:
1242 \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
1243 /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
1244 /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
1245 (if not (g `in` co))
1246
1247 Notice that it works regardless of 'e'. Originally it worked only
1248 if 'e' was itself a lambda, but in some cases that resulted in
1249 fruitless iteration in the simplifier. A good example was when
1250 compiling Text.ParserCombinators.ReadPrec, where we had a definition
1251 like (\x. Get `cast` g)
1252 where Get is a constructor with nonzero arity. Then mkLam eta-expanded
1253 the Get, and the next iteration eta-reduced it, and then eta-expanded
1254 it again.
1255
1256 Note also the side condition for the case of coercion binders.
1257 It does not make sense to transform
1258 /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
1259 because the latter is not well-kinded.
1260
1261 ************************************************************************
1262 * *
1263 Eta expansion
1264 * *
1265 ************************************************************************
1266 -}
1267
1268 tryEtaExpandRhs :: SimplEnv -> OutId -> OutExpr -> SimplM (Arity, OutExpr)
1269 -- See Note [Eta-expanding at let bindings]
1270 tryEtaExpandRhs env bndr rhs
1271 = do { dflags <- getDynFlags
1272 ; (new_arity, new_rhs) <- try_expand dflags
1273
1274 ; WARN( new_arity < old_id_arity,
1275 (ptext (sLit "Arity decrease:") <+> (ppr bndr <+> ppr old_id_arity
1276 <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
1277 -- Note [Arity decrease] in Simplify
1278 return (new_arity, new_rhs) }
1279 where
1280 try_expand dflags
1281 | exprIsTrivial rhs
1282 = return (exprArity rhs, rhs)
1283
1284 | sm_eta_expand (getMode env) -- Provided eta-expansion is on
1285 , let new_arity1 = findRhsArity dflags bndr rhs old_arity
1286 new_arity2 = idCallArity bndr
1287 new_arity = max new_arity1 new_arity2
1288 , new_arity > old_arity -- And the current manifest arity isn't enough
1289 = do { tick (EtaExpansion bndr)
1290 ; return (new_arity, etaExpand new_arity rhs) }
1291 | otherwise
1292 = return (old_arity, rhs)
1293
1294 old_arity = exprArity rhs -- See Note [Do not eta-expand PAPs]
1295 old_id_arity = idArity bndr
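    -- A sketch of the effect, for illustration (g, x, y, eta are invented
    -- names): if rhs = \x -> g x y has manifest arity 1 but findRhsArity or
    -- idCallArity reports 2, and eta-expansion is on, we tick EtaExpansion
    -- and return (2, \x eta -> g x y eta).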
1296
1297 {-
1298 Note [Eta-expanding at let bindings]
1299 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1300 We now eta expand at let-bindings, which is where the payoff comes.
1301 The most significant thing is that we can do a simple arity analysis
1302 (in CoreArity.findRhsArity), which we can't do for free-floating lambdas
1303
1304 One useful consequence of not eta-expanding lambdas is this example:
1305 genMap :: C a => ...
1306 {-# INLINE genMap #-}
1307 genMap f xs = ...
1308
1309 myMap :: D a => ...
1310 {-# INLINE myMap #-}
1311 myMap = genMap
1312
1313 Notice that 'genMap' should only inline if applied to two arguments.
1314 In the stable unfolding for myMap we'll have the unfolding
1315 (\d -> genMap Int (..d..))
1316 We do not want to eta-expand to
1317 (\d f xs -> genMap Int (..d..) f xs)
1318 because then 'genMap' will inline, and it really shouldn't: at least
1319 as far as the programmer is concerned, it's not applied to two
1320 arguments!
1321
1322 Note [Do not eta-expand PAPs]
1323 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1324 We used to have old_arity = manifestArity rhs, which meant that we
1325 would eta-expand even PAPs. But this gives no particular advantage,
1326 and can lead to a massive blow-up in code size, exhibited by Trac #9020.
1327 Suppose we have a PAP
1328 foo :: IO ()
1329 foo = returnIO ()
1330 Then we can eta-expand it to
1331 foo = (\eta. (returnIO () |> sym g) eta) |> g
1332 where
1333 g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
1334
1335 But there is really no point in doing this, and it generates masses of
1336 coercions and whatnot that eventually disappear again. For T9020, GHC
1337 allocated 6.6G before, and 0.8G afterwards; and residency dropped from
1338 1.8G to 45M.
1339
1340 But note that this won't eta-expand, say
1341 f = \g -> map g
1342 Does it matter not eta-expanding such functions? I'm not sure. Perhaps
1343 strictness analysis will have less to bite on?
1344
1345
1346 ************************************************************************
1347 * *
1348 \subsection{Floating lets out of big lambdas}
1349 * *
1350 ************************************************************************
1351
1352 Note [Floating and type abstraction]
1353 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1354 Consider this:
1355 x = /\a. C e1 e2
1356 We'd like to float this to
1357 y1 = /\a. e1
1358 y2 = /\a. e2
1359 x = /\a. C (y1 a) (y2 a)
1360 for the usual reasons: we want to inline x rather vigorously.
1361
1362 You may think that this kind of thing is rare. But in some programs it is
1363 common. For example, if you do closure conversion you might get:
1364
1365 data a :-> b = forall e. (e -> a -> b) :$ e
1366
1367 f_cc :: forall a. a :-> a
1368 f_cc = /\a. (\e. id a) :$ ()
1369
1370 Now we really want to inline that f_cc thing so that the
1371 construction of the closure goes away.
1372
1373 So I have elaborated simplLazyBind to understand right-hand sides that look
1374 like
1375 /\ a1..an. body
1376
1377 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1378 but there is quite a bit of plumbing in simplLazyBind as well.
1379
1380 The same transformation is good when there are lets in the body:
1381
1382 /\abc -> let(rec) x = e in b
1383 ==>
1384 let(rec) x' = /\abc -> let x = x' a b c in e
1385 in
1386 /\abc -> let x = x' a b c in b
1387
1388 This is good because it can turn things like:
1389
1390 let f = /\a -> letrec g = ... g ... in g
1391 into
1392 letrec g' = /\a -> ... g' a ...
1393 in
1394 let f = /\ a -> g' a
1395
1396 which is better. In effect, it means that big lambdas don't impede
1397 let-floating.
1398
1399 This optimisation is CRUCIAL in eliminating the junk introduced by
1400 desugaring mutually recursive definitions. Don't eliminate it lightly!
1401
1402 [May 1999] If we do this transformation *regardless* then we can
1403 end up with some pretty silly stuff. For example,
1404
1405 let
1406 st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1407 in ..
1408 becomes
1409 let y1 = /\s -> r1
1410 y2 = /\s -> r2
1411 st = /\s -> ...[y1 s/x1, y2 s/x2]
1412 in ..
1413
1414 Unless the "..." is a WHNF there is really no point in doing this.
1415 Indeed it can make things worse. Suppose x1 is used strictly,
1416 and is of the form
1417
1418 x1* = case f y of { (a,b) -> e }
1419
1420 If we abstract this wrt the tyvar we then can't do the case inline
1421 as we would normally do.
1422
1423 That's why the whole transformation is part of the same process that
1424 floats let-bindings and constructor arguments out of RHSs. In particular,
1425 it is guarded by the doFloatFromRhs call in simplLazyBind.
1426 -}
1427
1428 abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
1429 abstractFloats main_tvs body_env body
1430 = ASSERT( notNull body_floats )
1431 do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
1432 ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
1433 where
1434 main_tv_set = mkVarSet main_tvs
1435 body_floats = getFloatBinds body_env
1436 empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
1437
1438 abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
1439 abstract subst (NonRec id rhs)
1440 = do { (poly_id, poly_app) <- mk_poly tvs_here id
1441 ; let poly_rhs = mkLams tvs_here rhs'
1442 subst' = CoreSubst.extendIdSubst subst id poly_app
1443 ; return (subst', (NonRec poly_id poly_rhs)) }
1444 where
1445 rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
1446 tvs_here = varSetElemsKvsFirst (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
1447
1448 -- Abstract only over the type variables free in the rhs
1449 -- wrt which the new binding is abstracted. But the naive
1450 -- approach of abstracting wrt the tyvars free in the Id's type
1451 -- fails. Consider:
1452 -- /\ a b -> let t :: (a,b) = (e1, e2)
1453 -- x :: a = fst t
1454 -- in ...
1455 -- Here, b isn't free in x's type, but we must nevertheless
1456 -- abstract wrt b as well, because t's type mentions b.
1457 -- Since t is floated too, we'd end up with the bogus:
1458 -- poly_t = /\ a b -> (e1, e2)
1459 -- poly_x = /\ a -> fst (poly_t a *b*)
1460 -- So for now we adopt the even more naive approach of
1461 -- abstracting wrt *all* the tyvars. We'll see if that
1462 -- gives rise to problems. SLPJ June 98
1463
1464 abstract subst (Rec prs)
1465 = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
1466 ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
1467 poly_rhss = [mkLams tvs_here (CoreSubst.substExpr (text "abstract_floats3") subst' rhs)
1468 | rhs <- rhss]
1469 ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
1470 where
1471 (ids,rhss) = unzip prs
1472 -- For a recursive group, it's a bit of a pain to work out the minimal
1473 -- set of tyvars over which to abstract:
1474 -- /\ a b c. let x = ...a... in
1475 -- letrec { p = ...x...q...
1476 -- q = .....p...b... } in
1477 -- ...
1478 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
1479 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
1480 -- Since it's a pain, we just use the whole set, which is always safe
1481 --
1482 -- If you ever want to be more selective, remember this bizarre case too:
1483 -- x::a = x
1484 -- Here, we must abstract 'x' over 'a'.
1485 tvs_here = sortQuantVars main_tvs
1486
1487 mk_poly tvs_here var
1488 = do { uniq <- getUniqueM
1489 ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
1490 poly_ty = mkForAllTys tvs_here (idType var) -- But new type of course
1491 poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.hs
1492 mkLocalId poly_name poly_ty
1493 ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
1494 -- In the olden days, it was crucial to copy the occInfo of the original var,
1495 -- because we were looking at occurrence-analysed but as yet unsimplified code!
1496 -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking
1497 -- at already simplified code, so it doesn't matter
1498 --
1499 -- It's even right to retain single-occurrence or dead-var info:
1500 -- Suppose we started with /\a -> let x = E in B
1501 -- where x occurs once in B. Then we transform to:
1502 -- let x' = /\a -> E in /\a -> let x* = x' a in B
1503 -- where x* has an INLINE prag on it. Now, once x* is inlined,
1504 -- the occurrences of x' will be just the occurrences originally
1505 -- pinned on x.
1506
1507 {-
1508 Note [Abstract over coercions]
1509 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1510 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1511 type variable a. Rather than sort this mess out, we simply bale out and abstract
1512 wrt all the type variables if any of them are coercion variables.
1513
1514
1515 Historical note: if you use let-bindings instead of a substitution, beware of this:
1516
1517 -- Suppose we start with:
1518 --
1519 -- x = /\ a -> let g = G in E
1520 --
1521 -- Then we'll float to get
1522 --
1523 -- x = let poly_g = /\ a -> G
1524 -- in /\ a -> let g = poly_g a in E
1525 --
1526 -- But now the occurrence analyser will see just one occurrence
1527 -- of poly_g, not inside a lambda, so the simplifier will
1528 -- PreInlineUnconditionally poly_g back into g! Back to square 1!
1529 -- (I used to think that the "don't inline lone occurrences" stuff
1530 -- would stop this happening, but since it's the *only* occurrence,
1531 -- PreInlineUnconditionally kicks in first!)
1532 --
1533 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1534 -- to appear many times. (NB: mkInlineMe eliminates
1535 -- such notes on trivial RHSs, so do it manually.)
1536
1537 ************************************************************************
1538 * *
1539 prepareAlts
1540 * *
1541 ************************************************************************
1542
1543 prepareAlts tries these things:
1544
1545 1. Eliminate alternatives that cannot match, including the
1546 DEFAULT alternative.
1547
1548 2. If the DEFAULT alternative can match only one possible constructor,
1549 then make that constructor explicit.
1550 e.g.
1551 case e of x { DEFAULT -> rhs }
1552 ===>
1553 case e of x { (a,b) -> rhs }
1554 where the type is a single constructor type. This gives better code
1555 when rhs also scrutinises x or e.
1556
1557 3. Return a list of the constructors that cannot match in the
1558 DEFAULT alternative (if there is one)
1559
1560 Here "cannot match" includes knowledge from GADTs.
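For instance (an illustrative example, not taken from this module; it
assumes GADTs):

   data T a where
     TInt  :: T Int
     TBool :: T Bool

   f :: T Int -> Int
   f x = case x of TInt -> 3

In the Core for f, the case on x needs no TBool or DEFAULT alternative,
because matching TBool would require a ~ Bool while we already know
a ~ Int.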
1561
1562 It's a good idea to do this stuff before simplifying the alternatives, to
1563 avoid simplifying alternatives we know can't happen, and to come up with
1564 the list of constructors that are handled, to put into the IdInfo of the
1565 case binder, for use when simplifying the alternatives.
1566
1567 Eliminating the default alternative in (1) isn't so obvious, but it can
1568 happen:
1569
1570 data Colour = Red | Green | Blue
1571
1572 f x = case x of
1573 Red -> ..
1574 Green -> ..
1575 DEFAULT -> h x
1576
1577 h y = case y of
1578 Blue -> ..
1579 DEFAULT -> [ case y of ... ]
1580
1581 If we inline h into f, the default case of the inlined h can't happen.
1582 If we don't notice this, we may end up filtering out *all* the cases
1583 of the inner case y, which gives us nowhere to go!
1584 -}
1585
1586 prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
1587 -- The returned alternatives can be empty, meaning none are possible
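-- Returns (imposs_deflt_cons, alts'): the constructors that cannot match
-- in the DEFAULT alternative (to go into the IdInfo of the case binder),
-- and the filtered, refined alternatives themselves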
1588 prepareAlts scrut case_bndr' alts
1589 -- Case binder is needed just for its type. Note that as an
1590 -- OutId, it has maximum information; this is important.
1591 -- Test simpl013 is an example
1592 = do { us <- getUniquesM
1593 ; let (imposs_deflt_cons, refined_deflt, alts')
1594 = filterAlts us (varType case_bndr') imposs_cons alts
1595 ; when refined_deflt $ tick (FillInCaseDefault case_bndr')
1596
1597 ; alts'' <- combineIdenticalAlts case_bndr' alts'
1598 ; return (imposs_deflt_cons, alts'') }
1599 where
1600 imposs_cons = case scrut of
1601 Var v -> otherCons (idUnfolding v)
1602 _ -> []
1603
1604 {-
1605 Note [Combine identical alternatives]
1606 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1607 If several alternatives are identical, merge them into
1608 a single DEFAULT alternative. I've occasionally seen this
1609 making a big difference:
1610
1611 case e of =====> case e of
1612 C _ -> f x D v -> ....v....
1613 D v -> ....v.... DEFAULT -> f x
1614 DEFAULT -> f x
1615
1616 The point is that we merge common RHSs, at least for the DEFAULT case.
1617 [One could do something more elaborate but I've never seen it needed.]
1618 To avoid an expensive test, we just merge branches equal to the *first*
1619 alternative; this picks up the common cases
1620 a) all branches equal
1621 b) some branches equal to the DEFAULT (which occurs first)
1622
1623 The case where the Combine Identical Alternatives transformation showed up
1624 was like this (base/Foreign/C/Err/Error.hs):
1625
1626 x | p `is` 1 -> e1
1627 | p `is` 2 -> e2
1628 ...etc...
1629
1630 where @is@ was something like
1631
1632 p `is` n = p /= (-1) && p == n
1633
1634 This gave rise to a horrible sequence of cases
1635
1636 case p of
1637 (-1) -> $j p
1638 1 -> e1
1639 DEFAULT -> $j p
1640
1641 and similarly in cascade for all the join points!
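
After merging, the branches equal to the first alternative collapse into a
single DEFAULT, giving roughly (a sketch):

     case p of
       1       -> e1
       DEFAULT -> $j p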
1642
1643 NB: it's important that all this is done in [InAlt], *before* we work
1644 on the alternatives themselves, because Simplify.simplAlt may zap the
1645 occurrence info on the binders in the alternatives, which in turn
1646 defeats combineIdenticalAlts (see Trac #7360).
1647 -}
1648
1649 combineIdenticalAlts :: OutId -> [InAlt] -> SimplM [InAlt]
1650 -- See Note [Combine identical alternatives]
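-- If later alternatives have the same RHS as the first one, and all the
-- binders concerned are dead, collapse that group into a single DEFAULT
-- alternative, keeping any floatable ticks from the eliminated RHSs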
1651 combineIdenticalAlts case_bndr ((_con1,bndrs1,rhs1) : con_alts)
1652 | all isDeadBinder bndrs1 -- Remember the default
1653 , not (null eliminated_alts) -- alternative comes first
1654 = do { tick (AltMerge case_bndr)
1655 ; return ((DEFAULT, [], mkTicks (concat tickss) rhs1) : filtered_alts) }
1656 where
1657 (eliminated_alts, filtered_alts) = partition identical_to_alt1 con_alts
1658 cheapEqTicked e1 e2 = cheapEqExpr' tickishFloatable e1 e2
1659 identical_to_alt1 (_con,bndrs,rhs)
1660 = all isDeadBinder bndrs && rhs `cheapEqTicked` rhs1
1661 tickss = map (stripTicksT tickishFloatable . thirdOf3) eliminated_alts
1662
1663 combineIdenticalAlts _ alts = return alts
1664
1665 {-
1666 ************************************************************************
1667 * *
1668 mkCase
1669 * *
1670 ************************************************************************
1671
1672 mkCase tries these things
1673
1674 1. Merge Nested Cases
1675
1676       case e of b {             ==>   case e of b {
1677         p1 -> rhs1                      p1 -> rhs1
1678         ...                             ...
1679         pm -> rhsm                      pm -> rhsm
1680         _  -> case b of b' {            pn -> let b'=b in rhsn
1681                 pn -> rhsn              ...
1682                 ...                     po -> let b'=b in rhso
1683                 po -> rhso              _  -> let b'=b in rhsd
1684                 _  -> rhsd
1685               }
1686
1687 which merges the two cases into one when the default alternative of
1688 the outer case scrutinises the same variable as the outer case. This
1689 transformation is called Case Merging. It avoids scrutinising the
1690 same variable multiple times.
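
A small concrete instance (a sketch of the schema above):

   case e of b { A -> 1; DEFAULT -> case b of b' { B -> 2; DEFAULT -> 3 } }
     ==>
   case e of b { A -> 1; B -> let b' = b in 2; DEFAULT -> let b' = b in 3 }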
1691
1692 2. Eliminate Identity Case
1693
1694 case e of ===> e
1695 True -> True;
1696 False -> False
1697
1698 and similar friends.
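
Other shapes that count as identity cases (sketches) include

   case e of x { DEFAULT -> x }            ===>  e
   case e of x { DEFAULT -> x `cast` co }  ===>  e `cast` co

(see check_eq and Note [RHS casts] below).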
1699 -}
1700
1701 mkCase, mkCase1, mkCase2
1702 :: DynFlags
1703 -> OutExpr -> OutId
1704 -> OutType -> [OutAlt] -- Alternatives in standard (increasing) order
1705 -> SimplM OutExpr
1706
1707 --------------------------------------------------
1708 -- 1. Merge Nested Cases
1709 --------------------------------------------------
1710
1711 mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
1712 | gopt Opt_CaseMerge dflags
1713 , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
1714 <- stripTicksTop tickishFloatable deflt_rhs
1715 , inner_scrut_var == outer_bndr
1716 = do { tick (CaseMerge outer_bndr)
1717
1718 ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
1719 (con, args, wrap_rhs rhs)
1720 -- Simplifier's no-shadowing invariant should ensure
1721 -- that outer_bndr is not shadowed by the inner patterns
1722 wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
1723 -- The let is OK even for unboxed binders,
1724
1725 wrapped_alts | isDeadBinder inner_bndr = inner_alts
1726 | otherwise = map wrap_alt inner_alts
1727
1728 merged_alts = mergeAlts outer_alts wrapped_alts
1729 -- NB: mergeAlts gives priority to the left
1730 -- case x of
1731 -- A -> e1
1732 -- DEFAULT -> case x of
1733 -- A -> e2
1734 -- B -> e3
1735 -- When we merge, we must ensure that e1 takes
1736 -- precedence over e2 as the value for A!
1737
1738 ; fmap (mkTicks ticks) $
1739 mkCase1 dflags scrut outer_bndr alts_ty merged_alts
1740 }
1741 -- Warning: don't call mkCase recursively!
1742 -- Firstly, there's no point, because inner alts have already had
1743 -- mkCase applied to them, so they won't have a case in their default
1744 -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
1745 -- in munge_rhs may put a case into the DEFAULT branch!
1746
1747 mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
1748
1749 --------------------------------------------------
1750 -- 2. Eliminate Identity Case
1751 --------------------------------------------------
1752
1753 mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _) -- Identity case
1754 | all identity_alt alts
1755 = do { tick (CaseIdentity case_bndr)
1756 ; return (mkTicks ticks $ re_cast scrut rhs1) }
1757 where
1758 ticks = concatMap (stripTicksT tickishFloatable . thirdOf3) (tail alts)
1759 identity_alt (con, args, rhs) = check_eq rhs con args
1760
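-- check_eq rhs con args: does rhs denote the same value as the scrutinee
-- in this alternative, ignoring floatable ticks and looking through casts
-- (the casts are put back by re_cast below)?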
1761 check_eq (Cast rhs co) con args
1762 = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
1763 -- See Note [RHS casts]
1764 check_eq (Lit lit) (LitAlt lit') _ = lit == lit'
1765 check_eq (Var v) _ _ | v == case_bndr = True
1766 check_eq (Var v) (DataAlt con) [] = v == dataConWorkId con
1767 -- Optimisation only
1768 check_eq (Tick t e) alt args = tickishFloatable t &&
1769 check_eq e alt args
1770 check_eq rhs (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $
1771 mkConApp con (arg_tys ++
1772 varsToCoreExprs args)
1773 check_eq _ _ _ = False
1774
1775 arg_tys = map Type (tyConAppArgs (idType case_bndr))
1776
1777 -- Note [RHS casts]
1778 -- ~~~~~~~~~~~~~~~~
1779 -- We've seen this:
1780 -- case e of x { _ -> x `cast` c }
1781 -- And we definitely want to eliminate this case, to give
1782 -- e `cast` c
1783 -- So we throw away the cast from the RHS, and reconstruct
1784 -- it at the other end. All the RHS casts must be the same
1785 -- if (all identity_alt alts) holds.
1786 --
1787 -- Don't worry about nested casts, because the simplifier combines them
1788
1789 re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
1790 re_cast scrut _ = scrut
1791
1792 mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
1793
1794 --------------------------------------------------
1795 -- Catch-all
1796 --------------------------------------------------
1797 mkCase2 _dflags scrut bndr alts_ty alts
1798 = return (Case scrut bndr alts_ty alts)
1799
1800 {-
1801 Note [Dead binders]
1802 ~~~~~~~~~~~~~~~~~~~~
1803 Note that dead-ness is maintained by the simplifier, so that it is
1804 accurate after simplification as well as before.
1805
1806
1807 Note [Cascading case merge]
1808 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1809 Case merging should cascade in one sweep, because it
1810 happens bottom-up
1811
1812 case e of a {
1813 DEFAULT -> case a of b
1814 DEFAULT -> case b of c {
1815 DEFAULT -> e
1816 A -> ea
1817 B -> eb
1818 C -> ec
1819 ==>
1820 case e of a {
1821 DEFAULT -> case a of b
1822 DEFAULT -> let c = b in e
1823 A -> let c = b in ea
1824 B -> eb
1825 C -> ec
1826 ==>
1827 case e of a {
1828 DEFAULT -> let b = a in let c = b in e
1829 A -> let b = a in let c = b in ea
1830 B -> let b = a in eb
1831 C -> ec
1832
1833
1834 However here's a tricky case that we still don't catch, and I don't
1835 see how to catch it in one pass:
1836
1837 case x of c1 { I# a1 ->
1838 case a1 of c2 ->
1839 0 -> ...
1840 DEFAULT -> case x of c3 { I# a2 ->
1841 case a2 of ...
1842
1843 After occurrence analysis (and its binder-swap) we get this
1844
1845 case x of c1 { I# a1 ->
1846 let x = c1 in -- Binder-swap addition
1847 case a1 of c2 ->
1848 0 -> ...
1849 DEFAULT -> case x of c3 { I# a2 ->
1850 case a2 of ...
1851
1852 When we simplify the inner case x, we'll see that
1853 x=c1=I# a1. So we'll bind a2 to a1, and get
1854
1855 case x of c1 { I# a1 ->
1856 case a1 of c2 ->
1857 0 -> ...
1858 DEFAULT -> case a1 of ...
1859
1860 This is correct, but we can't do a case merge in this sweep
1861 because c2 /= a1. Reason: the binding c1=I# a1 went inwards
1862 without getting changed to c1=I# c2.
1863
1864 I don't think this is worth fixing, even if I knew how. It'll
1865 all come out in the next pass anyway.
1866 -}