b1e8c1e36a07072033f16151dd7c2b97a9c49c44
[ghc.git] / compiler / simplCore / SimplUtils.hs
1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 \section[SimplUtils]{The simplifier utilities}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplUtils (
10 -- Rebuilding
11 mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
12
13 -- Inlining,
14 preInlineUnconditionally, postInlineUnconditionally,
15 activeUnfolding, activeRule,
16 getUnfoldingInRuleMatch,
17 simplEnvForGHCi, updModeForStableUnfoldings,
18
19 -- The continuation type
20 SimplCont(..), DupFlag(..),
21 isSimplified,
22 contIsDupable, contResultType, contHoleType,
23 contIsTrivial, contArgs,
24 countValArgs, countArgs,
25 mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
26 interestingCallContext,
27
28 -- ArgInfo
29 ArgInfo(..), ArgSpec(..), mkArgInfo,
30 addValArgTo, addCastTo, addTyArgTo,
31 argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
32
33 abstractFloats
34 ) where
35
36 #include "HsVersions.h"
37
38 import SimplEnv
39 import CoreMonad ( SimplifierMode(..), Tick(..) )
40 import MkCore ( sortQuantVars )
41 import DynFlags
42 import CoreSyn
43 import qualified CoreSubst
44 import PprCore
45 import CoreFVs
46 import CoreUtils
47 import CoreArity
48 import CoreUnfold
49 import Name
50 import Id
51 import Var
52 import Demand
53 import SimplMonad
54 import Type hiding( substTy )
55 import Coercion hiding( substCo, substTy )
56 import DataCon ( dataConWorkId )
57 import VarEnv
58 import VarSet
59 import BasicTypes
60 import Util
61 import MonadUtils
62 import Outputable
63 import FastString
64 import Pair
65
66 import Control.Monad ( when )
67
68 {-
69 ************************************************************************
70 * *
71 The SimplCont and DupFlag types
72 * *
73 ************************************************************************
74
75 A SimplCont allows the simplifier to traverse the expression in a
76 zipper-like fashion. The SimplCont represents the rest of the expression,
77 "above" the point of interest.
78
79 You can also think of a SimplCont as an "evaluation context", using
80 that term in the way it is used for operational semantics. This is the
81 way I usually think of it, For example you'll often see a syntax for
82 evaluation context looking like
83 C ::= [] | C e | case C of alts | C `cast` co
84 That's the kind of thing we are doing here, and I use that syntax in
85 the comments.
86
87
88 Key points:
89 * A SimplCont describes a *strict* context (just like
90 evaluation contexts do). E.g. Just [] is not a SimplCont
91
92 * A SimplCont describes a context that *does not* bind
93 any variables. E.g. \x. [] is not a SimplCont
94 -}
95
96 data SimplCont
97 = Stop -- An empty context, or <hole>
98 OutType -- Type of the <hole>
99 CallCtxt -- Tells if there is something interesting about
100 -- the context, and hence the inliner
101 -- should be a bit keener (see interestingCallContext)
102 -- Specifically:
103 -- This is an argument of a function that has RULES
104 -- Inlining the call might allow the rule to fire
105 -- Never ValAppCxt (use ApplyToVal instead)
106 -- or CaseCtxt (use Select instead)
107
108 | CastIt -- <hole> `cast` co
109 OutCoercion -- The coercion simplified
110 -- Invariant: never an identity coercion
111 SimplCont
112
113 | ApplyToVal { -- <hole> arg
114 sc_dup :: DupFlag, -- See Note [DupFlag invariants]
115 sc_arg :: InExpr, -- The argument,
116 sc_env :: StaticEnv, -- and its static env
117 sc_cont :: SimplCont }
118
119 | ApplyToTy { -- <hole> ty
120 sc_arg_ty :: OutType, -- Argument type
121 sc_hole_ty :: OutType, -- Type of the function, presumably (forall a. blah)
122 -- See Note [The hole type in ApplyToTy]
123 sc_cont :: SimplCont }
124
125 | Select { -- case <hole> of alts
126 sc_dup :: DupFlag, -- See Note [DupFlag invariants]
127 sc_bndr :: InId, -- case binder
128 sc_alts :: [InAlt], -- Alternatives
129 sc_env :: StaticEnv, -- and their static environment
130 sc_cont :: SimplCont }
131
132 -- The two strict forms have no DupFlag, because we never duplicate them
133 | StrictBind -- (\x* \xs. e) <hole>
134 InId [InBndr] -- let x* = <hole> in e
135 InExpr StaticEnv -- is a special case
136 SimplCont
137
138 | StrictArg -- f e1 ..en <hole>
139 ArgInfo -- Specifies f, e1..en, Whether f has rules, etc
140 -- plus strictness flags for *further* args
141 CallCtxt -- Whether *this* argument position is interesting
142 SimplCont
143
144 | TickIt
145 (Tickish Id) -- Tick tickish <hole>
146 SimplCont
147
148 data DupFlag = NoDup -- Unsimplified, might be big
149 | Simplified -- Simplified
150 | OkToDup -- Simplified and small
151
152 isSimplified :: DupFlag -> Bool
153 isSimplified NoDup = False
154 isSimplified _ = True -- Invariant: the subst-env is empty
155
156 perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
157 perhapsSubstTy dup env ty
158 | isSimplified dup = ty
159 | otherwise = substTy env ty
160
161 {-
162 Note [DupFlag invariants]
163 ~~~~~~~~~~~~~~~~~~~~~~~~~
164 In both (ApplyToVal dup _ env k)
165 and (Select dup _ _ env k)
166 the following invariants hold
167
168 (a) if dup = OkToDup, then continuation k is also ok-to-dup
169 (b) if dup = OkToDup or Simplified, the subst-env is empty
170 (and and hence no need to re-simplify)
171 -}
172
173 instance Outputable DupFlag where
174 ppr OkToDup = ptext (sLit "ok")
175 ppr NoDup = ptext (sLit "nodup")
176 ppr Simplified = ptext (sLit "simpl")
177
178 instance Outputable SimplCont where
179 ppr (Stop ty interesting) = ptext (sLit "Stop") <> brackets (ppr interesting) <+> ppr ty
180 ppr (CastIt co cont ) = (ptext (sLit "CastIt") <+> ppr co) $$ ppr cont
181 ppr (TickIt t cont) = (ptext (sLit "TickIt") <+> ppr t) $$ ppr cont
182 ppr (ApplyToTy { sc_arg_ty = ty, sc_cont = cont })
183 = (ptext (sLit "ApplyToTy") <+> pprParendType ty) $$ ppr cont
184 ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont })
185 = (ptext (sLit "ApplyToVal") <+> ppr dup <+> pprParendExpr arg)
186 $$ ppr cont
187 ppr (StrictBind b _ _ _ cont) = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
188 ppr (StrictArg ai _ cont) = (ptext (sLit "StrictArg") <+> ppr (ai_fun ai)) $$ ppr cont
189 ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
190 = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$
191 ifPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
192
193
194 {- Note [The hole type in ApplyToTy]
195 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
196 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
197 continuation. It is absolutely necessary to compute contHoleType, but it is
198 not used for anything else (and hence may not be evaluated).
199
200 Why is it necessary for contHoleType? Consider the continuation
201 ApplyToType Int (Stop Int)
202 corresponding to
203 (<hole> @Int) :: Int
204 What is the type of <hole>? It could be (forall a. Int) or (forall a. a),
205 and there is no way to know which, so we must record it.
206
207 In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType
208 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
209 doesn't matter because we'll never compute them all.
210
211 ************************************************************************
212 * *
213 ArgInfo and ArgSpec
214 * *
215 ************************************************************************
216 -}
217
218 data ArgInfo
219 = ArgInfo {
220 ai_fun :: OutId, -- The function
221 ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order)
222 ai_type :: OutType, -- Type of (f a1 ... an)
223
224 ai_rules :: [CoreRule], -- Rules for this function
225
226 ai_encl :: Bool, -- Flag saying whether this function
227 -- or an enclosing one has rules (recursively)
228 -- True => be keener to inline in all args
229
230 ai_strs :: [Bool], -- Strictness of remaining arguments
231 -- Usually infinite, but if it is finite it guarantees
232 -- that the function diverges after being given
233 -- that number of args
234 ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline
235 -- Always infinite
236 }
237
238 data ArgSpec
239 = ValArg OutExpr -- Apply to this (coercion or value); c.f. ApplyToVal
240 | TyArg { as_arg_ty :: OutType -- Apply to this type; c.f. ApplyToTy
241 , as_hole_ty :: OutType } -- Type of the function (presumably forall a. blah)
242 | CastBy OutCoercion -- Cast by this; c.f. CastIt
243
244 instance Outputable ArgSpec where
245 ppr (ValArg e) = ptext (sLit "ValArg") <+> ppr e
246 ppr (TyArg { as_arg_ty = ty }) = ptext (sLit "TyArg") <+> ppr ty
247 ppr (CastBy c) = ptext (sLit "CastBy") <+> ppr c
248
249 addValArgTo :: ArgInfo -> OutExpr -> ArgInfo
250 addValArgTo ai arg = ai { ai_args = ValArg arg : ai_args ai
251 , ai_type = funResultTy (ai_type ai) }
252
253 addTyArgTo :: ArgInfo -> OutType -> ArgInfo
254 addTyArgTo ai arg_ty = ai { ai_args = arg_spec : ai_args ai
255 , ai_type = applyTy poly_fun_ty arg_ty }
256 where
257 poly_fun_ty = ai_type ai
258 arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = poly_fun_ty }
259
260 addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
261 addCastTo ai co = ai { ai_args = CastBy co : ai_args ai
262 , ai_type = pSnd (coercionKind co) }
263
264 argInfoAppArgs :: [ArgSpec] -> [OutExpr]
265 argInfoAppArgs [] = []
266 argInfoAppArgs (CastBy {} : _) = [] -- Stop at a cast
267 argInfoAppArgs (ValArg e : as) = e : argInfoAppArgs as
268 argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as
269
270 pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
271 pushSimplifiedArgs _env [] k = k
272 pushSimplifiedArgs env (arg : args) k
273 = case arg of
274 TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
275 -> ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
276 ValArg e -> ApplyToVal { sc_arg = e, sc_env = env, sc_dup = Simplified, sc_cont = rest }
277 CastBy c -> CastIt c rest
278 where
279 rest = pushSimplifiedArgs env args k
280 -- The env has an empty SubstEnv
281
282 argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
283 -- NB: the [ArgSpec] is reversed so that the first arg
284 -- in the list is the last one in the application
285 argInfoExpr fun rev_args
286 = go rev_args
287 where
288 go [] = Var fun
289 go (ValArg a : as) = go as `App` a
290 go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty
291 go (CastBy co : as) = mkCast (go as) co
292
293
294 {-
295 ************************************************************************
296 * *
297 Functions on SimplCont
298 * *
299 ************************************************************************
300 -}
301
302 mkBoringStop :: OutType -> SimplCont
303 mkBoringStop ty = Stop ty BoringCtxt
304
305 mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in CoreUnfold
306 mkRhsStop ty = Stop ty RhsCtxt
307
308 mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
309 mkLazyArgStop ty cci = Stop ty cci
310
311 -------------------
312 contIsRhsOrArg :: SimplCont -> Bool
313 contIsRhsOrArg (Stop {}) = True
314 contIsRhsOrArg (StrictBind {}) = True
315 contIsRhsOrArg (StrictArg {}) = True
316 contIsRhsOrArg _ = False
317
318 contIsRhs :: SimplCont -> Bool
319 contIsRhs (Stop _ RhsCtxt) = True
320 contIsRhs _ = False
321
322 -------------------
323 contIsDupable :: SimplCont -> Bool
324 contIsDupable (Stop {}) = True
325 contIsDupable (ApplyToTy { sc_cont = k }) = contIsDupable k
326 contIsDupable (ApplyToVal { sc_dup = OkToDup }) = True -- See Note [DupFlag invariants]
327 contIsDupable (Select { sc_dup = OkToDup }) = True -- ...ditto...
328 contIsDupable (CastIt _ k) = contIsDupable k
329 contIsDupable _ = False
330
331 -------------------
332 contIsTrivial :: SimplCont -> Bool
333 contIsTrivial (Stop {}) = True
334 contIsTrivial (ApplyToTy { sc_cont = k }) = contIsTrivial k
335 contIsTrivial (ApplyToVal { sc_arg = Coercion _, sc_cont = k }) = contIsTrivial k
336 contIsTrivial (CastIt _ k) = contIsTrivial k
337 contIsTrivial _ = False
338
339 -------------------
340 contResultType :: SimplCont -> OutType
341 contResultType (Stop ty _) = ty
342 contResultType (CastIt _ k) = contResultType k
343 contResultType (StrictBind _ _ _ _ k) = contResultType k
344 contResultType (StrictArg _ _ k) = contResultType k
345 contResultType (Select { sc_cont = k }) = contResultType k
346 contResultType (ApplyToTy { sc_cont = k }) = contResultType k
347 contResultType (ApplyToVal { sc_cont = k }) = contResultType k
348 contResultType (TickIt _ k) = contResultType k
349
350 contHoleType :: SimplCont -> OutType
351 contHoleType (Stop ty _) = ty
352 contHoleType (TickIt _ k) = contHoleType k
353 contHoleType (CastIt co _) = pFst (coercionKind co)
354 contHoleType (StrictBind b _ _ se _) = substTy se (idType b)
355 contHoleType (StrictArg ai _ _) = funArgTy (ai_type ai)
356 contHoleType (ApplyToTy { sc_hole_ty = ty }) = ty -- See Note [The hole type in ApplyToTy]
357 contHoleType (ApplyToVal { sc_arg = e, sc_env = se, sc_dup = dup, sc_cont = k })
358 = mkFunTy (perhapsSubstTy dup se (exprType e))
359 (contHoleType k)
360 contHoleType (Select { sc_dup = d, sc_bndr = b, sc_env = se })
361 = perhapsSubstTy d se (idType b)
362
363 -------------------
364 countValArgs :: SimplCont -> Int
365 -- Count value arguments excluding coercions
366 countValArgs (ApplyToVal { sc_arg = arg, sc_cont = cont })
367 | Coercion {} <- arg = countValArgs cont
368 | otherwise = 1 + countValArgs cont
369 countValArgs _ = 0
370
371 countArgs :: SimplCont -> Int
372 -- Count all arguments, including types, coercions, and other values
373 countArgs (ApplyToTy { sc_cont = cont }) = 1 + countArgs cont
374 countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont
375 countArgs _ = 0
376
377 contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
378 -- Summarises value args, discards type args and coercions
379 -- The returned continuation of the call is only used to
380 -- answer questions like "are you interesting?"
381 contArgs cont
382 | lone cont = (True, [], cont)
383 | otherwise = go [] cont
384 where
385 lone (ApplyToTy {}) = False -- See Note [Lone variables] in CoreUnfold
386 lone (ApplyToVal {}) = False
387 lone (CastIt {}) = False
388 lone _ = True
389
390 go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
391 = go (is_interesting arg se : args) k
392 go args (ApplyToTy { sc_cont = k }) = go args k
393 go args (CastIt _ k) = go args k
394 go args k = (False, reverse args, k)
395
396 is_interesting arg se = interestingArg se arg
397 -- Do *not* use short-cutting substitution here
398 -- because we want to get as much IdInfo as possible
399
400
401 -------------------
402 mkArgInfo :: Id
403 -> [CoreRule] -- Rules for function
404 -> Int -- Number of value args
405 -> SimplCont -- Context of the call
406 -> ArgInfo
407
408 mkArgInfo fun rules n_val_args call_cont
409 | n_val_args < idArity fun -- Note [Unsaturated functions]
410 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
411 , ai_rules = rules, ai_encl = False
412 , ai_strs = vanilla_stricts
413 , ai_discs = vanilla_discounts }
414 | otherwise
415 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
416 , ai_rules = rules
417 , ai_encl = interestingArgContext rules call_cont
418 , ai_strs = add_type_str fun_ty arg_stricts
419 , ai_discs = arg_discounts }
420 where
421 fun_ty = idType fun
422
423 vanilla_discounts, arg_discounts :: [Int]
424 vanilla_discounts = repeat 0
425 arg_discounts = case idUnfolding fun of
426 CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
427 -> discounts ++ vanilla_discounts
428 _ -> vanilla_discounts
429
430 vanilla_stricts, arg_stricts :: [Bool]
431 vanilla_stricts = repeat False
432
433 arg_stricts
434 = case splitStrictSig (idStrictness fun) of
435 (demands, result_info)
436 | not (demands `lengthExceeds` n_val_args)
437 -> -- Enough args, use the strictness given.
438 -- For bottoming functions we used to pretend that the arg
439 -- is lazy, so that we don't treat the arg as an
440 -- interesting context. This avoids substituting
441 -- top-level bindings for (say) strings into
442 -- calls to error. But now we are more careful about
443 -- inlining lone variables, so its ok (see SimplUtils.analyseCont)
444 if isBotRes result_info then
445 map isStrictDmd demands -- Finite => result is bottom
446 else
447 map isStrictDmd demands ++ vanilla_stricts
448 | otherwise
449 -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
450 <+> ppr n_val_args <+> ppr demands )
451 vanilla_stricts -- Not enough args, or no strictness
452
453 add_type_str :: Type -> [Bool] -> [Bool]
454 -- If the function arg types are strict, record that in the 'strictness bits'
455 -- No need to instantiate because unboxed types (which dominate the strict
456 -- types) can't instantiate type variables.
457 -- add_type_str is done repeatedly (for each call); might be better
458 -- once-for-all in the function
459 -- But beware primops/datacons with no strictness
460 add_type_str _ [] = []
461 add_type_str fun_ty strs -- Look through foralls
462 | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty -- Includes coercions
463 = add_type_str fun_ty' strs
464 add_type_str fun_ty (str:strs) -- Add strict-type info
465 | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
466 = (str || isStrictType arg_ty) : add_type_str fun_ty' strs
467 add_type_str _ strs
468 = strs
469
470 {- Note [Unsaturated functions]
471 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
472 Consider (test eyeball/inline4)
473 x = a:as
474 y = f x
475 where f has arity 2. Then we do not want to inline 'x', because
476 it'll just be floated out again. Even if f has lots of discounts
477 on its first argument -- it must be saturated for these to kick in
478 -}
479
480
481 {-
482 ************************************************************************
483 * *
484 Interesting arguments
485 * *
486 ************************************************************************
487
488 Note [Interesting call context]
489 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
490 We want to avoid inlining an expression where there can't possibly be
491 any gain, such as in an argument position. Hence, if the continuation
492 is interesting (eg. a case scrutinee, application etc.) then we
493 inline, otherwise we don't.
494
495 Previously some_benefit used to return True only if the variable was
496 applied to some value arguments. This didn't work:
497
498 let x = _coerce_ (T Int) Int (I# 3) in
499 case _coerce_ Int (T Int) x of
500 I# y -> ....
501
502 we want to inline x, but can't see that it's a constructor in a case
503 scrutinee position, and some_benefit is False.
504
505 Another example:
506
507 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
508
509 .... case dMonadST _@_ x0 of (a,b,c) -> ....
510
511 we'd really like to inline dMonadST here, but we *don't* want to
512 inline if the case expression is just
513
514 case x of y { DEFAULT -> ... }
515
516 since we can just eliminate this case instead (x is in WHNF). Similar
517 applies when x is bound to a lambda expression. Hence
518 contIsInteresting looks for case expressions with just a single
519 default case.
520 -}
521
522 interestingCallContext :: SimplCont -> CallCtxt
523 -- See Note [Interesting call context]
524 interestingCallContext cont
525 = interesting cont
526 where
527 interesting (Select {}) = CaseCtxt
528 interesting (ApplyToVal {}) = ValAppCtxt
529 -- Can happen if we have (f Int |> co) y
530 -- If f has an INLINE prag we need to give it some
531 -- motivation to inline. See Note [Cast then apply]
532 -- in CoreUnfold
533 interesting (StrictArg _ cci _) = cci
534 interesting (StrictBind {}) = BoringCtxt
535 interesting (Stop _ cci) = cci
536 interesting (TickIt _ k) = interesting k
537 interesting (ApplyToTy { sc_cont = k }) = interesting k
538 interesting (CastIt _ k) = interesting k
539 -- If this call is the arg of a strict function, the context
540 -- is a bit interesting. If we inline here, we may get useful
541 -- evaluation information to avoid repeated evals: e.g.
542 -- x + (y * z)
543 -- Here the contIsInteresting makes the '*' keener to inline,
544 -- which in turn exposes a constructor which makes the '+' inline.
545 -- Assuming that +,* aren't small enough to inline regardless.
546 --
547 -- It's also very important to inline in a strict context for things
548 -- like
549 -- foldr k z (f x)
550 -- Here, the context of (f x) is strict, and if f's unfolding is
551 -- a build it's *great* to inline it here. So we must ensure that
552 -- the context for (f x) is not totally uninteresting.
553
554 interestingArgContext :: [CoreRule] -> SimplCont -> Bool
555 -- If the argument has form (f x y), where x,y are boring,
556 -- and f is marked INLINE, then we don't want to inline f.
557 -- But if the context of the argument is
558 -- g (f x y)
559 -- where g has rules, then we *do* want to inline f, in case it
560 -- exposes a rule that might fire. Similarly, if the context is
561 -- h (g (f x x))
562 -- where h has rules, then we do want to inline f; hence the
563 -- call_cont argument to interestingArgContext
564 --
565 -- The ai-rules flag makes this happen; if it's
566 -- set, the inliner gets just enough keener to inline f
567 -- regardless of how boring f's arguments are, if it's marked INLINE
568 --
569 -- The alternative would be to *always* inline an INLINE function,
570 -- regardless of how boring its context is; but that seems overkill
571 -- For example, it'd mean that wrapper functions were always inlined
572 --
573 -- The call_cont passed to interestingArgContext is the context of
574 -- the call itself, e.g. g <hole> in the example above
575 interestingArgContext rules call_cont
576 = notNull rules || enclosing_fn_has_rules
577 where
578 enclosing_fn_has_rules = go call_cont
579
580 go (Select {}) = False
581 go (ApplyToVal {}) = False -- Shouldn't really happen
582 go (ApplyToTy {}) = False -- Ditto
583 go (StrictArg _ cci _) = interesting cci
584 go (StrictBind {}) = False -- ??
585 go (CastIt _ c) = go c
586 go (Stop _ cci) = interesting cci
587 go (TickIt _ c) = go c
588
589 interesting RuleArgCtxt = True
590 interesting _ = False
591
592
593 {- Note [Interesting arguments]
594 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
595 An argument is interesting if it deserves a discount for unfoldings
596 with a discount in that argument position. The idea is to avoid
597 unfolding a function that is applied only to variables that have no
598 unfolding (i.e. they are probably lambda bound): f x y z There is
599 little point in inlining f here.
600
601 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
602 we must look through lets, eg (let x = e in C a b), because the let will
603 float, exposing the value, if we inline. That makes it different to
604 exprIsHNF.
605
606 Before 2009 we said it was interesting if the argument had *any* structure
607 at all; i.e. (hasSomeUnfolding v). But does too much inlining; see Trac #3016.
608
609 But we don't regard (f x y) as interesting, unless f is unsaturated.
610 If it's saturated and f hasn't inlined, then it's probably not going
611 to now!
612
613 Note [Conlike is interesting]
614 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
615 Consider
616 f d = ...((*) d x y)...
617 ... f (df d')...
618 where df is con-like. Then we'd really like to inline 'f' so that the
619 rule for (*) (df d) can fire. To do this
620 a) we give a discount for being an argument of a class-op (eg (*) d)
621 b) we say that a con-like argument (eg (df d)) is interesting
622 -}
623
624 interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
625 -- See Note [Interesting arguments]
626 interestingArg env e = go env 0 e
627 where
628 -- n is # value args to which the expression is applied
629 go env n (Var v)
630 | SimplEnv { seIdSubst = ids, seInScope = in_scope } <- env
631 = case lookupVarEnv ids v of
632 Nothing -> go_var n (refineFromInScope in_scope v)
633 Just (DoneId v') -> go_var n (refineFromInScope in_scope v')
634 Just (DoneEx e) -> go (zapSubstEnv env) n e
635 Just (ContEx tvs cvs ids e) -> go (setSubstEnv env tvs cvs ids) n e
636
637 go _ _ (Lit {}) = ValueArg
638 go _ _ (Type _) = TrivArg
639 go _ _ (Coercion _) = TrivArg
640 go env n (App fn (Type _)) = go env n fn
641 go env n (App fn (Coercion _)) = go env n fn
642 go env n (App fn _) = go env (n+1) fn
643 go env n (Tick _ a) = go env n a
644 go env n (Cast e _) = go env n e
645 go env n (Lam v e)
646 | isTyVar v = go env n e
647 | n>0 = go env (n-1) e
648 | otherwise = ValueArg
649 go env n (Let _ e) = case go env n e of { ValueArg -> ValueArg; _ -> NonTrivArg }
650 go _ _ (Case {}) = NonTrivArg
651
652 go_var n v
653 | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather that
654 -- data constructors here
655 | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
656 | n > 0 = NonTrivArg -- Saturated or unknown call
657 | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
658 -- See Note [Conlike is interesting]
659 | otherwise = TrivArg -- n==0, no useful unfolding
660 where
661 conlike_unfolding = isConLikeUnfolding (idUnfolding v)
662
663 {-
664 ************************************************************************
665 * *
666 SimplifierMode
667 * *
668 ************************************************************************
669
670 The SimplifierMode controls several switches; see its definition in
671 CoreMonad
672 sm_rules :: Bool -- Whether RULES are enabled
673 sm_inline :: Bool -- Whether inlining is enabled
674 sm_case_case :: Bool -- Whether case-of-case is enabled
675 sm_eta_expand :: Bool -- Whether eta-expansion is enabled
676 -}
677
678 simplEnvForGHCi :: DynFlags -> SimplEnv
679 simplEnvForGHCi dflags
680 = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
681 , sm_phase = InitialPhase
682 , sm_rules = rules_on
683 , sm_inline = False
684 , sm_eta_expand = eta_expand_on
685 , sm_case_case = True }
686 where
687 rules_on = gopt Opt_EnableRewriteRules dflags
688 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
689 -- Do not do any inlining, in case we expose some unboxed
690 -- tuple stuff that confuses the bytecode interpreter
691
692 updModeForStableUnfoldings :: Activation -> SimplifierMode -> SimplifierMode
693 -- See Note [Simplifying inside stable unfoldings]
694 updModeForStableUnfoldings inline_rule_act current_mode
695 = current_mode { sm_phase = phaseFromActivation inline_rule_act
696 , sm_inline = True
697 , sm_eta_expand = False }
698 -- For sm_rules, just inherit; sm_rules might be "off"
699 -- because of -fno-enable-rewrite-rules
700 where
701 phaseFromActivation (ActiveAfter n) = Phase n
702 phaseFromActivation _ = InitialPhase
703
704 {-
705 Note [Inlining in gentle mode]
706 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
707 Something is inlined if
708 (i) the sm_inline flag is on, AND
709 (ii) the thing has an INLINE pragma, AND
710 (iii) the thing is inlinable in the earliest phase.
711
712 Example of why (iii) is important:
713 {-# INLINE [~1] g #-}
714 g = ...
715
716 {-# INLINE f #-}
717 f x = g (g x)
718
719 If we were to inline g into f's inlining, then an importing module would
720 never be able to do
721 f e --> g (g e) ---> RULE fires
722 because the stable unfolding for f has had g inlined into it.
723
724 On the other hand, it is bad not to do ANY inlining into an
725 stable unfolding, because then recursive knots in instance declarations
726 don't get unravelled.
727
728 However, *sometimes* SimplGently must do no call-site inlining at all
729 (hence sm_inline = False). Before full laziness we must be careful
730 not to inline wrappers, because doing so inhibits floating
731 e.g. ...(case f x of ...)...
732 ==> ...(case (case x of I# x# -> fw x#) of ...)...
733 ==> ...(case x of I# x# -> case fw x# of ...)...
734 and now the redex (f x) isn't floatable any more.
735
736 The no-inlining thing is also important for Template Haskell. You might be
737 compiling in one-shot mode with -O2; but when TH compiles a splice before
738 running it, we don't want to use -O2. Indeed, we don't want to inline
739 anything, because the byte-code interpreter might get confused about
740 unboxed tuples and suchlike.
741
742 Note [Simplifying inside stable unfoldings]
743 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
744 We must take care with simplification inside stable unfoldings (which come from
745 INLINE pragmas).
746
747 First, consider the following example
748 let f = \pq -> BIG
749 in
750 let g = \y -> f y y
751 {-# INLINE g #-}
752 in ...g...g...g...g...g...
753 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
754 and thence copied multiple times when g is inlined. HENCE we treat
755 any occurrence in a stable unfolding as a multiple occurrence, not a single
756 one; see OccurAnal.addRuleUsage.
757
758 Second, we do want *do* to some modest rules/inlining stuff in stable
759 unfoldings, partly to eliminate senseless crap, and partly to break
760 the recursive knots generated by instance declarations.
761
762 However, suppose we have
763 {-# INLINE <act> f #-}
764 f = <rhs>
765 meaning "inline f in phases p where activation <act>(p) holds".
766 Then what inlinings/rules can we apply to the copy of <rhs> captured in
767 f's stable unfolding? Our model is that literally <rhs> is substituted for
768 f when it is inlined. So our conservative plan (implemented by
769 updModeForStableUnfoldings) is this:
770
771 -------------------------------------------------------------
772 When simplifying the RHS of an stable unfolding, set the phase
773 to the phase in which the stable unfolding first becomes active
774 -------------------------------------------------------------
775
776 That ensures that
777
778 a) Rules/inlinings that *cease* being active before p will
779 not apply to the stable unfolding, consistent with it being
780 inlined in its *original* form in phase p.
781
782 b) Rules/inlinings that only become active *after* p will
783 not apply to the stable unfolding, again to be consistent with
784 inlining the *original* rhs in phase p.
785
786 For example,
787 {-# INLINE f #-}
788 f x = ...g...
789
790 {-# NOINLINE [1] g #-}
791 g y = ...
792
793 {-# RULE h g = ... #-}
794 Here we must not inline g into f's RHS, even when we get to phase 0,
795 because when f is later inlined into some other module we want the
796 rule for h to fire.
797
798 Similarly, consider
799 {-# INLINE f #-}
800 f x = ...g...
801
802 g y = ...
803 and suppose that there are auto-generated specialisations and a strictness
804 wrapper for g. The specialisations get activation AlwaysActive, and the
805 strictness wrapper get activation (ActiveAfter 0). So the strictness
806 wrepper fails the test and won't be inlined into f's stable unfolding. That
807 means f can inline, expose the specialised call to g, so the specialisation
808 rules can fire.
809
810 A note about wrappers
811 ~~~~~~~~~~~~~~~~~~~~~
812 It's also important not to inline a worker back into a wrapper.
813 A wrapper looks like
814 wraper = inline_me (\x -> ...worker... )
815 Normally, the inline_me prevents the worker getting inlined into
816 the wrapper (initially, the worker's only call site!). But,
817 if the wrapper is sure to be called, the strictness analyser will
818 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
819 continuation.
820 -}
821
822 activeUnfolding :: SimplEnv -> Id -> Bool
823 activeUnfolding env
824 | not (sm_inline mode) = active_unfolding_minimal
825 | otherwise = case sm_phase mode of
826 InitialPhase -> active_unfolding_gentle
827 Phase n -> active_unfolding n
828 where
829 mode = getMode env
830
831 getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
832 -- When matching in RULE, we want to "look through" an unfolding
833 -- (to see a constructor) if *rules* are on, even if *inlinings*
834 -- are not. A notable example is DFuns, which really we want to
835 -- match in rules like (op dfun) in gentle mode. Another example
836 -- is 'otherwise' which we want exprIsConApp_maybe to be able to
837 -- see very early on
838 getUnfoldingInRuleMatch env
839 = (in_scope, id_unf)
840 where
841 in_scope = seInScope env
842 mode = getMode env
843 id_unf id | unf_is_active id = idUnfolding id
844 | otherwise = NoUnfolding
845 unf_is_active id
846 | not (sm_rules mode) = active_unfolding_minimal id
847 | otherwise = isActive (sm_phase mode) (idInlineActivation id)
848
849 active_unfolding_minimal :: Id -> Bool
850 -- Compuslory unfoldings only
851 -- Ignore SimplGently, because we want to inline regardless;
852 -- the Id has no top-level binding at all
853 --
854 -- NB: we used to have a second exception, for data con wrappers.
855 -- On the grounds that we use gentle mode for rule LHSs, and
856 -- they match better when data con wrappers are inlined.
857 -- But that only really applies to the trivial wrappers (like (:)),
858 -- and they are now constructed as Compulsory unfoldings (in MkId)
859 -- so they'll happen anyway.
860 active_unfolding_minimal id = isCompulsoryUnfolding (realIdUnfolding id)
861
862 active_unfolding :: PhaseNum -> Id -> Bool
863 active_unfolding n id = isActiveIn n (idInlineActivation id)
864
865 active_unfolding_gentle :: Id -> Bool
866 -- Anything that is early-active
867 -- See Note [Gentle mode]
868 active_unfolding_gentle id
869 = isInlinePragma prag
870 && isEarlyActive (inlinePragmaActivation prag)
871 -- NB: wrappers are not early-active
872 where
873 prag = idInlinePragma id
874
875 ----------------------
876 activeRule :: SimplEnv -> Activation -> Bool
877 -- Nothing => No rules at all
878 activeRule env
879 | not (sm_rules mode) = \_ -> False -- Rewriting is off
880 | otherwise = isActive (sm_phase mode)
881 where
882 mode = getMode env
883
884 {-
885 ************************************************************************
886 * *
887 preInlineUnconditionally
888 * *
889 ************************************************************************
890
891 preInlineUnconditionally
892 ~~~~~~~~~~~~~~~~~~~~~~~~
893 @preInlineUnconditionally@ examines a bndr to see if it is used just
894 once in a completely safe way, so that it is safe to discard the
895 binding inline its RHS at the (unique) usage site, REGARDLESS of how
896 big the RHS might be. If this is the case we don't simplify the RHS
897 first, but just inline it un-simplified.
898
899 This is much better than first simplifying a perhaps-huge RHS and then
900 inlining and re-simplifying it. Indeed, it can be at least quadratically
901 better. Consider
902
903 x1 = e1
904 x2 = e2[x1]
905 x3 = e3[x2]
906 ...etc...
907 xN = eN[xN-1]
908
909 We may end up simplifying e1 N times, e2 N-1 times, e3 N-3 times etc.
910 This can happen with cascades of functions too:
911
912 f1 = \x1.e1
913 f2 = \xs.e2[f1]
914 f3 = \xs.e3[f3]
915 ...etc...
916
917 THE MAIN INVARIANT is this:
918
919 ---- preInlineUnconditionally invariant -----
920 IF preInlineUnconditionally chooses to inline x = <rhs>
921 THEN doing the inlining should not change the occurrence
922 info for the free vars of <rhs>
923 ----------------------------------------------
924
925 For example, it's tempting to look at trivial binding like
926 x = y
927 and inline it unconditionally. But suppose x is used many times,
928 but this is the unique occurrence of y. Then inlining x would change
929 y's occurrence info, which breaks the invariant. It matters: y
930 might have a BIG rhs, which will now be dup'd at every occurrenc of x.
931
932
933 Even RHSs labelled InlineMe aren't caught here, because there might be
934 no benefit from inlining at the call site.
935
936 [Sept 01] Don't unconditionally inline a top-level thing, because that
937 can simply make a static thing into something built dynamically. E.g.
938 x = (a,b)
939 main = \s -> h x
940
941 [Remember that we treat \s as a one-shot lambda.] No point in
942 inlining x unless there is something interesting about the call site.
943
944 But watch out: if you aren't careful, some useful foldr/build fusion
945 can be lost (most notably in spectral/hartel/parstof) because the
946 foldr didn't see the build. Doing the dynamic allocation isn't a big
947 deal, in fact, but losing the fusion can be. But the right thing here
948 seems to be to do a callSiteInline based on the fact that there is
949 something interesting about the call site (it's strict). Hmm. That
950 seems a bit fragile.
951
952 Conclusion: inline top level things gaily until Phase 0 (the last
953 phase), at which point don't.
954
955 Note [pre/postInlineUnconditionally in gentle mode]
956 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
957 Even in gentle mode we want to do preInlineUnconditionally. The
958 reason is that too little clean-up happens if you don't inline
959 use-once things. Also a bit of inlining is *good* for full laziness;
960 it can expose constant sub-expressions. Example in
961 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
962 let-float if you inline windowToViewport
963
964 However, as usual for Gentle mode, do not inline things that are
965 inactive in the intial stages. See Note [Gentle mode].
966
967 Note [Stable unfoldings and preInlineUnconditionally]
968 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
969 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
970 Example
971
972 {-# INLINE f #-}
973 f :: Eq a => a -> a
974 f x = ...
975
976 fInt :: Int -> Int
977 fInt = f Int dEqInt
978
979 ...fInt...fInt...fInt...
980
981 Here f occurs just once, in the RHS of f1. But if we inline it there
982 we'll lose the opportunity to inline at each of fInt's call sites.
983 The INLINE pragma will only inline when the application is saturated
984 for exactly this reason; and we don't want PreInlineUnconditionally
985 to second-guess it. A live example is Trac #3736.
986 c.f. Note [Stable unfoldings and postInlineUnconditionally]
987
988 Note [Top-level botomming Ids]
989 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
990 Don't inline top-level Ids that are bottoming, even if they are used just
991 once, because FloatOut has gone to some trouble to extract them out.
992 Inlining them won't make the program run faster!
993
994 Note [Do not inline CoVars unconditionally]
995 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
996 Coercion variables appear inside coercions, and the RHS of a let-binding
997 is a term (not a coercion) so we can't necessarily inline the latter in
998 the former.
999 -}
1000
1001 preInlineUnconditionally :: DynFlags -> SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
1002 -- Precondition: rhs satisfies the let/app invariant
1003 -- See Note [CoreSyn let/app invariant] in CoreSyn
1004 -- Reason: we don't want to inline single uses, or discard dead bindings,
1005 -- for unlifted, side-effect-full bindings
1006 preInlineUnconditionally dflags env top_lvl bndr rhs
1007 | not active = False
1008 | isStableUnfolding (idUnfolding bndr) = False -- Note [Stable unfoldings and preInlineUnconditionally]
1009 | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
1010 | not (gopt Opt_SimplPreInlining dflags) = False
1011 | isCoVar bndr = False -- Note [Do not inline CoVars unconditionally]
1012 | otherwise = case idOccInfo bndr of
1013 IAmDead -> True -- Happens in ((\x.1) v)
1014 OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
1015 _ -> False
1016 where
1017 mode = getMode env
1018 active = isActive (sm_phase mode) act
1019 -- See Note [pre/postInlineUnconditionally in gentle mode]
1020 act = idInlineActivation bndr
1021 try_once in_lam int_cxt -- There's one textual occurrence
1022 | not in_lam = isNotTopLevel top_lvl || early_phase
1023 | otherwise = int_cxt && canInlineInLam rhs
1024
1025 -- Be very careful before inlining inside a lambda, because (a) we must not
1026 -- invalidate occurrence information, and (b) we want to avoid pushing a
1027 -- single allocation (here) into multiple allocations (inside lambda).
1028 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
1029 -- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
1030 -- where
1031 -- is_cheap = exprIsCheap rhs
1032 -- ok = is_cheap && int_cxt
1033
1034 -- int_cxt The context isn't totally boring
1035 -- E.g. let f = \ab.BIG in \y. map f xs
1036 -- Don't want to substitute for f, because then we allocate
1037 -- its closure every time the \y is called
1038 -- But: let f = \ab.BIG in \y. map (f y) xs
1039 -- Now we do want to substitute for f, even though it's not
1040 -- saturated, because we're going to allocate a closure for
1041 -- (f y) every time round the loop anyhow.
1042
1043 -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
1044 -- so substituting rhs inside a lambda doesn't change the occ info.
1045 -- Sadly, not quite the same as exprIsHNF.
1046 canInlineInLam (Lit _) = True
1047 canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
1048 canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
1049 canInlineInLam _ = False
1050 -- not ticks. Counting ticks cannot be duplicated, and non-counting
1051 -- ticks around a Lam will disappear anyway.
1052
1053 early_phase = case sm_phase mode of
1054 Phase 0 -> False
1055 _ -> True
1056 -- If we don't have this early_phase test, consider
1057 -- x = length [1,2,3]
1058 -- The full laziness pass carefully floats all the cons cells to
1059 -- top level, and preInlineUnconditionally floats them all back in.
1060 -- Result is (a) static allocation replaced by dynamic allocation
1061 -- (b) many simplifier iterations because this tickles
1062 -- a related problem; only one inlining per pass
1063 --
1064 -- On the other hand, I have seen cases where top-level fusion is
1065 -- lost if we don't inline top level thing (e.g. string constants)
1066 -- Hence the test for phase zero (which is the phase for all the final
1067 -- simplifications). Until phase zero we take no special notice of
1068 -- top level things, but then we become more leery about inlining
1069 -- them.
1070
1071 {-
1072 ************************************************************************
1073 * *
1074 postInlineUnconditionally
1075 * *
1076 ************************************************************************
1077
1078 postInlineUnconditionally
1079 ~~~~~~~~~~~~~~~~~~~~~~~~~
1080 @postInlineUnconditionally@ decides whether to unconditionally inline
1081 a thing based on the form of its RHS; in particular if it has a
1082 trivial RHS. If so, we can inline and discard the binding altogether.
1083
1084 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
1085 only have *forward* references. Hence, it's safe to discard the binding
1086
1087 NOTE: This isn't our last opportunity to inline. We're at the binding
1088 site right now, and we'll get another opportunity when we get to the
1089 ocurrence(s)
1090
1091 Note that we do this unconditional inlining only for trival RHSs.
1092 Don't inline even WHNFs inside lambdas; doing so may simply increase
1093 allocation when the function is called. This isn't the last chance; see
1094 NOTE above.
1095
1096 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here Why?
1097 Because we don't even want to inline them into the RHS of constructor
1098 arguments. See NOTE above
1099
1100 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
1101 it's best to inline it anyway. We often get a=E; b=a from desugaring,
1102 with both a and b marked NOINLINE. But that seems incompatible with
1103 our new view that inlining is like a RULE, so I'm sticking to the 'active'
1104 story for now.
1105 -}
1106
1107 postInlineUnconditionally
1108 :: DynFlags -> SimplEnv -> TopLevelFlag
1109 -> OutId -- The binder (an InId would be fine too)
1110 -- (*not* a CoVar)
1111 -> OccInfo -- From the InId
1112 -> OutExpr
1113 -> Unfolding
1114 -> Bool
1115 -- Precondition: rhs satisfies the let/app invariant
1116 -- See Note [CoreSyn let/app invariant] in CoreSyn
1117 -- Reason: we don't want to inline single uses, or discard dead bindings,
1118 -- for unlifted, side-effect-full bindings
1119 postInlineUnconditionally dflags env top_lvl bndr occ_info rhs unfolding
1120 | not active = False
1121 | isWeakLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline
1122 -- because it might be referred to "earlier"
1123 | isExportedId bndr = False
1124 | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
1125 | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally]
1126 | exprIsTrivial rhs = True
1127 | otherwise
1128 = case occ_info of
1129 -- The point of examining occ_info here is that for *non-values*
1130 -- that occur outside a lambda, the call-site inliner won't have
1131 -- a chance (because it doesn't know that the thing
1132 -- only occurs once). The pre-inliner won't have gotten
1133 -- it either, if the thing occurs in more than one branch
1134 -- So the main target is things like
1135 -- let x = f y in
1136 -- case v of
1137 -- True -> case x of ...
1138 -- False -> case x of ...
1139 -- This is very important in practice; e.g. wheel-seive1 doubles
1140 -- in allocation if you miss this out
1141 OneOcc in_lam _one_br int_cxt -- OneOcc => no code-duplication issue
1142 -> smallEnoughToInline dflags unfolding -- Small enough to dup
1143 -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
1144 --
1145 -- NB: Do NOT inline arbitrarily big things, even if one_br is True
1146 -- Reason: doing so risks exponential behaviour. We simplify a big
1147 -- expression, inline it, and simplify it again. But if the
1148 -- very same thing happens in the big expression, we get
1149 -- exponential cost!
1150 -- PRINCIPLE: when we've already simplified an expression once,
1151 -- make sure that we only inline it if it's reasonably small.
1152
1153 && (not in_lam ||
1154 -- Outside a lambda, we want to be reasonably aggressive
1155 -- about inlining into multiple branches of case
1156 -- e.g. let x = <non-value>
1157 -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
1158 -- Inlining can be a big win if C3 is the hot-spot, even if
1159 -- the uses in C1, C2 are not 'interesting'
1160 -- An example that gets worse if you add int_cxt here is 'clausify'
1161
1162 (isCheapUnfolding unfolding && int_cxt))
1163 -- isCheap => acceptable work duplication; in_lam may be true
1164 -- int_cxt to prevent us inlining inside a lambda without some
1165 -- good reason. See the notes on int_cxt in preInlineUnconditionally
1166
1167 IAmDead -> True -- This happens; for example, the case_bndr during case of
1168 -- known constructor: case (a,b) of x { (p,q) -> ... }
1169 -- Here x isn't mentioned in the RHS, so we don't want to
1170 -- create the (dead) let-binding let x = (a,b) in ...
1171
1172 _ -> False
1173
1174 -- Here's an example that we don't handle well:
1175 -- let f = if b then Left (\x.BIG) else Right (\y.BIG)
1176 -- in \y. ....case f of {...} ....
1177 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
1178 -- But
1179 -- - We can't preInlineUnconditionally because that woud invalidate
1180 -- the occ info for b.
1181 -- - We can't postInlineUnconditionally because the RHS is big, and
1182 -- that risks exponential behaviour
1183 -- - We can't call-site inline, because the rhs is big
1184 -- Alas!
1185
1186 where
1187 active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
1188 -- See Note [pre/postInlineUnconditionally in gentle mode]
1189
1190 {-
1191 Note [Top level and postInlineUnconditionally]
1192 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1193 We don't do postInlineUnconditionally for top-level things (even for
1194 ones that are trivial):
1195
1196 * Doing so will inline top-level error expressions that have been
1197 carefully floated out by FloatOut. More generally, it might
1198 replace static allocation with dynamic.
1199
1200 * Even for trivial expressions there's a problem. Consider
1201 {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
1202 blah xs = reverse xs
1203 ruggle = sort
1204 In one simplifier pass we might fire the rule, getting
1205 blah xs = ruggle xs
1206 but in *that* simplifier pass we must not do postInlineUnconditionally
1207 on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
1208
1209 If the rhs is trivial it'll be inlined by callSiteInline, and then
1210 the binding will be dead and discarded by the next use of OccurAnal
1211
1212 * There is less point, because the main goal is to get rid of local
1213 bindings used in multiple case branches.
1214
1215 * The inliner should inline trivial things at call sites anyway.
1216
1217 Note [Stable unfoldings and postInlineUnconditionally]
1218 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1219 Do not do postInlineUnconditionally if the Id has an stable unfolding,
1220 otherwise we lose the unfolding. Example
1221
1222 -- f has stable unfolding with rhs (e |> co)
1223 -- where 'e' is big
1224 f = e |> co
1225
1226 Then there's a danger we'll optimise to
1227
1228 f' = e
1229 f = f' |> co
1230
1231 and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
1232 won't inline because 'e' is too big.
1233
1234 c.f. Note [Stable unfoldings and preInlineUnconditionally]
1235
1236
1237 ************************************************************************
1238 * *
1239 Rebuilding a lambda
1240 * *
1241 ************************************************************************
1242 -}
1243
1244 mkLam :: [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
1245 -- mkLam tries three things
1246 -- a) eta reduction, if that gives a trivial expression
1247 -- b) eta expansion [only if there are some value lambdas]
1248
1249 mkLam [] body _cont
1250 = return body
1251 mkLam bndrs body cont
1252 = do { dflags <- getDynFlags
1253 ; mkLam' dflags bndrs body }
1254 where
1255 mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
1256 mkLam' dflags bndrs (Cast body co)
1257 | not (any bad bndrs)
1258 -- Note [Casts and lambdas]
1259 = do { lam <- mkLam' dflags bndrs body
1260 ; return (mkCast lam (mkPiCos Representational bndrs co)) }
1261 where
1262 co_vars = tyCoVarsOfCo co
1263 bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
1264
1265 mkLam' dflags bndrs body@(Lam {})
1266 = mkLam' dflags (bndrs ++ bndrs1) body1
1267 where
1268 (bndrs1, body1) = collectBinders body
1269
1270 mkLam' dflags bndrs (Tick t expr)
1271 | tickishFloatable t
1272 = mkTick t <$> mkLam' dflags bndrs expr
1273
1274 mkLam' dflags bndrs body
1275 | gopt Opt_DoEtaReduction dflags
1276 , Just etad_lam <- tryEtaReduce bndrs body
1277 = do { tick (EtaReduction (head bndrs))
1278 ; return etad_lam }
1279
1280 | not (contIsRhs cont) -- See Note [Eta-expanding lambdas]
1281 , gopt Opt_DoLambdaEtaExpansion dflags
1282 , any isRuntimeVar bndrs
1283 , let body_arity = exprEtaExpandArity dflags body
1284 , body_arity > 0
1285 = do { tick (EtaExpansion (head bndrs))
1286 ; return (mkLams bndrs (etaExpand body_arity body)) }
1287
1288 | otherwise
1289 = return (mkLams bndrs body)
1290
1291 {-
1292 Note [Eta expanding lambdas]
1293 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1294 In general we *do* want to eta-expand lambdas. Consider
1295 f (\x -> case x of (a,b) -> \s -> blah)
1296 where 's' is a state token, and hence can be eta expanded. This
1297 showed up in the code for GHc.IO.Handle.Text.hPutChar, a rather
1298 important function!
1299
1300 The eta-expansion will never happen unless we do it now. (Well, it's
1301 possible that CorePrep will do it, but CorePrep only has a half-baked
1302 eta-expander that can't deal with casts. So it's much better to do it
1303 here.)
1304
1305 However, when the lambda is let-bound, as the RHS of a let, we have a
1306 better eta-expander (in the form of tryEtaExpandRhs), so we don't
1307 bother to try expansion in mkLam in that case; hence the contIsRhs
1308 guard.
1309
1310 Note [Casts and lambdas]
1311 ~~~~~~~~~~~~~~~~~~~~~~~~
1312 Consider
1313 (\x. (\y. e) `cast` g1) `cast` g2
1314 There is a danger here that the two lambdas look separated, and the
1315 full laziness pass might float an expression to between the two.
1316
1317 So this equation in mkLam' floats the g1 out, thus:
1318 (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
1319 where x:tx.
1320
1321 In general, this floats casts outside lambdas, where (I hope) they
1322 might meet and cancel with some other cast:
1323 \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
1324 /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
1325 /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
1326 (if not (g `in` co))
1327
1328 Notice that it works regardless of 'e'. Originally it worked only
1329 if 'e' was itself a lambda, but in some cases that resulted in
1330 fruitless iteration in the simplifier. A good example was when
1331 compiling Text.ParserCombinators.ReadPrec, where we had a definition
1332 like (\x. Get `cast` g)
1333 where Get is a constructor with nonzero arity. Then mkLam eta-expanded
1334 the Get, and the next iteration eta-reduced it, and then eta-expanded
1335 it again.
1336
1337 Note also the side condition for the case of coercion binders.
1338 It does not make sense to transform
1339 /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
1340 because the latter is not well-kinded.
1341
1342 ************************************************************************
1343 * *
1344 Eta expansion
1345 * *
1346 ************************************************************************
1347 -}
1348
1349 tryEtaExpandRhs :: SimplEnv -> OutId -> OutExpr -> SimplM (Arity, OutExpr)
1350 -- See Note [Eta-expanding at let bindings]
1351 tryEtaExpandRhs env bndr rhs
1352 = do { dflags <- getDynFlags
1353 ; (new_arity, new_rhs) <- try_expand dflags
1354
1355 ; WARN( new_arity < old_id_arity,
1356 (ptext (sLit "Arity decrease:") <+> (ppr bndr <+> ppr old_id_arity
1357 <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
1358 -- Note [Arity decrease] in Simplify
1359 return (new_arity, new_rhs) }
1360 where
1361 try_expand dflags
1362 | exprIsTrivial rhs
1363 = return (exprArity rhs, rhs)
1364
1365 | sm_eta_expand (getMode env) -- Provided eta-expansion is on
1366 , let new_arity1 = findRhsArity dflags bndr rhs old_arity
1367 new_arity2 = idCallArity bndr
1368 new_arity = max new_arity1 new_arity2
1369 , new_arity > old_arity -- And the current manifest arity isn't enough
1370 = do { tick (EtaExpansion bndr)
1371 ; return (new_arity, etaExpand new_arity rhs) }
1372 | otherwise
1373 = return (old_arity, rhs)
1374
1375 old_arity = exprArity rhs -- See Note [Do not expand eta-expand PAPs]
1376 old_id_arity = idArity bndr
1377
1378 {-
1379 Note [Eta-expanding at let bindings]
1380 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1381 We now eta expand at let-bindings, which is where the payoff comes.
1382 The most significant thing is that we can do a simple arity analysis
1383 (in CoreArity.findRhsArity), which we can't do for free-floating lambdas
1384
1385 One useful consequence of not eta-expanding lambdas is this example:
1386 genMap :: C a => ...
1387 {-# INLINE genMap #-}
1388 genMap f xs = ...
1389
1390 myMap :: D a => ...
1391 {-# INLINE myMap #-}
1392 myMap = genMap
1393
1394 Notice that 'genMap' should only inline if applied to two arguments.
1395 In the stable unfolding for myMap we'll have the unfolding
1396 (\d -> genMap Int (..d..))
1397 We do not want to eta-expand to
1398 (\d f xs -> genMap Int (..d..) f xs)
1399 because then 'genMap' will inline, and it really shouldn't: at least
1400 as far as the programmer is concerned, it's not applied to two
1401 arguments!
1402
1403 Note [Do not eta-expand PAPs]
1404 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1405 We used to have old_arity = manifestArity rhs, which meant that we
1406 would eta-expand even PAPs. But this gives no particular advantage,
1407 and can lead to a massive blow-up in code size, exhibited by Trac #9020.
1408 Suppose we have a PAP
1409 foo :: IO ()
1410 foo = returnIO ()
1411 Then we can eta-expand do
1412 foo = (\eta. (returnIO () |> sym g) eta) |> g
1413 where
1414 g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
1415
1416 But there is really no point in doing this, and it generates masses of
1417 coercions and whatnot that eventually disappear again. For T9020, GHC
1418 allocated 6.6G beore, and 0.8G afterwards; and residency dropped from
1419 1.8G to 45M.
1420
1421 But note that this won't eta-expand, say
1422 f = \g -> map g
1423 Does it matter not eta-expanding such functions? I'm not sure. Perhaps
1424 strictness analysis will have less to bite on?
1425
1426
1427 ************************************************************************
1428 * *
1429 \subsection{Floating lets out of big lambdas}
1430 * *
1431 ************************************************************************
1432
1433 Note [Floating and type abstraction]
1434 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1435 Consider this:
1436 x = /\a. C e1 e2
1437 We'd like to float this to
1438 y1 = /\a. e1
1439 y2 = /\a. e2
1440 x = /\a. C (y1 a) (y2 a)
1441 for the usual reasons: we want to inline x rather vigorously.
1442
1443 You may think that this kind of thing is rare. But in some programs it is
1444 common. For example, if you do closure conversion you might get:
1445
1446 data a :-> b = forall e. (e -> a -> b) :$ e
1447
1448 f_cc :: forall a. a :-> a
1449 f_cc = /\a. (\e. id a) :$ ()
1450
1451 Now we really want to inline that f_cc thing so that the
1452 construction of the closure goes away.
1453
1454 So I have elaborated simplLazyBind to understand right-hand sides that look
1455 like
1456 /\ a1..an. body
1457
1458 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1459 but there is quite a bit of plumbing in simplLazyBind as well.
1460
1461 The same transformation is good when there are lets in the body:
1462
1463 /\abc -> let(rec) x = e in b
1464 ==>
1465 let(rec) x' = /\abc -> let x = x' a b c in e
1466 in
1467 /\abc -> let x = x' a b c in b
1468
1469 This is good because it can turn things like:
1470
1471 let f = /\a -> letrec g = ... g ... in g
1472 into
1473 letrec g' = /\a -> ... g' a ...
1474 in
1475 let f = /\ a -> g' a
1476
1477 which is better. In effect, it means that big lambdas don't impede
1478 let-floating.
1479
1480 This optimisation is CRUCIAL in eliminating the junk introduced by
1481 desugaring mutually recursive definitions. Don't eliminate it lightly!
1482
1483 [May 1999] If we do this transformation *regardless* then we can
1484 end up with some pretty silly stuff. For example,
1485
1486 let
1487 st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1488 in ..
1489 becomes
1490 let y1 = /\s -> r1
1491 y2 = /\s -> r2
1492 st = /\s -> ...[y1 s/x1, y2 s/x2]
1493 in ..
1494
1495 Unless the "..." is a WHNF there is really no point in doing this.
1496 Indeed it can make things worse. Suppose x1 is used strictly,
1497 and is of the form
1498
1499 x1* = case f y of { (a,b) -> e }
1500
1501 If we abstract this wrt the tyvar we then can't do the case inline
1502 as we would normally do.
1503
1504 That's why the whole transformation is part of the same process that
1505 floats let-bindings and constructor arguments out of RHSs. In particular,
1506 it is guarded by the doFloatFromRhs call in simplLazyBind.
1507 -}
1508
1509 abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
1510 abstractFloats main_tvs body_env body
1511 = ASSERT( notNull body_floats )
1512 do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
1513 ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
1514 where
1515 main_tv_set = mkVarSet main_tvs
1516 body_floats = getFloatBinds body_env
1517 empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
1518
1519 abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
1520 abstract subst (NonRec id rhs)
1521 = do { (poly_id, poly_app) <- mk_poly tvs_here id
1522 ; let poly_rhs = mkLams tvs_here rhs'
1523 subst' = CoreSubst.extendIdSubst subst id poly_app
1524 ; return (subst', (NonRec poly_id poly_rhs)) }
1525 where
1526 rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
1527 tvs_here = varSetElemsKvsFirst (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
1528
1529 -- Abstract only over the type variables free in the rhs
1530 -- wrt which the new binding is abstracted. But the naive
1531 -- approach of abstract wrt the tyvars free in the Id's type
1532 -- fails. Consider:
1533 -- /\ a b -> let t :: (a,b) = (e1, e2)
1534 -- x :: a = fst t
1535 -- in ...
1536 -- Here, b isn't free in x's type, but we must nevertheless
1537 -- abstract wrt b as well, because t's type mentions b.
1538 -- Since t is floated too, we'd end up with the bogus:
1539 -- poly_t = /\ a b -> (e1, e2)
1540 -- poly_x = /\ a -> fst (poly_t a *b*)
1541 -- So for now we adopt the even more naive approach of
1542 -- abstracting wrt *all* the tyvars. We'll see if that
1543 -- gives rise to problems. SLPJ June 98
1544
1545 abstract subst (Rec prs)
1546 = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
1547 ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
1548 poly_rhss = [mkLams tvs_here (CoreSubst.substExpr (text "abstract_floats3") subst' rhs)
1549 | rhs <- rhss]
1550 ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
1551 where
1552 (ids,rhss) = unzip prs
1553 -- For a recursive group, it's a bit of a pain to work out the minimal
1554 -- set of tyvars over which to abstract:
1555 -- /\ a b c. let x = ...a... in
1556 -- letrec { p = ...x...q...
1557 -- q = .....p...b... } in
1558 -- ...
1559 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
1560 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
1561 -- Since it's a pain, we just use the whole set, which is always safe
1562 --
1563 -- If you ever want to be more selective, remember this bizarre case too:
1564 -- x::a = x
1565 -- Here, we must abstract 'x' over 'a'.
1566 tvs_here = sortQuantVars main_tvs
1567
1568 mk_poly tvs_here var
1569 = do { uniq <- getUniqueM
1570 ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
1571 poly_ty = mkForAllTys tvs_here (idType var) -- But new type of course
1572 poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.hs
1573 mkLocalId poly_name poly_ty
1574 ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
1575 -- In the olden days, it was crucial to copy the occInfo of the original var,
1576 -- because we were looking at occurrence-analysed but as yet unsimplified code!
1577 -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking
1578 -- at already simplified code, so it doesn't matter
1579 --
1580 -- It's even right to retain single-occurrence or dead-var info:
1581 -- Suppose we started with /\a -> let x = E in B
1582 -- where x occurs once in B. Then we transform to:
1583 -- let x' = /\a -> E in /\a -> let x* = x' a in B
1584 -- where x* has an INLINE prag on it. Now, once x* is inlined,
1585 -- the occurrences of x' will be just the occurrences originally
1586 -- pinned on x.
1587
1588 {-
1589 Note [Abstract over coercions]
1590 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1591 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1592 type variable a. Rather than sort this mess out, we simply bale out and abstract
1593 wrt all the type variables if any of them are coercion variables.
1594
1595
1596 Historical note: if you use let-bindings instead of a substitution, beware of this:
1597
1598 -- Suppose we start with:
1599 --
1600 -- x = /\ a -> let g = G in E
1601 --
1602 -- Then we'll float to get
1603 --
1604 -- x = let poly_g = /\ a -> G
1605 -- in /\ a -> let g = poly_g a in E
1606 --
1607 -- But now the occurrence analyser will see just one occurrence
1608 -- of poly_g, not inside a lambda, so the simplifier will
1609 -- PreInlineUnconditionally poly_g back into g! Badk to square 1!
1610 -- (I used to think that the "don't inline lone occurrences" stuff
1611 -- would stop this happening, but since it's the *only* occurrence,
1612 -- PreInlineUnconditionally kicks in first!)
1613 --
1614 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1615 -- to appear many times. (NB: mkInlineMe eliminates
1616 -- such notes on trivial RHSs, so do it manually.)
1617
1618 ************************************************************************
1619 * *
1620 prepareAlts
1621 * *
1622 ************************************************************************
1623
1624 prepareAlts tries these things:
1625
1626 1. Eliminate alternatives that cannot match, including the
1627 DEFAULT alternative.
1628
1629 2. If the DEFAULT alternative can match only one possible constructor,
1630 then make that constructor explicit.
1631 e.g.
1632 case e of x { DEFAULT -> rhs }
1633 ===>
1634 case e of x { (a,b) -> rhs }
1635 where the type is a single constructor type. This gives better code
1636 when rhs also scrutinises x or e.
1637
1638 3. Returns a list of the constructors that cannot holds in the
1639 DEFAULT alternative (if there is one)
1640
1641 Here "cannot match" includes knowledge from GADTs
1642
1643 It's a good idea to do this stuff before simplifying the alternatives, to
1644 avoid simplifying alternatives we know can't happen, and to come up with
1645 the list of constructors that are handled, to put into the IdInfo of the
1646 case binder, for use when simplifying the alternatives.
1647
1648 Eliminating the default alternative in (1) isn't so obvious, but it can
1649 happen:
1650
1651 data Colour = Red | Green | Blue
1652
1653 f x = case x of
1654 Red -> ..
1655 Green -> ..
1656 DEFAULT -> h x
1657
1658 h y = case y of
1659 Blue -> ..
1660 DEFAULT -> [ case y of ... ]
1661
1662 If we inline h into f, the default case of the inlined h can't happen.
1663 If we don't notice this, we may end up filtering out *all* the cases
1664 of the inner case y, which give us nowhere to go!
1665 -}
1666
1667 prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
1668 -- The returned alternatives can be empty, none are possible
1669 prepareAlts scrut case_bndr' alts
1670 | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
1671 -- Case binder is needed just for its type. Note that as an
1672 -- OutId, it has maximum information; this is important.
1673 -- Test simpl013 is an example
1674 = do { us <- getUniquesM
1675 ; let (idcs1, alts1) = filterAlts tc tys imposs_cons alts
1676 (yes2, alts2) = refineDefaultAlt us tc tys idcs1 alts1
1677 (yes3, idcs3, alts3) = combineIdenticalAlts idcs1 alts2
1678 -- "idcs" stands for "impossible default data constructors"
1679 -- i.e. the constructors that can't match the default case
1680 ; when yes2 $ tick (FillInCaseDefault case_bndr')
1681 ; when yes3 $ tick (AltMerge case_bndr')
1682 ; return (idcs3, alts3) }
1683
1684 | otherwise -- Not a data type, so nothing interesting happens
1685 = return ([], alts)
1686 where
1687 imposs_cons = case scrut of
1688 Var v -> otherCons (idUnfolding v)
1689 _ -> []
1690
1691
1692 {-
1693 ************************************************************************
1694 * *
1695 mkCase
1696 * *
1697 ************************************************************************
1698
1699 mkCase tries these things
1700
1701 1. Merge Nested Cases
1702
1703 case e of b { ==> case e of b {
1704 p1 -> rhs1 p1 -> rhs1
1705 ... ...
1706 pm -> rhsm pm -> rhsm
1707 _ -> case b of b' { pn -> let b'=b in rhsn
1708 pn -> rhsn ...
1709 ... po -> let b'=b in rhso
1710 po -> rhso _ -> let b'=b in rhsd
1711 _ -> rhsd
1712 }
1713
1714 which merges two cases in one case when -- the default alternative of
1715 the outer case scrutises the same variable as the outer case. This
1716 transformation is called Case Merging. It avoids that the same
1717 variable is scrutinised multiple times.
1718
1719 2. Eliminate Identity Case
1720
1721 case e of ===> e
1722 True -> True;
1723 False -> False
1724
1725 and similar friends.
1726 -}
1727
1728 mkCase, mkCase1, mkCase2
1729 :: DynFlags
1730 -> OutExpr -> OutId
1731 -> OutType -> [OutAlt] -- Alternatives in standard (increasing) order
1732 -> SimplM OutExpr
1733
1734 --------------------------------------------------
1735 -- 1. Merge Nested Cases
1736 --------------------------------------------------
1737
1738 mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
1739 | gopt Opt_CaseMerge dflags
1740 , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
1741 <- stripTicksTop tickishFloatable deflt_rhs
1742 , inner_scrut_var == outer_bndr
1743 = do { tick (CaseMerge outer_bndr)
1744
1745 ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
1746 (con, args, wrap_rhs rhs)
1747 -- Simplifier's no-shadowing invariant should ensure
1748 -- that outer_bndr is not shadowed by the inner patterns
1749 wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
1750 -- The let is OK even for unboxed binders,
1751
1752 wrapped_alts | isDeadBinder inner_bndr = inner_alts
1753 | otherwise = map wrap_alt inner_alts
1754
1755 merged_alts = mergeAlts outer_alts wrapped_alts
1756 -- NB: mergeAlts gives priority to the left
1757 -- case x of
1758 -- A -> e1
1759 -- DEFAULT -> case x of
1760 -- A -> e2
1761 -- B -> e3
1762 -- When we merge, we must ensure that e1 takes
1763 -- precedence over e2 as the value for A!
1764
1765 ; fmap (mkTicks ticks) $
1766 mkCase1 dflags scrut outer_bndr alts_ty merged_alts
1767 }
1768 -- Warning: don't call mkCase recursively!
1769 -- Firstly, there's no point, because inner alts have already had
1770 -- mkCase applied to them, so they won't have a case in their default
1771 -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
1772 -- in munge_rhs may put a case into the DEFAULT branch!
1773
1774 mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
1775
1776 --------------------------------------------------
1777 -- 2. Eliminate Identity Case
1778 --------------------------------------------------
1779
1780 mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _) -- Identity case
1781 | all identity_alt alts
1782 = do { tick (CaseIdentity case_bndr)
1783 ; return (mkTicks ticks $ re_cast scrut rhs1) }
1784 where
1785 ticks = concatMap (stripTicksT tickishFloatable . thirdOf3) (tail alts)
1786 identity_alt (con, args, rhs) = check_eq rhs con args
1787
1788 check_eq (Cast rhs co) con args
1789 = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
1790 -- See Note [RHS casts]
1791 check_eq (Lit lit) (LitAlt lit') _ = lit == lit'
1792 check_eq (Var v) _ _ | v == case_bndr = True
1793 check_eq (Var v) (DataAlt con) [] = v == dataConWorkId con
1794 -- Optimisation only
1795 check_eq (Tick t e) alt args = tickishFloatable t &&
1796 check_eq e alt args
1797 check_eq rhs (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $
1798 mkConApp con (arg_tys ++
1799 varsToCoreExprs args)
1800 check_eq _ _ _ = False
1801
1802 arg_tys = map Type (tyConAppArgs (idType case_bndr))
1803
1804 -- Note [RHS casts]
1805 -- ~~~~~~~~~~~~~~~~
1806 -- We've seen this:
1807 -- case e of x { _ -> x `cast` c }
1808 -- And we definitely want to eliminate this case, to give
1809 -- e `cast` c
1810 -- So we throw away the cast from the RHS, and reconstruct
1811 -- it at the other end. All the RHS casts must be the same
1812 -- if (all identity_alt alts) holds.
1813 --
1814 -- Don't worry about nested casts, because the simplifier combines them
1815
1816 re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
1817 re_cast scrut _ = scrut
1818
1819 mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
1820
1821 --------------------------------------------------
1822 -- Catch-all
1823 --------------------------------------------------
1824 mkCase2 _dflags scrut bndr alts_ty alts
1825 = return (Case scrut bndr alts_ty alts)
1826
1827 {-
1828 Note [Dead binders]
1829 ~~~~~~~~~~~~~~~~~~~~
1830 Note that dead-ness is maintained by the simplifier, so that it is
1831 accurate after simplification as well as before.
1832
1833
1834 Note [Cascading case merge]
1835 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1836 Case merging should cascade in one sweep, because it
1837 happens bottom-up
1838
1839 case e of a {
1840 DEFAULT -> case a of b
1841 DEFAULT -> case b of c {
1842 DEFAULT -> e
1843 A -> ea
1844 B -> eb
1845 C -> ec
1846 ==>
1847 case e of a {
1848 DEFAULT -> case a of b
1849 DEFAULT -> let c = b in e
1850 A -> let c = b in ea
1851 B -> eb
1852 C -> ec
1853 ==>
1854 case e of a {
1855 DEFAULT -> let b = a in let c = b in e
1856 A -> let b = a in let c = b in ea
1857 B -> let b = a in eb
1858 C -> ec
1859
1860
1861 However here's a tricky case that we still don't catch, and I don't
1862 see how to catch it in one pass:
1863
1864 case x of c1 { I# a1 ->
1865 case a1 of c2 ->
1866 0 -> ...
1867 DEFAULT -> case x of c3 { I# a2 ->
1868 case a2 of ...
1869
1870 After occurrence analysis (and its binder-swap) we get this
1871
1872 case x of c1 { I# a1 ->
1873 let x = c1 in -- Binder-swap addition
1874 case a1 of c2 ->
1875 0 -> ...
1876 DEFAULT -> case x of c3 { I# a2 ->
1877 case a2 of ...
1878
1879 When we simplify the inner case x, we'll see that
1880 x=c1=I# a1. So we'll bind a2 to a1, and get
1881
1882 case x of c1 { I# a1 ->
1883 case a1 of c2 ->
1884 0 -> ...
1885 DEFAULT -> case a1 of ...
1886
1887 This is corect, but we can't do a case merge in this sweep
1888 because c2 /= a1. Reason: the binding c1=I# a1 went inwards
1889 without getting changed to c1=I# c2.
1890
1891 I don't think this is worth fixing, even if I knew how. It'll
1892 all come out in the next pass anyway.
1893 -}