1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 \section[SimplUtils]{The simplifier utilities}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplUtils (
10 -- Rebuilding
11 mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
12
13 -- Inlining,
14 preInlineUnconditionally, postInlineUnconditionally,
15 activeUnfolding, activeRule,
16 getUnfoldingInRuleMatch,
17 simplEnvForGHCi, updModeForStableUnfoldings, updModeForRules,
18
19 -- The continuation type
20 SimplCont(..), DupFlag(..),
21 isSimplified,
22 contIsDupable, contResultType, contHoleType, applyContToJoinType,
23 contIsTrivial, contArgs,
24 countArgs,
25 mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
26 interestingCallContext,
27
28 -- ArgInfo
29 ArgInfo(..), ArgSpec(..), mkArgInfo,
30 addValArgTo, addCastTo, addTyArgTo,
31 argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
32
33 abstractFloats
34 ) where
35
36 #include "HsVersions.h"
37
38 import SimplEnv
39 import CoreMonad ( SimplifierMode(..), Tick(..) )
40 import DynFlags
41 import CoreSyn
42 import qualified CoreSubst
43 import PprCore
44 import CoreFVs
45 import CoreUtils
46 import CoreArity
47 import CoreUnfold
48 import Name
49 import Id
50 import IdInfo
51 import Var
52 import Demand
53 import SimplMonad
54 import Type hiding( substTy )
55 import Coercion hiding( substCo )
56 import DataCon ( dataConWorkId )
57 import VarEnv
58 import VarSet
59 import BasicTypes
60 import Util
61 import MonadUtils
62 import Outputable
63 import Pair
64 import PrelRules
65 import Literal
66
67 import Control.Monad ( when )
68 import Data.List ( sortBy )
69
70 {-
71 ************************************************************************
72 * *
73 The SimplCont and DupFlag types
74 * *
75 ************************************************************************
76
77 A SimplCont allows the simplifier to traverse the expression in a
78 zipper-like fashion. The SimplCont represents the rest of the expression,
79 "above" the point of interest.
80
81 You can also think of a SimplCont as an "evaluation context", using
82 that term in the way it is used for operational semantics. This is the
83 way I usually think of it. For example, you'll often see a syntax for
84 evaluation contexts looking like
85 C ::= [] | C e | case C of alts | C `cast` co
86 That's the kind of thing we are doing here, and I use that syntax in
87 the comments.
88
89
90 Key points:
91 * A SimplCont describes a *strict* context (just like
92 evaluation contexts do). E.g. Just [] is not a SimplCont
93
94 * A SimplCont describes a context that *does not* bind
95 any variables. E.g. \x. [] is not a SimplCont
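
   For illustration only (a rough sketch, using the constructors defined
   just below): the evaluation context
        case (<hole> arg) of alts
   is represented, inside out, as
        ApplyToVal { sc_arg = arg, ...
                   , sc_cont = Select { sc_alts = alts, ..., sc_cont = Stop ... } }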
96 -}
97
98 data SimplCont
99 = Stop -- An empty context, or <hole>
100 OutType -- Type of the <hole>
101 CallCtxt -- Tells if there is something interesting about
102 -- the context, and hence the inliner
103 -- should be a bit keener (see interestingCallContext)
104 -- Specifically:
105 -- This is an argument of a function that has RULES
106 -- Inlining the call might allow the rule to fire
107 -- Never ValAppCxt (use ApplyToVal instead)
108 -- or CaseCtxt (use Select instead)
109
110 | CastIt -- <hole> `cast` co
111 OutCoercion -- The coercion simplified
112 -- Invariant: never an identity coercion
113 SimplCont
114
115 | ApplyToVal { -- <hole> arg
116 sc_dup :: DupFlag, -- See Note [DupFlag invariants]
117 sc_arg :: InExpr, -- The argument,
118 sc_env :: StaticEnv, -- and its static env
119 sc_cont :: SimplCont }
120
121 | ApplyToTy { -- <hole> ty
122 sc_arg_ty :: OutType, -- Argument type
123 sc_hole_ty :: OutType, -- Type of the function, presumably (forall a. blah)
124 -- See Note [The hole type in ApplyToTy]
125 sc_cont :: SimplCont }
126
127 | Select { -- case <hole> of alts
128 sc_dup :: DupFlag, -- See Note [DupFlag invariants]
129 sc_bndr :: InId, -- case binder
130 sc_alts :: [InAlt], -- Alternatives
131 sc_env :: StaticEnv, -- and their static environment
132 sc_cont :: SimplCont }
133
134 -- The two strict forms have no DupFlag, because we never duplicate them
135 | StrictBind -- (\x* \xs. e) <hole>
136 InId [InBndr] -- let x* = <hole> in e
137 InExpr StaticEnv -- is a special case
138 SimplCont
139
140 | StrictArg -- f e1 ..en <hole>
141 ArgInfo -- Specifies f, e1..en, Whether f has rules, etc
142 -- plus strictness flags for *further* args
143 CallCtxt -- Whether *this* argument position is interesting
144 SimplCont
145
146 | TickIt
147 (Tickish Id) -- Tick tickish <hole>
148 SimplCont
149
150 data DupFlag = NoDup -- Unsimplified, might be big
151 | Simplified -- Simplified
152 | OkToDup -- Simplified and small
153
154 isSimplified :: DupFlag -> Bool
155 isSimplified NoDup = False
156 isSimplified _ = True -- Invariant: the subst-env is empty
157
158 perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
159 perhapsSubstTy dup env ty
160 | isSimplified dup = ty
161 | otherwise = substTy env ty
162
163 {-
164 Note [DupFlag invariants]
165 ~~~~~~~~~~~~~~~~~~~~~~~~~
166 In both (ApplyToVal dup _ env k)
167 and (Select dup _ _ env k)
168 the following invariants hold
169
170 (a) if dup = OkToDup, then continuation k is also ok-to-dup
171 (b) if dup = OkToDup or Simplified, the subst-env is empty
172 (and hence no need to re-simplify)
173 -}
174
175 instance Outputable DupFlag where
176 ppr OkToDup = text "ok"
177 ppr NoDup = text "nodup"
178 ppr Simplified = text "simpl"
179
180 instance Outputable SimplCont where
181 ppr (Stop ty interesting) = text "Stop" <> brackets (ppr interesting) <+> ppr ty
182 ppr (CastIt co cont ) = (text "CastIt" <+> pprOptCo co) $$ ppr cont
183 ppr (TickIt t cont) = (text "TickIt" <+> ppr t) $$ ppr cont
184 ppr (ApplyToTy { sc_arg_ty = ty, sc_cont = cont })
185 = (text "ApplyToTy" <+> pprParendType ty) $$ ppr cont
186 ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont })
187 = (text "ApplyToVal" <+> ppr dup <+> pprParendExpr arg)
188 $$ ppr cont
189 ppr (StrictBind b _ _ _ cont) = (text "StrictBind" <+> ppr b) $$ ppr cont
190 ppr (StrictArg ai _ cont) = (text "StrictArg" <+> ppr (ai_fun ai)) $$ ppr cont
191 ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
192 = (text "Select" <+> ppr dup <+> ppr bndr) $$
193 ifPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
194
195
196 {- Note [The hole type in ApplyToTy]
197 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
198 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
199 continuation. It is absolutely necessary to compute contHoleType, but it is
200 not used for anything else (and hence may not be evaluated).
201
202 Why is it necessary for contHoleType? Consider the continuation
203 ApplyToType Int (Stop Int)
204 corresponding to
205 (<hole> @Int) :: Int
206 What is the type of <hole>? It could be (forall a. Int) or (forall a. a),
207 and there is no way to know which, so we must record it.
208
209 In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType
210 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
211 doesn't matter because we'll never compute them all.
212
213 ************************************************************************
214 * *
215 ArgInfo and ArgSpec
216 * *
217 ************************************************************************
218 -}
219
220 data ArgInfo
221 = ArgInfo {
222 ai_fun :: OutId, -- The function
223 ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order)
224 ai_type :: OutType, -- Type of (f a1 ... an)
225
226 ai_rules :: [CoreRule], -- Rules for this function
227
228 ai_encl :: Bool, -- Flag saying whether this function
229 -- or an enclosing one has rules (recursively)
230 -- True => be keener to inline in all args
231
232 ai_strs :: [Bool], -- Strictness of remaining arguments
233 -- Usually infinite, but if it is finite it guarantees
234 -- that the function diverges after being given
235 -- that number of args
236 ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline
237 -- Always infinite
238 }
239
240 data ArgSpec
241 = ValArg OutExpr -- Apply to this (coercion or value); c.f. ApplyToVal
242 | TyArg { as_arg_ty :: OutType -- Apply to this type; c.f. ApplyToTy
243 , as_hole_ty :: OutType } -- Type of the function (presumably forall a. blah)
244 | CastBy OutCoercion -- Cast by this; c.f. CastIt
245
246 instance Outputable ArgSpec where
247 ppr (ValArg e) = text "ValArg" <+> ppr e
248 ppr (TyArg { as_arg_ty = ty }) = text "TyArg" <+> ppr ty
249 ppr (CastBy c) = text "CastBy" <+> ppr c
250
251 addValArgTo :: ArgInfo -> OutExpr -> ArgInfo
252 addValArgTo ai arg = ai { ai_args = ValArg arg : ai_args ai
253 , ai_type = applyTypeToArg (ai_type ai) arg }
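-- For illustration (a sketch): if ai_type ai is (Int -> Bool) and arg :: Int,
-- the result has ai_args = ValArg arg : ai_args ai (still in *reverse* order)
-- and ai_type = Bool, since applyTypeToArg applies the function type to arg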
254
255 addTyArgTo :: ArgInfo -> OutType -> ArgInfo
256 addTyArgTo ai arg_ty = ai { ai_args = arg_spec : ai_args ai
257 , ai_type = piResultTy poly_fun_ty arg_ty }
258 where
259 poly_fun_ty = ai_type ai
260 arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = poly_fun_ty }
261
262 addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
263 addCastTo ai co = ai { ai_args = CastBy co : ai_args ai
264 , ai_type = pSnd (coercionKind co) }
265
266 argInfoAppArgs :: [ArgSpec] -> [OutExpr]
267 argInfoAppArgs [] = []
268 argInfoAppArgs (CastBy {} : _) = [] -- Stop at a cast
269 argInfoAppArgs (ValArg e : as) = e : argInfoAppArgs as
270 argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as
271
272 pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
273 pushSimplifiedArgs _env [] k = k
274 pushSimplifiedArgs env (arg : args) k
275 = case arg of
276 TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
277 -> ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
278 ValArg e -> ApplyToVal { sc_arg = e, sc_env = env, sc_dup = Simplified, sc_cont = rest }
279 CastBy c -> CastIt c rest
280 where
281 rest = pushSimplifiedArgs env args k
282 -- The env has an empty SubstEnv
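-- For illustration (a sketch): the ArgSpecs are pushed outside-in, so
--   pushSimplifiedArgs env [ TyArg { as_arg_ty = ty, as_hole_ty = hty }
--                          , ValArg e ] k
--     = ApplyToTy { sc_arg_ty = ty, sc_hole_ty = hty
--                 , sc_cont = ApplyToVal { sc_arg = e, sc_env = env
--                                        , sc_dup = Simplified
--                                        , sc_cont = k } }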
283
284 argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
285 -- NB: the [ArgSpec] is reversed so that the first arg
286 -- in the list is the last one in the application
287 argInfoExpr fun rev_args
288 = go rev_args
289 where
290 go [] = Var fun
291 go (ValArg a : as) = go as `App` a
292 go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty
293 go (CastBy co : as) = mkCast (go as) co
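-- For illustration (a sketch): since the list is reversed,
--   argInfoExpr f [CastBy co, ValArg b, ValArg a]
--     = mkCast (Var f `App` a `App` b) co
-- i.e. it rebuilds ((f a b) |> co)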
294
295
296 {-
297 ************************************************************************
298 * *
299 Functions on SimplCont
300 * *
301 ************************************************************************
302 -}
303
304 mkBoringStop :: OutType -> SimplCont
305 mkBoringStop ty = Stop ty BoringCtxt
306
307 mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in CoreUnfold
308 mkRhsStop ty = Stop ty RhsCtxt
309
310 mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
311 mkLazyArgStop ty cci = Stop ty cci
312
313 -------------------
314 contIsRhsOrArg :: SimplCont -> Bool
315 contIsRhsOrArg (Stop {}) = True
316 contIsRhsOrArg (StrictBind {}) = True
317 contIsRhsOrArg (StrictArg {}) = True
318 contIsRhsOrArg _ = False
319
320 contIsRhs :: SimplCont -> Bool
321 contIsRhs (Stop _ RhsCtxt) = True
322 contIsRhs _ = False
323
324 -------------------
325 contIsDupable :: SimplCont -> Bool
326 contIsDupable (Stop {}) = True
327 contIsDupable (ApplyToTy { sc_cont = k }) = contIsDupable k
328 contIsDupable (ApplyToVal { sc_dup = OkToDup }) = True -- See Note [DupFlag invariants]
329 contIsDupable (Select { sc_dup = OkToDup }) = True -- ...ditto...
330 contIsDupable (CastIt _ k) = contIsDupable k
331 contIsDupable _ = False
332
333 -------------------
334 contIsTrivial :: SimplCont -> Bool
335 contIsTrivial (Stop {}) = True
336 contIsTrivial (ApplyToTy { sc_cont = k }) = contIsTrivial k
337 contIsTrivial (ApplyToVal { sc_arg = Coercion _, sc_cont = k }) = contIsTrivial k
338 contIsTrivial (CastIt _ k) = contIsTrivial k
339 contIsTrivial _ = False
340
341 -------------------
342 contResultType :: SimplCont -> OutType
343 contResultType (Stop ty _) = ty
344 contResultType (CastIt _ k) = contResultType k
345 contResultType (StrictBind _ _ _ _ k) = contResultType k
346 contResultType (StrictArg _ _ k) = contResultType k
347 contResultType (Select { sc_cont = k }) = contResultType k
348 contResultType (ApplyToTy { sc_cont = k }) = contResultType k
349 contResultType (ApplyToVal { sc_cont = k }) = contResultType k
350 contResultType (TickIt _ k) = contResultType k
351
352 contHoleType :: SimplCont -> OutType
353 contHoleType (Stop ty _) = ty
354 contHoleType (TickIt _ k) = contHoleType k
355 contHoleType (CastIt co _) = pFst (coercionKind co)
356 contHoleType (StrictBind b _ _ se _) = substTy se (idType b)
357 contHoleType (StrictArg ai _ _) = funArgTy (ai_type ai)
358 contHoleType (ApplyToTy { sc_hole_ty = ty }) = ty -- See Note [The hole type in ApplyToTy]
359 contHoleType (ApplyToVal { sc_arg = e, sc_env = se, sc_dup = dup, sc_cont = k })
360 = mkFunTy (perhapsSubstTy dup se (exprType e))
361 (contHoleType k)
362 contHoleType (Select { sc_dup = d, sc_bndr = b, sc_env = se })
363 = perhapsSubstTy d se (idType b)
364
365 applyContToJoinType :: JoinArity -> SimplCont -> OutType -> OutType
366 applyContToJoinType ar cont ty
367 = setJoinResTy ar (contResultType cont) ty
368
369 -------------------
370 countArgs :: SimplCont -> Int
371 -- Count all arguments, including types, coercions, and other values
372 countArgs (ApplyToTy { sc_cont = cont }) = 1 + countArgs cont
373 countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont
374 countArgs _ = 0
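-- For illustration (a sketch): at the head of the call (f @ty x y) the
-- continuation is an ApplyToTy wrapping two ApplyToVals, so countArgs
-- returns 3; the type argument is counted too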
375
376 contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
377 -- Summarises value args, discards type args and coercions
378 -- The returned continuation of the call is only used to
379 -- answer questions like "are you interesting?"
380 contArgs cont
381 | lone cont = (True, [], cont)
382 | otherwise = go [] cont
383 where
384 lone (ApplyToTy {}) = False -- See Note [Lone variables] in CoreUnfold
385 lone (ApplyToVal {}) = False
386 lone (CastIt {}) = False
387 lone _ = True
388
389 go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
390 = go (is_interesting arg se : args) k
391 go args (ApplyToTy { sc_cont = k }) = go args k
392 go args (CastIt _ k) = go args k
393 go args k = (False, reverse args, k)
394
395 is_interesting arg se = interestingArg se arg
396 -- Do *not* use short-cutting substitution here
397 -- because we want to get as much IdInfo as possible
398
399
400 -------------------
401 mkArgInfo :: Id
402 -> [CoreRule] -- Rules for function
403 -> Int -- Number of value args
404 -> SimplCont -- Context of the call
405 -> ArgInfo
406
407 mkArgInfo fun rules n_val_args call_cont
408 | n_val_args < idArity fun -- Note [Unsaturated functions]
409 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
410 , ai_rules = rules, ai_encl = False
411 , ai_strs = vanilla_stricts
412 , ai_discs = vanilla_discounts }
413 | otherwise
414 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
415 , ai_rules = rules
416 , ai_encl = interestingArgContext rules call_cont
417 , ai_strs = add_type_str fun_ty arg_stricts
418 , ai_discs = arg_discounts }
419 where
420 fun_ty = idType fun
421
422 vanilla_discounts, arg_discounts :: [Int]
423 vanilla_discounts = repeat 0
424 arg_discounts = case idUnfolding fun of
425 CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
426 -> discounts ++ vanilla_discounts
427 _ -> vanilla_discounts
428
429 vanilla_stricts, arg_stricts :: [Bool]
430 vanilla_stricts = repeat False
431
432 arg_stricts
433 = case splitStrictSig (idStrictness fun) of
434 (demands, result_info)
435 | not (demands `lengthExceeds` n_val_args)
436 -> -- Enough args, use the strictness given.
437 -- For bottoming functions we used to pretend that the arg
438 -- is lazy, so that we don't treat the arg as an
439 -- interesting context. This avoids substituting
440 -- top-level bindings for (say) strings into
441 -- calls to error. But now we are more careful about
442 -- inlining lone variables, so it's ok (see SimplUtils.analyseCont)
443 if isBotRes result_info then
444 map isStrictDmd demands -- Finite => result is bottom
445 else
446 map isStrictDmd demands ++ vanilla_stricts
447 | otherwise
448 -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
449 <+> ppr n_val_args <+> ppr demands )
450 vanilla_stricts -- Not enough args, or no strictness
451
452 add_type_str :: Type -> [Bool] -> [Bool]
453 -- If the function arg types are strict, record that in the 'strictness bits'
454 -- No need to instantiate because unboxed types (which dominate the strict
455 -- types) can't instantiate type variables.
456 -- add_type_str is done repeatedly (for each call); might be better
457 -- once-for-all in the function
458 -- But beware primops/datacons with no strictness
459
460 add_type_str
461 = go
462 where
463 go _ [] = []
464 go fun_ty strs -- Look through foralls
465 | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty -- Includes coercions
466 = go fun_ty' strs
467 go fun_ty (str:strs) -- Add strict-type info
468 | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
469 = (str || Just False == isLiftedType_maybe arg_ty) : go fun_ty' strs
470 -- If the type is levity-polymorphic, we can't know whether it's
471 -- strict. isLiftedType_maybe will return Just False only when
472 -- we're sure the type is unlifted.
473 go _ strs
474 = strs
475
476 {- Note [Unsaturated functions]
477 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
478 Consider (test eyeball/inline4)
479 x = a:as
480 y = f x
481 where f has arity 2. Then we do not want to inline 'x', because
482 it'll just be floated out again. Even if f has lots of discounts
483 on its first argument -- it must be saturated for these to kick in
484 -}
485
486
487 {-
488 ************************************************************************
489 * *
490 Interesting arguments
491 * *
492 ************************************************************************
493
494 Note [Interesting call context]
495 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
496 We want to avoid inlining an expression where there can't possibly be
497 any gain, such as in an argument position. Hence, if the continuation
498 is interesting (eg. a case scrutinee, application etc.) then we
499 inline, otherwise we don't.
500
501 Previously some_benefit used to return True only if the variable was
502 applied to some value arguments. This didn't work:
503
504 let x = _coerce_ (T Int) Int (I# 3) in
505 case _coerce_ Int (T Int) x of
506 I# y -> ....
507
508 we want to inline x, but can't see that it's a constructor in a case
509 scrutinee position, and some_benefit is False.
510
511 Another example:
512
513 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
514
515 .... case dMonadST _@_ x0 of (a,b,c) -> ....
516
517 we'd really like to inline dMonadST here, but we *don't* want to
518 inline if the case expression is just
519
520 case x of y { DEFAULT -> ... }
521
522 since we can just eliminate this case instead (x is in WHNF). The same
523 applies when x is bound to a lambda expression. Hence
524 contIsInteresting looks for case expressions with just a single
525 default case.
526 -}
527
528 interestingCallContext :: SimplCont -> CallCtxt
529 -- See Note [Interesting call context]
530 interestingCallContext cont
531 = interesting cont
532 where
533 interesting (Select {}) = CaseCtxt
534 interesting (ApplyToVal {}) = ValAppCtxt
535 -- Can happen if we have (f Int |> co) y
536 -- If f has an INLINE prag we need to give it some
537 -- motivation to inline. See Note [Cast then apply]
538 -- in CoreUnfold
539 interesting (StrictArg _ cci _) = cci
540 interesting (StrictBind {}) = BoringCtxt
541 interesting (Stop _ cci) = cci
542 interesting (TickIt _ k) = interesting k
543 interesting (ApplyToTy { sc_cont = k }) = interesting k
544 interesting (CastIt _ k) = interesting k
545 -- If this call is the arg of a strict function, the context
546 -- is a bit interesting. If we inline here, we may get useful
547 -- evaluation information to avoid repeated evals: e.g.
548 -- x + (y * z)
549 -- Here the contIsInteresting makes the '*' keener to inline,
550 -- which in turn exposes a constructor which makes the '+' inline.
551 -- Assuming that +,* aren't small enough to inline regardless.
552 --
553 -- It's also very important to inline in a strict context for things
554 -- like
555 -- foldr k z (f x)
556 -- Here, the context of (f x) is strict, and if f's unfolding is
557 -- a build it's *great* to inline it here. So we must ensure that
558 -- the context for (f x) is not totally uninteresting.
559
560 interestingArgContext :: [CoreRule] -> SimplCont -> Bool
561 -- If the argument has form (f x y), where x,y are boring,
562 -- and f is marked INLINE, then we don't want to inline f.
563 -- But if the context of the argument is
564 -- g (f x y)
565 -- where g has rules, then we *do* want to inline f, in case it
566 -- exposes a rule that might fire. Similarly, if the context is
567 -- h (g (f x x))
568 -- where h has rules, then we do want to inline f; hence the
569 -- call_cont argument to interestingArgContext
570 --
571 -- The ai_encl flag makes this happen; if it's
572 -- set, the inliner gets just keen enough to inline f
573 -- regardless of how boring f's arguments are, if it's marked INLINE
574 --
575 -- The alternative would be to *always* inline an INLINE function,
576 -- regardless of how boring its context is; but that seems overkill
577 -- For example, it'd mean that wrapper functions were always inlined
578 --
579 -- The call_cont passed to interestingArgContext is the context of
580 -- the call itself, e.g. g <hole> in the example above
581 interestingArgContext rules call_cont
582 = notNull rules || enclosing_fn_has_rules
583 where
584 enclosing_fn_has_rules = go call_cont
585
586 go (Select {}) = False
587 go (ApplyToVal {}) = False -- Shouldn't really happen
588 go (ApplyToTy {}) = False -- Ditto
589 go (StrictArg _ cci _) = interesting cci
590 go (StrictBind {}) = False -- ??
591 go (CastIt _ c) = go c
592 go (Stop _ cci) = interesting cci
593 go (TickIt _ c) = go c
594
595 interesting RuleArgCtxt = True
596 interesting _ = False
597
598
599 {- Note [Interesting arguments]
600 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
601 An argument is interesting if it deserves a discount for unfoldings
602 with a discount in that argument position. The idea is to avoid
603 unfolding a function that is applied only to variables that have no
604 unfolding (i.e. they are probably lambda bound): (f x y z). There is
605 little point in inlining f here.
606
607 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
608 we must look through lets, eg (let x = e in C a b), because the let will
609 float, exposing the value, if we inline. That makes it different to
610 exprIsHNF.
611
612 Before 2009 we said it was interesting if the argument had *any* structure
613 at all; i.e. (hasSomeUnfolding v). But that does too much inlining; see Trac #3016.
614
615 But we don't regard (f x y) as interesting, unless f is unsaturated.
616 If it's saturated and f hasn't inlined, then it's probably not going
617 to now!
618
619 Note [Conlike is interesting]
620 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
621 Consider
622 f d = ...((*) d x y)...
623 ... f (df d')...
624 where df is con-like. Then we'd really like to inline 'f' so that the
625 rule for (*) (df d) can fire. To do this
626 a) we give a discount for being an argument of a class-op (eg (*) d)
627 b) we say that a con-like argument (eg (df d)) is interesting
628 -}
629
630 interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
631 -- See Note [Interesting arguments]
632 interestingArg env e = go env 0 e
633 where
634 -- n is # value args to which the expression is applied
635 go env n (Var v)
636 | SimplEnv { seIdSubst = ids, seInScope = in_scope } <- env
637 = case snd <$> lookupVarEnv ids v of
638 Nothing -> go_var n (refineFromInScope in_scope v)
639 Just (DoneId v') -> go_var n (refineFromInScope in_scope v')
640 Just (DoneEx e) -> go (zapSubstEnv env) n e
641 Just (ContEx tvs cvs ids e) -> go (setSubstEnv env tvs cvs ids) n e
642
643 go _ _ (Lit {}) = ValueArg
644 go _ _ (Type _) = TrivArg
645 go _ _ (Coercion _) = TrivArg
646 go env n (App fn (Type _)) = go env n fn
647 go env n (App fn _) = go env (n+1) fn
648 go env n (Tick _ a) = go env n a
649 go env n (Cast e _) = go env n e
650 go env n (Lam v e)
651 | isTyVar v = go env n e
652 | n>0 = NonTrivArg -- (\x.b) e is NonTriv
653 | otherwise = ValueArg
654 go _ _ (Case {}) = NonTrivArg
655 go env n (Let b e) = case go env' n e of
656 ValueArg -> ValueArg
657 _ -> NonTrivArg
658 where
659 env' = env `addNewInScopeIds` bindersOf b
660
661 go_var n v
662 | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather than
663 -- data constructors here
664 | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
665 | n > 0 = NonTrivArg -- Saturated or unknown call
666 | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
667 -- See Note [Conlike is interesting]
668 | otherwise = TrivArg -- n==0, no useful unfolding
669 where
670 conlike_unfolding = isConLikeUnfolding (idUnfolding v)
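    -- For illustration (a sketch): a constructor application like (Just x)
    -- yields ValueArg; a lambda-bound f applied as (f x y) yields NonTrivArg;
    -- a bare lambda-bound variable (n == 0, no useful unfolding) yields TrivArg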
671
672 {-
673 ************************************************************************
674 * *
675 SimplifierMode
676 * *
677 ************************************************************************
678
679 The SimplifierMode controls several switches; see its definition in
680 CoreMonad
681 sm_rules :: Bool -- Whether RULES are enabled
682 sm_inline :: Bool -- Whether inlining is enabled
683 sm_case_case :: Bool -- Whether case-of-case is enabled
684 sm_eta_expand :: Bool -- Whether eta-expansion is enabled
685 -}
686
687 simplEnvForGHCi :: DynFlags -> SimplEnv
688 simplEnvForGHCi dflags
689 = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
690 , sm_phase = InitialPhase
691 , sm_rules = rules_on
692 , sm_inline = False
693 , sm_eta_expand = eta_expand_on
694 , sm_case_case = True }
695 where
696 rules_on = gopt Opt_EnableRewriteRules dflags
697 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
698 -- Do not do any inlining, in case we expose some unboxed
699 -- tuple stuff that confuses the bytecode interpreter
700
701 updModeForStableUnfoldings :: Activation -> SimplifierMode -> SimplifierMode
702 -- See Note [Simplifying inside stable unfoldings]
703 updModeForStableUnfoldings inline_rule_act current_mode
704 = current_mode { sm_phase = phaseFromActivation inline_rule_act
705 , sm_inline = True
706 , sm_eta_expand = False }
707 -- sm_eta_expand: see Note [No eta expansion in stable unfoldings]
708 -- For sm_rules, just inherit; sm_rules might be "off"
709 -- because of -fno-enable-rewrite-rules
710 where
711 phaseFromActivation (ActiveAfter _ n) = Phase n
712 phaseFromActivation _ = InitialPhase
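    -- For illustration (a sketch): an INLINE [2] pragma gives activation
    -- (ActiveAfter _ 2), so the stable unfolding's RHS is simplified with
    -- sm_phase = Phase 2, as if we were already in phase 2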
713
714 updModeForRules :: SimplifierMode -> SimplifierMode
715 -- See Note [Simplifying rules]
716 updModeForRules current_mode
717 = current_mode { sm_phase = InitialPhase
718 , sm_inline = False
719 , sm_rules = False
720 , sm_eta_expand = False }
721
722 {- Note [Simplifying rules]
723 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
724 When simplifying a rule, refrain from any inlining or applying of other RULES.
725
726 Doing anything to the LHS is plain confusing, because it means that what the
727 rule matches is not what the user wrote. c.f. Trac #10595, and #10528.
728 Moreover, inlining (or applying rules) on rule LHSs risks introducing
729 Ticks into the LHS, which makes matching trickier. Trac #10665, #10745.
730
731 Doing this to either side confounds tools like HERMIT, which seek to reason
732 about and apply the RULES as originally written. See Trac #10829.
733
734 Note [No eta expansion in stable unfoldings]
735 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
736 If we have a stable unfolding
737
738 f :: Ord a => a -> IO ()
739 -- Unfolding template
740 -- = /\a \(d:Ord a) (x:a). bla
741
742 we do not want to eta-expand to
743
744 f :: Ord a => a -> IO ()
745 -- Unfolding template
746 -- = (/\a \(d:Ord a) (x:a) (eta:State#). bla eta) |> co
747
748 because then specialisation of the overloading doesn't work properly
749 (see Note [Specialisation shape] in Specialise), Trac #9509.
750
751 So we disable eta-expansion in stable unfoldings.
752
753 Note [Inlining in gentle mode]
754 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
755 Something is inlined if
756 (i) the sm_inline flag is on, AND
757 (ii) the thing has an INLINE pragma, AND
758 (iii) the thing is inlinable in the earliest phase.
759
760 Example of why (iii) is important:
761 {-# INLINE [~1] g #-}
762 g = ...
763
764 {-# INLINE f #-}
765 f x = g (g x)
766
767 If we were to inline g into f's inlining, then an importing module would
768 never be able to do
769 f e --> g (g e) ---> RULE fires
770 because the stable unfolding for f has had g inlined into it.
771
772 On the other hand, it is bad not to do ANY inlining into a
773 stable unfolding, because then recursive knots in instance declarations
774 don't get unravelled.
775
776 However, *sometimes* SimplGently must do no call-site inlining at all
777 (hence sm_inline = False). Before full laziness we must be careful
778 not to inline wrappers, because doing so inhibits floating
779 e.g. ...(case f x of ...)...
780 ==> ...(case (case x of I# x# -> fw x#) of ...)...
781 ==> ...(case x of I# x# -> case fw x# of ...)...
782 and now the redex (f x) isn't floatable any more.
783
784 The no-inlining thing is also important for Template Haskell. You might be
785 compiling in one-shot mode with -O2; but when TH compiles a splice before
786 running it, we don't want to use -O2. Indeed, we don't want to inline
787 anything, because the byte-code interpreter might get confused about
788 unboxed tuples and suchlike.
789
790 Note [Simplifying inside stable unfoldings]
791 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
792 We must take care with simplification inside stable unfoldings (which come from
793 INLINE pragmas).
794
795 First, consider the following example
796 let f = \pq -> BIG
797 in
798 let g = \y -> f y y
799 {-# INLINE g #-}
800 in ...g...g...g...g...g...
801 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
802 and thence copied multiple times when g is inlined. HENCE we treat
803 any occurrence in a stable unfolding as a multiple occurrence, not a single
804 one; see OccurAnal.addRuleUsage.
805
806 Second, we *do* want to do some modest rules/inlining stuff in stable
807 unfoldings, partly to eliminate senseless crap, and partly to break
808 the recursive knots generated by instance declarations.
809
810 However, suppose we have
811 {-# INLINE <act> f #-}
812 f = <rhs>
813 meaning "inline f in phases p where activation <act>(p) holds".
814 Then what inlinings/rules can we apply to the copy of <rhs> captured in
815 f's stable unfolding? Our model is that literally <rhs> is substituted for
816 f when it is inlined. So our conservative plan (implemented by
817 updModeForStableUnfoldings) is this:
818
819 -------------------------------------------------------------
820 When simplifying the RHS of a stable unfolding, set the phase
821 to the phase in which the stable unfolding first becomes active
822 -------------------------------------------------------------
823
824 That ensures that
825
826 a) Rules/inlinings that *cease* being active before p will
827 not apply to the stable unfolding, consistent with it being
828 inlined in its *original* form in phase p.
829
830 b) Rules/inlinings that only become active *after* p will
831 not apply to the stable unfolding, again to be consistent with
832 inlining the *original* rhs in phase p.
833
834 For example,
835 {-# INLINE f #-}
836 f x = ...g...
837
838 {-# NOINLINE [1] g #-}
839 g y = ...
840
841 {-# RULE h g = ... #-}
842 Here we must not inline g into f's RHS, even when we get to phase 0,
843 because when f is later inlined into some other module we want the
844 rule for h to fire.
845
846 Similarly, consider
847 {-# INLINE f #-}
848 f x = ...g...
849
850 g y = ...
851 and suppose that there are auto-generated specialisations and a strictness
852 wrapper for g. The specialisations get activation AlwaysActive, and the
853 strictness wrapper gets activation (ActiveAfter 0). So the strictness
854 wrapper fails the test and won't be inlined into f's stable unfolding. That
855 means f can inline, expose the specialised call to g, so the specialisation
856 rules can fire.
857
858 A note about wrappers
859 ~~~~~~~~~~~~~~~~~~~~~
860 It's also important not to inline a worker back into a wrapper.
861 A wrapper looks like
862 wrapper = inline_me (\x -> ...worker... )
863 Normally, the inline_me prevents the worker getting inlined into
864 the wrapper (initially, the worker's only call site!). But,
865 if the wrapper is sure to be called, the strictness analyser will
866 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
867 continuation.
868 -}
869
870 activeUnfolding :: SimplEnv -> Id -> Bool
871 activeUnfolding env
872 | not (sm_inline mode) = active_unfolding_minimal
873 | otherwise = case sm_phase mode of
874 InitialPhase -> active_unfolding_gentle
875 Phase n -> active_unfolding n
876 where
877 mode = getMode env
878
879 getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
880 -- When matching in RULE, we want to "look through" an unfolding
881 -- (to see a constructor) if *rules* are on, even if *inlinings*
882 -- are not. A notable example is DFuns, which really we want to
883 -- match in rules like (op dfun) in gentle mode. Another example
884 -- is 'otherwise' which we want exprIsConApp_maybe to be able to
885 -- see very early on
886 getUnfoldingInRuleMatch env
887 = (in_scope, id_unf)
888 where
889 in_scope = seInScope env
890 mode = getMode env
891 id_unf id | unf_is_active id = idUnfolding id
892 | otherwise = NoUnfolding
893 unf_is_active id
894 | not (sm_rules mode) = active_unfolding_minimal id
895 | otherwise = isActive (sm_phase mode) (idInlineActivation id)
896
897 active_unfolding_minimal :: Id -> Bool
898 -- Compulsory unfoldings only
899 -- Ignore SimplGently, because we want to inline regardless;
900 -- the Id has no top-level binding at all
901 --
902 -- NB: we used to have a second exception, for data con wrappers.
903 -- On the grounds that we use gentle mode for rule LHSs, and
904 -- they match better when data con wrappers are inlined.
905 -- But that only really applies to the trivial wrappers (like (:)),
906 -- and they are now constructed as Compulsory unfoldings (in MkId)
907 -- so they'll happen anyway.
908 active_unfolding_minimal id = isCompulsoryUnfolding (realIdUnfolding id)
909
910 active_unfolding :: PhaseNum -> Id -> Bool
911 active_unfolding n id = isActiveIn n (idInlineActivation id)
912
913 active_unfolding_gentle :: Id -> Bool
914 -- Anything that is early-active
915 -- See Note [Gentle mode]
916 active_unfolding_gentle id
917 = isInlinePragma prag
918 && isEarlyActive (inlinePragmaActivation prag)
919 -- NB: wrappers are not early-active
920 where
921 prag = idInlinePragma id
922
923 ----------------------
924 activeRule :: SimplEnv -> Activation -> Bool
925 -- Nothing => No rules at all
926 activeRule env
927 | not (sm_rules mode) = \_ -> False -- Rewriting is off
928 | otherwise = isActive (sm_phase mode)
929 where
930 mode = getMode env
931
932 {-
933 ************************************************************************
934 * *
935 preInlineUnconditionally
936 * *
937 ************************************************************************
938
939 preInlineUnconditionally
940 ~~~~~~~~~~~~~~~~~~~~~~~~
941 @preInlineUnconditionally@ examines a bndr to see if it is used just
942 once in a completely safe way, so that it is safe to discard the
943 binding and inline its RHS at the (unique) usage site, REGARDLESS of how
944 big the RHS might be. If this is the case we don't simplify the RHS
945 first, but just inline it un-simplified.
946
947 This is much better than first simplifying a perhaps-huge RHS and then
948 inlining and re-simplifying it. Indeed, it can be at least quadratically
949 better. Consider
950
951 x1 = e1
952 x2 = e2[x1]
953 x3 = e3[x2]
954 ...etc...
955 xN = eN[xN-1]
956
957 We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times etc.
958 This can happen with cascades of functions too:
959
960 f1 = \x1.e1
961 f2 = \xs.e2[f1]
962 f3 = \xs.e3[f2]
963 ...etc...
964
965 THE MAIN INVARIANT is this:
966
967 ---- preInlineUnconditionally invariant -----
968 IF preInlineUnconditionally chooses to inline x = <rhs>
969 THEN doing the inlining should not change the occurrence
970 info for the free vars of <rhs>
971 ----------------------------------------------
972
973 For example, it's tempting to look at trivial binding like
974 x = y
975 and inline it unconditionally. But suppose x is used many times,
976 but this is the unique occurrence of y. Then inlining x would change
977 y's occurrence info, which breaks the invariant. It matters: y
978 might have a BIG rhs, which will now be dup'd at every occurrence of x.
979
980
981 Even RHSs labelled InlineMe aren't caught here, because there might be
982 no benefit from inlining at the call site.
983
984 [Sept 01] Don't unconditionally inline a top-level thing, because that
985 can simply make a static thing into something built dynamically. E.g.
986 x = (a,b)
987 main = \s -> h x
988
989 [Remember that we treat \s as a one-shot lambda.] No point in
990 inlining x unless there is something interesting about the call site.
991
992 But watch out: if you aren't careful, some useful foldr/build fusion
993 can be lost (most notably in spectral/hartel/parstof) because the
994 foldr didn't see the build. Doing the dynamic allocation isn't a big
995 deal, in fact, but losing the fusion can be. But the right thing here
996 seems to be to do a callSiteInline based on the fact that there is
997 something interesting about the call site (it's strict). Hmm. That
998 seems a bit fragile.
999
1000 Conclusion: inline top level things gaily until Phase 0 (the last
1001 phase), at which point don't.
1002
1003 Note [pre/postInlineUnconditionally in gentle mode]
1004 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1005 Even in gentle mode we want to do preInlineUnconditionally. The
1006 reason is that too little clean-up happens if you don't inline
1007 use-once things. Also a bit of inlining is *good* for full laziness;
1008 it can expose constant sub-expressions. Example in
1009 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
1010 let-float if you inline windowToViewport
1011
1012 However, as usual for Gentle mode, do not inline things that are
1013 inactive in the initial stages. See Note [Gentle mode].
1014
1015 Note [Stable unfoldings and preInlineUnconditionally]
1016 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1017 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
1018 Example
1019
1020 {-# INLINE f #-}
1021 f :: Eq a => a -> a
1022 f x = ...
1023
1024 fInt :: Int -> Int
1025 fInt = f Int dEqInt
1026
1027 ...fInt...fInt...fInt...
1028
1029 Here f occurs just once, in the RHS of fInt. But if we inline it there
1030 we'll lose the opportunity to inline at each of fInt's call sites.
1031 The INLINE pragma will only inline when the application is saturated
1032 for exactly this reason; and we don't want PreInlineUnconditionally
1033 to second-guess it. A live example is Trac #3736.
1034 c.f. Note [Stable unfoldings and postInlineUnconditionally]
1035
1036 Note [Top-level bottoming Ids]
1037 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1038 Don't inline top-level Ids that are bottoming, even if they are used just
1039 once, because FloatOut has gone to some trouble to extract them out.
1040 Inlining them won't make the program run faster!
1041
1042 Note [Do not inline CoVars unconditionally]
1043 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1044 Coercion variables appear inside coercions, and the RHS of a let-binding
1045 is a term (not a coercion) so we can't necessarily inline the latter in
1046 the former.
1047 -}
1048
1049 preInlineUnconditionally :: DynFlags -> SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
1050 -- Precondition: rhs satisfies the let/app invariant
1051 -- See Note [CoreSyn let/app invariant] in CoreSyn
1052 -- Reason: we don't want to inline single uses, or discard dead bindings,
1053 -- for unlifted, side-effect-ful bindings
1054 preInlineUnconditionally dflags env top_lvl bndr rhs
1055 | not active = False
1056 | isStableUnfolding (idUnfolding bndr) = False -- Note [Stable unfoldings and preInlineUnconditionally]
1057 | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
1058 | not (gopt Opt_SimplPreInlining dflags) = False
1059 | isCoVar bndr = False -- Note [Do not inline CoVars unconditionally]
1060 | otherwise = case idOccInfo bndr of
1061 IAmDead -> True -- Happens in ((\x.1) v)
1062 occ@OneOcc { occ_one_br = True }
1063 -> try_once (occ_in_lam occ)
1064 (occ_int_cxt occ)
1065 _ -> False
1066 where
1067 mode = getMode env
1068 active = isActive (sm_phase mode) act
1069 -- See Note [pre/postInlineUnconditionally in gentle mode]
1070 act = idInlineActivation bndr
1071 try_once in_lam int_cxt -- There's one textual occurrence
1072 | not in_lam = isNotTopLevel top_lvl || early_phase
1073 | otherwise = int_cxt && canInlineInLam rhs
1074
1075 -- Be very careful before inlining inside a lambda, because (a) we must not
1076 -- invalidate occurrence information, and (b) we want to avoid pushing a
1077 -- single allocation (here) into multiple allocations (inside lambda).
1078 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
1079 -- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
1080 -- where
1081 -- is_cheap = exprIsCheap rhs
1082 -- ok = is_cheap && int_cxt
1083
1084 -- int_cxt The context isn't totally boring
1085 -- E.g. let f = \ab.BIG in \y. map f xs
1086 -- Don't want to substitute for f, because then we allocate
1087 -- its closure every time the \y is called
1088 -- But: let f = \ab.BIG in \y. map (f y) xs
1089 -- Now we do want to substitute for f, even though it's not
1090 -- saturated, because we're going to allocate a closure for
1091 -- (f y) every time round the loop anyhow.
1092
1093 -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
1094 -- so substituting rhs inside a lambda doesn't change the occ info.
1095 -- Sadly, not quite the same as exprIsHNF.
1096 canInlineInLam (Lit _) = True
1097 canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
1098 canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
1099 canInlineInLam _ = False
1100 -- not ticks. Counting ticks cannot be duplicated, and non-counting
1101 -- ticks around a Lam will disappear anyway.
1102
1103 early_phase = case sm_phase mode of
1104 Phase 0 -> False
1105 _ -> True
1106 -- If we don't have this early_phase test, consider
1107 -- x = length [1,2,3]
1108 -- The full laziness pass carefully floats all the cons cells to
1109 -- top level, and preInlineUnconditionally floats them all back in.
1110 -- Result is (a) static allocation replaced by dynamic allocation
1111 -- (b) many simplifier iterations because this tickles
1112 -- a related problem; only one inlining per pass
1113 --
1114 -- On the other hand, I have seen cases where top-level fusion is
1115 -- lost if we don't inline top level thing (e.g. string constants)
1116 -- Hence the test for phase zero (which is the phase for all the final
1117 -- simplifications). Until phase zero we take no special notice of
1118 -- top level things, but then we become more leery about inlining
1119 -- them.
1120
1121 {-
1122 ************************************************************************
1123 * *
1124 postInlineUnconditionally
1125 * *
1126 ************************************************************************
1127
1128 postInlineUnconditionally
1129 ~~~~~~~~~~~~~~~~~~~~~~~~~
1130 @postInlineUnconditionally@ decides whether to unconditionally inline
1131 a thing based on the form of its RHS; in particular if it has a
1132 trivial RHS. If so, we can inline and discard the binding altogether.
1133
1134 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
1135 only have *forward* references. Hence, it's safe to discard the binding
1136
1137 NOTE: This isn't our last opportunity to inline. We're at the binding
1138 site right now, and we'll get another opportunity when we get to the
1139 occurrence(s)
1140
1141 Note that we do this unconditional inlining only for trivial RHSs.
1142 Don't inline even WHNFs inside lambdas; doing so may simply increase
1143 allocation when the function is called. This isn't the last chance; see
1144 NOTE above.
1145
1146 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here. Why?
1147 Because we don't even want to inline them into the RHS of constructor
1148 arguments. See NOTE above
1149
1150 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
1151 it's best to inline it anyway. We often get a=E; b=a from desugaring,
1152 with both a and b marked NOINLINE. But that seems incompatible with
1153 our new view that inlining is like a RULE, so I'm sticking to the 'active'
1154 story for now.
1155 -}
1156
1157 postInlineUnconditionally
1158 :: DynFlags -> SimplEnv -> TopLevelFlag
1159 -> OutId -- The binder (an InId would be fine too)
1160 -- (*not* a CoVar)
1161 -> OccInfo -- From the InId
1162 -> OutExpr
1163 -> Unfolding
1164 -> Bool
1165 -- Precondition: rhs satisfies the let/app invariant
1166 -- See Note [CoreSyn let/app invariant] in CoreSyn
1167 -- Reason: we don't want to inline single uses, or discard dead bindings,
1168 -- for unlifted, side-effect-ful bindings
1169 postInlineUnconditionally dflags env top_lvl bndr occ_info rhs unfolding
1170 | not active = False
1171 | isWeakLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline
1172 -- because it might be referred to "earlier"
1173 | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
1174 | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally]
1175 | exprIsTrivial rhs = True
1176 | otherwise
1177 = case occ_info of
1178 -- The point of examining occ_info here is that for *non-values*
1179 -- that occur outside a lambda, the call-site inliner won't have
1180 -- a chance (because it doesn't know that the thing
1181 -- only occurs once). The pre-inliner won't have gotten
1182 -- it either, if the thing occurs in more than one branch
1183 -- So the main target is things like
1184 -- let x = f y in
1185 -- case v of
1186 -- True -> case x of ...
1187 -- False -> case x of ...
1188 -- This is very important in practice; e.g. wheel-sieve1 doubles
1189 -- in allocation if you miss this out
1190 OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt }
1191 -- OneOcc => no code-duplication issue
1192 -> smallEnoughToInline dflags unfolding -- Small enough to dup
1193 -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
1194 --
1195 -- NB: Do NOT inline arbitrarily big things, even if one_br is True
1196 -- Reason: doing so risks exponential behaviour. We simplify a big
1197 -- expression, inline it, and simplify it again. But if the
1198 -- very same thing happens in the big expression, we get
1199 -- exponential cost!
1200 -- PRINCIPLE: when we've already simplified an expression once,
1201 -- make sure that we only inline it if it's reasonably small.
1202
1203 && (not in_lam ||
1204 -- Outside a lambda, we want to be reasonably aggressive
1205 -- about inlining into multiple branches of case
1206 -- e.g. let x = <non-value>
1207 -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
1208 -- Inlining can be a big win if C3 is the hot-spot, even if
1209 -- the uses in C1, C2 are not 'interesting'
1210 -- An example that gets worse if you add int_cxt here is 'clausify'
1211
1212 (isCheapUnfolding unfolding && int_cxt))
1213 -- isCheap => acceptable work duplication; in_lam may be true
1214 -- int_cxt to prevent us inlining inside a lambda without some
1215 -- good reason. See the notes on int_cxt in preInlineUnconditionally
1216
1217 IAmDead -> True -- This happens; for example, the case_bndr during case of
1218 -- known constructor: case (a,b) of x { (p,q) -> ... }
1219 -- Here x isn't mentioned in the RHS, so we don't want to
1220 -- create the (dead) let-binding let x = (a,b) in ...
1221
1222 _ -> False
1223
1224 -- Here's an example that we don't handle well:
1225 -- let f = if b then Left (\x.BIG) else Right (\y.BIG)
1226 -- in \y. ....case f of {...} ....
1227 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
1228 -- But
1229 -- - We can't preInlineUnconditionally because that would invalidate
1230 -- the occ info for b.
1231 -- - We can't postInlineUnconditionally because the RHS is big, and
1232 -- that risks exponential behaviour
1233 -- - We can't call-site inline, because the rhs is big
1234 -- Alas!
1235
1236 where
1237 active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
1238 -- See Note [pre/postInlineUnconditionally in gentle mode]
1239
1240 {-
1241 Note [Top level and postInlineUnconditionally]
1242 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1243 We don't do postInlineUnconditionally for top-level things (even for
1244 ones that are trivial):
1245
1246 * Doing so will inline top-level error expressions that have been
1247 carefully floated out by FloatOut. More generally, it might
1248 replace static allocation with dynamic.
1249
1250 * Even for trivial expressions there's a problem. Consider
1251 {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
1252 blah xs = reverse xs
1253 ruggle = sort
1254 In one simplifier pass we might fire the rule, getting
1255 blah xs = ruggle xs
1256 but in *that* simplifier pass we must not do postInlineUnconditionally
1257 on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
1258
1259 If the rhs is trivial it'll be inlined by callSiteInline, and then
1260 the binding will be dead and discarded by the next use of OccurAnal
1261
1262 * There is less point, because the main goal is to get rid of local
1263 bindings used in multiple case branches.
1264
1265 * The inliner should inline trivial things at call sites anyway.
1266
1267 * The Id might be exported. We could check for that separately,
1268 but since we aren't going to postInlineUnconditionally /any/
1269 top-level bindings, we don't need to test.
1270
1271 Note [Stable unfoldings and postInlineUnconditionally]
1272 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1273 Do not do postInlineUnconditionally if the Id has a stable unfolding,
1274 otherwise we lose the unfolding. Example
1275
1276 -- f has stable unfolding with rhs (e |> co)
1277 -- where 'e' is big
1278 f = e |> co
1279
1280 Then there's a danger we'll optimise to
1281
1282 f' = e
1283 f = f' |> co
1284
1285 and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
1286 won't inline because 'e' is too big.
1287
1288 c.f. Note [Stable unfoldings and preInlineUnconditionally]
1289
1290
1291 ************************************************************************
1292 * *
1293 Rebuilding a lambda
1294 * *
1295 ************************************************************************
1296 -}
1297
1298 mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
1299 -- mkLam tries three things
1300 -- a) eta reduction, if that gives a trivial expression
1301 -- b) eta expansion [only if there are some value lambdas]
1302 -- c) floating casts out of the lambda body (see Note [Casts and lambdas])
1302
1303 mkLam _env [] body _cont
1304 = return body
1305 mkLam env bndrs body cont
1306 = do { dflags <- getDynFlags
1307 ; mkLam' dflags bndrs body }
1308 where
1309 mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
1310 mkLam' dflags bndrs (Cast body co)
1311 | not (any bad bndrs)
1312 -- Note [Casts and lambdas]
1313 = do { lam <- mkLam' dflags bndrs body
1314 ; return (mkCast lam (mkPiCos Representational bndrs co)) }
1315 where
1316 co_vars = tyCoVarsOfCo co
1317 bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
1318
1319 mkLam' dflags bndrs body@(Lam {})
1320 = mkLam' dflags (bndrs ++ bndrs1) body1
1321 where
1322 (bndrs1, body1) = collectBinders body
1323
1324 mkLam' dflags bndrs (Tick t expr)
1325 | tickishFloatable t
1326 = mkTick t <$> mkLam' dflags bndrs expr
1327
1328 mkLam' dflags bndrs body
1329 | gopt Opt_DoEtaReduction dflags
1330 , Just etad_lam <- tryEtaReduce bndrs body
1331 = do { tick (EtaReduction (head bndrs))
1332 ; return etad_lam }
1333
1334 | not (contIsRhs cont) -- See Note [Eta expanding lambdas]
1335 , sm_eta_expand (getMode env)
1336 , any isRuntimeVar bndrs
1337 , let body_arity = exprEtaExpandArity dflags body
1338 , body_arity > 0
1339 = do { tick (EtaExpansion (head bndrs))
1340 ; let res = mkLams bndrs (etaExpand body_arity body)
1341 ; traceSmpl "eta expand" (vcat [text "before" <+> ppr (mkLams bndrs body)
1342 , text "after" <+> ppr res])
1343 ; return res }
1344
1345 | otherwise
1346 = return (mkLams bndrs body)
1347
1348 {-
1349 Note [Eta expanding lambdas]
1350 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1351 In general we *do* want to eta-expand lambdas. Consider
1352 f (\x -> case x of (a,b) -> \s -> blah)
1353 where 's' is a state token, and hence can be eta expanded. This
1354 showed up in the code for GHC.IO.Handle.Text.hPutChar, a rather
1355 important function!
1356
1357 The eta-expansion will never happen unless we do it now. (Well, it's
1358 possible that CorePrep will do it, but CorePrep only has a half-baked
1359 eta-expander that can't deal with casts. So it's much better to do it
1360 here.)
1361
1362 However, when the lambda is let-bound, as the RHS of a let, we have a
1363 better eta-expander (in the form of tryEtaExpandRhs), so we don't
1364 bother to try expansion in mkLam in that case; hence the contIsRhs
1365 guard.
1366
1367 NB: We check the SimplEnv (sm_eta_expand), not DynFlags.
1368 See Note [No eta expansion in stable unfoldings]
1369
1370 Note [Casts and lambdas]
1371 ~~~~~~~~~~~~~~~~~~~~~~~~
1372 Consider
1373 (\x. (\y. e) `cast` g1) `cast` g2
1374 There is a danger here that the two lambdas look separated, and the
1375 full laziness pass might float an expression to between the two.
1376
1377 So this equation in mkLam' floats the g1 out, thus:
1378 (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
1379 where x:tx.
1380
1381 In general, this floats casts outside lambdas, where (I hope) they
1382 might meet and cancel with some other cast:
1383 \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
1384 /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
1385 /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
1386 (if not (g `in` co))
1387
1388 Notice that it works regardless of 'e'. Originally it worked only
1389 if 'e' was itself a lambda, but in some cases that resulted in
1390 fruitless iteration in the simplifier. A good example was when
1391 compiling Text.ParserCombinators.ReadPrec, where we had a definition
1392 like (\x. Get `cast` g)
1393 where Get is a constructor with nonzero arity. Then mkLam eta-expanded
1394 the Get, and the next iteration eta-reduced it, and then eta-expanded
1395 it again.
1396
1397 Note also the side condition for the case of coercion binders.
1398 It does not make sense to transform
1399 /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
1400 because the latter is not well-kinded.
1401
1402 ************************************************************************
1403 * *
1404 Eta expansion
1405 * *
1406 ************************************************************************
1407 -}
1408
1409 tryEtaExpandRhs :: SimplEnv -> RecFlag -> OutId -> OutExpr
1410 -> SimplM (Arity, OutExpr)
1411 -- See Note [Eta-expanding at let bindings]
1412 tryEtaExpandRhs env is_rec bndr rhs
1413 = do { dflags <- getDynFlags
1414 ; (new_arity, new_rhs) <- try_expand dflags
1415
1416 ; WARN( new_arity < old_id_arity,
1417 (text "Arity decrease:" <+> (ppr bndr <+> ppr old_id_arity
1418 <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
1419 -- Note [Arity decrease] in Simplify
1420 return (new_arity, new_rhs) }
1421 where
1422 try_expand dflags
1423 | exprIsTrivial rhs
1424 = return (exprArity rhs, rhs)
1425
1426 | sm_eta_expand (getMode env) -- Provided eta-expansion is on
1427 , let new_arity1 = findRhsArity dflags bndr rhs old_arity
1428 new_arity2 = idCallArity bndr
1429 new_arity = max new_arity1 new_arity2
1430 , new_arity > old_arity -- And the current manifest arity isn't enough
1431 = if is_rec == Recursive && isJoinId bndr
1432 then WARN(True, text "Can't eta-expand recursive join point:" <+>
1433 ppr bndr)
1434 return (old_arity, rhs)
1435 else do { tick (EtaExpansion bndr)
1436 ; return (new_arity, etaExpand new_arity rhs) }
1437 | otherwise
1438 = return (old_arity, rhs)
1439
1440 old_arity = exprArity rhs -- See Note [Do not eta-expand PAPs]
1441 old_id_arity = idArity bndr
1442
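{- A hedged, standalone sketch (plain Ints and hypothetical names, not GHC's
   Arity machinery) of the decision try_expand makes above: take the larger of
   the analysed arity and the recorded call arity, and eta-expand only when
   that beats the manifest arity of the RHS.

     decideNewArity :: Int        -- arity from a findRhsArity-style analysis
                    -> Int        -- call arity recorded on the binder
                    -> Int        -- manifest arity of the current RHS
                    -> Maybe Int  -- Just n <=> worth eta-expanding to arity n
     decideNewArity analysed call_arity manifest
       | wanted > manifest = Just wanted
       | otherwise         = Nothing
       where
         wanted = max analysed call_arity
-}
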
1443 {-
1444 Note [Eta-expanding at let bindings]
1445 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1446 We now eta-expand at let-bindings, which is where the payoff comes.
1447 The most significant thing is that we can do a simple arity analysis
1448 (in CoreArity.findRhsArity), which we can't do for free-floating lambdas.
1449
1450 One useful consequence of not eta-expanding lambdas is this example:
1451 genMap :: C a => ...
1452 {-# INLINE genMap #-}
1453 genMap f xs = ...
1454
1455 myMap :: D a => ...
1456 {-# INLINE myMap #-}
1457 myMap = genMap
1458
1459 Notice that 'genMap' should only inline if applied to two arguments.
1460 In the stable unfolding for myMap we'll have the unfolding
1461 (\d -> genMap Int (..d..))
1462 We do not want to eta-expand to
1463 (\d f xs -> genMap Int (..d..) f xs)
1464 because then 'genMap' will inline, and it really shouldn't: at least
1465 as far as the programmer is concerned, it's not applied to two
1466 arguments!
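
One concrete (hypothetical) rendering of the example, with Show and Eq
standing in for the classes C and D:

  genMap :: Show a => (a -> b) -> [a] -> [b]
  {-# INLINE genMap #-}
  genMap f xs = map f xs

  myMap :: (Show a, Eq a) => (a -> a) -> [a] -> [a]
  {-# INLINE myMap #-}
  myMap = genMap

myMap's stable unfolding is, roughly, a dictionary lambda (\d -> genMap d);
eta-expanding it to (\d f xs -> genMap d f xs) would let genMap inline there,
which is exactly what we want to avoid.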
1467
1468 Note [Do not eta-expand PAPs]
1469 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1470 We used to have old_arity = manifestArity rhs, which meant that we
1471 would eta-expand even PAPs. But this gives no particular advantage,
1472 and can lead to a massive blow-up in code size, exhibited by Trac #9020.
1473 Suppose we have a PAP
1474 foo :: IO ()
1475 foo = returnIO ()
1476 Then we can eta-expand it to
1477 foo = (\eta. (returnIO () |> sym g) eta) |> g
1478 where
1479 g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
1480
1481 But there is really no point in doing this, and it generates masses of
1482 coercions and whatnot that eventually disappear again. For T9020, GHC
1483 allocated 6.6G before, and 0.8G afterwards; and residency dropped from
1484 1.8G to 45M.
1485
1486 But note that this won't eta-expand, say
1487 f = \g -> map g
1488 Does it matter not eta-expanding such functions? I'm not sure. Perhaps
1489 strictness analysis will have less to bite on?
1490
1491
1492 ************************************************************************
1493 * *
1494 \subsection{Floating lets out of big lambdas}
1495 * *
1496 ************************************************************************
1497
1498 Note [Floating and type abstraction]
1499 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1500 Consider this:
1501 x = /\a. C e1 e2
1502 We'd like to float this to
1503 y1 = /\a. e1
1504 y2 = /\a. e2
1505 x = /\a. C (y1 a) (y2 a)
1506 for the usual reasons: we want to inline x rather vigorously.
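
A runnable rendering of this first example (the two-argument constructor P
and the list expressions are hypothetical stand-ins for C, e1 and e2):

  data P a = P [a] [a]

  x :: P a
  x = P ([] ++ []) (reverse [])

  -- After floating the constructor arguments out:
  y1, y2 :: [a]
  y1 = [] ++ []
  y2 = reverse []

  x' :: P a
  x' = P y1 y2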
1507
1508 You may think that this kind of thing is rare. But in some programs it is
1509 common. For example, if you do closure conversion you might get:
1510
1511 data a :-> b = forall e. (e -> a -> b) :$ e
1512
1513 f_cc :: forall a. a :-> a
1514 f_cc = /\a. (\e. id a) :$ ()
1515
1516 Now we really want to inline that f_cc thing so that the
1517 construction of the closure goes away.
1518
1519 So I have elaborated simplLazyBind to understand right-hand sides that look
1520 like
1521 /\ a1..an. body
1522
1523 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1524 but there is quite a bit of plumbing in simplLazyBind as well.
1525
1526 The same transformation is good when there are lets in the body:
1527
1528 /\abc -> let(rec) x = e in b
1529 ==>
1530 let(rec) x' = /\abc -> let x = x' a b c in e
1531 in
1532 /\abc -> let x = x' a b c in b
1533
1534 This is good because it can turn things like:
1535
1536 let f = /\a -> letrec g = ... g ... in g
1537 into
1538 letrec g' = /\a -> ... g' a ...
1539 in
1540 let f = /\ a -> g' a
1541
1542 which is better. In effect, it means that big lambdas don't impede
1543 let-floating.
1544
1545 This optimisation is CRUCIAL in eliminating the junk introduced by
1546 desugaring mutually recursive definitions. Don't eliminate it lightly!
1547
1548 [May 1999] If we do this transformation *regardless* then we can
1549 end up with some pretty silly stuff. For example,
1550
1551 let
1552 st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1553 in ..
1554 becomes
1555 let y1 = /\s -> r1
1556 y2 = /\s -> r2
1557 st = /\s -> ...[y1 s/x1, y2 s/x2]
1558 in ..
1559
1560 Unless the "..." is a WHNF there is really no point in doing this.
1561 Indeed it can make things worse. Suppose x1 is used strictly,
1562 and is of the form
1563
1564 x1* = case f y of { (a,b) -> e }
1565
1566 If we abstract this wrt the tyvar, we can no longer inline the case
1567 as we normally would.
1568
1569 That's why the whole transformation is part of the same process that
1570 floats let-bindings and constructor arguments out of RHSs. In particular,
1571 it is guarded by the doFloatFromRhs call in simplLazyBind.
1572
1573 Note [Which type variables to abstract over]
1574 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1575 Abstract only over the type variables free in the rhs wrt which the
1576 new binding is abstracted. Note that
1577
1578 * The naive approach of abstracting wrt the
1579 tyvars free in the Id's /type/ fails. Consider:
1580 /\ a b -> let t :: (a,b) = (e1, e2)
1581               x :: a     = fst t
1582           in ...
1583 Here, b isn't free in x's type, but we must nevertheless
1584 abstract wrt b as well, because t's type mentions b.
1585 Since t is floated too, we'd end up with the bogus:
1586 poly_t = /\ a b -> (e1, e2)
1587 poly_x = /\ a -> fst (poly_t a *b*)
1588
1589 * We must do closeOverKinds. Example (Trac #10934):
1590 f = /\k (f:k->*) (a:k). let t = AccFailure @ (f a) in ...
1591 Here we want to float 't', but we must remember to abstract over
1592 'k' as well, even though it is not explicitly mentioned in the RHS,
1593 otherwise we get
1594 t = /\ (f:k->*) (a:k). AccFailure @ (f a)
1595 which is obviously bogus.
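
A standalone sketch of the rule (Strings for tyvars and a hypothetical
kindFreeVars function; GHC's real code is the tvs_here calculation in
abstractFloats below, which also topologically sorts the result with
toposortTyVars):

  tyVarsToAbstract :: [String]             -- tyvars bound by the enclosing /\s
                   -> [String]             -- tyvars free in the (substituted) RHS
                   -> (String -> [String]) -- free tyvars of a tyvar's kind
                   -> [String]
  tyVarsToAbstract main_tvs rhs_tvs kindFreeVars
    = [ tv | tv <- main_tvs, tv `elem` closed ]
    where
      closed = rhs_tvs ++ concatMap kindFreeVars rhs_tvs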
1596 -}
1597
1598 abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
1599 abstractFloats main_tvs body_env body
1600 = ASSERT( notNull body_floats )
1601 do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
1602 ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
1603 where
1604 main_tv_set = mkVarSet main_tvs
1605 body_floats = getFloatBinds body_env
1606 empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
1607
1608 abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
1609 abstract subst (NonRec id rhs)
1610 = do { (poly_id, poly_app) <- mk_poly tvs_here id
1611 ; let poly_rhs = mkLams tvs_here rhs'
1612 subst' = CoreSubst.extendIdSubst subst id poly_app
1613 ; return (subst', (NonRec poly_id poly_rhs)) }
1614 where
1615 rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
1616
1617 -- tvs_here: see Note [Which type variables to abstract over]
1618 tvs_here = toposortTyVars $
1619 filter (`elemVarSet` main_tv_set) $
1620 closeOverKindsList $
1621 exprSomeFreeVarsList isTyVar rhs'
1622
1623 abstract subst (Rec prs)
1624 = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
1625 ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
1626 poly_rhss = [mkLams tvs_here (CoreSubst.substExpr (text "abstract_floats3") subst' rhs)
1627 | rhs <- rhss]
1628 ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
1629 where
1630 (ids,rhss) = unzip prs
1631 -- For a recursive group, it's a bit of a pain to work out the minimal
1632 -- set of tyvars over which to abstract:
1633 -- /\ a b c. let x = ...a... in
1634 -- letrec { p = ...x...q...
1635 -- q = .....p...b... } in
1636 -- ...
1637 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
1638 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
1639 -- Since it's a pain, we just use the whole set, which is always safe
1640 --
1641 -- If you ever want to be more selective, remember this bizarre case too:
1642 -- x::a = x
1643 -- Here, we must abstract 'x' over 'a'.
1644 tvs_here = toposortTyVars main_tvs
1645
1646 mk_poly tvs_here var
1647 = do { uniq <- getUniqueM
1648 ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
1649 poly_ty = mkInvForAllTys tvs_here (idType var) -- But new type of course
1650 poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.hs
1651 mkLocalIdOrCoVar poly_name poly_ty
1652 ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
1653 -- In the olden days, it was crucial to copy the occInfo of the original var,
1654 -- because we were looking at occurrence-analysed but as yet unsimplified code!
1655 -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking
1656 -- at already simplified code, so it doesn't matter
1657 --
1658 -- It's even right to retain single-occurrence or dead-var info:
1659 -- Suppose we started with /\a -> let x = E in B
1660 -- where x occurs once in B. Then we transform to:
1661 -- let x' = /\a -> E in /\a -> let x* = x' a in B
1662 -- where x* has an INLINE prag on it. Now, once x* is inlined,
1663 -- the occurrences of x' will be just the occurrences originally
1664 -- pinned on x.
1665
1666 {-
1667 Note [Abstract over coercions]
1668 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1669 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1670 type variable a. Rather than sort this mess out, we simply bale out and abstract
1671 wrt all the type variables if any of them are coercion variables.
1672
1673
1674 Historical note: if you use let-bindings instead of a substitution, beware of this:
1675
1676 -- Suppose we start with:
1677 --
1678 -- x = /\ a -> let g = G in E
1679 --
1680 -- Then we'll float to get
1681 --
1682 -- x = let poly_g = /\ a -> G
1683 -- in /\ a -> let g = poly_g a in E
1684 --
1685 -- But now the occurrence analyser will see just one occurrence
1686 -- of poly_g, not inside a lambda, so the simplifier will
1687 -- PreInlineUnconditionally poly_g back into g! Back to square 1!
1688 -- (I used to think that the "don't inline lone occurrences" stuff
1689 -- would stop this happening, but since it's the *only* occurrence,
1690 -- PreInlineUnconditionally kicks in first!)
1691 --
1692 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1693 -- to appear many times. (NB: mkInlineMe eliminates
1694 -- such notes on trivial RHSs, so do it manually.)
1695
1696 ************************************************************************
1697 * *
1698 prepareAlts
1699 * *
1700 ************************************************************************
1701
1702 prepareAlts tries these things:
1703
1704 1. Eliminate alternatives that cannot match, including the
1705 DEFAULT alternative.
1706
1707 2. If the DEFAULT alternative can match only one possible constructor,
1708 then make that constructor explicit.
1709 e.g.
1710 case e of x { DEFAULT -> rhs }
1711 ===>
1712 case e of x { (a,b) -> rhs }
1713 where the type is a single constructor type. This gives better code
1714 when rhs also scrutinises x or e.
1715
1716 3. Returns a list of the constructors that cannot match in the
1717 DEFAULT alternative (if there is one)
1718
1719 Here "cannot match" includes knowledge from GADTs
1720
1721 It's a good idea to do this stuff before simplifying the alternatives, to
1722 avoid simplifying alternatives we know can't happen, and to come up with
1723 the list of constructors that are handled, to put into the IdInfo of the
1724 case binder, for use when simplifying the alternatives.
1725
1726 Eliminating the default alternative in (1) isn't so obvious, but it can
1727 happen:
1728
1729 data Colour = Red | Green | Blue
1730
1731 f x = case x of
1732 Red -> ..
1733 Green -> ..
1734 DEFAULT -> h x
1735
1736 h y = case y of
1737 Blue -> ..
1738 DEFAULT -> [ case y of ... ]
1739
1740 If we inline h into f, the default case of the inlined h can't happen.
1741 If we don't notice this, we may end up filtering out *all* the cases
1742 of the inner case y, which leaves us with nowhere to go!
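
A standalone sketch of step (2), using Strings for constructors and ignoring
binders, literals and GADT evidence (GHC's real refineDefaultAlt handles all
of those):

  refineDefault :: [String]               -- all constructors of the data type
                -> [String]               -- constructors that cannot match
                -> [(Maybe String, rhs)]  -- alternatives; Nothing is DEFAULT
                -> [(Maybe String, rhs)]
  refineDefault all_cons imposs ((Nothing, rhs) : rest)  -- DEFAULT comes first in Core
    | [c] <- [ c | c <- all_cons
                 , c `notElem` imposs
                 , Just c `notElem` map fst rest ]
    = (Just c, rhs) : rest      -- make the single live constructor explicit
  refineDefault _ _ alts = alts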
1743 -}
1744
1745 prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
1746 -- The returned alternatives can be empty, in which case none are possible
1747 prepareAlts scrut case_bndr' alts
1748 | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
1749 -- Case binder is needed just for its type. Note that as an
1750 -- OutId, it has maximum information; this is important.
1751 -- Test simpl013 is an example
1752 = do { us <- getUniquesM
1753 ; let (idcs1, alts1) = filterAlts tc tys imposs_cons alts
1754 (yes2, alts2) = refineDefaultAlt us tc tys idcs1 alts1
1755 (yes3, idcs3, alts3) = combineIdenticalAlts idcs1 alts2
1756 -- "idcs" stands for "impossible default data constructors"
1757 -- i.e. the constructors that can't match the default case
1758 ; when yes2 $ tick (FillInCaseDefault case_bndr')
1759 ; when yes3 $ tick (AltMerge case_bndr')
1760 ; return (idcs3, alts3) }
1761
1762 | otherwise -- Not a data type, so nothing interesting happens
1763 = return ([], alts)
1764 where
1765 imposs_cons = case scrut of
1766 Var v -> otherCons (idUnfolding v)
1767 _ -> []
1768
1769
1770 {-
1771 ************************************************************************
1772 * *
1773 mkCase
1774 * *
1775 ************************************************************************
1776
1777 mkCase tries these things
1778
1779 1. Merge Nested Cases
1780
1781      case e of b {             ==>   case e of b {
1782        p1 -> rhs1                      p1 -> rhs1
1783        ...                             ...
1784        pm -> rhsm                      pm -> rhsm
1785        _  -> case b of b' {            pn -> let b'=b in rhsn
1786                pn -> rhsn              ...
1787                ...                     po -> let b'=b in rhso
1788                po -> rhso              _  -> let b'=b in rhsd
1789                _  -> rhsd
1790      }
1791
1792 which merges two cases into one when the default alternative of
1793 the outer case is itself a case on the outer case binder. This
1794 transformation is called Case Merging. It avoids scrutinising the same
1795 variable multiple times.
1796
1797 2. Eliminate Identity Case
1798
1799      case e of             ===>   e
1800        True  -> True;
1801        False -> False
1802
1803 and similar friends.
1804
1805 3. Scrutinee Constant Folding
1806
1807      case x op# k# of _ {  ===>   case x of _ {
1808        a1# -> e1                    (a1# inv_op# k#) -> e1
1809        a2# -> e2                    (a2# inv_op# k#) -> e2
1810        ...                          ...
1811        DEFAULT -> ed                DEFAULT -> ed
1812
1813 where (x op# k#) inv_op# k# == x
1814
1815 And similarly for commuted arguments and for some unary operations.
1816
1817 The purpose of this transformation is not only to avoid an arithmetic
1818 operation at runtime but to allow other transformations to apply in cascade.
1819
1820 Example with the "Merge Nested Cases" optimization (from #12877):
1821
1822 main = case t of t0
1823 0## -> ...
1824 DEFAULT -> case t0 `minusWord#` 1## of t1
1825 0## -> ...
1826 DEFAULT -> case t1 `minusWord#` 1## of t2
1827 0## -> ...
1828 DEFAULT -> case t2 `minusWord#` 1## of _
1829 0## -> ...
1830 DEFAULT -> ...
1831
1832 becomes:
1833
1834 main = case t of _
1835 0## -> ...
1836 1## -> ...
1837 2## -> ...
1838 3## -> ...
1839 DEFAULT -> ...
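
A standalone sketch of the literal adjustment in (3), specialised to an
integer-addition scrutinee (GHC's real caseRules in PrelRules covers many
more operations, including the commuted and unary forms):

  -- For a scrutinee (x + k), rewrite each literal alternative a to (a - k),
  -- so that the case can scrutinise x directly; DEFAULT is left alone.
  adjustAddAlts :: Integer -> [(Maybe Integer, rhs)] -> [(Maybe Integer, rhs)]
  adjustAddAlts k alts = [ (fmap (subtract k) lit, rhs) | (lit, rhs) <- alts ]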
1840
1841 -}
1842
1843 mkCase, mkCase1, mkCase2, mkCase3
1844 :: DynFlags
1845 -> OutExpr -> OutId
1846 -> OutType -> [OutAlt] -- Alternatives in standard (increasing) order
1847 -> SimplM OutExpr
1848
1849 --------------------------------------------------
1850 -- 1. Merge Nested Cases
1851 --------------------------------------------------
1852
1853 mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
1854 | gopt Opt_CaseMerge dflags
1855 , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
1856 <- stripTicksTop tickishFloatable deflt_rhs
1857 , inner_scrut_var == outer_bndr
1858 = do { tick (CaseMerge outer_bndr)
1859
1860 ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
1861 (con, args, wrap_rhs rhs)
1862 -- Simplifier's no-shadowing invariant should ensure
1863 -- that outer_bndr is not shadowed by the inner patterns
1864 wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
1865 -- The let is OK even for unboxed binders,
1866
1867 wrapped_alts | isDeadBinder inner_bndr = inner_alts
1868 | otherwise = map wrap_alt inner_alts
1869
1870 merged_alts = mergeAlts outer_alts wrapped_alts
1871 -- NB: mergeAlts gives priority to the left
1872 -- case x of
1873 -- A -> e1
1874 -- DEFAULT -> case x of
1875 -- A -> e2
1876 -- B -> e3
1877 -- When we merge, we must ensure that e1 takes
1878 -- precedence over e2 as the value for A!
1879
1880 ; fmap (mkTicks ticks) $
1881 mkCase1 dflags scrut outer_bndr alts_ty merged_alts
1882 }
1883 -- Warning: don't call mkCase recursively!
1884 -- Firstly, there's no point, because inner alts have already had
1885 -- mkCase applied to them, so they won't have a case in their default
1886 -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
1887 -- in munge_rhs may put a case into the DEFAULT branch!
1888
1889 mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
1890
1891 --------------------------------------------------
1892 -- 2. Eliminate Identity Case
1893 --------------------------------------------------
1894
1895 mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _) -- Identity case
1896 | all identity_alt alts
1897 = do { tick (CaseIdentity case_bndr)
1898 ; return (mkTicks ticks $ re_cast scrut rhs1) }
1899 where
1900 ticks = concatMap (stripTicksT tickishFloatable . thdOf3) (tail alts)
1901 identity_alt (con, args, rhs) = check_eq rhs con args
1902
1903 check_eq (Cast rhs co) con args
1904 = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
1905 -- See Note [RHS casts]
1906 check_eq (Lit lit) (LitAlt lit') _ = lit == lit'
1907 check_eq (Var v) _ _ | v == case_bndr = True
1908 check_eq (Var v) (DataAlt con) [] = v == dataConWorkId con
1909 -- Optimisation only
1910 check_eq (Tick t e) alt args = tickishFloatable t &&
1911 check_eq e alt args
1912 check_eq rhs (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $
1913 mkConApp con (arg_tys ++
1914 varsToCoreExprs args)
1915 check_eq _ _ _ = False
1916
1917 arg_tys = map Type (tyConAppArgs (idType case_bndr))
1918
1919 -- Note [RHS casts]
1920 -- ~~~~~~~~~~~~~~~~
1921 -- We've seen this:
1922 -- case e of x { _ -> x `cast` c }
1923 -- And we definitely want to eliminate this case, to give
1924 -- e `cast` c
1925 -- So we throw away the cast from the RHS, and reconstruct
1926 -- it at the other end. All the RHS casts must be the same
1927 -- if (all identity_alt alts) holds.
1928 --
1929 -- Don't worry about nested casts, because the simplifier combines them
1930
1931 re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
1932 re_cast scrut _ = scrut
1933
1934 mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
1935
1936 --------------------------------------------------
1937 -- 3. Scrutinee Constant Folding
1938 --------------------------------------------------
1939
1940 mkCase2 dflags scrut bndr alts_ty alts
1941 | gopt Opt_CaseFolding dflags
1942 , Just (scrut',f) <- caseRules dflags scrut
1943 = mkCase3 dflags scrut' bndr alts_ty (new_alts f)
1944 | otherwise
1945 = mkCase3 dflags scrut bndr alts_ty alts
1946 where
1947 -- We need to keep the correct association between the scrutinee and its
1948 -- binder if the latter isn't dead. Hence we wrap the rhs of each alternative with
1949 -- "let bndr = ... in":
1950 --
1951 --     case v + 10 of y   =====>   case v of y
1952 --        20      -> e1               10      -> let y = 20     in e1
1953 --        DEFAULT -> e2               DEFAULT -> let y = v + 10 in e2
1954 --
1955 -- Other transformations give:  =====>  case v of y'
1956 --                                        10      -> let y = 20      in e1
1957 --                                        DEFAULT -> let y = y' + 10 in e2
1958 --
1959 wrap_rhs l rhs
1960 | isDeadBinder bndr = rhs
1961 | otherwise = Let (NonRec bndr l) rhs
1962
1963 -- We need to re-sort the alternatives to preserve the #case_invariants#
1964 new_alts f = sortBy cmpAlt (map (mapAlt f) alts)
1965
1966 mapAlt f alt@(c,bs,e) = case c of
1967 DEFAULT -> (c, bs, wrap_rhs scrut e)
1968 LitAlt l
1969 | isLitValue l -> (LitAlt (mapLitValue dflags f l),
1970 bs, wrap_rhs (Lit l) e)
1971 _ -> pprPanic "Unexpected alternative (mkCase2)" (ppr alt)
1972
1973 --------------------------------------------------
1974 -- Catch-all
1975 --------------------------------------------------
1976 mkCase3 _dflags scrut bndr alts_ty alts
1977 = return (Case scrut bndr alts_ty alts)
1978
1979 {-
1980 Note [Dead binders]
1981 ~~~~~~~~~~~~~~~~~~~~
1982 Note that dead-ness is maintained by the simplifier, so that it is
1983 accurate after simplification as well as before.
1984
1985
1986 Note [Cascading case merge]
1987 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1988 Case merging should cascade in one sweep, because it
1989 happens bottom-up
1990
1991   case e of a {
1992     DEFAULT -> case a of b
1993       DEFAULT -> case b of c {
1994                    DEFAULT -> e
1995                    A -> ea
1996       B -> eb
1997     C -> ec
1998 ==>
1999   case e of a {
2000     DEFAULT -> case a of b
2001       DEFAULT -> let c = b in e
2002       A -> let c = b in ea
2003       B -> eb
2004     C -> ec
2005 ==>
2006   case e of a {
2007     DEFAULT -> let b = a in let c = b in e
2008     A -> let b = a in let c = b in ea
2009     B -> let b = a in eb
2010     C -> ec
2011
2012
2013 However here's a tricky case that we still don't catch, and I don't
2014 see how to catch it in one pass:
2015
2016 case x of c1 { I# a1 ->
2017 case a1 of c2 ->
2018 0 -> ...
2019 DEFAULT -> case x of c3 { I# a2 ->
2020 case a2 of ...
2021
2022 After occurrence analysis (and its binder-swap) we get this
2023
2024 case x of c1 { I# a1 ->
2025 let x = c1 in -- Binder-swap addition
2026 case a1 of c2 ->
2027 0 -> ...
2028 DEFAULT -> case x of c3 { I# a2 ->
2029 case a2 of ...
2030
2031 When we simplify the inner case x, we'll see that
2032 x=c1=I# a1. So we'll bind a2 to a1, and get
2033
2034 case x of c1 { I# a1 ->
2035 case a1 of c2 ->
2036 0 -> ...
2037 DEFAULT -> case a1 of ...
2038
2039 This is correct, but we can't do a case merge in this sweep
2040 because c2 /= a1. Reason: the binding c1=I# a1 went inwards
2041 without getting changed to c1=I# c2.
2042
2043 I don't think this is worth fixing, even if I knew how. It'll
2044 all come out in the next pass anyway.
2045 -}