1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 \section[SimplUtils]{The simplifier utilities}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplUtils (
10 -- Rebuilding
11 mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
12
13 -- Inlining,
14 preInlineUnconditionally, postInlineUnconditionally,
15 activeUnfolding, activeRule,
16 getUnfoldingInRuleMatch,
17 simplEnvForGHCi, updModeForStableUnfoldings, updModeForRules,
18
19 -- The continuation type
20 SimplCont(..), DupFlag(..),
21 isSimplified,
22 contIsDupable, contResultType, contHoleType,
23 contIsTrivial, contArgs,
24 countArgs,
25 mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
26 interestingCallContext,
27
28 -- ArgInfo
29 ArgInfo(..), ArgSpec(..), mkArgInfo,
30 addValArgTo, addCastTo, addTyArgTo,
31 argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
32
33 abstractFloats
34 ) where
35
36 #include "HsVersions.h"
37
38 import SimplEnv
39 import CoreMonad ( SimplifierMode(..), Tick(..) )
40 import DynFlags
41 import CoreSyn
42 import qualified CoreSubst
43 import PprCore
44 import CoreFVs
45 import CoreUtils
46 import CoreArity
47 import CoreUnfold
48 import Name
49 import Id
50 import IdInfo
51 import Var
52 import Demand
53 import SimplMonad
54 import Type hiding( substTy )
55 import Coercion hiding( substCo )
56 import DataCon ( dataConWorkId, isNullaryRepDataCon )
57 import VarSet
58 import BasicTypes
59 import Util
60 import MonadUtils
61 import Outputable
62 import Pair
63 import PrelRules
64 import FastString ( fsLit )
65
66 import Control.Monad ( when )
67 import Data.List ( sortBy )
68
69 {-
70 ************************************************************************
71 * *
72 The SimplCont and DupFlag types
73 * *
74 ************************************************************************
75
76 A SimplCont allows the simplifier to traverse the expression in a
77 zipper-like fashion. The SimplCont represents the rest of the expression,
78 "above" the point of interest.
79
80 You can also think of a SimplCont as an "evaluation context", using
81 that term in the way it is used for operational semantics. This is the
82 way I usually think of it. For example, you'll often see a syntax for
83 evaluation contexts looking like
84 C ::= [] | C e | case C of alts | C `cast` co
85 That's the kind of thing we are doing here, and I use that syntax in
86 the comments.
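
As a toy illustration (nothing to do with GHC's real data types; just a
sketch of the zipper/evaluation-context idea), one might write:

    data Expr = Var String | App Expr Expr | Case Expr [(String, Expr)]
    data Ctxt = Hole | AppArg Expr Ctxt | Scrut [(String, Expr)] Ctxt

    plug :: Expr -> Ctxt -> Expr      -- cf. rebuilding with a SimplCont
    plug e Hole           = e
    plug e (AppArg a k)   = plug (App e a) k
    plug e (Scrut alts k) = plug (Case e alts) k

Each SimplCont constructor below plays the role of one of these context
frames, with Stop playing the role of Hole.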
87
88
89 Key points:
90 * A SimplCont describes a *strict* context (just like
91 evaluation contexts do). E.g. Just [] is not a SimplCont
92
93 * A SimplCont describes a context that *does not* bind
94 any variables. E.g. \x. [] is not a SimplCont
95 -}
96
97 data SimplCont
98 = Stop -- Stop[e] = e
99 OutType -- Type of the <hole>
100 CallCtxt -- Tells if there is something interesting about
101 -- the context, and hence the inliner
102 -- should be a bit keener (see interestingCallContext)
103 -- Specifically:
104 -- This is an argument of a function that has RULES
105 -- Inlining the call might allow the rule to fire
106 -- Never ValAppCtxt (use ApplyToVal instead)
107 -- or CaseCtxt (use Select instead)
108
109 | CastIt -- (CastIt co K)[e] = K[ e `cast` co ]
110 OutCoercion -- The coercion simplified
111 -- Invariant: never an identity coercion
112 SimplCont
113
114 | ApplyToVal -- (ApplyToVal arg K)[e] = K[ e arg ]
115 { sc_dup :: DupFlag -- See Note [DupFlag invariants]
116 , sc_arg :: InExpr -- The argument,
117 , sc_env :: StaticEnv -- and its static env
118 , sc_cont :: SimplCont }
119
120 | ApplyToTy -- (ApplyToTy ty K)[e] = K[ e ty ]
121 { sc_arg_ty :: OutType -- Argument type
122 , sc_hole_ty :: OutType -- Type of the function, presumably (forall a. blah)
123 -- See Note [The hole type in ApplyToTy]
124 , sc_cont :: SimplCont }
125
126 | Select -- (Select alts K)[e] = K[ case e of alts ]
127 { sc_dup :: DupFlag -- See Note [DupFlag invariants]
128 , sc_bndr :: InId -- case binder
129 , sc_alts :: [InAlt] -- Alternatives
130 , sc_env :: StaticEnv -- and their static environment
131 , sc_cont :: SimplCont }
132
133 -- The two strict forms; see their sc_dup fields for when they can be duplicated
134 | StrictBind -- (StrictBind x xs b K)[e] = let x = e in K[\xs.b]
135 -- or, equivalently, = K[ (\x xs.b) e ]
136 { sc_dup :: DupFlag -- See Note [DupFlag invariants]
137 , sc_bndr :: InId
138 , sc_bndrs :: [InBndr]
139 , sc_body :: InExpr
140 , sc_env :: StaticEnv
141 , sc_cont :: SimplCont }
142
143 | StrictArg -- (StrictArg (f e1 ..en) K)[e] = K[ f e1 .. en e ]
144 { sc_dup :: DupFlag -- Always Simplified or OkToDup
145 , sc_fun :: ArgInfo -- Specifies f, e1..en, whether f has rules, etc.
146 -- plus strictness flags for *further* args
147 , sc_cci :: CallCtxt -- Whether *this* argument position is interesting
148 , sc_cont :: SimplCont }
149
150 | TickIt -- (TickIt t K)[e] = K[ tick t e ]
151 (Tickish Id) -- Tick tickish <hole>
152 SimplCont
153
154 data DupFlag = NoDup -- Unsimplified, might be big
155 | Simplified -- Simplified
156 | OkToDup -- Simplified and small
157
158 isSimplified :: DupFlag -> Bool
159 isSimplified NoDup = False
160 isSimplified _ = True -- Invariant: the subst-env is empty
161
162 perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
163 perhapsSubstTy dup env ty
164 | isSimplified dup = ty
165 | otherwise = substTy env ty
166
167 {-
168 Note [DupFlag invariants]
169 ~~~~~~~~~~~~~~~~~~~~~~~~~
170 In both (ApplyToVal dup _ env k)
171 and (Select dup _ _ env k)
172 the following invariants hold
173
174 (a) if dup = OkToDup, then continuation k is also ok-to-dup
175 (b) if dup = OkToDup or Simplified, the subst-env is empty
176 (and hence no need to re-simplify)
177 -}
178
179 instance Outputable DupFlag where
180 ppr OkToDup = text "ok"
181 ppr NoDup = text "nodup"
182 ppr Simplified = text "simpl"
183
184 instance Outputable SimplCont where
185 ppr (Stop ty interesting) = text "Stop" <> brackets (ppr interesting) <+> ppr ty
186 ppr (CastIt co cont ) = (text "CastIt" <+> pprOptCo co) $$ ppr cont
187 ppr (TickIt t cont) = (text "TickIt" <+> ppr t) $$ ppr cont
188 ppr (ApplyToTy { sc_arg_ty = ty, sc_cont = cont })
189 = (text "ApplyToTy" <+> pprParendType ty) $$ ppr cont
190 ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont })
191 = (text "ApplyToVal" <+> ppr dup <+> pprParendExpr arg)
192 $$ ppr cont
193 ppr (StrictBind { sc_bndr = b, sc_cont = cont })
194 = (text "StrictBind" <+> ppr b) $$ ppr cont
195 ppr (StrictArg { sc_fun = ai, sc_cont = cont })
196 = (text "StrictArg" <+> ppr (ai_fun ai)) $$ ppr cont
197 ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
198 = (text "Select" <+> ppr dup <+> ppr bndr) $$
199 ifPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
200
201
202 {- Note [The hole type in ApplyToTy]
203 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
204 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
205 continuation. It is absolutely necessary to compute contHoleType, but it is
206 not used for anything else (and hence may not be evaluated).
207
208 Why is it necessary for contHoleType? Consider the continuation
209 ApplyToTy Int (Stop Int)
210 corresponding to
211 (<hole> @Int) :: Int
212 What is the type of <hole>? It could be (forall a. Int) or (forall a. a),
213 and there is no way to know which, so we must record it.
214
215 In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType
216 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
217 doesn't matter because we'll never compute them all.
218
219 ************************************************************************
220 * *
221 ArgInfo and ArgSpec
222 * *
223 ************************************************************************
224 -}
225
226 data ArgInfo
227 = ArgInfo {
228 ai_fun :: OutId, -- The function
229 ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order)
230
231 ai_type :: OutType, -- Type of (f a1 ... an)
232
233 ai_rules :: FunRules, -- Rules for this function
234
235 ai_encl :: Bool, -- Flag saying whether this function
236 -- or an enclosing one has rules (recursively)
237 -- True => be keener to inline in all args
238
239 ai_strs :: [Bool], -- Strictness of remaining arguments
240 -- Usually infinite, but if it is finite it guarantees
241 -- that the function diverges after being given
242 -- that number of args
243 ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline
244 -- Always infinite
245 }
246
247 data ArgSpec
248 = ValArg OutExpr -- Apply to this (coercion or value); c.f. ApplyToVal
249 | TyArg { as_arg_ty :: OutType -- Apply to this type; c.f. ApplyToTy
250 , as_hole_ty :: OutType } -- Type of the function (presumably forall a. blah)
251 | CastBy OutCoercion -- Cast by this; c.f. CastIt
252
253 instance Outputable ArgSpec where
254 ppr (ValArg e) = text "ValArg" <+> ppr e
255 ppr (TyArg { as_arg_ty = ty }) = text "TyArg" <+> ppr ty
256 ppr (CastBy c) = text "CastBy" <+> ppr c
257
258 addValArgTo :: ArgInfo -> OutExpr -> ArgInfo
259 addValArgTo ai arg = ai { ai_args = ValArg arg : ai_args ai
260 , ai_type = applyTypeToArg (ai_type ai) arg
261 , ai_rules = decRules (ai_rules ai) }
262
263 addTyArgTo :: ArgInfo -> OutType -> ArgInfo
264 addTyArgTo ai arg_ty = ai { ai_args = arg_spec : ai_args ai
265 , ai_type = piResultTy poly_fun_ty arg_ty
266 , ai_rules = decRules (ai_rules ai) }
267 where
268 poly_fun_ty = ai_type ai
269 arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = poly_fun_ty }
270
271 addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
272 addCastTo ai co = ai { ai_args = CastBy co : ai_args ai
273 , ai_type = pSnd (coercionKind co) }
274
275 argInfoAppArgs :: [ArgSpec] -> [OutExpr]
276 argInfoAppArgs [] = []
277 argInfoAppArgs (CastBy {} : _) = [] -- Stop at a cast
278 argInfoAppArgs (ValArg e : as) = e : argInfoAppArgs as
279 argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as
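-- For example (informally, writing "TyArg t" for a TyArg whose as_arg_ty is t):
--   argInfoAppArgs [ValArg x, TyArg t, CastBy co, ValArg y] = [x, Type t]
-- i.e. we collect arguments until we hit the first cast.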
280
281 pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
282 pushSimplifiedArgs _env [] k = k
283 pushSimplifiedArgs env (arg : args) k
284 = case arg of
285 TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
286 -> ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
287 ValArg e -> ApplyToVal { sc_arg = e, sc_env = env, sc_dup = Simplified, sc_cont = rest }
288 CastBy c -> CastIt c rest
289 where
290 rest = pushSimplifiedArgs env args k
291 -- The env has an empty SubstEnv
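--
-- For example (informally):
--   pushSimplifiedArgs env [TyArg t, ValArg e] k  =  ApplyToTy t (ApplyToVal e k)
-- so filling the hole with f rebuilds k[ (f @t) e ]: the head of the list
-- becomes the innermost (first) application.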
292
293 argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
294 -- NB: the [ArgSpec] is reversed so that the first arg
295 -- in the list is the last one in the application
296 argInfoExpr fun rev_args
297 = go rev_args
298 where
299 go [] = Var fun
300 go (ValArg a : as) = go as `App` a
301 go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty
302 go (CastBy co : as) = mkCast (go as) co
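-- For example (informally), with the reversed argument list:
--   argInfoExpr f [CastBy co, ValArg x, TyArg t] = (f @t x) `cast` co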
303
304
305 type FunRules = Maybe (Int, [CoreRule]) -- Remaining rules for this function
306 -- Nothing => No rules
307 -- Just (n, rules) => some rules, requiring at least n more type/value args
308
309 decRules :: FunRules -> FunRules
310 decRules (Just (n, rules)) = Just (n-1, rules)
311 decRules Nothing = Nothing
312
313 mkFunRules :: [CoreRule] -> FunRules
314 mkFunRules [] = Nothing
315 mkFunRules rs = Just (n_required, rs)
316 where
317 n_required = maximum (map ruleArity rs)
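-- For example (informally): if a function has rules of ruleArity 1 and 3,
-- mkFunRules gives Just (3, rules); addValArgTo/addTyArgTo then decRules
-- this towards Just (0, rules), at which point every rule has been given
-- enough arguments to match.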
318
319 {-
320 ************************************************************************
321 * *
322 Functions on SimplCont
323 * *
324 ************************************************************************
325 -}
326
327 mkBoringStop :: OutType -> SimplCont
328 mkBoringStop ty = Stop ty BoringCtxt
329
330 mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in CoreUnfold
331 mkRhsStop ty = Stop ty RhsCtxt
332
333 mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
334 mkLazyArgStop ty cci = Stop ty cci
335
336 -------------------
337 contIsRhsOrArg :: SimplCont -> Bool
338 contIsRhsOrArg (Stop {}) = True
339 contIsRhsOrArg (StrictBind {}) = True
340 contIsRhsOrArg (StrictArg {}) = True
341 contIsRhsOrArg _ = False
342
343 contIsRhs :: SimplCont -> Bool
344 contIsRhs (Stop _ RhsCtxt) = True
345 contIsRhs _ = False
346
347 -------------------
348 contIsDupable :: SimplCont -> Bool
349 contIsDupable (Stop {}) = True
350 contIsDupable (ApplyToTy { sc_cont = k }) = contIsDupable k
351 contIsDupable (ApplyToVal { sc_dup = OkToDup }) = True -- See Note [DupFlag invariants]
352 contIsDupable (Select { sc_dup = OkToDup }) = True -- ...ditto...
353 contIsDupable (StrictArg { sc_dup = OkToDup }) = True -- ...ditto...
354 contIsDupable (CastIt _ k) = contIsDupable k
355 contIsDupable _ = False
356
357 -------------------
358 contIsTrivial :: SimplCont -> Bool
359 contIsTrivial (Stop {}) = True
360 contIsTrivial (ApplyToTy { sc_cont = k }) = contIsTrivial k
361 contIsTrivial (ApplyToVal { sc_arg = Coercion _, sc_cont = k }) = contIsTrivial k
362 contIsTrivial (CastIt _ k) = contIsTrivial k
363 contIsTrivial _ = False
364
365 -------------------
366 contResultType :: SimplCont -> OutType
367 contResultType (Stop ty _) = ty
368 contResultType (CastIt _ k) = contResultType k
369 contResultType (StrictBind { sc_cont = k }) = contResultType k
370 contResultType (StrictArg { sc_cont = k }) = contResultType k
371 contResultType (Select { sc_cont = k }) = contResultType k
372 contResultType (ApplyToTy { sc_cont = k }) = contResultType k
373 contResultType (ApplyToVal { sc_cont = k }) = contResultType k
374 contResultType (TickIt _ k) = contResultType k
375
376 contHoleType :: SimplCont -> OutType
377 contHoleType (Stop ty _) = ty
378 contHoleType (TickIt _ k) = contHoleType k
379 contHoleType (CastIt co _) = pFst (coercionKind co)
380 contHoleType (StrictBind { sc_bndr = b, sc_dup = dup, sc_env = se })
381 = perhapsSubstTy dup se (idType b)
382 contHoleType (StrictArg { sc_fun = ai }) = funArgTy (ai_type ai)
383 contHoleType (ApplyToTy { sc_hole_ty = ty }) = ty -- See Note [The hole type in ApplyToTy]
384 contHoleType (ApplyToVal { sc_arg = e, sc_env = se, sc_dup = dup, sc_cont = k })
385 = mkFunTy (perhapsSubstTy dup se (exprType e))
386 (contHoleType k)
387 contHoleType (Select { sc_dup = d, sc_bndr = b, sc_env = se })
388 = perhapsSubstTy d se (idType b)
389
390 -------------------
391 countArgs :: SimplCont -> Int
392 -- Count all arguments, including types, coercions, and other values
393 countArgs (ApplyToTy { sc_cont = cont }) = 1 + countArgs cont
394 countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont
395 countArgs _ = 0
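-- For example (informally): for the context ([] @t x y), i.e.
--   ApplyToTy t (ApplyToVal x (ApplyToVal y (Stop ...)))
-- countArgs gives 3.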
396
397 contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
398 -- Summarises value args, discards type args and coercions
399 -- The returned continuation of the call is only used to
400 -- answer questions like "are you interesting?"
401 contArgs cont
402 | lone cont = (True, [], cont)
403 | otherwise = go [] cont
404 where
405 lone (ApplyToTy {}) = False -- See Note [Lone variables] in CoreUnfold
406 lone (ApplyToVal {}) = False
407 lone (CastIt {}) = False
408 lone _ = True
409
410 go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
411 = go (is_interesting arg se : args) k
412 go args (ApplyToTy { sc_cont = k }) = go args k
413 go args (CastIt _ k) = go args k
414 go args k = (False, reverse args, k)
415
416 is_interesting arg se = interestingArg se arg
417 -- Do *not* use short-cutting substitution here
418 -- because we want to get as much IdInfo as possible
419
420
421 -------------------
422 mkArgInfo :: Id
423 -> [CoreRule] -- Rules for function
424 -> Int -- Number of value args
425 -> SimplCont -- Context of the call
426 -> ArgInfo
427
428 mkArgInfo fun rules n_val_args call_cont
429 | n_val_args < idArity fun -- Note [Unsaturated functions]
430 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
431 , ai_rules = fun_rules, ai_encl = False
432 , ai_strs = vanilla_stricts
433 , ai_discs = vanilla_discounts }
434 | otherwise
435 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
436 , ai_rules = fun_rules
437 , ai_encl = interestingArgContext rules call_cont
438 , ai_strs = add_type_str fun_ty arg_stricts
439 , ai_discs = arg_discounts }
440 where
441 fun_ty = idType fun
442
443 fun_rules = mkFunRules rules
444
445 vanilla_discounts, arg_discounts :: [Int]
446 vanilla_discounts = repeat 0
447 arg_discounts = case idUnfolding fun of
448 CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
449 -> discounts ++ vanilla_discounts
450 _ -> vanilla_discounts
451
452 vanilla_stricts, arg_stricts :: [Bool]
453 vanilla_stricts = repeat False
454
455 arg_stricts
456 = case splitStrictSig (idStrictness fun) of
457 (demands, result_info)
458 | not (demands `lengthExceeds` n_val_args)
459 -> -- Enough args, use the strictness given.
460 -- For bottoming functions we used to pretend that the arg
461 -- is lazy, so that we don't treat the arg as an
462 -- interesting context. This avoids substituting
463 -- top-level bindings for (say) strings into
464 -- calls to error. But now we are more careful about
465 inlining lone variables, so it's ok (see SimplUtils.analyseCont)
466 if isBotRes result_info then
467 map isStrictDmd demands -- Finite => result is bottom
468 else
469 map isStrictDmd demands ++ vanilla_stricts
470 | otherwise
471 -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
472 <+> ppr n_val_args <+> ppr demands )
473 vanilla_stricts -- Not enough args, or no strictness
474
475 add_type_str :: Type -> [Bool] -> [Bool]
476 -- If the function arg types are strict, record that in the 'strictness bits'
477 -- No need to instantiate because unboxed types (which dominate the strict
478 -- types) can't instantiate type variables.
479 -- add_type_str is done repeatedly (for each call); might be better
480 -- once-for-all in the function
481 -- But beware primops/datacons with no strictness
482
483 add_type_str
484 = go
485 where
486 go _ [] = []
487 go fun_ty strs -- Look through foralls
488 | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty -- Includes coercions
489 = go fun_ty' strs
490 go fun_ty (str:strs) -- Add strict-type info
491 | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
492 = (str || Just False == isLiftedType_maybe arg_ty) : go fun_ty' strs
493 -- If the type is levity-polymorphic, we can't know whether it's
494 -- strict. isLiftedType_maybe will return Just False only when
495 -- we're sure the type is unlifted.
496 go _ strs
497 = strs
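        -- For example (informally): with fun_ty = Int# -> Bool -> r and
        -- strs = [False, False], go returns [True, False]: the Int# argument
        -- is recorded as strict, because isLiftedType_maybe Int# is Just False.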
498
499 {- Note [Unsaturated functions]
500 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
501 Consider (test eyeball/inline4)
502 x = a:as
503 y = f x
504 where f has arity 2. Then we do not want to inline 'x', because
505 it'll just be floated out again. Even if f has lots of discounts
506 on its first argument -- it must be saturated for these to kick in
507 -}
508
509
510 {-
511 ************************************************************************
512 * *
513 Interesting arguments
514 * *
515 ************************************************************************
516
517 Note [Interesting call context]
518 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
519 We want to avoid inlining an expression where there can't possibly be
520 any gain, such as in an argument position. Hence, if the continuation
521 is interesting (eg. a case scrutinee, application etc.) then we
522 inline, otherwise we don't.
523
524 Previously some_benefit used to return True only if the variable was
525 applied to some value arguments. This didn't work:
526
527 let x = _coerce_ (T Int) Int (I# 3) in
528 case _coerce_ Int (T Int) x of
529 I# y -> ....
530
531 we want to inline x, but can't see that it's a constructor in a case
532 scrutinee position, and some_benefit is False.
533
534 Another example:
535
536 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
537
538 .... case dMonadST _@_ x0 of (a,b,c) -> ....
539
540 we'd really like to inline dMonadST here, but we *don't* want to
541 inline if the case expression is just
542
543 case x of y { DEFAULT -> ... }
544
545 since we can just eliminate this case instead (x is in WHNF). The same
546 applies when x is bound to a lambda expression. Hence
547 contIsInteresting looks for case expressions with just a single
548 default case.
549 -}
550
551 interestingCallContext :: SimplCont -> CallCtxt
552 -- See Note [Interesting call context]
553 interestingCallContext cont
554 = interesting cont
555 where
556 interesting (Select {}) = CaseCtxt
557 interesting (ApplyToVal {}) = ValAppCtxt
558 -- Can happen if we have (f Int |> co) y
559 -- If f has an INLINE prag we need to give it some
560 -- motivation to inline. See Note [Cast then apply]
561 -- in CoreUnfold
562
563 interesting (StrictArg { sc_cci = cci }) = cci
564 interesting (StrictBind {}) = BoringCtxt
565 interesting (Stop _ cci) = cci
566 interesting (TickIt _ k) = interesting k
567 interesting (ApplyToTy { sc_cont = k }) = interesting k
568 interesting (CastIt _ k) = interesting k
569 -- If this call is the arg of a strict function, the context
570 -- is a bit interesting. If we inline here, we may get useful
571 -- evaluation information to avoid repeated evals: e.g.
572 -- x + (y * z)
573 -- Here the contIsInteresting makes the '*' keener to inline,
574 -- which in turn exposes a constructor which makes the '+' inline.
575 -- Assuming that +,* aren't small enough to inline regardless.
576 --
577 -- It's also very important to inline in a strict context for things
578 -- like
579 -- foldr k z (f x)
580 -- Here, the context of (f x) is strict, and if f's unfolding is
581 -- a build it's *great* to inline it here. So we must ensure that
582 -- the context for (f x) is not totally uninteresting.
583
584 interestingArgContext :: [CoreRule] -> SimplCont -> Bool
585 -- If the argument has form (f x y), where x,y are boring,
586 -- and f is marked INLINE, then we don't want to inline f.
587 -- But if the context of the argument is
588 -- g (f x y)
589 -- where g has rules, then we *do* want to inline f, in case it
590 -- exposes a rule that might fire. Similarly, if the context is
591 -- h (g (f x x))
592 -- where h has rules, then we do want to inline f; hence the
593 -- call_cont argument to interestingArgContext
594 --
595 -- The ai_encl flag makes this happen; if it's
596 -- set, the inliner gets just keen enough to inline f
597 -- regardless of how boring f's arguments are, if it's marked INLINE
598 --
599 -- The alternative would be to *always* inline an INLINE function,
600 -- regardless of how boring its context is; but that seems overkill
601 -- For example, it'd mean that wrapper functions were always inlined
602 --
603 -- The call_cont passed to interestingArgContext is the context of
604 -- the call itself, e.g. g <hole> in the example above
605 interestingArgContext rules call_cont
606 = notNull rules || enclosing_fn_has_rules
607 where
608 enclosing_fn_has_rules = go call_cont
609
610 go (Select {}) = False
611 go (ApplyToVal {}) = False -- Shouldn't really happen
612 go (ApplyToTy {}) = False -- Ditto
613 go (StrictArg { sc_cci = cci }) = interesting cci
614 go (StrictBind {}) = False -- ??
615 go (CastIt _ c) = go c
616 go (Stop _ cci) = interesting cci
617 go (TickIt _ c) = go c
618
619 interesting RuleArgCtxt = True
620 interesting _ = False
621
622
623 {- Note [Interesting arguments]
624 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
625 An argument is interesting if it deserves a discount for unfoldings
626 with a discount in that argument position. The idea is to avoid
627 unfolding a function that is applied only to variables that have no
628 unfolding (i.e. they are probably lambda bound):   f x y z
629 There is little point in inlining f here.
630
631 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
632 we must look through lets, eg (let x = e in C a b), because the let will
633 float, exposing the value, if we inline. That makes it different to
634 exprIsHNF.
635
636 Before 2009 we said it was interesting if the argument had *any* structure
637 at all; i.e. (hasSomeUnfolding v). But that did too much inlining; see Trac #3016.
638
639 But we don't regard (f x y) as interesting, unless f is unsaturated.
640 If it's saturated and f hasn't inlined, then it's probably not going
641 to now!
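
For example (an informal summary of go/go_var below): a literal, a value
lambda, or a saturated constructor application like (C a b) counts as
ValueArg; a bare variable with no useful unfolding counts as TrivArg; and a
saturated call (f x y) of an unknown f counts as NonTrivArg.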
642
643 Note [Conlike is interesting]
644 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
645 Consider
646 f d = ...((*) d x y)...
647 ... f (df d')...
648 where df is con-like. Then we'd really like to inline 'f' so that the
649 rule for (*) (df d) can fire. To do this
650 a) we give a discount for being an argument of a class-op (eg (*) d)
651 b) we say that a con-like argument (eg (df d)) is interesting
652 -}
653
654 interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
655 -- See Note [Interesting arguments]
656 interestingArg env e = go env 0 e
657 where
658 -- n is # value args to which the expression is applied
659 go env n (Var v)
660 = case substId env v of
661 DoneId v' -> go_var n v'
662 DoneEx e _ -> go (zapSubstEnv env) n e
663 ContEx tvs cvs ids e -> go (setSubstEnv env tvs cvs ids) n e
664
665 go _ _ (Lit {}) = ValueArg
666 go _ _ (Type _) = TrivArg
667 go _ _ (Coercion _) = TrivArg
668 go env n (App fn (Type _)) = go env n fn
669 go env n (App fn _) = go env (n+1) fn
670 go env n (Tick _ a) = go env n a
671 go env n (Cast e _) = go env n e
672 go env n (Lam v e)
673 | isTyVar v = go env n e
674 | n>0 = NonTrivArg -- (\x.b) e is NonTriv
675 | otherwise = ValueArg
676 go _ _ (Case {}) = NonTrivArg
677 go env n (Let b e) = case go env' n e of
678 ValueArg -> ValueArg
679 _ -> NonTrivArg
680 where
681 env' = env `addNewInScopeIds` bindersOf b
682
683 go_var n v
684 | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather than
685 -- data constructors here
686 | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
687 | n > 0 = NonTrivArg -- Saturated or unknown call
688 | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
689 -- See Note [Conlike is interesting]
690 | otherwise = TrivArg -- n==0, no useful unfolding
691 where
692 conlike_unfolding = isConLikeUnfolding (idUnfolding v)
693
694 {-
695 ************************************************************************
696 * *
697 SimplifierMode
698 * *
699 ************************************************************************
700
701 The SimplifierMode controls several switches; see its definition in
702 CoreMonad
703 sm_rules :: Bool -- Whether RULES are enabled
704 sm_inline :: Bool -- Whether inlining is enabled
705 sm_case_case :: Bool -- Whether case-of-case is enabled
706 sm_eta_expand :: Bool -- Whether eta-expansion is enabled
707 -}
708
709 simplEnvForGHCi :: DynFlags -> SimplEnv
710 simplEnvForGHCi dflags
711 = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
712 , sm_phase = InitialPhase
713 , sm_rules = rules_on
714 , sm_inline = False
715 , sm_eta_expand = eta_expand_on
716 , sm_case_case = True }
717 where
718 rules_on = gopt Opt_EnableRewriteRules dflags
719 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
720 -- Do not do any inlining, in case we expose some unboxed
721 -- tuple stuff that confuses the bytecode interpreter
722
723 updModeForStableUnfoldings :: Activation -> SimplifierMode -> SimplifierMode
724 -- See Note [Simplifying inside stable unfoldings]
725 updModeForStableUnfoldings inline_rule_act current_mode
726 = current_mode { sm_phase = phaseFromActivation inline_rule_act
727 , sm_inline = True
728 , sm_eta_expand = False }
729 -- sm_eta_expand: see Note [No eta expansion in stable unfoldings]
730 -- For sm_rules, just inherit; sm_rules might be "off"
731 -- because of -fno-enable-rewrite-rules
732 where
733 phaseFromActivation (ActiveAfter _ n) = Phase n
734 phaseFromActivation _ = InitialPhase
735
736 updModeForRules :: SimplifierMode -> SimplifierMode
737 -- See Note [Simplifying rules]
738 updModeForRules current_mode
739 = current_mode { sm_phase = InitialPhase
740 , sm_inline = False
741 , sm_rules = False
742 , sm_eta_expand = False }
743
744 {- Note [Simplifying rules]
745 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
746 When simplifying a rule LHS, refrain from /any/ inlining or applying
747 of other RULES.
748
749 Doing anything to the LHS is plain confusing, because it means that what the
750 rule matches is not what the user wrote. c.f. Trac #10595, and #10528.
751 Moreover, inlining (or applying rules) on rule LHSs risks introducing
752 Ticks into the LHS, which makes matching trickier. Trac #10665, #10745.
753
754 Doing this to either side confounds tools like HERMIT, which seek to reason
755 about and apply the RULES as originally written. See Trac #10829.
756
757 Note [No eta expansion in stable unfoldings]
758 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
759 If we have a stable unfolding
760
761 f :: Ord a => a -> IO ()
762 -- Unfolding template
763 -- = /\a \(d:Ord a) (x:a). bla
764
765 we do not want to eta-expand to
766
767 f :: Ord a => a -> IO ()
768 -- Unfolding template
769 -- = (/\a \(d:Ord a) (x:a) (eta:State#). bla eta) |> co
770
771 because then specialisation of the overloading doesn't work properly
772 (see Note [Specialisation shape] in Specialise), Trac #9509.
773
774 So we disable eta-expansion in stable unfoldings.
775
776 Note [Inlining in gentle mode]
777 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
778 Something is inlined if
779 (i) the sm_inline flag is on, AND
780 (ii) the thing has an INLINE pragma, AND
781 (iii) the thing is inlinable in the earliest phase.
782
783 Example of why (iii) is important:
784 {-# INLINE [~1] g #-}
785 g = ...
786
787 {-# INLINE f #-}
788 f x = g (g x)
789
790 If we were to inline g into f's inlining, then an importing module would
791 never be able to do
792 f e --> g (g e) ---> RULE fires
793 because the stable unfolding for f has had g inlined into it.
794
795 On the other hand, it is bad not to do ANY inlining into a
796 stable unfolding, because then recursive knots in instance declarations
797 don't get unravelled.
798
799 However, *sometimes* SimplGently must do no call-site inlining at all
800 (hence sm_inline = False). Before full laziness we must be careful
801 not to inline wrappers, because doing so inhibits floating
802 e.g. ...(case f x of ...)...
803 ==> ...(case (case x of I# x# -> fw x#) of ...)...
804 ==> ...(case x of I# x# -> case fw x# of ...)...
805 and now the redex (f x) isn't floatable any more.
806
807 The no-inlining thing is also important for Template Haskell. You might be
808 compiling in one-shot mode with -O2; but when TH compiles a splice before
809 running it, we don't want to use -O2. Indeed, we don't want to inline
810 anything, because the byte-code interpreter might get confused about
811 unboxed tuples and suchlike.
812
813 Note [Simplifying inside stable unfoldings]
814 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
815 We must take care with simplification inside stable unfoldings (which come from
816 INLINE pragmas).
817
818 First, consider the following example
819 let f = \pq -> BIG
820 in
821 let g = \y -> f y y
822 {-# INLINE g #-}
823 in ...g...g...g...g...g...
824 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
825 and thence copied multiple times when g is inlined. HENCE we treat
826 any occurrence in a stable unfolding as a multiple occurrence, not a single
827 one; see OccurAnal.addRuleUsage.
828
829 Second, we *do* want to do some modest rules/inlining stuff in stable
830 unfoldings, partly to eliminate senseless crap, and partly to break
831 the recursive knots generated by instance declarations.
832
833 However, suppose we have
834 {-# INLINE <act> f #-}
835 f = <rhs>
836 meaning "inline f in phases p where activation <act>(p) holds".
837 Then what inlinings/rules can we apply to the copy of <rhs> captured in
838 f's stable unfolding? Our model is that literally <rhs> is substituted for
839 f when it is inlined. So our conservative plan (implemented by
840 updModeForStableUnfoldings) is this:
841
842 -------------------------------------------------------------
843 When simplifying the RHS of a stable unfolding, set the phase
844 to the phase in which the stable unfolding first becomes active
845 -------------------------------------------------------------
846
847 That ensures that
848
849 a) Rules/inlinings that *cease* being active before p will
850 not apply to the stable unfolding, consistent with it being
851 inlined in its *original* form in phase p.
852
853 b) Rules/inlinings that only become active *after* p will
854 not apply to the stable unfolding, again to be consistent with
855 inlining the *original* rhs in phase p.
856
857 For example,
858 {-# INLINE f #-}
859 f x = ...g...
860
861 {-# NOINLINE [1] g #-}
862 g y = ...
863
864 {-# RULE h g = ... #-}
865 Here we must not inline g into f's RHS, even when we get to phase 0,
866 because when f is later inlined into some other module we want the
867 rule for h to fire.
868
869 Similarly, consider
870 {-# INLINE f #-}
871 f x = ...g...
872
873 g y = ...
874 and suppose that there are auto-generated specialisations and a strictness
875 wrapper for g. The specialisations get activation AlwaysActive, and the
876 strictness wrapper gets activation (ActiveAfter 0). So the strictness
877 wrapper fails the test and won't be inlined into f's stable unfolding. That
878 means f can inline, expose the specialised call to g, so the specialisation
879 rules can fire.
880
881 A note about wrappers
882 ~~~~~~~~~~~~~~~~~~~~~
883 It's also important not to inline a worker back into a wrapper.
884 A wrapper looks like
885 wrapper = inline_me (\x -> ...worker... )
886 Normally, the inline_me prevents the worker getting inlined into
887 the wrapper (initially, the worker's only call site!). But,
888 if the wrapper is sure to be called, the strictness analyser will
889 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
890 continuation.
891 -}
892
893 activeUnfolding :: SimplEnv -> Id -> Bool
894 activeUnfolding env id
895 | isCompulsoryUnfolding (realIdUnfolding id)
896 = True -- Even sm_inline can't override compulsory unfoldings
897 | otherwise
898 = isActive (sm_phase mode) (idInlineActivation id)
899 && sm_inline mode
900 -- `or` isStableUnfolding (realIdUnfolding id)
901 -- Inline things when
902 -- (a) they are active
903 -- (b) sm_inline says so, except that for stable unfoldings
904 -- (ie pragmas) we inline anyway
905 where
906 mode = getMode env
907
908 getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
909 -- When matching in RULE, we want to "look through" an unfolding
910 -- (to see a constructor) if *rules* are on, even if *inlinings*
911 -- are not. A notable example is DFuns, which really we want to
912 -- match in rules like (op dfun) in gentle mode. Another example
913 -- is 'otherwise' which we want exprIsConApp_maybe to be able to
914 -- see very early on
915 getUnfoldingInRuleMatch env
916 = (in_scope, id_unf)
917 where
918 in_scope = seInScope env
919 mode = getMode env
920 id_unf id | unf_is_active id = idUnfolding id
921 | otherwise = NoUnfolding
922 unf_is_active id
923 | not (sm_rules mode) = -- active_unfolding_minimal id
924 isStableUnfolding (realIdUnfolding id)
925 -- Do we even need to test this? I think this InScopeEnv
926 -- is only consulted if activeRule returns True, which
927 -- never happens if sm_rules is False
928 | otherwise = isActive (sm_phase mode) (idInlineActivation id)
929
930 ----------------------
931 activeRule :: SimplEnv -> Activation -> Bool
932 -- Nothing => No rules at all
933 activeRule env
934 | not (sm_rules mode) = \_ -> False -- Rewriting is off
935 | otherwise = isActive (sm_phase mode)
936 where
937 mode = getMode env
938
939 {-
940 ************************************************************************
941 * *
942 preInlineUnconditionally
943 * *
944 ************************************************************************
945
946 preInlineUnconditionally
947 ~~~~~~~~~~~~~~~~~~~~~~~~
948 @preInlineUnconditionally@ examines a bndr to see if it is used just
949 once in a completely safe way, so that it is safe to discard the
950 binding and inline its RHS at the (unique) usage site, REGARDLESS of how
951 big the RHS might be. If this is the case we don't simplify the RHS
952 first, but just inline it un-simplified.
953
954 This is much better than first simplifying a perhaps-huge RHS and then
955 inlining and re-simplifying it. Indeed, it can be at least quadratically
956 better. Consider
957
958 x1 = e1
959 x2 = e2[x1]
960 x3 = e3[x2]
961 ...etc...
962 xN = eN[xN-1]
963
964 We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times, etc.
965 This can happen with cascades of functions too:
966
967 f1 = \x1.e1
968 f2 = \xs.e2[f1]
969 f3 = \xs.e3[f2]
970 ...etc...
971
972 THE MAIN INVARIANT is this:
973
974 ---- preInlineUnconditionally invariant -----
975 IF preInlineUnconditionally chooses to inline x = <rhs>
976 THEN doing the inlining should not change the occurrence
977 info for the free vars of <rhs>
978 ----------------------------------------------
979
980 For example, it's tempting to look at trivial binding like
981 x = y
982 and inline it unconditionally. But suppose x is used many times,
983 but this is the unique occurrence of y. Then inlining x would change
984 y's occurrence info, which breaks the invariant. It matters: y
985 might have a BIG rhs, which will now be dup'd at every occurrence of x.
986
987
988 Even RHSs labelled InlineMe aren't caught here, because there might be
989 no benefit from inlining at the call site.
990
991 [Sept 01] Don't unconditionally inline a top-level thing, because that
992 can simply make a static thing into something built dynamically. E.g.
993 x = (a,b)
994 main = \s -> h x
995
996 [Remember that we treat \s as a one-shot lambda.] No point in
997 inlining x unless there is something interesting about the call site.
998
999 But watch out: if you aren't careful, some useful foldr/build fusion
1000 can be lost (most notably in spectral/hartel/parstof) because the
1001 foldr didn't see the build. Doing the dynamic allocation isn't a big
1002 deal, in fact, but losing the fusion can be. But the right thing here
1003 seems to be to do a callSiteInline based on the fact that there is
1004 something interesting about the call site (it's strict). Hmm. That
1005 seems a bit fragile.
1006
1007 Conclusion: inline top level things gaily until Phase 0 (the last
1008 phase), at which point don't.
1009
1010 Note [pre/postInlineUnconditionally in gentle mode]
1011 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1012 Even in gentle mode we want to do preInlineUnconditionally. The
1013 reason is that too little clean-up happens if you don't inline
1014 use-once things. Also a bit of inlining is *good* for full laziness;
1015 it can expose constant sub-expressions. Example in
1016 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
1017 let-float if you inline windowToViewport
1018
1019 However, as usual for Gentle mode, do not inline things that are
1020 inactive in the initial stages. See Note [Gentle mode].
1021
1022 Note [Stable unfoldings and preInlineUnconditionally]
1023 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1024 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
1025 Example
1026
1027 {-# INLINE f #-}
1028 f :: Eq a => a -> a
1029 f x = ...
1030
1031 fInt :: Int -> Int
1032 fInt = f Int dEqInt
1033
1034 ...fInt...fInt...fInt...
1035
1036 Here f occurs just once, in the RHS of fInt. But if we inline it there
1037 it might make fInt look big, and we'll lose the opportunity to inline f
1038 at each of fInt's call sites. The INLINE pragma will only inline when
1039 the application is saturated for exactly this reason; and we don't
1040 want PreInlineUnconditionally to second-guess it. A live example is
1041 Trac #3736.
1042 c.f. Note [Stable unfoldings and postInlineUnconditionally]
1043
1044 Note [Top-level bottoming Ids]
1045 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1046 Don't inline top-level Ids that are bottoming, even if they are used just
1047 once, because FloatOut has gone to some trouble to extract them out.
1048 Inlining them won't make the program run faster!
1049
1050 Note [Do not inline CoVars unconditionally]
1051 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1052 Coercion variables appear inside coercions, and the RHS of a let-binding
1053 is a term (not a coercion) so we can't necessarily inline the latter in
1054 the former.
1055 -}
1056
1057 preInlineUnconditionally :: DynFlags -> SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
1058 -- Precondition: rhs satisfies the let/app invariant
1059 -- See Note [CoreSyn let/app invariant] in CoreSyn
1060 -- Reason: we don't want to inline single uses, or discard dead bindings,
1061 -- for unlifted, side-effect-ful bindings
1062 preInlineUnconditionally dflags env top_lvl bndr rhs
1063 | not active = False
1064 | isStableUnfolding (idUnfolding bndr) = False -- Note [Stable unfoldings and preInlineUnconditionally]
1065 | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
1066 | not (gopt Opt_SimplPreInlining dflags) = False
1067 | isCoVar bndr = False -- Note [Do not inline CoVars unconditionally]
1068 | otherwise = case idOccInfo bndr of
1069 IAmDead -> True -- Happens in ((\x.1) v)
1070 occ@OneOcc { occ_one_br = True }
1071 -> try_once (occ_in_lam occ)
1072 (occ_int_cxt occ)
1073 _ -> False
1074 where
1075 mode = getMode env
1076 active = isActive (sm_phase mode) act
1077 -- See Note [pre/postInlineUnconditionally in gentle mode]
1078 act = idInlineActivation bndr
1079 try_once in_lam int_cxt -- There's one textual occurrence
1080 | not in_lam = isNotTopLevel top_lvl || early_phase
1081 | otherwise = int_cxt && canInlineInLam rhs
1082
1083 -- Be very careful before inlining inside a lambda, because (a) we must not
1084 -- invalidate occurrence information, and (b) we want to avoid pushing a
1085 -- single allocation (here) into multiple allocations (inside lambda).
1086 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
1087 -- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
1088 -- where
1089 -- is_cheap = exprIsCheap rhs
1090 -- ok = is_cheap && int_cxt
1091
1092 -- int_cxt The context isn't totally boring
1093 -- E.g. let f = \ab.BIG in \y. map f xs
1094 -- Don't want to substitute for f, because then we allocate
1095 -- its closure every time the \y is called
1096 -- But: let f = \ab.BIG in \y. map (f y) xs
1097 -- Now we do want to substitute for f, even though it's not
1098 -- saturated, because we're going to allocate a closure for
1099 -- (f y) every time round the loop anyhow.
1100
1101 -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
1102 -- so substituting rhs inside a lambda doesn't change the occ info.
1103 -- Sadly, not quite the same as exprIsHNF.
1104 canInlineInLam (Lit _) = True
1105 canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
1106 canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
1107 canInlineInLam _ = False
1108 -- not ticks. Counting ticks cannot be duplicated, and non-counting
1109 -- ticks around a Lam will disappear anyway.
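-- For example (informally): canInlineInLam succeeds for a literal or a
-- manifest lambda such as (\x. BIG), but fails for an application (f y).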
1110
1111 early_phase = case sm_phase mode of
1112 Phase 0 -> False
1113 _ -> True
1114 -- If we don't have this early_phase test, consider
1115 -- x = length [1,2,3]
1116 -- The full laziness pass carefully floats all the cons cells to
1117 -- top level, and preInlineUnconditionally floats them all back in.
1118 -- Result is (a) static allocation replaced by dynamic allocation
1119 -- (b) many simplifier iterations because this tickles
1120 -- a related problem; only one inlining per pass
1121 --
1122 -- On the other hand, I have seen cases where top-level fusion is
1123 -- lost if we don't inline top level thing (e.g. string constants)
1124 -- Hence the test for phase zero (which is the phase for all the final
1125 -- simplifications). Until phase zero we take no special notice of
1126 -- top level things, but then we become more leery about inlining
1127 -- them.
1128
1129 {-
1130 ************************************************************************
1131 * *
1132 postInlineUnconditionally
1133 * *
1134 ************************************************************************
1135
1136 postInlineUnconditionally
1137 ~~~~~~~~~~~~~~~~~~~~~~~~~
1138 @postInlineUnconditionally@ decides whether to unconditionally inline
1139 a thing based on the form of its RHS; in particular if it has a
1140 trivial RHS. If so, we can inline and discard the binding altogether.
1141
1142 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
1143 only have *forward* references. Hence, it's safe to discard the binding
1144
1145 NOTE: This isn't our last opportunity to inline. We're at the binding
1146 site right now, and we'll get another opportunity when we get to the
1147 occurrence(s)
1148
1149 Note that we do this unconditional inlining only for trivial RHSs.
1150 Don't inline even WHNFs inside lambdas; doing so may simply increase
1151 allocation when the function is called. This isn't the last chance; see
1152 NOTE above.
1153
1154 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here. Why?
1155 Because we don't even want to inline them into the RHS of constructor
1156 arguments. See NOTE above
1157
1158 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
1159 it's best to inline it anyway. We often get a=E; b=a from desugaring,
1160 with both a and b marked NOINLINE. But that seems incompatible with
1161 our new view that inlining is like a RULE, so I'm sticking to the 'active'
1162 story for now.
1163 -}
1164
1165 postInlineUnconditionally
1166 :: DynFlags -> SimplEnv -> TopLevelFlag
1167 -> OutId -- The binder (an InId would be fine too)
1168 -- (*not* a CoVar)
1169 -> OccInfo -- From the InId
1170 -> OutExpr
1171 -> Unfolding
1172 -> Bool
1173 -- Precondition: rhs satisfies the let/app invariant
1174 -- See Note [CoreSyn let/app invariant] in CoreSyn
1175 -- Reason: we don't want to inline single uses, or discard dead bindings,
1176 -- for unlifted, side-effect-ful bindings
1177 postInlineUnconditionally dflags env top_lvl bndr occ_info rhs unfolding
1178 | not active = False
1179 | isWeakLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline
1180 -- because it might be referred to "earlier"
1181 | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
1182 | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally]
1183 | exprIsTrivial rhs = True
1184 | otherwise
1185 = case occ_info of
1186 -- The point of examining occ_info here is that for *non-values*
1187 -- that occur outside a lambda, the call-site inliner won't have
1188 -- a chance (because it doesn't know that the thing
1189 -- only occurs once). The pre-inliner won't have gotten
1190 -- it either, if the thing occurs in more than one branch
1191 -- So the main target is things like
1192 -- let x = f y in
1193 -- case v of
1194 -- True -> case x of ...
1195 -- False -> case x of ...
1196 -- This is very important in practice; e.g. wheel-seive1 doubles
1197 -- in allocation if you miss this out
1198 OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt }
1199 -- OneOcc => no code-duplication issue
1200 -> smallEnoughToInline dflags unfolding -- Small enough to dup
1201 -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
1202 --
1203 -- NB: Do NOT inline arbitrarily big things, even if one_br is True
1204 -- Reason: doing so risks exponential behaviour. We simplify a big
1205 -- expression, inline it, and simplify it again. But if the
1206 -- very same thing happens in the big expression, we get
1207 -- exponential cost!
1208 -- PRINCIPLE: when we've already simplified an expression once,
1209 -- make sure that we only inline it if it's reasonably small.
1210
1211 && (not in_lam ||
1212 -- Outside a lambda, we want to be reasonably aggressive
1213 -- about inlining into multiple branches of case
1214 -- e.g. let x = <non-value>
1215 -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
1216 -- Inlining can be a big win if C3 is the hot-spot, even if
1217 -- the uses in C1, C2 are not 'interesting'
1218 -- An example that gets worse if you add int_cxt here is 'clausify'
1219
1220 (isCheapUnfolding unfolding && int_cxt))
1221 -- isCheap => acceptable work duplication; in_lam may be true
1222 -- int_cxt to prevent us inlining inside a lambda without some
1223 -- good reason. See the notes on int_cxt in preInlineUnconditionally
1224
1225 IAmDead -> True -- This happens; for example, the case_bndr during case of
1226 -- known constructor: case (a,b) of x { (p,q) -> ... }
1227 -- Here x isn't mentioned in the RHS, so we don't want to
1228 -- create the (dead) let-binding let x = (a,b) in ...
1229
1230 _ -> False
1231
1232 -- Here's an example that we don't handle well:
1233 -- let f = if b then Left (\x.BIG) else Right (\y.BIG)
1234 -- in \y. ....case f of {...} ....
1235 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
1236 -- But
1237 -- - We can't preInlineUnconditionally because that would invalidate
1238 -- the occ info for b.
1239 -- - We can't postInlineUnconditionally because the RHS is big, and
1240 -- that risks exponential behaviour
1241 -- - We can't call-site inline, because the rhs is big
1242 -- Alas!
1243
1244 where
1245 active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
1246 -- See Note [pre/postInlineUnconditionally in gentle mode]
1247
1248 {-
1249 Note [Top level and postInlineUnconditionally]
1250 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1251 We don't do postInlineUnconditionally for top-level things (even for
1252 ones that are trivial):
1253
1254 * Doing so will inline top-level error expressions that have been
1255 carefully floated out by FloatOut. More generally, it might
1256 replace static allocation with dynamic.
1257
1258 * Even for trivial expressions there's a problem. Consider
1259 {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
1260 blah xs = reverse xs
1261 ruggle = sort
1262 In one simplifier pass we might fire the rule, getting
1263 blah xs = ruggle xs
1264 but in *that* simplifier pass we must not do postInlineUnconditionally
1265 on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
1266
1267 If the rhs is trivial it'll be inlined by callSiteInline, and then
1268 the binding will be dead and discarded by the next use of OccurAnal
1269
1270 * There is less point, because the main goal is to get rid of local
1271 bindings used in multiple case branches.
1272
1273 * The inliner should inline trivial things at call sites anyway.
1274
1275 * The Id might be exported. We could check for that separately,
1276 but since we aren't going to postInlineUnconditionally /any/
1277 top-level bindings, we don't need to test.
1278
1279 Note [Stable unfoldings and postInlineUnconditionally]
1280 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1281 Do not do postInlineUnconditionally if the Id has a stable unfolding,
1282 otherwise we lose the unfolding. Example
1283
1284 -- f has stable unfolding with rhs (e |> co)
1285 -- where 'e' is big
1286 f = e |> co
1287
1288 Then there's a danger we'll optimise to
1289
1290 f' = e
1291 f = f' |> co
1292
1293 and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
1294 won't inline because 'e' is too big.
1295
1296 c.f. Note [Stable unfoldings and preInlineUnconditionally]
1297
1298
1299 ************************************************************************
1300 * *
1301 Rebuilding a lambda
1302 * *
1303 ************************************************************************
1304 -}
1305
1306 mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
1307 -- mkLam tries three things
1308 -- a) eta reduction, if that gives a trivial expression
1309 -- b) eta expansion [only if there are some value lambdas]
-- c) floating casts out of the lambda body (see Note [Casts and lambdas])
1310
1311 mkLam _env [] body _cont
1312 = return body
1313 mkLam env bndrs body cont
1314 = do { dflags <- getDynFlags
1315 ; mkLam' dflags bndrs body }
1316 where
1317 mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
1318 mkLam' dflags bndrs (Cast body co)
1319 | not (any bad bndrs)
1320 -- Note [Casts and lambdas]
1321 = do { lam <- mkLam' dflags bndrs body
1322 ; return (mkCast lam (mkPiCos Representational bndrs co)) }
1323 where
1324 co_vars = tyCoVarsOfCo co
1325 bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
1326
1327 mkLam' dflags bndrs body@(Lam {})
1328 = mkLam' dflags (bndrs ++ bndrs1) body1
1329 where
1330 (bndrs1, body1) = collectBinders body
1331
1332 mkLam' dflags bndrs (Tick t expr)
1333 | tickishFloatable t
1334 = mkTick t <$> mkLam' dflags bndrs expr
1335
1336 mkLam' dflags bndrs body
1337 | gopt Opt_DoEtaReduction dflags
1338 , Just etad_lam <- tryEtaReduce bndrs body
1339 = do { tick (EtaReduction (head bndrs))
1340 ; return etad_lam }
1341
1342 | not (contIsRhs cont) -- See Note [Eta-expanding lambdas]
1343 , sm_eta_expand (getMode env)
1344 , any isRuntimeVar bndrs
1345 , let body_arity = exprEtaExpandArity dflags body
1346 , body_arity > 0
1347 = do { tick (EtaExpansion (head bndrs))
1348 ; let res = mkLams bndrs (etaExpand body_arity body)
1349 ; traceSmpl "eta expand" (vcat [text "before" <+> ppr (mkLams bndrs body)
1350 , text "after" <+> ppr res])
1351 ; return res }
1352
1353 | otherwise
1354 = return (mkLams bndrs body)
1355
1356 {-
1357 Note [Eta-expanding lambdas]
1358 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1359 In general we *do* want to eta-expand lambdas. Consider
1360 f (\x -> case x of (a,b) -> \s -> blah)
1361 where 's' is a state token, and hence can be eta expanded. This
1362 showed up in the code for GHC.IO.Handle.Text.hPutChar, a rather
1363 important function!
1364
1365 The eta-expansion will never happen unless we do it now. (Well, it's
1366 possible that CorePrep will do it, but CorePrep only has a half-baked
1367 eta-expander that can't deal with casts. So it's much better to do it
1368 here.)
1369
1370 However, when the lambda is let-bound, as the RHS of a let, we have a
1371 better eta-expander (in the form of tryEtaExpandRhs), so we don't
1372 bother to try expansion in mkLam in that case; hence the contIsRhs
1373 guard.
1374
1375 NB: We check the SimplEnv (sm_eta_expand), not DynFlags.
1376 See Note [No eta expansion in stable unfoldings]
1377
1378 Note [Casts and lambdas]
1379 ~~~~~~~~~~~~~~~~~~~~~~~~
1380 Consider
1381 (\x. (\y. e) `cast` g1) `cast` g2
1382 There is a danger here that the two lambdas look separated, and the
1383 full laziness pass might float an expression to between the two.
1384
1385 So this equation in mkLam' floats the g1 out, thus:
1386 (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
1387 where x:tx.
1388
1389 In general, this floats casts outside lambdas, where (I hope) they
1390 might meet and cancel with some other cast:
1391 \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
1392 /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
1393 /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
1394 (if not (g `in` co))
1395
1396 Notice that it works regardless of 'e'. Originally it worked only
1397 if 'e' was itself a lambda, but in some cases that resulted in
1398 fruitless iteration in the simplifier. A good example was when
1399 compiling Text.ParserCombinators.ReadPrec, where we had a definition
1400 like (\x. Get `cast` g)
1401 where Get is a constructor with nonzero arity. Then mkLam eta-expanded
1402 the Get, and the next iteration eta-reduced it, and then eta-expanded
1403 it again.
1404
1405 Note also the side condition for the case of coercion binders.
1406 It does not make sense to transform
1407 /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
1408 because the latter is not well-kinded.
1409
1410 ************************************************************************
1411 * *
1412 Eta expansion
1413 * *
1414 ************************************************************************
1415 -}
1416
1417 tryEtaExpandRhs :: SimplEnv -> RecFlag -> OutId -> OutExpr
1418 -> SimplM (Arity, OutExpr)
1419 -- See Note [Eta-expanding at let bindings]
1420 tryEtaExpandRhs env is_rec bndr rhs
1421 = do { dflags <- getDynFlags
1422 ; (new_arity, new_rhs) <- try_expand dflags
1423
1424 ; WARN( new_arity < old_id_arity,
1425 (text "Arity decrease:" <+> (ppr bndr <+> ppr old_id_arity
1426 <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
1427 -- Note [Arity decrease] in Simplify
1428 return (new_arity, new_rhs) }
1429 where
1430 try_expand dflags
1431 | exprIsTrivial rhs
1432 = return (exprArity rhs, rhs)
1433
1434 | sm_eta_expand (getMode env) -- Provided eta-expansion is on
1435 , let new_arity1 = findRhsArity dflags bndr rhs old_arity
1436 new_arity2 = idCallArity bndr
1437 new_arity = max new_arity1 new_arity2
1438 , new_arity > old_arity -- And the current manifest arity isn't enough
1439 = if is_rec == Recursive && isJoinId bndr
1440 then WARN(True, text "Can't eta-expand recursive join point:" <+>
1441 ppr bndr)
1442 return (old_arity, rhs)
1443 else do { tick (EtaExpansion bndr)
1444 ; return (new_arity, etaExpand new_arity rhs) }
1445 | otherwise
1446 = return (old_arity, rhs)
1447
1448 old_arity = exprArity rhs -- See Note [Do not eta-expand PAPs]
1449 old_id_arity = idArity bndr
1450
1451 {-
1452 Note [Eta-expanding at let bindings]
1453 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1454 We now eta expand at let-bindings, which is where the payoff comes.
1455 The most significant thing is that we can do a simple arity analysis
1456 (in CoreArity.findRhsArity), which we can't do for free-floating lambdas.
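For example (a sketch, modulo the usual cheapness side conditions):
    let f = \x -> if b then \y -> x +# y else \y -> x -# y
    in ...
Both branches of the 'if' have arity 1, so findRhsArity gives the RHS
arity 2 and we eta-expand to
    let f = \x y -> (if b then \y1 -> x +# y1 else \y1 -> x -# y1) y
    in ...
after which the simplifier pushes the application of y into the branches.
A free-floating lambda gets no such analysis.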
1457
1458 One useful consequence of not eta-expanding lambdas is this example:
1459 genMap :: C a => ...
1460 {-# INLINE genMap #-}
1461 genMap f xs = ...
1462
1463 myMap :: D a => ...
1464 {-# INLINE myMap #-}
1465 myMap = genMap
1466
1467 Notice that 'genMap' should only inline if applied to two arguments.
1468 In the stable unfolding for myMap we'll have the unfolding
1469 (\d -> genMap Int (..d..))
1470 We do not want to eta-expand to
1471 (\d f xs -> genMap Int (..d..) f xs)
1472 because then 'genMap' will inline, and it really shouldn't: at least
1473 as far as the programmer is concerned, it's not applied to two
1474 arguments!
1475
1476 Note [Do not eta-expand PAPs]
1477 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1478 We used to have old_arity = manifestArity rhs, which meant that we
1479 would eta-expand even PAPs. But this gives no particular advantage,
1480 and can lead to a massive blow-up in code size, exhibited by Trac #9020.
1481 Suppose we have a PAP
1482 foo :: IO ()
1483 foo = returnIO ()
1484 Then we can eta-expand it to
1485 foo = (\eta. (returnIO () |> sym g) eta) |> g
1486 where
1487 g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
1488
1489 But there is really no point in doing this, and it generates masses of
1490 coercions and whatnot that eventually disappear again. For T9020, GHC
1491 allocated 6.6G before, and 0.8G afterwards; and residency dropped from
1492 1.8G to 45M.
1493
1494 But note that this won't eta-expand, say
1495 f = \g -> map g
1496 Does it matter not eta-expanding such functions? I'm not sure. Perhaps
1497 strictness analysis will have less to bite on?
1498
1499
1500 ************************************************************************
1501 * *
1502 \subsection{Floating lets out of big lambdas}
1503 * *
1504 ************************************************************************
1505
1506 Note [Floating and type abstraction]
1507 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1508 Consider this:
1509 x = /\a. C e1 e2
1510 We'd like to float this to
1511 y1 = /\a. e1
1512 y2 = /\a. e2
1513 x = /\a. C (y1 a) (y2 a)
1514 for the usual reasons: we want to inline x rather vigorously.
1515
1516 You may think that this kind of thing is rare. But in some programs it is
1517 common. For example, if you do closure conversion you might get:
1518
1519 data a :-> b = forall e. (e -> a -> b) :$ e
1520
1521 f_cc :: forall a. a :-> a
1522 f_cc = /\a. (\e. id a) :$ ()
1523
1524 Now we really want to inline that f_cc thing so that the
1525 construction of the closure goes away.
1526
1527 So I have elaborated simplLazyBind to understand right-hand sides that look
1528 like
1529 /\ a1..an. body
1530
1531 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1532 but there is quite a bit of plumbing in simplLazyBind as well.
1533
1534 The same transformation is good when there are lets in the body:
1535
1536 /\abc -> let(rec) x = e in b
1537 ==>
1538 let(rec) x' = /\abc -> let x = x' a b c in e
1539 in
1540 /\abc -> let x = x' a b c in b
1541
1542 This is good because it can turn things like:
1543
1544 let f = /\a -> letrec g = ... g ... in g
1545 into
1546 letrec g' = /\a -> ... g' a ...
1547 in
1548 let f = /\ a -> g' a
1549
1550 which is better. In effect, it means that big lambdas don't impede
1551 let-floating.
1552
1553 This optimisation is CRUCIAL in eliminating the junk introduced by
1554 desugaring mutually recursive definitions. Don't eliminate it lightly!
1555
1556 [May 1999] If we do this transformation *regardless* then we can
1557 end up with some pretty silly stuff. For example,
1558
1559 let
1560 st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1561 in ..
1562 becomes
1563 let y1 = /\s -> r1
1564 y2 = /\s -> r2
1565 st = /\s -> ...[y1 s/x1, y2 s/x2]
1566 in ..
1567
1568 Unless the "..." is a WHNF there is really no point in doing this.
1569 Indeed it can make things worse. Suppose x1 is used strictly,
1570 and is of the form
1571
1572 x1* = case f y of { (a,b) -> e }
1573
1574 If we abstract this wrt the tyvar we then can't do the case inline
1575 as we would normally do.
1576
1577 That's why the whole transformation is part of the same process that
1578 floats let-bindings and constructor arguments out of RHSs. In particular,
1579 it is guarded by the doFloatFromRhs call in simplLazyBind.
1580
1581 Note [Which type variables to abstract over]
1582 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1583 Abstract only over the type variables free in the rhs wrt which the
1584 new binding is abstracted. Note that
1585
1586 * The naive approach of abstracting wrt the
1587 tyvars free in the Id's /type/ fails. Consider:
1588 /\ a b -> let t :: (a,b) = (e1, e2)
1589 x :: a = fst t
1590 in ...
1591 Here, b isn't free in x's type, but we must nevertheless
1592 abstract wrt b as well, because t's type mentions b.
1593 Since t is floated too, we'd end up with the bogus:
1594 poly_t = /\ a b -> (e1, e2)
1595 poly_x = /\ a -> fst (poly_t a *b*)
1596
1597 * We must do closeOverKinds. Example (Trac #10934):
1598 f = /\k (f:k->*) (a:k). let t = AccFailure @ (f a) in ...
1599 Here we want to float 't', but we must remember to abstract over
1600 'k' as well, even though it is not explicitly mentioned in the RHS,
1601 otherwise we get
1602 t = /\ (f:k->*) (a:k). AccFailure @ (f a)
1603 which is obviously bogus.
1604 -}
1605
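-- A sketch of the transformation, with illustrative names: given
--     abstractFloats [a] body_env body
-- where body_env holds the float    x = e
-- we produce the binding            poly_x = /\a. e
-- and return a substituted body in which occurrences of x have become
-- (poly_x a).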
1606 abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
1607 abstractFloats main_tvs body_env body
1608 = ASSERT( notNull body_floats )
1609 do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
1610 ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
1611 where
1612 main_tv_set = mkVarSet main_tvs
1613 body_floats = getFloatBinds body_env
1614 empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
1615
1616 abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
1617 abstract subst (NonRec id rhs)
1618 = do { (poly_id, poly_app) <- mk_poly tvs_here id
1619 ; let poly_rhs = mkLams tvs_here rhs'
1620 subst' = CoreSubst.extendIdSubst subst id poly_app
1621 ; return (subst', (NonRec poly_id poly_rhs)) }
1622 where
1623 rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
1624
1625 -- tvs_here: see Note [Which type variables to abstract over]
1626 tvs_here = toposortTyVars $
1627 filter (`elemVarSet` main_tv_set) $
1628 closeOverKindsList $
1629 exprSomeFreeVarsList isTyVar rhs'
1630
1631 abstract subst (Rec prs)
1632 = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
1633 ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
1634 poly_rhss = [mkLams tvs_here (CoreSubst.substExpr (text "abstract_floats3") subst' rhs)
1635 | rhs <- rhss]
1636 ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
1637 where
1638 (ids,rhss) = unzip prs
1639 -- For a recursive group, it's a bit of a pain to work out the minimal
1640 -- set of tyvars over which to abstract:
1641 -- /\ a b c. let x = ...a... in
1642 -- letrec { p = ...x...q...
1643 -- q = .....p...b... } in
1644 -- ...
1645 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
1646 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
1647 -- Since it's a pain, we just use the whole set, which is always safe
1648 --
1649 -- If you ever want to be more selective, remember this bizarre case too:
1650 -- x::a = x
1651 -- Here, we must abstract 'x' over 'a'.
1652 tvs_here = toposortTyVars main_tvs
1653
1654 mk_poly tvs_here var
1655 = do { uniq <- getUniqueM
1656 ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
1657 poly_ty = mkInvForAllTys tvs_here (idType var) -- But new type of course
1658 poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.hs
1659 mkLocalIdOrCoVar poly_name poly_ty
1660 ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
1661 -- In the olden days, it was crucial to copy the occInfo of the original var,
1662 -- because we were looking at occurrence-analysed but as yet unsimplified code!
1663 -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking
1664 -- at already simplified code, so it doesn't matter
1665 --
1666 -- It's even right to retain single-occurrence or dead-var info:
1667 -- Suppose we started with /\a -> let x = E in B
1668 -- where x occurs once in B. Then we transform to:
1669 -- let x' = /\a -> E in /\a -> let x* = x' a in B
1670 -- where x* has an INLINE prag on it. Now, once x* is inlined,
1671 -- the occurrences of x' will be just the occurrences originally
1672 -- pinned on x.
1673
1674 {-
1675 Note [Abstract over coercions]
1676 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1677 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1678 type variable a. Rather than sort this mess out, we simply bale out and abstract
1679 wrt all the type variables if any of them are coercion variables.
1680
1681
1682 Historical note: if you use let-bindings instead of a substitution, beware of this:
1683
1684 -- Suppose we start with:
1685 --
1686 -- x = /\ a -> let g = G in E
1687 --
1688 -- Then we'll float to get
1689 --
1690 -- x = let poly_g = /\ a -> G
1691 -- in /\ a -> let g = poly_g a in E
1692 --
1693 -- But now the occurrence analyser will see just one occurrence
1694 -- of poly_g, not inside a lambda, so the simplifier will
1695 -- PreInlineUnconditionally poly_g back into g! Back to square 1!
1696 -- (I used to think that the "don't inline lone occurrences" stuff
1697 -- would stop this happening, but since it's the *only* occurrence,
1698 -- PreInlineUnconditionally kicks in first!)
1699 --
1700 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1701 -- to appear many times. (NB: mkInlineMe eliminates
1702 -- such notes on trivial RHSs, so do it manually.)
1703
1704 ************************************************************************
1705 * *
1706 prepareAlts
1707 * *
1708 ************************************************************************
1709
1710 prepareAlts tries these things:
1711
1712 1. Eliminate alternatives that cannot match, including the
1713 DEFAULT alternative.
1714
1715 2. If the DEFAULT alternative can match only one possible constructor,
1716 then make that constructor explicit.
1717 e.g.
1718 case e of x { DEFAULT -> rhs }
1719 ===>
1720 case e of x { (a,b) -> rhs }
1721 where the type is a single constructor type. This gives better code
1722 when rhs also scrutinises x or e.
1723
1724 3. Returns a list of the constructors that cannot match in the
1725 DEFAULT alternative (if there is one)
1726
1727 Here "cannot match" includes knowledge from GADTs
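For example (a sketch):
    data T a where { TI :: T Int; TB :: T Bool }
    g :: T a -> Int
    g v = case v of { TI -> 1; TB -> 2 }
If g is inlined at a call site where its argument has type T Int, the
inlined case scrutinises something of type T Int, so the TB alternative
cannot match and is dropped before we bother to simplify its RHS.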
1728
1729 It's a good idea to do this stuff before simplifying the alternatives, to
1730 avoid simplifying alternatives we know can't happen, and to come up with
1731 the list of constructors that are handled, to put into the IdInfo of the
1732 case binder, for use when simplifying the alternatives.
1733
1734 Eliminating the default alternative in (1) isn't so obvious, but it can
1735 happen:
1736
1737 data Colour = Red | Green | Blue
1738
1739 f x = case x of
1740 Red -> ..
1741 Green -> ..
1742 DEFAULT -> h x
1743
1744 h y = case y of
1745 Blue -> ..
1746 DEFAULT -> [ case y of ... ]
1747
1748 If we inline h into f, the default case of the inlined h can't happen.
1749 If we don't notice this, we may end up filtering out *all* the cases
1750 of the inner case y, which give us nowhere to go!
1751 -}
1752
1753 prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
1754 -- The returned alternatives can be empty, if none are possible
1755 prepareAlts scrut case_bndr' alts
1756 | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
1757 -- Case binder is needed just for its type. Note that as an
1758 -- OutId, it has maximum information; this is important.
1759 -- Test simpl013 is an example
1760 = do { us <- getUniquesM
1761 ; let (idcs1, alts1) = filterAlts tc tys imposs_cons alts
1762 (yes2, alts2) = refineDefaultAlt us tc tys idcs1 alts1
1763 (yes3, idcs3, alts3) = combineIdenticalAlts idcs1 alts2
1764 -- "idcs" stands for "impossible default data constructors"
1765 -- i.e. the constructors that can't match the default case
1766 ; when yes2 $ tick (FillInCaseDefault case_bndr')
1767 ; when yes3 $ tick (AltMerge case_bndr')
1768 ; return (idcs3, alts3) }
1769
1770 | otherwise -- Not a data type, so nothing interesting happens
1771 = return ([], alts)
1772 where
1773 imposs_cons = case scrut of
1774 Var v -> otherCons (idUnfolding v)
1775 _ -> []
1776
1777
1778 {-
1779 ************************************************************************
1780 * *
1781 mkCase
1782 * *
1783 ************************************************************************
1784
1785 mkCase tries these things
1786
1787 * Note [Merge Nested Cases]
1788 * Note [Eliminate identity case]
1789 * Note [Scrutinee constant folding]
1790
1791 Note [Merge Nested Cases]
1792 ~~~~~~~~~~~~~~~~~~~~~~~~~
1793 case e of b { ==> case e of b {
1794 p1 -> rhs1 p1 -> rhs1
1795 ... ...
1796 pm -> rhsm pm -> rhsm
1797 _ -> case b of b' { pn -> let b'=b in rhsn
1798 pn -> rhsn ...
1799 ... po -> let b'=b in rhso
1800 po -> rhso _ -> let b'=b in rhsd
1801 _ -> rhsd
1802 }
1803
1804 which merges two cases into one when the default alternative of
1805 the outer case scrutinises the variable bound by the outer case. This
1806 transformation is called Case Merging. It avoids scrutinising the
1807 same variable multiple times.
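A tiny concrete instance (a sketch):
    case x of b { Nothing -> e1; DEFAULT -> case b of b' { Just y -> e2 } }
  ==>
    case x of b { Nothing -> e1; Just y -> let b' = b in e2 }
(and the let for b' is omitted altogether if b' is dead).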
1808
1809 Note [Eliminate Identity Case]
1810 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1811 case e of ===> e
1812 True -> True;
1813 False -> False
1814
1815 and similar friends.
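Other shapes that count as an identity case (a sketch):
    case e of x { DEFAULT -> x }                           ===>  e
    case e of x { Nothing -> Nothing; Just y -> Just y }   ===>  e
In the second example every RHS rebuilds exactly the constructor
application that was matched.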
1816
1817 Note [Scrutinee Constant Folding]
1818 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1819 case x op# k# of _ { ===> case x of _ {
1820 a1# -> e1 (a1# inv_op# k#) -> e1
1821 a2# -> e2 (a2# inv_op# k#) -> e2
1822 ... ...
1823 DEFAULT -> ed DEFAULT -> ed
1824
1825 where (x op# k#) inv_op# k# == x
1826
1827 And similarly for commuted arguments and for some unary operations.
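For instance (a sketch):
    case x -# 5# of _ { 2# -> e1; DEFAULT -> e2 }
  ===>
    case x of _ { 7# -> e1; DEFAULT -> e2 }
because (x -# 5#) == 2# holds exactly when x == 7#.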
1828
1829 The purpose of this transformation is not only to avoid an arithmetic
1830 operation at runtime but to allow other transformations to apply in cascade.
1831
1832 Example with the "Merge Nested Cases" optimization (from #12877):
1833
1834 main = case t of t0
1835 0## -> ...
1836 DEFAULT -> case t0 `minusWord#` 1## of t1
1837 0## -> ...
1838 DEFAULT -> case t1 `minusWord#` 1## of t2
1839 0## -> ...
1840 DEFAULT -> case t2 `minusWord#` 1## of _
1841 0## -> ...
1842 DEFAULT -> ...
1843
1844 becomes:
1845
1846 main = case t of _
1847 0## -> ...
1848 1## -> ...
1849 2## -> ...
1850 3## -> ...
1851 DEFAULT -> ...
1852
1853 There are some wrinkles
1854
1855 * Do not apply caseRules if there is just a single DEFAULT alternative
1856 case e +# 3# of b { DEFAULT -> rhs }
1857 If we applied the transformation here we would (stupidly) get
1858 case e of b' { DEFAULT -> let b = b' +# 3# in rhs }
1859 and now the process may repeat, because that let will really
1860 be a case.
1861
1862 * The type of the scrutinee might change. E.g.
1863 case tagToEnum (x :: Int#) of (b::Bool)
1864 False -> e1
1865 True -> e2
1866 ==>
1867 case x of (b'::Int#)
1868 DEFAULT -> e1
1869 1# -> e2
1870
1871 * The case binder may be used in the right hand sides, so we need
1872 to make a local binding for it, if it is alive. e.g.
1873 case e +# 10# of b
1874 DEFAULT -> blah...b...
1875 44# -> blah2...b...
1876 ===>
1877 case e of b'
1878 DEFAULT -> let b = b' +# 10# in blah...b...
1879 34# -> let b = 44# in blah2...b...
1880
1881 Note that in the non-DEFAULT cases we know what to bind 'b' to,
1882 whereas in the DEFAULT case we must reconstruct the original value.
1883 But NB: we use b'; we do not duplicate 'e'.
1884
1885 * In dataToTag we might need to make up some fake binders;
1886 see Note [caseRules for dataToTag] in PrelRules
1887 -}
1888
1889 mkCase, mkCase1, mkCase2, mkCase3
1890 :: DynFlags
1891 -> OutExpr -> OutId
1892 -> OutType -> [OutAlt] -- Alternatives in standard (increasing) order
1893 -> SimplM OutExpr
1894
1895 --------------------------------------------------
1896 -- 1. Merge Nested Cases
1897 --------------------------------------------------
1898
1899 mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
1900 | gopt Opt_CaseMerge dflags
1901 , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
1902 <- stripTicksTop tickishFloatable deflt_rhs
1903 , inner_scrut_var == outer_bndr
1904 = do { tick (CaseMerge outer_bndr)
1905
1906 ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
1907 (con, args, wrap_rhs rhs)
1908 -- Simplifier's no-shadowing invariant should ensure
1909 -- that outer_bndr is not shadowed by the inner patterns
1910 wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
1911 -- The let is OK even for unboxed binders,
1912
1913 wrapped_alts | isDeadBinder inner_bndr = inner_alts
1914 | otherwise = map wrap_alt inner_alts
1915
1916 merged_alts = mergeAlts outer_alts wrapped_alts
1917 -- NB: mergeAlts gives priority to the left
1918 -- case x of
1919 -- A -> e1
1920 -- DEFAULT -> case x of
1921 -- A -> e2
1922 -- B -> e3
1923 -- When we merge, we must ensure that e1 takes
1924 -- precedence over e2 as the value for A!
1925
1926 ; fmap (mkTicks ticks) $
1927 mkCase1 dflags scrut outer_bndr alts_ty merged_alts
1928 }
1929 -- Warning: don't call mkCase recursively!
1930 -- Firstly, there's no point, because inner alts have already had
1931 -- mkCase applied to them, so they won't have a case in their default
1932 -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
1933 -- in munge_rhs may put a case into the DEFAULT branch!
1934
1935 mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
1936
1937 --------------------------------------------------
1938 -- 2. Eliminate Identity Case
1939 --------------------------------------------------
1940
1941 mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _) -- Identity case
1942 | all identity_alt alts
1943 = do { tick (CaseIdentity case_bndr)
1944 ; return (mkTicks ticks $ re_cast scrut rhs1) }
1945 where
1946 ticks = concatMap (stripTicksT tickishFloatable . thdOf3) (tail alts)
1947 identity_alt (con, args, rhs) = check_eq rhs con args
1948
1949 check_eq (Cast rhs co) con args -- See Note [RHS casts]
1950 = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
1951 check_eq (Tick t e) alt args
1952 = tickishFloatable t && check_eq e alt args
1953
1954 check_eq (Lit lit) (LitAlt lit') _ = lit == lit'
1955 check_eq (Var v) _ _ | v == case_bndr = True
1956 check_eq (Var v) (DataAlt con) args
1957 | null arg_tys, null args = v == dataConWorkId con
1958 -- Optimisation only
1959 check_eq rhs (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $
1960 mkConApp2 con arg_tys args
1961 check_eq _ _ _ = False
1962
1963 arg_tys = tyConAppArgs (idType case_bndr)
1964
1965 -- Note [RHS casts]
1966 -- ~~~~~~~~~~~~~~~~
1967 -- We've seen this:
1968 -- case e of x { _ -> x `cast` c }
1969 -- And we definitely want to eliminate this case, to give
1970 -- e `cast` c
1971 -- So we throw away the cast from the RHS, and reconstruct
1972 -- it at the other end. All the RHS casts must be the same
1973 -- if (all identity_alt alts) holds.
1974 --
1975 -- Don't worry about nested casts, because the simplifier combines them
1976
1977 re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
1978 re_cast scrut _ = scrut
1979
1980 mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
1981
1982 --------------------------------------------------
1983 -- 3. Scrutinee Constant Folding
1984 --------------------------------------------------
1985
1986 mkCase2 dflags scrut bndr alts_ty alts
1987 | -- See Note [Scrutinee Constant Folding]
1988 case alts of -- Not if there is just a DEFAULT alternative
1989 [(DEFAULT,_,_)] -> False
1990 _ -> True
1991 , gopt Opt_CaseFolding dflags
1992 , Just (scrut', tx_con, mk_orig) <- caseRules dflags scrut
1993 = do { bndr' <- newId (fsLit "lwild") (exprType scrut')
1994 ; alts' <- mapM (tx_alt tx_con mk_orig bndr') alts
1995 ; mkCase3 dflags scrut' bndr' alts_ty $
1996 add_default (re_sort alts')
1997 }
1998
1999 | otherwise
2000 = mkCase3 dflags scrut bndr alts_ty alts
2001 where
2002 -- We need to keep the correct association between the scrutinee and its
2003 -- binder if the latter isn't dead. Hence we wrap rhs of alternatives with
2004 -- "let bndr = ... in":
2005 --
2006 -- case v + 10 of y =====> case v of y
2007 -- 20 -> e1 10 -> let y = 20 in e1
2008 -- DEFAULT -> e2 DEFAULT -> let y = v + 10 in e2
2009 --
2010 -- Other transformations give: =====> case v of y'
2011 -- 10 -> let y = 20 in e1
2012 -- DEFAULT -> let y = y' + 10 in e2
2013 --
2014 -- This wrapping is done in tx_alt; we use mk_orig, returned by caseRules,
2015 -- to construct an expression equivalent to the original one, for use
2016 -- in the DEFAULT case
2017
2018 tx_alt tx_con mk_orig new_bndr (con, bs, rhs)
2019 | DataAlt dc <- con', not (isNullaryRepDataCon dc)
2020 = -- For non-nullary data cons we must invent some fake binders
2021 -- See Note [caseRules for dataToTag] in PrelRules
2022 do { us <- getUniquesM
2023 ; let (ex_tvs, arg_ids) = dataConRepInstPat us dc
2024 (tyConAppArgs (idType new_bndr))
2025 ; return (con', ex_tvs ++ arg_ids, rhs') }
2026 | otherwise
2027 = return (con', [], rhs')
2028 where
2029 con' = tx_con con
2030
2031 rhs' | isDeadBinder bndr = rhs
2032 | otherwise = bindNonRec bndr orig_val rhs
2033
2034 orig_val = case con of
2035 DEFAULT -> mk_orig new_bndr
2036 LitAlt l -> Lit l
2037 DataAlt dc -> mkConApp2 dc (tyConAppArgs (idType bndr)) bs
2038
2039
2040 re_sort :: [CoreAlt] -> [CoreAlt] -- Re-sort the alternatives to
2041 re_sort alts = sortBy cmpAlt alts -- preserve the #case_invariants#
2042
2043 add_default :: [CoreAlt] -> [CoreAlt]
2044 -- TagToEnum may change a boolean True/False set of alternatives
2045 -- to LitAlt 0#/1# alternatives. But literal alternatives always
2046 -- have a DEFAULT (I think). So add it.
2047 add_default ((LitAlt {}, bs, rhs) : alts) = (DEFAULT, bs, rhs) : alts
2048 add_default alts = alts
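-- E.g. (a sketch):  case tagToEnum# i of { False -> e1; True -> e2 }
-- is rewritten above to (roughly)  case i of { 0# -> e1; 1# -> e2 },
-- and add_default then turns the leading 0# alternative into DEFAULT.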
2049
2050 --------------------------------------------------
2051 -- Catch-all
2052 --------------------------------------------------
2053 mkCase3 _dflags scrut bndr alts_ty alts
2054 = return (Case scrut bndr alts_ty alts)
2055
2056 {-
2057 Note [Dead binders]
2058 ~~~~~~~~~~~~~~~~~~~~
2059 Note that dead-ness is maintained by the simplifier, so that it is
2060 accurate after simplification as well as before.
2061
2062
2063 Note [Cascading case merge]
2064 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2065 Case merging should cascade in one sweep, because it
2066 happens bottom-up
2067
2068 case e of a {
2069 DEFAULT -> case a of b
2070 DEFAULT -> case b of c {
2071 DEFAULT -> e
2072 A -> ea
2073 B -> eb
2074 C -> ec
2075 ==>
2076 case e of a {
2077 DEFAULT -> case a of b
2078 DEFAULT -> let c = b in e
2079 A -> let c = b in ea
2080 B -> eb
2081 C -> ec
2082 ==>
2083 case e of a {
2084 DEFAULT -> let b = a in let c = b in e
2085 A -> let b = a in let c = b in ea
2086 B -> let b = a in eb
2087 C -> ec
2088
2089
2090 However here's a tricky case that we still don't catch, and I don't
2091 see how to catch it in one pass:
2092
2093 case x of c1 { I# a1 ->
2094 case a1 of c2 ->
2095 0 -> ...
2096 DEFAULT -> case x of c3 { I# a2 ->
2097 case a2 of ...
2098
2099 After occurrence analysis (and its binder-swap) we get this
2100
2101 case x of c1 { I# a1 ->
2102 let x = c1 in -- Binder-swap addition
2103 case a1 of c2 ->
2104 0 -> ...
2105 DEFAULT -> case x of c3 { I# a2 ->
2106 case a2 of ...
2107
2108 When we simplify the inner case x, we'll see that
2109 x=c1=I# a1. So we'll bind a2 to a1, and get
2110
2111 case x of c1 { I# a1 ->
2112 case a1 of c2 ->
2113 0 -> ...
2114 DEFAULT -> case a1 of ...
2115
2116 This is correct, but we can't do a case merge in this sweep
2117 because c2 /= a1. Reason: the binding c1=I# a1 went inwards
2118 without getting changed to c1=I# c2.
2119
2120 I don't think this is worth fixing, even if I knew how. It'll
2121 all come out in the next pass anyway.
2122 -}