Refactor the Mighty Simplifier
[ghc.git] / compiler / simplCore / SimplUtils.hs
1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 \section[SimplUtils]{The simplifier utilities}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplUtils (
10 -- Rebuilding
11 mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
12
13 -- Inlining,
14 preInlineUnconditionally, postInlineUnconditionally,
15 activeUnfolding, activeRule,
16 getUnfoldingInRuleMatch,
17 simplEnvForGHCi, updModeForStableUnfoldings, updModeForRules,
18
19 -- The continuation type
20 SimplCont(..), DupFlag(..),
21 isSimplified, contIsStop,
22 contIsDupable, contResultType, contHoleType,
23 contIsTrivial, contArgs,
24 countArgs,
25 mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
26 interestingCallContext,
27
28 -- ArgInfo
29 ArgInfo(..), ArgSpec(..), mkArgInfo,
30 addValArgTo, addCastTo, addTyArgTo,
31 argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
32
33 abstractFloats
34 ) where
35
36 #include "HsVersions.h"
37
38 import SimplEnv
39 import CoreMonad ( SimplMode(..), Tick(..) )
40 import DynFlags
41 import CoreSyn
42 import qualified CoreSubst
43 import PprCore
44 import CoreFVs
45 import CoreUtils
46 import CoreArity
47 import CoreUnfold
48 import Name
49 import Id
50 import IdInfo
51 import Var
52 import Demand
53 import SimplMonad
54 import Type hiding( substTy )
55 import Coercion hiding( substCo )
56 import DataCon ( dataConWorkId, isNullaryRepDataCon )
57 import VarSet
58 import BasicTypes
59 import Util
60 import OrdList ( isNilOL )
61 import MonadUtils
62 import Outputable
63 import Pair
64 import PrelRules
65 import FastString ( fsLit )
66
67 import Control.Monad ( when )
68 import Data.List ( sortBy )
69
70 {-
71 ************************************************************************
72 * *
73 The SimplCont and DupFlag types
74 * *
75 ************************************************************************
76
77 A SimplCont allows the simplifier to traverse the expression in a
78 zipper-like fashion. The SimplCont represents the rest of the expression,
79 "above" the point of interest.
80
81 You can also think of a SimplCont as an "evaluation context", using
82 that term in the way it is used for operational semantics. This is the
82 way I usually think of it.  For example you'll often see a syntax for
84 evaluation context looking like
85 C ::= [] | C e | case C of alts | C `cast` co
86 That's the kind of thing we are doing here, and I use that syntax in
87 the comments.
88
89
90 Key points:
91 * A SimplCont describes a *strict* context (just like
92 evaluation contexts do). E.g. Just [] is not a SimplCont
93
94 * A SimplCont describes a context that *does not* bind
95 any variables. E.g. \x. [] is not a SimplCont
96 -}
97
-- | A continuation describing the (strict, non-binding) evaluation context
-- "above" the expression currently being simplified; see the zipper
-- discussion above.  Each constructor is one frame of context.
data SimplCont
  = Stop                -- Stop[e] = e
        OutType         -- Type of the <hole>
        CallCtxt        -- Tells if there is something interesting about
                        --          the context, and hence the inliner
                        --          should be a bit keener (see interestingCallContext)
                        -- Specifically:
                        --     This is an argument of a function that has RULES
                        --     Inlining the call might allow the rule to fire
                        -- Never ValAppCxt (use ApplyToVal instead)
                        -- or CaseCtxt (use Select instead)

  | CastIt              -- (CastIt co K)[e] = K[ e `cast` co ]
        OutCoercion     -- The coercion simplified
                        -- Invariant: never an identity coercion
        SimplCont

  | ApplyToVal          -- (ApplyToVal arg K)[e] = K[ e arg ]
      { sc_dup  :: DupFlag      -- See Note [DupFlag invariants]
      , sc_arg  :: InExpr       -- The argument,
      , sc_env  :: StaticEnv    -- and its static env
      , sc_cont :: SimplCont }

  | ApplyToTy           -- (ApplyToTy ty K)[e] = K[ e ty ]
      { sc_arg_ty  :: OutType   -- Argument type
      , sc_hole_ty :: OutType   -- Type of the function, presumably (forall a. blah)
                                -- See Note [The hole type in ApplyToTy]
      , sc_cont    :: SimplCont }

  | Select              -- (Select alts K)[e] = K[ case e of alts ]
      { sc_dup  :: DupFlag      -- See Note [DupFlag invariants]
      , sc_bndr :: InId         -- case binder
      , sc_alts :: [InAlt]      -- Alternatives
      , sc_env  :: StaticEnv    -- and their static environment
      , sc_cont :: SimplCont }

  -- The two strict forms have no DupFlag, because we never duplicate them
  | StrictBind          -- (StrictBind x xs b K)[e] = let x = e in K[\xs.b]
                        --       or, equivalently,  = K[ (\x xs.b) e ]
      { sc_dup   :: DupFlag     -- See Note [DupFlag invariants]
      , sc_bndr  :: InId
      , sc_bndrs :: [InBndr]
      , sc_body  :: InExpr
      , sc_env   :: StaticEnv
      , sc_cont  :: SimplCont }

  | StrictArg           -- (StrictArg (f e1 ..en) K)[e] = K[ f e1 .. en e ]
      { sc_dup  :: DupFlag      -- Always Simplified or OkToDup
      , sc_fun  :: ArgInfo      -- Specifies f, e1..en, Whether f has rules, etc
                                --     plus strictness flags for *further* args
      , sc_cci  :: CallCtxt     -- Whether *this* argument position is interesting
      , sc_cont :: SimplCont }

  | TickIt              -- (TickIt t K)[e] = K[ tick t e ]
        (Tickish Id)    -- Tick tickish <hole>
        SimplCont
154
-- | How far a continuation frame has been processed, and hence whether it
-- is safe to duplicate; see Note [DupFlag invariants].
data DupFlag = NoDup       -- Unsimplified, might be big
             | Simplified  -- Simplified
             | OkToDup     -- Simplified and small

-- | True for anything already simplified (Simplified or OkToDup);
-- for those the associated subst-env is guaranteed empty.
isSimplified :: DupFlag -> Bool
isSimplified NoDup = False
isSimplified _     = True       -- Invariant: the subst-env is empty
162
-- | Apply the environment's type substitution to the type, unless the
-- DupFlag says the frame was already simplified -- in which case the
-- subst-env is empty (Note [DupFlag invariants]) and no work is needed.
perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
perhapsSubstTy dup_flag static_env ty
  = if isSimplified dup_flag
    then ty
    else substTy static_env ty
167
168 {-
169 Note [DupFlag invariants]
170 ~~~~~~~~~~~~~~~~~~~~~~~~~
171 In both (ApplyToVal dup _ env k)
172 and (Select dup _ _ env k)
173 the following invariants hold
174
175 (a) if dup = OkToDup, then continuation k is also ok-to-dup
176 (b) if dup = OkToDup or Simplified, the subst-env is empty
177         (and hence no need to re-simplify)
178 -}
179
-- Render a DupFlag as a short tag for debug output.
instance Outputable DupFlag where
  ppr flag = case flag of
               OkToDup    -> text "ok"
               NoDup      -> text "nodup"
               Simplified -> text "simpl"
184
-- Debug pretty-printer for continuations: one frame per line, outermost
-- frame first, with the rest of the continuation stacked below ($$).
instance Outputable SimplCont where
  ppr (Stop ty interesting) = text "Stop" <> brackets (ppr interesting) <+> ppr ty
  ppr (CastIt co cont )     = (text "CastIt" <+> pprOptCo co) $$ ppr cont
  ppr (TickIt t cont)       = (text "TickIt" <+> ppr t) $$ ppr cont
  ppr (ApplyToTy  { sc_arg_ty = ty, sc_cont = cont })
    = (text "ApplyToTy" <+> pprParendType ty) $$ ppr cont
  ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont })
    = (text "ApplyToVal" <+> ppr dup <+> pprParendExpr arg)
                        $$ ppr cont
  ppr (StrictBind { sc_bndr = b, sc_cont = cont })
    = (text "StrictBind" <+> ppr b) $$ ppr cont
  ppr (StrictArg { sc_fun = ai, sc_cont = cont })
    = (text "StrictArg" <+> ppr (ai_fun ai)) $$ ppr cont
  ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
    = (text "Select" <+> ppr dup <+> ppr bndr) $$
      -- Alternatives (and the type substitution) only in debug style
      ifPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
201
202
203 {- Note [The hole type in ApplyToTy]
204 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
205 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
206 continuation. It is absolutely necessary to compute contHoleType, but it is
207 not used for anything else (and hence may not be evaluated).
208
209 Why is it necessary for contHoleType? Consider the continuation
210     ApplyToTy Int (Stop Int)
211 corresponding to
212 (<hole> @Int) :: Int
213 What is the type of <hole>? It could be (forall a. Int) or (forall a. a),
214 and there is no way to know which, so we must record it.
215
216 In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType
217 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
218 doesn't matter because we'll never compute them all.
219
220 ************************************************************************
221 * *
222 ArgInfo and ArgSpec
223 * *
224 ************************************************************************
225 -}
226
-- | Accumulated information about a function application that is being
-- simplified one argument at a time: the function, the arguments consumed
-- so far (in reverse), and inlining guidance for the remaining arguments.
data ArgInfo
  = ArgInfo {
        ai_fun   :: OutId,      -- The function
        ai_args  :: [ArgSpec],  -- ...applied to these args (which are in *reverse* order)

        ai_type  :: OutType,    -- Type of (f a1 ... an)

        ai_rules :: FunRules,   -- Rules for this function

        ai_encl  :: Bool,       -- Flag saying whether this function
                                -- or an enclosing one has rules (recursively)
                                --      True => be keener to inline in all args

        ai_strs  :: [Bool],     -- Strictness of remaining arguments
                                --   Usually infinite, but if it is finite it guarantees
                                --   that the function diverges after being given
                                --   that number of args
        ai_discs :: [Int]       -- Discounts for remaining arguments; non-zero => be keener to inline
                                --   Always infinite
    }
247
-- | One already-simplified argument (or cast) of the application recorded
-- in an 'ArgInfo'; mirrors the corresponding 'SimplCont' frames.
data ArgSpec
  = ValArg OutExpr                    -- Apply to this (coercion or value); c.f. ApplyToVal
  | TyArg { as_arg_ty  :: OutType     -- Apply to this type; c.f. ApplyToTy
          , as_hole_ty :: OutType }   -- Type of the function (presumably forall a. blah)
  | CastBy OutCoercion                -- Cast by this; c.f. CastIt

instance Outputable ArgSpec where
  ppr (ValArg e)                 = text "ValArg" <+> ppr e
  ppr (TyArg { as_arg_ty = ty }) = text "TyArg" <+> ppr ty
  ppr (CastBy c)                 = text "CastBy" <+> ppr c
258
-- | Record one more value (or coercion) argument in the ArgInfo,
-- advancing the result type and consuming one rule-arity slot.
addValArgTo :: ArgInfo -> OutExpr -> ArgInfo
addValArgTo info arg_expr
  = info { ai_args  = ValArg arg_expr : ai_args info
         , ai_type  = result_ty
         , ai_rules = decRules (ai_rules info) }
  where
    result_ty = applyTypeToArg (ai_type info) arg_expr
263
-- | Record one more type argument in the ArgInfo, remembering the
-- (polymorphic) function type as the hole type of the TyArg.
addTyArgTo :: ArgInfo -> OutType -> ArgInfo
addTyArgTo info arg_ty
  = let fun_ty = ai_type info
        spec   = TyArg { as_arg_ty = arg_ty, as_hole_ty = fun_ty }
    in info { ai_args  = spec : ai_args info
            , ai_type  = piResultTy fun_ty arg_ty
            , ai_rules = decRules (ai_rules info) }
271
-- | Record a cast in the ArgInfo; the new result type is the target
-- (right-hand) type of the coercion.
addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
addCastTo info co
  = info { ai_args = CastBy co : ai_args info
         , ai_type = target_ty }
  where
    target_ty = pSnd (coercionKind co)
275
-- | Turn the leading arguments back into expressions, stopping at the
-- first cast (arguments beyond a cast belong to a different function type).
argInfoAppArgs :: [ArgSpec] -> [OutExpr]
argInfoAppArgs specs
  = case specs of
      []                              -> []
      CastBy {} : _                   -> []   -- Stop at a cast
      ValArg e : rest                 -> e : argInfoAppArgs rest
      TyArg { as_arg_ty = ty } : rest -> Type ty : argInfoAppArgs rest
281
-- | Re-wrap already-simplified arguments around a continuation, turning
-- each ArgSpec back into the matching SimplCont frame.
-- The env has an empty SubstEnv.
pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
pushSimplifiedArgs env specs k0
  = foldr push k0 specs
  where
    push (TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }) rest
      = ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
    push (ValArg e) rest
      = ApplyToVal { sc_arg = e, sc_env = env, sc_dup = Simplified, sc_cont = rest }
    push (CastBy c) rest
      = CastIt c rest
293
-- | Rebuild the application expression from a function and its ArgSpecs.
-- NB: the [ArgSpec] is reversed so that the first arg
-- in the list is the last one in the application
argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
argInfoExpr fun rev_args
  = rebuild rev_args
  where
    rebuild []            = Var fun
    rebuild (spec : rest) = case spec of
      ValArg a                 -> rebuild rest `App` a
      TyArg { as_arg_ty = ty } -> rebuild rest `App` Type ty
      CastBy co                -> mkCast (rebuild rest) co
304
305
type FunRules = Maybe (Int, [CoreRule]) -- Remaining rules for this function
     -- Nothing         => No rules
     -- Just (n, rules) => some rules, requiring at least n more type/value args

-- | One fewer argument is now needed before the rules can match.
-- (The count may go negative once the function is over-saturated;
-- only the transition to <= 0 matters to callers.)
decRules :: FunRules -> FunRules
decRules = fmap (\(n, rules) -> (n - 1, rules))
313
-- | Package a function's rules, recording the largest rule arity: no rule
-- can possibly fire until at least that many more args are supplied.
mkFunRules :: [CoreRule] -> FunRules
mkFunRules [] = Nothing
mkFunRules rs = Just (maximum (map ruleArity rs), rs)
               -- 'maximum' is safe: rs is non-empty in this equation
319
320 {-
321 ************************************************************************
322 * *
323 Functions on SimplCont
324 * *
325 ************************************************************************
326 -}
327
-- | A Stop continuation with nothing interesting about the context.
mkBoringStop :: OutType -> SimplCont
mkBoringStop ty = Stop ty BoringCtxt

-- | A Stop continuation for the right-hand side of a let.
mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in CoreUnfold
mkRhsStop ty = Stop ty RhsCtxt

-- | A Stop continuation for a lazy argument, carrying the caller's CallCtxt.
mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
mkLazyArgStop ty cci = Stop ty cci
336
337 -------------------
-- | True when the hole sits in a right-hand-side or argument position:
-- a plain Stop, a strict let binding, or a strict argument frame.
contIsRhsOrArg :: SimplCont -> Bool
contIsRhsOrArg cont
  = case cont of
      Stop {}       -> True
      StrictBind {} -> True
      StrictArg {}  -> True
      _             -> False

-- | True only for a Stop whose context is the RHS of a let.
contIsRhs :: SimplCont -> Bool
contIsRhs (Stop _ RhsCtxt) = True
contIsRhs _                = False
347
348 -------------------
-- | Is the continuation an empty context (just a Stop frame)?
contIsStop :: SimplCont -> Bool
contIsStop cont = case cont of
                    Stop {} -> True
                    _       -> False

-- | May this continuation safely be duplicated (e.g. pushed into each
-- alternative of a case)?  Leans on Note [DupFlag invariants]: an
-- OkToDup frame guarantees everything beneath it is ok-to-dup too.
contIsDupable :: SimplCont -> Bool
contIsDupable cont
  = case cont of
      Stop {}                         -> True
      ApplyToTy { sc_cont = k }       -> contIsDupable k  -- Types are always dupable
      ApplyToVal { sc_dup = OkToDup } -> True             -- See Note [DupFlag invariants]
      Select { sc_dup = OkToDup }     -> True             -- ...ditto...
      StrictArg { sc_dup = OkToDup }  -> True             -- ...ditto...
      CastIt _ k                      -> contIsDupable k  -- Coercions are dupable
      _                               -> False
361
362 -------------------
-- | True when the continuation adds only trivial wrappers: type
-- applications, coercion arguments, and casts.
contIsTrivial :: SimplCont -> Bool
contIsTrivial cont
  = case cont of
      Stop {}                                         -> True
      ApplyToTy { sc_cont = k }                       -> contIsTrivial k
      ApplyToVal { sc_arg = Coercion _, sc_cont = k } -> contIsTrivial k
      CastIt _ k                                      -> contIsTrivial k
      _                                               -> False
369
370 -------------------
-- | The type of the whole expression once the continuation is wrapped
-- around the hole: i.e. the hole type of the outermost Stop frame.
contResultType :: SimplCont -> OutType
contResultType cont
  = case cont of
      Stop ty _                  -> ty
      CastIt _ k                 -> contResultType k
      StrictBind { sc_cont = k } -> contResultType k
      StrictArg  { sc_cont = k } -> contResultType k
      Select     { sc_cont = k } -> contResultType k
      ApplyToTy  { sc_cont = k } -> contResultType k
      ApplyToVal { sc_cont = k } -> contResultType k
      TickIt _ k                 -> contResultType k
380
-- | The type of the expression that fits in the continuation's <hole>.
contHoleType :: SimplCont -> OutType
contHoleType (Stop ty _)   = ty
contHoleType (TickIt _ k)  = contHoleType k
contHoleType (CastIt co _) = pFst (coercionKind co)
        -- The hole is cast by co, so it has co's *source* type
contHoleType (StrictBind { sc_bndr = b, sc_dup = dup, sc_env = se })
  = perhapsSubstTy dup se (idType b)
        -- The hole is bound to b, so it has b's (possibly substituted) type
contHoleType (StrictArg  { sc_fun = ai })    = funArgTy (ai_type ai)
        -- The hole is the next argument of f e1..en
contHoleType (ApplyToTy  { sc_hole_ty = ty }) = ty  -- See Note [The hole type in ApplyToTy]
contHoleType (ApplyToVal { sc_arg = e, sc_env = se, sc_dup = dup, sc_cont = k })
  = mkFunTy (perhapsSubstTy dup se (exprType e))
            (contHoleType k)
        -- The hole is applied to e, so it is a function from e's type
        -- to the hole type of the rest of the continuation
contHoleType (Select { sc_dup = d, sc_bndr = b, sc_env = se })
  = perhapsSubstTy d se (idType b)
        -- The hole is scrutinised, so it has the case binder's type
394
395 -------------------
-- | Count all arguments, including types, coercions, and other values,
-- supplied by the immediately-enclosing application frames.
countArgs :: SimplCont -> Int
countArgs cont0 = go 0 cont0
  where
    go n (ApplyToTy  { sc_cont = k }) = go (n + 1) k
    go n (ApplyToVal { sc_cont = k }) = go (n + 1) k
    go n _                            = n
401
-- | Summarises value args, discards type args and coercions.
-- Returns (lone?, arg summaries, remaining continuation).
-- The returned continuation of the call is only used to
-- answer questions like "are you interesting?"
contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
contArgs cont
  | lone cont = (True, [], cont)
  | otherwise = go [] cont
  where
    -- "Lone" = not applied to anything and not under a cast
    lone (ApplyToTy  {}) = False  -- See Note [Lone variables] in CoreUnfold
    lone (ApplyToVal {}) = False
    lone (CastIt {})     = False
    lone _               = True

    -- Accumulate summaries (reversed) while peeling application frames
    go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
                                        = go (is_interesting arg se : args) k
    go args (ApplyToTy { sc_cont = k }) = go args k
    go args (CastIt _ k)                = go args k
    go args k                           = (False, reverse args, k)

    is_interesting arg se = interestingArg se arg
       -- Do *not* use short-cutting substitution here
       -- because we want to get as much IdInfo as possible
424
425
426 -------------------
-- | Build the initial ArgInfo for a call of @fun@ to @n_val_args@ value
-- arguments, priming the strictness and discount streams that guide
-- inlining of the arguments as they are simplified.
mkArgInfo :: Id
          -> [CoreRule] -- Rules for function
          -> Int        -- Number of value args
          -> SimplCont  -- Context of the call
          -> ArgInfo

mkArgInfo fun rules n_val_args call_cont
  | n_val_args < idArity fun            -- Note [Unsaturated functions]
    -- Unsaturated: use vanilla (all-lazy, no-discount) streams so we are
    -- not tempted to inline arguments of a call that cannot yet fire
  = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
            , ai_rules = fun_rules, ai_encl = False
            , ai_strs  = vanilla_stricts
            , ai_discs = vanilla_discounts }
  | otherwise
  = ArgInfo { ai_fun   = fun, ai_args = [], ai_type = fun_ty
            , ai_rules = fun_rules
            , ai_encl  = interestingArgContext rules call_cont
            , ai_strs  = add_type_str fun_ty arg_stricts
            , ai_discs = arg_discounts }
  where
    fun_ty = idType fun

    fun_rules = mkFunRules rules

    -- Discount streams are always infinite (padded with zeros)
    vanilla_discounts, arg_discounts :: [Int]
    vanilla_discounts = repeat 0
    arg_discounts = case idUnfolding fun of
                        CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
                              -> discounts ++ vanilla_discounts
                        _     -> vanilla_discounts

    vanilla_stricts, arg_stricts :: [Bool]
    vanilla_stricts  = repeat False

    arg_stricts
      = case splitStrictSig (idStrictness fun) of
          (demands, result_info)
                | not (demands `lengthExceeds` n_val_args)
                ->      -- Enough args, use the strictness given.
                        -- For bottoming functions we used to pretend that the arg
                        -- is lazy, so that we don't treat the arg as an
                        -- interesting context.  This avoids substituting
                        -- top-level bindings for (say) strings into
                        -- calls to error.  But now we are more careful about
                        -- inlining lone variables, so its ok (see SimplUtils.analyseCont)
                   if isBotRes result_info then
                        map isStrictDmd demands         -- Finite => result is bottom
                   else
                        map isStrictDmd demands ++ vanilla_stricts
               | otherwise
               -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
                                <+> ppr n_val_args <+> ppr demands )
                   vanilla_stricts      -- Not enough args, or no strictness

    add_type_str :: Type -> [Bool] -> [Bool]
    -- If the function arg types are strict, record that in the 'strictness bits'
    -- No need to instantiate because unboxed types (which dominate the strict
    --   types) can't instantiate type variables.
    -- add_type_str is done repeatedly (for each call);
    --   might be better once-for-all in the function
    -- But beware primops/datacons with no strictness

    add_type_str
      = go
      where
        go _ [] = []
        go fun_ty strs            -- Look through foralls
          | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty       -- Includes coercions
          = go fun_ty' strs
        go fun_ty (str:strs)      -- Add strict-type info
          | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
          = (str || Just False == isLiftedType_maybe arg_ty) : go fun_ty' strs
             -- If the type is levity-polymorphic, we can't know whether it's
             -- strict. isLiftedType_maybe will return Just False only when
             -- we're sure the type is unlifted.
        go _ strs
          = strs
504 {- Note [Unsaturated functions]
505 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
506 Consider (test eyeball/inline4)
507 x = a:as
508 y = f x
509 where f has arity 2. Then we do not want to inline 'x', because
510 it'll just be floated out again. Even if f has lots of discounts
511 on its first argument -- it must be saturated for these to kick in
512 -}
513
514
515 {-
516 ************************************************************************
517 * *
518 Interesting arguments
519 * *
520 ************************************************************************
521
522 Note [Interesting call context]
523 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
524 We want to avoid inlining an expression where there can't possibly be
525 any gain, such as in an argument position. Hence, if the continuation
526 is interesting (eg. a case scrutinee, application etc.) then we
527 inline, otherwise we don't.
528
529 Previously some_benefit used to return True only if the variable was
530 applied to some value arguments. This didn't work:
531
532 let x = _coerce_ (T Int) Int (I# 3) in
533 case _coerce_ Int (T Int) x of
534 I# y -> ....
535
536 we want to inline x, but can't see that it's a constructor in a case
537 scrutinee position, and some_benefit is False.
538
539 Another example:
540
541 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
542
543 .... case dMonadST _@_ x0 of (a,b,c) -> ....
544
545 we'd really like to inline dMonadST here, but we *don't* want to
546 inline if the case expression is just
547
548 case x of y { DEFAULT -> ... }
549
550 since we can just eliminate this case instead (x is in WHNF). Similar
551 applies when x is bound to a lambda expression. Hence
552 contIsInteresting looks for case expressions with just a single
553 default case.
554
555 Note [No case of case is boring]
556 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
557 If we see
558 case f x of <alts>
559
560 we'd usually treat the context as interesting, to encourage 'f' to
561 inline. But if case-of-case is off, it's really not so interesting
562 after all, because we are unlikely to be able to push the case
563 expression into the branches of any case in f's unfolding. So, to
564 reduce unnecessary code expansion, we just make the context look boring.
565 This made a small compile-time perf improvement in perf/compiler/T6048,
566 and it looks plausible to me.
567 -}
568
-- | Classify the context of a call site, so the inliner knows how keen to
-- be.  See Note [Interesting call context].
interestingCallContext :: SimplEnv -> SimplCont -> CallCtxt
interestingCallContext env cont
  = interesting cont
  where
    interesting (Select {})
      | sm_case_case (getMode env) = CaseCtxt
      | otherwise                  = BoringCtxt
      -- See Note [No case of case is boring]

    interesting (ApplyToVal {}) = ValAppCtxt
        -- Can happen if we have (f Int |> co) y
        -- If f has an INLINE prag we need to give it some
        -- motivation to inline. See Note [Cast then apply]
        -- in CoreUnfold

    interesting (StrictArg { sc_cci = cci }) = cci
    interesting (StrictBind {})              = BoringCtxt
    interesting (Stop _ cci)                 = cci
    -- Look through frames that don't change the interest of the context
    interesting (TickIt _ k)                 = interesting k
    interesting (ApplyToTy { sc_cont = k })  = interesting k
    interesting (CastIt _ k)                 = interesting k
        -- If this call is the arg of a strict function, the context
        -- is a bit interesting.  If we inline here, we may get useful
        -- evaluation information to avoid repeated evals: e.g.
        --      x + (y * z)
        -- Here the contIsInteresting makes the '*' keener to inline,
        -- which in turn exposes a constructor which makes the '+' inline.
        -- Assuming that +,* aren't small enough to inline regardless.
        --
        -- It's also very important to inline in a strict context for things
        -- like
        --      foldr k z (f x)
        -- Here, the context of (f x) is strict, and if f's unfolding is
        -- a build it's *great* to inline it here.  So we must ensure that
        -- the context for (f x) is not totally uninteresting.
605
-- | Should we be extra keen to inline in the arguments of this call,
-- because a rule (on the function or an enclosing one) might then fire?
--
-- If the argument has form (f x y), where x,y are boring,
-- and f is marked INLINE, then we don't want to inline f.
-- But if the context of the argument is
--      g (f x y)
-- where g has rules, then we *do* want to inline f, in case it
-- exposes a rule that might fire.  Similarly, if the context is
--      h (g (f x x))
-- where h has rules, then we do want to inline f; hence the
-- call_cont argument to interestingArgContext
--
-- The ai-rules flag makes this happen; if it's
-- set, the inliner gets just enough keener to inline f
-- regardless of how boring f's arguments are, if it's marked INLINE
--
-- The alternative would be to *always* inline an INLINE function,
-- regardless of how boring its context is; but that seems overkill
-- For example, it'd mean that wrapper functions were always inlined
--
-- The call_cont passed to interestingArgContext is the context of
-- the call itself, e.g. g <hole> in the example above
interestingArgContext :: [CoreRule] -> SimplCont -> Bool
interestingArgContext rules call_cont
  = notNull rules || enclosing_fn_has_rules
  where
    enclosing_fn_has_rules = go call_cont

    go (Select {})                  = False
    go (ApplyToVal {})              = False  -- Shouldn't really happen
    go (ApplyToTy  {})              = False  -- Ditto
    go (StrictArg { sc_cci = cci }) = interesting cci
    go (StrictBind {})              = False      -- ??
    go (CastIt _ c)                 = go c
    go (Stop _ cci)                 = interesting cci
    go (TickIt _ c)                 = go c

    -- Only a rule-argument context counts as "enclosing rules"
    interesting RuleArgCtxt = True
    interesting _           = False
643
644
645 {- Note [Interesting arguments]
646 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
647 An argument is interesting if it deserves a discount for unfoldings
648 with a discount in that argument position. The idea is to avoid
649 unfolding a function that is applied only to variables that have no
650 unfolding (i.e. they are probably lambda bound): f x y z There is
651 little point in inlining f here.
652
653 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
654 we must look through lets, eg (let x = e in C a b), because the let will
655 float, exposing the value, if we inline. That makes it different to
656 exprIsHNF.
657
658 Before 2009 we said it was interesting if the argument had *any* structure
659 at all; i.e. (hasSomeUnfolding v).  But that did too much inlining; see Trac #3016.
660
661 But we don't regard (f x y) as interesting, unless f is unsaturated.
662 If it's saturated and f hasn't inlined, then it's probably not going
663 to now!
664
665 Note [Conlike is interesting]
666 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
667 Consider
668 f d = ...((*) d x y)...
669 ... f (df d')...
670 where df is con-like. Then we'd really like to inline 'f' so that the
671 rule for (*) (df d) can fire. To do this
672 a) we give a discount for being an argument of a class-op (eg (*) d)
673 b) we say that a con-like argument (eg (df d)) is interesting
674 -}
675
-- | Summarise how "interesting" an argument is, for unfolding discounts.
-- See Note [Interesting arguments].
interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
interestingArg env e = go env 0 e
  where
    -- n is # value args to which the expression is applied
    go env n (Var v)
       = case substId env v of
           DoneId v'            -> go_var n v'
           DoneEx e _           -> go (zapSubstEnv env)             n e
           ContEx tvs cvs ids e -> go (setSubstEnv env tvs cvs ids) n e
           -- In the substituted cases, switch to the expression's own
           -- substitution environment before continuing

    go _   _ (Lit {})          = ValueArg
    go _   _ (Type _)          = TrivArg
    go _   _ (Coercion _)      = TrivArg
    go env n (App fn (Type _)) = go env n fn      -- Type args don't count
    go env n (App fn _)        = go env (n+1) fn
    go env n (Tick _ a)        = go env n a       -- Look through ticks...
    go env n (Cast e _)        = go env n e       -- ...and casts
    go env n (Lam v e)
       | isTyVar v             = go env n e
       | n>0                   = NonTrivArg       -- (\x.b) e   is NonTriv
       | otherwise             = ValueArg
    go _ _ (Case {})           = NonTrivArg
    go env n (Let b e)         = case go env' n e of
                                   ValueArg -> ValueArg
                                   _        -> NonTrivArg
                                 -- A let around a value still floats to a value
       where
         env' = env `addNewInScopeIds` bindersOf b

    go_var n v
       | isConLikeId v     = ValueArg   -- Experimenting with 'conlike' rather that
                                        --    data constructors here
       | idArity v > n     = ValueArg   -- Catches (eg) primops with arity but no unfolding
       | n > 0             = NonTrivArg -- Saturated or unknown call
       | conlike_unfolding = ValueArg   -- n==0; look for an interesting unfolding
                                        -- See Note [Conlike is interesting]
       | otherwise         = TrivArg    -- n==0, no useful unfolding
       where
         conlike_unfolding = isConLikeUnfolding (idUnfolding v)
715
716 {-
717 ************************************************************************
718 * *
719 SimplMode
720 * *
721 ************************************************************************
722
723 The SimplMode controls several switches; see its definition in
724 CoreMonad
725 sm_rules :: Bool -- Whether RULES are enabled
726 sm_inline :: Bool -- Whether inlining is enabled
727 sm_case_case :: Bool -- Whether case-of-case is enabled
728 sm_eta_expand :: Bool -- Whether eta-expansion is enabled
729 -}
730
-- | The simplifier environment used for expressions typed at the GHCi
-- prompt.  Do not do any inlining, in case we expose some unboxed tuple
-- stuff that confuses the bytecode interpreter.
simplEnvForGHCi :: DynFlags -> SimplEnv
simplEnvForGHCi dflags
  = mkSimplEnv ghci_mode
  where
    ghci_mode = SimplMode { sm_names      = ["GHCi"]
                          , sm_phase      = InitialPhase
                          , sm_dflags     = dflags
                          , sm_rules      = rules_on
                          , sm_inline     = False        -- see note above
                          , sm_eta_expand = eta_expand_on
                          , sm_case_case  = True }
    rules_on      = gopt Opt_EnableRewriteRules   dflags
    eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
745
-- | Adjust the mode for simplifying inside a stable unfolding whose
-- activation is the given one.
-- See Note [Simplifying inside stable unfoldings]
updModeForStableUnfoldings :: Activation -> SimplMode -> SimplMode
updModeForStableUnfoldings unf_act mode
  = mode { sm_phase      = unf_phase
         , sm_inline     = True
         , sm_eta_expand = False }
       -- sm_eta_expand: see Note [No eta expansion in stable unfoldings]
       -- For sm_rules, just inherit; sm_rules might be "off"
       -- because of -fno-enable-rewrite-rules
  where
    -- Phase in which the unfolding first becomes active
    unf_phase = case unf_act of
                  ActiveAfter _ n -> Phase n
                  _               -> InitialPhase
758
-- | Adjust the mode for simplifying a rule LHS: switch off inlining,
-- rule application and eta expansion, so the LHS stays as written.
-- See Note [Simplifying rules]
updModeForRules :: SimplMode -> SimplMode
updModeForRules mode
  = mode { sm_inline     = False
         , sm_rules      = False
         , sm_eta_expand = False
         , sm_phase      = InitialPhase }
766
767 {- Note [Simplifying rules]
768 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
769 When simplifying a rule LHS, refrain from /any/ inlining or applying
770 of other RULES.
771
772 Doing anything to the LHS is plain confusing, because it means that what the
773 rule matches is not what the user wrote. c.f. Trac #10595, and #10528.
774 Moreover, inlining (or applying rules) on rule LHSs risks introducing
775 Ticks into the LHS, which makes matching trickier. Trac #10665, #10745.
776
777 Doing this to either side confounds tools like HERMIT, which seek to reason
778 about and apply the RULES as originally written. See Trac #10829.
779
780 Note [No eta expansion in stable unfoldings]
781 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
782 If we have a stable unfolding
783
784 f :: Ord a => a -> IO ()
785 -- Unfolding template
786 -- = /\a \(d:Ord a) (x:a). bla
787
788 we do not want to eta-expand to
789
790 f :: Ord a => a -> IO ()
791 -- Unfolding template
792 -- = (/\a \(d:Ord a) (x:a) (eta:State#). bla eta) |> co
793
794 because then specialisation of the overloading doesn't work properly
795 (see Note [Specialisation shape] in Specialise), Trac #9509.
796
797 So we disable eta-expansion in stable unfoldings.
798
799 Note [Inlining in gentle mode]
800 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
801 Something is inlined if
802 (i) the sm_inline flag is on, AND
803 (ii) the thing has an INLINE pragma, AND
804 (iii) the thing is inlinable in the earliest phase.
805
806 Example of why (iii) is important:
807 {-# INLINE [~1] g #-}
808 g = ...
809
810 {-# INLINE f #-}
811 f x = g (g x)
812
813 If we were to inline g into f's inlining, then an importing module would
814 never be able to do
815 f e --> g (g e) ---> RULE fires
816 because the stable unfolding for f has had g inlined into it.
817
818 On the other hand, it is bad not to do ANY inlining into an
819 stable unfolding, because then recursive knots in instance declarations
820 don't get unravelled.
821
822 However, *sometimes* SimplGently must do no call-site inlining at all
823 (hence sm_inline = False). Before full laziness we must be careful
824 not to inline wrappers, because doing so inhibits floating
825 e.g. ...(case f x of ...)...
826 ==> ...(case (case x of I# x# -> fw x#) of ...)...
827 ==> ...(case x of I# x# -> case fw x# of ...)...
828 and now the redex (f x) isn't floatable any more.
829
830 The no-inlining thing is also important for Template Haskell. You might be
831 compiling in one-shot mode with -O2; but when TH compiles a splice before
832 running it, we don't want to use -O2. Indeed, we don't want to inline
833 anything, because the byte-code interpreter might get confused about
834 unboxed tuples and suchlike.
835
836 Note [Simplifying inside stable unfoldings]
837 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
838 We must take care with simplification inside stable unfoldings (which come from
839 INLINE pragmas).
840
841 First, consider the following example
842 let f = \pq -> BIG
843 in
844 let g = \y -> f y y
845 {-# INLINE g #-}
846 in ...g...g...g...g...g...
847 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
848 and thence copied multiple times when g is inlined. HENCE we treat
849 any occurrence in a stable unfolding as a multiple occurrence, not a single
850 one; see OccurAnal.addRuleUsage.
851
Second, we *do* want to do some modest rules/inlining stuff in stable
853 unfoldings, partly to eliminate senseless crap, and partly to break
854 the recursive knots generated by instance declarations.
855
856 However, suppose we have
857 {-# INLINE <act> f #-}
858 f = <rhs>
859 meaning "inline f in phases p where activation <act>(p) holds".
860 Then what inlinings/rules can we apply to the copy of <rhs> captured in
861 f's stable unfolding? Our model is that literally <rhs> is substituted for
862 f when it is inlined. So our conservative plan (implemented by
863 updModeForStableUnfoldings) is this:
864
865 -------------------------------------------------------------
   When simplifying the RHS of a stable unfolding, set the phase
867 to the phase in which the stable unfolding first becomes active
868 -------------------------------------------------------------
869
870 That ensures that
871
872 a) Rules/inlinings that *cease* being active before p will
873 not apply to the stable unfolding, consistent with it being
874 inlined in its *original* form in phase p.
875
876 b) Rules/inlinings that only become active *after* p will
877 not apply to the stable unfolding, again to be consistent with
878 inlining the *original* rhs in phase p.
879
880 For example,
881 {-# INLINE f #-}
882 f x = ...g...
883
884 {-# NOINLINE [1] g #-}
885 g y = ...
886
887 {-# RULE h g = ... #-}
888 Here we must not inline g into f's RHS, even when we get to phase 0,
889 because when f is later inlined into some other module we want the
890 rule for h to fire.
891
892 Similarly, consider
893 {-# INLINE f #-}
894 f x = ...g...
895
896 g y = ...
897 and suppose that there are auto-generated specialisations and a strictness
898 wrapper for g. The specialisations get activation AlwaysActive, and the
strictness wrapper gets activation (ActiveAfter 0).  So the strictness
wrapper fails the test and won't be inlined into f's stable unfolding. That
901 means f can inline, expose the specialised call to g, so the specialisation
902 rules can fire.
903
904 A note about wrappers
905 ~~~~~~~~~~~~~~~~~~~~~
906 It's also important not to inline a worker back into a wrapper.
907 A wrapper looks like
   wrapper = inline_me (\x -> ...worker... )
909 Normally, the inline_me prevents the worker getting inlined into
910 the wrapper (initially, the worker's only call site!). But,
911 if the wrapper is sure to be called, the strictness analyser will
912 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
913 continuation.
914 -}
915
activeUnfolding :: SimplEnv -> Id -> Bool
-- Decide whether we may use this Id's unfolding at a call site.
-- A compulsory unfolding is always usable; any other unfolding needs
-- inlining switched on (sm_inline) AND the Id's inline activation to
-- be active in the current simplifier phase.
activeUnfolding env id
  | isCompulsoryUnfolding (realIdUnfolding id)
  = True   -- Even sm_inline can't override compulsory unfoldings
  | otherwise
  = sm_inline mode
    && isActive (sm_phase mode) (idInlineActivation id)
        -- `or` isStableUnfolding (realIdUnfolding id)
        -- Inline things when
        --  (a) they are active
        --  (b) sm_inline says so, except that for stable unfoldings
        --      (ie pragmas) we inline anyway
  where
    mode = getMode env
930
getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
-- When matching in RULE, we want to "look through" an unfolding
-- (to see a constructor) if *rules* are on, even if *inlinings*
-- are not.  A notable example is DFuns, which really we want to
-- match in rules like (op dfun) in gentle mode.  Another example
-- is 'otherwise', which we want exprIsConApp_maybe to be able to
-- see very early on.
getUnfoldingInRuleMatch env = (seInScope env, lookup_unf)
  where
    mode = getMode env

    -- Expose an Id's unfolding only while it counts as active here
    lookup_unf id
      | unf_is_active id = idUnfolding id
      | otherwise        = NoUnfolding

    unf_is_active id
      | sm_rules mode = isActive (sm_phase mode) (idInlineActivation id)
      | otherwise     = -- active_unfolding_minimal id
                        isStableUnfolding (realIdUnfolding id)
            -- Do we even need to test this?  I think this InScopeEnv
            -- is only consulted if activeRule returns True, which
            -- never happens if sm_rules is False
952
----------------------
activeRule :: SimplEnv -> Activation -> Bool
-- Is a rule with this Activation allowed to fire right now?
-- When sm_rules is off, no rule ever fires.
activeRule env act
  | sm_rules mode = isActive (sm_phase mode) act
  | otherwise     = False           -- Rewriting is off
  where
    mode = getMode env
961
962 {-
963 ************************************************************************
964 * *
965 preInlineUnconditionally
966 * *
967 ************************************************************************
968
969 preInlineUnconditionally
970 ~~~~~~~~~~~~~~~~~~~~~~~~
971 @preInlineUnconditionally@ examines a bndr to see if it is used just
972 once in a completely safe way, so that it is safe to discard the
binding and inline its RHS at the (unique) usage site, REGARDLESS of how
974 big the RHS might be. If this is the case we don't simplify the RHS
975 first, but just inline it un-simplified.
976
977 This is much better than first simplifying a perhaps-huge RHS and then
978 inlining and re-simplifying it. Indeed, it can be at least quadratically
979 better. Consider
980
981 x1 = e1
982 x2 = e2[x1]
983 x3 = e3[x2]
984 ...etc...
985 xN = eN[xN-1]
986
We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times etc.
988 This can happen with cascades of functions too:
989
990 f1 = \x1.e1
991 f2 = \xs.e2[f1]
  f3 = \xs.e3[f2]
993 ...etc...
994
995 THE MAIN INVARIANT is this:
996
997 ---- preInlineUnconditionally invariant -----
998 IF preInlineUnconditionally chooses to inline x = <rhs>
999 THEN doing the inlining should not change the occurrence
1000 info for the free vars of <rhs>
1001 ----------------------------------------------
1002
1003 For example, it's tempting to look at trivial binding like
1004 x = y
1005 and inline it unconditionally. But suppose x is used many times,
1006 but this is the unique occurrence of y. Then inlining x would change
1007 y's occurrence info, which breaks the invariant. It matters: y
might have a BIG rhs, which will now be dup'd at every occurrence of x.
1009
1010
1011 Even RHSs labelled InlineMe aren't caught here, because there might be
1012 no benefit from inlining at the call site.
1013
1014 [Sept 01] Don't unconditionally inline a top-level thing, because that
1015 can simply make a static thing into something built dynamically. E.g.
1016 x = (a,b)
1017 main = \s -> h x
1018
1019 [Remember that we treat \s as a one-shot lambda.] No point in
1020 inlining x unless there is something interesting about the call site.
1021
1022 But watch out: if you aren't careful, some useful foldr/build fusion
1023 can be lost (most notably in spectral/hartel/parstof) because the
1024 foldr didn't see the build. Doing the dynamic allocation isn't a big
1025 deal, in fact, but losing the fusion can be. But the right thing here
1026 seems to be to do a callSiteInline based on the fact that there is
1027 something interesting about the call site (it's strict). Hmm. That
1028 seems a bit fragile.
1029
1030 Conclusion: inline top level things gaily until Phase 0 (the last
1031 phase), at which point don't.
1032
1033 Note [pre/postInlineUnconditionally in gentle mode]
1034 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1035 Even in gentle mode we want to do preInlineUnconditionally. The
1036 reason is that too little clean-up happens if you don't inline
1037 use-once things. Also a bit of inlining is *good* for full laziness;
1038 it can expose constant sub-expressions. Example in
1039 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
1040 let-float if you inline windowToViewport
1041
1042 However, as usual for Gentle mode, do not inline things that are
inactive in the initial stages.  See Note [Gentle mode].
1044
1045 Note [Stable unfoldings and preInlineUnconditionally]
1046 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1047 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
1048 Example
1049
1050 {-# INLINE f #-}
1051 f :: Eq a => a -> a
1052 f x = ...
1053
1054 fInt :: Int -> Int
1055 fInt = f Int dEqInt
1056
1057 ...fInt...fInt...fInt...
1058
1059 Here f occurs just once, in the RHS of fInt. But if we inline it there
1060 it might make fInt look big, and we'll lose the opportunity to inline f
1061 at each of fInt's call sites. The INLINE pragma will only inline when
1062 the application is saturated for exactly this reason; and we don't
1063 want PreInlineUnconditionally to second-guess it. A live example is
1064 Trac #3736.
1065 c.f. Note [Stable unfoldings and postInlineUnconditionally]
1066
1067 Note [Top-level bottoming Ids]
1068 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1069 Don't inline top-level Ids that are bottoming, even if they are used just
1070 once, because FloatOut has gone to some trouble to extract them out.
1071 Inlining them won't make the program run faster!
1072
1073 Note [Do not inline CoVars unconditionally]
1074 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1075 Coercion variables appear inside coercions, and the RHS of a let-binding
1076 is a term (not a coercion) so we can't necessarily inline the latter in
1077 the former.
1078 -}
1079
-- | True <=> it is safe to discard this let binding and inline its
-- (un-simplified) RHS at the binder's unique occurrence, regardless of
-- the RHS's size.  Must not change the occurrence info of the RHS's
-- free variables; see the preInlineUnconditionally invariant above.
preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
-- Precondition: rhs satisfies the let/app invariant
-- See Note [CoreSyn let/app invariant] in CoreSyn
-- Reason: we don't want to inline single uses, or discard dead bindings,
--         for unlifted, side-effect-ful bindings
preInlineUnconditionally env top_lvl bndr rhs
  | not pre_inline_unconditionally           = False
  | not active                               = False
  | isStableUnfolding (idUnfolding bndr)     = False -- Note [Stable unfoldings and preInlineUnconditionally]
  | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
  | isCoVar bndr                             = False -- Note [Do not inline CoVars unconditionally]
  | otherwise = case idOccInfo bndr of
                  IAmDead                    -> True -- Happens in ((\x.1) v)
                  occ@OneOcc { occ_one_br = True }
                               -> try_once (occ_in_lam occ)
                                           (occ_int_cxt occ)
                  _            -> False
  where
    -- The whole mechanism can be switched off (-fno-pre-inlining)
    pre_inline_unconditionally = gopt Opt_SimplPreInlining (seDynFlags env)
    mode   = getMode env
    active = isActive (sm_phase mode) act
             -- See Note [pre/postInlineUnconditionally in gentle mode]
    act = idInlineActivation bndr
    -- try_once: bndr has exactly one textual occurrence, in one branch
    try_once in_lam int_cxt     -- There's one textual occurrence
      | not in_lam = isNotTopLevel top_lvl || early_phase
      | otherwise  = int_cxt && canInlineInLam rhs

-- Be very careful before inlining inside a lambda, because (a) we must not
-- invalidate occurrence information, and (b) we want to avoid pushing a
-- single allocation (here) into multiple allocations (inside lambda).
-- Inlining a *function* with a single *saturated* call would be ok, mind you.
--      || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
--      where
--              is_cheap = exprIsCheap rhs
--              ok = is_cheap && int_cxt

--      int_cxt         The context isn't totally boring
-- E.g. let f = \ab.BIG in \y. map f xs
--      Don't want to substitute for f, because then we allocate
--      its closure every time the \y is called
-- But: let f = \ab.BIG in \y. map (f y) xs
--      Now we do want to substitute for f, even though it's not
--      saturated, because we're going to allocate a closure for
--      (f y) every time round the loop anyhow.

    -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
    -- so substituting rhs inside a lambda doesn't change the occ info.
    -- Sadly, not quite the same as exprIsHNF.
    canInlineInLam (Lit _)    = True
    canInlineInLam (Lam b e)  = isRuntimeVar b || canInlineInLam e
    canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
    canInlineInLam _          = False
      -- not ticks.  Counting ticks cannot be duplicated, and non-counting
      -- ticks around a Lam will disappear anyway.

    -- early_phase: True until we reach Phase 0, the final phase.
    early_phase = case sm_phase mode of
                    Phase 0 -> False
                    _       -> True
-- If we don't have this early_phase test, consider
--      x = length [1,2,3]
-- The full laziness pass carefully floats all the cons cells to
-- top level, and preInlineUnconditionally floats them all back in.
-- Result is (a) static allocation replaced by dynamic allocation
--           (b) many simplifier iterations because this tickles
--               a related problem; only one inlining per pass
--
-- On the other hand, I have seen cases where top-level fusion is
-- lost if we don't inline top level thing (e.g. string constants)
-- Hence the test for phase zero (which is the phase for all the final
-- simplifications).  Until phase zero we take no special notice of
-- top level things, but then we become more leery about inlining
-- them.
1152
1153 {-
1154 ************************************************************************
1155 * *
1156 postInlineUnconditionally
1157 * *
1158 ************************************************************************
1159
1160 postInlineUnconditionally
1161 ~~~~~~~~~~~~~~~~~~~~~~~~~
1162 @postInlineUnconditionally@ decides whether to unconditionally inline
1163 a thing based on the form of its RHS; in particular if it has a
1164 trivial RHS. If so, we can inline and discard the binding altogether.
1165
1166 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
1167 only have *forward* references. Hence, it's safe to discard the binding
1168
1169 NOTE: This isn't our last opportunity to inline. We're at the binding
1170 site right now, and we'll get another opportunity when we get to the
1171 occurrence(s)
1172
Note that we do this unconditional inlining only for trivial RHSs.
1174 Don't inline even WHNFs inside lambdas; doing so may simply increase
1175 allocation when the function is called. This isn't the last chance; see
1176 NOTE above.
1177
1178 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here Why?
1179 Because we don't even want to inline them into the RHS of constructor
1180 arguments. See NOTE above
1181
1182 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
1183 it's best to inline it anyway. We often get a=E; b=a from desugaring,
1184 with both a and b marked NOINLINE. But that seems incompatible with
1185 our new view that inlining is like a RULE, so I'm sticking to the 'active'
1186 story for now.
1187 -}
1188
-- | True <=> inline this binder's (already-simplified) RHS at every
-- occurrence and discard the binding, based on the *form* of the RHS
-- (notably trivial or small-enough RHSs).  Never fires for loop
-- breakers, stable unfoldings, or top-level bindings; see the Notes.
postInlineUnconditionally
    :: SimplEnv -> TopLevelFlag
    -> OutId            -- The binder (*not* a CoVar), including its unfolding
    -> OccInfo          -- From the InId
    -> OutExpr
    -> Bool
-- Precondition: rhs satisfies the let/app invariant
-- See Note [CoreSyn let/app invariant] in CoreSyn
-- Reason: we don't want to inline single uses, or discard dead bindings,
--         for unlifted, side-effect-ful bindings
postInlineUnconditionally env top_lvl bndr occ_info rhs
  | not active                  = False
  | isWeakLoopBreaker occ_info  = False -- If it's a loop-breaker of any kind, don't inline
                                        -- because it might be referred to "earlier"
  | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
  | isTopLevel top_lvl          = False -- Note [Top level and postInlineUnconditionally]
  | exprIsTrivial rhs           = True
  | otherwise
  = case occ_info of
        -- The point of examining occ_info here is that for *non-values*
        -- that occur outside a lambda, the call-site inliner won't have
        -- a chance (because it doesn't know that the thing
        -- only occurs once).   The pre-inliner won't have gotten
        -- it either, if the thing occurs in more than one branch
        -- So the main target is things like
        --     let x = f y in
        --     case v of
        --        True  -> case x of ...
        --        False -> case x of ...
        -- This is very important in practice; e.g. wheel-seive1 doubles
        -- in allocation if you miss this out
      OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt }
        -- OneOcc => no code-duplication issue
        -> smallEnoughToInline dflags unfolding     -- Small enough to dup
                        -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
                        --
                        -- NB: Do NOT inline arbitrarily big things, even if one_br is True
                        -- Reason: doing so risks exponential behaviour.  We simplify a big
                        --         expression, inline it, and simplify it again.  But if the
                        --         very same thing happens in the big expression, we get
                        --         exponential cost!
                        -- PRINCIPLE: when we've already simplified an expression once,
                        -- make sure that we only inline it if it's reasonably small.

           && (not in_lam ||
                        -- Outside a lambda, we want to be reasonably aggressive
                        -- about inlining into multiple branches of case
                        -- e.g. let x = <non-value>
                        --      in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
                        -- Inlining can be a big win if C3 is the hot-spot, even if
                        -- the uses in C1, C2 are not 'interesting'
                        -- An example that gets worse if you add int_cxt here is 'clausify'

                (isCheapUnfolding unfolding && int_cxt))
                        -- isCheap => acceptable work duplication; in_lam may be true
                        -- int_cxt to prevent us inlining inside a lambda without some
                        -- good reason.  See the notes on int_cxt in preInlineUnconditionally

      IAmDead -> True   -- This happens; for example, the case_bndr during case of
                        -- known constructor:  case (a,b) of x { (p,q) -> ... }
                        -- Here x isn't mentioned in the RHS, so we don't want to
                        -- create the (dead) let-binding  let x = (a,b) in ...

      _ -> False

-- Here's an example that we don't handle well:
--      let f = if b then Left (\x.BIG) else Right (\y.BIG)
--      in \y. ....case f of {...} ....
-- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
-- But
--  - We can't preInlineUnconditionally because that would invalidate
--    the occ info for b.
--  - We can't postInlineUnconditionally because the RHS is big, and
--    that risks exponential behaviour
--  - We can't call-site inline, because the rhs is big
-- Alas!

  where
    unfolding = idUnfolding bndr
    dflags    = seDynFlags env
    active    = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
        -- See Note [pre/postInlineUnconditionally in gentle mode]
1271
1272 {-
1273 Note [Top level and postInlineUnconditionally]
1274 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1275 We don't do postInlineUnconditionally for top-level things (even for
1276 ones that are trivial):
1277
1278 * Doing so will inline top-level error expressions that have been
1279 carefully floated out by FloatOut. More generally, it might
1280 replace static allocation with dynamic.
1281
1282 * Even for trivial expressions there's a problem. Consider
1283 {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
1284 blah xs = reverse xs
1285 ruggle = sort
1286 In one simplifier pass we might fire the rule, getting
1287 blah xs = ruggle xs
1288 but in *that* simplifier pass we must not do postInlineUnconditionally
1289 on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
1290
1291 If the rhs is trivial it'll be inlined by callSiteInline, and then
1292 the binding will be dead and discarded by the next use of OccurAnal
1293
1294 * There is less point, because the main goal is to get rid of local
1295 bindings used in multiple case branches.
1296
1297 * The inliner should inline trivial things at call sites anyway.
1298
1299 * The Id might be exported. We could check for that separately,
1300 but since we aren't going to postInlineUnconditionally /any/
1301 top-level bindings, we don't need to test.
1302
1303 Note [Stable unfoldings and postInlineUnconditionally]
1304 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Do not do postInlineUnconditionally if the Id has a stable unfolding,
1306 otherwise we lose the unfolding. Example
1307
1308 -- f has stable unfolding with rhs (e |> co)
1309 -- where 'e' is big
1310 f = e |> co
1311
1312 Then there's a danger we'll optimise to
1313
1314 f' = e
1315 f = f' |> co
1316
1317 and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
1318 won't inline because 'e' is too big.
1319
1320 c.f. Note [Stable unfoldings and preInlineUnconditionally]
1321
1322
1323 ************************************************************************
1324 * *
1325 Rebuilding a lambda
1326 * *
1327 ************************************************************************
1328 -}
1329
-- | Rebuild a lambda from binders and body, trying (in order):
-- floating a cast outside the lambdas (Note [Casts and lambdas]),
-- eta reduction, and eta expansion.  The SimplCont is consulted only
-- to suppress eta-expansion at let RHSs (Note [Eta expanding lambdas]).
mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
-- mkLam tries three things
--      a) eta reduction, if that gives a trivial expression
--      b) eta expansion [only if there are some value lambdas]

mkLam _env [] body _cont
  = return body
mkLam env bndrs body cont
  = do { dflags <- getDynFlags
       ; mkLam' dflags bndrs body }
  where
    mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
    -- Float a cast out of the lambdas, unless one of the binders is a
    -- coercion variable mentioned in the cast (ill-kinded otherwise)
    mkLam' dflags bndrs (Cast body co)
      | not (any bad bndrs)
        -- Note [Casts and lambdas]
      = do { lam <- mkLam' dflags bndrs body
           ; return (mkCast lam (mkPiCos Representational bndrs co)) }
      where
        co_vars  = tyCoVarsOfCo co
        bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars

    -- Gather up any further manifest lambdas in the body
    mkLam' dflags bndrs body@(Lam {})
      = mkLam' dflags (bndrs ++ bndrs1) body1
      where
        (bndrs1, body1) = collectBinders body

    -- Floatable ticks move outside the lambdas
    mkLam' dflags bndrs (Tick t expr)
      | tickishFloatable t
      = mkTick t <$> mkLam' dflags bndrs expr

    mkLam' dflags bndrs body
      | gopt Opt_DoEtaReduction dflags
      , Just etad_lam <- tryEtaReduce bndrs body
      = do { tick (EtaReduction (head bndrs))
           ; return etad_lam }

      | not (contIsRhs cont)   -- See Note [Eta-expanding lambdas]
      , sm_eta_expand (getMode env)
      , any isRuntimeVar bndrs
      , let body_arity = exprEtaExpandArity dflags body
      , body_arity > 0
      = do { tick (EtaExpansion (head bndrs))
           ; let res = mkLams bndrs (etaExpand body_arity body)
           ; traceSmpl "eta expand" (vcat [text "before" <+> ppr (mkLams bndrs body)
                                          , text "after"  <+> ppr res])
           ; return res }

      | otherwise
      = return (mkLams bndrs body)
1379
1380 {-
1381 Note [Eta expanding lambdas]
1382 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1383 In general we *do* want to eta-expand lambdas. Consider
1384 f (\x -> case x of (a,b) -> \s -> blah)
1385 where 's' is a state token, and hence can be eta expanded. This
showed up in the code for GHC.IO.Handle.Text.hPutChar, a rather
1387 important function!
1388
1389 The eta-expansion will never happen unless we do it now. (Well, it's
1390 possible that CorePrep will do it, but CorePrep only has a half-baked
1391 eta-expander that can't deal with casts. So it's much better to do it
1392 here.)
1393
1394 However, when the lambda is let-bound, as the RHS of a let, we have a
1395 better eta-expander (in the form of tryEtaExpandRhs), so we don't
1396 bother to try expansion in mkLam in that case; hence the contIsRhs
1397 guard.
1398
1399 NB: We check the SimplEnv (sm_eta_expand), not DynFlags.
1400 See Note [No eta expansion in stable unfoldings]
1401
1402 Note [Casts and lambdas]
1403 ~~~~~~~~~~~~~~~~~~~~~~~~
1404 Consider
1405 (\x. (\y. e) `cast` g1) `cast` g2
1406 There is a danger here that the two lambdas look separated, and the
1407 full laziness pass might float an expression to between the two.
1408
1409 So this equation in mkLam' floats the g1 out, thus:
1410 (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
1411 where x:tx.
1412
1413 In general, this floats casts outside lambdas, where (I hope) they
1414 might meet and cancel with some other cast:
1415 \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
1416 /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
1417 /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
1418 (if not (g `in` co))
1419
1420 Notice that it works regardless of 'e'. Originally it worked only
1421 if 'e' was itself a lambda, but in some cases that resulted in
1422 fruitless iteration in the simplifier. A good example was when
1423 compiling Text.ParserCombinators.ReadPrec, where we had a definition
1424 like (\x. Get `cast` g)
1425 where Get is a constructor with nonzero arity. Then mkLam eta-expanded
1426 the Get, and the next iteration eta-reduced it, and then eta-expanded
1427 it again.
1428
1429 Note also the side condition for the case of coercion binders.
1430 It does not make sense to transform
1431 /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
1432 because the latter is not well-kinded.
1433
1434 ************************************************************************
1435 * *
1436 Eta expansion
1437 * *
1438 ************************************************************************
1439 -}
1440
-- | Try to eta-expand the RHS of a let binding, using the arity found
-- by CoreArity.findRhsArity combined with the binder's call arity.
-- Join points are never eta-expanded (Note [Do not eta-expand join points]).
tryEtaExpandRhs :: SimplMode -> OutId -> OutExpr
                -> SimplM (Arity, Bool, OutExpr)
-- See Note [Eta-expanding at let bindings]
-- If tryEtaExpandRhs rhs = (n, is_bot, rhs') then
--   (a) rhs' has manifest arity
--   (b) if is_bot is True then rhs' applied to n args is guaranteed bottom
tryEtaExpandRhs mode bndr rhs
  | isJoinId bndr
  = return (manifestArity rhs, False, rhs)
  -- Note [Do not eta-expand join points]

  | otherwise
  = do { (new_arity, is_bot, new_rhs) <- try_expand

       ; WARN( new_arity < old_id_arity,
               (text "Arity decrease:" <+> (ppr bndr <+> ppr old_id_arity
                <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
                        -- Note [Arity decrease] in Simplify
         return (new_arity, is_bot, new_rhs) }
  where
    try_expand
      -- Trivial RHSs gain nothing from eta-expansion
      | exprIsTrivial rhs
      = return (exprArity rhs, False, rhs)

      | sm_eta_expand mode      -- Provided eta-expansion is on
      , new_arity > old_arity   -- And the current manifest arity isn't enough
      = do { tick (EtaExpansion bndr)
           ; return (new_arity, is_bot, etaExpand new_arity rhs) }

      | otherwise
      = return (old_arity, is_bot && new_arity == old_arity, rhs)

    dflags       = sm_dflags mode
    old_arity    = exprArity rhs -- See Note [Do not eta-expand PAPs]
    old_id_arity = idArity bndr

    -- Take the larger of the arity found by RHS analysis and the
    -- call arity recorded on the binder
    (new_arity1, is_bot) = findRhsArity dflags bndr rhs old_arity
    new_arity2 = idCallArity bndr
    new_arity  = max new_arity1 new_arity2
1480
1481 {-
1482 Note [Eta-expanding at let bindings]
1483 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1484 We now eta expand at let-bindings, which is where the payoff comes.
1485 The most significant thing is that we can do a simple arity analysis
1486 (in CoreArity.findRhsArity), which we can't do for free-floating lambdas
1487
1488 One useful consequence of not eta-expanding lambdas is this example:
1489 genMap :: C a => ...
1490 {-# INLINE genMap #-}
1491 genMap f xs = ...
1492
1493 myMap :: D a => ...
1494 {-# INLINE myMap #-}
1495 myMap = genMap
1496
1497 Notice that 'genMap' should only inline if applied to two arguments.
1498 In the stable unfolding for myMap we'll have the unfolding
1499 (\d -> genMap Int (..d..))
1500 We do not want to eta-expand to
1501 (\d f xs -> genMap Int (..d..) f xs)
1502 because then 'genMap' will inline, and it really shouldn't: at least
1503 as far as the programmer is concerned, it's not applied to two
1504 arguments!
1505
1506 Note [Do not eta-expand join points]
1507 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1508 Similarly to CPR (see Note [Don't CPR join points] in WorkWrap), a join point
1509 stands well to gain from its outer binding's eta-expansion, and eta-expanding a
1510 join point is fraught with issues like how to deal with a cast:
1511
1512 let join $j1 :: IO ()
1513 $j1 = ...
1514 $j2 :: Int -> IO ()
1515 $j2 n = if n > 0 then $j1
1516 else ...
1517
1518 =>
1519
1520 let join $j1 :: IO ()
1521 $j1 = (\eta -> ...)
1522 `cast` N:IO :: State# RealWorld -> (# State# RealWorld, ())
1523 ~ IO ()
1524 $j2 :: Int -> IO ()
1525 $j2 n = (\eta -> if n > 0 then $j1
1526 else ...)
1527 `cast` N:IO :: State# RealWorld -> (# State# RealWorld, ())
1528 ~ IO ()
1529
1530 The cast here can't be pushed inside the lambda (since it's not casting to a
1531 function type), so the lambda has to stay, but it can't because it contains a
1532 reference to a join point. In fact, $j2 can't be eta-expanded at all. Rather
1533 than try and detect this situation (and whatever other situations crop up!), we
1534 don't bother; again, any surrounding eta-expansion will improve these join
1535 points anyway, since an outer cast can *always* be pushed inside. By the time
1536 CorePrep comes around, the code is very likely to look more like this:
1537
1538 let join $j1 :: State# RealWorld -> (# State# RealWorld, ())
1539 $j1 = (...) eta
1540 $j2 :: Int -> State# RealWorld -> (# State# RealWorld, ())
1541 $j2 = if n > 0 then $j1
1542 else (...) eta
1543
1544 Note [Do not eta-expand PAPs]
1545 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1546 We used to have old_arity = manifestArity rhs, which meant that we
1547 would eta-expand even PAPs. But this gives no particular advantage,
1548 and can lead to a massive blow-up in code size, exhibited by Trac #9020.
1549 Suppose we have a PAP
1550 foo :: IO ()
1551 foo = returnIO ()
Then we can eta-expand it to
1553 foo = (\eta. (returnIO () |> sym g) eta) |> g
1554 where
1555 g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
1556
1557 But there is really no point in doing this, and it generates masses of
1558 coercions and whatnot that eventually disappear again. For T9020, GHC
allocated 6.6G before, and 0.8G afterwards; and residency dropped from
1560 1.8G to 45M.
1561
1562 But note that this won't eta-expand, say
1563 f = \g -> map g
1564 Does it matter not eta-expanding such functions? I'm not sure. Perhaps
1565 strictness analysis will have less to bite on?
1566
1567
1568 ************************************************************************
1569 * *
1570 \subsection{Floating lets out of big lambdas}
1571 * *
1572 ************************************************************************
1573
1574 Note [Floating and type abstraction]
1575 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1576 Consider this:
1577 x = /\a. C e1 e2
1578 We'd like to float this to
1579 y1 = /\a. e1
1580 y2 = /\a. e2
1581 x = /\a. C (y1 a) (y2 a)
1582 for the usual reasons: we want to inline x rather vigorously.
1583
1584 You may think that this kind of thing is rare. But in some programs it is
1585 common. For example, if you do closure conversion you might get:
1586
1587 data a :-> b = forall e. (e -> a -> b) :$ e
1588
1589 f_cc :: forall a. a :-> a
1590 f_cc = /\a. (\e. id a) :$ ()
1591
1592 Now we really want to inline that f_cc thing so that the
1593 construction of the closure goes away.
1594
1595 So I have elaborated simplLazyBind to understand right-hand sides that look
1596 like
1597 /\ a1..an. body
1598
1599 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1600 but there is quite a bit of plumbing in simplLazyBind as well.
1601
1602 The same transformation is good when there are lets in the body:
1603
1604 /\abc -> let(rec) x = e in b
1605 ==>
1606 let(rec) x' = /\abc -> let x = x' a b c in e
1607 in
1608 /\abc -> let x = x' a b c in b
1609
1610 This is good because it can turn things like:
1611
1612 let f = /\a -> letrec g = ... g ... in g
1613 into
1614 letrec g' = /\a -> ... g' a ...
1615 in
1616 let f = /\ a -> g' a
1617
1618 which is better. In effect, it means that big lambdas don't impede
1619 let-floating.
1620
1621 This optimisation is CRUCIAL in eliminating the junk introduced by
1622 desugaring mutually recursive definitions. Don't eliminate it lightly!
1623
1624 [May 1999] If we do this transformation *regardless* then we can
1625 end up with some pretty silly stuff. For example,
1626
1627 let
1628 st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1629 in ..
1630 becomes
1631 let y1 = /\s -> r1
1632 y2 = /\s -> r2
1633 st = /\s -> ...[y1 s/x1, y2 s/x2]
1634 in ..
1635
1636 Unless the "..." is a WHNF there is really no point in doing this.
1637 Indeed it can make things worse. Suppose x1 is used strictly,
1638 and is of the form
1639
1640 x1* = case f y of { (a,b) -> e }
1641
1642 If we abstract this wrt the tyvar we then can't do the case inline
1643 as we would normally do.
1644
1645 That's why the whole transformation is part of the same process that
1646 floats let-bindings and constructor arguments out of RHSs. In particular,
1647 it is guarded by the doFloatFromRhs call in simplLazyBind.
1648
1649 Note [Which type variables to abstract over]
1650 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1651 Abstract only over the type variables free in the rhs wrt which the
1652 new binding is abstracted. Note that
1653
1654 * The naive approach of abstracting wrt the
1655 tyvars free in the Id's /type/ fails. Consider:
1656 /\ a b -> let t :: (a,b) = (e1, e2)
1657 x :: a = fst t
1658 in ...
1659 Here, b isn't free in x's type, but we must nevertheless
1660 abstract wrt b as well, because t's type mentions b.
1661 Since t is floated too, we'd end up with the bogus:
1662 poly_t = /\ a b -> (e1, e2)
1663 poly_x = /\ a -> fst (poly_t a *b*)
1664
1665 * We must do closeOverKinds. Example (Trac #10934):
1666 f = /\k (f:k->*) (a:k). let t = AccFailure @ (f a) in ...
1667 Here we want to float 't', but we must remember to abstract over
1668 'k' as well, even though it is not explicitly mentioned in the RHS,
1669 otherwise we get
1670 t = /\ (f:k->*) (a:k). AccFailure @ (f a)
1671 which is obviously bogus.
1672 -}
1673
-- | Abstract a set of floated let-bindings over the type variables
-- @main_tvs@ bound by an enclosing big lambda, so that the bindings can
-- float past it.  See Note [Floating and type abstraction].
--
-- Returns the type-abstracted ("poly") bindings, together with the body
-- in which each original binder has been substituted by its poly version
-- applied to the abstracted type variables.
abstractFloats :: DynFlags -> TopLevelFlag -> [OutTyVar] -> SimplFloats
               -> OutExpr -> SimplM ([OutBind], OutExpr)
abstractFloats dflags top_lvl main_tvs floats body
  = ASSERT( notNull body_floats )
    ASSERT( isNilOL (sfJoinFloats floats) )
    -- Join floats must be empty: join points cannot be abstracted over
    -- type variables without ceasing to be join points
    do  { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
        ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
  where
    is_top_lvl  = isTopLevel top_lvl
    main_tv_set = mkVarSet main_tvs
    body_floats = letFloatBinds (sfLetFloats floats)
    empty_subst = CoreSubst.mkEmptySubst (sfInScope floats)

    -- Abstract one floated binding, threading a substitution that maps
    -- each original binder to (poly_binder tv1 .. tvn)
    abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
    abstract subst (NonRec id rhs)
      = do { (poly_id1, poly_app) <- mk_poly1 tvs_here id
           ; let (poly_id2, poly_rhs) = mk_poly2 poly_id1 tvs_here rhs'
                 subst' = CoreSubst.extendIdSubst subst id poly_app
           ; return (subst', NonRec poly_id2 poly_rhs) }
      where
        -- Apply the accumulated substitution first, so references to
        -- earlier floated binders are already in poly form
        rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs

        -- tvs_here: see Note [Which type variables to abstract over]
        tvs_here = toposortTyVars $
                   filter (`elemVarSet` main_tv_set) $
                   closeOverKindsList $
                   exprSomeFreeVarsList isTyVar rhs'

    abstract subst (Rec prs)
      = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly1 tvs_here) ids
           ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
                 poly_pairs = [ mk_poly2 poly_id tvs_here rhs'
                              | (poly_id, rhs) <- poly_ids `zip` rhss
                              , let rhs' = CoreSubst.substExpr (text "abstract_floats")
                                                               subst' rhs ]
                 -- NB: subst' (not subst) so that recursive occurrences
                 -- inside the group also become poly applications
           ; return (subst', Rec poly_pairs) }
      where
        (ids,rhss) = unzip prs
        -- For a recursive group, it's a bit of a pain to work out the minimal
        -- set of tyvars over which to abstract:
        --      /\ a b c.  let x = ...a... in
        --                 letrec { p = ...x...q...
        --                          q = .....p...b... } in
        --                 ...
        -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
        -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
        -- Since it's a pain, we just use the whole set, which is always safe
        --
        -- If you ever want to be more selective, remember this bizarre case too:
        --      x::a = x
        -- Here, we must abstract 'x' over 'a'.
        tvs_here = toposortTyVars main_tvs

    -- Make the poly Id (same name, forall-ified type) and the expression
    -- (poly_id tv1 .. tvn) that replaces the original binder
    mk_poly1 :: [TyVar] -> Id -> SimplM (Id, CoreExpr)
    mk_poly1 tvs_here var
      = do { uniq <- getUniqueM
           ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
                 poly_ty   = mkInvForAllTys tvs_here (idType var) -- But new type of course
                 poly_id   = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.hs
                             mkLocalIdOrCoVar poly_name poly_ty
           ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
        -- In the olden days, it was crucial to copy the occInfo of the original var,
        -- because we were looking at occurrence-analysed but as yet unsimplified code!
        -- In particular, we mustn't lose the loop breakers.  BUT NOW we are looking
        -- at already simplified code, so it doesn't matter
        --
        -- It's even right to retain single-occurrence or dead-var info:
        -- Suppose we started with  /\a -> let x = E in B
        -- where x occurs once in B. Then we transform to:
        --      let x' = /\a -> E in /\a -> let x* = x' a in B
        -- where x* has an INLINE prag on it.  Now, once x* is inlined,
        -- the occurrences of x' will be just the occurrences originally
        -- pinned on x.

    -- Build the poly RHS (wrap the big lambdas on) and give the poly Id
    -- an InlineRhs unfolding for that RHS
    mk_poly2 :: Id -> [TyVar] -> CoreExpr -> (Id, CoreExpr)
    mk_poly2 poly_id tvs_here rhs
      = (poly_id `setIdUnfolding` unf, poly_rhs)
      where
        poly_rhs = mkLams tvs_here rhs
        unf = mkUnfolding dflags InlineRhs is_top_lvl False poly_rhs

        -- We want the unfolding.  Consider
        --      let
        --            x = /\a. let y = ... in Just y
        --      in body
        -- Then we float the y-binding out (via abstractFloats and addPolyBind)
        -- but 'x' may well then be inlined in 'body' in which case we'd like the
        -- opportunity to inline 'y' too.
1762
1763 {-
1764 Note [Abstract over coercions]
1765 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1766 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1767 type variable a. Rather than sort this mess out, we simply bale out and abstract
1768 wrt all the type variables if any of them are coercion variables.
1769
1770
1771 Historical note: if you use let-bindings instead of a substitution, beware of this:
1772
1773 -- Suppose we start with:
1774 --
1775 -- x = /\ a -> let g = G in E
1776 --
1777 -- Then we'll float to get
1778 --
1779 -- x = let poly_g = /\ a -> G
1780 -- in /\ a -> let g = poly_g a in E
1781 --
1782 -- But now the occurrence analyser will see just one occurrence
1783 -- of poly_g, not inside a lambda, so the simplifier will
 -- PreInlineUnconditionally poly_g back into g!  Back to square 1!
1785 -- (I used to think that the "don't inline lone occurrences" stuff
1786 -- would stop this happening, but since it's the *only* occurrence,
1787 -- PreInlineUnconditionally kicks in first!)
1788 --
1789 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1790 -- to appear many times. (NB: mkInlineMe eliminates
1791 -- such notes on trivial RHSs, so do it manually.)
1792
1793 ************************************************************************
1794 * *
1795 prepareAlts
1796 * *
1797 ************************************************************************
1798
1799 prepareAlts tries these things:
1800
1801 1. Eliminate alternatives that cannot match, including the
1802 DEFAULT alternative.
1803
1804 2. If the DEFAULT alternative can match only one possible constructor,
1805 then make that constructor explicit.
1806 e.g.
1807 case e of x { DEFAULT -> rhs }
1808 ===>
1809 case e of x { (a,b) -> rhs }
1810 where the type is a single constructor type. This gives better code
1811 when rhs also scrutinises x or e.
1812
3. Returns a list of the constructors that cannot match in the
   DEFAULT alternative (if there is one)
1815
1816 Here "cannot match" includes knowledge from GADTs
1817
1818 It's a good idea to do this stuff before simplifying the alternatives, to
1819 avoid simplifying alternatives we know can't happen, and to come up with
1820 the list of constructors that are handled, to put into the IdInfo of the
1821 case binder, for use when simplifying the alternatives.
1822
1823 Eliminating the default alternative in (1) isn't so obvious, but it can
1824 happen:
1825
1826 data Colour = Red | Green | Blue
1827
1828 f x = case x of
1829 Red -> ..
1830 Green -> ..
1831 DEFAULT -> h x
1832
1833 h y = case y of
1834 Blue -> ..
1835 DEFAULT -> [ case y of ... ]
1836
1837 If we inline h into f, the default case of the inlined h can't happen.
1838 If we don't notice this, we may end up filtering out *all* the cases
1839 of the inner case y, which give us nowhere to go!
1840 -}
1841
-- | Tidy up the alternatives of a case before simplifying them:
-- drop alternatives that cannot match, make a single-constructor
-- DEFAULT explicit, and merge identical alternatives.  Also returns
-- the constructors that cannot occur in the DEFAULT branch.
prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
-- The returned alternatives can be empty, none are possible
prepareAlts scrut case_bndr' alts
  | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
    -- Case binder is needed just for its type.  Note that as an
    -- OutId, it has maximum information; this is important.
    -- Test simpl013 is an example
  = do { uniqs <- getUniquesM
       ; let (imposs_cons1, alts1)       = filterAlts tc tys imposs_cons alts
             (filled_in, alts2)          = refineDefaultAlt uniqs tc tys imposs_cons1 alts1
             (merged, imposs_cons3, alts3) = combineIdenticalAlts imposs_cons1 alts2
             -- The "imposs_cons" lists are the "impossible default data
             -- constructors": those that cannot match the default case
       ; when filled_in $ tick (FillInCaseDefault case_bndr')
       ; when merged    $ tick (AltMerge case_bndr')
       ; return (imposs_cons3, alts3) }

  | otherwise   -- Not a data type, so nothing interesting happens
  = return ([], alts)
  where
    -- The scrutinee's unfolding may already rule out some constructors
    imposs_cons = case scrut of
                    Var v -> otherCons (idUnfolding v)
                    _     -> []
1865
1866
1867 {-
1868 ************************************************************************
1869 * *
1870 mkCase
1871 * *
1872 ************************************************************************
1873
1874 mkCase tries these things
1875
 * Note [Merge Nested Cases]
 * Note [Eliminate Identity Case]
 * Note [Scrutinee Constant Folding]
1879
1880 Note [Merge Nested Cases]
1881 ~~~~~~~~~~~~~~~~~~~~~~~~~
1882 case e of b { ==> case e of b {
1883 p1 -> rhs1 p1 -> rhs1
1884 ... ...
1885 pm -> rhsm pm -> rhsm
1886 _ -> case b of b' { pn -> let b'=b in rhsn
1887 pn -> rhsn ...
1888 ... po -> let b'=b in rhso
1889 po -> rhso _ -> let b'=b in rhsd
1890 _ -> rhsd
1891 }
1892
which merges two cases into one when the default alternative of
the outer case scrutinises the same variable that the outer case
binds.  This transformation is called Case Merging.  It avoids
scrutinising the same variable multiple times.
1897
1898 Note [Eliminate Identity Case]
1899 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1900 case e of ===> e
1901 True -> True;
1902 False -> False
1903
1904 and similar friends.
1905
1906 Note [Scrutinee Constant Folding]
1907 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1908 case x op# k# of _ { ===> case x of _ {
1909 a1# -> e1 (a1# inv_op# k#) -> e1
1910 a2# -> e2 (a2# inv_op# k#) -> e2
1911 ... ...
1912 DEFAULT -> ed DEFAULT -> ed
1913
1914 where (x op# k#) inv_op# k# == x
1915
1916 And similarly for commuted arguments and for some unary operations.
1917
1918 The purpose of this transformation is not only to avoid an arithmetic
1919 operation at runtime but to allow other transformations to apply in cascade.
1920
1921 Example with the "Merge Nested Cases" optimization (from #12877):
1922
1923 main = case t of t0
1924 0## -> ...
1925 DEFAULT -> case t0 `minusWord#` 1## of t1
1926 0## -> ...
             DEFAULT -> case t1 `minusWord#` 1## of t2
1928 0## -> ...
1929 DEFAULT -> case t2 `minusWord#` 1## of _
1930 0## -> ...
1931 DEFAULT -> ...
1932
1933 becomes:
1934
1935 main = case t of _
1936 0## -> ...
1937 1## -> ...
1938 2## -> ...
1939 3## -> ...
1940 DEFAULT -> ...
1941
1942 There are some wrinkles
1943
1944 * Do not apply caseRules if there is just a single DEFAULT alternative
1945 case e +# 3# of b { DEFAULT -> rhs }
1946 If we applied the transformation here we would (stupidly) get
     case e of b' { DEFAULT -> let b = e +# 3# in rhs }
1948 and now the process may repeat, because that let will really
1949 be a case.
1950
1951 * The type of the scrutinee might change. E.g.
1952 case tagToEnum (x :: Int#) of (b::Bool)
1953 False -> e1
1954 True -> e2
1955 ==>
1956 case x of (b'::Int#)
1957 DEFAULT -> e1
1958 1# -> e2
1959
1960 * The case binder may be used in the right hand sides, so we need
1961 to make a local binding for it, if it is alive. e.g.
1962 case e +# 10# of b
1963 DEFAULT -> blah...b...
1964 44# -> blah2...b...
1965 ===>
1966 case e of b'
1967 DEFAULT -> let b = b' +# 10# in blah...b...
1968 34# -> let b = 44# in blah2...b...
1969
1970 Note that in the non-DEFAULT cases we know what to bind 'b' to,
1971 whereas in the DEFAULT case we must reconstruct the original value.
1972 But NB: we use b'; we do not duplicate 'e'.
1973
1974 * In dataToTag we might need to make up some fake binders;
1975 see Note [caseRules for dataToTag] in PrelRules
1976 -}
1977
-- | Smart constructor for a case expression; the four stages try, in
-- order: merging nested cases, eliminating identity cases, folding the
-- scrutinee, and finally just rebuilding the Case node.
mkCase, mkCase1, mkCase2, mkCase3
   :: DynFlags
   -> OutExpr -> OutId
   -> OutType -> [OutAlt]       -- Alternatives in standard (increasing) order
   -> SimplM OutExpr

--------------------------------------------------
--      1. Merge Nested Cases
--------------------------------------------------

-- See Note [Merge Nested Cases]: fires when the outer DEFAULT rhs
-- (under floatable ticks) is itself a case on the outer case binder
mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
  | gopt Opt_CaseMerge dflags
  , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
      <- stripTicksTop tickishFloatable deflt_rhs
  , inner_scrut_var == outer_bndr
  = do  { tick (CaseMerge outer_bndr)

        ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
                                          (con, args, wrap_rhs rhs)
                -- Simplifier's no-shadowing invariant should ensure
                -- that outer_bndr is not shadowed by the inner patterns

              -- Re-bind the inner case binder to the outer one in each
              -- merged alternative, so its occurrences stay valid
              wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
                -- The let is OK even for unboxed binders,

              wrapped_alts | isDeadBinder inner_bndr = inner_alts
                           | otherwise               = map wrap_alt inner_alts

              merged_alts = mergeAlts outer_alts wrapped_alts
                -- NB: mergeAlts gives priority to the left
                --      case x of
                --        A -> e1
                --        DEFAULT -> case x of
                --                      A -> e2
                --                      B -> e3
                -- When we merge, we must ensure that e1 takes
                -- precedence over e2 as the value for A!

        ; fmap (mkTicks ticks) $
          mkCase1 dflags scrut outer_bndr alts_ty merged_alts
        }
        -- Warning: don't call mkCase recursively!
        -- Firstly, there's no point, because inner alts have already had
        -- mkCase applied to them, so they won't have a case in their default
        -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
        -- in munge_rhs may put a case into the DEFAULT branch!

-- Case merge did not apply: fall through to the next transformation
mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
2025
2026 --------------------------------------------------
2027 -- 2. Eliminate Identity Case
2028 --------------------------------------------------
2029
-- See Note [Eliminate Identity Case]: if every alternative merely
-- returns the scrutinee (possibly under a uniform cast or floatable
-- ticks), drop the case altogether
mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _)      -- Identity case
  | all identity_alt alts
  = do { tick (CaseIdentity case_bndr)
       ; return (mkTicks ticks $ re_cast scrut rhs1) }
  where
    -- Ticks from the discarded alternatives (all but the one we keep)
    ticks = concatMap (stripTicksT tickishFloatable . thdOf3) (tail alts)
    identity_alt (con, args, rhs) = check_eq rhs con args

    -- Does this rhs just rebuild the value matched by (con, args)?
    check_eq (Cast rhs co) con args        -- See Note [RHS casts]
      = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
        -- The cast must not mention the pattern binders, otherwise it
        -- cannot be moved outside the case
    check_eq (Tick t e) alt args
      = tickishFloatable t && check_eq e alt args

    check_eq (Lit lit) (LitAlt lit') _     = lit == lit'
    check_eq (Var v) _ _  | v == case_bndr = True
    check_eq (Var v) (DataAlt con) args
      | null arg_tys, null args            = v == dataConWorkId con
                                             -- Optimisation only
    check_eq rhs (DataAlt con) args        = cheapEqExpr' tickishFloatable rhs $
                                             mkConApp2 con arg_tys args
    check_eq _ _ _ = False

    arg_tys = tyConAppArgs (idType case_bndr)

    -- Note [RHS casts]
    -- ~~~~~~~~~~~~~~~~
    -- We've seen this:
    --      case e of x { _ -> x `cast` c }
    -- And we definitely want to eliminate this case, to give
    --      e `cast` c
    -- So we throw away the cast from the RHS, and reconstruct
    -- it at the other end.  All the RHS casts must be the same
    -- if (all identity_alt alts) holds.
    --
    -- Don't worry about nested casts, because the simplifier combines them

    re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
    re_cast scrut _             = scrut

-- Not an identity case: fall through to scrutinee constant folding
mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
2070
2071 --------------------------------------------------
2072 -- 2. Scrutinee Constant Folding
2073 --------------------------------------------------
2074
-- See Note [Scrutinee Constant Folding]: rewrite (case x op# k# of ...)
-- to a case on x itself, transforming each alternative's constant with
-- the inverse operation supplied by caseRules
mkCase2 dflags scrut bndr alts_ty alts
  | -- See Note [Scrutinee Constant Folding]
    case alts of  -- Not if there is just a DEFAULT alternative
      [(DEFAULT,_,_)] -> False
      _               -> True
  , gopt Opt_CaseFolding dflags
  , Just (scrut', tx_con, mk_orig) <- caseRules dflags scrut
  = do { bndr' <- newId (fsLit "lwild") (exprType scrut')
         -- Fresh binder: the scrutinee's type may have changed

       ; alts' <- mapM (tx_alt tx_con mk_orig bndr') alts
       ; mkCase3 dflags scrut' bndr' alts_ty $
         add_default (re_sort alts')
       }

  | otherwise
  = mkCase3 dflags scrut bndr alts_ty alts
  where
    -- We need to keep the correct association between the scrutinee and its
    -- binder if the latter isn't dead. Hence we wrap rhs of alternatives with
    -- "let bndr = ... in":
    --
    --     case v + 10 of y        =====>        case v of y
    --        20      -> e1                         10      -> let y = 20     in e1
    --        DEFAULT -> e2                         DEFAULT -> let y = v + 10 in e2
    --
    -- Other transformations give: =====>        case v of y'
    --                                              10      -> let y = 20      in e1
    --                                              DEFAULT -> let y = y' + 10 in e2
    --
    -- This wrapping is done in tx_alt; we use mk_orig, returned by caseRules,
    -- to construct an expression equivalent to the original one, for use
    -- in the DEFAULT case

    tx_alt tx_con mk_orig new_bndr (con, bs, rhs)
      | DataAlt dc <- con', not (isNullaryRepDataCon dc)
      = -- For non-nullary data cons we must invent some fake binders
        -- See Note [caseRules for dataToTag] in PrelRules
        do { us <- getUniquesM
           ; let (ex_tvs, arg_ids) = dataConRepInstPat us dc
                                        (tyConAppArgs (idType new_bndr))
           ; return (con', ex_tvs ++ arg_ids, rhs') }
      | otherwise
      = return (con', [], rhs')
      where
        con' = tx_con con

        -- If the old case binder is alive, re-bind it to a
        -- reconstruction of its original value
        rhs' | isDeadBinder bndr = rhs
             | otherwise         = bindNonRec bndr orig_val rhs

        orig_val = case con of
                      DEFAULT    -> mk_orig new_bndr
                      LitAlt l   -> Lit l
                      DataAlt dc -> mkConApp2 dc (tyConAppArgs (idType bndr)) bs


    re_sort :: [CoreAlt] -> [CoreAlt]  -- Re-sort the alternatives to
    re_sort alts = sortBy cmpAlt alts  -- preserve the #case_invariants#

    add_default :: [CoreAlt] -> [CoreAlt]
    -- TagToEnum may change a boolean True/False set of alternatives
    -- to LitAlt 0#/1# alternatives.  But literal alternatives always
    -- have a DEFAULT (I think).  So add it.
    add_default ((LitAlt {}, bs, rhs) : alts) = (DEFAULT, bs, rhs) : alts
    add_default alts                          = alts
2138
2139 --------------------------------------------------
2140 -- Catch-all
2141 --------------------------------------------------
-- Catch-all: no further tricks apply, so just rebuild the Case node
mkCase3 _dflags scrut case_bndr res_ty alts
  = return (Case scrut case_bndr res_ty alts)
2144
2145 {-
2146 Note [Dead binders]
2147 ~~~~~~~~~~~~~~~~~~~~
2148 Note that dead-ness is maintained by the simplifier, so that it is
2149 accurate after simplification as well as before.
2150
2151
2152 Note [Cascading case merge]
2153 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2154 Case merging should cascade in one sweep, because it
2155 happens bottom-up
2156
2157 case e of a {
2158 DEFAULT -> case a of b
2159 DEFAULT -> case b of c {
2160 DEFAULT -> e
2161 A -> ea
2162 B -> eb
2163 C -> ec
2164 ==>
2165 case e of a {
2166 DEFAULT -> case a of b
2167 DEFAULT -> let c = b in e
2168 A -> let c = b in ea
2169 B -> eb
2170 C -> ec
2171 ==>
2172 case e of a {
2173 DEFAULT -> let b = a in let c = b in e
2174 A -> let b = a in let c = b in ea
2175 B -> let b = a in eb
2176 C -> ec
2177
2178
2179 However here's a tricky case that we still don't catch, and I don't
2180 see how to catch it in one pass:
2181
2182 case x of c1 { I# a1 ->
2183 case a1 of c2 ->
2184 0 -> ...
2185 DEFAULT -> case x of c3 { I# a2 ->
2186 case a2 of ...
2187
2188 After occurrence analysis (and its binder-swap) we get this
2189
2190 case x of c1 { I# a1 ->
2191 let x = c1 in -- Binder-swap addition
2192 case a1 of c2 ->
2193 0 -> ...
2194 DEFAULT -> case x of c3 { I# a2 ->
2195 case a2 of ...
2196
2197 When we simplify the inner case x, we'll see that
2198 x=c1=I# a1. So we'll bind a2 to a1, and get
2199
2200 case x of c1 { I# a1 ->
2201 case a1 of c2 ->
2202 0 -> ...
2203 DEFAULT -> case a1 of ...
2204
This is correct, but we can't do a case merge in this sweep
2206 because c2 /= a1. Reason: the binding c1=I# a1 went inwards
2207 without getting changed to c1=I# c2.
2208
2209 I don't think this is worth fixing, even if I knew how. It'll
2210 all come out in the next pass anyway.
2211 -}