A bunch of typofixes
[ghc.git] / compiler / simplCore / SimplUtils.hs
1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 \section[SimplUtils]{The simplifier utilities}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplUtils (
10 -- Rebuilding
11 mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
12
13 -- Inlining,
14 preInlineUnconditionally, postInlineUnconditionally,
15 activeUnfolding, activeRule,
16 getUnfoldingInRuleMatch,
17 simplEnvForGHCi, updModeForStableUnfoldings, updModeForRules,
18
19 -- The continuation type
20 SimplCont(..), DupFlag(..),
21 isSimplified, contIsStop,
22 contIsDupable, contResultType, contHoleType,
23 contIsTrivial, contArgs,
24 countArgs,
25 mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
26 interestingCallContext,
27
28 -- ArgInfo
29 ArgInfo(..), ArgSpec(..), mkArgInfo,
30 addValArgTo, addCastTo, addTyArgTo,
31 argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
32
33 abstractFloats
34 ) where
35
36 #include "HsVersions.h"
37
38 import GhcPrelude
39
40 import SimplEnv
41 import CoreMonad ( SimplMode(..), Tick(..) )
42 import DynFlags
43 import CoreSyn
44 import qualified CoreSubst
45 import PprCore
46 import CoreFVs
47 import CoreUtils
48 import CoreArity
49 import CoreUnfold
50 import Name
51 import Id
52 import IdInfo
53 import Var
54 import Demand
55 import SimplMonad
56 import Type hiding( substTy )
57 import Coercion hiding( substCo )
58 import DataCon ( dataConWorkId, isNullaryRepDataCon )
59 import VarSet
60 import BasicTypes
61 import Util
62 import OrdList ( isNilOL )
63 import MonadUtils
64 import Outputable
65 import Pair
66 import PrelRules
67 import FastString ( fsLit )
68
69 import Control.Monad ( when )
70 import Data.List ( sortBy )
71
72 {-
73 ************************************************************************
74 * *
75 The SimplCont and DupFlag types
76 * *
77 ************************************************************************
78
79 A SimplCont allows the simplifier to traverse the expression in a
80 zipper-like fashion. The SimplCont represents the rest of the expression,
81 "above" the point of interest.
82
83 You can also think of a SimplCont as an "evaluation context", using
84 that term in the way it is used for operational semantics. This is the
way I usually think of it. For example you'll often see a syntax for
86 evaluation context looking like
87 C ::= [] | C e | case C of alts | C `cast` co
88 That's the kind of thing we are doing here, and I use that syntax in
89 the comments.
90
91
92 Key points:
93 * A SimplCont describes a *strict* context (just like
94 evaluation contexts do). E.g. Just [] is not a SimplCont
95
96 * A SimplCont describes a context that *does not* bind
97 any variables. E.g. \x. [] is not a SimplCont
98 -}
99
-- | The simplifier's continuation: the evaluation context "above" the
-- expression currently being simplified, innermost frame first.
-- See the commentary above for the evaluation-context reading of each
-- constructor.
data SimplCont
  = Stop                -- Stop[e] = e
      OutType           -- Type of the <hole>
      CallCtxt          -- Tells if there is something interesting about
                        --          the context, and hence the inliner
                        --          should be a bit keener (see interestingCallContext)
                        -- Specifically:
                        --     This is an argument of a function that has RULES
                        --     Inlining the call might allow the rule to fire
                        -- Never ValAppCxt (use ApplyToVal instead)
                        -- or CaseCtxt (use Select instead)

  | CastIt              -- (CastIt co K)[e] = K[ e `cast` co ]
      OutCoercion       -- The coercion simplified
                        -- Invariant: never an identity coercion
      SimplCont

  | ApplyToVal          -- (ApplyToVal arg K)[e] = K[ e arg ]
      { sc_dup  :: DupFlag     -- See Note [DupFlag invariants]
      , sc_arg  :: InExpr      -- The argument,
      , sc_env  :: StaticEnv   -- and its static env
      , sc_cont :: SimplCont }

  | ApplyToTy           -- (ApplyToTy ty K)[e] = K[ e ty ]
      { sc_arg_ty  :: OutType  -- Argument type
      , sc_hole_ty :: OutType  -- Type of the function, presumably (forall a. blah)
                               -- See Note [The hole type in ApplyToTy]
      , sc_cont    :: SimplCont }

  | Select              -- (Select alts K)[e] = K[ case e of alts ]
      { sc_dup  :: DupFlag     -- See Note [DupFlag invariants]
      , sc_bndr :: InId        -- case binder
      , sc_alts :: [InAlt]     -- Alternatives
      , sc_env  :: StaticEnv   -- and their static environment
      , sc_cont :: SimplCont }

  -- The two strict forms: note that (despite an older comment here)
  -- both carry a DupFlag nowadays; see Note [DupFlag invariants]
  | StrictBind          -- (StrictBind x xs b K)[e] = let x = e in K[\xs.b]
                        --       or, equivalently,  = K[ (\x xs.b) e ]
      { sc_dup   :: DupFlag    -- See Note [DupFlag invariants]
      , sc_bndr  :: InId
      , sc_bndrs :: [InBndr]
      , sc_body  :: InExpr
      , sc_env   :: StaticEnv
      , sc_cont  :: SimplCont }

  | StrictArg           -- (StrictArg (f e1 ..en) K)[e] = K[ f e1 .. en e ]
      { sc_dup  :: DupFlag     -- Always Simplified or OkToDup
      , sc_fun  :: ArgInfo     -- Specifies f, e1..en, Whether f has rules, etc
                               --     plus strictness flags for *further* args
      , sc_cci  :: CallCtxt    -- Whether *this* argument position is interesting
      , sc_cont :: SimplCont }

  | TickIt              -- (TickIt t K)[e] = K[ tick t e ]
      (Tickish Id)      -- Tick tickish <hole>
      SimplCont
156
-- | How far a continuation frame has been simplified, and hence whether
-- it is safe to duplicate.  See Note [DupFlag invariants].
data DupFlag = NoDup       -- Unsimplified, might be big
             | Simplified  -- Simplified
             | OkToDup     -- Simplified and small
160
-- | Has this continuation frame already been simplified?
-- Invariant: when this returns True, the frame's subst-env is empty.
isSimplified :: DupFlag -> Bool
isSimplified df = case df of
  NoDup -> False
  _     -> True
164
-- | Substitute into the type only when the frame is still unsimplified;
-- a simplified frame has an empty subst-env (see Note [DupFlag
-- invariants]), so its types need no further substitution.
perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
perhapsSubstTy dup env ty =
  if isSimplified dup then ty else substTy env ty
169
170 {-
171 Note [DupFlag invariants]
172 ~~~~~~~~~~~~~~~~~~~~~~~~~
173 In both (ApplyToVal dup _ env k)
174 and (Select dup _ _ env k)
175 the following invariants hold
176
177 (a) if dup = OkToDup, then continuation k is also ok-to-dup
178 (b) if dup = OkToDup or Simplified, the subst-env is empty
      (and hence no need to re-simplify)
180 -}
181
-- Debug pretty-printer for 'DupFlag'.
instance Outputable DupFlag where
  ppr df = text $ case df of
    OkToDup    -> "ok"
    NoDup      -> "nodup"
    Simplified -> "simpl"
186
-- Debug pretty-printer: one line per continuation frame, innermost
-- frame first, chained with $$.
instance Outputable SimplCont where
  ppr (Stop ty interesting) = text "Stop" <> brackets (ppr interesting) <+> ppr ty
  ppr (CastIt co cont  )    = (text "CastIt" <+> pprOptCo co) $$ ppr cont
  ppr (TickIt t cont)       = (text "TickIt" <+> ppr t) $$ ppr cont
  ppr (ApplyToTy  { sc_arg_ty = ty, sc_cont = cont })
    = (text "ApplyToTy" <+> pprParendType ty) $$ ppr cont
  ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont })
    = (text "ApplyToVal" <+> ppr dup <+> pprParendExpr arg)
      $$ ppr cont
  ppr (StrictBind { sc_bndr = b, sc_cont = cont })
    = (text "StrictBind" <+> ppr b) $$ ppr cont
  ppr (StrictArg { sc_fun = ai, sc_cont = cont })
    = (text "StrictArg" <+> ppr (ai_fun ai)) $$ ppr cont
  ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
    = (text "Select" <+> ppr dup <+> ppr bndr) $$
      -- Alternatives and the type-variable substitution are only shown
      -- in -dppr-debug style, to keep ordinary dumps readable
      whenPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
203
204
205 {- Note [The hole type in ApplyToTy]
206 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
207 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
208 continuation. It is absolutely necessary to compute contHoleType, but it is
209 not used for anything else (and hence may not be evaluated).
210
211 Why is it necessary for contHoleType? Consider the continuation
212 ApplyToType Int (Stop Int)
213 corresponding to
214 (<hole> @Int) :: Int
215 What is the type of <hole>? It could be (forall a. Int) or (forall a. a),
216 and there is no way to know which, so we must record it.
217
218 In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType
219 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
220 doesn't matter because we'll never compute them all.
221
222 ************************************************************************
223 * *
224 ArgInfo and ArgSpec
225 * *
226 ************************************************************************
227 -}
228
-- | Accumulated information about a function application being rebuilt:
-- the function, the arguments seen so far (in reverse), and
-- inlining-relevant data about the arguments still to come.
data ArgInfo
  = ArgInfo {
        ai_fun   :: OutId,      -- The function
        ai_args  :: [ArgSpec],  -- ...applied to these args (which are in *reverse* order)

        ai_type  :: OutType,    -- Type of (f a1 ... an)

        ai_rules :: FunRules,   -- Rules for this function

        ai_encl :: Bool,        -- Flag saying whether this function
                                -- or an enclosing one has rules (recursively)
                                --      True => be keener to inline in all args

        ai_strs :: [Bool],      -- Strictness of remaining arguments
                                --   Usually infinite, but if it is finite it guarantees
                                --   that the function diverges after being given
                                --   that number of args
        ai_discs :: [Int]       -- Discounts for remaining arguments; non-zero => be keener to inline
                                --   Always infinite
    }
249
-- | One element of an 'ArgInfo' argument list; each constructor mirrors
-- the corresponding 'SimplCont' frame.
data ArgSpec
  = ValArg OutExpr                 -- Apply to this (coercion or value); c.f. ApplyToVal
  | TyArg { as_arg_ty  :: OutType  -- Apply to this type; c.f. ApplyToTy
          , as_hole_ty :: OutType  -- Type of the function (presumably forall a. blah)
          }
  | CastBy OutCoercion             -- Cast by this; c.f. CastIt
255
-- Debug pretty-printer for 'ArgSpec'.
instance Outputable ArgSpec where
  ppr spec = case spec of
    ValArg e                 -> text "ValArg" <+> ppr e
    TyArg { as_arg_ty = ty } -> text "TyArg"  <+> ppr ty
    CastBy c                 -> text "CastBy" <+> ppr c
260
-- | Record one more value argument: push it onto the (reversed)
-- argument list, apply the function type to it, and consume one
-- argument's worth of pending rules.
addValArgTo :: ArgInfo -> OutExpr -> ArgInfo
addValArgTo ai arg
  = ai { ai_args  = ValArg arg : prev_args
       , ai_type  = applyTypeToArg fun_ty arg
       , ai_rules = decRules (ai_rules ai) }
  where
    prev_args = ai_args ai
    fun_ty    = ai_type ai
265
-- | Record one more type argument.  The TyArg's hole type is the
-- function's type *before* instantiation, as required by
-- Note [The hole type in ApplyToTy].
addTyArgTo :: ArgInfo -> OutType -> ArgInfo
addTyArgTo ai arg_ty = ai { ai_args = arg_spec : ai_args ai
                          , ai_type = piResultTy poly_fun_ty arg_ty
                          , ai_rules = decRules (ai_rules ai) }
  where
    poly_fun_ty = ai_type ai
    arg_spec    = TyArg { as_arg_ty = arg_ty, as_hole_ty = poly_fun_ty }
273
-- | Record a cast; the application's type becomes the right-hand
-- ("to") side of the coercion's kind.
addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
addCastTo ai co
  = ai { ai_args = CastBy co : ai_args ai
       , ai_type = to_ty }
  where
    to_ty = pSnd (coercionKind co)
277
-- | The application arguments as Core expressions, stopping at the
-- first cast (a cast interrupts the argument chain).
argInfoAppArgs :: [ArgSpec] -> [OutExpr]
argInfoAppArgs specs = case specs of
  []                              -> []
  CastBy {} : _                   -> []   -- Stop at a cast
  ValArg e                 : rest -> e       : argInfoAppArgs rest
  TyArg { as_arg_ty = ty } : rest -> Type ty : argInfoAppArgs rest
283
-- | Wrap already-simplified argument specs around a continuation,
-- turning each 'ArgSpec' back into the corresponding 'SimplCont'
-- frame; the head of the list becomes the outermost frame.
pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
pushSimplifiedArgs _env []           k = k
pushSimplifiedArgs env  (arg : args) k
  = case arg of
      TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
               -> ApplyToTy  { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
      ValArg e -> ApplyToVal { sc_arg = e, sc_env = env, sc_dup = Simplified, sc_cont = rest }
                  -- sc_dup = Simplified is justified because...
      CastBy c -> CastIt c rest
  where
    rest = pushSimplifiedArgs env args k
           -- The env has an empty SubstEnv
295
argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
-- Rebuild the application expression (f a1 ... an) from the function
-- and its accumulated argument specs.
-- NB: the [ArgSpec] is reversed so that the first arg
-- in the list is the last one in the application
argInfoExpr fun rev_args
  = go rev_args
  where
    go []                              = Var fun
    go (ValArg a : as)                 = go as `App` a
    go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty
    go (CastBy co : as)                = mkCast (go as) co
306
307
-- | The rules still potentially in play for the function of a call.
type FunRules = Maybe (Int, [CoreRule]) -- Remaining rules for this function
     -- Nothing         => No rules
     -- Just (n, rules) => some rules, requiring at least n more type/value args
311
-- | Note that one more argument has been supplied: one fewer argument
-- is now needed before the rules can possibly match.
decRules :: FunRules -> FunRules
decRules = fmap (\(n, rules) -> (n - 1, rules))
315
-- | Package a rule list as 'FunRules'.  The required-argument count is
-- the largest arity over all the rules, since no rule can fire before
-- the call has that many arguments.  ('maximum' is safe: the list is
-- matched non-empty.)
mkFunRules :: [CoreRule] -> FunRules
mkFunRules rules = case rules of
  [] -> Nothing
  rs -> Just (maximum (map ruleArity rs), rs)
321
322 {-
323 ************************************************************************
324 * *
325 Functions on SimplCont
326 * *
327 ************************************************************************
328 -}
329
-- | A Stop continuation with nothing interesting about the context.
mkBoringStop :: OutType -> SimplCont
mkBoringStop ty = Stop ty BoringCtxt

-- | A Stop continuation for the right-hand side of a let.
mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in CoreUnfold
mkRhsStop ty = Stop ty RhsCtxt

-- | A Stop continuation for a lazy argument position, carrying the
-- call context that the argument position was judged to have.
mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
mkLazyArgStop ty cci = Stop ty cci
338
-------------------
-- | True for continuations that put the expression in the position of
-- a let right-hand side or a function argument: Stop, StrictBind,
-- StrictArg.
contIsRhsOrArg :: SimplCont -> Bool
contIsRhsOrArg cont = case cont of
  Stop {}       -> True
  StrictBind {} -> True
  StrictArg {}  -> True
  _             -> False

-- | True only for a Stop continuation recording an RHS context.
contIsRhs :: SimplCont -> Bool
contIsRhs cont = case cont of
  Stop _ RhsCtxt -> True
  _              -> False
349
-------------------
-- | Is this the empty (Stop) continuation?
contIsStop :: SimplCont -> Bool
contIsStop cont = case cont of
  Stop {} -> True
  _       -> False
354
-- | Can this continuation be duplicated cheaply (e.g. pushed into the
-- branches of a case)?  Relies on Note [DupFlag invariants]: an
-- OkToDup frame guarantees its sub-continuation is also ok-to-dup.
contIsDupable :: SimplCont -> Bool
contIsDupable (Stop {})                         = True
contIsDupable (ApplyToTy  { sc_cont = k })      = contIsDupable k
contIsDupable (ApplyToVal { sc_dup = OkToDup }) = True -- See Note [DupFlag invariants]
contIsDupable (Select { sc_dup = OkToDup })     = True -- ...ditto...
contIsDupable (StrictArg { sc_dup = OkToDup })  = True -- ...ditto...
contIsDupable (CastIt _ k)                      = contIsDupable k
contIsDupable _                                 = False
363
-------------------
-- | A continuation is trivial if it consists only of type
-- applications, coercion-value applications, and casts, ending in
-- Stop: such a context wraps no real work around the hole.
contIsTrivial :: SimplCont -> Bool
contIsTrivial (Stop {})                                         = True
contIsTrivial (ApplyToTy { sc_cont = k })                       = contIsTrivial k
contIsTrivial (ApplyToVal { sc_arg = Coercion _, sc_cont = k }) = contIsTrivial k
contIsTrivial (CastIt _ k)                                      = contIsTrivial k
contIsTrivial _                                                 = False
371
-------------------
-- | The type of the whole expression once the continuation is fully
-- applied, i.e. the hole type recorded at the outermost Stop.
contResultType :: SimplCont -> OutType
contResultType cont = case cont of
  Stop ty _                  -> ty
  CastIt _ k                 -> contResultType k
  StrictBind { sc_cont = k } -> contResultType k
  StrictArg  { sc_cont = k } -> contResultType k
  Select     { sc_cont = k } -> contResultType k
  ApplyToTy  { sc_cont = k } -> contResultType k
  ApplyToVal { sc_cont = k } -> contResultType k
  TickIt _ k                 -> contResultType k
382
contHoleType :: SimplCont -> OutType
-- The type of the expression that fits in this continuation's hole.
contHoleType (Stop ty _)                      = ty
contHoleType (TickIt _ k)                     = contHoleType k
contHoleType (CastIt co _)                    = pFst (coercionKind co)
                                                -- the "from" side of the cast
contHoleType (StrictBind { sc_bndr = b, sc_dup = dup, sc_env = se })
  = perhapsSubstTy dup se (idType b)
contHoleType (StrictArg  { sc_fun = ai })     = funArgTy (ai_type ai)
contHoleType (ApplyToTy  { sc_hole_ty = ty }) = ty  -- See Note [The hole type in ApplyToTy]
contHoleType (ApplyToVal { sc_arg = e, sc_env = se, sc_dup = dup, sc_cont = k })
  = mkFunTy (perhapsSubstTy dup se (exprType e))
            (contHoleType k)
contHoleType (Select { sc_dup = d, sc_bndr = b, sc_env = se })
  = perhapsSubstTy d se (idType b)
396
-------------------
countArgs :: SimplCont -> Int
-- Count all arguments, including types, coercions, and other values,
-- in the outermost run of applications.
countArgs = go 0
  where
    go acc (ApplyToTy  { sc_cont = cont }) = go (acc + 1) cont
    go acc (ApplyToVal { sc_cont = cont }) = go (acc + 1) cont
    go acc _                               = acc
403
contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
-- Summarises value args, discards type args and coercions
-- The returned continuation of the call is only used to
-- answer questions like "are you interesting?"
-- The Bool is True iff the expression is a "lone" variable: not
-- applied or cast at all.
contArgs cont
  | lone cont = (True, [], cont)
  | otherwise = go [] cont
  where
    lone (ApplyToTy  {}) = False -- See Note [Lone variables] in CoreUnfold
    lone (ApplyToVal {}) = False
    lone (CastIt {})     = False
    lone _               = True

    go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
                                        = go (is_interesting arg se : args) k
    go args (ApplyToTy { sc_cont = k }) = go args k
    go args (CastIt _ k)                = go args k
    go args k                           = (False, reverse args, k)

    is_interesting arg se = interestingArg se arg
        -- Do *not* use short-cutting substitution here
        -- because we want to get as much IdInfo as possible
426
427
-------------------
mkArgInfo :: Id
          -> [CoreRule] -- Rules for function
          -> Int        -- Number of value args
          -> SimplCont  -- Context of the call
          -> ArgInfo
-- Build the initial ArgInfo for a call of 'fun': no arguments recorded
-- yet, plus strictness/discount data for the arguments still to come.

mkArgInfo fun rules n_val_args call_cont
  | n_val_args < idArity fun            -- Note [Unsaturated functions]
  = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
            , ai_rules = fun_rules, ai_encl = False
            , ai_strs = vanilla_stricts
            , ai_discs = vanilla_discounts }
  | otherwise
  = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
            , ai_rules = fun_rules
            , ai_encl = interestingArgContext rules call_cont
            , ai_strs = add_type_str fun_ty arg_stricts
            , ai_discs = arg_discounts }
  where
    fun_ty = idType fun

    fun_rules = mkFunRules rules

    vanilla_discounts, arg_discounts :: [Int]
    vanilla_discounts = repeat 0
    arg_discounts = case idUnfolding fun of
                        CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
                              -> discounts ++ vanilla_discounts
                        _     -> vanilla_discounts

    vanilla_stricts, arg_stricts :: [Bool]
    vanilla_stricts  = repeat False

    arg_stricts
      = case splitStrictSig (idStrictness fun) of
          (demands, result_info)
                | not (demands `lengthExceeds` n_val_args)
                ->      -- Enough args, use the strictness given.
                        -- For bottoming functions we used to pretend that the arg
                        -- is lazy, so that we don't treat the arg as an
                        -- interesting context.  This avoids substituting
                        -- top-level bindings for (say) strings into
                        -- calls to error.  But now we are more careful about
                        -- inlining lone variables, so it's ok (see SimplUtils.analyseCont)
                   if isBotRes result_info then
                        map isStrictDmd demands         -- Finite => result is bottom
                   else
                        map isStrictDmd demands ++ vanilla_stricts
               | otherwise
               -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
                                <+> ppr n_val_args <+> ppr demands )
                   vanilla_stricts      -- Not enough args, or no strictness

    add_type_str :: Type -> [Bool] -> [Bool]
    -- If the function arg types are strict, record that in the 'strictness bits'
    -- No need to instantiate because unboxed types (which dominate the strict
    --   types) can't instantiate type variables.
    -- add_type_str is done repeatedly (for each call); might be better
    -- once-for-all in the function
    -- But beware primops/datacons with no strictness

    add_type_str
      = go
      where
        go _ [] = []
        go fun_ty strs            -- Look through foralls
           | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty       -- Includes coercions
           = go fun_ty' strs
        go fun_ty (str:strs)      -- Add strict-type info
           | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
           = (str || Just False == isLiftedType_maybe arg_ty) : go fun_ty' strs
             -- If the type is levity-polymorphic, we can't know whether it's
             -- strict. isLiftedType_maybe will return Just False only when
             -- we're sure the type is unlifted.
        go _ strs
           = strs
505
506 {- Note [Unsaturated functions]
507 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
508 Consider (test eyeball/inline4)
509 x = a:as
510 y = f x
511 where f has arity 2. Then we do not want to inline 'x', because
512 it'll just be floated out again. Even if f has lots of discounts
513 on its first argument -- it must be saturated for these to kick in
514 -}
515
516
517 {-
518 ************************************************************************
519 * *
520 Interesting arguments
521 * *
522 ************************************************************************
523
524 Note [Interesting call context]
525 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
526 We want to avoid inlining an expression where there can't possibly be
527 any gain, such as in an argument position. Hence, if the continuation
528 is interesting (eg. a case scrutinee, application etc.) then we
529 inline, otherwise we don't.
530
531 Previously some_benefit used to return True only if the variable was
532 applied to some value arguments. This didn't work:
533
534 let x = _coerce_ (T Int) Int (I# 3) in
535 case _coerce_ Int (T Int) x of
536 I# y -> ....
537
538 we want to inline x, but can't see that it's a constructor in a case
539 scrutinee position, and some_benefit is False.
540
541 Another example:
542
543 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
544
545 .... case dMonadST _@_ x0 of (a,b,c) -> ....
546
547 we'd really like to inline dMonadST here, but we *don't* want to
548 inline if the case expression is just
549
550 case x of y { DEFAULT -> ... }
551
552 since we can just eliminate this case instead (x is in WHNF). Similar
553 applies when x is bound to a lambda expression. Hence
554 contIsInteresting looks for case expressions with just a single
555 default case.
556
557 Note [No case of case is boring]
558 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
559 If we see
560 case f x of <alts>
561
562 we'd usually treat the context as interesting, to encourage 'f' to
563 inline. But if case-of-case is off, it's really not so interesting
564 after all, because we are unlikely to be able to push the case
565 expression into the branches of any case in f's unfolding. So, to
566 reduce unnecessary code expansion, we just make the context look boring.
567 This made a small compile-time perf improvement in perf/compiler/T6048,
568 and it looks plausible to me.
569 -}
570
interestingCallContext :: SimplEnv -> SimplCont -> CallCtxt
-- Classify the call context so the inliner knows how keen to be.
-- See Note [Interesting call context]
interestingCallContext env cont
  = interesting cont
  where
    interesting (Select {})
       | sm_case_case (getMode env) = CaseCtxt
       | otherwise                  = BoringCtxt
       -- See Note [No case of case is boring]

    interesting (ApplyToVal {}) = ValAppCtxt
        -- Can happen if we have (f Int |> co) y
        -- If f has an INLINE prag we need to give it some
        -- motivation to inline. See Note [Cast then apply]
        -- in CoreUnfold

    interesting (StrictArg { sc_cci = cci }) = cci
    interesting (StrictBind {})              = BoringCtxt
    interesting (Stop _ cci)                 = cci
    -- Transparent frames: look through and classify the frame beneath
    interesting (TickIt _ k)                 = interesting k
    interesting (ApplyToTy { sc_cont = k })  = interesting k
    interesting (CastIt _ k)                 = interesting k
        -- If this call is the arg of a strict function, the context
        -- is a bit interesting. If we inline here, we may get useful
        -- evaluation information to avoid repeated evals: e.g.
        --      x + (y * z)
        -- Here the contIsInteresting makes the '*' keener to inline,
        -- which in turn exposes a constructor which makes the '+' inline.
        -- Assuming that +,* aren't small enough to inline regardless.
        --
        -- It's also very important to inline in a strict context for things
        -- like
        --              foldr k z (f x)
        -- Here, the context of (f x) is strict, and if f's unfolding is
        -- a build it's *great* to inline it here.  So we must ensure that
        -- the context for (f x) is not totally uninteresting.
607
interestingArgContext :: [CoreRule] -> SimplCont -> Bool
-- If the argument has form (f x y), where x,y are boring,
-- and f is marked INLINE, then we don't want to inline f.
-- But if the context of the argument is
--      g (f x y)
-- where g has rules, then we *do* want to inline f, in case it
-- exposes a rule that might fire.  Similarly, if the context is
--      h (g (f x x))
-- where h has rules, then we do want to inline f; hence the
-- call_cont argument to interestingArgContext
--
-- The ai-rules flag makes this happen; if it's
-- set, the inliner gets just enough keener to inline f
-- regardless of how boring f's arguments are, if it's marked INLINE
--
-- The alternative would be to *always* inline an INLINE function,
-- regardless of how boring its context is; but that seems overkill
-- For example, it'd mean that wrapper functions were always inlined
--
-- The call_cont passed to interestingArgContext is the context of
-- the call itself, e.g. g <hole> in the example above
interestingArgContext rules call_cont
  = notNull rules || enclosing_fn_has_rules
  where
    enclosing_fn_has_rules = go call_cont

    go (Select {})                  = False
    go (ApplyToVal {})              = False  -- Shouldn't really happen
    go (ApplyToTy  {})              = False  -- Ditto
    go (StrictArg { sc_cci = cci }) = interesting cci
    go (StrictBind {})              = False      -- ??
    go (CastIt _ c)                 = go c
    go (Stop _ cci)                 = interesting cci
    go (TickIt _ c)                 = go c

    -- Only a rule-argument context counts as interesting here
    interesting RuleArgCtxt = True
    interesting _           = False
645
646
647 {- Note [Interesting arguments]
648 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
649 An argument is interesting if it deserves a discount for unfoldings
650 with a discount in that argument position. The idea is to avoid
651 unfolding a function that is applied only to variables that have no
652 unfolding (i.e. they are probably lambda bound): f x y z There is
653 little point in inlining f here.
654
655 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
656 we must look through lets, eg (let x = e in C a b), because the let will
657 float, exposing the value, if we inline. That makes it different to
658 exprIsHNF.
659
660 Before 2009 we said it was interesting if the argument had *any* structure
at all; i.e. (hasSomeUnfolding v). But that does too much inlining; see Trac #3016.
662
663 But we don't regard (f x y) as interesting, unless f is unsaturated.
664 If it's saturated and f hasn't inlined, then it's probably not going
665 to now!
666
667 Note [Conlike is interesting]
668 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
669 Consider
670 f d = ...((*) d x y)...
671 ... f (df d')...
672 where df is con-like. Then we'd really like to inline 'f' so that the
673 rule for (*) (df d) can fire. To do this
674 a) we give a discount for being an argument of a class-op (eg (*) d)
675 b) we say that a con-like argument (eg (df d)) is interesting
676 -}
677
interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
-- Summarise an argument for the purpose of inlining discounts.
-- See Note [Interesting arguments]
interestingArg env e = go env 0 e
  where
    -- n is # value args to which the expression is applied
    go env n (Var v)
       = case substId env v of
           DoneId v'            -> go_var n v'
           DoneEx e _           -> go (zapSubstEnv env)             n e
           ContEx tvs cvs ids e -> go (setSubstEnv env tvs cvs ids) n e

    go _   _ (Lit {})          = ValueArg
    go _   _ (Type _)          = TrivArg
    go _   _ (Coercion _)      = TrivArg
    go env n (App fn (Type _)) = go env n fn
    go env n (App fn _)        = go env (n+1) fn
    go env n (Tick _ a)        = go env n a
    go env n (Cast e _)        = go env n e
    go env n (Lam v e)
       | isTyVar v             = go env n e
       | n>0                   = NonTrivArg     -- (\x.b) e  is NonTriv
       | otherwise             = ValueArg
    go _ _ (Case {})           = NonTrivArg
    -- A let is a value only if its body is; the let itself will
    -- float out and expose the value if we inline
    go env n (Let b e)         = case go env' n e of
                                   ValueArg -> ValueArg
                                   _        -> NonTrivArg
                               where
                                 env' = env `addNewInScopeIds` bindersOf b

    go_var n v
       | isConLikeId v     = ValueArg   -- Experimenting with 'conlike' rather that
                                        --    data constructors here
       | idArity v > n     = ValueArg   -- Catches (eg) primops with arity but no unfolding
       | n > 0             = NonTrivArg -- Saturated or unknown call
       | conlike_unfolding = ValueArg   -- n==0; look for an interesting unfolding
                                        -- See Note [Conlike is interesting]
       | otherwise         = TrivArg    -- n==0, no useful unfolding
       where
         conlike_unfolding = isConLikeUnfolding (idUnfolding v)
717
718 {-
719 ************************************************************************
720 * *
721 SimplMode
722 * *
723 ************************************************************************
724
725 The SimplMode controls several switches; see its definition in
726 CoreMonad
727 sm_rules :: Bool -- Whether RULES are enabled
728 sm_inline :: Bool -- Whether inlining is enabled
729 sm_case_case :: Bool -- Whether case-of-case is enabled
730 sm_eta_expand :: Bool -- Whether eta-expansion is enabled
731 -}
732
-- | The simplifier environment used for GHCi: inlining disabled, so we
-- don't expose unboxed-tuple code that would confuse the bytecode
-- interpreter; rules and eta-expansion follow the usual flags.
simplEnvForGHCi :: DynFlags -> SimplEnv
simplEnvForGHCi dflags
  = mkSimplEnv $ SimplMode { sm_names  = ["GHCi"]
                           , sm_phase  = InitialPhase
                           , sm_dflags = dflags
                           , sm_rules  = rules_on
                           , sm_inline = False
                           , sm_eta_expand = eta_expand_on
                           , sm_case_case  = True }
  where
    rules_on      = gopt Opt_EnableRewriteRules   dflags
    eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
   -- Do not do any inlining, in case we expose some unboxed
   -- tuple stuff that confuses the bytecode interpreter
747
updModeForStableUnfoldings :: Activation -> SimplMode -> SimplMode
-- Adjust the mode for simplifying the RHS of a stable unfolding: use
-- the phase at which the unfolding first becomes active.
-- See Note [Simplifying inside stable unfoldings]
updModeForStableUnfoldings inline_rule_act current_mode
  = current_mode { sm_phase      = phaseFromActivation inline_rule_act
                 , sm_inline     = True
                 , sm_eta_expand = False }
                 -- sm_eta_expand: see Note [No eta expansion in stable unfoldings]
       -- For sm_rules, just inherit; sm_rules might be "off"
       -- because of -fno-enable-rewrite-rules
  where
    phaseFromActivation (ActiveAfter _ n) = Phase n
    phaseFromActivation _                 = InitialPhase
760
updModeForRules :: SimplMode -> SimplMode
-- Adjust the mode for simplifying the LHS of a RULE: no inlining, no
-- rule application, no eta-expansion, so the LHS stays as written.
-- See Note [Simplifying rules]
updModeForRules current_mode
  = current_mode { sm_phase      = InitialPhase
                 , sm_inline     = False
                 , sm_rules      = False
                 , sm_eta_expand = False }
768
769 {- Note [Simplifying rules]
770 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
771 When simplifying a rule LHS, refrain from /any/ inlining or applying
772 of other RULES.
773
774 Doing anything to the LHS is plain confusing, because it means that what the
775 rule matches is not what the user wrote. c.f. Trac #10595, and #10528.
776 Moreover, inlining (or applying rules) on rule LHSs risks introducing
777 Ticks into the LHS, which makes matching trickier. Trac #10665, #10745.
778
779 Doing this to either side confounds tools like HERMIT, which seek to reason
780 about and apply the RULES as originally written. See Trac #10829.
781
782 Note [No eta expansion in stable unfoldings]
783 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
784 If we have a stable unfolding
785
786 f :: Ord a => a -> IO ()
787 -- Unfolding template
788 -- = /\a \(d:Ord a) (x:a). bla
789
790 we do not want to eta-expand to
791
792 f :: Ord a => a -> IO ()
793 -- Unfolding template
794 -- = (/\a \(d:Ord a) (x:a) (eta:State#). bla eta) |> co
795
because then specialisation of the overloading doesn't work properly
797 (see Note [Specialisation shape] in Specialise), Trac #9509.
798
799 So we disable eta-expansion in stable unfoldings.
800
801 Note [Inlining in gentle mode]
802 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
803 Something is inlined if
804 (i) the sm_inline flag is on, AND
805 (ii) the thing has an INLINE pragma, AND
806 (iii) the thing is inlinable in the earliest phase.
807
808 Example of why (iii) is important:
809 {-# INLINE [~1] g #-}
810 g = ...
811
812 {-# INLINE f #-}
813 f x = g (g x)
814
815 If we were to inline g into f's inlining, then an importing module would
816 never be able to do
817 f e --> g (g e) ---> RULE fires
818 because the stable unfolding for f has had g inlined into it.
819
820 On the other hand, it is bad not to do ANY inlining into an
821 stable unfolding, because then recursive knots in instance declarations
822 don't get unravelled.
823
824 However, *sometimes* SimplGently must do no call-site inlining at all
825 (hence sm_inline = False). Before full laziness we must be careful
826 not to inline wrappers, because doing so inhibits floating
827 e.g. ...(case f x of ...)...
828 ==> ...(case (case x of I# x# -> fw x#) of ...)...
829 ==> ...(case x of I# x# -> case fw x# of ...)...
830 and now the redex (f x) isn't floatable any more.
831
832 The no-inlining thing is also important for Template Haskell. You might be
833 compiling in one-shot mode with -O2; but when TH compiles a splice before
834 running it, we don't want to use -O2. Indeed, we don't want to inline
835 anything, because the byte-code interpreter might get confused about
836 unboxed tuples and suchlike.
837
838 Note [Simplifying inside stable unfoldings]
839 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
840 We must take care with simplification inside stable unfoldings (which come from
841 INLINE pragmas).
842
843 First, consider the following example
844 let f = \pq -> BIG
845 in
846 let g = \y -> f y y
847 {-# INLINE g #-}
848 in ...g...g...g...g...g...
849 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
850 and thence copied multiple times when g is inlined. HENCE we treat
851 any occurrence in a stable unfolding as a multiple occurrence, not a single
852 one; see OccurAnal.addRuleUsage.
853
Second, we *do* want to do some modest rules/inlining stuff in stable
855 unfoldings, partly to eliminate senseless crap, and partly to break
856 the recursive knots generated by instance declarations.
857
858 However, suppose we have
859 {-# INLINE <act> f #-}
860 f = <rhs>
861 meaning "inline f in phases p where activation <act>(p) holds".
862 Then what inlinings/rules can we apply to the copy of <rhs> captured in
863 f's stable unfolding? Our model is that literally <rhs> is substituted for
864 f when it is inlined. So our conservative plan (implemented by
865 updModeForStableUnfoldings) is this:
866
867 -------------------------------------------------------------
868 When simplifying the RHS of a stable unfolding, set the phase
869 to the phase in which the stable unfolding first becomes active
870 -------------------------------------------------------------
871
872 That ensures that
873
874 a) Rules/inlinings that *cease* being active before p will
875 not apply to the stable unfolding, consistent with it being
876 inlined in its *original* form in phase p.
877
878 b) Rules/inlinings that only become active *after* p will
879 not apply to the stable unfolding, again to be consistent with
880 inlining the *original* rhs in phase p.
881
882 For example,
883 {-# INLINE f #-}
884 f x = ...g...
885
886 {-# NOINLINE [1] g #-}
887 g y = ...
888
889 {-# RULE h g = ... #-}
890 Here we must not inline g into f's RHS, even when we get to phase 0,
891 because when f is later inlined into some other module we want the
892 rule for h to fire.
893
894 Similarly, consider
895 {-# INLINE f #-}
896 f x = ...g...
897
898 g y = ...
899 and suppose that there are auto-generated specialisations and a strictness
900 wrapper for g. The specialisations get activation AlwaysActive, and the
strictness wrapper gets activation (ActiveAfter 0).  So the strictness
wrapper fails the test and won't be inlined into f's stable unfolding. That
903 means f can inline, expose the specialised call to g, so the specialisation
904 rules can fire.
905
906 A note about wrappers
907 ~~~~~~~~~~~~~~~~~~~~~
908 It's also important not to inline a worker back into a wrapper.
909 A wrapper looks like
        wrapper = inline_me (\x -> ...worker... )
911 Normally, the inline_me prevents the worker getting inlined into
912 the wrapper (initially, the worker's only call site!). But,
913 if the wrapper is sure to be called, the strictness analyser will
914 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
915 continuation.
916 -}
917
activeUnfolding :: SimplEnv -> Id -> Bool
-- Should the simplifier use this Id's unfolding in the current mode?
activeUnfolding env id
  | isCompulsoryUnfolding (realIdUnfolding id)
  = True    -- Compulsory unfoldings are used unconditionally;
            -- not even sm_inline = False can switch them off
  | otherwise
  = sm_inline mode
    && isActive (sm_phase mode) (idInlineActivation id)
    -- Use the unfolding only when
    --   (a) the mode permits call-site inlining (sm_inline), and
    --   (b) the Id's inline activation fires in the current phase
    -- (Stable unfoldings from pragmas get no special treatment here.)
  where
    mode = getMode env
932
getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
-- When matching in RULE, we want to "look through" an unfolding
-- (to see a constructor) if *rules* are on, even if *inlinings*
-- are not.  A notable example is DFuns, which really we want to
-- match in rules like (op dfun) in gentle mode.  Another example
-- is 'otherwise', which we want exprIsConApp_maybe to be able to
-- see very early on.
getUnfoldingInRuleMatch env = (seInScope env, lookup_unf)
  where
    mode = getMode env

    -- The IdUnfoldingFun handed to the rule matcher
    lookup_unf id
      | unf_usable id = idUnfolding id
      | otherwise     = NoUnfolding

    unf_usable id
      | not (sm_rules mode)
      = isStableUnfolding (realIdUnfolding id)
        -- Do we even need to test this?  I think this InScopeEnv
        -- is only consulted if activeRule returns True, which
        -- never happens if sm_rules is False
      | otherwise
      = isActive (sm_phase mode) (idInlineActivation id)
954
955 ----------------------
activeRule :: SimplEnv -> Activation -> Bool
-- Nothing => No rules at all
activeRule env
  | sm_rules mode = isActive (sm_phase mode)
  | otherwise     = const False       -- Rewriting is off
  where
    mode = getMode env
963
964 {-
965 ************************************************************************
966 * *
967 preInlineUnconditionally
968 * *
969 ************************************************************************
970
971 preInlineUnconditionally
972 ~~~~~~~~~~~~~~~~~~~~~~~~
973 @preInlineUnconditionally@ examines a bndr to see if it is used just
974 once in a completely safe way, so that it is safe to discard the
binding and inline its RHS at the (unique) usage site, REGARDLESS of how
976 big the RHS might be. If this is the case we don't simplify the RHS
977 first, but just inline it un-simplified.
978
979 This is much better than first simplifying a perhaps-huge RHS and then
980 inlining and re-simplifying it. Indeed, it can be at least quadratically
981 better. Consider
982
983 x1 = e1
984 x2 = e2[x1]
985 x3 = e3[x2]
986 ...etc...
987 xN = eN[xN-1]
988
We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times etc.
990 This can happen with cascades of functions too:
991
992 f1 = \x1.e1
993 f2 = \xs.e2[f1]
  f3 = \xs.e3[f2]
995 ...etc...
996
997 THE MAIN INVARIANT is this:
998
999 ---- preInlineUnconditionally invariant -----
1000 IF preInlineUnconditionally chooses to inline x = <rhs>
1001 THEN doing the inlining should not change the occurrence
1002 info for the free vars of <rhs>
1003 ----------------------------------------------
1004
1005 For example, it's tempting to look at trivial binding like
1006 x = y
1007 and inline it unconditionally. But suppose x is used many times,
1008 but this is the unique occurrence of y. Then inlining x would change
1009 y's occurrence info, which breaks the invariant. It matters: y
might have a BIG rhs, which will now be dup'd at every occurrence of x.
1011
1012
1013 Even RHSs labelled InlineMe aren't caught here, because there might be
1014 no benefit from inlining at the call site.
1015
1016 [Sept 01] Don't unconditionally inline a top-level thing, because that
1017 can simply make a static thing into something built dynamically. E.g.
1018 x = (a,b)
1019 main = \s -> h x
1020
1021 [Remember that we treat \s as a one-shot lambda.] No point in
1022 inlining x unless there is something interesting about the call site.
1023
1024 But watch out: if you aren't careful, some useful foldr/build fusion
1025 can be lost (most notably in spectral/hartel/parstof) because the
1026 foldr didn't see the build. Doing the dynamic allocation isn't a big
1027 deal, in fact, but losing the fusion can be. But the right thing here
1028 seems to be to do a callSiteInline based on the fact that there is
1029 something interesting about the call site (it's strict). Hmm. That
1030 seems a bit fragile.
1031
1032 Conclusion: inline top level things gaily until Phase 0 (the last
1033 phase), at which point don't.
1034
1035 Note [pre/postInlineUnconditionally in gentle mode]
1036 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1037 Even in gentle mode we want to do preInlineUnconditionally. The
1038 reason is that too little clean-up happens if you don't inline
1039 use-once things. Also a bit of inlining is *good* for full laziness;
1040 it can expose constant sub-expressions. Example in
1041 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
1042 let-float if you inline windowToViewport
1043
1044 However, as usual for Gentle mode, do not inline things that are
inactive in the initial stages.  See Note [Gentle mode].
1046
1047 Note [Stable unfoldings and preInlineUnconditionally]
1048 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1049 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
1050 Example
1051
1052 {-# INLINE f #-}
1053 f :: Eq a => a -> a
1054 f x = ...
1055
1056 fInt :: Int -> Int
1057 fInt = f Int dEqInt
1058
1059 ...fInt...fInt...fInt...
1060
1061 Here f occurs just once, in the RHS of fInt. But if we inline it there
1062 it might make fInt look big, and we'll lose the opportunity to inline f
1063 at each of fInt's call sites. The INLINE pragma will only inline when
1064 the application is saturated for exactly this reason; and we don't
1065 want PreInlineUnconditionally to second-guess it. A live example is
1066 Trac #3736.
1067 c.f. Note [Stable unfoldings and postInlineUnconditionally]
1068
1069 Note [Top-level bottoming Ids]
1070 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1071 Don't inline top-level Ids that are bottoming, even if they are used just
1072 once, because FloatOut has gone to some trouble to extract them out.
1073 Inlining them won't make the program run faster!
1074
1075 Note [Do not inline CoVars unconditionally]
1076 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1077 Coercion variables appear inside coercions, and the RHS of a let-binding
1078 is a term (not a coercion) so we can't necessarily inline the latter in
1079 the former.
1080 -}
1081
preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
-- True <=> discard the binding and inline its (un-simplified) RHS at the
--          unique occurrence; see the preInlineUnconditionally invariant above
-- Precondition: rhs satisfies the let/app invariant
-- See Note [CoreSyn let/app invariant] in CoreSyn
-- Reason: we don't want to inline single uses, or discard dead bindings,
--         for unlifted, side-effect-ful bindings
preInlineUnconditionally env top_lvl bndr rhs
  | not pre_inline_unconditionally           = False
  | not active                               = False
  | isStableUnfolding (idUnfolding bndr)     = False -- Note [Stable unfoldings and preInlineUnconditionally]
  | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
  | isCoVar bndr                             = False -- Note [Do not inline CoVars unconditionally]
  | otherwise = case idOccInfo bndr of
                  IAmDead                    -> True -- Happens in ((\x.1) v)
                  occ@OneOcc { occ_one_br = True }
                                             -> try_once (occ_in_lam occ)
                                                         (occ_int_cxt occ)
                  _                          -> False
  where
    -- The whole transformation can be switched off (Opt_SimplPreInlining)
    pre_inline_unconditionally = gopt Opt_SimplPreInlining (seDynFlags env)
    mode = getMode env
    active = isActive (sm_phase mode) act
             -- See Note [pre/postInlineUnconditionally in gentle mode]
    act = idInlineActivation bndr
    try_once in_lam int_cxt     -- There's one textual occurrence
      | not in_lam = isNotTopLevel top_lvl || early_phase
      | otherwise  = int_cxt && canInlineInLam rhs

-- Be very careful before inlining inside a lambda, because (a) we must not
-- invalidate occurrence information, and (b) we want to avoid pushing a
-- single allocation (here) into multiple allocations (inside lambda).
-- Inlining a *function* with a single *saturated* call would be ok, mind you.
--      || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
--      where
--              is_cheap = exprIsCheap rhs
--              ok = is_cheap && int_cxt

        --      int_cxt         The context isn't totally boring
        -- E.g. let f = \ab.BIG in \y. map f xs
        --      Don't want to substitute for f, because then we allocate
        --      its closure every time the \y is called
        -- But: let f = \ab.BIG in \y. map (f y) xs
        --      Now we do want to substitute for f, even though it's not
        --      saturated, because we're going to allocate a closure for
        --      (f y) every time round the loop anyhow.

        -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
        -- so substituting rhs inside a lambda doesn't change the occ info.
        -- Sadly, not quite the same as exprIsHNF.
    canInlineInLam (Lit _)    = True
    canInlineInLam (Lam b e)  = isRuntimeVar b || canInlineInLam e
    canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
    canInlineInLam _          = False
      -- not ticks.  Counting ticks cannot be duplicated, and non-counting
      -- ticks around a Lam will disappear anyway.

    early_phase = case sm_phase mode of
                    Phase 0 -> False
                    _       -> True
-- If we don't have this early_phase test, consider
--      x = length [1,2,3]
-- The full laziness pass carefully floats all the cons cells to
-- top level, and preInlineUnconditionally floats them all back in.
-- Result is (a) static allocation replaced by dynamic allocation
--           (b) many simplifier iterations because this tickles
--               a related problem; only one inlining per pass
--
-- On the other hand, I have seen cases where top-level fusion is
-- lost if we don't inline top level thing (e.g. string constants)
-- Hence the test for phase zero (which is the phase for all the final
-- simplifications).  Until phase zero we take no special notice of
-- top level things, but then we become more leery about inlining
-- them.
1154
1155 {-
1156 ************************************************************************
1157 * *
1158 postInlineUnconditionally
1159 * *
1160 ************************************************************************
1161
1162 postInlineUnconditionally
1163 ~~~~~~~~~~~~~~~~~~~~~~~~~
1164 @postInlineUnconditionally@ decides whether to unconditionally inline
1165 a thing based on the form of its RHS; in particular if it has a
1166 trivial RHS. If so, we can inline and discard the binding altogether.
1167
1168 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
1169 only have *forward* references. Hence, it's safe to discard the binding
1170
1171 NOTE: This isn't our last opportunity to inline. We're at the binding
1172 site right now, and we'll get another opportunity when we get to the
1173 occurrence(s)
1174
Note that we do this unconditional inlining only for trivial RHSs.
1176 Don't inline even WHNFs inside lambdas; doing so may simply increase
1177 allocation when the function is called. This isn't the last chance; see
1178 NOTE above.
1179
1180 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here Why?
1181 Because we don't even want to inline them into the RHS of constructor
1182 arguments. See NOTE above
1183
1184 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
1185 it's best to inline it anyway. We often get a=E; b=a from desugaring,
1186 with both a and b marked NOINLINE. But that seems incompatible with
1187 our new view that inlining is like a RULE, so I'm sticking to the 'active'
1188 story for now.
1189 -}
1190
postInlineUnconditionally
    :: SimplEnv -> TopLevelFlag
    -> OutId            -- The binder (*not* a CoVar), including its unfolding
    -> OccInfo          -- From the InId
    -> OutExpr
    -> Bool
-- True <=> inline the (already-simplified) rhs at every occurrence,
--          and discard the binding altogether
-- Precondition: rhs satisfies the let/app invariant
-- See Note [CoreSyn let/app invariant] in CoreSyn
-- Reason: we don't want to inline single uses, or discard dead bindings,
--         for unlifted, side-effect-ful bindings
postInlineUnconditionally env top_lvl bndr occ_info rhs
  | not active                  = False
  | isWeakLoopBreaker occ_info  = False -- If it's a loop-breaker of any kind, don't inline
                                        -- because it might be referred to "earlier"
  | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
  | isTopLevel top_lvl          = False -- Note [Top level and postInlineUnconditionally]
  | exprIsTrivial rhs           = True
  | otherwise
  = case occ_info of
        -- The point of examining occ_info here is that for *non-values*
        -- that occur outside a lambda, the call-site inliner won't have
        -- a chance (because it doesn't know that the thing
        -- only occurs once).   The pre-inliner won't have gotten
        -- it either, if the thing occurs in more than one branch
        -- So the main target is things like
        --     let x = f y in
        --     case v of
        --        True  -> case x of ...
        --        False -> case x of ...
        -- This is very important in practice; e.g. wheel-seive1 doubles
        -- in allocation if you miss this out
      OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt }
        -- OneOcc => no code-duplication issue
        -> smallEnoughToInline dflags unfolding     -- Small enough to dup
                        -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
                        --
                        -- NB: Do NOT inline arbitrarily big things, even if one_br is True
                        -- Reason: doing so risks exponential behaviour.  We simplify a big
                        --         expression, inline it, and simplify it again.  But if the
                        --         very same thing happens in the big expression, we get
                        --         exponential cost!
                        -- PRINCIPLE: when we've already simplified an expression once,
                        -- make sure that we only inline it if it's reasonably small.

           && (not in_lam ||
                -- Outside a lambda, we want to be reasonably aggressive
                -- about inlining into multiple branches of case
                -- e.g. let x = <non-value>
                --      in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
                -- Inlining can be a big win if C3 is the hot-spot, even if
                -- the uses in C1, C2 are not 'interesting'
                -- An example that gets worse if you add int_cxt here is 'clausify'

                (isCheapUnfolding unfolding && int_cxt))
                        -- isCheap => acceptable work duplication; in_lam may be true
                        -- int_cxt to prevent us inlining inside a lambda without some
                        -- good reason.  See the notes on int_cxt in preInlineUnconditionally

      IAmDead -> True   -- This happens; for example, the case_bndr during case of
                        -- known constructor:  case (a,b) of x { (p,q) -> ... }
                        -- Here x isn't mentioned in the RHS, so we don't want to
                        -- create the (dead) let-binding  let x = (a,b) in ...

      _ -> False

-- Here's an example that we don't handle well:
--      let f = if b then Left (\x.BIG) else Right (\y.BIG)
--      in \y. ....case f of {...} ....
-- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
-- But
--  - We can't preInlineUnconditionally because that would invalidate
--    the occ info for b.
--  - We can't postInlineUnconditionally because the RHS is big, and
--    that risks exponential behaviour
--  - We can't call-site inline, because the rhs is big
-- Alas!

  where
    unfolding = idUnfolding bndr
    dflags    = seDynFlags env
    active    = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
        -- See Note [pre/postInlineUnconditionally in gentle mode]
1273
1274 {-
1275 Note [Top level and postInlineUnconditionally]
1276 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1277 We don't do postInlineUnconditionally for top-level things (even for
1278 ones that are trivial):
1279
1280 * Doing so will inline top-level error expressions that have been
1281 carefully floated out by FloatOut. More generally, it might
1282 replace static allocation with dynamic.
1283
1284 * Even for trivial expressions there's a problem. Consider
1285 {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
1286 blah xs = reverse xs
1287 ruggle = sort
1288 In one simplifier pass we might fire the rule, getting
1289 blah xs = ruggle xs
1290 but in *that* simplifier pass we must not do postInlineUnconditionally
1291 on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
1292
1293 If the rhs is trivial it'll be inlined by callSiteInline, and then
1294 the binding will be dead and discarded by the next use of OccurAnal
1295
1296 * There is less point, because the main goal is to get rid of local
1297 bindings used in multiple case branches.
1298
1299 * The inliner should inline trivial things at call sites anyway.
1300
1301 * The Id might be exported. We could check for that separately,
1302 but since we aren't going to postInlineUnconditionally /any/
1303 top-level bindings, we don't need to test.
1304
1305 Note [Stable unfoldings and postInlineUnconditionally]
1306 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1307 Do not do postInlineUnconditionally if the Id has a stable unfolding,
1308 otherwise we lose the unfolding. Example
1309
1310 -- f has stable unfolding with rhs (e |> co)
1311 -- where 'e' is big
1312 f = e |> co
1313
1314 Then there's a danger we'll optimise to
1315
1316 f' = e
1317 f = f' |> co
1318
1319 and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
1320 won't inline because 'e' is too big.
1321
1322 c.f. Note [Stable unfoldings and preInlineUnconditionally]
1323
1324
1325 ************************************************************************
1326 * *
1327 Rebuilding a lambda
1328 * *
1329 ************************************************************************
1330 -}
1331
mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
-- mkLam tries three things
--      a) eta reduction, if that gives a trivial expression
--      b) eta expansion [only if there are some value lambdas]
--      c) floating casts out past the lambda binders
--         (see Note [Casts and lambdas])
-- The SimplCont is consulted only to see whether we are on the RHS of a
-- let, in which case eta expansion is left to tryEtaExpandRhs instead;
-- see Note [Eta expanding lambdas]

mkLam _env [] body _cont
  = return body
mkLam env bndrs body cont
  = do { dflags <- getDynFlags
       ; mkLam' dflags bndrs body }
  where
    mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
    -- Float the cast out past the binders, provided no coercion
    -- binder is mentioned in the coercion itself
    mkLam' dflags bndrs (Cast body co)
      | not (any bad bndrs)
        -- Note [Casts and lambdas]
      = do { lam <- mkLam' dflags bndrs body
           ; return (mkCast lam (mkPiCos Representational bndrs co)) }
      where
        co_vars  = tyCoVarsOfCo co
        bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars

    -- Gather up any further lambdas immediately inside
    mkLam' dflags bndrs body@(Lam {})
      = mkLam' dflags (bndrs ++ bndrs1) body1
      where
        (bndrs1, body1) = collectBinders body

    -- Floatable ticks can move outside the lambdas
    mkLam' dflags bndrs (Tick t expr)
      | tickishFloatable t
      = mkTick t <$> mkLam' dflags bndrs expr

    mkLam' dflags bndrs body
      | gopt Opt_DoEtaReduction dflags
      , Just etad_lam <- tryEtaReduce bndrs body
      = do { tick (EtaReduction (head bndrs))
           ; return etad_lam }

      | not (contIsRhs cont)   -- See Note [Eta expanding lambdas]
      , sm_eta_expand (getMode env)
      , any isRuntimeVar bndrs
      , let body_arity = exprEtaExpandArity dflags body
      , body_arity > 0
      = do { tick (EtaExpansion (head bndrs))
           ; let res = mkLams bndrs (etaExpand body_arity body)
           ; traceSmpl "eta expand" (vcat [text "before" <+> ppr (mkLams bndrs body)
                                          , text "after" <+> ppr res])
           ; return res }

      | otherwise
      = return (mkLams bndrs body)
1381
1382 {-
1383 Note [Eta expanding lambdas]
1384 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1385 In general we *do* want to eta-expand lambdas. Consider
1386 f (\x -> case x of (a,b) -> \s -> blah)
1387 where 's' is a state token, and hence can be eta expanded. This
showed up in the code for GHC.IO.Handle.Text.hPutChar, a rather
1389 important function!
1390
1391 The eta-expansion will never happen unless we do it now. (Well, it's
1392 possible that CorePrep will do it, but CorePrep only has a half-baked
1393 eta-expander that can't deal with casts. So it's much better to do it
1394 here.)
1395
1396 However, when the lambda is let-bound, as the RHS of a let, we have a
1397 better eta-expander (in the form of tryEtaExpandRhs), so we don't
1398 bother to try expansion in mkLam in that case; hence the contIsRhs
1399 guard.
1400
1401 NB: We check the SimplEnv (sm_eta_expand), not DynFlags.
1402 See Note [No eta expansion in stable unfoldings]
1403
1404 Note [Casts and lambdas]
1405 ~~~~~~~~~~~~~~~~~~~~~~~~
1406 Consider
1407 (\x. (\y. e) `cast` g1) `cast` g2
1408 There is a danger here that the two lambdas look separated, and the
1409 full laziness pass might float an expression to between the two.
1410
1411 So this equation in mkLam' floats the g1 out, thus:
1412 (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
1413 where x:tx.
1414
1415 In general, this floats casts outside lambdas, where (I hope) they
1416 might meet and cancel with some other cast:
1417 \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
1418 /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
1419 /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
1420 (if not (g `in` co))
1421
1422 Notice that it works regardless of 'e'. Originally it worked only
1423 if 'e' was itself a lambda, but in some cases that resulted in
1424 fruitless iteration in the simplifier. A good example was when
1425 compiling Text.ParserCombinators.ReadPrec, where we had a definition
1426 like (\x. Get `cast` g)
1427 where Get is a constructor with nonzero arity. Then mkLam eta-expanded
1428 the Get, and the next iteration eta-reduced it, and then eta-expanded
1429 it again.
1430
1431 Note also the side condition for the case of coercion binders.
1432 It does not make sense to transform
1433 /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
1434 because the latter is not well-kinded.
1435
1436 ************************************************************************
1437 * *
1438 Eta expansion
1439 * *
1440 ************************************************************************
1441 -}
1442
tryEtaExpandRhs :: SimplMode -> OutId -> OutExpr
                -> SimplM (Arity, Bool, OutExpr)
-- See Note [Eta-expanding at let bindings]
-- If tryEtaExpandRhs rhs = (n, is_bot, rhs') then
--   (a) rhs' has manifest arity n
--   (b) if is_bot is True then rhs' applied to n args is guaranteed bottom
tryEtaExpandRhs mode bndr rhs
  | isJoinId bndr
  = return (manifestArity rhs, False, rhs)
  -- Join points are never eta-expanded; return the rhs untouched.
  -- Note [Do not eta-expand join points]

  | otherwise
  = do { (new_arity, is_bot, new_rhs) <- try_expand

       ; WARN( new_arity < old_id_arity,
               (text "Arity decrease:" <+> (ppr bndr <+> ppr old_id_arity
                <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
                        -- Note [Arity decrease] in Simplify
         return (new_arity, is_bot, new_rhs) }
  where
    try_expand
      | exprIsTrivial rhs   -- Never eta-expand a trivial rhs
      = return (exprArity rhs, False, rhs)

      | sm_eta_expand mode      -- Provided eta-expansion is on
      , new_arity > old_arity   -- And the current manifest arity isn't enough
      = do { tick (EtaExpansion bndr)
           ; return (new_arity, is_bot, etaExpand new_arity rhs) }

      | otherwise
      = return (old_arity, is_bot && new_arity == old_arity, rhs)

    dflags       = sm_dflags mode
    old_arity    = exprArity rhs -- See Note [Do not eta-expand PAPs]
    old_id_arity = idArity bndr

    -- Arity we would like: the larger of the analysed rhs arity
    -- and the call arity recorded on the binder
    (new_arity1, is_bot) = findRhsArity dflags bndr rhs old_arity
    new_arity2 = idCallArity bndr
    new_arity  = max new_arity1 new_arity2
1482
1483 {-
1484 Note [Eta-expanding at let bindings]
1485 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1486 We now eta expand at let-bindings, which is where the payoff comes.
1487 The most significant thing is that we can do a simple arity analysis
1488 (in CoreArity.findRhsArity), which we can't do for free-floating lambdas
1489
1490 One useful consequence of not eta-expanding lambdas is this example:
1491 genMap :: C a => ...
1492 {-# INLINE genMap #-}
1493 genMap f xs = ...
1494
1495 myMap :: D a => ...
1496 {-# INLINE myMap #-}
1497 myMap = genMap
1498
1499 Notice that 'genMap' should only inline if applied to two arguments.
1500 In the stable unfolding for myMap we'll have the unfolding
1501 (\d -> genMap Int (..d..))
1502 We do not want to eta-expand to
1503 (\d f xs -> genMap Int (..d..) f xs)
1504 because then 'genMap' will inline, and it really shouldn't: at least
1505 as far as the programmer is concerned, it's not applied to two
1506 arguments!
1507
1508 Note [Do not eta-expand join points]
1509 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1510 Similarly to CPR (see Note [Don't CPR join points] in WorkWrap), a join point
1511 stands well to gain from its outer binding's eta-expansion, and eta-expanding a
1512 join point is fraught with issues like how to deal with a cast:
1513
1514 let join $j1 :: IO ()
1515 $j1 = ...
1516 $j2 :: Int -> IO ()
1517 $j2 n = if n > 0 then $j1
1518 else ...
1519
1520 =>
1521
1522 let join $j1 :: IO ()
1523 $j1 = (\eta -> ...)
1524 `cast` N:IO :: State# RealWorld -> (# State# RealWorld, ())
1525 ~ IO ()
1526 $j2 :: Int -> IO ()
1527 $j2 n = (\eta -> if n > 0 then $j1
1528 else ...)
1529 `cast` N:IO :: State# RealWorld -> (# State# RealWorld, ())
1530 ~ IO ()
1531
1532 The cast here can't be pushed inside the lambda (since it's not casting to a
1533 function type), so the lambda has to stay, but it can't because it contains a
1534 reference to a join point. In fact, $j2 can't be eta-expanded at all. Rather
1535 than try and detect this situation (and whatever other situations crop up!), we
1536 don't bother; again, any surrounding eta-expansion will improve these join
1537 points anyway, since an outer cast can *always* be pushed inside. By the time
1538 CorePrep comes around, the code is very likely to look more like this:
1539
1540 let join $j1 :: State# RealWorld -> (# State# RealWorld, ())
1541 $j1 = (...) eta
1542 $j2 :: Int -> State# RealWorld -> (# State# RealWorld, ())
1543 $j2 = if n > 0 then $j1
1544 else (...) eta
1545
1546 Note [Do not eta-expand PAPs]
1547 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1548 We used to have old_arity = manifestArity rhs, which meant that we
1549 would eta-expand even PAPs. But this gives no particular advantage,
1550 and can lead to a massive blow-up in code size, exhibited by Trac #9020.
1551 Suppose we have a PAP
1552 foo :: IO ()
1553 foo = returnIO ()
Then we can eta-expand it to
1555 foo = (\eta. (returnIO () |> sym g) eta) |> g
1556 where
1557 g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
1558
1559 But there is really no point in doing this, and it generates masses of
1560 coercions and whatnot that eventually disappear again. For T9020, GHC
allocated 6.6G before, and 0.8G afterwards; and residency dropped from
1562 1.8G to 45M.
1563
1564 But note that this won't eta-expand, say
1565 f = \g -> map g
1566 Does it matter not eta-expanding such functions? I'm not sure. Perhaps
1567 strictness analysis will have less to bite on?
1568
1569
1570 ************************************************************************
1571 * *
1572 \subsection{Floating lets out of big lambdas}
1573 * *
1574 ************************************************************************
1575
1576 Note [Floating and type abstraction]
1577 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1578 Consider this:
1579 x = /\a. C e1 e2
1580 We'd like to float this to
1581 y1 = /\a. e1
1582 y2 = /\a. e2
1583 x = /\a. C (y1 a) (y2 a)
1584 for the usual reasons: we want to inline x rather vigorously.
1585
1586 You may think that this kind of thing is rare. But in some programs it is
1587 common. For example, if you do closure conversion you might get:
1588
1589 data a :-> b = forall e. (e -> a -> b) :$ e
1590
1591 f_cc :: forall a. a :-> a
1592 f_cc = /\a. (\e. id a) :$ ()
1593
1594 Now we really want to inline that f_cc thing so that the
1595 construction of the closure goes away.
1596
1597 So I have elaborated simplLazyBind to understand right-hand sides that look
1598 like
1599 /\ a1..an. body
1600
1601 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1602 but there is quite a bit of plumbing in simplLazyBind as well.
1603
1604 The same transformation is good when there are lets in the body:
1605
1606 /\abc -> let(rec) x = e in b
1607 ==>
1608 let(rec) x' = /\abc -> let x = x' a b c in e
1609 in
1610 /\abc -> let x = x' a b c in b
1611
1612 This is good because it can turn things like:
1613
1614 let f = /\a -> letrec g = ... g ... in g
1615 into
1616 letrec g' = /\a -> ... g' a ...
1617 in
1618 let f = /\ a -> g' a
1619
1620 which is better. In effect, it means that big lambdas don't impede
1621 let-floating.
1622
1623 This optimisation is CRUCIAL in eliminating the junk introduced by
1624 desugaring mutually recursive definitions. Don't eliminate it lightly!
1625
1626 [May 1999] If we do this transformation *regardless* then we can
1627 end up with some pretty silly stuff. For example,
1628
1629 let
1630 st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1631 in ..
1632 becomes
1633 let y1 = /\s -> r1
1634 y2 = /\s -> r2
1635 st = /\s -> ...[y1 s/x1, y2 s/x2]
1636 in ..
1637
1638 Unless the "..." is a WHNF there is really no point in doing this.
1639 Indeed it can make things worse. Suppose x1 is used strictly,
1640 and is of the form
1641
1642 x1* = case f y of { (a,b) -> e }
1643
1644 If we abstract this wrt the tyvar we then can't do the case inline
1645 as we would normally do.
1646
1647 That's why the whole transformation is part of the same process that
1648 floats let-bindings and constructor arguments out of RHSs. In particular,
1649 it is guarded by the doFloatFromRhs call in simplLazyBind.
1650
1651 Note [Which type variables to abstract over]
1652 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1653 Abstract only over the type variables free in the rhs wrt which the
1654 new binding is abstracted. Note that
1655
1656 * The naive approach of abstracting wrt the
1657 tyvars free in the Id's /type/ fails. Consider:
1658 /\ a b -> let t :: (a,b) = (e1, e2)
1659 x :: a = fst t
1660 in ...
1661 Here, b isn't free in x's type, but we must nevertheless
1662 abstract wrt b as well, because t's type mentions b.
1663 Since t is floated too, we'd end up with the bogus:
1664 poly_t = /\ a b -> (e1, e2)
1665 poly_x = /\ a -> fst (poly_t a *b*)
1666
1667 * We must do closeOverKinds. Example (Trac #10934):
1668 f = /\k (f:k->*) (a:k). let t = AccFailure @ (f a) in ...
1669 Here we want to float 't', but we must remember to abstract over
1670 'k' as well, even though it is not explicitly mentioned in the RHS,
1671 otherwise we get
1672 t = /\ (f:k->*) (a:k). AccFailure @ (f a)
1673 which is obviously bogus.
1674 -}
1675
-- | Abstract the given float bindings over the type variables @main_tvs@,
-- so that they can be moved outside an enclosing big lambda
-- @/\ main_tvs -> body@.  See Note [Floating and type abstraction].
--
-- Each floated binder @x@ becomes a polymorphic @poly_x = /\tvs -> rhs@,
-- and occurrences of @x@ in the body are rewritten (by substitution, not
-- by a let-binding -- see Note [Abstract over coercions], historical note)
-- to @poly_x tvs@.  Returns the abstracted bindings and the rewritten body.
abstractFloats :: DynFlags -> TopLevelFlag -> [OutTyVar] -> SimplFloats
               -> OutExpr -> SimplM ([OutBind], OutExpr)
abstractFloats dflags top_lvl main_tvs floats body
  = ASSERT( notNull body_floats )
    ASSERT( isNilOL (sfJoinFloats floats) )  -- Join points are never floated past a big lambda
    do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
       ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
  where
    is_top_lvl = isTopLevel top_lvl
    main_tv_set = mkVarSet main_tvs
    body_floats = letFloatBinds (sfLetFloats floats)
    empty_subst = CoreSubst.mkEmptySubst (sfInScope floats)

    -- Abstract one binding; the accumulated substitution maps each
    -- already-abstracted binder to its (poly_x tvs) application.
    abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
    abstract subst (NonRec id rhs)
      = do { (poly_id1, poly_app) <- mk_poly1 tvs_here id
           ; let (poly_id2, poly_rhs) = mk_poly2 poly_id1 tvs_here rhs'
                 subst' = CoreSubst.extendIdSubst subst id poly_app
           ; return (subst', NonRec poly_id2 poly_rhs) }
      where
        -- Apply the substitution first, so the free-variable computation
        -- below sees the already-abstracted references
        rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs

        -- tvs_here: see Note [Which type variables to abstract over]
        tvs_here = toposortTyVars $
                   filter (`elemVarSet` main_tv_set) $
                   closeOverKindsList $
                   exprSomeFreeVarsList isTyVar rhs'

    abstract subst (Rec prs)
      = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly1 tvs_here) ids
           ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
                 poly_pairs = [ mk_poly2 poly_id tvs_here rhs'
                              | (poly_id, rhs) <- poly_ids `zip` rhss
                              , let rhs' = CoreSubst.substExpr (text "abstract_floats")
                                                               subst' rhs ]
           ; return (subst', Rec poly_pairs) }
      where
        (ids,rhss) = unzip prs
        -- For a recursive group, it's a bit of a pain to work out the minimal
        -- set of tyvars over which to abstract:
        --      /\ a b c.  let x = ...a... in
        --                 letrec { p = ...x...q...
        --                          q = .....p...b... } in
        --                 ...
        -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
        -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
        -- Since it's a pain, we just use the whole set, which is always safe
        --
        -- If you ever want to be more selective, remember this bizarre case too:
        --      x::a = x
        -- Here, we must abstract 'x' over 'a'.
        tvs_here = toposortTyVars main_tvs

    -- Make the polymorphic Id (same name, forall-wrapped type) together
    -- with the application (poly_x tvs) that replaces the original binder.
    mk_poly1 :: [TyVar] -> Id -> SimplM (Id, CoreExpr)
    mk_poly1 tvs_here var
      = do { uniq <- getUniqueM
           ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
                 poly_ty = mkInvForAllTys tvs_here (idType var) -- But new type of course
                 poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.hs
                           mkLocalIdOrCoVar poly_name poly_ty
           ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
        -- In the olden days, it was crucial to copy the occInfo of the original var,
        -- because we were looking at occurrence-analysed but as yet unsimplified code!
        -- In particular, we mustn't lose the loop breakers.  BUT NOW we are looking
        -- at already simplified code, so it doesn't matter
        --
        -- It's even right to retain single-occurrence or dead-var info:
        -- Suppose we started with  /\a -> let x = E in B
        -- where x occurs once in B. Then we transform to:
        --      let x' = /\a -> E in /\a -> let x* = x' a in B
        -- where x* has an INLINE prag on it.  Now, once x* is inlined,
        -- the occurrences of x' will be just the occurrences originally
        -- pinned on x.

    -- Make the polymorphic RHS (tvs wrapped round the original RHS), and
    -- give the new Id an InlineRhs unfolding for that RHS.
    mk_poly2 :: Id -> [TyVar] -> CoreExpr -> (Id, CoreExpr)
    mk_poly2 poly_id tvs_here rhs
      = (poly_id `setIdUnfolding` unf, poly_rhs)
      where
        poly_rhs = mkLams tvs_here rhs
        unf = mkUnfolding dflags InlineRhs is_top_lvl False poly_rhs

        -- We want the unfolding.  Consider
        --      let
        --            x = /\a. let y = ... in Just y
        --      in body
        -- Then we float the y-binding out (via abstractFloats and addPolyBind)
        -- but 'x' may well then be inlined in 'body' in which case we'd like the
        -- opportunity to inline 'y' too.
1764
1765 {-
1766 Note [Abstract over coercions]
1767 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1768 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1769 type variable a. Rather than sort this mess out, we simply bale out and abstract
1770 wrt all the type variables if any of them are coercion variables.
1771
1772
1773 Historical note: if you use let-bindings instead of a substitution, beware of this:
1774
1775 -- Suppose we start with:
1776 --
1777 -- x = /\ a -> let g = G in E
1778 --
1779 -- Then we'll float to get
1780 --
1781 -- x = let poly_g = /\ a -> G
1782 -- in /\ a -> let g = poly_g a in E
1783 --
1784 -- But now the occurrence analyser will see just one occurrence
1785 -- of poly_g, not inside a lambda, so the simplifier will
-- PreInlineUnconditionally poly_g back into g!  Back to square 1!
1787 -- (I used to think that the "don't inline lone occurrences" stuff
1788 -- would stop this happening, but since it's the *only* occurrence,
1789 -- PreInlineUnconditionally kicks in first!)
1790 --
1791 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1792 -- to appear many times. (NB: mkInlineMe eliminates
1793 -- such notes on trivial RHSs, so do it manually.)
1794
1795 ************************************************************************
1796 * *
1797 prepareAlts
1798 * *
1799 ************************************************************************
1800
1801 prepareAlts tries these things:
1802
1803 1. Eliminate alternatives that cannot match, including the
1804 DEFAULT alternative.
1805
1806 2. If the DEFAULT alternative can match only one possible constructor,
1807 then make that constructor explicit.
1808 e.g.
1809 case e of x { DEFAULT -> rhs }
1810 ===>
1811 case e of x { (a,b) -> rhs }
1812 where the type is a single constructor type. This gives better code
1813 when rhs also scrutinises x or e.
1814
3. Returns a list of the constructors that cannot match in the
1816 DEFAULT alternative (if there is one)
1817
1818 Here "cannot match" includes knowledge from GADTs
1819
1820 It's a good idea to do this stuff before simplifying the alternatives, to
1821 avoid simplifying alternatives we know can't happen, and to come up with
1822 the list of constructors that are handled, to put into the IdInfo of the
1823 case binder, for use when simplifying the alternatives.
1824
1825 Eliminating the default alternative in (1) isn't so obvious, but it can
1826 happen:
1827
1828 data Colour = Red | Green | Blue
1829
1830 f x = case x of
1831 Red -> ..
1832 Green -> ..
1833 DEFAULT -> h x
1834
1835 h y = case y of
1836 Blue -> ..
1837 DEFAULT -> [ case y of ... ]
1838
1839 If we inline h into f, the default case of the inlined h can't happen.
1840 If we don't notice this, we may end up filtering out *all* the cases
1841 of the inner case y, which give us nowhere to go!
1842 -}
1843
-- | Tidy up the alternatives of a case expression before simplifying them:
-- (1) drop alternatives that cannot match, (2) replace a DEFAULT that can
-- only match one constructor with an explicit alternative for it, and
-- (3) merge syntactically identical alternatives into the DEFAULT.
-- Also returns the constructors that cannot match the DEFAULT alternative.
-- See the preceding commentary for why each step matters.
prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
-- The returned alternatives can be empty, none are possible
prepareAlts scrut case_bndr' alts
  | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
        -- Case binder is needed just for its type. Note that as an
        --   OutId, it has maximum information; this is important.
        --   Test simpl013 is an example
  = do { us <- getUniquesM
       ; let (idcs1, alts1) = filterAlts tc tys imposs_cons alts
             (yes2, alts2) = refineDefaultAlt us tc tys idcs1 alts1
             (yes3, idcs3, alts3) = combineIdenticalAlts idcs1 alts2
             -- "idcs" stands for "impossible default data constructors"
             -- i.e. the constructors that can't match the default case
       ; when yes2 $ tick (FillInCaseDefault case_bndr')  -- Step (2) fired
       ; when yes3 $ tick (AltMerge case_bndr')           -- Step (3) fired
       ; return (idcs3, alts3) }

  | otherwise  -- Not a data type, so nothing interesting happens
  = return ([], alts)
  where
    -- Constructors the scrutinee is already known not to be,
    -- from the unfolding's OtherCon info (if the scrutinee is a variable)
    imposs_cons = case scrut of
                    Var v -> otherCons (idUnfolding v)
                    _ -> []
1867
1868
1869 {-
1870 ************************************************************************
1871 * *
1872 mkCase
1873 * *
1874 ************************************************************************
1875
1876 mkCase tries these things
1877
* Note [Merge Nested Cases]
* Note [Eliminate Identity Case]
* Note [Scrutinee Constant Folding]
1881
1882 Note [Merge Nested Cases]
1883 ~~~~~~~~~~~~~~~~~~~~~~~~~
1884 case e of b { ==> case e of b {
1885 p1 -> rhs1 p1 -> rhs1
1886 ... ...
1887 pm -> rhsm pm -> rhsm
1888 _ -> case b of b' { pn -> let b'=b in rhsn
1889 pn -> rhsn ...
1890 ... po -> let b'=b in rhso
1891 po -> rhso _ -> let b'=b in rhsd
1892 _ -> rhsd
1893 }
1894
1895 which merges two cases in one case when -- the default alternative of
the outer case scrutinises the same variable as the outer case. This
1897 transformation is called Case Merging. It avoids that the same
1898 variable is scrutinised multiple times.
1899
1900 Note [Eliminate Identity Case]
1901 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1902 case e of ===> e
1903 True -> True;
1904 False -> False
1905
1906 and similar friends.
1907
1908 Note [Scrutinee Constant Folding]
1909 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1910 case x op# k# of _ { ===> case x of _ {
1911 a1# -> e1 (a1# inv_op# k#) -> e1
1912 a2# -> e2 (a2# inv_op# k#) -> e2
1913 ... ...
1914 DEFAULT -> ed DEFAULT -> ed
1915
1916 where (x op# k#) inv_op# k# == x
1917
1918 And similarly for commuted arguments and for some unary operations.
1919
1920 The purpose of this transformation is not only to avoid an arithmetic
1921 operation at runtime but to allow other transformations to apply in cascade.
1922
1923 Example with the "Merge Nested Cases" optimization (from #12877):
1924
1925 main = case t of t0
1926 0## -> ...
1927 DEFAULT -> case t0 `minusWord#` 1## of t1
1928 0## -> ...
DEFAUT -> case t1 `minusWord#` 1## of t2
1930 0## -> ...
1931 DEFAULT -> case t2 `minusWord#` 1## of _
1932 0## -> ...
1933 DEFAULT -> ...
1934
1935 becomes:
1936
1937 main = case t of _
1938 0## -> ...
1939 1## -> ...
1940 2## -> ...
1941 3## -> ...
1942 DEFAULT -> ...
1943
1944 There are some wrinkles
1945
1946 * Do not apply caseRules if there is just a single DEFAULT alternative
1947 case e +# 3# of b { DEFAULT -> rhs }
1948 If we applied the transformation here we would (stupidly) get
1949 case a of b' { DEFAULT -> let b = e +# 3# in rhs }
1950 and now the process may repeat, because that let will really
1951 be a case.
1952
1953 * The type of the scrutinee might change. E.g.
1954 case tagToEnum (x :: Int#) of (b::Bool)
1955 False -> e1
1956 True -> e2
1957 ==>
1958 case x of (b'::Int#)
1959 DEFAULT -> e1
1960 1# -> e2
1961
1962 * The case binder may be used in the right hand sides, so we need
1963 to make a local binding for it, if it is alive. e.g.
1964 case e +# 10# of b
1965 DEFAULT -> blah...b...
1966 44# -> blah2...b...
1967 ===>
1968 case e of b'
1969 DEFAULT -> let b = b' +# 10# in blah...b...
1970 34# -> let b = 44# in blah2...b...
1971
1972 Note that in the non-DEFAULT cases we know what to bind 'b' to,
1973 whereas in the DEFAULT case we must reconstruct the original value.
1974 But NB: we use b'; we do not duplicate 'e'.
1975
1976 * In dataToTag we might need to make up some fake binders;
1977 see Note [caseRules for dataToTag] in PrelRules
1978 -}
1979
-- | Smart constructor for case expressions.  Tries, in order, the
-- transformations described above (merge nested cases, eliminate identity
-- case, scrutinee constant folding), each stage falling through to the
-- next: mkCase -> mkCase1 -> mkCase2 -> mkCase3.
mkCase, mkCase1, mkCase2, mkCase3
   :: DynFlags
   -> OutExpr -> OutId
   -> OutType -> [OutAlt]       -- Alternatives in standard (increasing) order
   -> SimplM OutExpr

--------------------------------------------------
--      1. Merge Nested Cases
--------------------------------------------------

-- See Note [Merge Nested Cases]: fires when the DEFAULT alternative is
-- itself a case on the same variable this case binds.
mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
  | gopt Opt_CaseMerge dflags
  , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
       <- stripTicksTop tickishFloatable deflt_rhs
  , inner_scrut_var == outer_bndr
  = do  { tick (CaseMerge outer_bndr)

        ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
                                          (con, args, wrap_rhs rhs)
                -- Simplifier's no-shadowing invariant should ensure
                -- that outer_bndr is not shadowed by the inner patterns
              wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
                -- The let is OK even for unboxed binders,

              wrapped_alts | isDeadBinder inner_bndr = inner_alts
                           | otherwise = map wrap_alt inner_alts

              merged_alts = mergeAlts outer_alts wrapped_alts
                -- NB: mergeAlts gives priority to the left
                --      case x of
                --        A -> e1
                --        DEFAULT -> case x of
                --                      A -> e2
                --                      B -> e3
                -- When we merge, we must ensure that e1 takes
                -- precedence over e2 as the value for A!

        ; fmap (mkTicks ticks) $
          mkCase1 dflags scrut outer_bndr alts_ty merged_alts
        }
        -- Warning: don't call mkCase recursively!
        -- Firstly, there's no point, because inner alts have already had
        -- mkCase applied to them, so they won't have a case in their default
        -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
        -- in munge_rhs may put a case into the DEFAULT branch!

-- No nested-case merge possible; fall through to the next stage
mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
2027
2028 --------------------------------------------------
2029 -- 2. Eliminate Identity Case
2030 --------------------------------------------------
2031
-- See Note [Eliminate Identity Case]: when every alternative simply
-- returns the scrutinee (possibly cast / under floatable ticks), the
-- whole case reduces to the scrutinee itself.
mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _)      -- Identity case
  | all identity_alt alts
  = do { tick (CaseIdentity case_bndr)
       ; return (mkTicks ticks $ re_cast scrut rhs1) }
  where
    -- Ticks stripped from the other RHSs must be re-attached to the result
    ticks = concatMap (stripTicksT tickishFloatable . thdOf3) (tail alts)
    identity_alt (con, args, rhs) = check_eq rhs con args

    -- check_eq rhs con args: does this alternative's RHS just rebuild
    -- the scrutinee value matched by (con, args)?
    check_eq (Cast rhs co) con args        -- See Note [RHS casts]
      = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
    check_eq (Tick t e) alt args
      = tickishFloatable t && check_eq e alt args

    check_eq (Lit lit) (LitAlt lit') _     = lit == lit'
    check_eq (Var v) _ _ | v == case_bndr = True
    check_eq (Var v) (DataAlt con) args
      | null arg_tys, null args = v == dataConWorkId con
                                  -- Optimisation only
    check_eq rhs (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $
                                      mkConApp2 con arg_tys args
    check_eq _ _ _ = False

    arg_tys = tyConAppArgs (idType case_bndr)

        -- Note [RHS casts]
        -- ~~~~~~~~~~~~~~~~
        -- We've seen this:
        --      case e of x { _ -> x `cast` c }
        -- And we definitely want to eliminate this case, to give
        --      e `cast` c
        -- So we throw away the cast from the RHS, and reconstruct
        -- it at the other end.  All the RHS casts must be the same
        -- if (all identity_alt alts) holds.
        --
        -- Don't worry about nested casts, because the simplifier combines them

    re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
    re_cast scrut _ = scrut

-- Not an identity case; fall through to scrutinee constant folding
mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
2072
2073 --------------------------------------------------
2074 -- 2. Scrutinee Constant Folding
2075 --------------------------------------------------
2076
-- See Note [Scrutinee Constant Folding]: when caseRules can simplify the
-- scrutinee (e.g. @x op# k#@), rewrite the case to scrutinise the simpler
-- expression, transforming each alternative's pattern accordingly.
mkCase2 dflags scrut bndr alts_ty alts
  | -- See Note [Scrutinee Constant Folding]
    case alts of  -- Not if there is just a DEFAULT alternative
      [(DEFAULT,_,_)] -> False
      _ -> True
  , gopt Opt_CaseFolding dflags
  , Just (scrut', tx_con, mk_orig) <- caseRules dflags scrut
  = do { bndr' <- newId (fsLit "lwild") (exprType scrut')
       ; alts' <- mapM (tx_alt tx_con mk_orig bndr') alts
       ; mkCase3 dflags scrut' bndr' alts_ty $
         add_default (re_sort alts')
       }

  | otherwise
  = mkCase3 dflags scrut bndr alts_ty alts
  where
    -- We need to keep the correct association between the scrutinee and its
    -- binder if the latter isn't dead. Hence we wrap rhs of alternatives with
    -- "let bndr = ... in":
    --
    --     case v + 10 of y        =====>  case v of y
    --        20      -> e1                   10      -> let y = 20     in e1
    --        DEFAULT -> e2                   DEFAULT -> let y = v + 10 in e2
    --
    -- Other transformations give: =====>  case v of y'
    --                                        10      -> let y = 20      in e1
    --                                        DEFAULT -> let y = y' + 10 in e2
    --
    -- This wrapping is done in tx_alt; we use mk_orig, returned by caseRules,
    -- to construct an expression equivalent to the original one, for use
    -- in the DEFAULT case

    tx_alt tx_con mk_orig new_bndr (con, bs, rhs)
      | DataAlt dc <- con', not (isNullaryRepDataCon dc)
      = -- For non-nullary data cons we must invent some fake binders
        -- See Note [caseRules for dataToTag] in PrelRules
        do { us <- getUniquesM
           ; let (ex_tvs, arg_ids) = dataConRepInstPat us dc
                                       (tyConAppArgs (idType new_bndr))
           ; return (con', ex_tvs ++ arg_ids, rhs') }
      | otherwise
      = return (con', [], rhs')
      where
        con' = tx_con con

        -- Keep the case binder bound to its original value if it is alive
        rhs' | isDeadBinder bndr = rhs
             | otherwise = bindNonRec bndr orig_val rhs

        -- Reconstruct the value the original case binder was bound to
        orig_val = case con of
                      DEFAULT -> mk_orig new_bndr
                      LitAlt l -> Lit l
                      DataAlt dc -> mkConApp2 dc (tyConAppArgs (idType bndr)) bs


    re_sort :: [CoreAlt] -> [CoreAlt] -- Re-sort the alternatives to
    re_sort alts = sortBy cmpAlt alts -- preserve the #case_invariants#

    add_default :: [CoreAlt] -> [CoreAlt]
    -- TagToEnum may change a boolean True/False set of alternatives
    -- to LitAlt 0#/1# alternatives.  But literal alternatives always
    -- have a DEFAULT (I think).  So add it.
    add_default ((LitAlt {}, bs, rhs) : alts) = (DEFAULT, bs, rhs) : alts
    add_default alts = alts
2140
2141 --------------------------------------------------
2142 -- Catch-all
2143 --------------------------------------------------
-- No transformation fired: rebuild the Case node exactly as given.
mkCase3 _dflags scrutinee case_bndr result_ty alternatives
  = return (Case scrutinee case_bndr result_ty alternatives)
2146
2147 {-
2148 Note [Dead binders]
2149 ~~~~~~~~~~~~~~~~~~~~
2150 Note that dead-ness is maintained by the simplifier, so that it is
2151 accurate after simplification as well as before.
2152
2153
2154 Note [Cascading case merge]
2155 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2156 Case merging should cascade in one sweep, because it
2157 happens bottom-up
2158
2159 case e of a {
2160 DEFAULT -> case a of b
2161 DEFAULT -> case b of c {
2162 DEFAULT -> e
2163 A -> ea
2164 B -> eb
2165 C -> ec
2166 ==>
2167 case e of a {
2168 DEFAULT -> case a of b
2169 DEFAULT -> let c = b in e
2170 A -> let c = b in ea
2171 B -> eb
2172 C -> ec
2173 ==>
2174 case e of a {
2175 DEFAULT -> let b = a in let c = b in e
2176 A -> let b = a in let c = b in ea
2177 B -> let b = a in eb
2178 C -> ec
2179
2180
2181 However here's a tricky case that we still don't catch, and I don't
2182 see how to catch it in one pass:
2183
2184 case x of c1 { I# a1 ->
2185 case a1 of c2 ->
2186 0 -> ...
2187 DEFAULT -> case x of c3 { I# a2 ->
2188 case a2 of ...
2189
2190 After occurrence analysis (and its binder-swap) we get this
2191
2192 case x of c1 { I# a1 ->
2193 let x = c1 in -- Binder-swap addition
2194 case a1 of c2 ->
2195 0 -> ...
2196 DEFAULT -> case x of c3 { I# a2 ->
2197 case a2 of ...
2198
2199 When we simplify the inner case x, we'll see that
2200 x=c1=I# a1. So we'll bind a2 to a1, and get
2201
2202 case x of c1 { I# a1 ->
2203 case a1 of c2 ->
2204 0 -> ...
2205 DEFAULT -> case a1 of ...
2206
This is correct, but we can't do a case merge in this sweep
2208 because c2 /= a1. Reason: the binding c1=I# a1 went inwards
2209 without getting changed to c1=I# c2.
2210
2211 I don't think this is worth fixing, even if I knew how. It'll
2212 all come out in the next pass anyway.
2213 -}