1 {-
2 (c) The AQUA Project, Glasgow University, 1993-1998
3
4 \section[SimplUtils]{The simplifier utilities}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplUtils (
10 -- Rebuilding
11 mkLam, mkCase, prepareAlts, tryEtaExpandRhs,
12
13 -- Inlining,
14 preInlineUnconditionally, postInlineUnconditionally,
15 activeUnfolding, activeRule,
16 getUnfoldingInRuleMatch,
17 simplEnvForGHCi, updModeForStableUnfoldings, updModeForRules,
18
19 -- The continuation type
20 SimplCont(..), DupFlag(..), StaticEnv,
21 isSimplified, contIsStop,
22 contIsDupable, contResultType, contHoleType,
23 contIsTrivial, contArgs,
24 countArgs,
25 mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
26 interestingCallContext,
27
28 -- ArgInfo
29 ArgInfo(..), ArgSpec(..), mkArgInfo,
30 addValArgTo, addCastTo, addTyArgTo,
31 argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
32
33 abstractFloats
34 ) where
35
36 #include "HsVersions.h"
37
38 import GhcPrelude
39
40 import SimplEnv
41 import CoreMonad ( SimplMode(..), Tick(..) )
42 import DynFlags
43 import CoreSyn
44 import qualified CoreSubst
45 import PprCore
46 import CoreFVs
47 import CoreUtils
48 import CoreArity
49 import CoreUnfold
50 import Name
51 import Id
52 import IdInfo
53 import Var
54 import Demand
55 import SimplMonad
56 import Type hiding( substTy )
57 import Coercion hiding( substCo )
58 import DataCon ( dataConWorkId, isNullaryRepDataCon )
59 import VarSet
60 import BasicTypes
61 import Util
62 import OrdList ( isNilOL )
63 import MonadUtils
64 import Outputable
65 import Pair
66 import PrelRules
67 import FastString ( fsLit )
68
69 import Control.Monad ( when )
70 import Data.List ( sortBy )
71
72 {-
73 ************************************************************************
74 * *
75 The SimplCont and DupFlag types
76 * *
77 ************************************************************************
78
79 A SimplCont allows the simplifier to traverse the expression in a
80 zipper-like fashion. The SimplCont represents the rest of the expression,
81 "above" the point of interest.
82
83 You can also think of a SimplCont as an "evaluation context", using
84 that term in the way it is used for operational semantics. This is the
85 way I usually think of it. For example, you'll often see a syntax for
86 evaluation contexts looking like
87 C ::= [] | C e | case C of alts | C `cast` co
88 That's the kind of thing we are doing here, and I use that syntax in
89 the comments.
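
For a concrete (slightly simplified, fields omitted) picture: while
simplifying the function f in
        case f x of alts
the rest of the expression is held as the continuation
        ApplyToVal x (Select alts (Stop ...))
so the innermost part of the context comes first, and the Stop at the
tail carries the type of the whole expression.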
90
91
92 Key points:
93 * A SimplCont describes a *strict* context (just like
94 evaluation contexts do). E.g. Just [] is not a SimplCont
95
96 * A SimplCont describes a context that *does not* bind
97 any variables. E.g. \x. [] is not a SimplCont
98 -}
99
100 data SimplCont
101 = Stop -- Stop[e] = e
102 OutType -- Type of the <hole>
103 CallCtxt -- Tells if there is something interesting about
104 -- the context, and hence the inliner
105 -- should be a bit keener (see interestingCallContext)
106 -- Specifically:
107 -- This is an argument of a function that has RULES
108 -- Inlining the call might allow the rule to fire
109 -- Never ValAppCxt (use ApplyToVal instead)
110 -- or CaseCtxt (use Select instead)
111
112 | CastIt -- (CastIt co K)[e] = K[ e `cast` co ]
113 OutCoercion -- The coercion simplified
114 -- Invariant: never an identity coercion
115 SimplCont
116
117 | ApplyToVal -- (ApplyToVal arg K)[e] = K[ e arg ]
118 { sc_dup :: DupFlag -- See Note [DupFlag invariants]
119 , sc_arg :: InExpr -- The argument,
120 , sc_env :: StaticEnv -- see Note [StaticEnv invariant]
121 , sc_cont :: SimplCont }
122
123 | ApplyToTy -- (ApplyToTy ty K)[e] = K[ e ty ]
124 { sc_arg_ty :: OutType -- Argument type
125 , sc_hole_ty :: OutType -- Type of the function, presumably (forall a. blah)
126 -- See Note [The hole type in ApplyToTy]
127 , sc_cont :: SimplCont }
128
129 | Select -- (Select alts K)[e] = K[ case e of alts ]
130 { sc_dup :: DupFlag -- See Note [DupFlag invariants]
131 , sc_bndr :: InId -- case binder
132 , sc_alts :: [InAlt] -- Alternatives
133 , sc_env :: StaticEnv -- See Note [StaticEnv invariant]
134 , sc_cont :: SimplCont }
135
136 -- The two strict forms: StrictBind and StrictArg
137 | StrictBind -- (StrictBind x xs b K)[e] = let x = e in K[\xs.b]
138 -- or, equivalently, = K[ (\x xs.b) e ]
139 { sc_dup :: DupFlag -- See Note [DupFlag invariants]
140 , sc_bndr :: InId
141 , sc_bndrs :: [InBndr]
142 , sc_body :: InExpr
143 , sc_env :: StaticEnv -- See Note [StaticEnv invariant]
144 , sc_cont :: SimplCont }
145
146 | StrictArg -- (StrictArg (f e1 ..en) K)[e] = K[ f e1 .. en e ]
147 { sc_dup :: DupFlag -- Always Simplified or OkToDup
148 , sc_fun :: ArgInfo -- Specifies f, e1..en, Whether f has rules, etc
149 -- plus strictness flags for *further* args
150 , sc_cci :: CallCtxt -- Whether *this* argument position is interesting
151 , sc_cont :: SimplCont }
152
153 | TickIt -- (TickIt t K)[e] = K[ tick t e ]
154 (Tickish Id) -- Tick tickish <hole>
155 SimplCont
156
157 type StaticEnv = SimplEnv -- Just the static part is relevant
158
159 data DupFlag = NoDup -- Unsimplified, might be big
160 | Simplified -- Simplified
161 | OkToDup -- Simplified and small
162
163 isSimplified :: DupFlag -> Bool
164 isSimplified NoDup = False
165 isSimplified _ = True -- Invariant: the subst-env is empty
166
167 perhapsSubstTy :: DupFlag -> StaticEnv -> Type -> Type
168 perhapsSubstTy dup env ty
169 | isSimplified dup = ty
170 | otherwise = substTy env ty
171
172 {- Note [StaticEnv invariant]
173 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
174 We pair up an InExpr or InAlts with a StaticEnv, which establishes the
175 lexical scope for that InExpr. When we simplify that InExpr/InAlts, we
176 use
177 - Its captured StaticEnv
178 - Overriding its InScopeSet with the larger one at the
179 simplification point.
180
181 Why override the InScopeSet? Example:
182 (let y = ey in f) ex
183 By the time we simplify ex, 'y' will be in scope.
184
185 However the InScopeSet in the StaticEnv is not irrelevant: it should
186 include all the free vars of the result of applying the substitution to the InExpr.
187 Reason: contHoleType uses perhapsSubstTy to apply the substitution to
188 the expression, and that (rightly) gives ASSERT failures if the InScopeSet
189 isn't big enough.
190
191 Note [DupFlag invariants]
192 ~~~~~~~~~~~~~~~~~~~~~~~~~
193 In both (ApplyToVal dup _ env k)
194 and (Select dup _ _ env k)
195 the following invariants hold
196
197 (a) if dup = OkToDup, then continuation k is also ok-to-dup
198 (b) if dup = OkToDup or Simplified, the subst-env is empty
199 (and hence no need to re-simplify)
200 -}
201
202 instance Outputable DupFlag where
203 ppr OkToDup = text "ok"
204 ppr NoDup = text "nodup"
205 ppr Simplified = text "simpl"
206
207 instance Outputable SimplCont where
208 ppr (Stop ty interesting) = text "Stop" <> brackets (ppr interesting) <+> ppr ty
209 ppr (CastIt co cont ) = (text "CastIt" <+> pprOptCo co) $$ ppr cont
210 ppr (TickIt t cont) = (text "TickIt" <+> ppr t) $$ ppr cont
211 ppr (ApplyToTy { sc_arg_ty = ty, sc_cont = cont })
212 = (text "ApplyToTy" <+> pprParendType ty) $$ ppr cont
213 ppr (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_cont = cont })
214 = (text "ApplyToVal" <+> ppr dup <+> pprParendExpr arg)
215 $$ ppr cont
216 ppr (StrictBind { sc_bndr = b, sc_cont = cont })
217 = (text "StrictBind" <+> ppr b) $$ ppr cont
218 ppr (StrictArg { sc_fun = ai, sc_cont = cont })
219 = (text "StrictArg" <+> ppr (ai_fun ai)) $$ ppr cont
220 ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
221 = (text "Select" <+> ppr dup <+> ppr bndr) $$
222 whenPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
223
224
225 {- Note [The hole type in ApplyToTy]
226 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
227 The sc_hole_ty field of ApplyToTy records the type of the "hole" in the
228 continuation. It is absolutely necessary to compute contHoleType, but it is
229 not used for anything else (and hence may not be evaluated).
230
231 Why is it necessary for contHoleType? Consider the continuation
232 ApplyToType Int (Stop Int)
233 corresponding to
234 (<hole> @Int) :: Int
235 What is the type of <hole>? It could be (forall a. Int) or (forall a. a),
236 and there is no way to know which, so we must record it.
237
238 In a chain of applications (f @t1 @t2 @t3) we'll lazily compute exprType
239 for (f @t1) and (f @t1 @t2), which is potentially non-linear; but it probably
240 doesn't matter because we'll never compute them all.
241
242 ************************************************************************
243 * *
244 ArgInfo and ArgSpec
245 * *
246 ************************************************************************
247 -}
248
249 data ArgInfo
250 = ArgInfo {
251 ai_fun :: OutId, -- The function
252 ai_args :: [ArgSpec], -- ...applied to these args (which are in *reverse* order)
253
254 ai_type :: OutType, -- Type of (f a1 ... an)
255
256 ai_rules :: FunRules, -- Rules for this function
257
258 ai_encl :: Bool, -- Flag saying whether this function
259 -- or an enclosing one has rules (recursively)
260 -- True => be keener to inline in all args
261
262 ai_strs :: [Bool], -- Strictness of remaining arguments
263 -- Usually infinite, but if it is finite it guarantees
264 -- that the function diverges after being given
265 -- that number of args
266 ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline
267 -- Always infinite
268 }
269
270 data ArgSpec
271 = ValArg OutExpr -- Apply to this (coercion or value); c.f. ApplyToVal
272 | TyArg { as_arg_ty :: OutType -- Apply to this type; c.f. ApplyToTy
273 , as_hole_ty :: OutType } -- Type of the function (presumably forall a. blah)
274 | CastBy OutCoercion -- Cast by this; c.f. CastIt
275
276 instance Outputable ArgSpec where
277 ppr (ValArg e) = text "ValArg" <+> ppr e
278 ppr (TyArg { as_arg_ty = ty }) = text "TyArg" <+> ppr ty
279 ppr (CastBy c) = text "CastBy" <+> ppr c
280
281 addValArgTo :: ArgInfo -> OutExpr -> ArgInfo
282 addValArgTo ai arg = ai { ai_args = ValArg arg : ai_args ai
283 , ai_type = applyTypeToArg (ai_type ai) arg
284 , ai_rules = decRules (ai_rules ai) }
285
286 addTyArgTo :: ArgInfo -> OutType -> ArgInfo
287 addTyArgTo ai arg_ty = ai { ai_args = arg_spec : ai_args ai
288 , ai_type = piResultTy poly_fun_ty arg_ty
289 , ai_rules = decRules (ai_rules ai) }
290 where
291 poly_fun_ty = ai_type ai
292 arg_spec = TyArg { as_arg_ty = arg_ty, as_hole_ty = poly_fun_ty }
293
294 addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
295 addCastTo ai co = ai { ai_args = CastBy co : ai_args ai
296 , ai_type = pSnd (coercionKind co) }
297
298 argInfoAppArgs :: [ArgSpec] -> [OutExpr]
299 argInfoAppArgs [] = []
300 argInfoAppArgs (CastBy {} : _) = [] -- Stop at a cast
301 argInfoAppArgs (ValArg e : as) = e : argInfoAppArgs as
302 argInfoAppArgs (TyArg { as_arg_ty = ty } : as) = Type ty : argInfoAppArgs as
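-- A worked example (informal; record fields elided):
--      argInfoAppArgs [ValArg e1, TyArg ty _, CastBy co, ValArg e2]
--        = [e1, Type ty]
-- because the traversal stops as soon as it meets the CastBy.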
303
304 pushSimplifiedArgs :: SimplEnv -> [ArgSpec] -> SimplCont -> SimplCont
305 pushSimplifiedArgs _env [] k = k
306 pushSimplifiedArgs env (arg : args) k
307 = case arg of
308 TyArg { as_arg_ty = arg_ty, as_hole_ty = hole_ty }
309 -> ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_cont = rest }
310 ValArg e -> ApplyToVal { sc_arg = e, sc_env = env, sc_dup = Simplified, sc_cont = rest }
311 CastBy c -> CastIt c rest
312 where
313 rest = pushSimplifiedArgs env args k
314 -- The env has an empty SubstEnv
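-- For instance (informally, with record fields elided):
--      pushSimplifiedArgs env [TyArg ty hole_ty, ValArg e] k
--        = ApplyToTy ty hole_ty (ApplyToVal e k)
-- i.e. the ArgSpecs are turned back into ApplyToTy/ApplyToVal/CastIt
-- frames in order, with sc_dup = Simplified for the value arguments.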
315
316 argInfoExpr :: OutId -> [ArgSpec] -> OutExpr
317 -- NB: the [ArgSpec] is reversed so that the first arg
318 -- in the list is the last one in the application
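-- e.g. (informally)  argInfoExpr f [ValArg e2, ValArg e1]
--                      = (Var f `App` e1) `App` e2      -- i.e.  f e1 e2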
319 argInfoExpr fun rev_args
320 = go rev_args
321 where
322 go [] = Var fun
323 go (ValArg a : as) = go as `App` a
324 go (TyArg { as_arg_ty = ty } : as) = go as `App` Type ty
325 go (CastBy co : as) = mkCast (go as) co
326
327
328 type FunRules = Maybe (Int, [CoreRule]) -- Remaining rules for this function
329 -- Nothing => No rules
330 -- Just (n, rules) => some rules, requiring at least n more type/value args
331
332 decRules :: FunRules -> FunRules
333 decRules (Just (n, rules)) = Just (n-1, rules)
334 decRules Nothing = Nothing
335
336 mkFunRules :: [CoreRule] -> FunRules
337 mkFunRules [] = Nothing
338 mkFunRules rs = Just (n_required, rs)
339 where
340 n_required = maximum (map ruleArity rs)
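-- For example (a sketch): if rs contains one rule of arity 1 and one of
-- arity 3, then mkFunRules rs = Just (3, rs); n is simply the maximum
-- arity over all the rules.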
341
342 {-
343 ************************************************************************
344 * *
345 Functions on SimplCont
346 * *
347 ************************************************************************
348 -}
349
350 mkBoringStop :: OutType -> SimplCont
351 mkBoringStop ty = Stop ty BoringCtxt
352
353 mkRhsStop :: OutType -> SimplCont -- See Note [RHS of lets] in CoreUnfold
354 mkRhsStop ty = Stop ty RhsCtxt
355
356 mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
357 mkLazyArgStop ty cci = Stop ty cci
358
359 -------------------
360 contIsRhsOrArg :: SimplCont -> Bool
361 contIsRhsOrArg (Stop {}) = True
362 contIsRhsOrArg (StrictBind {}) = True
363 contIsRhsOrArg (StrictArg {}) = True
364 contIsRhsOrArg _ = False
365
366 contIsRhs :: SimplCont -> Bool
367 contIsRhs (Stop _ RhsCtxt) = True
368 contIsRhs _ = False
369
370 -------------------
371 contIsStop :: SimplCont -> Bool
372 contIsStop (Stop {}) = True
373 contIsStop _ = False
374
375 contIsDupable :: SimplCont -> Bool
376 contIsDupable (Stop {}) = True
377 contIsDupable (ApplyToTy { sc_cont = k }) = contIsDupable k
378 contIsDupable (ApplyToVal { sc_dup = OkToDup }) = True -- See Note [DupFlag invariants]
379 contIsDupable (Select { sc_dup = OkToDup }) = True -- ...ditto...
380 contIsDupable (StrictArg { sc_dup = OkToDup }) = True -- ...ditto...
381 contIsDupable (CastIt _ k) = contIsDupable k
382 contIsDupable _ = False
383
384 -------------------
385 contIsTrivial :: SimplCont -> Bool
386 contIsTrivial (Stop {}) = True
387 contIsTrivial (ApplyToTy { sc_cont = k }) = contIsTrivial k
388 contIsTrivial (ApplyToVal { sc_arg = Coercion _, sc_cont = k }) = contIsTrivial k
389 contIsTrivial (CastIt _ k) = contIsTrivial k
390 contIsTrivial _ = False
391
392 -------------------
393 contResultType :: SimplCont -> OutType
394 contResultType (Stop ty _) = ty
395 contResultType (CastIt _ k) = contResultType k
396 contResultType (StrictBind { sc_cont = k }) = contResultType k
397 contResultType (StrictArg { sc_cont = k }) = contResultType k
398 contResultType (Select { sc_cont = k }) = contResultType k
399 contResultType (ApplyToTy { sc_cont = k }) = contResultType k
400 contResultType (ApplyToVal { sc_cont = k }) = contResultType k
401 contResultType (TickIt _ k) = contResultType k
402
403 contHoleType :: SimplCont -> OutType
404 contHoleType (Stop ty _) = ty
405 contHoleType (TickIt _ k) = contHoleType k
406 contHoleType (CastIt co _) = pFst (coercionKind co)
407 contHoleType (StrictBind { sc_bndr = b, sc_dup = dup, sc_env = se })
408 = perhapsSubstTy dup se (idType b)
409 contHoleType (StrictArg { sc_fun = ai }) = funArgTy (ai_type ai)
410 contHoleType (ApplyToTy { sc_hole_ty = ty }) = ty -- See Note [The hole type in ApplyToTy]
411 contHoleType (ApplyToVal { sc_arg = e, sc_env = se, sc_dup = dup, sc_cont = k })
412 = mkFunTy (perhapsSubstTy dup se (exprType e))
413 (contHoleType k)
414 contHoleType (Select { sc_dup = d, sc_bndr = b, sc_env = se })
415 = perhapsSubstTy d se (idType b)
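-- A small example (informal): for the continuation
--      ApplyToVal { sc_arg = arg, sc_cont = Stop res_ty _ }
-- the hole must be a function consuming arg, so its type is
-- (exprType arg -> res_ty), modulo applying the static env's substitution.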
416
417 -------------------
418 countArgs :: SimplCont -> Int
419 -- Count all arguments, including types, coercions, and other values
420 countArgs (ApplyToTy { sc_cont = cont }) = 1 + countArgs cont
421 countArgs (ApplyToVal { sc_cont = cont }) = 1 + countArgs cont
422 countArgs _ = 0
423
424 contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
425 -- Summarises value args, discards type args and coercions
426 -- The returned continuation of the call is only used to
427 -- answer questions like "are you interesting?"
428 contArgs cont
429 | lone cont = (True, [], cont)
430 | otherwise = go [] cont
431 where
432 lone (ApplyToTy {}) = False -- See Note [Lone variables] in CoreUnfold
433 lone (ApplyToVal {}) = False
434 lone (CastIt {}) = False
435 lone _ = True
436
437 go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
438 = go (is_interesting arg se : args) k
439 go args (ApplyToTy { sc_cont = k }) = go args k
440 go args (CastIt _ k) = go args k
441 go args k = (False, reverse args, k)
442
443 is_interesting arg se = interestingArg se arg
444 -- Do *not* use short-cutting substitution here
445 -- because we want to get as much IdInfo as possible
446
447
448 -------------------
449 mkArgInfo :: Id
450 -> [CoreRule] -- Rules for function
451 -> Int -- Number of value args
452 -> SimplCont -- Context of the call
453 -> ArgInfo
454
455 mkArgInfo fun rules n_val_args call_cont
456 | n_val_args < idArity fun -- Note [Unsaturated functions]
457 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
458 , ai_rules = fun_rules, ai_encl = False
459 , ai_strs = vanilla_stricts
460 , ai_discs = vanilla_discounts }
461 | otherwise
462 = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
463 , ai_rules = fun_rules
464 , ai_encl = interestingArgContext rules call_cont
465 , ai_strs = add_type_str fun_ty arg_stricts
466 , ai_discs = arg_discounts }
467 where
468 fun_ty = idType fun
469
470 fun_rules = mkFunRules rules
471
472 vanilla_discounts, arg_discounts :: [Int]
473 vanilla_discounts = repeat 0
474 arg_discounts = case idUnfolding fun of
475 CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
476 -> discounts ++ vanilla_discounts
477 _ -> vanilla_discounts
478
479 vanilla_stricts, arg_stricts :: [Bool]
480 vanilla_stricts = repeat False
481
482 arg_stricts
483 = case splitStrictSig (idStrictness fun) of
484 (demands, result_info)
485 | not (demands `lengthExceeds` n_val_args)
486 -> -- Enough args, use the strictness given.
487 -- For bottoming functions we used to pretend that the arg
488 -- is lazy, so that we don't treat the arg as an
489 -- interesting context. This avoids substituting
490 -- top-level bindings for (say) strings into
491 -- calls to error. But now we are more careful about
492 -- inlining lone variables, so it's ok (see SimplUtils.analyseCont)
493 if isBotRes result_info then
494 map isStrictDmd demands -- Finite => result is bottom
495 else
496 map isStrictDmd demands ++ vanilla_stricts
497 | otherwise
498 -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
499 <+> ppr n_val_args <+> ppr demands )
500 vanilla_stricts -- Not enough args, or no strictness
501
502 add_type_str :: Type -> [Bool] -> [Bool]
503 -- If the function arg types are strict, record that in the 'strictness bits'
504 -- No need to instantiate because unboxed types (which dominate the strict
505 -- types) can't instantiate type variables.
506 -- add_type_str is done repeatedly (for each call); might be better
507 -- once-for-all in the function
508 -- But beware primops/datacons with no strictness
509
510 add_type_str
511 = go
512 where
513 go _ [] = []
514 go fun_ty strs -- Look through foralls
515 | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty -- Includes coercions
516 = go fun_ty' strs
517 go fun_ty (str:strs) -- Add strict-type info
518 | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
519 = (str || Just False == isLiftedType_maybe arg_ty) : go fun_ty' strs
520 -- If the type is levity-polymorphic, we can't know whether it's
521 -- strict. isLiftedType_maybe will return Just False only when
522 -- we're sure the type is unlifted.
523 go _ strs
524 = strs
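-- A worked example (sketch): with fun_ty = Int# -> Bool -> r and
-- demand-derived strs = [False, False], the result is [True, False]:
-- the Int# argument is marked strict purely because its type is unlifted.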
525
526 {- Note [Unsaturated functions]
527 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
528 Consider (test eyeball/inline4)
529 x = a:as
530 y = f x
531 where f has arity 2. Then we do not want to inline 'x', because
532 it'll just be floated out again. Even if f has lots of discounts
533 on its first argument -- it must be saturated for these to kick in
534 -}
535
536
537 {-
538 ************************************************************************
539 * *
540 Interesting arguments
541 * *
542 ************************************************************************
543
544 Note [Interesting call context]
545 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
546 We want to avoid inlining an expression where there can't possibly be
547 any gain, such as in an argument position. Hence, if the continuation
548 is interesting (eg. a case scrutinee, application etc.) then we
549 inline, otherwise we don't.
550
551 Previously some_benefit used to return True only if the variable was
552 applied to some value arguments. This didn't work:
553
554 let x = _coerce_ (T Int) Int (I# 3) in
555 case _coerce_ Int (T Int) x of
556 I# y -> ....
557
558 we want to inline x, but can't see that it's a constructor in a case
559 scrutinee position, and some_benefit is False.
560
561 Another example:
562
563 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
564
565 .... case dMonadST _@_ x0 of (a,b,c) -> ....
566
567 we'd really like to inline dMonadST here, but we *don't* want to
568 inline if the case expression is just
569
570 case x of y { DEFAULT -> ... }
571
572 since we can just eliminate this case instead (x is in WHNF). Similar
573 applies when x is bound to a lambda expression. Hence
574 contIsInteresting looks for case expressions with just a single
575 default case.
576
577 Note [No case of case is boring]
578 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
579 If we see
580 case f x of <alts>
581
582 we'd usually treat the context as interesting, to encourage 'f' to
583 inline. But if case-of-case is off, it's really not so interesting
584 after all, because we are unlikely to be able to push the case
585 expression into the branches of any case in f's unfolding. So, to
586 reduce unnecessary code expansion, we just make the context look boring.
587 This made a small compile-time perf improvement in perf/compiler/T6048,
588 and it looks plausible to me.
589 -}
590
591 interestingCallContext :: SimplEnv -> SimplCont -> CallCtxt
592 -- See Note [Interesting call context]
593 interestingCallContext env cont
594 = interesting cont
595 where
596 interesting (Select {})
597 | sm_case_case (getMode env) = CaseCtxt
598 | otherwise = BoringCtxt
599 -- See Note [No case of case is boring]
600
601 interesting (ApplyToVal {}) = ValAppCtxt
602 -- Can happen if we have (f Int |> co) y
603 -- If f has an INLINE prag we need to give it some
604 -- motivation to inline. See Note [Cast then apply]
605 -- in CoreUnfold
606
607 interesting (StrictArg { sc_cci = cci }) = cci
608 interesting (StrictBind {}) = BoringCtxt
609 interesting (Stop _ cci) = cci
610 interesting (TickIt _ k) = interesting k
611 interesting (ApplyToTy { sc_cont = k }) = interesting k
612 interesting (CastIt _ k) = interesting k
613 -- If this call is the arg of a strict function, the context
614 -- is a bit interesting. If we inline here, we may get useful
615 -- evaluation information to avoid repeated evals: e.g.
616 -- x + (y * z)
617 -- Here the contIsInteresting makes the '*' keener to inline,
618 -- which in turn exposes a constructor which makes the '+' inline.
619 -- Assuming that +,* aren't small enough to inline regardless.
620 --
621 -- It's also very important to inline in a strict context for things
622 -- like
623 -- foldr k z (f x)
624 -- Here, the context of (f x) is strict, and if f's unfolding is
625 -- a build it's *great* to inline it here. So we must ensure that
626 -- the context for (f x) is not totally uninteresting.
627
628 interestingArgContext :: [CoreRule] -> SimplCont -> Bool
629 -- If the argument has form (f x y), where x,y are boring,
630 -- and f is marked INLINE, then we don't want to inline f.
631 -- But if the context of the argument is
632 -- g (f x y)
633 -- where g has rules, then we *do* want to inline f, in case it
634 -- exposes a rule that might fire. Similarly, if the context is
635 -- h (g (f x x))
636 -- where h has rules, then we do want to inline f; hence the
637 -- call_cont argument to interestingArgContext
638 --
639 -- The ai_encl flag makes this happen; if it's
640 -- set, the inliner gets just keen enough to inline f
641 -- regardless of how boring f's arguments are, if it's marked INLINE
642 --
643 -- The alternative would be to *always* inline an INLINE function,
644 -- regardless of how boring its context is; but that seems overkill
645 -- For example, it'd mean that wrapper functions were always inlined
646 --
647 -- The call_cont passed to interestingArgContext is the context of
648 -- the call itself, e.g. g <hole> in the example above
649 interestingArgContext rules call_cont
650 = notNull rules || enclosing_fn_has_rules
651 where
652 enclosing_fn_has_rules = go call_cont
653
654 go (Select {}) = False
655 go (ApplyToVal {}) = False -- Shouldn't really happen
656 go (ApplyToTy {}) = False -- Ditto
657 go (StrictArg { sc_cci = cci }) = interesting cci
658 go (StrictBind {}) = False -- ??
659 go (CastIt _ c) = go c
660 go (Stop _ cci) = interesting cci
661 go (TickIt _ c) = go c
662
663 interesting RuleArgCtxt = True
664 interesting _ = False
665
666
667 {- Note [Interesting arguments]
668 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
669 An argument is interesting if it deserves a discount for unfoldings
670 with a discount in that argument position. The idea is to avoid
671 unfolding a function that is applied only to variables that have no
672 unfolding (i.e. they are probably lambda bound), as in (f x y z); there is
673 little point in inlining f here.
674
675 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
676 we must look through lets, eg (let x = e in C a b), because the let will
677 float, exposing the value, if we inline. That makes it different to
678 exprIsHNF.
679
680 Before 2009 we said it was interesting if the argument had *any* structure
681 at all; i.e. (hasSomeUnfolding v). But that does too much inlining; see Trac #3016.
682
683 But we don't regard (f x y) as interesting, unless f is unsaturated.
684 If it's saturated and f hasn't inlined, then it's probably not going
685 to now!
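
To illustrate (informally): a constructor application like (Just v) or a
value lambda (\x. e) is summarised as ValueArg; a lambda-bound variable
with no unfolding and no arguments is TrivArg; and a saturated call (f v)
of an ordinary function is NonTrivArg, for the reason above.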
686
687 Note [Conlike is interesting]
688 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
689 Consider
690 f d = ...((*) d x y)...
691 ... f (df d')...
692 where df is con-like. Then we'd really like to inline 'f' so that the
693 rule for (*) (df d) can fire. To do this
694 a) we give a discount for being an argument of a class-op (eg (*) d)
695 b) we say that a con-like argument (eg (df d)) is interesting
696 -}
697
698 interestingArg :: SimplEnv -> CoreExpr -> ArgSummary
699 -- See Note [Interesting arguments]
700 interestingArg env e = go env 0 e
701 where
702 -- n is # value args to which the expression is applied
703 go env n (Var v)
704 = case substId env v of
705 DoneId v' -> go_var n v'
706 DoneEx e _ -> go (zapSubstEnv env) n e
707 ContEx tvs cvs ids e -> go (setSubstEnv env tvs cvs ids) n e
708
709 go _ _ (Lit {}) = ValueArg
710 go _ _ (Type _) = TrivArg
711 go _ _ (Coercion _) = TrivArg
712 go env n (App fn (Type _)) = go env n fn
713 go env n (App fn _) = go env (n+1) fn
714 go env n (Tick _ a) = go env n a
715 go env n (Cast e _) = go env n e
716 go env n (Lam v e)
717 | isTyVar v = go env n e
718 | n>0 = NonTrivArg -- (\x.b) e is NonTriv
719 | otherwise = ValueArg
720 go _ _ (Case {}) = NonTrivArg
721 go env n (Let b e) = case go env' n e of
722 ValueArg -> ValueArg
723 _ -> NonTrivArg
724 where
725 env' = env `addNewInScopeIds` bindersOf b
726
727 go_var n v
728 | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather than
729 -- data constructors here
730 | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
731 | n > 0 = NonTrivArg -- Saturated or unknown call
732 | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
733 -- See Note [Conlike is interesting]
734 | otherwise = TrivArg -- n==0, no useful unfolding
735 where
736 conlike_unfolding = isConLikeUnfolding (idUnfolding v)
737
738 {-
739 ************************************************************************
740 * *
741 SimplMode
742 * *
743 ************************************************************************
744
745 The SimplMode controls several switches; see its definition in
746 CoreMonad
747 sm_rules :: Bool -- Whether RULES are enabled
748 sm_inline :: Bool -- Whether inlining is enabled
749 sm_case_case :: Bool -- Whether case-of-case is enabled
750 sm_eta_expand :: Bool -- Whether eta-expansion is enabled
751 -}
752
753 simplEnvForGHCi :: DynFlags -> SimplEnv
754 simplEnvForGHCi dflags
755 = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
756 , sm_phase = InitialPhase
757 , sm_dflags = dflags
758 , sm_rules = rules_on
759 , sm_inline = False
760 , sm_eta_expand = eta_expand_on
761 , sm_case_case = True }
762 where
763 rules_on = gopt Opt_EnableRewriteRules dflags
764 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
765 -- Do not do any inlining, in case we expose some unboxed
766 -- tuple stuff that confuses the bytecode interpreter
767
768 updModeForStableUnfoldings :: Activation -> SimplMode -> SimplMode
769 -- See Note [Simplifying inside stable unfoldings]
770 updModeForStableUnfoldings inline_rule_act current_mode
771 = current_mode { sm_phase = phaseFromActivation inline_rule_act
772 , sm_inline = True
773 , sm_eta_expand = False }
774 -- sm_eta_expand: see Note [No eta expansion in stable unfoldings]
775 -- For sm_rules, just inherit; sm_rules might be "off"
776 -- because of -fno-enable-rewrite-rules
777 where
778 phaseFromActivation (ActiveAfter _ n) = Phase n
779 phaseFromActivation _ = InitialPhase
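-- For example (a sketch): the unfolding from {-# INLINE [2] f #-} has
-- activation (ActiveAfter _ 2), so its RHS is simplified with
-- sm_phase = Phase 2; a plain {-# INLINE f #-} is AlwaysActive and
-- falls through to InitialPhase.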
780
781 updModeForRules :: SimplMode -> SimplMode
782 -- See Note [Simplifying rules]
783 updModeForRules current_mode
784 = current_mode { sm_phase = InitialPhase
785 , sm_inline = False
786 , sm_rules = False
787 , sm_eta_expand = False }
788
789 {- Note [Simplifying rules]
790 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
791 When simplifying a rule LHS, refrain from /any/ inlining or applying
792 of other RULES.
793
794 Doing anything to the LHS is plain confusing, because it means that what the
795 rule matches is not what the user wrote. c.f. Trac #10595, and #10528.
796 Moreover, inlining (or applying rules) on rule LHSs risks introducing
797 Ticks into the LHS, which makes matching trickier. Trac #10665, #10745.
798
799 Doing this to either side confounds tools like HERMIT, which seek to reason
800 about and apply the RULES as originally written. See Trac #10829.
801
802 Note [No eta expansion in stable unfoldings]
803 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
804 If we have a stable unfolding
805
806 f :: Ord a => a -> IO ()
807 -- Unfolding template
808 -- = /\a \(d:Ord a) (x:a). bla
809
810 we do not want to eta-expand to
811
812 f :: Ord a => a -> IO ()
813 -- Unfolding template
814 -- = (/\a \(d:Ord a) (x:a) (eta:State#). bla eta) |> co
815
816 because then specialisation of the overloading doesn't work properly
817 (see Note [Specialisation shape] in Specialise), Trac #9509.
818
819 So we disable eta-expansion in stable unfoldings.
820
821 Note [Inlining in gentle mode]
822 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
823 Something is inlined if
824 (i) the sm_inline flag is on, AND
825 (ii) the thing has an INLINE pragma, AND
826 (iii) the thing is inlinable in the earliest phase.
827
828 Example of why (iii) is important:
829 {-# INLINE [~1] g #-}
830 g = ...
831
832 {-# INLINE f #-}
833 f x = g (g x)
834
835 If we were to inline g into f's inlining, then an importing module would
836 never be able to do
837 f e --> g (g e) ---> RULE fires
838 because the stable unfolding for f has had g inlined into it.
839
840 On the other hand, it is bad not to do ANY inlining into a
841 stable unfolding, because then recursive knots in instance declarations
842 don't get unravelled.
843
844 However, *sometimes* SimplGently must do no call-site inlining at all
845 (hence sm_inline = False). Before full laziness we must be careful
846 not to inline wrappers, because doing so inhibits floating
847 e.g. ...(case f x of ...)...
848 ==> ...(case (case x of I# x# -> fw x#) of ...)...
849 ==> ...(case x of I# x# -> case fw x# of ...)...
850 and now the redex (f x) isn't floatable any more.
851
852 The no-inlining thing is also important for Template Haskell. You might be
853 compiling in one-shot mode with -O2; but when TH compiles a splice before
854 running it, we don't want to use -O2. Indeed, we don't want to inline
855 anything, because the byte-code interpreter might get confused about
856 unboxed tuples and suchlike.
857
858 Note [Simplifying inside stable unfoldings]
859 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
860 We must take care with simplification inside stable unfoldings (which come from
861 INLINE pragmas).
862
863 First, consider the following example
864 let f = \pq -> BIG
865 in
866 let g = \y -> f y y
867 {-# INLINE g #-}
868 in ...g...g...g...g...g...
869 Now, if that's the ONLY occurrence of f, it might be inlined inside g,
870 and thence copied multiple times when g is inlined. HENCE we treat
871 any occurrence in a stable unfolding as a multiple occurrence, not a single
872 one; see OccurAnal.addRuleUsage.
873
874 Second, we *do* want to do some modest rules/inlining stuff in stable
875 unfoldings, partly to eliminate senseless crap, and partly to break
876 the recursive knots generated by instance declarations.
877
878 However, suppose we have
879 {-# INLINE <act> f #-}
880 f = <rhs>
881 meaning "inline f in phases p where activation <act>(p) holds".
882 Then what inlinings/rules can we apply to the copy of <rhs> captured in
883 f's stable unfolding? Our model is that literally <rhs> is substituted for
884 f when it is inlined. So our conservative plan (implemented by
885 updModeForStableUnfoldings) is this:
886
887 -------------------------------------------------------------
888 When simplifying the RHS of a stable unfolding, set the phase
889 to the phase in which the stable unfolding first becomes active
890 -------------------------------------------------------------
891
892 That ensures that
893
894 a) Rules/inlinings that *cease* being active before p will
895 not apply to the stable unfolding, consistent with it being
896 inlined in its *original* form in phase p.
897
898 b) Rules/inlinings that only become active *after* p will
899 not apply to the stable unfolding, again to be consistent with
900 inlining the *original* rhs in phase p.
901
902 For example,
903 {-# INLINE f #-}
904 f x = ...g...
905
906 {-# NOINLINE [1] g #-}
907 g y = ...
908
909 {-# RULE h g = ... #-}
910 Here we must not inline g into f's RHS, even when we get to phase 0,
911 because when f is later inlined into some other module we want the
912 rule for h to fire.
913
914 Similarly, consider
915 {-# INLINE f #-}
916 f x = ...g...
917
918 g y = ...
919 and suppose that there are auto-generated specialisations and a strictness
920 wrapper for g. The specialisations get activation AlwaysActive, and the
921 strictness wrapper gets activation (ActiveAfter 0). So the strictness
922 wrapper fails the test and won't be inlined into f's stable unfolding. That
923 means f can inline, expose the specialised call to g, so the specialisation
924 rules can fire.
925
926 A note about wrappers
927 ~~~~~~~~~~~~~~~~~~~~~
928 It's also important not to inline a worker back into a wrapper.
929 A wrapper looks like
930 wrapper = inline_me (\x -> ...worker... )
931 Normally, the inline_me prevents the worker getting inlined into
932 the wrapper (initially, the worker's only call site!). But,
933 if the wrapper is sure to be called, the strictness analyser will
934 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
935 continuation.
936 -}
937
938 activeUnfolding :: SimplMode -> Id -> Bool
939 activeUnfolding mode id
940 | isCompulsoryUnfolding (realIdUnfolding id)
941 = True -- Even sm_inline can't override compulsory unfoldings
942 | otherwise
943 = isActive (sm_phase mode) (idInlineActivation id)
944 && sm_inline mode
945 -- `or` isStableUnfolding (realIdUnfolding id)
946 -- Inline things when
947 -- (a) they are active
948 -- (b) sm_inline says so, except that for stable unfoldings
949 -- (ie pragmas) we inline anyway
950
951 getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
952 -- When matching in RULE, we want to "look through" an unfolding
953 -- (to see a constructor) if *rules* are on, even if *inlinings*
954 -- are not. A notable example is DFuns, which really we want to
955 -- match in rules like (op dfun) in gentle mode. Another example
956 -- is 'otherwise' which we want exprIsConApp_maybe to be able to
957 -- see very early on
958 getUnfoldingInRuleMatch env
959 = (in_scope, id_unf)
960 where
961 in_scope = seInScope env
962 mode = getMode env
963 id_unf id | unf_is_active id = idUnfolding id
964 | otherwise = NoUnfolding
965 unf_is_active id
966 | not (sm_rules mode) = -- active_unfolding_minimal id
967 isStableUnfolding (realIdUnfolding id)
968 -- Do we even need to test this? I think this InScopeEnv
969 -- is only consulted if activeRule returns True, which
970 -- never happens if sm_rules is False
971 | otherwise = isActive (sm_phase mode) (idInlineActivation id)
972
973 ----------------------
974 activeRule :: SimplMode -> Activation -> Bool
975 -- Nothing => No rules at all
976 activeRule mode
977 | not (sm_rules mode) = \_ -> False -- Rewriting is off
978 | otherwise = isActive (sm_phase mode)
979
980 {-
981 ************************************************************************
982 * *
983 preInlineUnconditionally
984 * *
985 ************************************************************************
986
987 preInlineUnconditionally
988 ~~~~~~~~~~~~~~~~~~~~~~~~
989 @preInlineUnconditionally@ examines a bndr to see if it is used just
990 once in a completely safe way, so that it is safe to discard the
991 binding and inline its RHS at the (unique) usage site, REGARDLESS of how
992 big the RHS might be. If this is the case we don't simplify the RHS
993 first, but just inline it un-simplified.
994
995 This is much better than first simplifying a perhaps-huge RHS and then
996 inlining and re-simplifying it. Indeed, it can be at least quadratically
997 better. Consider
998
999 x1 = e1
1000 x2 = e2[x1]
1001 x3 = e3[x2]
1002 ...etc...
1003 xN = eN[xN-1]
1004
1005 We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times etc.
1006 This can happen with cascades of functions too:
1007
1008 f1 = \x1.e1
1009 f2 = \xs.e2[f1]
1010 f3 = \xs.e3[f2]
1011 ...etc...
1012
1013 THE MAIN INVARIANT is this:
1014
1015 ---- preInlineUnconditionally invariant -----
1016 IF preInlineUnconditionally chooses to inline x = <rhs>
1017 THEN doing the inlining should not change the occurrence
1018 info for the free vars of <rhs>
1019 ----------------------------------------------
1020
1021 For example, it's tempting to look at a trivial binding like
1022 x = y
1023 and inline it unconditionally. But suppose x is used many times,
1024 but this is the unique occurrence of y. Then inlining x would change
1025 y's occurrence info, which breaks the invariant. It matters: y
1026 might have a BIG rhs, which will now be dup'd at every occurrence of x.
1027
1028
1029 Even RHSs labelled InlineMe aren't caught here, because there might be
1030 no benefit from inlining at the call site.
1031
1032 [Sept 01] Don't unconditionally inline a top-level thing, because that
1033 can simply make a static thing into something built dynamically. E.g.
1034 x = (a,b)
1035 main = \s -> h x
1036
1037 [Remember that we treat \s as a one-shot lambda.] No point in
1038 inlining x unless there is something interesting about the call site.
1039
1040 But watch out: if you aren't careful, some useful foldr/build fusion
1041 can be lost (most notably in spectral/hartel/parstof) because the
1042 foldr didn't see the build. Doing the dynamic allocation isn't a big
1043 deal, in fact, but losing the fusion can be. But the right thing here
1044 seems to be to do a callSiteInline based on the fact that there is
1045 something interesting about the call site (it's strict). Hmm. That
1046 seems a bit fragile.
1047
1048 Conclusion: inline top level things gaily until Phase 0 (the last
1049 phase), at which point don't.
1050
1051 Note [pre/postInlineUnconditionally in gentle mode]
1052 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1053 Even in gentle mode we want to do preInlineUnconditionally. The
1054 reason is that too little clean-up happens if you don't inline
1055 use-once things. Also a bit of inlining is *good* for full laziness;
1056 it can expose constant sub-expressions. Example in
1057 spectral/mandel/Mandel.hs, where the mandelset function gets a useful
1058 let-float if you inline windowToViewport
1059
1060 However, as usual for Gentle mode, do not inline things that are
1061 inactive in the initial stages. See Note [Gentle mode].
1062
1063 Note [Stable unfoldings and preInlineUnconditionally]
1064 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1065 Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
1066 Example
1067
1068 {-# INLINE f #-}
1069 f :: Eq a => a -> a
1070 f x = ...
1071
1072 fInt :: Int -> Int
1073 fInt = f Int dEqInt
1074
1075 ...fInt...fInt...fInt...
1076
1077 Here f occurs just once, in the RHS of fInt. But if we inline it there
1078 it might make fInt look big, and we'll lose the opportunity to inline f
1079 at each of fInt's call sites. The INLINE pragma will only inline when
1080 the application is saturated for exactly this reason; and we don't
1081 want PreInlineUnconditionally to second-guess it. A live example is
1082 Trac #3736.
1083 c.f. Note [Stable unfoldings and postInlineUnconditionally]
1084
1085 NB: if the pragma is INLINEABLE, then we don't want to behave in
1086 this special way -- an INLINEABLE pragma just says to GHC "inline this
1087 if you like". But if there is a unique occurrence, we want to inline
1088 the stable unfolding, not the RHS.
1089
1090 Note [Top-level bottoming Ids]
1091 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1092 Don't inline top-level Ids that are bottoming, even if they are used just
1093 once, because FloatOut has gone to some trouble to extract them out.
1094 Inlining them won't make the program run faster!
1095
1096 Note [Do not inline CoVars unconditionally]
1097 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1098 Coercion variables appear inside coercions, and the RHS of a let-binding
1099 is a term (not a coercion) so we can't necessarily inline the latter in
1100 the former.
1101 -}
1102
1103 preInlineUnconditionally
1104 :: SimplEnv -> TopLevelFlag -> InId
1105 -> InExpr -> StaticEnv -- These two go together
1106 -> Maybe SimplEnv -- Returned env has extended substitution
1107 -- Precondition: rhs satisfies the let/app invariant
1108 -- See Note [CoreSyn let/app invariant] in CoreSyn
1109 -- Reason: we don't want to inline single uses, or discard dead bindings,
1110 -- for unlifted, side-effect-ful bindings
1111 preInlineUnconditionally env top_lvl bndr rhs rhs_env
1112 | not pre_inline_unconditionally = Nothing
1113 | not active = Nothing
1114 | isTopLevel top_lvl && isBottomingId bndr = Nothing -- Note [Top-level bottoming Ids]
1115 | isCoVar bndr = Nothing -- Note [Do not inline CoVars unconditionally]
1116 | isExitJoinId bndr = Nothing
1117 | not (one_occ (idOccInfo bndr)) = Nothing
1118 | not (isStableUnfolding unf) = Just (extend_subst_with rhs)
1119
1120 -- Note [Stable unfoldings and preInlineUnconditionally]
1121 | isInlinablePragma inline_prag
1122 , Just inl <- maybeUnfoldingTemplate unf = Just (extend_subst_with inl)
1123 | otherwise = Nothing
1124 where
1125 unf = idUnfolding bndr
1126 extend_subst_with inl_rhs = extendIdSubst env bndr (mkContEx rhs_env inl_rhs)
1127
1128 one_occ IAmDead = True -- Happens in ((\x.1) v)
1129 one_occ (OneOcc { occ_one_br = True -- One textual occurrence
1130 , occ_in_lam = in_lam
1131 , occ_int_cxt = int_cxt })
1132 | not in_lam = isNotTopLevel top_lvl || early_phase
1133 | otherwise = int_cxt && canInlineInLam rhs
1134 one_occ _ = False
1135
1136 pre_inline_unconditionally = gopt Opt_SimplPreInlining (seDynFlags env)
1137 mode = getMode env
1138 active = isActive (sm_phase mode) (inlinePragmaActivation inline_prag)
1139 -- See Note [pre/postInlineUnconditionally in gentle mode]
1140 inline_prag = idInlinePragma bndr
1141
1142 -- Be very careful before inlining inside a lambda, because (a) we must not
1143 -- invalidate occurrence information, and (b) we want to avoid pushing a
1144 -- single allocation (here) into multiple allocations (inside lambda).
1145 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
1146 -- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
1147 -- where
1148 -- is_cheap = exprIsCheap rhs
1149 -- ok = is_cheap && int_cxt
1150
1151 -- int_cxt The context isn't totally boring
1152 -- E.g. let f = \ab.BIG in \y. map f xs
1153 -- Don't want to substitute for f, because then we allocate
1154 -- its closure every time the \y is called
1155 -- But: let f = \ab.BIG in \y. map (f y) xs
1156 -- Now we do want to substitute for f, even though it's not
1157 -- saturated, because we're going to allocate a closure for
1158 -- (f y) every time round the loop anyhow.
1159
1160 -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
1161 -- so substituting rhs inside a lambda doesn't change the occ info.
1162 -- Sadly, not quite the same as exprIsHNF.
1163 canInlineInLam (Lit _) = True
1164 canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
1165 canInlineInLam (Tick t e) = not (tickishIsCode t) && canInlineInLam e
1166 canInlineInLam _ = False
1167 -- not ticks. Counting ticks cannot be duplicated, and non-counting
1168 -- ticks around a Lam will disappear anyway.
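    -- For instance (a sketch): canInlineInLam (\x. BIG) is True when x is
    -- a value binder, because the lambda itself is the value being
    -- duplicated; canInlineInLam (f y) is False, since substituting a
    -- redex under a lambda could duplicate its evaluation.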
1169
1170 early_phase = case sm_phase mode of
1171 Phase 0 -> False
1172 _ -> True
1173 -- If we don't have this early_phase test, consider
1174 -- x = length [1,2,3]
1175 -- The full laziness pass carefully floats all the cons cells to
1176 -- top level, and preInlineUnconditionally floats them all back in.
1177 -- Result is (a) static allocation replaced by dynamic allocation
1178 -- (b) many simplifier iterations because this tickles
1179 -- a related problem; only one inlining per pass
1180 --
1181 -- On the other hand, I have seen cases where top-level fusion is
1182 -- lost if we don't inline top level thing (e.g. string constants)
1183 -- Hence the test for phase zero (which is the phase for all the final
1184 -- simplifications). Until phase zero we take no special notice of
1185 -- top level things, but then we become more leery about inlining
1186 -- them.
1187
1188 {-
1189 ************************************************************************
1190 * *
1191 postInlineUnconditionally
1192 * *
1193 ************************************************************************
1194
1195 postInlineUnconditionally
1196 ~~~~~~~~~~~~~~~~~~~~~~~~~
1197 @postInlineUnconditionally@ decides whether to unconditionally inline
1198 a thing based on the form of its RHS; in particular if it has a
1199 trivial RHS. If so, we can inline and discard the binding altogether.
1200
1201 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
1202 only have *forward* references. Hence, it's safe to discard the binding
1203
1204 NOTE: This isn't our last opportunity to inline. We're at the binding
1205 site right now, and we'll get another opportunity when we get to the
1206 occurrence(s)
1207
1208 Note that we do this unconditional inlining only for trivial RHSs.
1209 Don't inline even WHNFs inside lambdas; doing so may simply increase
1210 allocation when the function is called. This isn't the last chance; see
1211 NOTE above.
1212
1213 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here Why?
1214 Because we don't even want to inline them into the RHS of constructor
1215 arguments. See NOTE above
1216
1217 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
1218 it's best to inline it anyway. We often get a=E; b=a from desugaring,
1219 with both a and b marked NOINLINE. But that seems incompatible with
1220 our new view that inlining is like a RULE, so I'm sticking to the 'active'
1221 story for now.
1222 -}
1223
1224 postInlineUnconditionally
1225 :: SimplEnv -> TopLevelFlag
1226 -> OutId -- The binder (*not* a CoVar), including its unfolding
1227 -> OccInfo -- From the InId
1228 -> OutExpr
1229 -> Bool
1230 -- Precondition: rhs satisfies the let/app invariant
1231 -- See Note [CoreSyn let/app invariant] in CoreSyn
1232 -- Reason: we don't want to inline single uses, or discard dead bindings,
1233 -- for unlifted, side-effect-ful bindings
1234 postInlineUnconditionally env top_lvl bndr occ_info rhs
1235 | not active = False
1236 | isWeakLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline
1237 -- because it might be referred to "earlier"
1238 | isStableUnfolding unfolding = False -- Note [Stable unfoldings and postInlineUnconditionally]
1239 | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally]
1240 | exprIsTrivial rhs = True
1241 | otherwise
1242 = case occ_info of
1243 -- The point of examining occ_info here is that for *non-values*
1244 -- that occur outside a lambda, the call-site inliner won't have
1245 -- a chance (because it doesn't know that the thing
1246 -- only occurs once). The pre-inliner won't have gotten
1247 -- it either, if the thing occurs in more than one branch
1248 -- So the main target is things like
1249 -- let x = f y in
1250 -- case v of
1251 -- True -> case x of ...
1252 -- False -> case x of ...
1253 -- This is very important in practice; e.g. wheel-seive1 doubles
1254 -- in allocation if you miss this out
1255 OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt }
1256 -- OneOcc => no code-duplication issue
1257 -> smallEnoughToInline dflags unfolding -- Small enough to dup
1258 -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
1259 --
1260 -- NB: Do NOT inline arbitrarily big things, even if one_br is True
1261 -- Reason: doing so risks exponential behaviour. We simplify a big
1262 -- expression, inline it, and simplify it again. But if the
1263 -- very same thing happens in the big expression, we get
1264 -- exponential cost!
1265 -- PRINCIPLE: when we've already simplified an expression once,
1266 -- make sure that we only inline it if it's reasonably small.
1267
1268 && (not in_lam ||
1269 -- Outside a lambda, we want to be reasonably aggressive
1270 -- about inlining into multiple branches of case
1271 -- e.g. let x = <non-value>
1272 -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
1273 -- Inlining can be a big win if C3 is the hot-spot, even if
1274 -- the uses in C1, C2 are not 'interesting'
1275 -- An example that gets worse if you add int_cxt here is 'clausify'
1276
1277 (isCheapUnfolding unfolding && int_cxt))
1278 -- isCheap => acceptable work duplication; in_lam may be true
1279 -- int_cxt to prevent us inlining inside a lambda without some
1280 -- good reason. See the notes on int_cxt in preInlineUnconditionally
1281
1282 IAmDead -> True -- This happens; for example, the case_bndr during case of
1283 -- known constructor: case (a,b) of x { (p,q) -> ... }
1284 -- Here x isn't mentioned in the RHS, so we don't want to
1285 -- create the (dead) let-binding let x = (a,b) in ...
1286
1287 _ -> False
1288
1289 -- Here's an example that we don't handle well:
1290 -- let f = if b then Left (\x.BIG) else Right (\y.BIG)
1291 -- in \y. ....case f of {...} ....
1292 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
1293 -- But
1294 -- - We can't preInlineUnconditionally because that would invalidate
1295 -- the occ info for b.
1296 -- - We can't postInlineUnconditionally because the RHS is big, and
1297 -- that risks exponential behaviour
1298 -- - We can't call-site inline, because the rhs is big
1299 -- Alas!
1300
1301 where
1302 unfolding = idUnfolding bndr
1303 dflags = seDynFlags env
1304 active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
1305 -- See Note [pre/postInlineUnconditionally in gentle mode]
1306
1307 {-
1308 Note [Top level and postInlineUnconditionally]
1309 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1310 We don't do postInlineUnconditionally for top-level things (even for
1311 ones that are trivial):
1312
1313 * Doing so will inline top-level error expressions that have been
1314 carefully floated out by FloatOut. More generally, it might
1315 replace static allocation with dynamic.
1316
1317 * Even for trivial expressions there's a problem. Consider
1318 {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
1319 blah xs = reverse xs
1320 ruggle = sort
1321 In one simplifier pass we might fire the rule, getting
1322 blah xs = ruggle xs
1323 but in *that* simplifier pass we must not do postInlineUnconditionally
1324 on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
1325
1326 If the rhs is trivial it'll be inlined by callSiteInline, and then
1327 the binding will be dead and discarded by the next use of OccurAnal
1328
1329 * There is less point, because the main goal is to get rid of local
1330 bindings used in multiple case branches.
1331
1332 * The inliner should inline trivial things at call sites anyway.
1333
1334 * The Id might be exported. We could check for that separately,
1335 but since we aren't going to postInlineUnconditionally /any/
1336 top-level bindings, we don't need to test.
1337
1338 Note [Stable unfoldings and postInlineUnconditionally]
1339 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1340 Do not do postInlineUnconditionally if the Id has a stable unfolding,
1341 otherwise we lose the unfolding. Example
1342
1343 -- f has stable unfolding with rhs (e |> co)
1344 -- where 'e' is big
1345 f = e |> co
1346
1347 Then there's a danger we'll optimise to
1348
1349 f' = e
1350 f = f' |> co
1351
1352 and now postInlineUnconditionally, losing the stable unfolding on f. Now f'
1353 won't inline because 'e' is too big.
1354
1355 c.f. Note [Stable unfoldings and preInlineUnconditionally]
1356
1357
1358 ************************************************************************
1359 * *
1360 Rebuilding a lambda
1361 * *
1362 ************************************************************************
1363 -}
1364
1365 mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplCont -> SimplM OutExpr
1366 -- mkLam tries three things
1367 -- a) eta reduction, if that gives a trivial expression
1368 -- b) eta expansion [only if there are some value lambdas]
-- c) floating a cast out of the lambda body; see Note [Casts and lambdas]
1369
1370 mkLam _env [] body _cont
1371 = return body
1372 mkLam env bndrs body cont
1373 = do { dflags <- getDynFlags
1374 ; mkLam' dflags bndrs body }
1375 where
1376 mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
1377 mkLam' dflags bndrs (Cast body co)
1378 | not (any bad bndrs)
1379 -- Note [Casts and lambdas]
1380 = do { lam <- mkLam' dflags bndrs body
1381 ; return (mkCast lam (mkPiCos Representational bndrs co)) }
1382 where
1383 co_vars = tyCoVarsOfCo co
1384 bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
1385
1386 mkLam' dflags bndrs body@(Lam {})
1387 = mkLam' dflags (bndrs ++ bndrs1) body1
1388 where
1389 (bndrs1, body1) = collectBinders body
1390
1391 mkLam' dflags bndrs (Tick t expr)
1392 | tickishFloatable t
1393 = mkTick t <$> mkLam' dflags bndrs expr
1394
1395 mkLam' dflags bndrs body
1396 | gopt Opt_DoEtaReduction dflags
1397 , Just etad_lam <- tryEtaReduce bndrs body
1398 = do { tick (EtaReduction (head bndrs))
1399 ; return etad_lam }
1400
1401 | not (contIsRhs cont) -- See Note [Eta expanding lambdas]
1402 , sm_eta_expand (getMode env)
1403 , any isRuntimeVar bndrs
1404 , let body_arity = exprEtaExpandArity dflags body
1405 , body_arity > 0
1406 = do { tick (EtaExpansion (head bndrs))
1407 ; let res = mkLams bndrs (etaExpand body_arity body)
1408 ; traceSmpl "eta expand" (vcat [text "before" <+> ppr (mkLams bndrs body)
1409 , text "after" <+> ppr res])
1410 ; return res }
1411
1412 | otherwise
1413 = return (mkLams bndrs body)
1414
1415 {-
1416 Note [Eta expanding lambdas]
1417 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1418 In general we *do* want to eta-expand lambdas. Consider
1419 f (\x -> case x of (a,b) -> \s -> blah)
1420 where 's' is a state token, and hence can be eta expanded. This
1421 showed up in the code for GHC.IO.Handle.Text.hPutChar, a rather
1422 important function!
1423
1424 The eta-expansion will never happen unless we do it now. (Well, it's
1425 possible that CorePrep will do it, but CorePrep only has a half-baked
1426 eta-expander that can't deal with casts. So it's much better to do it
1427 here.)
1428
1429 However, when the lambda is let-bound, as the RHS of a let, we have a
1430 better eta-expander (in the form of tryEtaExpandRhs), so we don't
1431 bother to try expansion in mkLam in that case; hence the contIsRhs
1432 guard.
1433
1434 NB: We check the SimplEnv (sm_eta_expand), not DynFlags.
1435 See Note [No eta expansion in stable unfoldings]
1436
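Concretely (a sketch; 'go' and its type are invented), the eta-expansion
described above roughly turns

    f (\x -> case x of (a,b) -> \s -> go a b s)
into
    f (\x s -> case x of (a,b) -> go a b s)

so the argument is manifestly a function of two arguments, and callers can
apply it to both at once rather than building a partial application and
then entering it with the state token.
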
1437 Note [Casts and lambdas]
1438 ~~~~~~~~~~~~~~~~~~~~~~~~
1439 Consider
1440 (\x. (\y. e) `cast` g1) `cast` g2
1441 There is a danger here that the two lambdas look separated, and the
1442 full laziness pass might float an expression to between the two.
1443
1444 So this equation in mkLam' floats the g1 out, thus:
1445 (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1)
1446 where x:tx.
1447
1448 In general, this floats casts outside lambdas, where (I hope) they
1449 might meet and cancel with some other cast:
1450 \x. e `cast` co ===> (\x. e) `cast` (tx -> co)
1451 /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co)
1452 /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co)
1453 (if not (g `in` co))
1454
1455 Notice that it works regardless of 'e'. Originally it worked only
1456 if 'e' was itself a lambda, but in some cases that resulted in
1457 fruitless iteration in the simplifier. A good example was when
1458 compiling Text.ParserCombinators.ReadPrec, where we had a definition
1459 like (\x. Get `cast` g)
1460 where Get is a constructor with nonzero arity. Then mkLam eta-expanded
1461 the Get, and the next iteration eta-reduced it, and then eta-expanded
1462 it again.
1463
1464 Note also the side condition for the case of coercion binders.
1465 It does not make sense to transform
1466 /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g)
1467 because the latter is not well-kinded.
1468
1469 ************************************************************************
1470 * *
1471 Eta expansion
1472 * *
1473 ************************************************************************
1474 -}
1475
1476 tryEtaExpandRhs :: SimplMode -> OutId -> OutExpr
1477 -> SimplM (Arity, Bool, OutExpr)
1478 -- See Note [Eta-expanding at let bindings]
1479 -- If tryEtaExpandRhs rhs = (n, is_bot, rhs') then
1480 -- (a) rhs' has manifest arity
1481 -- (b) if is_bot is True then rhs' applied to n args is guaranteed bottom
1482 tryEtaExpandRhs mode bndr rhs
1483 | isJoinId bndr
1484 = return (manifestArity rhs, False, rhs)
1485 -- Note [Do not eta-expand join points]
1486
1487 | otherwise
1488 = do { (new_arity, is_bot, new_rhs) <- try_expand
1489
1490 ; WARN( new_arity < old_id_arity,
1491 (text "Arity decrease:" <+> (ppr bndr <+> ppr old_id_arity
1492 <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
1493 -- Note [Arity decrease] in Simplify
1494 return (new_arity, is_bot, new_rhs) }
1495 where
1496 try_expand
1497 | exprIsTrivial rhs
1498 = return (exprArity rhs, False, rhs)
1499
1500 | sm_eta_expand mode -- Provided eta-expansion is on
1501 , new_arity > old_arity -- And the current manifest arity isn't enough
1502 = do { tick (EtaExpansion bndr)
1503 ; return (new_arity, is_bot, etaExpand new_arity rhs) }
1504
1505 | otherwise
1506 = return (old_arity, is_bot && new_arity == old_arity, rhs)
1507
1508 dflags = sm_dflags mode
1509 old_arity = exprArity rhs -- See Note [Do not eta-expand PAPs]
1510 old_id_arity = idArity bndr
1511
1512 (new_arity1, is_bot) = findRhsArity dflags bndr rhs old_arity
1513 new_arity2 = idCallArity bndr
1514 new_arity = max new_arity1 new_arity2
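    -- A worked instance of the contract above (illustrative only): for a
    -- binder whose RHS is (\x -> error x), try_expand can return
    -- (1, True, \x -> error x); the True (is_bot) records that the RHS,
    -- applied to one argument, is guaranteed to diverge.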
1515
1516 {-
1517 Note [Eta-expanding at let bindings]
1518 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1519 We now eta expand at let-bindings, which is where the payoff comes.
1520 The most significant thing is that we can do a simple arity analysis
1521 (in CoreArity.findRhsArity), which we can't do for free-floating lambdas
1522
1523 One useful consequence of not eta-expanding lambdas is this example:
1524 genMap :: C a => ...
1525 {-# INLINE genMap #-}
1526 genMap f xs = ...
1527
1528 myMap :: D a => ...
1529 {-# INLINE myMap #-}
1530 myMap = genMap
1531
1532 Notice that 'genMap' should only inline if applied to two arguments.
1533 In the stable unfolding for myMap we'll have the unfolding
1534 (\d -> genMap Int (..d..))
1535 We do not want to eta-expand to
1536 (\d f xs -> genMap Int (..d..) f xs)
1537 because then 'genMap' will inline, and it really shouldn't: at least
1538 as far as the programmer is concerned, it's not applied to two
1539 arguments!
1540
1541 Note [Do not eta-expand join points]
1542 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1543 Similarly to CPR (see Note [Don't CPR join points] in WorkWrap), a join point
1544 stands well to gain from its outer binding's eta-expansion, and eta-expanding a
1545 join point is fraught with issues like how to deal with a cast:
1546
1547 let join $j1 :: IO ()
1548 $j1 = ...
1549 $j2 :: Int -> IO ()
1550 $j2 n = if n > 0 then $j1
1551 else ...
1552
1553 =>
1554
1555 let join $j1 :: IO ()
1556 $j1 = (\eta -> ...)
1557 `cast` N:IO :: State# RealWorld -> (# State# RealWorld, () #)
1558 ~ IO ()
1559 $j2 :: Int -> IO ()
1560 $j2 n = (\eta -> if n > 0 then $j1
1561 else ...)
1562 `cast` N:IO :: State# RealWorld -> (# State# RealWorld, () #)
1563 ~ IO ()
1564
1565 The cast here can't be pushed inside the lambda (since it's not casting to a
1566 function type), so the lambda has to stay, but it can't because it contains a
1567 reference to a join point. In fact, $j2 can't be eta-expanded at all. Rather
1568 than try and detect this situation (and whatever other situations crop up!), we
1569 don't bother; again, any surrounding eta-expansion will improve these join
1570 points anyway, since an outer cast can *always* be pushed inside. By the time
1571 CorePrep comes around, the code is very likely to look more like this:
1572
1573 let join $j1 :: State# RealWorld -> (# State# RealWorld, () #)
1574 $j1 = (...) eta
1575 $j2 :: Int -> State# RealWorld -> (# State# RealWorld, () #)
1576 $j2 = if n > 0 then $j1
1577 else (...) eta
1578
1579 Note [Do not eta-expand PAPs]
1580 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1581 We used to have old_arity = manifestArity rhs, which meant that we
1582 would eta-expand even PAPs. But this gives no particular advantage,
1583 and can lead to a massive blow-up in code size, exhibited by Trac #9020.
1584 Suppose we have a PAP
1585 foo :: IO ()
1586 foo = returnIO ()
1587 Then we could eta-expand it to
1588 foo = (\eta. (returnIO () |> sym g) eta) |> g
1589 where
1590 g :: IO () ~ State# RealWorld -> (# State# RealWorld, () #)
1591
1592 But there is really no point in doing this, and it generates masses of
1593 coercions and whatnot that eventually disappear again. For T9020, GHC
1594 allocated 6.6G before, and 0.8G afterwards; and residency dropped from
1595 1.8G to 45M.
1596
1597 But note that this won't eta-expand, say
1598 f = \g -> map g
1599 Does it matter not eta-expanding such functions? I'm not sure. Perhaps
1600 strictness analysis will have less to bite on?
1601
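Concretely (a sketch), eta-expanding that definition would give

    f = \g xs -> map g xs

i.e. manifest arity 2 instead of 1; the open question above is whether
exposing that extra arity would help later analyses enough to be worth it.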
1602
1603 ************************************************************************
1604 * *
1605 \subsection{Floating lets out of big lambdas}
1606 * *
1607 ************************************************************************
1608
1609 Note [Floating and type abstraction]
1610 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1611 Consider this:
1612 x = /\a. C e1 e2
1613 We'd like to float this to
1614 y1 = /\a. e1
1615 y2 = /\a. e2
1616 x = /\a. C (y1 a) (y2 a)
1617 for the usual reasons: we want to inline x rather vigorously.
1618
1619 You may think that this kind of thing is rare. But in some programs it is
1620 common. For example, if you do closure conversion you might get:
1621
1622 data a :-> b = forall e. (e -> a -> b) :$ e
1623
1624 f_cc :: forall a. a :-> a
1625 f_cc = /\a. (\e. id a) :$ ()
1626
1627 Now we really want to inline that f_cc thing so that the
1628 construction of the closure goes away.
1629
1630 So I have elaborated simplLazyBind to understand right-hand sides that look
1631 like
1632 /\ a1..an. body
1633
1634 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1635 but there is quite a bit of plumbing in simplLazyBind as well.
1636
1637 The same transformation is good when there are lets in the body:
1638
1639 /\abc -> let(rec) x = e in b
1640 ==>
1641 let(rec) x' = /\abc -> let x = x' a b c in e
1642 in
1643 /\abc -> let x = x' a b c in b
1644
1645 This is good because it can turn things like:
1646
1647 let f = /\a -> letrec g = ... g ... in g
1648 into
1649 letrec g' = /\a -> ... g' a ...
1650 in
1651 let f = /\ a -> g' a
1652
1653 which is better. In effect, it means that big lambdas don't impede
1654 let-floating.
1655
1656 This optimisation is CRUCIAL in eliminating the junk introduced by
1657 desugaring mutually recursive definitions. Don't eliminate it lightly!
1658
1659 [May 1999] If we do this transformation *regardless* then we can
1660 end up with some pretty silly stuff. For example,
1661
1662 let
1663 st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1664 in ..
1665 becomes
1666 let y1 = /\s -> r1
1667 y2 = /\s -> r2
1668 st = /\s -> ...[y1 s/x1, y2 s/x2]
1669 in ..
1670
1671 Unless the "..." is a WHNF there is really no point in doing this.
1672 Indeed it can make things worse. Suppose x1 is used strictly,
1673 and is of the form
1674
1675 x1* = case f y of { (a,b) -> e }
1676
1677 If we abstract this wrt the tyvar we then can't do the case inline
1678 as we would normally do.
1679
1680 That's why the whole transformation is part of the same process that
1681 floats let-bindings and constructor arguments out of RHSs. In particular,
1682 it is guarded by the doFloatFromRhs call in simplLazyBind.
1683
1684 Note [Which type variables to abstract over]
1685 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1686 Abstract only over the type variables free in the rhs wrt which the
1687 new binding is abstracted. Note that
1688
1689 * The naive approach of abstracting wrt the
1690 tyvars free in the Id's /type/ fails. Consider:
1691 /\ a b -> let t :: (a,b) = (e1, e2)
1692 x :: a = fst t
1693 in ...
1694 Here, b isn't free in x's type, but we must nevertheless
1695 abstract wrt b as well, because t's type mentions b.
1696 Since t is floated too, we'd end up with the bogus:
1697 poly_t = /\ a b -> (e1, e2)
1698 poly_x = /\ a -> fst (poly_t a *b*)
1699
1700 * We must do closeOverKinds. Example (Trac #10934):
1701 f = /\k (f:k->*) (a:k). let t = AccFailure @ (f a) in ...
1702 Here we want to float 't', but we must remember to abstract over
1703 'k' as well, even though it is not explicitly mentioned in the RHS,
1704 otherwise we get
1705 t = /\ (f:k->*) (a:k). AccFailure @ (f a)
1706 which is obviously bogus.
1707 -}
1708
1709 abstractFloats :: DynFlags -> TopLevelFlag -> [OutTyVar] -> SimplFloats
1710 -> OutExpr -> SimplM ([OutBind], OutExpr)
1711 abstractFloats dflags top_lvl main_tvs floats body
1712 = ASSERT( notNull body_floats )
1713 ASSERT( isNilOL (sfJoinFloats floats) )
1714 do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
1715 ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
1716 where
1717 is_top_lvl = isTopLevel top_lvl
1718 main_tv_set = mkVarSet main_tvs
1719 body_floats = letFloatBinds (sfLetFloats floats)
1720 empty_subst = CoreSubst.mkEmptySubst (sfInScope floats)
1721
1722 abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
1723 abstract subst (NonRec id rhs)
1724 = do { (poly_id1, poly_app) <- mk_poly1 tvs_here id
1725 ; let (poly_id2, poly_rhs) = mk_poly2 poly_id1 tvs_here rhs'
1726 subst' = CoreSubst.extendIdSubst subst id poly_app
1727 ; return (subst', NonRec poly_id2 poly_rhs) }
1728 where
1729 rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
1730
1731 -- tvs_here: see Note [Which type variables to abstract over]
1732 tvs_here = toposortTyVars $
1733 filter (`elemVarSet` main_tv_set) $
1734 closeOverKindsList $
1735 exprSomeFreeVarsList isTyVar rhs'
1736
1737 abstract subst (Rec prs)
1738 = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly1 tvs_here) ids
1739 ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
1740 poly_pairs = [ mk_poly2 poly_id tvs_here rhs'
1741 | (poly_id, rhs) <- poly_ids `zip` rhss
1742 , let rhs' = CoreSubst.substExpr (text "abstract_floats")
1743 subst' rhs ]
1744 ; return (subst', Rec poly_pairs) }
1745 where
1746 (ids,rhss) = unzip prs
1747 -- For a recursive group, it's a bit of a pain to work out the minimal
1748 -- set of tyvars over which to abstract:
1749 -- /\ a b c. let x = ...a... in
1750 -- letrec { p = ...x...q...
1751 -- q = .....p...b... } in
1752 -- ...
1753 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
1754 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
1755 -- Since it's a pain, we just use the whole set, which is always safe
1756 --
1757 -- If you ever want to be more selective, remember this bizarre case too:
1758 -- x::a = x
1759 -- Here, we must abstract 'x' over 'a'.
1760 tvs_here = toposortTyVars main_tvs
1761
1762 mk_poly1 :: [TyVar] -> Id -> SimplM (Id, CoreExpr)
1763 mk_poly1 tvs_here var
1764 = do { uniq <- getUniqueM
1765 ; let poly_name = setNameUnique (idName var) uniq -- Keep same name
1766 poly_ty = mkInvForAllTys tvs_here (idType var) -- But new type of course
1767 poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.hs
1768 mkLocalIdOrCoVar poly_name poly_ty
1769 ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
1770 -- In the olden days, it was crucial to copy the occInfo of the original var,
1771 -- because we were looking at occurrence-analysed but as yet unsimplified code!
1772 -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking
1773 -- at already simplified code, so it doesn't matter
1774 --
1775 -- It's even right to retain single-occurrence or dead-var info:
1776 -- Suppose we started with /\a -> let x = E in B
1777 -- where x occurs once in B. Then we transform to:
1778 -- let x' = /\a -> E in /\a -> let x* = x' a in B
1779 -- where x* has an INLINE prag on it. Now, once x* is inlined,
1780 -- the occurrences of x' will be just the occurrences originally
1781 -- pinned on x.
1782
1783 mk_poly2 :: Id -> [TyVar] -> CoreExpr -> (Id, CoreExpr)
1784 mk_poly2 poly_id tvs_here rhs
1785 = (poly_id `setIdUnfolding` unf, poly_rhs)
1786 where
1787 poly_rhs = mkLams tvs_here rhs
1788 unf = mkUnfolding dflags InlineRhs is_top_lvl False poly_rhs
1789
1790 -- We want the unfolding. Consider
1791 -- let
1792 -- x = /\a. let y = ... in Just y
1793 -- in body
1794 -- Then we float the y-binding out (via abstractFloats and addPolyBind)
1795 -- but 'x' may well then be inlined in 'body' in which case we'd like the
1796 -- opportunity to inline 'y' too.
1797
1798 {-
1799 Note [Abstract over coercions]
1800 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1801 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1802 type variable a. Rather than sort this mess out, we simply bale out and abstract
1803 wrt all the type variables if any of them are coercion variables.
1804
1805
1806 Historical note: if you use let-bindings instead of a substitution, beware of this:
1807
1808 -- Suppose we start with:
1809 --
1810 -- x = /\ a -> let g = G in E
1811 --
1812 -- Then we'll float to get
1813 --
1814 -- x = let poly_g = /\ a -> G
1815 -- in /\ a -> let g = poly_g a in E
1816 --
1817 -- But now the occurrence analyser will see just one occurrence
1818 -- of poly_g, not inside a lambda, so the simplifier will
1819 -- PreInlineUnconditionally poly_g back into g! Back to square 1!
1820 -- (I used to think that the "don't inline lone occurrences" stuff
1821 -- would stop this happening, but since it's the *only* occurrence,
1822 -- PreInlineUnconditionally kicks in first!)
1823 --
1824 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1825 -- to appear many times. (NB: mkInlineMe eliminates
1826 -- such notes on trivial RHSs, so do it manually.)
1827
1828 ************************************************************************
1829 * *
1830 prepareAlts
1831 * *
1832 ************************************************************************
1833
1834 prepareAlts tries these things:
1835
1836 1. Eliminate alternatives that cannot match, including the
1837 DEFAULT alternative.
1838
1839 2. If the DEFAULT alternative can match only one possible constructor,
1840 then make that constructor explicit.
1841 e.g.
1842 case e of x { DEFAULT -> rhs }
1843 ===>
1844 case e of x { (a,b) -> rhs }
1845 where the type is a single constructor type. This gives better code
1846 when rhs also scrutinises x or e.
1847
1848 3. Returns a list of the constructors that cannot match in the
1849 DEFAULT alternative (if there is one)
1850
1851 Here "cannot match" includes knowledge from GADTs
1852
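For example (an illustrative sketch; 'T', 'T1', 'T2' and 'f' are invented):

    data T a where
      T1 :: T Int
      T2 :: T Bool

    f :: T Int -> ...
    f x = case x of { T1 -> ... ; _ -> ... }

Because the scrutinee has type (T Int), GADT refinement tells us that T2
cannot match, so T2 goes into the list of constructors that are impossible
in the DEFAULT alternative.
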
1853 It's a good idea to do this stuff before simplifying the alternatives, to
1854 avoid simplifying alternatives we know can't happen, and to come up with
1855 the list of constructors that are handled, to put into the IdInfo of the
1856 case binder, for use when simplifying the alternatives.
1857
1858 Eliminating the default alternative in (1) isn't so obvious, but it can
1859 happen:
1860
1861 data Colour = Red | Green | Blue
1862
1863 f x = case x of
1864 Red -> ..
1865 Green -> ..
1866 DEFAULT -> h x
1867
1868 h y = case y of
1869 Blue -> ..
1870 DEFAULT -> [ case y of ... ]
1871
1872 If we inline h into f, the default case of the inlined h can't happen.
1873 If we don't notice this, we may end up filtering out *all* the cases
1874 of the inner case y, which give us nowhere to go!
1875 -}
1876
1877 prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
1878 -- The returned alternatives can be empty, if none are possible
1879 prepareAlts scrut case_bndr' alts
1880 | Just (tc, tys) <- splitTyConApp_maybe (varType case_bndr')
1881 -- Case binder is needed just for its type. Note that as an
1882 -- OutId, it has maximum information; this is important.
1883 -- Test simpl013 is an example
1884 = do { us <- getUniquesM
1885 ; let (idcs1, alts1) = filterAlts tc tys imposs_cons alts
1886 (yes2, alts2) = refineDefaultAlt us tc tys idcs1 alts1
1887 (yes3, idcs3, alts3) = combineIdenticalAlts idcs1 alts2
1888 -- "idcs" stands for "impossible default data constructors"
1889 -- i.e. the constructors that can't match the default case
1890 ; when yes2 $ tick (FillInCaseDefault case_bndr')
1891 ; when yes3 $ tick (AltMerge case_bndr')
1892 ; return (idcs3, alts3) }
1893
1894 | otherwise -- Not a data type, so nothing interesting happens
1895 = return ([], alts)
1896 where
1897 imposs_cons = case scrut of
1898 Var v -> otherCons (idUnfolding v)
1899 _ -> []
1900
1901
1902 {-
1903 ************************************************************************
1904 * *
1905 mkCase
1906 * *
1907 ************************************************************************
1908
1909 mkCase tries these things
1910
1911 * Note [Merge Nested Cases]
1912 * Note [Eliminate Identity Case]
1913 * Note [Scrutinee Constant Folding]
1914
1915 Note [Merge Nested Cases]
1916 ~~~~~~~~~~~~~~~~~~~~~~~~~
1917 case e of b { ==> case e of b {
1918 p1 -> rhs1 p1 -> rhs1
1919 ... ...
1920 pm -> rhsm pm -> rhsm
1921 _ -> case b of b' { pn -> let b'=b in rhsn
1922 pn -> rhsn ...
1923 ... po -> let b'=b in rhso
1924 po -> rhso _ -> let b'=b in rhsd
1925 _ -> rhsd
1926 }
1927
1928 which merges two cases into one when the default alternative of
1929 the outer case scrutinises the same variable as the outer case. This
1930 transformation is called Case Merging. It avoids scrutinising the
1931 same variable multiple times.
1932
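A concrete instance of the schema above (a sketch):

    case x of b { Nothing -> e1
                ; DEFAULT -> case b of b' { Just y -> e2 } }
  ==>
    case x of b { Nothing -> e1
                ; Just y  -> let b' = b in e2 }

so 'x' is scrutinised just once.
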
1933 Note [Eliminate Identity Case]
1934 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1935 case e of ===> e
1936 True -> True;
1937 False -> False
1938
1939 and similar friends.
1940
1941 Note [Scrutinee Constant Folding]
1942 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1943 case x op# k# of _ { ===> case x of _ {
1944 a1# -> e1 (a1# inv_op# k#) -> e1
1945 a2# -> e2 (a2# inv_op# k#) -> e2
1946 ... ...
1947 DEFAULT -> ed DEFAULT -> ed
1948
1949 where (x op# k#) inv_op# k# == x
1950
1951 And similarly for commuted arguments and for some unary operations.
1952
1953 The purpose of this transformation is not only to avoid an arithmetic
1954 operation at runtime but to allow other transformations to apply in cascade.
1955
1956 Example with the "Merge Nested Cases" optimization (from #12877):
1957
1958 main = case t of t0
1959 0## -> ...
1960 DEFAULT -> case t0 `minusWord#` 1## of t1
1961 0## -> ...
1962 DEFAULT -> case t1 `minusWord#` 1## of t2
1963 0## -> ...
1964 DEFAULT -> case t2 `minusWord#` 1## of _
1965 0## -> ...
1966 DEFAULT -> ...
1967
1968 becomes:
1969
1970 main = case t of _
1971 0## -> ...
1972 1## -> ...
1973 2## -> ...
1974 3## -> ...
1975 DEFAULT -> ...
1976
1977 There are some wrinkles
1978
1979 * Do not apply caseRules if there is just a single DEFAULT alternative
1980 case e +# 3# of b { DEFAULT -> rhs }
1981 If we applied the transformation here we would (stupidly) get
1982 case e of b' { DEFAULT -> let b = e +# 3# in rhs }
1983 and now the process may repeat, because that let will really
1984 be a case.
1985
1986 * The type of the scrutinee might change. E.g.
1987 case tagToEnum (x :: Int#) of (b::Bool)
1988 False -> e1
1989 True -> e2
1990 ==>
1991 case x of (b'::Int#)
1992 DEFAULT -> e1
1993 1# -> e2
1994
1995 * The case binder may be used in the right hand sides, so we need
1996 to make a local binding for it, if it is alive. e.g.
1997 case e +# 10# of b
1998 DEFAULT -> blah...b...
1999 44# -> blah2...b...
2000 ===>
2001 case e of b'
2002 DEFAULT -> let b = b' +# 10# in blah...b...
2003 34# -> let b = 44# in blah2...b...
2004
2005 Note that in the non-DEFAULT cases we know what to bind 'b' to,
2006 whereas in the DEFAULT case we must reconstruct the original value.
2007 But NB: we use b'; we do not duplicate 'e'.
2008
2009 * In dataToTag we might need to make up some fake binders;
2010 see Note [caseRules for dataToTag] in PrelRules
2011 -}
2012
2013 mkCase, mkCase1, mkCase2, mkCase3
2014 :: DynFlags
2015 -> OutExpr -> OutId
2016 -> OutType -> [OutAlt] -- Alternatives in standard (increasing) order
2017 -> SimplM OutExpr
2018
2019 --------------------------------------------------
2020 -- 1. Merge Nested Cases
2021 --------------------------------------------------
2022
2023 mkCase dflags scrut outer_bndr alts_ty ((DEFAULT, _, deflt_rhs) : outer_alts)
2024 | gopt Opt_CaseMerge dflags
2025 , (ticks, Case (Var inner_scrut_var) inner_bndr _ inner_alts)
2026 <- stripTicksTop tickishFloatable deflt_rhs
2027 , inner_scrut_var == outer_bndr
2028 = do { tick (CaseMerge outer_bndr)
2029
2030 ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
2031 (con, args, wrap_rhs rhs)
2032 -- Simplifier's no-shadowing invariant should ensure
2033 -- that outer_bndr is not shadowed by the inner patterns
2034 wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
2035 -- The let is OK even for unboxed binders,
2036
2037 wrapped_alts | isDeadBinder inner_bndr = inner_alts
2038 | otherwise = map wrap_alt inner_alts
2039
2040 merged_alts = mergeAlts outer_alts wrapped_alts
2041 -- NB: mergeAlts gives priority to the left
2042 -- case x of
2043 -- A -> e1
2044 -- DEFAULT -> case x of
2045 -- A -> e2
2046 -- B -> e3
2047 -- When we merge, we must ensure that e1 takes
2048 -- precedence over e2 as the value for A!
2049
2050 ; fmap (mkTicks ticks) $
2051 mkCase1 dflags scrut outer_bndr alts_ty merged_alts
2052 }
2053 -- Warning: don't call mkCase recursively!
2054 -- Firstly, there's no point, because inner alts have already had
2055 -- mkCase applied to them, so they won't have a case in their default
2056 -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
2057 -- in munge_rhs may put a case into the DEFAULT branch!
2058
2059 mkCase dflags scrut bndr alts_ty alts = mkCase1 dflags scrut bndr alts_ty alts
2060
2061 --------------------------------------------------
2062 -- 2. Eliminate Identity Case
2063 --------------------------------------------------
2064
2065 mkCase1 _dflags scrut case_bndr _ alts@((_,_,rhs1) : _) -- Identity case
2066 | all identity_alt alts
2067 = do { tick (CaseIdentity case_bndr)
2068 ; return (mkTicks ticks $ re_cast scrut rhs1) }
2069 where
2070 ticks = concatMap (stripTicksT tickishFloatable . thdOf3) (tail alts)
2071 identity_alt (con, args, rhs) = check_eq rhs con args
2072
2073 check_eq (Cast rhs co) con args -- See Note [RHS casts]
2074 = not (any (`elemVarSet` tyCoVarsOfCo co) args) && check_eq rhs con args
2075 check_eq (Tick t e) alt args
2076 = tickishFloatable t && check_eq e alt args
2077
2078 check_eq (Lit lit) (LitAlt lit') _ = lit == lit'
2079 check_eq (Var v) _ _ | v == case_bndr = True
2080 check_eq (Var v) (DataAlt con) args
2081 | null arg_tys, null args = v == dataConWorkId con
2082 -- Optimisation only
2083 check_eq rhs (DataAlt con) args = cheapEqExpr' tickishFloatable rhs $
2084 mkConApp2 con arg_tys args
2085 check_eq _ _ _ = False
2086
2087 arg_tys = tyConAppArgs (idType case_bndr)
2088
2089 -- Note [RHS casts]
2090 -- ~~~~~~~~~~~~~~~~
2091 -- We've seen this:
2092 -- case e of x { _ -> x `cast` c }
2093 -- And we definitely want to eliminate this case, to give
2094 -- e `cast` c
2095 -- So we throw away the cast from the RHS, and reconstruct
2096 -- it at the other end. All the RHS casts must be the same
2097 -- if (all identity_alt alts) holds.
2098 --
2099 -- Don't worry about nested casts, because the simplifier combines them
2100
2101 re_cast scrut (Cast rhs co) = Cast (re_cast scrut rhs) co
2102 re_cast scrut _ = scrut
2103
2104 mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
2105
2106 --------------------------------------------------
2107 -- 3. Scrutinee Constant Folding
2108 --------------------------------------------------
2109
2110 mkCase2 dflags scrut bndr alts_ty alts
2111 | -- See Note [Scrutinee Constant Folding]
2112 case alts of -- Not if there is just a DEFAULT alternative
2113 [(DEFAULT,_,_)] -> False
2114 _ -> True
2115 , gopt Opt_CaseFolding dflags
2116 , Just (scrut', tx_con, mk_orig) <- caseRules dflags scrut
2117 = do { bndr' <- newId (fsLit "lwild") (exprType scrut')
2118 ; alts' <- mapM (tx_alt tx_con mk_orig bndr') alts
2119 ; mkCase3 dflags scrut' bndr' alts_ty $
2120 add_default (re_sort alts')
2121 }
2122
2123 | otherwise
2124 = mkCase3 dflags scrut bndr alts_ty alts
2125 where
2126 -- We need to keep the correct association between the scrutinee and its
2127 -- binder if the latter isn't dead. Hence we wrap rhs of alternatives with
2128 -- "let bndr = ... in":
2129 --
2130 -- case v + 10 of y =====> case v of y
2131 -- 20 -> e1 10 -> let y = 20 in e1
2132 -- DEFAULT -> e2 DEFAULT -> let y = v + 10 in e2
2133 --
2134 -- Other transformations give: =====> case v of y'
2135 -- 10 -> let y = 20 in e1
2136 -- DEFAULT -> let y = y' + 10 in e2
2137 --
2138 -- This wrapping is done in tx_alt; we use mk_orig, returned by caseRules,
2139 -- to construct an expression equivalent to the original one, for use
2140 -- in the DEFAULT case
2141
2142 tx_alt tx_con mk_orig new_bndr (con, bs, rhs)
2143 | DataAlt dc <- con', not (isNullaryRepDataCon dc)
2144 = -- For non-nullary data cons we must invent some fake binders
2145 -- See Note [caseRules for dataToTag] in PrelRules
2146 do { us <- getUniquesM
2147 ; let (ex_tvs, arg_ids) = dataConRepInstPat us dc
2148 (tyConAppArgs (idType new_bndr))
2149 ; return (con', ex_tvs ++ arg_ids, rhs') }
2150 | otherwise
2151 = return (con', [], rhs')
2152 where
2153 con' = tx_con con
2154
2155 rhs' | isDeadBinder bndr = rhs
2156 | otherwise = bindNonRec bndr orig_val rhs
2157
2158 orig_val = case con of
2159 DEFAULT -> mk_orig new_bndr
2160 LitAlt l -> Lit l
2161 DataAlt dc -> mkConApp2 dc (tyConAppArgs (idType bndr)) bs
2162
2163
2164 re_sort :: [CoreAlt] -> [CoreAlt] -- Re-sort the alternatives to
2165 re_sort alts = sortBy cmpAlt alts -- preserve the #case_invariants#
2166
2167 add_default :: [CoreAlt] -> [CoreAlt]
2168 -- TagToEnum may change a boolean True/False set of alternatives
2169 -- to LitAlt 0#/1# alternatives. But literal alternatives always
2170 -- have a DEFAULT (I think). So add it.
2171 add_default ((LitAlt {}, bs, rhs) : alts) = (DEFAULT, bs, rhs) : alts
2172 add_default alts = alts
2173
2174 --------------------------------------------------
2175 -- Catch-all
2176 --------------------------------------------------
2177 mkCase3 _dflags scrut bndr alts_ty alts
2178 = return (Case scrut bndr alts_ty alts)
2179
2180 {-
2181 Note [Dead binders]
2182 ~~~~~~~~~~~~~~~~~~~~
2183 Note that dead-ness is maintained by the simplifier, so that it is
2184 accurate after simplification as well as before.
2185
2186
2187 Note [Cascading case merge]
2188 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2189 Case merging should cascade in one sweep, because it
2190 happens bottom-up
2191
2192 case e of a {
2193 DEFAULT -> case a of b
2194 DEFAULT -> case b of c {
2195 DEFAULT -> e
2196 A -> ea
2197 B -> eb
2198 C -> ec
2199 ==>
2200 case e of a {
2201 DEFAULT -> case a of b
2202 DEFAULT -> let c = b in e
2203 A -> let c = b in ea
2204 B -> eb
2205 C -> ec
2206 ==>
2207 case e of a {
2208 DEFAULT -> let b = a in let c = b in e
2209 A -> let b = a in let c = b in ea
2210 B -> let b = a in eb
2211 C -> ec
2212
2213
2214 However here's a tricky case that we still don't catch, and I don't
2215 see how to catch it in one pass:
2216
2217 case x of c1 { I# a1 ->
2218 case a1 of c2 ->
2219 0 -> ...
2220 DEFAULT -> case x of c3 { I# a2 ->
2221 case a2 of ...
2222
2223 After occurrence analysis (and its binder-swap) we get this
2224
2225 case x of c1 { I# a1 ->
2226 let x = c1 in -- Binder-swap addition
2227 case a1 of c2 ->
2228 0 -> ...
2229 DEFAULT -> case x of c3 { I# a2 ->
2230 case a2 of ...
2231
2232 When we simplify the inner case x, we'll see that
2233 x=c1=I# a1. So we'll bind a2 to a1, and get
2234
2235 case x of c1 { I# a1 ->
2236 case a1 of c2 ->
2237 0 -> ...
2238 DEFAULT -> case a1 of ...
2239
2240 This is correct, but we can't do a case merge in this sweep
2241 because c2 /= a1. Reason: the binding c1=I# a1 went inwards
2242 without getting changed to c1=I# c2.
2243
2244 I don't think this is worth fixing, even if I knew how. It'll
2245 all come out in the next pass anyway.
2246 -}