76ac48bd756b22b813c6a9ac44cfa78f02bff007
[ghc.git] / compiler / simplCore / SetLevels.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 \section{SetLevels}
5
6 ***************************
7 Overview
8 ***************************
9
10 1. We attach binding levels to Core bindings, in preparation for floating
11 outwards (@FloatOut@).
12
13 2. We also let-ify many expressions (notably case scrutinees), so they
14 will have a fighting chance of being floated sensible.
15
16 3. Note [Need for cloning during float-out]
17 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
18 We clone the binders of any floatable let-binding, so that when it is
19 floated out it will be unique. Example
20 (let x=2 in x) + (let x=3 in x)
21 we must clone before floating so we get
22 let x1=2 in
23 let x2=3 in
24 x1+x2
25
26 NOTE: this can't be done using the uniqAway idea, because the variable
27 must be unique in the whole program, not just its current scope,
28 because two variables in different scopes may float out to the
29 same top level place
30
31 NOTE: Very tiresomely, we must apply this substitution to
32 the rules stored inside a variable too.
33
34 We do *not* clone top-level bindings, because some of them must not change,
35 but we *do* clone bindings that are heading for the top level
36
37 4. Note [Binder-swap during float-out]
38 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
39 In the expression
40 case x of wild { p -> ...wild... }
41 we substitute x for wild in the RHS of the case alternatives:
42 case x of wild { p -> ...x... }
43 This means that a sub-expression involving x is not "trapped" inside the RHS.
44 And it's not inconvenient because we already have a substitution.
45
46 Note that this is EXACTLY BACKWARDS from the what the simplifier does.
47 The simplifier tries to get rid of occurrences of x, in favour of wild,
48 in the hope that there will only be one remaining occurrence of x, namely
49 the scrutinee of the case, and we can inline it.
50 -}
51
52 {-# LANGUAGE CPP, MultiWayIf #-}
53 module SetLevels (
54 setLevels,
55
56 Level(..), LevelType(..), tOP_LEVEL, isJoinCeilLvl, asJoinCeilLvl,
57 LevelledBind, LevelledExpr, LevelledBndr,
58 FloatSpec(..), floatSpecLevel,
59
60 incMinorLvl, ltMajLvl, ltLvl, isTopLvl
61 ) where
62
63 #include "HsVersions.h"
64
65 import CoreSyn
66 import CoreMonad ( FloatOutSwitches(..) )
67 import CoreUtils ( exprType, exprIsCheap, exprIsHNF
68 , exprOkForSpeculation
69 , exprIsTopLevelBindable
70 , isExprLevPoly
71 , collectMakeStaticArgs
72 )
73 import CoreArity ( exprBotStrictness_maybe )
74 import CoreFVs -- all of it
75 import CoreSubst
76 import MkCore ( sortQuantVars )
77
78 import Id
79 import IdInfo
80 import Var
81 import VarSet
82 import VarEnv
83 import Literal ( litIsTrivial )
84 import Demand ( StrictSig, isStrictDmd, splitStrictSig, increaseStrictSigArity )
85 import Name ( getOccName, mkSystemVarName )
86 import OccName ( occNameString )
87 import Type ( isUnliftedType, Type, mkLamTypes, splitTyConApp_maybe )
88 import BasicTypes ( Arity, RecFlag(..), isRec )
89 import DataCon ( dataConOrigResTy )
90 import TysWiredIn
91 import UniqSupply
92 import Util
93 import Outputable
94 import FastString
95 import UniqDFM
96 import FV
97 import Data.Maybe
98 import Control.Monad ( zipWithM )
99
100 {-
101 ************************************************************************
102 * *
103 \subsection{Level numbers}
104 * *
105 ************************************************************************
106 -}
107
108 type LevelledExpr = TaggedExpr FloatSpec
109 type LevelledBind = TaggedBind FloatSpec
110 type LevelledBndr = TaggedBndr FloatSpec
111
112 data Level = Level Int -- Level number of enclosing lambdas
113 Int -- Number of big-lambda and/or case expressions and/or
114 -- context boundaries between
115 -- here and the nearest enclosing lambda
116 LevelType -- Binder or join ceiling?
117 data LevelType = BndrLvl | JoinCeilLvl deriving (Eq)
118
119 data FloatSpec
120 = FloatMe Level -- Float to just inside the binding
121 -- tagged with this level
122 | StayPut Level -- Stay where it is; binding is
123 -- tagged with tihs level
124
125 floatSpecLevel :: FloatSpec -> Level
126 floatSpecLevel (FloatMe l) = l
127 floatSpecLevel (StayPut l) = l
128
129 {-
130 The {\em level number} on a (type-)lambda-bound variable is the
131 nesting depth of the (type-)lambda which binds it. The outermost lambda
132 has level 1, so (Level 0 0) means that the variable is bound outside any lambda.
133
134 On an expression, it's the maximum level number of its free
135 (type-)variables. On a let(rec)-bound variable, it's the level of its
136 RHS. On a case-bound variable, it's the number of enclosing lambdas.
137
138 Top-level variables: level~0. Those bound on the RHS of a top-level
139 definition but ``before'' a lambda; e.g., the \tr{x} in (levels shown
140 as ``subscripts'')...
141 \begin{verbatim}
142 a_0 = let b_? = ... in
143 x_1 = ... b ... in ...
144 \end{verbatim}
145
146 The main function @lvlExpr@ carries a ``context level'' (@le_ctxt_lvl@).
147 That's meant to be the level number of the enclosing binder in the
148 final (floated) program. If the level number of a sub-expression is
149 less than that of the context, then it might be worth let-binding the
150 sub-expression so that it will indeed float.
151
152 If you can float to level @Level 0 0@ worth doing so because then your
153 allocation becomes static instead of dynamic. We always start with
154 context @Level 0 0@.
155
156
157 Note [FloatOut inside INLINE]
158 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
159 @InlineCtxt@ very similar to @Level 0 0@, but is used for one purpose:
160 to say "don't float anything out of here". That's exactly what we
161 want for the body of an INLINE, where we don't want to float anything
162 out at all. See notes with lvlMFE below.
163
164 But, check this out:
165
166 -- At one time I tried the effect of not float anything out of an InlineMe,
167 -- but it sometimes works badly. For example, consider PrelArr.done. It
168 -- has the form __inline (\d. e)
169 -- where e doesn't mention d. If we float this to
170 -- __inline (let x = e in \d. x)
171 -- things are bad. The inliner doesn't even inline it because it doesn't look
172 -- like a head-normal form. So it seems a lesser evil to let things float.
173 -- In SetLevels we do set the context to (Level 0 0) when we get to an InlineMe
174 -- which discourages floating out.
175
176 So the conclusion is: don't do any floating at all inside an InlineMe.
177 (In the above example, don't float the {x=e} out of the \d.)
178
179 One particular case is that of workers: we don't want to float the
180 call to the worker outside the wrapper, otherwise the worker might get
181 inlined into the floated expression, and an importing module won't see
182 the worker at all.
183
184 Note [Join ceiling]
185 ~~~~~~~~~~~~~~~~~~~
186 Join points can't float very far; too far, and they can't remain join points
187 (though see Note [When to ruin a join point]). So, suppose we have:
188
189 f x =
190 (joinrec j y = ... x ... in jump j x) + 1
191
192 One may be tempted to float j out to the top of f's RHS, but then the jump
193 would not be a tail call. Thus we keep track of a level called the *join
194 ceiling* past which join points are not allowed to float.
195
196 The troublesome thing is that, unlike most levels to which something might
197 float, there is not necessarily an identifier to which the join ceiling is
198 attached. Fortunately, if something is to be floated to a join ceiling, it must
199 be dropped at the *nearest* join ceiling. Thus each level is marked as to
200 whether it is a join ceiling, so that FloatOut can tell which binders are being
201 floated to the nearest join ceiling and which to a particular binder (or set of
202 binders).
203 -}
204
205 instance Outputable FloatSpec where
206 ppr (FloatMe l) = char 'F' <> ppr l
207 ppr (StayPut l) = ppr l
208
209 tOP_LEVEL :: Level
210 tOP_LEVEL = Level 0 0 BndrLvl
211
212 incMajorLvl :: Level -> Level
213 incMajorLvl (Level major _ _) = Level (major + 1) 0 BndrLvl
214
215 incMinorLvl :: Level -> Level
216 incMinorLvl (Level major minor _) = Level major (minor+1) BndrLvl
217
218 asJoinCeilLvl :: Level -> Level
219 asJoinCeilLvl (Level major minor _) = Level major minor JoinCeilLvl
220
221 maxLvl :: Level -> Level -> Level
222 maxLvl l1@(Level maj1 min1 _) l2@(Level maj2 min2 _)
223 | (maj1 > maj2) || (maj1 == maj2 && min1 > min2) = l1
224 | otherwise = l2
225
226 ltLvl :: Level -> Level -> Bool
227 ltLvl (Level maj1 min1 _) (Level maj2 min2 _)
228 = (maj1 < maj2) || (maj1 == maj2 && min1 < min2)
229
230 ltMajLvl :: Level -> Level -> Bool
231 -- Tells if one level belongs to a difft *lambda* level to another
232 ltMajLvl (Level maj1 _ _) (Level maj2 _ _) = maj1 < maj2
233
234 isTopLvl :: Level -> Bool
235 isTopLvl (Level 0 0 _) = True
236 isTopLvl _ = False
237
238 isJoinCeilLvl :: Level -> Bool
239 isJoinCeilLvl (Level _ _ t) = t == JoinCeilLvl
240
241 instance Outputable Level where
242 ppr (Level maj min typ)
243 = hcat [ char '<', int maj, char ',', int min, char '>'
244 , ppWhen (typ == JoinCeilLvl) (char 'C') ]
245
246 instance Eq Level where
247 (Level maj1 min1 _) == (Level maj2 min2 _) = maj1 == maj2 && min1 == min2
248
249 {-
250 ************************************************************************
251 * *
252 \subsection{Main level-setting code}
253 * *
254 ************************************************************************
255 -}
256
257 setLevels :: FloatOutSwitches
258 -> CoreProgram
259 -> UniqSupply
260 -> [LevelledBind]
261
262 setLevels float_lams binds us
263 = initLvl us (do_them init_env binds)
264 where
265 init_env = initialEnv float_lams
266
267 do_them :: LevelEnv -> [CoreBind] -> LvlM [LevelledBind]
268 do_them _ [] = return []
269 do_them env (b:bs)
270 = do { (lvld_bind, env') <- lvlTopBind env b
271 ; lvld_binds <- do_them env' bs
272 ; return (lvld_bind : lvld_binds) }
273
274 lvlTopBind :: LevelEnv -> Bind Id -> LvlM (LevelledBind, LevelEnv)
275 lvlTopBind env (NonRec bndr rhs)
276 = do { rhs' <- lvlRhs env NonRecursive Nothing -- Not a join point
277 (freeVars rhs)
278 ; let (env', [bndr']) = substAndLvlBndrs NonRecursive env tOP_LEVEL [bndr]
279 ; return (NonRec bndr' rhs', env') }
280
281 lvlTopBind env (Rec pairs)
282 = do let (bndrs,rhss) = unzip pairs
283 (env', bndrs') = substAndLvlBndrs Recursive env tOP_LEVEL bndrs
284 rhss' <- mapM (lvlRhs env' Recursive Nothing . freeVars) rhss
285 return (Rec (bndrs' `zip` rhss'), env')
286
287 {-
288 ************************************************************************
289 * *
290 \subsection{Setting expression levels}
291 * *
292 ************************************************************************
293
294 Note [Floating over-saturated applications]
295 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
296 If we see (f x y), and (f x) is a redex (ie f's arity is 1),
297 we call (f x) an "over-saturated application"
298
299 Should we float out an over-sat app, if can escape a value lambda?
300 It is sometimes very beneficial (-7% runtime -4% alloc over nofib -O2).
301 But we don't want to do it for class selectors, because the work saved
302 is minimal, and the extra local thunks allocated cost money.
303
304 Arguably we could float even class-op applications if they were going to
305 top level -- but then they must be applied to a constant dictionary and
306 will almost certainly be optimised away anyway.
307 -}
308
309 lvlExpr :: LevelEnv -- Context
310 -> CoreExprWithFVs -- Input expression
311 -> LvlM LevelledExpr -- Result expression
312
313 {-
314 The @le_ctxt_lvl@ is, roughly, the level of the innermost enclosing
315 binder. Here's an example
316
317 v = \x -> ...\y -> let r = case (..x..) of
318 ..x..
319 in ..
320
321 When looking at the rhs of @r@, @le_ctxt_lvl@ will be 1 because that's
322 the level of @r@, even though it's inside a level-2 @\y@. It's
323 important that @le_ctxt_lvl@ is 1 and not 2 in @r@'s rhs, because we
324 don't want @lvlExpr@ to turn the scrutinee of the @case@ into an MFE
325 --- because it isn't a *maximal* free expression.
326
327 If there were another lambda in @r@'s rhs, it would get level-2 as well.
328 -}
329
330 lvlExpr env (_, AnnType ty) = return (Type (CoreSubst.substTy (le_subst env) ty))
331 lvlExpr env (_, AnnCoercion co) = return (Coercion (substCo (le_subst env) co))
332 lvlExpr env (_, AnnVar v) = return (lookupVar env v)
333 lvlExpr _ (_, AnnLit lit) = return (Lit lit)
334
335 lvlExpr env (_, AnnCast expr (_, co)) = do
336 expr' <- lvlNonTailExpr env expr
337 return (Cast expr' (substCo (le_subst env) co))
338
339 lvlExpr env (_, AnnTick tickish expr) = do
340 expr' <- lvlNonTailExpr env expr
341 let tickish' = substTickish (le_subst env) tickish
342 return (Tick tickish' expr')
343
344 lvlExpr env expr@(_, AnnApp _ _) = lvlApp env expr (collectAnnArgs expr)
345
346 -- We don't split adjacent lambdas. That is, given
347 -- \x y -> (x+1,y)
348 -- we don't float to give
349 -- \x -> let v = x+1 in \y -> (v,y)
350 -- Why not? Because partial applications are fairly rare, and splitting
351 -- lambdas makes them more expensive.
352
353 lvlExpr env expr@(_, AnnLam {})
354 = do { new_body <- lvlNonTailMFE new_env True body
355 ; return (mkLams new_bndrs new_body) }
356 where
357 (bndrs, body) = collectAnnBndrs expr
358 (env1, bndrs1) = substBndrsSL NonRecursive env bndrs
359 (new_env, new_bndrs) = lvlLamBndrs env1 (le_ctxt_lvl env) bndrs1
360 -- At one time we called a special verion of collectBinders,
361 -- which ignored coercions, because we don't want to split
362 -- a lambda like this (\x -> coerce t (\s -> ...))
363 -- This used to happen quite a bit in state-transformer programs,
364 -- but not nearly so much now non-recursive newtypes are transparent.
365 -- [See SetLevels rev 1.50 for a version with this approach.]
366
367 lvlExpr env (_, AnnLet bind body)
368 = do { (bind', new_env) <- lvlBind env bind
369 ; body' <- lvlExpr new_env body
370 -- No point in going via lvlMFE here. If the binding is alive
371 -- (mentioned in body), and the whole let-expression doesn't
372 -- float, then neither will the body
373 ; return (Let bind' body') }
374
375 lvlExpr env (_, AnnCase scrut case_bndr ty alts)
376 = do { scrut' <- lvlNonTailMFE env True scrut
377 ; lvlCase env (freeVarsOf scrut) scrut' case_bndr ty alts }
378
379 lvlNonTailExpr :: LevelEnv -- Context
380 -> CoreExprWithFVs -- Input expression
381 -> LvlM LevelledExpr -- Result expression
382 lvlNonTailExpr env expr
383 = lvlExpr (placeJoinCeiling env) expr
384
385 -------------------------------------------
386 lvlApp :: LevelEnv
387 -> CoreExprWithFVs
388 -> (CoreExprWithFVs, [CoreExprWithFVs]) -- Input application
389 -> LvlM LevelledExpr -- Result expression
390 lvlApp env orig_expr ((_,AnnVar fn), args)
391 | floatOverSat env -- See Note [Floating over-saturated applications]
392 , arity > 0
393 , arity < n_val_args
394 , Nothing <- isClassOpId_maybe fn
395 = do { rargs' <- mapM (lvlNonTailMFE env False) rargs
396 ; lapp' <- lvlNonTailMFE env False lapp
397 ; return (foldl App lapp' rargs') }
398
399 | otherwise
400 = do { args' <- zipWithM (lvlMFE env) stricts args
401 -- Take account of argument strictness; see
402 -- Note [Floating to the top]
403 ; return (foldl App (lookupVar env fn) args') }
404 where
405 n_val_args = count (isValArg . deAnnotate) args
406 arity = idArity fn
407
408 stricts :: [Bool] -- True for strict argument
409 stricts = case splitStrictSig (idStrictness fn) of
410 (arg_ds, _) | not (arg_ds `lengthExceeds` n_val_args)
411 -> map isStrictDmd arg_ds ++ repeat False
412 | otherwise
413 -> repeat False
414
415 -- Separate out the PAP that we are floating from the extra
416 -- arguments, by traversing the spine until we have collected
417 -- (n_val_args - arity) value arguments.
418 (lapp, rargs) = left (n_val_args - arity) orig_expr []
419
420 left 0 e rargs = (e, rargs)
421 left n (_, AnnApp f a) rargs
422 | isValArg (deAnnotate a) = left (n-1) f (a:rargs)
423 | otherwise = left n f (a:rargs)
424 left _ _ _ = panic "SetLevels.lvlExpr.left"
425
426 lvlApp env _ (fun, args)
427 = -- No PAPs that we can float: just carry on with the
428 -- arguments and the function.
429 do { args' <- mapM (lvlNonTailMFE env False) args
430 ; fun' <- lvlNonTailExpr env fun
431 ; return (foldl App fun' args') }
432
433 -------------------------------------------
434 lvlCase :: LevelEnv -- Level of in-scope names/tyvars
435 -> DVarSet -- Free vars of input scrutinee
436 -> LevelledExpr -- Processed scrutinee
437 -> Id -> Type -- Case binder and result type
438 -> [CoreAltWithFVs] -- Input alternatives
439 -> LvlM LevelledExpr -- Result expression
440 lvlCase env scrut_fvs scrut' case_bndr ty alts
441 | [(con@(DataAlt {}), bs, body)] <- alts
442 , exprOkForSpeculation scrut' -- See Note [Check the output scrutinee for okForSpec]
443 , not (isTopLvl dest_lvl) -- Can't have top-level cases
444 , not (floatTopLvlOnly env) -- Can float anywhere
445 = -- See Note [Floating cases]
446 -- Always float the case if possible
447 -- Unlike lets we don't insist that it escapes a value lambda
448 do { (env1, (case_bndr' : bs')) <- cloneCaseBndrs env dest_lvl (case_bndr : bs)
449 ; let rhs_env = extendCaseBndrEnv env1 case_bndr scrut'
450 ; body' <- lvlMFE rhs_env True body
451 ; let alt' = (con, map (stayPut dest_lvl) bs', body')
452 ; return (Case scrut' (TB case_bndr' (FloatMe dest_lvl)) ty' [alt']) }
453
454 | otherwise -- Stays put
455 = do { let (alts_env1, [case_bndr']) = substAndLvlBndrs NonRecursive env incd_lvl [case_bndr]
456 alts_env = extendCaseBndrEnv alts_env1 case_bndr scrut'
457 ; alts' <- mapM (lvl_alt alts_env) alts
458 ; return (Case scrut' case_bndr' ty' alts') }
459 where
460 ty' = substTy (le_subst env) ty
461
462 incd_lvl = incMinorLvl (le_ctxt_lvl env)
463 dest_lvl = maxFvLevel (const True) env scrut_fvs
464 -- Don't abstact over type variables, hence const True
465
466 lvl_alt alts_env (con, bs, rhs)
467 = do { rhs' <- lvlMFE new_env True rhs
468 ; return (con, bs', rhs') }
469 where
470 (new_env, bs') = substAndLvlBndrs NonRecursive alts_env incd_lvl bs
471
472 {-
473 Note [Floating cases]
474 ~~~~~~~~~~~~~~~~~~~~~
475 Consider this:
476 data T a = MkT !a
477 f :: T Int -> blah
478 f x vs = case x of { MkT y ->
479 let f vs = ...(case y of I# w -> e)...f..
480 in f vs
481 Here we can float the (case y ...) out , because y is sure
482 to be evaluated, to give
483 f x vs = case x of { MkT y ->
484 caes y of I# w ->
485 let f vs = ...(e)...f..
486 in f vs
487
488 That saves unboxing it every time round the loop. It's important in
489 some DPH stuff where we really want to avoid that repeated unboxing in
490 the inner loop.
491
492 Things to note
493 * We can't float a case to top level
494 * It's worth doing this float even if we don't float
495 the case outside a value lambda. Example
496 case x of {
497 MkT y -> (case y of I# w2 -> ..., case y of I# w2 -> ...)
498 If we floated the cases out we could eliminate one of them.
499 * We only do this with a single-alternative case
500
501 Note [Check the output scrutinee for okForSpec]
502 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
503 Consider this:
504 case x of y {
505 A -> ....(case y of alts)....
506 }
507 Because of the binder-swap, the inner case will get substituted to
508 (case x of ..). So when testing whether the scrutinee is
509 okForSpecuation we must be careful to test the *result* scrutinee ('x'
510 in this case), not the *input* one 'y'. The latter *is* ok for
511 speculation here, but the former is not -- and indeed we can't float
512 the inner case out, at least not unless x is also evaluated at its
513 binding site.
514
515 That's why we apply exprOkForSpeculation to scrut' and not to scrut.
516 -}
517
518 lvlNonTailMFE :: LevelEnv -- Level of in-scope names/tyvars
519 -> Bool -- True <=> strict context [body of case
520 -- or let]
521 -> CoreExprWithFVs -- input expression
522 -> LvlM LevelledExpr -- Result expression
523 lvlNonTailMFE env strict_ctxt ann_expr
524 = lvlMFE (placeJoinCeiling env) strict_ctxt ann_expr
525
526 lvlMFE :: LevelEnv -- Level of in-scope names/tyvars
527 -> Bool -- True <=> strict context [body of case or let]
528 -> CoreExprWithFVs -- input expression
529 -> LvlM LevelledExpr -- Result expression
530 -- lvlMFE is just like lvlExpr, except that it might let-bind
531 -- the expression, so that it can itself be floated.
532
533 lvlMFE env _ (_, AnnType ty)
534 = return (Type (CoreSubst.substTy (le_subst env) ty))
535
536 -- No point in floating out an expression wrapped in a coercion or note
537 -- If we do we'll transform lvl = e |> co
538 -- to lvl' = e; lvl = lvl' |> co
539 -- and then inline lvl. Better just to float out the payload.
540 lvlMFE env strict_ctxt (_, AnnTick t e)
541 = do { e' <- lvlMFE env strict_ctxt e
542 ; return (Tick t e') }
543
544 lvlMFE env strict_ctxt (_, AnnCast e (_, co))
545 = do { e' <- lvlMFE env strict_ctxt e
546 ; return (Cast e' (substCo (le_subst env) co)) }
547
548 lvlMFE env strict_ctxt e@(_, AnnCase {})
549 | strict_ctxt -- Don't share cases in a strict context
550 = lvlExpr env e -- See Note [Case MFEs]
551
552 lvlMFE env strict_ctxt ann_expr
553 | floatTopLvlOnly env && not (isTopLvl dest_lvl)
554 -- Only floating to the top level is allowed.
555 || isTopLvl dest_lvl && need_join -- Can't put join point at top level
556 || isExprLevPoly expr
557 -- We can't let-bind levity polymorphic expressions
558 -- See Note [Levity polymorphism invariants] in CoreSyn
559 || notWorthFloating expr abs_vars
560 || not float_me
561 = -- Don't float it out
562 lvlExpr env ann_expr
563
564 | float_is_new_lam || need_join || exprIsTopLevelBindable expr expr_ty
565 -- No wrapping needed if the type is lifted, or is a literal string
566 -- or if we are wrapping it in one or more value lambdas
567 = do { expr1 <- lvlFloatRhs abs_vars dest_lvl rhs_env NonRecursive join_arity_maybe ann_expr
568 -- Treat the expr just like a right-hand side
569 ; var <- newLvlVar expr1 join_arity_maybe
570 ; let var2 = annotateBotStr var float_n_lams mb_bot_str
571 ; return (Let (NonRec (TB var2 (FloatMe dest_lvl)) expr1)
572 (mkVarApps (Var var2) abs_vars)) }
573
574 -- OK, so the float has an unlifted type
575 -- and no new value lambdas (float_is_new_lam is False)
576 -- Try for the boxing strategy
577 -- See Note [Floating MFEs of unlifted type]
578 | escapes_value_lam
579 , not (exprIsCheap expr) -- Boxing/unboxing isn't worth
580 -- it for cheap expressions
581 , Just (tc, _) <- splitTyConApp_maybe expr_ty
582 , Just dc <- boxingDataCon_maybe tc
583 , let dc_res_ty = dataConOrigResTy dc -- No free type variables
584 [bx_bndr, ubx_bndr] = mkTemplateLocals [dc_res_ty, expr_ty]
585 = do { expr1 <- lvlExpr rhs_env ann_expr
586 ; let l1r = incMinorLvlFrom rhs_env
587 float_rhs = mkLams abs_vars_w_lvls $
588 Case expr1 (stayPut l1r ubx_bndr) dc_res_ty
589 [(DEFAULT, [], mkConApp dc [Var ubx_bndr])]
590
591 ; var <- newLvlVar float_rhs Nothing
592 ; let l1u = incMinorLvlFrom env
593 use_expr = Case (mkVarApps (Var var) abs_vars)
594 (stayPut l1u bx_bndr) expr_ty
595 [(DataAlt dc, [stayPut l1u ubx_bndr], Var ubx_bndr)]
596 ; return (Let (NonRec (TB var (FloatMe dest_lvl)) float_rhs)
597 use_expr) }
598
599 | otherwise -- e.g. do not float unboxed tuples
600 = lvlExpr env ann_expr
601
602 where
603 expr = deAnnotate ann_expr
604 expr_ty = exprType expr
605 fvs = freeVarsOf ann_expr
606 is_bot = isBottomThunk mb_bot_str
607 is_function = isFunction ann_expr
608 mb_bot_str = exprBotStrictness_maybe expr
609 -- See Note [Bottoming floats]
610 -- esp Bottoming floats (2)
611 dest_lvl = destLevel env fvs is_function is_bot need_join
612 abs_vars = abstractVars dest_lvl env fvs
613
614 -- float_is_new_lam: the floated thing will be a new value lambda
615 -- replacing, say (g (x+4)) by (lvl x). No work is saved, nor is
616 -- allocation saved. The benefit is to get it to the top level
617 -- and hence out of the body of this function altogether, making
618 -- it smaller and more inlinable
619 float_is_new_lam = float_n_lams > 0
620 float_n_lams = count isId abs_vars
621
622 (rhs_env, abs_vars_w_lvls) = lvlLamBndrs env dest_lvl abs_vars
623
624 -- Note [Join points and MFEs]
625 need_join = any (\v -> isId v && remainsJoinId env v) (dVarSetElems fvs)
626 join_arity_maybe | need_join = Just (length abs_vars)
627 | otherwise = Nothing
628
629 -- A decision to float entails let-binding this thing, and we only do
630 -- that if we'll escape a value lambda, or will go to the top level.
631 float_me = saves_work || saves_alloc
632
633 -- We can save work if we can move a redex outside a value lambda
634 -- But if float_is_new_lam is True, then the redex is wrapped in a
635 -- a new lambda, so no work is saved
636 saves_work = escapes_value_lam && not float_is_new_lam
637
638 escapes_value_lam = dest_lvl `ltMajLvl` (le_ctxt_lvl env)
639 -- See Note [Escaping a value lambda]
640
641 -- See Note [Floating to the top]
642 saves_alloc = isTopLvl dest_lvl
643 && floatConsts env
644 && (not strict_ctxt || is_bot || exprIsHNF expr)
645
646 isBottomThunk :: Maybe (Arity, s) -> Bool
647 -- See Note [Bottoming floats] (2)
648 isBottomThunk (Just (0, _)) = True -- Zero arity
649 isBottomThunk _ = False
650
651 {- Note [Floating to the top]
652 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
653 We are keen to float something to the top level, even if it does not
654 escape a value lambda (and hence save work), for two reasons:
655
656 * Doing so makes the function smaller, by floating out
657 bottoming expressions, or integer or string literals. That in
658 turn makes it easier to inline, with less duplication.
659
660 * (Minor) Doing so may turn a dynamic allocation (done by machine
661 instructions) into a static one. Minor because we are assuming
662 we are not escaping a value lambda
663
664 But do not so if:
665 - the context is a strict, and
666 - the expression is not a HNF, and
667 - the expression is not bottoming
668
669 Exammples:
670
671 * Bottoming
672 f x = case x of
673 0 -> error <big thing>
674 _ -> x+1
675 Here we want to float (error <big thing>) to top level, abstracting
676 over 'x', so as to make f's RHS smaller.
677
678 * HNF
679 f = case y of
680 True -> p:q
681 False -> blah
682 We may as well float the (p:q) so it becomes a static data structure.
683
684 * Case scrutinee
685 f = case g True of ....
686 Don't float (g True) to top level; then we have the admin of a
687 top-level thunk to worry about, with zero gain.
688
689 * Case alternative
690 h = case y of
691 True -> g True
692 False -> False
693 Don't float (g True) to the top level
694
695 * Arguments
696 t = f (g True)
697 If f is lazy, we /do/ float (g True) because then we can allocate
698 the thunk statically rather than dynamically. But if f is strict
699 we don't (see the use of idStrictness in lvlApp). It's not clear
700 if this test is worth the bother: it's only about CAFs!
701
702 It's controlled by a flag (floatConsts) , because doing this too
703 early loses opportunities for RULES which (needless to say) are
704 important in some nofib programs (gcd is an example). [SPJ note:
705 I think this is obselete; the flag seems always on.]
706
707 Note [Floating MFEs of unlifted type]
708 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
709 Suppose we have
710 case f x of (r::Int#) -> blah
711 we'd like to float (f x). But it's not trivial because it has type
712 Int#, and we don't want to evaluate it too early. But we can instead
713 float a boxed version
714 y = case f x of r -> I# r
715 and replace the original (f x) with
716 case (case y of I# r -> r) of r -> blah
717
718 Being able to float unboxed expressions is sometimes important; see
719 Trac #12603. I'm not sure how /often/ it is important, but it's
720 not hard to achieve.
721
722 We only do it for a fixed collection of types for which we have a
723 convenient boxing constructor (see boxingDataCon_maybe). In
724 particular we /don't/ do it for unboxed tuples; it's better to float
725 the components of the tuple individually.
726
727 I did experiment with a form of boxing that works for any type, namely
728 wrapping in a function. In our example
729
730 let y = case f x of r -> \v. f x
731 in case y void of r -> blah
732
733 It works fine, but it's 50% slower (based on some crude benchmarking).
734 I suppose we could do it for types not covered by boxingDataCon_maybe,
735 but it's more code and I'll wait to see if anyone wants it.
736
737 Note [Bottoming floats]
738 ~~~~~~~~~~~~~~~~~~~~~~~
739 If we see
740 f = \x. g (error "urk")
741 we'd like to float the call to error, to get
742 lvl = error "urk"
743 f = \x. g lvl
744
745 * Bottoming floats (1): Furthermore, we want to float a bottoming
746 expression even if it has free variables:
747 f = \x. g (let v = h x in error ("urk" ++ v))
748 Then we'd like to abstact over 'x' can float the whole arg of g:
749 lvl = \x. let v = h x in error ("urk" ++ v)
750 f = \x. g (lvl x)
751 To achieve this we pass is_bot to destLevel
752
753 * Bottoming floats (2): we do not do this for functions that return
754 bottom. Instead we treat the /body/ of such a function specially,
755 via point (1). For example:
756 f = \x. ....(\y z. if x then error y else error z)....
757 ===>
758 lvl = \x z y. if b then error y else error z
759 f = \x. ...(\y z. lvl x z y)...
760 (There is no guarantee that we'll choose the perfect argument order.)
761
762 See Maessen's paper 1999 "Bottom extraction: factoring error handling out
763 of functional programs" (unpublished I think).
764
765 When we do this, we set the strictness and arity of the new bottoming
766 Id, *immediately*, for three reasons:
767
768 * To prevent the abstracted thing being immediately inlined back in again
769 via preInlineUnconditionally. The latter has a test for bottoming Ids
770 to stop inlining them, so we'd better make sure it *is* a bottoming Id!
771
772 * So that it's properly exposed as such in the interface file, even if
773 this is all happening after strictness analysis.
774
775 * In case we do CSE with the same expression that *is* marked bottom
776 lvl = error "urk"
777 x{str=bot) = error "urk"
778 Here we don't want to replace 'x' with 'lvl', else we may get Lint
779 errors, e.g. via a case with empty alternatives: (case x of {})
780 Lint complains unless the scrutinee of such a case is clearly bottom.
781
782 This was reported in Trac #11290. But since the whole bottoming-float
783 thing is based on the cheap-and-cheerful exprIsBottom, I'm not sure
784 that it'll nail all such cases.
785
786 Note [Bottoming floats: eta expansion] c.f Note [Bottoming floats]
787 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
788 Tiresomely, though, the simplifier has an invariant that the manifest
789 arity of the RHS should be the same as the arity; but we can't call
790 etaExpand during SetLevels because it works over a decorated form of
791 CoreExpr. So we do the eta expansion later, in FloatOut.
792
793 Note [Case MFEs]
794 ~~~~~~~~~~~~~~~~
795 We don't float a case expression as an MFE from a strict context. Why not?
796 Because in doing so we share a tiny bit of computation (the switch) but
797 in exchange we build a thunk, which is bad. This case reduces allocation
798 by 7% in spectral/puzzle (a rather strange benchmark) and 1.2% in real/fem.
799 Doesn't change any other allocation at all.
800
801 We will make a separate decision for the scrutinees and alterantives.
802
803 Note [Join points and MFEs]
804 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
805
806 When we create an MFE float, if it has a free join variable, the new binding
807 must be a join point:
808
809 let join j x = ...
810 in case a of A -> ...
811 B -> j 3
812
813 =>
814
815 let join j x = ...
816 join k = j 3 -- only valid because k is a join point
817 in case a of A -> ...
818 B -> k
819
820 Normally we're very circumspect about floating join points, but in this case
821 it's definitely safe because we can only be floating it as far as another join
822 binding. In other words, one might worry about a situation like:
823
824 let join j x = ...
825 in case a of A -> ...
826 B -> f (j 3)
827
828 =>
829
830 let join j x = ...
831 in case a of A -> ...
832 B -> f (let join k = j 3 in k)
833
834 Here we have created the MFE float k, and are contemplating floating it up to
835 j. This would indeed be an invalid operation on a join point like k. However,
836 this example is ill-typed to begin with, since this time the call to j is not a
837 tail call. In summary, the very occurrence of the join variable in the MFE is
838 proof that we can float the MFE as far as that binding.
839 -}
840
841 annotateBotStr :: Id -> Arity -> Maybe (Arity, StrictSig) -> Id
842 -- See Note [Bottoming floats] for why we want to add
843 -- bottoming information right now
844 --
845 -- n_extra are the number of extra value arguments added during floating
846 annotateBotStr id n_extra mb_str
847 = case mb_str of
848 Nothing -> id
849 Just (arity, sig) -> id `setIdArity` (arity + n_extra)
850 `setIdStrictness` (increaseStrictSigArity n_extra sig)
851
852 notWorthFloating :: CoreExpr -> [Var] -> Bool
853 -- Returns True if the expression would be replaced by
854 -- something bigger than it is now. For example:
855 -- abs_vars = tvars only: return True if e is trivial,
856 -- but False for anything bigger
857 -- abs_vars = [x] (an Id): return True for trivial, or an application (f x)
858 -- but False for (f x x)
859 --
860 -- One big goal is that floating should be idempotent. Eg if
861 -- we replace e with (lvl79 x y) and then run FloatOut again, don't want
862 -- to replace (lvl79 x y) with (lvl83 x y)!
863
864 notWorthFloating e abs_vars
865 = go e (count isId abs_vars)
866 where
867 go (Var {}) n = n >= 0
868 go (Lit lit) n = ASSERT( n==0 )
869 litIsTrivial lit -- Note [Floating literals]
870 go (Tick t e) n = not (tickishIsCode t) && go e n
871 go (Cast e _) n = go e n
872 go (App e arg) n
873 | Type {} <- arg = go e n
874 | Coercion {} <- arg = go e n
875 | n==0 = False
876 | is_triv arg = go e (n-1)
877 | otherwise = False
878 go _ _ = False
879
880 is_triv (Lit {}) = True -- Treat all literals as trivial
881 is_triv (Var {}) = True -- (ie not worth floating)
882 is_triv (Cast e _) = is_triv e
883 is_triv (App e (Type {})) = is_triv e
884 is_triv (App e (Coercion {})) = is_triv e
885 is_triv (Tick t e) = not (tickishIsCode t) && is_triv e
886 is_triv _ = False
887
888 {-
889 Note [Floating literals]
890 ~~~~~~~~~~~~~~~~~~~~~~~~
891 It's important to float Integer literals, so that they get shared,
892 rather than being allocated every time round the loop.
893 Hence the litIsTrivial.
894
895 Ditto literal strings (MachStr), which we'd like to float to top
896 level, which is now possible.
897
898
899 Note [Escaping a value lambda]
900 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
901 We want to float even cheap expressions out of value lambdas,
902 because that saves allocation. Consider
903 f = \x. .. (\y.e) ...
904 Then we'd like to avoid allocating the (\y.e) every time we call f,
905 (assuming e does not mention x). An example where this really makes a
906 difference is simplrun009.
907
908 Another reason it's good is because it makes SpecContr fire on functions.
909 Consider
910 f = \x. ....(f (\y.e))....
911 After floating we get
912 lvl = \y.e
913 f = \x. ....(f lvl)...
914 and that is much easier for SpecConstr to generate a robust
915 specialisation for.
916
917 However, if we are wrapping the thing in extra value lambdas (in
918 abs_vars), then nothing is saved. E.g.
919 f = \xyz. ...(e1[y],e2)....
920 If we float
921 lvl = \y. (e1[y],e2)
922 f = \xyz. ...(lvl y)...
923 we have saved nothing: one pair will still be allocated for each
924 call of 'f'. Hence the (not float_is_lam) in float_me.
925
926
927 ************************************************************************
928 * *
929 \subsection{Bindings}
930 * *
931 ************************************************************************
932
933 The binding stuff works for top level too.
934 -}
935
936 lvlBind :: LevelEnv
937 -> CoreBindWithFVs
938 -> LvlM (LevelledBind, LevelEnv)
939
940 lvlBind env (AnnNonRec bndr rhs)
941 | isTyVar bndr -- Don't do anything for TyVar binders
942 -- (simplifier gets rid of them pronto)
943 || isCoVar bndr -- Difficult to fix up CoVar occurrences (see extendPolyLvlEnv)
944 -- so we will ignore this case for now
945 || not (profitableFloat env dest_lvl)
946 || (isTopLvl dest_lvl && isUnliftedType (idType bndr))
947 -- We can't float an unlifted binding to top level, so we don't
948 -- float it at all. It's a bit brutal, but unlifted bindings
949 -- aren't expensive either
950
951 = -- No float
952 do { rhs' <- lvlRhs env NonRecursive mb_join_arity rhs
953 ; let bind_lvl = incMinorLvl (le_ctxt_lvl env)
954 (env', [bndr']) = substAndLvlBndrs NonRecursive env bind_lvl [bndr]
955 ; return (NonRec bndr' rhs', env') }
956
957 -- Otherwise we are going to float
958 | null abs_vars
959 = do { -- No type abstraction; clone existing binder
960 rhs' <- lvlRhs (setCtxtLvl env dest_lvl) NonRecursive
961 zapped_join rhs
962 ; (env', [bndr']) <- cloneLetVars NonRecursive env dest_lvl
963 need_zap [bndr]
964 ; let bndr2 = annotateBotStr bndr' 0 mb_bot_str
965 ; return (NonRec (TB bndr2 (FloatMe dest_lvl)) rhs', env') }
966
967 | otherwise
968 = do { -- Yes, type abstraction; create a new binder, extend substitution, etc
969 rhs' <- lvlFloatRhs abs_vars dest_lvl env NonRecursive
970 zapped_join rhs
971 ; (env', [bndr']) <- newPolyBndrs dest_lvl env abs_vars
972 need_zap [bndr]
973 ; let bndr2 = annotateBotStr bndr' n_extra mb_bot_str
974 ; return (NonRec (TB bndr2 (FloatMe dest_lvl)) rhs', env') }
975
976 where
977 rhs_fvs = freeVarsOf rhs
978 bind_fvs = rhs_fvs `unionDVarSet` dIdFreeVars bndr
979 abs_vars = abstractVars dest_lvl env bind_fvs
980 dest_lvl = destLevel env bind_fvs (isFunction rhs) is_bot
981 is_unfloatable_join
982 mb_bot_str = exprBotStrictness_maybe (deAnnotate rhs)
983 -- See Note [Bottoming floats]
984 -- esp Bottoming floats (2)
985 is_bot = isBottomThunk mb_bot_str
986 n_extra = count isId abs_vars
987
988 mb_join_arity = isJoinId_maybe bndr
989 is_unfloatable_join = case mb_join_arity of Just ar -> ar > 0
990 Nothing -> False
991 -- See Note [When to ruin a join point]
992 need_zap = dest_lvl `ltLvl` joinCeilingLevel env
993 zapped_join | need_zap = Nothing -- Zap the join point
994 | otherwise = mb_join_arity
995
996 lvlBind env (AnnRec pairs)
997 | floatTopLvlOnly env && not (isTopLvl dest_lvl)
998 -- Only floating to the top level is allowed.
999 || not (profitableFloat env dest_lvl)
1000 = do { let bind_lvl = incMinorLvl (le_ctxt_lvl env)
1001 (env', bndrs') = substAndLvlBndrs Recursive env bind_lvl bndrs
1002 ; rhss' <- zipWithM (lvlRhs env' Recursive) mb_join_arities rhss
1003 ; return (Rec (bndrs' `zip` rhss'), env') }
1004
1005 | null abs_vars
1006 = do { (new_env, new_bndrs) <- cloneLetVars Recursive env dest_lvl
1007 need_zap bndrs
1008 ; let env_rhs = setCtxtLvl new_env dest_lvl
1009 ; new_rhss <- zipWithM (lvlRhs env_rhs Recursive)
1010 (map zap_join mb_join_arities) rhss
1011 ; return ( Rec ([TB b (FloatMe dest_lvl) | b <- new_bndrs] `zip` new_rhss)
1012 , new_env) }
1013
1014 -- ToDo: when enabling the floatLambda stuff,
1015 -- I think we want to stop doing this
1016 | [(bndr,rhs)] <- pairs
1017 , count isId abs_vars > 1
1018 = do -- Special case for self recursion where there are
1019 -- several variables carried around: build a local loop:
1020 -- poly_f = \abs_vars. \lam_vars . letrec f = \lam_vars. rhs in f lam_vars
1021 -- This just makes the closures a bit smaller. If we don't do
1022 -- this, allocation rises significantly on some programs
1023 --
1024 -- We could elaborate it for the case where there are several
1025 -- mutually functions, but it's quite a bit more complicated
1026 --
1027 -- This all seems a bit ad hoc -- sigh
1028 let (rhs_env, abs_vars_w_lvls) = lvlLamBndrs env dest_lvl abs_vars
1029 rhs_lvl = le_ctxt_lvl rhs_env
1030
1031 (rhs_env', [new_bndr]) <- cloneLetVars Recursive rhs_env rhs_lvl
1032 need_zap [bndr]
1033 let
1034 (lam_bndrs, rhs_body) = collectAnnBndrs rhs
1035 (body_env1, lam_bndrs1) = substBndrsSL NonRecursive rhs_env' lam_bndrs
1036 (body_env2, lam_bndrs2) = lvlLamBndrs body_env1 rhs_lvl lam_bndrs1
1037 mb_join_arity = isJoinId_maybe bndr
1038 new_rhs_body <- lvlRhs body_env2 Recursive
1039 (zap_join mb_join_arity) rhs_body
1040 (poly_env, [poly_bndr]) <- newPolyBndrs dest_lvl env abs_vars
1041 need_zap [bndr]
1042 return (Rec [(TB poly_bndr (FloatMe dest_lvl)
1043 , mkLams abs_vars_w_lvls $
1044 mkLams lam_bndrs2 $
1045 Let (Rec [( TB new_bndr (StayPut rhs_lvl)
1046 , mkLams lam_bndrs2 new_rhs_body)])
1047 (mkVarApps (Var new_bndr) lam_bndrs1))]
1048 , poly_env)
1049
1050 | otherwise -- Non-null abs_vars
1051 = do { (new_env, new_bndrs) <- newPolyBndrs dest_lvl env abs_vars
1052 need_zap bndrs
1053 ; new_rhss <- zipWithM (lvlFloatRhs abs_vars dest_lvl new_env Recursive)
1054 (map zap_join mb_join_arities) rhss
1055 ; return ( Rec ([TB b (FloatMe dest_lvl) | b <- new_bndrs] `zip` new_rhss)
1056 , new_env) }
1057
1058 where
1059 (bndrs,rhss) = unzip pairs
1060
1061 -- Finding the free vars of the binding group is annoying
1062 bind_fvs = ((unionDVarSets [ freeVarsOf rhs | (_, rhs) <- pairs])
1063 `unionDVarSet`
1064 (fvDVarSet $ unionsFV [ idFVs bndr
1065 | (bndr, (_,_)) <- pairs]))
1066 `delDVarSetList`
1067 bndrs
1068
1069 dest_lvl = destLevel env bind_fvs (all isFunction rhss) False
1070 has_unfloatable_join
1071 abs_vars = abstractVars dest_lvl env bind_fvs
1072
1073 mb_join_arities = map isJoinId_maybe bndrs
1074 has_unfloatable_join
1075 = any (\mb_ar -> case mb_ar of Just ar -> ar > 0
1076 Nothing -> False) mb_join_arities
1077
1078 need_zap = dest_lvl `ltLvl` joinCeilingLevel env
1079 zap_join mb_join_arity | need_zap = Nothing
1080 | otherwise = mb_join_arity
1081
1082 lvlRhs :: LevelEnv
1083 -> RecFlag
1084 -> Maybe JoinArity
1085 -> CoreExprWithFVs
1086 -> LvlM LevelledExpr
1087 lvlRhs env rec_flag mb_join_arity expr
1088 = lvlFloatRhs [] (le_ctxt_lvl env) env rec_flag mb_join_arity expr
1089
1090 profitableFloat :: LevelEnv -> Level -> Bool
1091 profitableFloat env dest_lvl
1092 = (dest_lvl `ltMajLvl` le_ctxt_lvl env) -- Escapes a value lambda
1093 || isTopLvl dest_lvl -- Going all the way to top level
1094
1095
1096 {-
1097 Note [When to ruin a join point]
1098 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1099
1100 Generally, we protect join points zealously. However, there are two situations
1101 in which it can pay to promote a join point to a function:
1102
1103 1. If the join point has no value arguments, then floating it outward will make
1104 it a *thunk*, not a function, so we might get increased sharing.
1105 2. If we float the join point all the way to the top level, it still won't be
1106 allocated, so the cost is much less.
1107
1108 Refusing to lose a join point in either of these cases can be disastrous---for
1109 instance, allocation in imaginary/x2n1 *triples* because $w$s^ becomes too big
1110 to inline, which prevents Float In from making a particular binding strictly
1111 demanded.
1112 -}
1113
1114 ----------------------------------------------------
1115 -- Three help functions for the type-abstraction case
1116
1117 lvlFloatRhs :: [OutVar] -> Level -> LevelEnv -> RecFlag
1118 -> Maybe JoinArity -> CoreExprWithFVs
1119 -> LvlM (Expr LevelledBndr)
1120 lvlFloatRhs abs_vars dest_lvl env rec mb_join_arity rhs
1121 = do { body' <- if any isId bndrs -- See Note [Floating from a RHS]
1122 then lvlMFE body_env True body
1123 else lvlExpr body_env body
1124 ; return (mkLams bndrs' body') }
1125 where
1126 (bndrs, body) | Just join_arity <- mb_join_arity
1127 = collectNAnnBndrs join_arity rhs
1128 | otherwise
1129 = collectAnnBndrs rhs
1130 (env1, bndrs1) = substBndrsSL NonRecursive env bndrs
1131 all_bndrs = abs_vars ++ bndrs1
1132 (body_env, bndrs') | Just _ <- mb_join_arity
1133 = lvlJoinBndrs env1 dest_lvl rec all_bndrs
1134 | otherwise
1135 = lvlLamBndrs (placeJoinCeiling env1) dest_lvl all_bndrs
1136 -- The important thing here is that we call lvlLamBndrs on
1137 -- all these binders at once (abs_vars and bndrs), so they
1138 -- all get the same major level. Otherwise we create stupid
1139 -- let-bindings inside, joyfully thinking they can float; but
1140 -- in the end they don't because we never float bindings in
1141 -- between lambdas
1142
1143 {- Note [Floating from a RHS]
1144 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1145 When float the RHS of a let-binding, we don't always want to apply
1146 lvlMFE to the body of a lambda, as we usually do, because the entire
1147 binding body is already going to the right place (dest_lvl)
1148
1149 A particular example is the top level. Consider
1150 concat = /\ a -> foldr ..a.. (++) []
1151 We don't want to float the body of the lambda to get
1152 lvl = /\ a -> foldr ..a.. (++) []
1153 concat = /\ a -> lvl a
1154 That would be stupid.
1155
1156 Previously this was avoided in a much nastier way, by testing strict_ctxt
1157 in float_me in lvlMFE. But that wasn't even right because it would fail
1158 to float out the error sub-expression in
1159 f = \x. case x of
1160 True -> error ("blah" ++ show x)
1161 False -> ...
1162
1163 But we must be careful! If we had
1164 f = \x -> factorial 20
1165 we /would/ want to float that (factorial 20) out! Functions are treated
1166 differently: see the use of isFunction in the calls to destLevel. If
1167 there are only type lambdas, then destLevel will say "go to top, and
1168 abstract over the free tyars" and we don't want that here.
1169
1170 Conclusion: use lvlMFE if there are any value lambdas, lvlExpr
1171 otherwise. A little subtle, and I got it wrong to start with.
1172 -}
1173
1174 {-
1175 ************************************************************************
1176 * *
1177 \subsection{Deciding floatability}
1178 * *
1179 ************************************************************************
1180 -}
1181
1182 substAndLvlBndrs :: RecFlag -> LevelEnv -> Level -> [InVar] -> (LevelEnv, [LevelledBndr])
1183 substAndLvlBndrs is_rec env lvl bndrs
1184 = lvlBndrs subst_env lvl subst_bndrs
1185 where
1186 (subst_env, subst_bndrs) = substBndrsSL is_rec env bndrs
1187
1188 substBndrsSL :: RecFlag -> LevelEnv -> [InVar] -> (LevelEnv, [OutVar])
1189 -- So named only to avoid the name clash with CoreSubst.substBndrs
1190 substBndrsSL is_rec env@(LE { le_subst = subst, le_env = id_env }) bndrs
1191 = ( env { le_subst = subst'
1192 , le_env = foldl add_id id_env (bndrs `zip` bndrs') }
1193 , bndrs')
1194 where
1195 (subst', bndrs') = case is_rec of
1196 NonRecursive -> substBndrs subst bndrs
1197 Recursive -> substRecBndrs subst bndrs
1198
1199 lvlLamBndrs :: LevelEnv -> Level -> [OutVar] -> (LevelEnv, [LevelledBndr])
1200 -- Compute the levels for the binders of a lambda group
1201 lvlLamBndrs env lvl bndrs
1202 = lvlBndrs env new_lvl bndrs
1203 where
1204 new_lvl | any is_major bndrs = incMajorLvl lvl
1205 | otherwise = incMinorLvl lvl
1206
1207 is_major bndr = isId bndr && not (isProbablyOneShotLambda bndr)
1208 -- The "probably" part says "don't float things out of a
1209 -- probable one-shot lambda"
1210 -- See Note [Computing one-shot info] in Demand.hs
1211
1212 lvlJoinBndrs :: LevelEnv -> Level -> RecFlag -> [OutVar]
1213 -> (LevelEnv, [LevelledBndr])
1214 lvlJoinBndrs env lvl rec bndrs
1215 = lvlBndrs env new_lvl bndrs
1216 where
1217 new_lvl | isRec rec = incMajorLvl lvl
1218 | otherwise = incMinorLvl lvl
1219 -- Non-recursive join points are one-shot; recursive ones are not
1220
1221 lvlBndrs :: LevelEnv -> Level -> [CoreBndr] -> (LevelEnv, [LevelledBndr])
1222 -- The binders returned are exactly the same as the ones passed,
1223 -- apart from applying the substitution, but they are now paired
1224 -- with a (StayPut level)
1225 --
1226 -- The returned envt has le_ctxt_lvl updated to the new_lvl
1227 --
1228 -- All the new binders get the same level, because
1229 -- any floating binding is either going to float past
1230 -- all or none. We never separate binders.
1231 lvlBndrs env@(LE { le_lvl_env = lvl_env }) new_lvl bndrs
1232 = ( env { le_ctxt_lvl = new_lvl
1233 , le_lvl_env = addLvls new_lvl lvl_env bndrs }
1234 , map (stayPut new_lvl) bndrs)
1235
1236 stayPut :: Level -> OutVar -> LevelledBndr
1237 stayPut new_lvl bndr = TB bndr (StayPut new_lvl)
1238
1239 -- Destination level is the max Id level of the expression
1240 -- (We'll abstract the type variables, if any.)
1241 destLevel :: LevelEnv -> DVarSet
1242 -> Bool -- True <=> is function
1243 -> Bool -- True <=> is bottom
1244 -> Bool -- True <=> is join point (or can be floated anyway)
1245 -> Level
1246 destLevel env fvs is_function is_bot is_join
1247 | is_bot -- Send bottoming bindings to the top
1248 = tOP_LEVEL -- regardless; see Note [Bottoming floats]
1249 -- Esp Bottoming floats (1)
1250
1251 | Just n_args <- floatLams env
1252 , n_args > 0 -- n=0 case handled uniformly by the 'otherwise' case
1253 , is_function
1254 , countFreeIds fvs <= n_args
1255 = tOP_LEVEL -- Send functions to top level; see
1256 -- the comments with isFunction
1257
1258 | is_join
1259 , hits_ceiling
1260 = join_ceiling
1261
1262 | otherwise = max_fv_level
1263 where
1264 max_fv_level = maxFvLevel isId env fvs -- Max over Ids only; the tyvars
1265 -- will be abstracted
1266
1267 join_ceiling = joinCeilingLevel env
1268 hits_ceiling = max_fv_level `ltLvl` join_ceiling &&
1269 not (isTopLvl max_fv_level)
1270 -- Note [When to ruin a join point]
1271
1272 isFunction :: CoreExprWithFVs -> Bool
1273 -- The idea here is that we want to float *functions* to
1274 -- the top level. This saves no work, but
1275 -- (a) it can make the host function body a lot smaller,
1276 -- and hence inlinable.
1277 -- (b) it can also save allocation when the function is recursive:
1278 -- h = \x -> letrec f = \y -> ...f...y...x...
1279 -- in f x
1280 -- becomes
1281 -- f = \x y -> ...(f x)...y...x...
1282 -- h = \x -> f x x
1283 -- No allocation for f now.
1284 -- We may only want to do this if there are sufficiently few free
1285 -- variables. We certainly only want to do it for values, and not for
1286 -- constructors. So the simple thing is just to look for lambdas
1287 isFunction (_, AnnLam b e) | isId b = True
1288 | otherwise = isFunction e
1289 -- isFunction (_, AnnTick _ e) = isFunction e -- dubious
1290 isFunction _ = False
1291
1292 countFreeIds :: DVarSet -> Int
1293 countFreeIds = nonDetFoldUDFM add 0
1294 -- It's OK to use nonDetFoldUDFM here because we're just counting things.
1295 where
1296 add :: Var -> Int -> Int
1297 add v n | isId v = n+1
1298 | otherwise = n
1299
1300 {-
1301 ************************************************************************
1302 * *
1303 \subsection{Free-To-Level Monad}
1304 * *
1305 ************************************************************************
1306 -}
1307
1308 data LevelEnv
1309 = LE { le_switches :: FloatOutSwitches
1310 , le_ctxt_lvl :: Level -- The current level
1311 , le_lvl_env :: VarEnv Level -- Domain is *post-cloned* TyVars and Ids
1312 , le_join_ceil:: Level -- Highest level to which joins float
1313 , le_subst :: Subst -- Domain is pre-cloned TyVars and Ids
1314 -- The Id -> CoreExpr in the Subst is ignored
1315 -- (since we want to substitute a LevelledExpr for
1316 -- an Id via le_env) but we do use the Co/TyVar substs
1317 , le_env :: IdEnv ([OutVar], LevelledExpr) -- Domain is pre-cloned Ids
1318 }
1319 -- We clone let- and case-bound variables so that they are still
1320 -- distinct when floated out; hence the le_subst/le_env.
1321 -- (see point 3 of the module overview comment).
1322 -- We also use these envs when making a variable polymorphic
1323 -- because we want to float it out past a big lambda.
1324 --
1325 -- The le_subst and le_env always implement the same mapping, but the
1326 -- le_subst maps to CoreExpr and the le_env to LevelledExpr
1327 -- Since the range is always a variable or type application,
1328 -- there is never any difference between the two, but sadly
1329 -- the types differ. The le_subst is used when substituting in
1330 -- a variable's IdInfo; the le_env when we find a Var.
1331 --
1332 -- In addition the le_env records a list of tyvars free in the
1333 -- type application, just so we don't have to call freeVars on
1334 -- the type application repeatedly.
1335 --
1336 -- The domain of the both envs is *pre-cloned* Ids, though
1337 --
1338 -- The domain of the le_lvl_env is the *post-cloned* Ids
1339
1340 initialEnv :: FloatOutSwitches -> LevelEnv
1341 initialEnv float_lams
1342 = LE { le_switches = float_lams
1343 , le_ctxt_lvl = tOP_LEVEL
1344 , le_join_ceil = panic "initialEnv"
1345 , le_lvl_env = emptyVarEnv
1346 , le_subst = emptySubst
1347 , le_env = emptyVarEnv }
1348
1349 addLvl :: Level -> VarEnv Level -> OutVar -> VarEnv Level
1350 addLvl dest_lvl env v' = extendVarEnv env v' dest_lvl
1351
1352 addLvls :: Level -> VarEnv Level -> [OutVar] -> VarEnv Level
1353 addLvls dest_lvl env vs = foldl (addLvl dest_lvl) env vs
1354
1355 floatLams :: LevelEnv -> Maybe Int
1356 floatLams le = floatOutLambdas (le_switches le)
1357
1358 floatConsts :: LevelEnv -> Bool
1359 floatConsts le = floatOutConstants (le_switches le)
1360
1361 floatOverSat :: LevelEnv -> Bool
1362 floatOverSat le = floatOutOverSatApps (le_switches le)
1363
1364 floatTopLvlOnly :: LevelEnv -> Bool
1365 floatTopLvlOnly le = floatToTopLevelOnly (le_switches le)
1366
1367 setCtxtLvl :: LevelEnv -> Level -> LevelEnv
1368 setCtxtLvl env lvl = env { le_ctxt_lvl = lvl }
1369
1370 incMinorLvlFrom :: LevelEnv -> Level
1371 incMinorLvlFrom env = incMinorLvl (le_ctxt_lvl env)
1372
1373 -- extendCaseBndrEnv adds the mapping case-bndr->scrut-var if it can
1374 -- See Note [Binder-swap during float-out]
1375 extendCaseBndrEnv :: LevelEnv
1376 -> Id -- Pre-cloned case binder
1377 -> Expr LevelledBndr -- Post-cloned scrutinee
1378 -> LevelEnv
1379 extendCaseBndrEnv le@(LE { le_subst = subst, le_env = id_env })
1380 case_bndr (Var scrut_var)
1381 = le { le_subst = extendSubstWithVar subst case_bndr scrut_var
1382 , le_env = add_id id_env (case_bndr, scrut_var) }
1383 extendCaseBndrEnv env _ _ = env
1384
1385 -- See Note [Join ceiling]
1386 placeJoinCeiling :: LevelEnv -> LevelEnv
1387 placeJoinCeiling le@(LE { le_ctxt_lvl = lvl })
1388 = le { le_ctxt_lvl = lvl', le_join_ceil = lvl' }
1389 where
1390 lvl' = asJoinCeilLvl (incMinorLvl lvl)
1391
1392 maxFvLevel :: (Var -> Bool) -> LevelEnv -> DVarSet -> Level
1393 maxFvLevel max_me (LE { le_lvl_env = lvl_env, le_env = id_env }) var_set
1394 = foldDVarSet max_in tOP_LEVEL var_set
1395 where
1396 max_in in_var lvl
1397 = foldr max_out lvl (case lookupVarEnv id_env in_var of
1398 Just (abs_vars, _) -> abs_vars
1399 Nothing -> [in_var])
1400
1401 max_out out_var lvl
1402 | max_me out_var = case lookupVarEnv lvl_env out_var of
1403 Just lvl' -> maxLvl lvl' lvl
1404 Nothing -> lvl
1405 | otherwise = lvl -- Ignore some vars depending on max_me
1406
1407 lookupVar :: LevelEnv -> Id -> LevelledExpr
1408 lookupVar le v = case lookupVarEnv (le_env le) v of
1409 Just (_, expr) -> expr
1410 _ -> Var v
1411
1412 -- Level to which join points are allowed to float (boundary of current tail
1413 -- context). See Note [Join ceiling]
1414 joinCeilingLevel :: LevelEnv -> Level
1415 joinCeilingLevel = le_join_ceil
1416
1417 remainsJoinId :: LevelEnv -> Id -> Bool
1418 remainsJoinId le v = case lookupVarEnv (le_env le) v of
1419 Just (v':_, _) -> isJoinId v'
1420 Nothing -> isJoinId v
1421 Just ([], e) -> pprPanic "remainsJoinId" $
1422 ppr v $$ ppr e
1423
1424 abstractVars :: Level -> LevelEnv -> DVarSet -> [OutVar]
1425 -- Find the variables in fvs, free vars of the target expression,
1426 -- whose level is greater than the destination level
1427 -- These are the ones we are going to abstract out
1428 --
1429 -- Note that to get reproducible builds, the variables need to be
1430 -- abstracted in deterministic order, not dependent on the values of
1431 -- Uniques. This is achieved by using DVarSets, deterministic free
1432 -- variable computation and deterministic sort.
1433 -- See Note [Unique Determinism] in Unique for explanation of why
1434 -- Uniques are not deterministic.
1435 abstractVars dest_lvl (LE { le_subst = subst, le_lvl_env = lvl_env }) in_fvs
1436 = -- NB: sortQuantVars might not put duplicates next to each other
1437 map zap $ sortQuantVars $ uniq
1438 [out_var | out_fv <- dVarSetElems (substDVarSet subst in_fvs)
1439 , out_var <- dVarSetElems (close out_fv)
1440 , abstract_me out_var ]
1441 -- NB: it's important to call abstract_me only on the OutIds the
1442 -- come from substDVarSet (not on fv, which is an InId)
1443 where
1444 uniq :: [Var] -> [Var]
1445 -- Remove duplicates, preserving order
1446 uniq = dVarSetElems . mkDVarSet
1447
1448 abstract_me v = case lookupVarEnv lvl_env v of
1449 Just lvl -> dest_lvl `ltLvl` lvl
1450 Nothing -> False
1451
1452 -- We are going to lambda-abstract, so nuke any IdInfo,
1453 -- and add the tyvars of the Id (if necessary)
1454 zap v | isId v = WARN( isStableUnfolding (idUnfolding v) ||
1455 not (isEmptyRuleInfo (idSpecialisation v)),
1456 text "absVarsOf: discarding info on" <+> ppr v )
1457 setIdInfo v vanillaIdInfo
1458 | otherwise = v
1459
1460 close :: Var -> DVarSet -- Close over variables free in the type
1461 -- Result includes the input variable itself
1462 close v = foldDVarSet (unionDVarSet . close)
1463 (unitDVarSet v)
1464 (fvDVarSet $ varTypeTyCoFVs v)
1465
1466 type LvlM result = UniqSM result
1467
1468 initLvl :: UniqSupply -> UniqSM a -> a
1469 initLvl = initUs_
1470
1471 newPolyBndrs :: Level -> LevelEnv -> [OutVar] -> Bool -> [InId]
1472 -> LvlM (LevelEnv, [OutId])
1473 -- The envt is extended to bind the new bndrs to dest_lvl, but
1474 -- the le_ctxt_lvl is unaffected
1475 newPolyBndrs dest_lvl
1476 env@(LE { le_lvl_env = lvl_env, le_subst = subst, le_env = id_env })
1477 abs_vars zapping_joins bndrs
1478 = ASSERT( all (not . isCoVar) bndrs ) -- What would we add to the CoSubst in this case. No easy answer.
1479 do { uniqs <- getUniquesM
1480 ; let new_bndrs = zipWith mk_poly_bndr bndrs uniqs
1481 bndr_prs = bndrs `zip` new_bndrs
1482 env' = env { le_lvl_env = addLvls dest_lvl lvl_env new_bndrs
1483 , le_subst = foldl add_subst subst bndr_prs
1484 , le_env = foldl add_id id_env bndr_prs }
1485 ; return (env', new_bndrs) }
1486 where
1487 add_subst env (v, v') = extendIdSubst env v (mkVarApps (Var v') abs_vars)
1488 add_id env (v, v') = extendVarEnv env v ((v':abs_vars), mkVarApps (Var v') abs_vars)
1489
1490 mk_poly_bndr bndr uniq = transferPolyIdInfo bndr abs_vars $ -- Note [transferPolyIdInfo] in Id.hs
1491 maybe_transfer_join_info bndr $
1492 mkSysLocalOrCoVar (mkFastString str) uniq poly_ty
1493 where
1494 str = "poly_" ++ occNameString (getOccName bndr)
1495 poly_ty = mkLamTypes abs_vars (CoreSubst.substTy subst (idType bndr))
1496 maybe_transfer_join_info bndr new_bndr
1497 | not zapping_joins
1498 , Just join_arity <- isJoinId_maybe bndr
1499 = new_bndr `asJoinId`
1500 join_arity + length abs_vars
1501 | otherwise
1502 = new_bndr
1503
1504 newLvlVar :: LevelledExpr -- The RHS of the new binding
1505 -> Maybe JoinArity -- Its join arity, if it is a join point
1506 -> LvlM Id
1507 newLvlVar lvld_rhs join_arity_maybe
1508 = do { uniq <- getUniqueM
1509 ; return (add_join_info (mk_id uniq rhs_ty))
1510 }
1511 where
1512 add_join_info var = var `asJoinId_maybe` join_arity_maybe
1513 de_tagged_rhs = deTagExpr lvld_rhs
1514 rhs_ty = exprType de_tagged_rhs
1515
1516 mk_id uniq rhs_ty
1517 -- See Note [Grand plan for static forms] in StaticPtrTable.
1518 | isJust $ collectMakeStaticArgs $ snd $
1519 collectTyBinders de_tagged_rhs
1520 = mkExportedVanillaId (mkSystemVarName uniq (mkFastString "static_ptr"))
1521 rhs_ty
1522 | otherwise
1523 = mkLocalIdOrCoVar (mkSystemVarName uniq (mkFastString "lvl")) rhs_ty
1524
1525 cloneCaseBndrs :: LevelEnv -> Level -> [Var] -> LvlM (LevelEnv, [Var])
1526 cloneCaseBndrs env@(LE { le_subst = subst, le_lvl_env = lvl_env, le_env = id_env })
1527 new_lvl vs
1528 = do { us <- getUniqueSupplyM
1529 ; let (subst', vs') = cloneBndrs subst us vs
1530 env' = env { le_ctxt_lvl = new_lvl
1531 , le_lvl_env = addLvls new_lvl lvl_env vs'
1532 , le_subst = subst'
1533 , le_env = foldl add_id id_env (vs `zip` vs') }
1534
1535 ; return (env', vs') }
1536
1537 cloneLetVars :: RecFlag -> LevelEnv -> Level -> Bool -> [InVar]
1538 -> LvlM (LevelEnv, [OutVar])
1539 -- See Note [Need for cloning during float-out]
1540 -- Works for Ids bound by let(rec)
1541 -- The dest_lvl is attributed to the binders in the new env,
1542 -- but cloneVars doesn't affect the le_ctxt_lvl of the incoming env
1543 cloneLetVars is_rec
1544 env@(LE { le_subst = subst, le_lvl_env = lvl_env, le_env = id_env })
1545 dest_lvl zapping_joins vs
1546 = do { us <- getUniqueSupplyM
1547 ; let vs1 = map (zap_demand_info . maybe_zap_join) vs
1548 -- See Note [Zapping the demand info]
1549 (subst', vs2) = case is_rec of
1550 NonRecursive -> cloneBndrs subst us vs1
1551 Recursive -> cloneRecIdBndrs subst us vs1
1552 prs = vs `zip` vs2
1553 env' = env { le_lvl_env = addLvls dest_lvl lvl_env vs2
1554 , le_subst = subst'
1555 , le_env = foldl add_id id_env prs }
1556
1557 ; return (env', vs2) }
1558 where
1559 maybe_zap_join v | isId v, zapping_joins = zapJoinId v
1560 | otherwise = v
1561
1562 add_id :: IdEnv ([Var], LevelledExpr) -> (Var, Var) -> IdEnv ([Var], LevelledExpr)
1563 add_id id_env (v, v1)
1564 | isTyVar v = delVarEnv id_env v
1565 | otherwise = extendVarEnv id_env v ([v1], ASSERT(not (isCoVar v1)) Var v1)
1566
1567 zap_demand_info :: Var -> Var
1568 zap_demand_info v
1569 | isId v = zapIdDemandInfo v
1570 | otherwise = v
1571
1572 {-
1573 Note [Zapping the demand info]
1574 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1575 VERY IMPORTANT: we must zap the demand info if the thing is going to
1576 float out, because it may be less demanded than at its original
1577 binding site. Eg
1578 f :: Int -> Int
1579 f x = let v = 3*4 in v+x
1580 Here v is strict; but if we float v to top level, it isn't any more.
1581
1582 Similarly, if we're floating a join point, it won't be one anymore, so we zap
1583 join point information as well.
1584 -}