2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
4 ************************************************************************
6 \section[FloatIn]{Floating Inwards pass}
8 ************************************************************************
10 The main purpose of @floatInwards@ is floating into branches of a
11 case, so that we don't allocate things, save them on the stack, and
12 then discover that they aren't needed in the chosen branch.
17 module FloatIn
( floatInwards
) where
19 #include
"HsVersions.h"
23 import CoreUtils
( exprIsDupable
, exprIsExpandable
, exprType
,
24 exprOkForSideEffects
, mkTicks
)
25 import CoreFVs
( CoreExprWithFVs
, freeVars
, freeVarsOf
, idRuleAndUnfoldingVars
)
26 import Id
( isOneShotBndr
, idType
)
28 import Type
( Type
, isUnLiftedType
, isFunTy
, splitFunTy
, applyTy
)
31 import UniqDFM
(UniqDFM
, udfmToUfm
)
34 import Data
.List
( mapAccumL )
37 Top-level interface function, @floatInwards@. Note that we do not
38 actually float any bindings downwards from the top-level.
41 floatInwards
:: DynFlags
-> CoreProgram
-> CoreProgram
42 floatInwards dflags
= map fi_top_bind
44 fi_top_bind
(NonRec binder rhs
)
45 = NonRec binder
(fiExpr dflags
[] (freeVars rhs
))
46 fi_top_bind
(Rec pairs
)
47 = Rec
[ (b
, fiExpr dflags
[] (freeVars rhs
)) |
(b
, rhs
) <- pairs
]
50 ************************************************************************
52 \subsection{Mail from Andr\'e [edited]}
54 ************************************************************************
56 {\em Will wrote: What??? I thought the idea was to float as far
57 inwards as possible, no matter what. This is dropping all bindings
58 every time it sees a lambda of any kind. Help! }
60 You are assuming we DO DO full laziness AFTER floating inwards! We
61 have to [not float inside lambdas] if we don't.
63 If we indeed do full laziness after the floating inwards (we could
64 check the compilation flags for that) then I agree we could be more
65 aggressive and do float inwards past lambdas.
67 Actually we are not doing a proper full laziness (see below), which
68 was another reason for not floating inwards past a lambda.
70 This can easily be fixed. The problem is that we float lets outwards,
71 but there are a few expressions which are not let bound, like case
72 scrutinees and case alternatives. After floating inwards the
73 simplifier could decide to inline the let and the laziness would be
77 let a = expensive ==> \b -> case expensive of ...
78 in \ b -> case a of ...
83 to let bind the algebraic case scrutinees (done, I think) and
84 the case alternatives (except the ones with an
85 unboxed type)(not done, I think). This is best done in the
86 SetLevels.hs module, which tags things with their level numbers.
88 do the full laziness pass (floating lets outwards).
90 simplify. The simplifier inlines the (trivial) lets that were
91 created but were not floated outwards.
94 With the fix I think Will's suggestion that we can gain even more from
95 strictness by floating inwards past lambdas makes sense.
97 We still gain even without going past lambdas, as things may be
98 strict in the (new) context of a branch (where it was floated to) or
101 let a = something case x of
102 in case x of alt1 -> case something of a -> a + a
103 alt1 -> a + a ==> alt2 -> b
106 let a = something let b = case something of a -> a + a
107 in let b = a + a ==> in (b,b)
110 Also, even if a is not found to be strict in the new context and is
111 still left as a let, if the branch is not taken (or b is not entered)
112 the closure for a is not built.
114 ************************************************************************
116 \subsection{Main floating-inwards code}
118 ************************************************************************
121 type FreeVarSet
= IdSet
122 type BoundVarSet
= IdSet
124 data FloatInBind
= FB BoundVarSet FreeVarSet FloatBind
125 -- The FreeVarSet is the free variables of the binding. In the case
126 -- of recursive bindings, the set doesn't include the bound
129 type FloatInBinds
= [FloatInBind
]
130 -- In reverse dependency order (innermost binder first)
133 -> FloatInBinds
-- Binds we're trying to drop
134 -- as far "inwards" as possible
135 -> CoreExprWithFVs
-- Input expr
136 -> CoreExpr
-- Result
138 fiExpr _ to_drop
(_
, AnnLit lit
) = ASSERT
( null to_drop
) Lit lit
139 fiExpr _ to_drop
(_
, AnnType ty
) = ASSERT
( null to_drop
) Type ty
140 fiExpr _ to_drop
(_
, AnnVar v
) = wrapFloats to_drop
(Var v
)
141 fiExpr _ to_drop
(_
, AnnCoercion co
) = wrapFloats to_drop
(Coercion co
)
142 fiExpr dflags to_drop
(_
, AnnCast expr
(fvs_co
, co
))
143 = wrapFloats
(drop_here
++ co_drop
) $
144 Cast
(fiExpr dflags e_drop expr
) co
146 [drop_here
, e_drop
, co_drop
] = sepBindsByDropPoint dflags
False [udfmToUfm
$ freeVarsOf expr
, udfmToUfm fvs_co
] to_drop
149 Applications: we do float inside applications, mainly because we
150 need to get at all the arguments. The next simplifier run will
151 pull out any silly ones.
154 fiExpr dflags to_drop ann_expr
@(_
,AnnApp
{})
155 = mkTicks ticks
$ wrapFloats drop_here
$ wrapFloats extra_drop
$
156 mkApps
(fiExpr dflags fun_drop ann_fun
)
157 (zipWith (fiExpr dflags
) arg_drops ann_args
)
159 (ann_fun
@(fun_fvs
, _
), ann_args
, ticks
)
160 = collectAnnArgsTicks tickishFloatable ann_expr
161 fun_ty
= exprType
(deAnnotate ann_fun
)
162 ((_
,extra_fvs
), arg_fvs
) = mapAccumL mk_arg_fvs
(fun_ty
, emptyVarSet
) ann_args
164 -- All this faffing about is so that we can get hold of
165 -- the types of the arguments, to pass to noFloatIntoRhs
166 mk_arg_fvs
:: (Type
, FreeVarSet
) -> CoreExprWithFVs
-> ((Type
, FreeVarSet
), FreeVarSet
)
167 mk_arg_fvs
(fun_ty
, extra_fvs
) (_
, AnnType ty
)
168 = ((applyTy fun_ty ty
, extra_fvs
), emptyVarSet
)
170 mk_arg_fvs
(fun_ty
, extra_fvs
) (arg_dfvs
, ann_arg
)
171 | ASSERT
( isFunTy fun_ty
) noFloatIntoRhs ann_arg arg_ty
172 = ((res_ty
, extra_fvs `unionVarSet` arg_fvs
), emptyVarSet
)
174 = ((res_ty
, extra_fvs
), arg_fvs
)
176 arg_fvs
= udfmToUfm arg_dfvs
177 (arg_ty
, res_ty
) = splitFunTy fun_ty
179 drop_here
: extra_drop
: fun_drop
: arg_drops
180 = sepBindsByDropPoint dflags
False (extra_fvs
: udfmToUfm fun_fvs
: arg_fvs
) to_drop
183 Note [Do not destroy the let/app invariant]
184 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
187 We don't want to float bindings into here
188 f (case ... of { x -> x +# y })
189 because that might destroy the let/app invariant, which requires
190 unlifted function arguments to be ok-for-speculation.
192 Note [Floating in past a lambda group]
193 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
194 * We must be careful about floating inside a value lambda.
195 That risks losing laziness.
196 The float-out pass might rescue us, but then again it might not.
198 * We must be careful about type lambdas too. At one time we did, and
199 there is no risk of duplicating work thereby, but we do need to be
200 careful. In particular, here is a bad case (it happened in the
203 in let f = /\t -> \a -> ...
205 let f = /\t -> let v = ... in \a -> ...
206 This is bad as now f is an updatable closure (update PAP)
209 * Hack alert! We only float in through one-shot lambdas,
210 not (as you might guess) through lone big lambdas.
211 Reason: we float *out* past big lambdas (see the test in the Lam
212 case of FloatOut.floatExpr) and we don't want to float straight
215 It *is* important to float into one-shot lambdas, however;
216 see the remarks with noFloatIntoRhs.
218 So we treat lambda in groups, using the following rule:
220 Float in if (a) there is at least one Id,
221 and (b) there are no non-one-shot Ids
223 Otherwise drop all the bindings outside the group.
225 This is what the 'go' function in the AnnLam case is doing.
227 Urk! if all are tyvars, and we don't float in, we may miss an
228 opportunity to float inside a nested case branch
231 fiExpr dflags to_drop lam
@(_
, AnnLam _ _
)
232 | okToFloatInside bndrs
-- Float in
233 -- NB: Must line up with noFloatIntoRhs (AnnLam...); see Trac #7088
234 = mkLams bndrs
(fiExpr dflags to_drop body
)
236 |
otherwise -- Dump it all here
237 = wrapFloats to_drop
(mkLams bndrs
(fiExpr dflags
[] body
))
240 (bndrs
, body
) = collectAnnBndrs lam
243 We don't float lets inwards past an SCC.
244 ToDo: keep info on current cc, and when passing
245 one, if it is not the same, annotate all lets in binds with current
246 cc, change current cc to the new one and float binds into expr.
249 fiExpr dflags to_drop
(_
, AnnTick tickish expr
)
250 | tickish `tickishScopesLike` SoftScope
251 = Tick tickish
(fiExpr dflags to_drop expr
)
253 |
otherwise -- Wimp out for now - we could push values in
254 = wrapFloats to_drop
(Tick tickish
(fiExpr dflags
[] expr
))
257 For @Lets@, the possible ``drop points'' for the \tr{to_drop}
258 bindings are: (a)~in the body, (b1)~in the RHS of a NonRec binding,
259 or~(b2), in each of the RHSs of the pairs of a @Rec@.
261 Note that we do {\em weird things} with this let's binding. Consider:
270 Look at the inner \tr{let}. As \tr{w} is used in both the bind and
271 body of the inner let, we could panic and leave \tr{w}'s binding where
272 it is. But \tr{v} is floatable further into the body of the inner let, and
273 {\em then} \tr{w} will also be only in the body of that inner let.
275 So: rather than drop \tr{w}'s binding here, we add it onto the list of
276 things to drop in the outer let's body, and let nature take its
279 Note [extra_fvs (1): avoid floating into RHS]
280 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
281 Consider let x=\y....t... in body. We do not necessarily want to float
282 a binding for t into the RHS, because it'll immediately be floated out
283 again. (It won't go inside the lambda else we risk losing work.)
284 In letrec, we need to be more careful still. We don't want to transform
287 letrec f = \z. ...x#...f...
290 letrec f = let x# = y# +# 1# in \z. ...x#...f... in ...
291 because now we can't float the let out again, because a letrec
292 can't have unboxed bindings.
294 So we make "extra_fvs" which is the rhs_fvs of such bindings, and
295 arrange to dump bindings that bind extra_fvs before the entire let.
297 Note [extra_fvs (2): free variables of rules]
298 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
300 let x{rule mentioning y} = rhs in body
301 Here y is not free in rhs or body; but we still want to dump bindings
302 that bind y outside the let. So we augment extra_fvs with the
303 idRuleAndUnfoldingVars of x. No need for type variables, hence not using
307 fiExpr dflags to_drop
(_
,AnnLet
(AnnNonRec
id rhs
@(rhs_dfvs
, ann_rhs
)) body
)
308 = fiExpr dflags new_to_drop body
310 body_fvs
= udfmToUfm
(freeVarsOf body
) `delVarSet`
id
312 rhs_fvs
= udfmToUfm rhs_dfvs
313 rule_fvs
= idRuleAndUnfoldingVars
id -- See Note [extra_fvs (2): free variables of rules]
314 extra_fvs | noFloatIntoRhs ann_rhs rhs_ty
= rule_fvs `unionVarSet` rhs_fvs
315 |
otherwise = rule_fvs
316 -- See Note [extra_fvs (1): avoid floating into RHS]
317 -- No point in floating in only to float straight out again
318 -- Ditto ok-for-speculation unlifted RHSs
320 [shared_binds
, extra_binds
, rhs_binds
, body_binds
]
321 = sepBindsByDropPoint dflags
False [extra_fvs
, rhs_fvs
, body_fvs
] to_drop
323 new_to_drop
= body_binds
++ -- the bindings used only in the body
324 [FB
(unitVarSet
id) rhs_fvs
'
325 (FloatLet
(NonRec
id rhs
'))] ++ -- the new binding itself
326 extra_binds
++ -- bindings from extra_fvs
327 shared_binds
-- the bindings used both in rhs and body
329 -- Push rhs_binds into the right hand side of the binding
330 rhs
' = fiExpr dflags rhs_binds rhs
331 rhs_fvs
' = rhs_fvs `unionVarSet` floatedBindsFVs rhs_binds `unionVarSet` rule_fvs
332 -- Don't forget the rule_fvs; the binding mentions them!
334 fiExpr dflags to_drop
(_
,AnnLet
(AnnRec bindings
) body
)
335 = fiExpr dflags new_to_drop body
337 (ids
, rhss
) = unzip bindings
338 rhss_fvs
= map (udfmToUfm
. freeVarsOf
) rhss
339 body_fvs
= udfmToUfm
$ freeVarsOf body
341 -- See Note [extra_fvs (1,2)]
342 rule_fvs
= mapUnionVarSet idRuleAndUnfoldingVars ids
343 extra_fvs
= rule_fvs `unionVarSet`
344 unionVarSets
[ udfmToUfm fvs |
(fvs
, rhs
) <- rhss
345 , noFloatIntoExpr rhs
]
347 (shared_binds
:extra_binds
:body_binds
:rhss_binds
)
348 = sepBindsByDropPoint dflags
False (extra_fvs
:body_fvs
:rhss_fvs
) to_drop
350 new_to_drop
= body_binds
++ -- the bindings used only in the body
351 [FB
(mkVarSet ids
) rhs_fvs
'
352 (FloatLet
(Rec
(fi_bind rhss_binds bindings
)))] ++
353 -- The new binding itself
354 extra_binds
++ -- Note [extra_fvs (1,2)]
355 shared_binds
-- Used in more than one place
357 rhs_fvs
' = unionVarSets rhss_fvs `unionVarSet`
358 unionVarSets
(map floatedBindsFVs rhss_binds
) `unionVarSet`
359 rule_fvs
-- Don't forget the rule variables!
361 -- Push rhs_binds into the right hand side of the binding
362 fi_bind
:: [FloatInBinds
] -- one per "drop pt" conjured w/ fvs_of_rhss
363 -> [(Id
, CoreExprWithFVs
)]
366 fi_bind to_drops pairs
367 = [ (binder
, fiExpr dflags to_drop rhs
)
368 |
((binder
, rhs
), to_drop
) <- zipEqual
"fi_bind" pairs to_drops
]
371 For @Case@, the possible ``drop points'' for the \tr{to_drop}
372 bindings are: (a)~inside the scrutinee, (b)~inside one of the
373 alternatives/default [default FVs always {\em first}!].
375 Floating case expressions inward was added to fix Trac #5658: strict bindings
376 not floated in. In particular, this change allows array indexing operations,
377 which have a single DEFAULT alternative without any binders, to be floated
378 inward. SIMD primops for unpacking SIMD vectors into an unboxed tuple of unboxed
379 scalars also need to be floated inward, but unpacks have a single non-DEFAULT
380 alternative that binds the elements of the tuple. We now therefore also support
381 floating in cases with a single alternative that may bind values.
384 fiExpr dflags to_drop
(_
, AnnCase scrut case_bndr _
[(con
,alt_bndrs
,rhs
)])
385 | isUnLiftedType
(idType case_bndr
)
386 , exprOkForSideEffects
(deAnnotate scrut
)
387 -- See PrimOp, Note [PrimOp can_fail and has_side_effects]
388 = wrapFloats shared_binds
$
389 fiExpr dflags
(case_float
: rhs_binds
) rhs
391 case_float
= FB
(mkVarSet
(case_bndr
: alt_bndrs
)) scrut_fvs
392 (FloatCase scrut
' case_bndr con alt_bndrs
)
393 scrut
' = fiExpr dflags scrut_binds scrut
394 [shared_binds
, scrut_binds
, rhs_binds
]
395 = sepBindsByDropPoint dflags
False [scrut_fvs
, rhs_fvs
] to_drop
396 rhs_fvs
= udfmToUfm
(freeVarsOf rhs
) `delVarSetList`
(case_bndr
: alt_bndrs
)
397 scrut_fvs
= udfmToUfm
$ freeVarsOf scrut
399 fiExpr dflags to_drop
(_
, AnnCase scrut case_bndr ty alts
)
400 = wrapFloats drop_here1
$
401 wrapFloats drop_here2
$
402 Case
(fiExpr dflags scrut_drops scrut
) case_bndr ty
403 (zipWith fi_alt alts_drops_s alts
)
405 -- Float into the scrut and alts-considered-together just like App
406 [drop_here1
, scrut_drops
, alts_drops
]
407 = sepBindsByDropPoint dflags
False [scrut_fvs
, all_alts_fvs
] to_drop
409 -- Float into the alts with the is_case flag set
410 (drop_here2
: alts_drops_s
) = sepBindsByDropPoint dflags
True alts_fvs alts_drops
412 scrut_fvs
= udfmToUfm
$ freeVarsOf scrut
413 alts_fvs
= map alt_fvs alts
414 all_alts_fvs
= unionVarSets alts_fvs
415 alt_fvs
(_con
, args
, rhs
) = foldl delVarSet
(udfmToUfm
$ freeVarsOf rhs
) (case_bndr
:args
)
416 -- Delete case_bndr and args from free vars of rhs
417 -- to get free vars of alt
419 fi_alt to_drop
(con
, args
, rhs
) = (con
, args
, fiExpr dflags to_drop rhs
)
421 okToFloatInside
:: [Var
] -> Bool
422 okToFloatInside bndrs
= all ok bndrs
424 ok b
= not (isId b
) || isOneShotBndr b
425 -- Push the floats inside there are no non-one-shot value binders
427 noFloatIntoRhs
:: AnnExpr
' Var
(UniqDFM Var
) -> Type
-> Bool
428 -- ^ True if it's a bad idea to float bindings into this RHS
429 -- Preconditio: rhs :: rhs_ty
430 noFloatIntoRhs rhs rhs_ty
431 = isUnLiftedType rhs_ty
-- See Note [Do not destroy the let/app invariant]
432 || noFloatIntoExpr rhs
434 noFloatIntoExpr
:: AnnExpr
' Var
(UniqDFM Var
) -> Bool
435 noFloatIntoExpr
(AnnLam bndr e
)
436 = not (okToFloatInside
(bndr
:bndrs
))
437 -- NB: Must line up with fiExpr (AnnLam...); see Trac #7088
439 (bndrs
, _
) = collectAnnBndrs e
440 -- IMPORTANT: don't say 'True' for a RHS with a one-shot lambda at the top.
441 -- This makes a big difference for things like
442 -- f x# = let x = I# x#
443 -- in let j = \() -> ...x...
444 -- in if <condition> then normal-path else j ()
445 -- If x is used only in the error case join point, j, we must float the
446 -- boxing constructor into it, else we box it every time which is very bad
449 noFloatIntoExpr rhs
= exprIsExpandable
(deAnnotate
' rhs
)
450 -- We'd just float right back out again...
451 -- Should match the test in SimplEnv.doFloatFromRhs
454 ************************************************************************
456 \subsection{@sepBindsByDropPoint@}
458 ************************************************************************
460 This is the crucial function. The idea is: We have a wad of bindings
461 that we'd like to distribute inside a collection of {\em drop points};
462 insides the alternatives of a \tr{case} would be one example of some
463 drop points; the RHS and body of a non-recursive \tr{let} binding
464 would be another (2-element) collection.
466 So: We're given a list of sets-of-free-variables, one per drop point,
467 and a list of floating-inwards bindings. If a binding can go into
468 only one drop point (without suddenly making something out-of-scope),
469 in it goes. If a binding is used inside {\em multiple} drop points,
470 then it has to go in a you-must-drop-it-above-all-these-drop-points
473 We have to maintain the order on these drop-point-related lists.
478 -> Bool -- True <=> is case expression
479 -> [FreeVarSet
] -- One set of FVs per drop point
480 -> FloatInBinds
-- Candidate floaters
481 -> [FloatInBinds
] -- FIRST one is bindings which must not be floated
482 -- inside any drop point; the rest correspond
483 -- one-to-one with the input list of FV sets
485 -- Every input floater is returned somewhere in the result;
486 -- none are dropped, not even ones which don't seem to be
487 -- free in *any* of the drop-point fvs. Why? Because, for example,
488 -- a binding (let x = E in B) might have a specialised version of
489 -- x (say x') stored inside x, but x' isn't free in E or B.
491 type DropBox
= (FreeVarSet
, FloatInBinds
)
493 sepBindsByDropPoint _ _is_case drop_pts
[]
494 = [] : [[] | _
<- drop_pts
] -- cut to the chase scene; it happens
496 sepBindsByDropPoint dflags is_case drop_pts floaters
497 = go floaters
(map (\fvs
-> (fvs
, [])) (emptyVarSet
: drop_pts
))
499 go
:: FloatInBinds
-> [DropBox
] -> [FloatInBinds
]
500 -- The *first* one in the argument list is the drop_here set
501 -- The FloatInBinds in the lists are in the reverse of
502 -- the normal FloatInBinds order; that is, they are the right way round!
504 go
[] drop_boxes
= map (reverse . snd) drop_boxes
506 go
(bind_w_fvs
@(FB bndrs bind_fvs bind
) : binds
) drop_boxes
@(here_box
: fork_boxes
)
509 -- "here" means the group of bindings dropped at the top of the fork
511 (used_here
: used_in_flags
) = [ fvs `intersectsVarSet` bndrs
512 |
(fvs
, _
) <- drop_boxes
]
514 drop_here
= used_here ||
not can_push
516 -- For case expressions we duplicate the binding if it is
517 -- reasonably small, and if it is not used in all the RHSs
518 -- This is good for situations like
523 -- E -> ...not mentioning x...
525 n_alts
= length used_in_flags
526 n_used_alts
= count
id used_in_flags
-- returns number of Trues in list.
528 can_push
= n_used_alts
== 1 -- Used in just one branch
529 ||
(is_case
&& -- We are looking at case alternatives
530 n_used_alts
> 1 && -- It's used in more than one
531 n_used_alts
< n_alts
&& -- ...but not all
532 floatIsDupable dflags bind
) -- and we can duplicate the binding
534 new_boxes | drop_here
= (insert here_box
: fork_boxes
)
535 |
otherwise = (here_box
: new_fork_boxes
)
537 new_fork_boxes
= zipWithEqual
"FloatIn.sepBinds" insert_maybe fork_boxes used_in_flags
539 insert :: DropBox
-> DropBox
540 insert (fvs
,drops
) = (fvs `unionVarSet` bind_fvs
, bind_w_fvs
:drops
)
542 insert_maybe box
True = insert box
543 insert_maybe box
False = box
545 go _ _
= panic
"sepBindsByDropPoint/go"
548 floatedBindsFVs
:: FloatInBinds
-> FreeVarSet
549 floatedBindsFVs binds
= mapUnionVarSet fbFVs binds
551 fbFVs
:: FloatInBind
-> VarSet
552 fbFVs
(FB _ fvs _
) = fvs
554 wrapFloats
:: FloatInBinds
-> CoreExpr
-> CoreExpr
555 -- Remember FloatInBinds is in *reverse* dependency order
557 wrapFloats
(FB _ _ fl
: bs
) e
= wrapFloats bs
(wrapFloat fl e
)
559 floatIsDupable
:: DynFlags
-> FloatBind
-> Bool
560 floatIsDupable dflags
(FloatCase scrut _ _ _
) = exprIsDupable dflags scrut
561 floatIsDupable dflags
(FloatLet
(Rec prs
)) = all (exprIsDupable dflags
. snd) prs
562 floatIsDupable dflags
(FloatLet
(NonRec _ r
)) = exprIsDupable dflags r