Document SCC determinism
[ghc.git] / compiler / simplCore / OccurAnal.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 ************************************************************************
5 * *
6 \section[OccurAnal]{Occurrence analysis pass}
7 * *
8 ************************************************************************
9
10 The occurrence analyser re-typechecks a core expression, returning a new
11 core expression with (hopefully) improved usage information.
12 -}
13
14 {-# LANGUAGE CPP, BangPatterns #-}
15
16 module OccurAnal (
17 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
18 ) where
19
20 #include "HsVersions.h"
21
22 import CoreSyn
23 import CoreFVs
24 import CoreUtils ( exprIsTrivial, isDefaultAlt, isExpandableApp,
25 stripTicksTopE, mkTicks )
26 import Id
27 import Name( localiseName )
28 import BasicTypes
29 import Module( Module )
30 import Coercion
31
32 import VarSet
33 import VarEnv
34 import Var
35 import Demand ( argOneShots, argsOneShots )
36 import Maybes ( orElse )
37 import Digraph ( SCC(..), stronglyConnCompFromEdgedVerticesR )
38 import Unique
39 import UniqFM
40 import Util
41 import Outputable
42 import Data.List
43 import Control.Arrow ( second )
44
45 {-
46 ************************************************************************
47 * *
48 \subsection[OccurAnal-main]{Counting occurrences: main function}
49 * *
50 ************************************************************************
51
52 Here's the externally-callable interface:
53 -}
54
55 occurAnalysePgm :: Module -- Used only in debug output
56 -> (Activation -> Bool)
57 -> [CoreRule] -> [CoreVect] -> VarSet
58 -> CoreProgram -> CoreProgram
59 occurAnalysePgm this_mod active_rule imp_rules vects vectVars binds
60 | isEmptyVarEnv final_usage
61 = occ_anald_binds
62
63 | otherwise -- See Note [Glomming]
64 = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
65 2 (ppr final_usage ) )
66 occ_anald_glommed_binds
67 where
68 init_env = initOccEnv active_rule
69 (final_usage, occ_anald_binds) = go init_env binds
70 (_, occ_anald_glommed_binds) = occAnalRecBind init_env imp_rule_edges
71 (flattenBinds occ_anald_binds)
72 initial_uds
73 -- It's crucial to re-analyse the glommed-together bindings
74 -- so that we establish the right loop breakers. Otherwise
75 -- we can easily create an infinite loop (Trac #9583 is an example)
76
77 initial_uds = addIdOccs emptyDetails
78 (rulesFreeVars imp_rules `unionVarSet`
79 vectsFreeVars vects `unionVarSet`
80 vectVars)
81 -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations,
82 -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are
83 -- reflected in 'vectors' — see Note [Vectorisation declarations and occurrences].)
84
85 -- Note [Preventing loops due to imported functions rules]
86 imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
87 [ mapVarEnv (const maps_to) (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
88 | imp_rule <- imp_rules
89 , not (isBuiltinRule imp_rule) -- See Note [Plugin rules]
90 , let maps_to = exprFreeIds (ru_rhs imp_rule)
91 `delVarSetList` ru_bndrs imp_rule
92 , arg <- ru_args imp_rule ]
93
94 go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
95 go _ []
96 = (initial_uds, [])
97 go env (bind:binds)
98 = (final_usage, bind' ++ binds')
99 where
100 (bs_usage, binds') = go env binds
101 (final_usage, bind') = occAnalBind env imp_rule_edges bind bs_usage
102
103 occurAnalyseExpr :: CoreExpr -> CoreExpr
104 -- Do occurrence analysis, and discard occurrence info returned
105 occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap
106
107 occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
108 occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap
109
110 occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
111 occurAnalyseExpr' enable_binder_swap expr
112 = snd (occAnal env expr)
113 where
114 env = (initOccEnv all_active_rules) {occ_binder_swap = enable_binder_swap}
115 -- To be conservative, we say that all inlines and rules are active
116 all_active_rules = \_ -> True
117
118 {- Note [Plugin rules]
119 ~~~~~~~~~~~~~~~~~~~~~~
120 Conal Elliott (Trac #11651) built a GHC plugin that added some
121 BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
122 do some domain-specific transformations that could not be expressed
123 with an ordinary pattern-matching CoreRule. But then we can't extract
124 the dependencies (in imp_rule_edges) from ru_rhs etc, because a
125 BuiltinRule doesn't have any of that stuff.
126
127 So we simply assume that BuiltinRules have no dependencies, and filter
128 them out from the imp_rule_edges comprehension.
129 -}
130
131 {-
132 ************************************************************************
133 * *
134 \subsection[OccurAnal-main]{Counting occurrences: main function}
135 * *
136 ************************************************************************
137
138 Bindings
139 ~~~~~~~~
140 -}
141
142 type ImpRuleEdges = IdEnv IdSet -- Mapping from FVs of imported RULE LHSs to RHS FVs
143
144 noImpRuleEdges :: ImpRuleEdges
145 noImpRuleEdges = emptyVarEnv
146
147 occAnalBind :: OccEnv -- The incoming OccEnv
148 -> ImpRuleEdges
149 -> CoreBind
150 -> UsageDetails -- Usage details of scope
151 -> (UsageDetails, -- Of the whole let(rec)
152 [CoreBind])
153
154 occAnalBind env top_env (NonRec binder rhs) body_usage
155 = occAnalNonRecBind env top_env binder rhs body_usage
156 occAnalBind env top_env (Rec pairs) body_usage
157 = occAnalRecBind env top_env pairs body_usage
158
159 -----------------
160 occAnalNonRecBind :: OccEnv -> ImpRuleEdges -> Var -> CoreExpr
161 -> UsageDetails -> (UsageDetails, [CoreBind])
162 occAnalNonRecBind env imp_rule_edges binder rhs body_usage
163 | isTyVar binder -- A type let; we don't gather usage info
164 = (body_usage, [NonRec binder rhs])
165
166 | not (binder `usedIn` body_usage) -- It's not mentioned
167 = (body_usage, [])
168
169 | otherwise -- It's mentioned in the body
170 = (body_usage' +++ rhs_usage4, [NonRec tagged_binder rhs'])
171 where
172 (body_usage', tagged_binder) = tagBinder body_usage binder
173 (rhs_usage1, rhs') = occAnalNonRecRhs env tagged_binder rhs
174 rhs_usage2 = addIdOccs rhs_usage1 (idUnfoldingVars binder)
175
176 rhs_usage3 = addIdOccs rhs_usage2 (idRuleVars binder)
177 -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
178
179 rhs_usage4 = maybe rhs_usage3 (addIdOccs rhs_usage3) $
180 lookupVarEnv imp_rule_edges binder
181 -- See Note [Preventing loops due to imported functions rules]
182
183 -----------------
184 occAnalRecBind :: OccEnv -> ImpRuleEdges -> [(Var,CoreExpr)]
185 -> UsageDetails -> (UsageDetails, [CoreBind])
186 occAnalRecBind env imp_rule_edges pairs body_usage
187 = foldr occAnalRec (body_usage, []) sccs
188 -- For a recursive group, we
189 -- * occ-analyse all the RHSs
190 -- * compute strongly-connected components
191 -- * feed those components to occAnalRec
192 where
193 bndr_set = mkVarSet (map fst pairs)
194
195 sccs :: [SCC (Node Details)]
196 sccs = {-# SCC "occAnalBind.scc" #-} stronglyConnCompFromEdgedVerticesR nodes
197
198 nodes :: [Node Details]
199 nodes = {-# SCC "occAnalBind.assoc" #-} map (makeNode env imp_rule_edges bndr_set) pairs
200
201 {-
202 Note [Dead code]
203 ~~~~~~~~~~~~~~~~
204 Dropping dead code for a cyclic Strongly Connected Component is done
205 in a very simple way:
206
207 the entire SCC is dropped if none of its binders are mentioned
208 in the body; otherwise the whole thing is kept.
209
210 The key observation is that dead code elimination happens after
211 dependency analysis: so 'occAnalBind' processes SCCs instead of the
212 original term's binding groups.
213
214 Thus 'occAnalBind' does indeed drop 'f' in an example like
215
216 letrec f = ...g...
217 g = ...(...g...)...
218 in
219 ...g...
220
221 when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
222 'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
223 'AcyclicSCC f', where 'body_usage' won't contain 'f'.
224
225 ------------------------------------------------------------
226 Note [Forming Rec groups]
227 ~~~~~~~~~~~~~~~~~~~~~~~~~
228 We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
229 and "g uses f", no matter how indirectly. We do a SCC analysis
230 with an edge f -> g if "f uses g".
231
232 More precisely, "f uses g" iff g should be in scope wherever f is.
233 That is, g is free in:
234 a) the rhs 'ef'
235 b) or the RHS of a rule for f (Note [Rules are extra RHSs])
236 c) or the LHS or a rule for f (Note [Rule dependency info])
237
238 These conditions apply regardless of the activation of the RULE (eg it might be
239 inactive in this phase but become active later). Once a Rec is broken up
240 it can never be put back together, so we must be conservative.
241
242 The principle is that, regardless of rule firings, every variable is
243 always in scope.
244
245 * Note [Rules are extra RHSs]
246 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
247 A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
248 keeps the specialised "children" alive. If the parent dies
249 (because it isn't referenced any more), then the children will die
250 too (unless they are already referenced directly).
251
252 To that end, we build a Rec group for each cyclic strongly
253 connected component,
254 *treating f's rules as extra RHSs for 'f'*.
255 More concretely, the SCC analysis runs on a graph with an edge
256 from f -> g iff g is mentioned in
257 (a) f's rhs
258 (b) f's RULES
259 These are rec_edges.
260
261 Under (b) we include variables free in *either* LHS *or* RHS of
262 the rule. The former might seems silly, but see Note [Rule
263 dependency info]. So in Example [eftInt], eftInt and eftIntFB
264 will be put in the same Rec, even though their 'main' RHSs are
265 both non-recursive.
266
267 * Note [Rule dependency info]
268 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
269 The VarSet in a RuleInfo is used for dependency analysis in the
270 occurrence analyser. We must track free vars in *both* lhs and rhs.
271 Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
272 Why both? Consider
273 x = y
274 RULE f x = v+4
275 Then if we substitute y for x, we'd better do so in the
276 rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
277 as well as 'v'
278
279 * Note [Rules are visible in their own rec group]
280 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
281 We want the rules for 'f' to be visible in f's right-hand side.
282 And we'd like them to be visible in other functions in f's Rec
283 group. E.g. in Note [Specialisation rules] we want f' rule
284 to be visible in both f's RHS, and fs's RHS.
285
286 This means that we must simplify the RULEs first, before looking
287 at any of the definitions. This is done by Simplify.simplRecBind,
288 when it calls addLetIdInfo.
289
290 ------------------------------------------------------------
291 Note [Choosing loop breakers]
292 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
293 Loop breaking is surprisingly subtle. First read the section 4 of
294 "Secrets of the GHC inliner". This describes our basic plan.
295 We avoid infinite inlinings by choosing loop breakers, and
296 ensuring that a loop breaker cuts each loop.
297
298 Fundamentally, we do SCC analysis on a graph. For each recursive
299 group we choose a loop breaker, delete all edges to that node,
300 re-analyse the SCC, and iterate.
301
302 But what is the graph? NOT the same graph as was used for Note
303 [Forming Rec groups]! In particular, a RULE is like an equation for
304 'f' that is *always* inlined if it is applicable. We do *not* disable
305 rules for loop-breakers. It's up to whoever makes the rules to make
306 sure that the rules themselves always terminate. See Note [Rules for
307 recursive functions] in Simplify.hs
308
309 Hence, if
310 f's RHS (or its INLINE template if it has one) mentions g, and
311 g has a RULE that mentions h, and
312 h has a RULE that mentions f
313
314 then we *must* choose f to be a loop breaker. Example: see Note
315 [Specialisation rules].
316
317 In general, take the free variables of f's RHS, and augment it with
318 all the variables reachable by RULES from those starting points. That
319 is the whole reason for computing rule_fv_env in occAnalBind. (Of
320 course we only consider free vars that are also binders in this Rec
321 group.) See also Note [Finding rule RHS free vars]
322
323 Note that when we compute this rule_fv_env, we only consider variables
324 free in the *RHS* of the rule, in contrast to the way we build the
325 Rec group in the first place (Note [Rule dependency info])
326
327 Note that if 'g' has RHS that mentions 'w', we should add w to
328 g's loop-breaker edges. More concretely there is an edge from f -> g
329 iff
330 (a) g is mentioned in f's RHS `xor` f's INLINE rhs
331 (see Note [Inline rules])
332 (b) or h is mentioned in f's RHS, and
333 g appears in the RHS of an active RULE of h
334 or a transitive sequence of active rules starting with h
335
336 Why "active rules"? See Note [Finding rule RHS free vars]
337
338 Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
339 chosen as a loop breaker, because their RHSs don't mention each other.
340 And indeed both can be inlined safely.
341
342 Note again that the edges of the graph we use for computing loop breakers
343 are not the same as the edges we use for computing the Rec blocks.
344 That's why we compute
345
346 - rec_edges for the Rec block analysis
347 - loop_breaker_edges for the loop breaker analysis
348
349 * Note [Finding rule RHS free vars]
350 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
351 Consider this real example from Data Parallel Haskell
352 tagZero :: Array Int -> Array Tag
353 {-# INLINE [1] tagZeroes #-}
354 tagZero xs = pmap (\x -> fromBool (x==0)) xs
355
356 {-# RULES "tagZero" [~1] forall xs n.
357 pmap fromBool <blah blah> = tagZero xs #-}
358 So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
359 However, tagZero can only be inlined in phase 1 and later, while
360 the RULE is only active *before* phase 1. So there's no problem.
361
362 To make this work, we look for the RHS free vars only for
363 *active* rules. That's the reason for the occ_rule_act field
364 of the OccEnv.
365
366 * Note [Weak loop breakers]
367 ~~~~~~~~~~~~~~~~~~~~~~~~~
368 There is a last nasty wrinkle. Suppose we have
369
370 Rec { f = f_rhs
371 RULE f [] = g
372
373 h = h_rhs
374 g = h
375 ...more...
376 }
377
378 Remember that we simplify the RULES before any RHS (see Note
379 [Rules are visible in their own rec group] above).
380
381 So we must *not* postInlineUnconditionally 'g', even though
382 its RHS turns out to be trivial. (I'm assuming that 'g' is
383 not choosen as a loop breaker.) Why not? Because then we
384 drop the binding for 'g', which leaves it out of scope in the
385 RULE!
386
387 Here's a somewhat different example of the same thing
388 Rec { g = h
389 ; h = ...f...
390 ; f = f_rhs
391 RULE f [] = g }
392 Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
393 g, because the RULE for f is active throughout. So the RHS of h
394 might rewrite to h = ...g...
395 So g must remain in scope in the output program!
396
397 We "solve" this by:
398
399 Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
400 iff g is a "missing free variable" of the Rec group
401
402 A "missing free variable" x is one that is mentioned in an RHS or
403 INLINE or RULE of a binding in the Rec group, but where the
404 dependency on x may not show up in the loop_breaker_edges (see
405 note [Choosing loop breakers} above).
406
407 A normal "strong" loop breaker has IAmLoopBreaker False. So
408
409 Inline postInlineUnconditionally
410 strong IAmLoopBreaker False no no
411 weak IAmLoopBreaker True yes no
412 other yes yes
413
414 The **sole** reason for this kind of loop breaker is so that
415 postInlineUnconditionally does not fire. Ugh. (Typically it'll
416 inline via the usual callSiteInline stuff, so it'll be dead in the
417 next pass, so the main Ugh is the tiresome complication.)
418
419 Note [Rules for imported functions]
420 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
421 Consider this
422 f = /\a. B.g a
423 RULE B.g Int = 1 + f Int
424 Note that
425 * The RULE is for an imported function.
426 * f is non-recursive
427 Now we
428 can get
429 f Int --> B.g Int Inlining f
430 --> 1 + f Int Firing RULE
431 and so the simplifier goes into an infinite loop. This
432 would not happen if the RULE was for a local function,
433 because we keep track of dependencies through rules. But
434 that is pretty much impossible to do for imported Ids. Suppose
435 f's definition had been
436 f = /\a. C.h a
437 where (by some long and devious process), C.h eventually inlines to
438 B.g. We could only spot such loops by exhaustively following
439 unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
440 f.
441
442 Note that RULES for imported functions are important in practice; they
443 occur a lot in the libraries.
444
445 We regard this potential infinite loop as a *programmer* error.
446 It's up the programmer not to write silly rules like
447 RULE f x = f x
448 and the example above is just a more complicated version.
449
450 Note [Preventing loops due to imported functions rules]
451 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
452 Consider:
453 import GHC.Base (foldr)
454
455 {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
456 filter p xs = build (\c n -> foldr (filterFB c p) n xs)
457 filterFB c p = ...
458
459 f = filter p xs
460
461 Note that filter is not a loop-breaker, so what happens is:
462 f = filter p xs
463 = {inline} build (\c n -> foldr (filterFB c p) n xs)
464 = {inline} foldr (filterFB (:) p) [] xs
465 = {RULE} filter p xs
466
467 We are in an infinite loop.
468
469 A more elaborate example (that I actually saw in practice when I went to
470 mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
471 {-# LANGUAGE RankNTypes #-}
472 module GHCList where
473
474 import Prelude hiding (filter)
475 import GHC.Base (build)
476
477 {-# INLINABLE filter #-}
478 filter :: (a -> Bool) -> [a] -> [a]
479 filter p [] = []
480 filter p (x:xs) = if p x then x : filter p xs else filter p xs
481
482 {-# NOINLINE [0] filterFB #-}
483 filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
484 filterFB c p x r | p x = x `c` r
485 | otherwise = r
486
487 {-# RULES
488 "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr
489 (filterFB c p) n xs)
490 "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p
491 #-}
492
493 Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
494 are not), the unfolding given to "filter" in the interface file will be:
495 filter p [] = []
496 filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
497 else build (\c n -> foldr (filterFB c p) n xs
498
499 Note that because this unfolding does not mention "filter", filter is not
500 marked as a strong loop breaker. Therefore at a use site in another module:
501 filter p xs
502 = {inline}
503 case xs of [] -> []
504 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
505 else build (\c n -> foldr (filterFB c p) n xs)
506
507 build (\c n -> foldr (filterFB c p) n xs)
508 = {inline} foldr (filterFB (:) p) [] xs
509 = {RULE} filter p xs
510
511 And we are in an infinite loop again, except that this time the loop is producing an
512 infinitely large *term* (an unrolling of filter) and so the simplifier finally
513 dies with "ticks exhausted"
514
515 Because of this problem, we make a small change in the occurrence analyser
516 designed to mark functions like "filter" as strong loop breakers on the basis that:
517 1. The RHS of filter mentions the local function "filterFB"
518 2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS
519
520 So for each RULE for an *imported* function we are going to add
521 dependency edges between the *local* FVS of the rule LHS and the
522 *local* FVS of the rule RHS. We don't do anything special for RULES on
523 local functions because the standard occurrence analysis stuff is
524 pretty good at getting loop-breakerness correct there.
525
526 It is important to note that even with this extra hack we aren't always going to get
527 things right. For example, it might be that the rule LHS mentions an imported Id,
528 and another module has a RULE that can rewrite that imported Id to one of our local
529 Ids.
530
531 Note [Specialising imported functions]
532 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
533 BUT for *automatically-generated* rules, the programmer can't be
534 responsible for the "programmer error" in Note [Rules for imported
535 functions]. In paricular, consider specialising a recursive function
536 defined in another module. If we specialise a recursive function B.g,
537 we get
538 g_spec = .....(B.g Int).....
539 RULE B.g Int = g_spec
540 Here, g_spec doesn't look recursive, but when the rule fires, it
541 becomes so. And if B.g was mutually recursive, the loop might
542 not be as obvious as it is here.
543
544 To avoid this,
545 * When specialising a function that is a loop breaker,
546 give a NOINLINE pragma to the specialised function
547
548 Note [Glomming]
549 ~~~~~~~~~~~~~~~
550 RULES for imported Ids can make something at the top refer to something at the bottom:
551 f = \x -> B.g (q x)
552 h = \y -> 3
553
554 RULE: B.g (q x) = h x
555
556 Applying this rule makes f refer to h, although f doesn't appear to
557 depend on h. (And, as in Note [Rules for imported functions], the
558 dependency might be more indirect. For example, f might mention C.t
559 rather than B.g, where C.t eventually inlines to B.g.)
560
561 NOTICE that this cannot happen for rules whose head is a
562 locally-defined function, because we accurately track dependencies
563 through RULES. It only happens for rules whose head is an imported
564 function (B.g in the example above).
565
566 Solution:
567 - When simplifying, bring all top level identifiers into
568 scope at the start, ignoring the Rec/NonRec structure, so
569 that when 'h' pops up in f's rhs, we find it in the in-scope set
570 (as the simplifier generally expects). This happens in simplTopBinds.
571
572 - In the occurrence analyser, if there are any out-of-scope
573 occurrences that pop out of the top, which will happen after
574 firing the rule: f = \x -> h x
575 h = \y -> 3
576 then just glom all the bindings into a single Rec, so that
577 the *next* iteration of the occurrence analyser will sort
578 them all out. This part happens in occurAnalysePgm.
579
580 ------------------------------------------------------------
581 Note [Inline rules]
582 ~~~~~~~~~~~~~~~~~~~
583 None of the above stuff about RULES applies to Inline Rules,
584 stored in a CoreUnfolding. The unfolding, if any, is simplified
585 at the same time as the regular RHS of the function (ie *not* like
586 Note [Rules are visible in their own rec group]), so it should be
587 treated *exactly* like an extra RHS.
588
589 Or, rather, when computing loop-breaker edges,
590 * If f has an INLINE pragma, and it is active, we treat the
591 INLINE rhs as f's rhs
592 * If it's inactive, we treat f as having no rhs
593 * If it has no INLINE pragma, we look at f's actual rhs
594
595
596 There is a danger that we'll be sub-optimal if we see this
597 f = ...f...
598 [INLINE f = ..no f...]
599 where f is recursive, but the INLINE is not. This can just about
600 happen with a sufficiently odd set of rules; eg
601
602 foo :: Int -> Int
603 {-# INLINE [1] foo #-}
604 foo x = x+1
605
606 bar :: Int -> Int
607 {-# INLINE [1] bar #-}
608 bar x = foo x + 1
609
610 {-# RULES "foo" [~1] forall x. foo x = bar x #-}
611
612 Here the RULE makes bar recursive; but it's INLINE pragma remains
613 non-recursive. It's tempting to then say that 'bar' should not be
614 a loop breaker, but an attempt to do so goes wrong in two ways:
615 a) We may get
616 $df = ...$cfoo...
617 $cfoo = ...$df....
618 [INLINE $cfoo = ...no-$df...]
619 But we want $cfoo to depend on $df explicitly so that we
620 put the bindings in the right order to inline $df in $cfoo
621 and perhaps break the loop altogether. (Maybe this
622 b)
623
624
625 Example [eftInt]
626 ~~~~~~~~~~~~~~~
627 Example (from GHC.Enum):
628
629 eftInt :: Int# -> Int# -> [Int]
630 eftInt x y = ...(non-recursive)...
631
632 {-# INLINE [0] eftIntFB #-}
633 eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
634 eftIntFB c n x y = ...(non-recursive)...
635
636 {-# RULES
637 "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
638 "eftIntList" [1] eftIntFB (:) [] = eftInt
639 #-}
640
641 Note [Specialisation rules]
642 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
643 Consider this group, which is typical of what SpecConstr builds:
644
645 fs a = ....f (C a)....
646 f x = ....f (C a)....
647 {-# RULE f (C a) = fs a #-}
648
649 So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
650
651 But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
652 - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
653 - fs is inlined (say it's small)
654 - now there's another opportunity to apply the RULE
655
656 This showed up when compiling Control.Concurrent.Chan.getChanContents.
657 -}
658
659 type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique,
660 -- which is gotten from the Id.
661 data Details
662 = ND { nd_bndr :: Id -- Binder
663 , nd_rhs :: CoreExpr -- RHS, already occ-analysed
664
665 , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings
666 -- ignoring phase (ie assuming all are active)
667 -- See Note [Forming Rec groups]
668
669 , nd_inl :: IdSet -- Free variables of
670 -- the stable unfolding (if present and active)
671 -- or the RHS (if not)
672 -- but excluding any RULES
673 -- This is the IdSet that may be used if the Id is inlined
674
675 , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds
676 -- but are *not* in nd_inl. These are the ones whose
677 -- dependencies might not be respected by loop_breaker_edges
678 -- See Note [Weak loop breakers]
679
680 , nd_active_rule_fvs :: IdSet -- Free variables of the RHS of active RULES
681 }
682
683 instance Outputable Details where
684 ppr nd = text "ND" <> braces
685 (sep [ text "bndr =" <+> ppr (nd_bndr nd)
686 , text "uds =" <+> ppr (nd_uds nd)
687 , text "inl =" <+> ppr (nd_inl nd)
688 , text "weak =" <+> ppr (nd_weak nd)
689 , text "rule =" <+> ppr (nd_active_rule_fvs nd)
690 ])
691
692 makeNode :: OccEnv -> ImpRuleEdges -> VarSet -> (Var, CoreExpr) -> Node Details
693 makeNode env imp_rule_edges bndr_set (bndr, rhs)
694 = (details, varUnique bndr, nonDetKeysUFM node_fvs)
695 -- It's OK to use nonDetKeysUFM here as stronglyConnCompFromEdgedVerticesR
696 -- is still deterministic with edges in nondeterministic order as
697 -- explained in Note [Deterministic SCC] in Digraph.
698 where
699 details = ND { nd_bndr = bndr
700 , nd_rhs = rhs'
701 , nd_uds = rhs_usage3
702 , nd_weak = node_fvs `minusVarSet` inl_fvs
703 , nd_inl = inl_fvs
704 , nd_active_rule_fvs = active_rule_fvs }
705
706 -- Constructing the edges for the main Rec computation
707 -- See Note [Forming Rec groups]
708 (rhs_usage1, rhs') = occAnalRecRhs env rhs
709 rhs_usage2 = addIdOccs rhs_usage1 all_rule_fvs -- Note [Rules are extra RHSs]
710 -- Note [Rule dependency info]
711 rhs_usage3 = case mb_unf_fvs of
712 Just unf_fvs -> addIdOccs rhs_usage2 unf_fvs
713 Nothing -> rhs_usage2
714 node_fvs = udFreeVars bndr_set rhs_usage3
715
716 -- Finding the free variables of the rules
717 is_active = occ_rule_act env :: Activation -> Bool
718 rules = filterOut isBuiltinRule (idCoreRules bndr)
719 rules_w_fvs :: [(Activation, VarSet)] -- Find the RHS fvs
720 rules_w_fvs = maybe id (\ids -> ((AlwaysActive, ids):)) (lookupVarEnv imp_rule_edges bndr)
721 -- See Note [Preventing loops due to imported functions rules]
722 [ (ru_act rule, fvs)
723 | rule <- rules
724 , let fvs = exprFreeVars (ru_rhs rule)
725 `delVarSetList` ru_bndrs rule
726 , not (isEmptyVarSet fvs) ]
727 all_rule_fvs = rule_lhs_fvs `unionVarSet` rule_rhs_fvs
728 rule_rhs_fvs = mapUnionVarSet snd rules_w_fvs
729 rule_lhs_fvs = mapUnionVarSet (\ru -> exprsFreeVars (ru_args ru)
730 `delVarSetList` ru_bndrs ru) rules
731 active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_fvs, is_active a]
732
733 -- Finding the free variables of the INLINE pragma (if any)
734 unf = realIdUnfolding bndr -- Ignore any current loop-breaker flag
735 mb_unf_fvs = stableUnfoldingVars unf
736
737 -- Find the "nd_inl" free vars; for the loop-breaker phase
738 inl_fvs = case mb_unf_fvs of
739 Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
740 Just unf_fvs -> unf_fvs
741 -- We could check for an *active* INLINE (returning
742 -- emptyVarSet for an inactive one), but is_active
743 -- isn't the right thing (it tells about
744 -- RULE activation), so we'd need more plumbing
745
746 -----------------------------
747 occAnalRec :: SCC (Node Details)
748 -> (UsageDetails, [CoreBind])
749 -> (UsageDetails, [CoreBind])
750
751 -- The NonRec case is just like a Let (NonRec ...) above
752 occAnalRec (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs, nd_uds = rhs_uds}, _, _))
753 (body_uds, binds)
754 | not (bndr `usedIn` body_uds)
755 = (body_uds, binds) -- See Note [Dead code]
756
757 | otherwise -- It's mentioned in the body
758 = (body_uds' +++ rhs_uds,
759 NonRec tagged_bndr rhs : binds)
760 where
761 (body_uds', tagged_bndr) = tagBinder body_uds bndr
762
763 -- The Rec case is the interesting one
764 -- See Note [Loop breaking]
765 occAnalRec (CyclicSCC nodes) (body_uds, binds)
766 | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
767 = (body_uds, binds) -- See Note [Dead code]
768
769 | otherwise -- At this point we always build a single Rec
770 = -- pprTrace "occAnalRec" (vcat
771 -- [ text "tagged nodes" <+> ppr tagged_nodes
772 -- , text "lb edges" <+> ppr loop_breaker_edges])
773 (final_uds, Rec pairs : binds)
774
775 where
776 bndrs = [b | (ND { nd_bndr = b }, _, _) <- nodes]
777 bndr_set = mkVarSet bndrs
778
779 ----------------------------
780 -- Tag the binders with their occurrence info
781 tagged_nodes = map tag_node nodes
782 total_uds = foldl add_uds body_uds nodes
783 final_uds = total_uds `minusVarEnv` bndr_set
784 add_uds usage_so_far (nd, _, _) = usage_so_far +++ nd_uds nd
785
786 tag_node :: Node Details -> Node Details
787 tag_node (details@ND { nd_bndr = bndr }, k, ks)
788 | let bndr1 = setBinderOcc total_uds bndr
789 = (details { nd_bndr = bndr1 }, k, ks)
790
791 ---------------------------
792 -- Now reconstruct the cycle
793 pairs :: [(Id,CoreExpr)]
794 pairs | isEmptyVarSet weak_fvs = reOrderNodes 0 bndr_set weak_fvs tagged_nodes []
795 | otherwise = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_edges []
796 -- If weak_fvs is empty, the loop_breaker_edges will include all
797 -- the edges in tagged_nodes, so there isn't any point in doing
798 -- a fresh SCC computation that will yield a single CyclicSCC result.
799
800 weak_fvs :: VarSet
801 weak_fvs = mapUnionVarSet (nd_weak . fstOf3) nodes
802
803 -- See Note [Choosing loop breakers] for loop_breaker_edges
804 loop_breaker_edges = map mk_node tagged_nodes
805 mk_node (details@(ND { nd_inl = inl_fvs }), k, _)
806 = (details, k, nonDetKeysUFM (extendFvs_ rule_fv_env inl_fvs))
807 -- It's OK to use nonDetKeysUFM here as
808 -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges
809 -- in nondeterministic order as explained in
810 -- Note [Deterministic SCC] in Digraph.
811
812 ------------------------------------
813 rule_fv_env :: IdEnv IdSet
814 -- Maps a variable f to the variables from this group
815 -- mentioned in RHS of active rules for f
816 -- Domain is *subset* of bound vars (others have no rule fvs)
817 rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
818 init_rule_fvs -- See Note [Finding rule RHS free vars]
819 = [ (b, trimmed_rule_fvs)
820 | (ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs },_,_) <- nodes
821 , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
822 , not (isEmptyVarSet trimmed_rule_fvs)]
823
824 {-
825 @loopBreakSCC@ is applied to the list of (binder,rhs) pairs for a cyclic
826 strongly connected component (there's guaranteed to be a cycle). It returns the
827 same pairs, but
828 a) in a better order,
829 b) with some of the Ids having a IAmALoopBreaker pragma
830
831 The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means
832 that the simplifier can guarantee not to loop provided it never records an inlining
833 for these no-inline guys.
834
835 Furthermore, the order of the binds is such that if we neglect dependencies
836 on the no-inline Ids then the binds are topologically sorted. This means
837 that the simplifier will generally do a good job if it works from top bottom,
838 recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
839 -}
840
841 type Binding = (Id,CoreExpr)
842
843 mk_loop_breaker :: Node Details -> Binding
844 mk_loop_breaker (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
845 = (setIdOccInfo bndr strongLoopBreaker, rhs)
846
847 mk_non_loop_breaker :: VarSet -> Node Details -> Binding
848 -- See Note [Weak loop breakers]
849 mk_non_loop_breaker used_in_rules (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
850 | bndr `elemVarSet` used_in_rules = (setIdOccInfo bndr weakLoopBreaker, rhs)
851 | otherwise = (bndr, rhs)
852
853 udFreeVars :: VarSet -> UsageDetails -> VarSet
854 -- Find the subset of bndrs that are mentioned in uds
855 udFreeVars bndrs uds = intersectUFM_C (\b _ -> b) bndrs uds
856
857 loopBreakNodes :: Int
858 -> VarSet -- All binders
859 -> VarSet -- Binders whose dependencies may be "missing"
860 -- See Note [Weak loop breakers]
861 -> [Node Details]
862 -> [Binding] -- Append these to the end
863 -> [Binding]
864 -- Return the bindings sorted into a plausible order, and marked with loop breakers.
865 loopBreakNodes depth bndr_set weak_fvs nodes binds
866 = go (stronglyConnCompFromEdgedVerticesR nodes) binds
867 where
868 go [] binds = binds
869 go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
870
871 loop_break_scc scc binds
872 = case scc of
873 AcyclicSCC node -> mk_non_loop_breaker weak_fvs node : binds
874 CyclicSCC [node] -> mk_loop_breaker node : binds
875 CyclicSCC nodes -> reOrderNodes depth bndr_set weak_fvs nodes binds
876
877 reOrderNodes :: Int -> VarSet -> VarSet -> [Node Details] -> [Binding] -> [Binding]
878 -- Choose a loop breaker, mark it no-inline,
879 -- do SCC analysis on the rest, and recursively sort them out
880 reOrderNodes _ _ _ [] _ = panic "reOrderNodes"
881 reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
882 = -- pprTrace "reOrderNodes" (text "unchosen" <+> ppr unchosen $$
883 -- text "chosen" <+> ppr chosen_nodes) $
884 loopBreakNodes new_depth bndr_set weak_fvs unchosen $
885 (map mk_loop_breaker chosen_nodes ++ binds)
886 where
887 (chosen_nodes, unchosen) = choose_loop_breaker (score node) [node] [] nodes
888
889 approximate_loop_breaker = depth >= 2
890 new_depth | approximate_loop_breaker = 0
891 | otherwise = depth+1
892 -- After two iterations (d=0, d=1) give up
893 -- and approximate, returning to d=0
894
895 choose_loop_breaker :: Int -- Best score so far
896 -> [Node Details] -- Nodes with this score
897 -> [Node Details] -- Nodes with higher scores
898 -> [Node Details] -- Unprocessed nodes
899 -> ([Node Details], [Node Details])
900 -- This loop looks for the bind with the lowest score
901 -- to pick as the loop breaker. The rest accumulate in
902 choose_loop_breaker _ loop_nodes acc []
903 = (loop_nodes, acc) -- Done
904
905 -- If approximate_loop_breaker is True, we pick *all*
906 -- nodes with lowest score, else just one
907 -- See Note [Complexity of loop breaking]
908 choose_loop_breaker loop_sc loop_nodes acc (node : nodes)
909 | sc < loop_sc -- Lower score so pick this new one
910 = choose_loop_breaker sc [node] (loop_nodes ++ acc) nodes
911
912 | approximate_loop_breaker && sc == loop_sc
913 = choose_loop_breaker loop_sc (node : loop_nodes) acc nodes
914
915 | otherwise -- Higher score so don't pick it
916 = choose_loop_breaker loop_sc loop_nodes (node : acc) nodes
917 where
918 sc = score node
919
920 score :: Node Details -> Int -- Higher score => less likely to be picked as loop breaker
921 score (ND { nd_bndr = bndr, nd_rhs = rhs }, _, _)
922 | not (isId bndr) = 100 -- A type or cercion variable is never a loop breaker
923
924 | isDFunId bndr = 9 -- Never choose a DFun as a loop breaker
925 -- Note [DFuns should not be loop breakers]
926
927 | Just be_very_keen <- hasStableCoreUnfolding_maybe (idUnfolding bndr)
928 = if be_very_keen then 6 -- Note [Loop breakers and INLINE/INLINEABLE pragmas]
929 else 3
930 -- Data structures are more important than INLINE pragmas
931 -- so that dictionary/method recursion unravels
932 -- Note that this case hits all stable unfoldings, so we
933 -- never look at 'rhs' for stable unfoldings. That's right, because
934 -- 'rhs' is irrelevant for inlining things with a stable unfolding
935
936 | is_con_app rhs = 5 -- Data types help with cases: Note [Constructor applications]
937
938 | exprIsTrivial rhs = 10 -- Practically certain to be inlined
939 -- Used to have also: && not (isExportedId bndr)
940 -- But I found this sometimes cost an extra iteration when we have
941 -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }
942 -- where df is the exported dictionary. Then df makes a really
943 -- bad choice for loop breaker
944
945
946 -- If an Id is marked "never inline" then it makes a great loop breaker
947 -- The only reason for not checking that here is that it is rare
948 -- and I've never seen a situation where it makes a difference,
949 -- so it probably isn't worth the time to test on every binder
950 -- | isNeverActive (idInlinePragma bndr) = -10
951
952 | isOneOcc (idOccInfo bndr) = 2 -- Likely to be inlined
953
954 | canUnfold (realIdUnfolding bndr) = 1
955 -- The Id has some kind of unfolding
956 -- Ignore loop-breaker-ness here because that is what we are setting!
957
958 | otherwise = 0
959
960 -- Checking for a constructor application
961 -- Cheap and cheerful; the simplifer moves casts out of the way
962 -- The lambda case is important to spot x = /\a. C (f a)
963 -- which comes up when C is a dictionary constructor and
964 -- f is a default method.
965 -- Example: the instance for Show (ST s a) in GHC.ST
966 --
967 -- However we *also* treat (\x. C p q) as a con-app-like thing,
968 -- Note [Closure conversion]
969 is_con_app (Var v) = isConLikeId v
970 is_con_app (App f _) = is_con_app f
971 is_con_app (Lam _ e) = is_con_app e
972 is_con_app (Tick _ e) = is_con_app e
973 is_con_app _ = False
974
975 {-
976 Note [Complexity of loop breaking]
977 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
978 The loop-breaking algorithm knocks out one binder at a time, and
979 performs a new SCC analysis on the remaining binders. That can
980 behave very badly in tightly-coupled groups of bindings; in the
981 worst case it can be (N**2)*log N, because it does a full SCC
982 on N, then N-1, then N-2 and so on.
983
984 To avoid this, we switch plans after 2 (or whatever) attempts:
985 Plan A: pick one binder with the lowest score, make it
986 a loop breaker, and try again
987 Plan B: pick *all* binders with the lowest score, make them
988 all loop breakers, and try again
989 Since there are only a small finite number of scores, this will
990 terminate in a constant number of iterations, rather than O(N)
991 iterations.
992
993 You might thing that it's very unlikely, but RULES make it much
994 more likely. Here's a real example from Trac #1969:
995 Rec { $dm = \d.\x. op d
996 {-# RULES forall d. $dm Int d = $s$dm1
997 forall d. $dm Bool d = $s$dm2 #-}
998
999 dInt = MkD .... opInt ...
1000 dInt = MkD .... opBool ...
1001 opInt = $dm dInt
1002 opBool = $dm dBool
1003
1004 $s$dm1 = \x. op dInt
1005 $s$dm2 = \x. op dBool }
1006 The RULES stuff means that we can't choose $dm as a loop breaker
1007 (Note [Choosing loop breakers]), so we must choose at least (say)
1008 opInt *and* opBool, and so on. The number of loop breakders is
1009 linear in the number of instance declarations.
1010
1011 Note [Loop breakers and INLINE/INLINEABLE pragmas]
1012 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1013 Avoid choosing a function with an INLINE pramga as the loop breaker!
1014 If such a function is mutually-recursive with a non-INLINE thing,
1015 then the latter should be the loop-breaker.
1016
1017 It's vital to distinguish between INLINE and INLINEABLE (the
1018 Bool returned by hasStableCoreUnfolding_maybe). If we start with
1019 Rec { {-# INLINEABLE f #-}
1020 f x = ...f... }
1021 and then worker/wrapper it through strictness analysis, we'll get
1022 Rec { {-# INLINEABLE $wf #-}
1023 $wf p q = let x = (p,q) in ...f...
1024
1025 {-# INLINE f #-}
1026 f x = case x of (p,q) -> $wf p q }
1027
1028 Now it is vital that we choose $wf as the loop breaker, so we can
1029 inline 'f' in '$wf'.
1030
1031 Note [DFuns should not be loop breakers]
1032 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1033 It's particularly bad to make a DFun into a loop breaker. See
1034 Note [How instance declarations are translated] in TcInstDcls
1035
1036 We give DFuns a higher score than ordinary CONLIKE things because
1037 if there's a choice we want the DFun to be the non-looop breker. Eg
1038
1039 rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
1040
1041 $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
1042 {-# DFUN #-}
1043 $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
1044 }
1045
1046 Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
1047 if we can't unravel the DFun first.
1048
1049 Note [Constructor applications]
1050 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1051 It's really really important to inline dictionaries. Real
1052 example (the Enum Ordering instance from GHC.Base):
1053
1054 rec f = \ x -> case d of (p,q,r) -> p x
1055 g = \ x -> case d of (p,q,r) -> q x
1056 d = (v, f, g)
1057
1058 Here, f and g occur just once; but we can't inline them into d.
1059 On the other hand we *could* simplify those case expressions if
1060 we didn't stupidly choose d as the loop breaker.
1061 But we won't because constructor args are marked "Many".
1062 Inlining dictionaries is really essential to unravelling
1063 the loops in static numeric dictionaries, see GHC.Float.
1064
1065 Note [Closure conversion]
1066 ~~~~~~~~~~~~~~~~~~~~~~~~~
1067 We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
1068 The immediate motivation came from the result of a closure-conversion transformation
1069 which generated code like this:
1070
1071 data Clo a b = forall c. Clo (c -> a -> b) c
1072
1073 ($:) :: Clo a b -> a -> b
1074 Clo f env $: x = f env x
1075
1076 rec { plus = Clo plus1 ()
1077
1078 ; plus1 _ n = Clo plus2 n
1079
1080 ; plus2 Zero n = n
1081 ; plus2 (Succ m) n = Succ (plus $: m $: n) }
1082
1083 If we inline 'plus' and 'plus1', everything unravels nicely. But if
1084 we choose 'plus1' as the loop breaker (which is entirely possible
1085 otherwise), the loop does not unravel nicely.
1086
1087
1088 @occAnalRhs@ deals with the question of bindings where the Id is marked
1089 by an INLINE pragma. For these we record that anything which occurs
1090 in its RHS occurs many times. This pessimistically assumes that ths
1091 inlined binder also occurs many times in its scope, but if it doesn't
1092 we'll catch it next time round. At worst this costs an extra simplifier pass.
1093 ToDo: try using the occurrence info for the inline'd binder.
1094
1095 [March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC.
1096 [June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC.
1097 -}
1098
1099 occAnalRecRhs :: OccEnv -> CoreExpr -- Rhs
1100 -> (UsageDetails, CoreExpr)
1101 -- Returned usage details covers only the RHS,
1102 -- and *not* the RULE or INLINE template for the Id
1103 occAnalRecRhs env rhs = occAnal (rhsCtxt env) rhs
1104
1105 occAnalNonRecRhs :: OccEnv
1106 -> Id -> CoreExpr -- Binder and rhs
1107 -- Binder is already tagged with occurrence info
1108 -> (UsageDetails, CoreExpr)
1109 -- Returned usage details covers only the RHS,
1110 -- and *not* the RULE or INLINE template for the Id
1111 occAnalNonRecRhs env bndr rhs
1112 = occAnal rhs_env rhs
1113 where
1114 -- See Note [Cascading inlines]
1115 env1 | certainly_inline = env
1116 | otherwise = rhsCtxt env
1117
1118 -- See Note [Use one-shot info]
1119 rhs_env = env1 { occ_one_shots = argOneShots OneShotLam dmd }
1120
1121
1122 certainly_inline -- See Note [Cascading inlines]
1123 = case idOccInfo bndr of
1124 OneOcc in_lam one_br _ -> not in_lam && one_br && active && not_stable
1125 _ -> False
1126
1127 dmd = idDemandInfo bndr
1128 active = isAlwaysActive (idInlineActivation bndr)
1129 not_stable = not (isStableUnfolding (idUnfolding bndr))
1130
1131 addIdOccs :: UsageDetails -> VarSet -> UsageDetails
1132 addIdOccs usage id_set = foldVarSet addIdOcc usage id_set
1133
1134 addIdOcc :: Id -> UsageDetails -> UsageDetails
1135 addIdOcc v u | isId v = addOneOcc u v NoOccInfo
1136 | otherwise = u
1137 -- Give a non-committal binder info (i.e NoOccInfo) because
1138 -- a) Many copies of the specialised thing can appear
1139 -- b) We don't want to substitute a BIG expression inside a RULE
1140 -- even if that's the only occurrence of the thing
1141 -- (Same goes for INLINE.)
1142
1143 {-
1144 Note [Cascading inlines]
1145 ~~~~~~~~~~~~~~~~~~~~~~~~
1146 By default we use an rhsCtxt for the RHS of a binding. This tells the
1147 occ anal n that it's looking at an RHS, which has an effect in
1148 occAnalApp. In particular, for constructor applications, it makes
1149 the arguments appear to have NoOccInfo, so that we don't inline into
1150 them. Thus x = f y
1151 k = Just x
1152 we do not want to inline x.
1153
1154 But there's a problem. Consider
1155 x1 = a0 : []
1156 x2 = a1 : x1
1157 x3 = a2 : x2
1158 g = f x3
1159 First time round, it looks as if x1 and x2 occur as an arg of a
1160 let-bound constructor ==> give them a many-occurrence.
1161 But then x3 is inlined (unconditionally as it happens) and
1162 next time round, x2 will be, and the next time round x1 will be
1163 Result: multiple simplifier iterations. Sigh.
1164
1165 So, when analysing the RHS of x3 we notice that x3 will itself
1166 definitely inline the next time round, and so we analyse x3's rhs in
1167 an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff.
1168
1169 Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally.
1170 If we say "yes" when preInlineUnconditionally says "no" the simplifier iterates
1171 indefinitely:
1172 x = f y
1173 k = Just x
1174 inline ==>
1175 k = Just (f y)
1176 float ==>
1177 x1 = f y
1178 k = Just x1
1179
1180 This is worse than the slow cascade, so we only want to say "certainly_inline"
1181 if it really is certain. Look at the note with preInlineUnconditionally
1182 for the various clauses.
1183
1184 Expressions
1185 ~~~~~~~~~~~
1186 -}
1187
1188 occAnal :: OccEnv
1189 -> CoreExpr
1190 -> (UsageDetails, -- Gives info only about the "interesting" Ids
1191 CoreExpr)
1192
1193 occAnal _ expr@(Type _) = (emptyDetails, expr)
1194 occAnal _ expr@(Lit _) = (emptyDetails, expr)
1195 occAnal env expr@(Var v) = (mkOneOcc env v False, expr)
1196 -- At one stage, I gathered the idRuleVars for v here too,
1197 -- which in a way is the right thing to do.
1198 -- But that went wrong right after specialisation, when
1199 -- the *occurrences* of the overloaded function didn't have any
1200 -- rules in them, so the *specialised* versions looked as if they
1201 -- weren't used at all.
1202
1203 occAnal _ (Coercion co)
1204 = (addIdOccs emptyDetails (coVarsOfCo co), Coercion co)
1205 -- See Note [Gather occurrences of coercion variables]
1206
1207 {-
1208 Note [Gather occurrences of coercion variables]
1209 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1210 We need to gather info about what coercion variables appear, so that
1211 we can sort them into the right place when doing dependency analysis.
1212 -}
1213
1214 occAnal env (Tick tickish body)
1215 | tickish `tickishScopesLike` SoftScope
1216 = (usage, Tick tickish body')
1217
1218 | Breakpoint _ ids <- tickish
1219 = (usage_lam +++ mkVarEnv (zip ids (repeat NoOccInfo)), Tick tickish body')
1220 -- never substitute for any of the Ids in a Breakpoint
1221
1222 | otherwise
1223 = (usage_lam, Tick tickish body')
1224 where
1225 !(usage,body') = occAnal env body
1226 -- for a non-soft tick scope, we can inline lambdas only
1227 usage_lam = mapVarEnv markInsideLam usage
1228
1229 occAnal env (Cast expr co)
1230 = case occAnal env expr of { (usage, expr') ->
1231 let usage1 = markManyIf (isRhsEnv env) usage
1232 usage2 = addIdOccs usage1 (coVarsOfCo co)
1233 -- See Note [Gather occurrences of coercion variables]
1234 in (usage2, Cast expr' co)
1235 -- If we see let x = y `cast` co
1236 -- then mark y as 'Many' so that we don't
1237 -- immediately inline y again.
1238 }
1239
1240 occAnal env app@(App _ _)
1241 = occAnalApp env (collectArgsTicks tickishFloatable app)
1242
1243 -- Ignore type variables altogether
1244 -- (a) occurrences inside type lambdas only not marked as InsideLam
1245 -- (b) type variables not in environment
1246
1247 occAnal env (Lam x body) | isTyVar x
1248 = case occAnal env body of { (body_usage, body') ->
1249 (body_usage, Lam x body')
1250 }
1251
1252 -- For value lambdas we do a special hack. Consider
1253 -- (\x. \y. ...x...)
1254 -- If we did nothing, x is used inside the \y, so would be marked
1255 -- as dangerous to dup. But in the common case where the abstraction
1256 -- is applied to two arguments this is over-pessimistic.
1257 -- So instead, we just mark each binder with its occurrence
1258 -- info in the *body* of the multiple lambda.
1259 -- Then, the simplifier is careful when partially applying lambdas.
1260
1261 occAnal env expr@(Lam _ _)
1262 = case occAnal env_body body of { (body_usage, body') ->
1263 let
1264 (final_usage, tagged_binders) = tagLamBinders body_usage binders'
1265 -- Use binders' to put one-shot info on the lambdas
1266
1267 really_final_usage
1268 | all isOneShotBndr binders' = final_usage
1269 | otherwise = mapVarEnv markInsideLam final_usage
1270 in
1271 (really_final_usage, mkLams tagged_binders body') }
1272 where
1273 (binders, body) = collectBinders expr
1274 (env_body, binders') = oneShotGroup env binders
1275
1276 occAnal env (Case scrut bndr ty alts)
1277 = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') ->
1278 case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts') ->
1279 let
1280 alts_usage = foldr combineAltsUsageDetails emptyDetails alts_usage_s
1281 (alts_usage1, tagged_bndr) = tag_case_bndr alts_usage bndr
1282 total_usage = scrut_usage +++ alts_usage1
1283 in
1284 total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
1285 where
1286 -- Note [Case binder usage]
1287 -- ~~~~~~~~~~~~~~~~~~~~~~~~
1288 -- The case binder gets a usage of either "many" or "dead", never "one".
1289 -- Reason: we like to inline single occurrences, to eliminate a binding,
1290 -- but inlining a case binder *doesn't* eliminate a binding.
1291 -- We *don't* want to transform
1292 -- case x of w { (p,q) -> f w }
1293 -- into
1294 -- case x of w { (p,q) -> f (p,q) }
1295 tag_case_bndr usage bndr
1296 = case lookupVarEnv usage bndr of
1297 Nothing -> (usage, setIdOccInfo bndr IAmDead)
1298 Just _ -> (usage `delVarEnv` bndr, setIdOccInfo bndr NoOccInfo)
1299
1300 alt_env = mkAltEnv env scrut bndr
1301 occ_anal_alt = occAnalAlt alt_env
1302
1303 occ_anal_scrut (Var v) (alt1 : other_alts)
1304 | not (null other_alts) || not (isDefaultAlt alt1)
1305 = (mkOneOcc env v True, Var v) -- The 'True' says that the variable occurs
1306 -- in an interesting context; the case has
1307 -- at least one non-default alternative
1308 occ_anal_scrut (Tick t e) alts
1309 | t `tickishScopesLike` SoftScope
1310 -- No reason to not look through all ticks here, but only
1311 -- for soft-scoped ticks we can do so without having to
1312 -- update returned occurance info (see occAnal)
1313 = second (Tick t) $ occ_anal_scrut e alts
1314
1315 occ_anal_scrut scrut _alts
1316 = occAnal (vanillaCtxt env) scrut -- No need for rhsCtxt
1317
1318 occAnal env (Let bind body)
1319 = case occAnal env body of { (body_usage, body') ->
1320 case occAnalBind env noImpRuleEdges bind body_usage of { (final_usage, new_binds) ->
1321 (final_usage, mkLets new_binds body') }}
1322
1323 occAnalArgs :: OccEnv -> [CoreExpr] -> [OneShots] -> (UsageDetails, [CoreExpr])
1324 occAnalArgs _ [] _
1325 = (emptyDetails, [])
1326
1327 occAnalArgs env (arg:args) one_shots
1328 | isTypeArg arg
1329 = case occAnalArgs env args one_shots of { (uds, args') ->
1330 (uds, arg:args') }
1331
1332 | otherwise
1333 = case argCtxt env one_shots of { (arg_env, one_shots') ->
1334 case occAnal arg_env arg of { (uds1, arg') ->
1335 case occAnalArgs env args one_shots' of { (uds2, args') ->
1336 (uds1 +++ uds2, arg':args') }}}
1337
1338 {-
1339 Applications are dealt with specially because we want
1340 the "build hack" to work.
1341
1342 Note [Arguments of let-bound constructors]
1343 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1344 Consider
1345 f x = let y = expensive x in
1346 let z = (True,y) in
1347 (case z of {(p,q)->q}, case z of {(p,q)->q})
1348 We feel free to duplicate the WHNF (True,y), but that means
1349 that y may be duplicated thereby.
1350
1351 If we aren't careful we duplicate the (expensive x) call!
1352 Constructors are rather like lambdas in this way.
1353 -}
1354
1355 occAnalApp :: OccEnv
1356 -> (Expr CoreBndr, [Arg CoreBndr], [Tickish Id])
1357 -> (UsageDetails, Expr CoreBndr)
1358 occAnalApp env (Var fun, args, ticks)
1359 | null ticks = (uds, mkApps (Var fun) args')
1360 | otherwise = (uds, mkTicks ticks $ mkApps (Var fun) args')
1361 where
1362 uds = fun_uds +++ final_args_uds
1363
1364 !(args_uds, args') = occAnalArgs env args one_shots
1365 !final_args_uds = markManyIf (isRhsEnv env && is_exp) args_uds
1366 -- We mark the free vars of the argument of a constructor or PAP
1367 -- as "many", if it is the RHS of a let(rec).
1368 -- This means that nothing gets inlined into a constructor argument
1369 -- position, which is what we want. Typically those constructor
1370 -- arguments are just variables, or trivial expressions.
1371 --
1372 -- This is the *whole point* of the isRhsEnv predicate
1373 -- See Note [Arguments of let-bound constructors]
1374
1375 n_val_args = valArgCount args
1376 fun_uds = mkOneOcc env fun (n_val_args > 0)
1377 is_exp = isExpandableApp fun n_val_args
1378 -- See Note [CONLIKE pragma] in BasicTypes
1379 -- The definition of is_exp should match that in
1380 -- Simplify.prepareRhs
1381
1382 one_shots = argsOneShots (idStrictness fun) n_val_args
1383 -- See Note [Use one-shot info]
1384
1385 occAnalApp env (fun, args, ticks)
1386 = (fun_uds +++ args_uds, mkTicks ticks $ mkApps fun' args')
1387 where
1388 !(fun_uds, fun') = occAnal (addAppCtxt env args) fun
1389 -- The addAppCtxt is a bit cunning. One iteration of the simplifier
1390 -- often leaves behind beta redexs like
1391 -- (\x y -> e) a1 a2
1392 -- Here we would like to mark x,y as one-shot, and treat the whole
1393 -- thing much like a let. We do this by pushing some True items
1394 -- onto the context stack.
1395 !(args_uds, args') = occAnalArgs env args []
1396
1397 markManyIf :: Bool -- If this is true
1398 -> UsageDetails -- Then do markMany on this
1399 -> UsageDetails
1400 markManyIf True uds = mapVarEnv markMany uds
1401 markManyIf False uds = uds
1402
1403 {-
1404 Note [Use one-shot information]
1405 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1406 The occurrrence analyser propagates one-shot-lambda information in two
1407 situations:
1408
1409 * Applications: eg build (\c n -> blah)
1410
1411 Propagate one-shot info from the strictness signature of 'build' to
1412 the \c n.
1413
1414 This strictness signature can come from a module interface, in the case of
1415 an imported function, or from a previous run of the demand analyser.
1416
1417 * Let-bindings: eg let f = \c. let ... in \n -> blah
1418 in (build f, build f)
1419
1420 Propagate one-shot info from the demanand-info on 'f' to the
1421 lambdas in its RHS (which may not be syntactically at the top)
1422
1423 This information must have come from a previous run of the demanand
1424 analyser.
1425
1426 Previously, the demand analyser would *also* set the one-shot information, but
1427 that code was buggy (see #11770), so doing it only in on place, namely here, is
1428 saner.
1429
1430 Note [Binders in case alternatives]
1431 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1432 Consider
1433 case x of y { (a,b) -> f y }
1434 We treat 'a', 'b' as dead, because they don't physically occur in the
1435 case alternative. (Indeed, a variable is dead iff it doesn't occur in
1436 its scope in the output of OccAnal.) It really helps to know when
1437 binders are unused. See esp the call to isDeadBinder in
1438 Simplify.mkDupableAlt
1439
1440 In this example, though, the Simplifier will bring 'a' and 'b' back to
1441 life, beause it binds 'y' to (a,b) (imagine got inlined and
1442 scrutinised y).
1443 -}
1444
1445 occAnalAlt :: (OccEnv, Maybe (Id, CoreExpr))
1446 -> CoreAlt
1447 -> (UsageDetails, Alt IdWithOccInfo)
1448 occAnalAlt (env, scrut_bind) (con, bndrs, rhs)
1449 = case occAnal env rhs of { (rhs_usage1, rhs1) ->
1450 let
1451 (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs
1452 -- See Note [Binders in case alternatives]
1453 (alt_usg', rhs2) =
1454 wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1
1455 in
1456 (alt_usg', (con, tagged_bndrs, rhs2)) }
1457
1458 wrapAltRHS :: OccEnv
1459 -> Maybe (Id, CoreExpr) -- proxy mapping generated by mkAltEnv
1460 -> UsageDetails -- usage for entire alt (p -> rhs)
1461 -> [Var] -- alt binders
1462 -> CoreExpr -- alt RHS
1463 -> (UsageDetails, CoreExpr)
1464 wrapAltRHS env (Just (scrut_var, let_rhs)) alt_usg bndrs alt_rhs
1465 | occ_binder_swap env
1466 , scrut_var `usedIn` alt_usg -- bndrs are not be present in alt_usg so this
1467 -- handles condition (a) in Note [Binder swap]
1468 , not captured -- See condition (b) in Note [Binder swap]
1469 = ( alt_usg' +++ let_rhs_usg
1470 , Let (NonRec tagged_scrut_var let_rhs') alt_rhs )
1471 where
1472 captured = any (`usedIn` let_rhs_usg) bndrs
1473 -- The rhs of the let may include coercion variables
1474 -- if the scrutinee was a cast, so we must gather their
1475 -- usage. See Note [Gather occurrences of coercion variables]
1476 (let_rhs_usg, let_rhs') = occAnal env let_rhs
1477 (alt_usg', tagged_scrut_var) = tagBinder alt_usg scrut_var
1478
1479 wrapAltRHS _ _ alt_usg _ alt_rhs
1480 = (alt_usg, alt_rhs)
1481
1482 {-
1483 ************************************************************************
1484 * *
1485 OccEnv
1486 * *
1487 ************************************************************************
1488 -}
1489
1490 data OccEnv
1491 = OccEnv { occ_encl :: !OccEncl -- Enclosing context information
1492 , occ_one_shots :: !OneShots -- Tells about linearity
1493 , occ_gbl_scrut :: GlobalScruts
1494 , occ_rule_act :: Activation -> Bool -- Which rules are active
1495 -- See Note [Finding rule RHS free vars]
1496 , occ_binder_swap :: !Bool -- enable the binder_swap
1497 -- See CorePrep Note [Dead code in CorePrep]
1498 }
1499
1500 type GlobalScruts = IdSet -- See Note [Binder swap on GlobalId scrutinees]
1501
1502 -----------------------------
1503 -- OccEncl is used to control whether to inline into constructor arguments
1504 -- For example:
1505 -- x = (p,q) -- Don't inline p or q
1506 -- y = /\a -> (p a, q a) -- Still don't inline p or q
1507 -- z = f (p,q) -- Do inline p,q; it may make a rule fire
1508 -- So OccEncl tells enought about the context to know what to do when
1509 -- we encounter a contructor application or PAP.
1510
1511 data OccEncl
1512 = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda
1513 -- Don't inline into constructor args here
1514 | OccVanilla -- Argument of function, body of lambda, scruintee of case etc.
1515 -- Do inline into constructor args here
1516
1517 instance Outputable OccEncl where
1518 ppr OccRhs = text "occRhs"
1519 ppr OccVanilla = text "occVanilla"
1520
1521 type OneShots = [OneShotInfo]
1522 -- [] No info
1523 --
1524 -- one_shot_info:ctxt Analysing a function-valued expression that
1525 -- will be applied as described by one_shot_info
1526
1527 initOccEnv :: (Activation -> Bool) -> OccEnv
1528 initOccEnv active_rule
1529 = OccEnv { occ_encl = OccVanilla
1530 , occ_one_shots = []
1531 , occ_gbl_scrut = emptyVarSet
1532 , occ_rule_act = active_rule
1533 , occ_binder_swap = True }
1534
1535 vanillaCtxt :: OccEnv -> OccEnv
1536 vanillaCtxt env = env { occ_encl = OccVanilla, occ_one_shots = [] }
1537
1538 rhsCtxt :: OccEnv -> OccEnv
1539 rhsCtxt env = env { occ_encl = OccRhs, occ_one_shots = [] }
1540
1541 argCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots])
1542 argCtxt env []
1543 = (env { occ_encl = OccVanilla, occ_one_shots = [] }, [])
1544 argCtxt env (one_shots:one_shots_s)
1545 = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s)
1546
1547 isRhsEnv :: OccEnv -> Bool
1548 isRhsEnv (OccEnv { occ_encl = OccRhs }) = True
1549 isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False
1550
1551 oneShotGroup :: OccEnv -> [CoreBndr]
1552 -> ( OccEnv
1553 , [CoreBndr] )
1554 -- The result binders have one-shot-ness set that they might not have had originally.
1555 -- This happens in (build (\c n -> e)). Here the occurrence analyser
1556 -- linearity context knows that c,n are one-shot, and it records that fact in
1557 -- the binder. This is useful to guide subsequent float-in/float-out tranformations
1558
1559 oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs
1560 = go ctxt bndrs []
1561 where
1562 go ctxt [] rev_bndrs
1563 = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla }
1564 , reverse rev_bndrs )
1565
1566 go [] bndrs rev_bndrs
1567 = ( env { occ_one_shots = [], occ_encl = OccVanilla }
1568 , reverse rev_bndrs ++ bndrs )
1569
1570 go ctxt (bndr:bndrs) rev_bndrs
1571 | isId bndr
1572
1573 = case ctxt of
1574 [] -> go [] bndrs (bndr : rev_bndrs)
1575 (one_shot : ctxt) -> go ctxt bndrs (bndr': rev_bndrs)
1576 where
1577 bndr' = updOneShotInfo bndr one_shot
1578 -- Use updOneShotInfo, not setOneShotInfo, as pre-existing
1579 -- one-shot info might be better than what we can infer, e.g.
1580 -- due to explicit use of the magic 'oneShot' function.
1581 -- See Note [The oneShot function]
1582
1583 | otherwise
1584 = go ctxt bndrs (bndr:rev_bndrs)
1585
1586 addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
1587 addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args
1588 = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt }
1589
1590 transClosureFV :: UniqFM VarSet -> UniqFM VarSet
1591 -- If (f,g), (g,h) are in the input, then (f,h) is in the output
1592 -- as well as (f,g), (g,h)
1593 transClosureFV env
1594 | no_change = env
1595 | otherwise = transClosureFV (listToUFM new_fv_list)
1596 where
1597 (no_change, new_fv_list) = mapAccumL bump True (ufmToList env)
1598 bump no_change (b,fvs)
1599 | no_change_here = (no_change, (b,fvs))
1600 | otherwise = (False, (b,new_fvs))
1601 where
1602 (new_fvs, no_change_here) = extendFvs env fvs
1603
1604 -------------
1605 extendFvs_ :: UniqFM VarSet -> VarSet -> VarSet
1606 extendFvs_ env s = fst (extendFvs env s) -- Discard the Bool flag
1607
1608 extendFvs :: UniqFM VarSet -> VarSet -> (VarSet, Bool)
1609 -- (extendFVs env s) returns
1610 -- (s `union` env(s), env(s) `subset` s)
1611 extendFvs env s
1612 | isNullUFM env
1613 = (s, True)
1614 | otherwise
1615 = (s `unionVarSet` extras, extras `subVarSet` s)
1616 where
1617 extras :: VarSet -- env(s)
1618 extras = foldUFM unionVarSet emptyVarSet $
1619 intersectUFM_C (\x _ -> x) env s
1620
1621 {-
1622 ************************************************************************
1623 * *
1624 Binder swap
1625 * *
1626 ************************************************************************
1627
1628 Note [Binder swap]
1629 ~~~~~~~~~~~~~~~~~~
1630 We do these two transformations right here:
1631
1632 (1) case x of b { pi -> ri }
1633 ==>
1634 case x of b { pi -> let x=b in ri }
1635
1636 (2) case (x |> co) of b { pi -> ri }
1637 ==>
1638 case (x |> co) of b { pi -> let x = b |> sym co in ri }
1639
1640 Why (2)? See Note [Case of cast]
1641
1642 In both cases, in a particular alternative (pi -> ri), we only
1643 add the binding if
1644 (a) x occurs free in (pi -> ri)
1645 (ie it occurs in ri, but is not bound in pi)
1646 (b) the pi does not bind b (or the free vars of co)
1647 We need (a) and (b) for the inserted binding to be correct.
1648
1649 For the alternatives where we inject the binding, we can transfer
1650 all x's OccInfo to b. And that is the point.
1651
1652 Notice that
1653 * The deliberate shadowing of 'x'.
1654 * That (a) rapidly becomes false, so no bindings are injected.
1655
1656 The reason for doing these transformations here is because it allows
1657 us to adjust the OccInfo for 'x' and 'b' as we go.
1658
1659 * Suppose the only occurrences of 'x' are the scrutinee and in the
1660 ri; then this transformation makes it occur just once, and hence
1661 get inlined right away.
1662
1663 * If we do this in the Simplifier, we don't know whether 'x' is used
1664 in ri, so we are forced to pessimistically zap b's OccInfo even
1665 though it is typically dead (ie neither it nor x appear in the
1666 ri). There's nothing actually wrong with zapping it, except that
1667 it's kind of nice to know which variables are dead. My nose
1668 tells me to keep this information as robustly as possible.
1669
1670 The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
1671 {x=b}; it's Nothing if the binder-swap doesn't happen.
1672
1673 There is a danger though. Consider
1674 let v = x +# y
1675 in case (f v) of w -> ...v...v...
1676 And suppose that (f v) expands to just v. Then we'd like to
1677 use 'w' instead of 'v' in the alternative. But it may be too
1678 late; we may have substituted the (cheap) x+#y for v in the
1679 same simplifier pass that reduced (f v) to v.
1680
1681 I think this is just too bad. CSE will recover some of it.
1682
1683 Note [Case of cast]
1684 ~~~~~~~~~~~~~~~~~~~
1685 Consider case (x `cast` co) of b { I# ->
1686 ... (case (x `cast` co) of {...}) ...
1687 We'd like to eliminate the inner case. That is the motivation for
1688 equation (2) in Note [Binder swap]. When we get to the inner case, we
1689 inline x, cancel the casts, and away we go.
1690
1691 Note [Binder swap on GlobalId scrutinees]
1692 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1693 When the scrutinee is a GlobalId we must take care in two ways
1694
1695 i) In order to *know* whether 'x' occurs free in the RHS, we need its
1696 occurrence info. BUT, we don't gather occurrence info for
1697 GlobalIds. That's the reason for the (small) occ_gbl_scrut env in
1698 OccEnv is for: it says "gather occurrence info for these".
1699
1700 ii) We must call localiseId on 'x' first, in case it's a GlobalId, or
1701 has an External Name. See, for example, SimplEnv Note [Global Ids in
1702 the substitution].
1703
1704 Note [Zap case binders in proxy bindings]
1705 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1706 From the original
1707 case x of cb(dead) { p -> ...x... }
1708 we will get
1709 case x of cb(live) { p -> let x = cb in ...x... }
1710
1711 Core Lint never expects to find an *occurrence* of an Id marked
1712 as Dead, so we must zap the OccInfo on cb before making the
1713 binding x = cb. See Trac #5028.
1714
1715 Historical note [no-case-of-case]
1716 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1717 We *used* to suppress the binder-swap in case expressions when
1718 -fno-case-of-case is on. Old remarks:
1719 "This happens in the first simplifier pass,
1720 and enhances full laziness. Here's the bad case:
1721 f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
1722 If we eliminate the inner case, we trap it inside the I# v -> arm,
1723 which might prevent some full laziness happening. I've seen this
1724 in action in spectral/cichelli/Prog.hs:
1725 [(m,n) | m <- [1..max], n <- [1..max]]
1726 Hence the check for NoCaseOfCase."
1727 However, now the full-laziness pass itself reverses the binder-swap, so this
1728 check is no longer necessary.
1729
1730 Historical note [Suppressing the case binder-swap]
1731 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1732 This old note describes a problem that is also fixed by doing the
1733 binder-swap in OccAnal:
1734
1735 There is another situation when it might make sense to suppress the
1736 case-expression binde-swap. If we have
1737
1738 case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
1739 ...other cases .... }
1740
1741 We'll perform the binder-swap for the outer case, giving
1742
1743 case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
1744 ...other cases .... }
1745
1746 But there is no point in doing it for the inner case, because w1 can't
1747 be inlined anyway. Furthermore, doing the case-swapping involves
1748 zapping w2's occurrence info (see paragraphs that follow), and that
1749 forces us to bind w2 when doing case merging. So we get
1750
1751 case x of w1 { A -> let w2 = w1 in e1
1752 B -> let w2 = w1 in e2
1753 ...other cases .... }
1754
1755 This is plain silly in the common case where w2 is dead.
1756
1757 Even so, I can't see a good way to implement this idea. I tried
1758 not doing the binder-swap if the scrutinee was already evaluated
1759 but that failed big-time:
1760
1761 data T = MkT !Int
1762
1763 case v of w { MkT x ->
1764 case x of x1 { I# y1 ->
1765 case x of x2 { I# y2 -> ...
1766
1767 Notice that because MkT is strict, x is marked "evaluated". But to
1768 eliminate the last case, we must either make sure that x (as well as
1769 x1) has unfolding MkT y1. The straightforward thing to do is to do
1770 the binder-swap. So this whole note is a no-op.
1771
1772 It's fixed by doing the binder-swap in OccAnal because we can do the
1773 binder-swap unconditionally and still get occurrence analysis
1774 information right.
1775 -}
1776
1777 mkAltEnv :: OccEnv -> CoreExpr -> Id -> (OccEnv, Maybe (Id, CoreExpr))
1778 -- Does two things: a) makes the occ_one_shots = OccVanilla
1779 -- b) extends the GlobalScruts if possible
1780 -- c) returns a proxy mapping, binding the scrutinee
1781 -- to the case binder, if possible
1782 mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr
1783 = case stripTicksTopE (const True) scrut of
1784 Var v -> add_scrut v case_bndr'
1785 Cast (Var v) co -> add_scrut v (Cast case_bndr' (mkSymCo co))
1786 -- See Note [Case of cast]
1787 _ -> (env { occ_encl = OccVanilla }, Nothing)
1788
1789 where
1790 add_scrut v rhs = ( env { occ_encl = OccVanilla, occ_gbl_scrut = pe `extendVarSet` v }
1791 , Just (localise v, rhs) )
1792
1793 case_bndr' = Var (zapIdOccInfo case_bndr) -- See Note [Zap case binders in proxy bindings]
1794 localise scrut_var = mkLocalIdOrCoVar (localiseName (idName scrut_var)) (idType scrut_var)
1795 -- Localise the scrut_var before shadowing it; we're making a
1796 -- new binding for it, and it might have an External Name, or
1797 -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]
1798 -- Also we don't want any INLINE or NOINLINE pragmas!
1799
1800 {-
1801 ************************************************************************
1802 * *
1803 \subsection[OccurAnal-types]{OccEnv}
1804 * *
1805 ************************************************************************
1806 -}
1807
1808 type UsageDetails = IdEnv OccInfo -- A finite map from ids to their usage
1809 -- INVARIANT: never IAmDead
1810 -- (Deadness is signalled by not being in the map at all)
1811
1812 (+++), combineAltsUsageDetails
1813 :: UsageDetails -> UsageDetails -> UsageDetails
1814
1815 (+++) usage1 usage2
1816 = plusVarEnv_C addOccInfo usage1 usage2
1817
1818 combineAltsUsageDetails usage1 usage2
1819 = plusVarEnv_C orOccInfo usage1 usage2
1820
1821 addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
1822 addOneOcc usage id info
1823 = plusVarEnv_C addOccInfo usage (unitVarEnv id info)
1824 -- ToDo: make this more efficient
1825
1826 emptyDetails :: UsageDetails
1827 emptyDetails = (emptyVarEnv :: UsageDetails)
1828
1829 usedIn :: Id -> UsageDetails -> Bool
1830 v `usedIn` details = isExportedId v || v `elemVarEnv` details
1831
1832 type IdWithOccInfo = Id
1833
1834 tagLamBinders :: UsageDetails -- Of scope
1835 -> [Id] -- Binders
1836 -> (UsageDetails, -- Details with binders removed
1837 [IdWithOccInfo]) -- Tagged binders
1838 -- Used for lambda and case binders
1839 -- It copes with the fact that lambda bindings can have a
1840 -- stable unfolding, used for join points
1841 tagLamBinders usage binders = usage' `seq` (usage', bndrs')
1842 where
1843 (usage', bndrs') = mapAccumR tag_lam usage binders
1844 tag_lam usage bndr = (usage2, setBinderOcc usage bndr)
1845 where
1846 usage1 = usage `delVarEnv` bndr
1847 usage2 | isId bndr = addIdOccs usage1 (idUnfoldingVars bndr)
1848 | otherwise = usage1
1849
1850 tagBinder :: UsageDetails -- Of scope
1851 -> Id -- Binders
1852 -> (UsageDetails, -- Details with binders removed
1853 IdWithOccInfo) -- Tagged binders
1854
1855 tagBinder usage binder
1856 = let
1857 usage' = usage `delVarEnv` binder
1858 binder' = setBinderOcc usage binder
1859 in
1860 usage' `seq` (usage', binder')
1861
1862 setBinderOcc :: UsageDetails -> CoreBndr -> CoreBndr
1863 setBinderOcc usage bndr
1864 | isTyVar bndr = bndr
1865 | isExportedId bndr = case idOccInfo bndr of
1866 NoOccInfo -> bndr
1867 _ -> setIdOccInfo bndr NoOccInfo
1868 -- Don't use local usage info for visible-elsewhere things
1869 -- BUT *do* erase any IAmALoopBreaker annotation, because we're
1870 -- about to re-generate it and it shouldn't be "sticky"
1871
1872 | otherwise = setIdOccInfo bndr occ_info
1873 where
1874 occ_info = lookupVarEnv usage bndr `orElse` IAmDead
1875
1876 {-
1877 ************************************************************************
1878 * *
1879 \subsection{Operations over OccInfo}
1880 * *
1881 ************************************************************************
1882 -}
1883
1884 mkOneOcc :: OccEnv -> Id -> InterestingCxt -> UsageDetails
1885 mkOneOcc env id int_cxt
1886 | isLocalId id
1887 = unitVarEnv id (OneOcc False True int_cxt)
1888
1889 | id `elemVarEnv` occ_gbl_scrut env
1890 = unitVarEnv id NoOccInfo
1891
1892 | otherwise
1893 = emptyDetails
1894
1895 markMany, markInsideLam :: OccInfo -> OccInfo
1896
1897 markMany _ = NoOccInfo
1898
1899 markInsideLam (OneOcc _ one_br int_cxt) = OneOcc True one_br int_cxt
1900 markInsideLam occ = occ
1901
1902 addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
1903
1904 addOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
1905 NoOccInfo -- Both branches are at least One
1906 -- (Argument is never IAmDead)
1907
1908 -- (orOccInfo orig new) is used
1909 -- when combining occurrence info from branches of a case
1910
1911 orOccInfo (OneOcc in_lam1 _ int_cxt1)
1912 (OneOcc in_lam2 _ int_cxt2)
1913 = OneOcc (in_lam1 || in_lam2)
1914 False -- False, because it occurs in both branches
1915 (int_cxt1 && int_cxt2)
1916 orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
1917 NoOccInfo