41a6f7fa71a687573770403695a3261ea5dbd77a
[ghc.git] / compiler / simplCore / OccurAnal.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 ************************************************************************
5 * *
6 \section[OccurAnal]{Occurrence analysis pass}
7 * *
8 ************************************************************************
9
10 The occurrence analyser re-typechecks a core expression, returning a new
11 core expression with (hopefully) improved usage information.
12 -}
13
14 {-# LANGUAGE CPP, BangPatterns #-}
15
16 module OccurAnal (
17 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
18 ) where
19
20 #include "HsVersions.h"
21
22 import CoreSyn
23 import CoreFVs
24 import CoreUtils ( exprIsTrivial, isDefaultAlt, isExpandableApp,
25 stripTicksTopE, mkTicks )
26 import Id
27 import Name( localiseName )
28 import BasicTypes
29 import Module( Module )
30 import Coercion
31
32 import VarSet
33 import VarEnv
34 import Var
35 import Demand ( argOneShots, argsOneShots )
36 import Maybes ( orElse )
37 import Digraph ( SCC(..), stronglyConnCompFromEdgedVerticesR )
38 import Unique
39 import UniqFM
40 import Util
41 import Outputable
42 import Data.List
43 import Control.Arrow ( second )
44
45 {-
46 ************************************************************************
47 * *
48 \subsection[OccurAnal-main]{Counting occurrences: main function}
49 * *
50 ************************************************************************
51
52 Here's the externally-callable interface:
53 -}
54
55 occurAnalysePgm :: Module -- Used only in debug output
56 -> (Activation -> Bool)
57 -> [CoreRule] -> [CoreVect] -> VarSet
58 -> CoreProgram -> CoreProgram
59 occurAnalysePgm this_mod active_rule imp_rules vects vectVars binds
60 | isEmptyVarEnv final_usage
61 = occ_anald_binds
62
63 | otherwise -- See Note [Glomming]
64 = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
65 2 (ppr final_usage ) )
66 occ_anald_glommed_binds
67 where
68 init_env = initOccEnv active_rule
69 (final_usage, occ_anald_binds) = go init_env binds
70 (_, occ_anald_glommed_binds) = occAnalRecBind init_env imp_rule_edges
71 (flattenBinds occ_anald_binds)
72 initial_uds
73 -- It's crucial to re-analyse the glommed-together bindings
74 -- so that we establish the right loop breakers. Otherwise
75 -- we can easily create an infinite loop (Trac #9583 is an example)
76
77 initial_uds = addIdOccs emptyDetails
78 (rulesFreeVars imp_rules `unionVarSet`
79 vectsFreeVars vects `unionVarSet`
80 vectVars)
81 -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations,
82 -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are
83 -- reflected in 'vectors' — see Note [Vectorisation declarations and occurrences].)
84
85 -- Note [Preventing loops due to imported functions rules]
86 imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
87 [ mapVarEnv (const maps_to) (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
88 | imp_rule <- imp_rules
89 , not (isBuiltinRule imp_rule) -- See Note [Plugin rules]
90 , let maps_to = exprFreeIds (ru_rhs imp_rule)
91 `delVarSetList` ru_bndrs imp_rule
92 , arg <- ru_args imp_rule ]
93
94 go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
95 go _ []
96 = (initial_uds, [])
97 go env (bind:binds)
98 = (final_usage, bind' ++ binds')
99 where
100 (bs_usage, binds') = go env binds
101 (final_usage, bind') = occAnalBind env imp_rule_edges bind bs_usage
102
103 occurAnalyseExpr :: CoreExpr -> CoreExpr
104 -- Do occurrence analysis, and discard occurrence info returned
105 occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap
106
107 occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
108 occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap
109
110 occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
111 occurAnalyseExpr' enable_binder_swap expr
112 = snd (occAnal env expr)
113 where
114 env = (initOccEnv all_active_rules) {occ_binder_swap = enable_binder_swap}
115 -- To be conservative, we say that all inlines and rules are active
116 all_active_rules = \_ -> True
117
118 {- Note [Plugin rules]
119 ~~~~~~~~~~~~~~~~~~~~~~
120 Conal Elliott (Trac #11651) built a GHC plugin that added some
121 BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
122 do some domain-specific transformations that could not be expressed
123 with an ordinary pattern-matching CoreRule. But then we can't extract
124 the dependencies (in imp_rule_edges) from ru_rhs etc, because a
125 BuiltinRule doesn't have any of that stuff.
126
127 So we simply assume that BuiltinRules have no dependencies, and filter
128 them out from the imp_rule_edges comprehension.
129 -}
130
131 {-
132 ************************************************************************
133 * *
134 \subsection[OccurAnal-main]{Counting occurrences: main function}
135 * *
136 ************************************************************************
137
138 Bindings
139 ~~~~~~~~
140 -}
141
142 type ImpRuleEdges = IdEnv IdSet -- Mapping from FVs of imported RULE LHSs to RHS FVs
143
144 noImpRuleEdges :: ImpRuleEdges
145 noImpRuleEdges = emptyVarEnv
146
147 occAnalBind :: OccEnv -- The incoming OccEnv
148 -> ImpRuleEdges
149 -> CoreBind
150 -> UsageDetails -- Usage details of scope
151 -> (UsageDetails, -- Of the whole let(rec)
152 [CoreBind])
153
154 occAnalBind env top_env (NonRec binder rhs) body_usage
155 = occAnalNonRecBind env top_env binder rhs body_usage
156 occAnalBind env top_env (Rec pairs) body_usage
157 = occAnalRecBind env top_env pairs body_usage
158
159 -----------------
160 occAnalNonRecBind :: OccEnv -> ImpRuleEdges -> Var -> CoreExpr
161 -> UsageDetails -> (UsageDetails, [CoreBind])
162 occAnalNonRecBind env imp_rule_edges binder rhs body_usage
163 | isTyVar binder -- A type let; we don't gather usage info
164 = (body_usage, [NonRec binder rhs])
165
166 | not (binder `usedIn` body_usage) -- It's not mentioned
167 = (body_usage, [])
168
169 | otherwise -- It's mentioned in the body
170 = (body_usage' +++ rhs_usage4, [NonRec tagged_binder rhs'])
171 where
172 (body_usage', tagged_binder) = tagBinder body_usage binder
173 (rhs_usage1, rhs') = occAnalNonRecRhs env tagged_binder rhs
174 rhs_usage2 = addIdOccs rhs_usage1 (idUnfoldingVars binder)
175
176 rhs_usage3 = addIdOccs rhs_usage2 (idRuleVars binder)
177 -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
178
179 rhs_usage4 = maybe rhs_usage3 (addIdOccs rhs_usage3) $
180 lookupVarEnv imp_rule_edges binder
181 -- See Note [Preventing loops due to imported functions rules]
182
183 -----------------
184 occAnalRecBind :: OccEnv -> ImpRuleEdges -> [(Var,CoreExpr)]
185 -> UsageDetails -> (UsageDetails, [CoreBind])
186 occAnalRecBind env imp_rule_edges pairs body_usage
187 = foldr occAnalRec (body_usage, []) sccs
188 -- For a recursive group, we
189 -- * occ-analyse all the RHSs
190 -- * compute strongly-connected components
191 -- * feed those components to occAnalRec
192 where
193 bndr_set = mkVarSet (map fst pairs)
194
195 sccs :: [SCC (Node Details)]
196 sccs = {-# SCC "occAnalBind.scc" #-} stronglyConnCompFromEdgedVerticesR nodes
197
198 nodes :: [Node Details]
199 nodes = {-# SCC "occAnalBind.assoc" #-} map (makeNode env imp_rule_edges bndr_set) pairs
200
201 {-
202 Note [Dead code]
203 ~~~~~~~~~~~~~~~~
204 Dropping dead code for a cyclic Strongly Connected Component is done
205 in a very simple way:
206
207 the entire SCC is dropped if none of its binders are mentioned
208 in the body; otherwise the whole thing is kept.
209
210 The key observation is that dead code elimination happens after
211 dependency analysis: so 'occAnalBind' processes SCCs instead of the
212 original term's binding groups.
213
214 Thus 'occAnalBind' does indeed drop 'f' in an example like
215
216 letrec f = ...g...
217 g = ...(...g...)...
218 in
219 ...g...
220
221 when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
222 'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
223 'AcyclicSCC f', where 'body_usage' won't contain 'f'.
224
225 ------------------------------------------------------------
226 Note [Forming Rec groups]
227 ~~~~~~~~~~~~~~~~~~~~~~~~~
228 We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
229 and "g uses f", no matter how indirectly. We do a SCC analysis
230 with an edge f -> g if "f uses g".
231
232 More precisely, "f uses g" iff g should be in scope wherever f is.
233 That is, g is free in:
234 a) the rhs 'ef'
235 b) or the RHS of a rule for f (Note [Rules are extra RHSs])
236 c) or the LHS or a rule for f (Note [Rule dependency info])
237
238 These conditions apply regardless of the activation of the RULE (eg it might be
239 inactive in this phase but become active later). Once a Rec is broken up
240 it can never be put back together, so we must be conservative.
241
242 The principle is that, regardless of rule firings, every variable is
243 always in scope.
244
245 * Note [Rules are extra RHSs]
246 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
247 A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
248 keeps the specialised "children" alive. If the parent dies
249 (because it isn't referenced any more), then the children will die
250 too (unless they are already referenced directly).
251
252 To that end, we build a Rec group for each cyclic strongly
253 connected component,
254 *treating f's rules as extra RHSs for 'f'*.
255 More concretely, the SCC analysis runs on a graph with an edge
256 from f -> g iff g is mentioned in
257 (a) f's rhs
258 (b) f's RULES
259 These are rec_edges.
260
261 Under (b) we include variables free in *either* LHS *or* RHS of
262 the rule. The former might seems silly, but see Note [Rule
263 dependency info]. So in Example [eftInt], eftInt and eftIntFB
264 will be put in the same Rec, even though their 'main' RHSs are
265 both non-recursive.
266
267 * Note [Rule dependency info]
268 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
269 The VarSet in a RuleInfo is used for dependency analysis in the
270 occurrence analyser. We must track free vars in *both* lhs and rhs.
271 Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
272 Why both? Consider
273 x = y
274 RULE f x = v+4
275 Then if we substitute y for x, we'd better do so in the
276 rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
277 as well as 'v'
278
279 * Note [Rules are visible in their own rec group]
280 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
281 We want the rules for 'f' to be visible in f's right-hand side.
282 And we'd like them to be visible in other functions in f's Rec
283 group. E.g. in Note [Specialisation rules] we want f' rule
284 to be visible in both f's RHS, and fs's RHS.
285
286 This means that we must simplify the RULEs first, before looking
287 at any of the definitions. This is done by Simplify.simplRecBind,
288 when it calls addLetIdInfo.
289
290 ------------------------------------------------------------
291 Note [Choosing loop breakers]
292 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
293 Loop breaking is surprisingly subtle. First read the section 4 of
294 "Secrets of the GHC inliner". This describes our basic plan.
295 We avoid infinite inlinings by choosing loop breakers, and
296 ensuring that a loop breaker cuts each loop.
297
298 Fundamentally, we do SCC analysis on a graph. For each recursive
299 group we choose a loop breaker, delete all edges to that node,
300 re-analyse the SCC, and iterate.
301
302 But what is the graph? NOT the same graph as was used for Note
303 [Forming Rec groups]! In particular, a RULE is like an equation for
304 'f' that is *always* inlined if it is applicable. We do *not* disable
305 rules for loop-breakers. It's up to whoever makes the rules to make
306 sure that the rules themselves always terminate. See Note [Rules for
307 recursive functions] in Simplify.hs
308
309 Hence, if
310 f's RHS (or its INLINE template if it has one) mentions g, and
311 g has a RULE that mentions h, and
312 h has a RULE that mentions f
313
314 then we *must* choose f to be a loop breaker. Example: see Note
315 [Specialisation rules].
316
317 In general, take the free variables of f's RHS, and augment it with
318 all the variables reachable by RULES from those starting points. That
319 is the whole reason for computing rule_fv_env in occAnalBind. (Of
320 course we only consider free vars that are also binders in this Rec
321 group.) See also Note [Finding rule RHS free vars]
322
323 Note that when we compute this rule_fv_env, we only consider variables
324 free in the *RHS* of the rule, in contrast to the way we build the
325 Rec group in the first place (Note [Rule dependency info])
326
327 Note that if 'g' has RHS that mentions 'w', we should add w to
328 g's loop-breaker edges. More concretely there is an edge from f -> g
329 iff
330 (a) g is mentioned in f's RHS `xor` f's INLINE rhs
331 (see Note [Inline rules])
332 (b) or h is mentioned in f's RHS, and
333 g appears in the RHS of an active RULE of h
334 or a transitive sequence of active rules starting with h
335
336 Why "active rules"? See Note [Finding rule RHS free vars]
337
338 Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
339 chosen as a loop breaker, because their RHSs don't mention each other.
340 And indeed both can be inlined safely.
341
342 Note again that the edges of the graph we use for computing loop breakers
343 are not the same as the edges we use for computing the Rec blocks.
344 That's why we compute
345
346 - rec_edges for the Rec block analysis
347 - loop_breaker_edges for the loop breaker analysis
348
349 * Note [Finding rule RHS free vars]
350 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
351 Consider this real example from Data Parallel Haskell
352 tagZero :: Array Int -> Array Tag
353 {-# INLINE [1] tagZeroes #-}
354 tagZero xs = pmap (\x -> fromBool (x==0)) xs
355
356 {-# RULES "tagZero" [~1] forall xs n.
357 pmap fromBool <blah blah> = tagZero xs #-}
358 So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
359 However, tagZero can only be inlined in phase 1 and later, while
360 the RULE is only active *before* phase 1. So there's no problem.
361
362 To make this work, we look for the RHS free vars only for
363 *active* rules. That's the reason for the occ_rule_act field
364 of the OccEnv.
365
366 * Note [Weak loop breakers]
367 ~~~~~~~~~~~~~~~~~~~~~~~~~
368 There is a last nasty wrinkle. Suppose we have
369
370 Rec { f = f_rhs
371 RULE f [] = g
372
373 h = h_rhs
374 g = h
375 ...more...
376 }
377
378 Remember that we simplify the RULES before any RHS (see Note
379 [Rules are visible in their own rec group] above).
380
381 So we must *not* postInlineUnconditionally 'g', even though
382 its RHS turns out to be trivial. (I'm assuming that 'g' is
383 not choosen as a loop breaker.) Why not? Because then we
384 drop the binding for 'g', which leaves it out of scope in the
385 RULE!
386
387 Here's a somewhat different example of the same thing
388 Rec { g = h
389 ; h = ...f...
390 ; f = f_rhs
391 RULE f [] = g }
392 Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
393 g, because the RULE for f is active throughout. So the RHS of h
394 might rewrite to h = ...g...
395 So g must remain in scope in the output program!
396
397 We "solve" this by:
398
399 Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
400 iff g is a "missing free variable" of the Rec group
401
402 A "missing free variable" x is one that is mentioned in an RHS or
403 INLINE or RULE of a binding in the Rec group, but where the
404 dependency on x may not show up in the loop_breaker_edges (see
405 note [Choosing loop breakers} above).
406
407 A normal "strong" loop breaker has IAmLoopBreaker False. So
408
409 Inline postInlineUnconditionally
410 strong IAmLoopBreaker False no no
411 weak IAmLoopBreaker True yes no
412 other yes yes
413
414 The **sole** reason for this kind of loop breaker is so that
415 postInlineUnconditionally does not fire. Ugh. (Typically it'll
416 inline via the usual callSiteInline stuff, so it'll be dead in the
417 next pass, so the main Ugh is the tiresome complication.)
418
419 Note [Rules for imported functions]
420 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
421 Consider this
422 f = /\a. B.g a
423 RULE B.g Int = 1 + f Int
424 Note that
425 * The RULE is for an imported function.
426 * f is non-recursive
427 Now we
428 can get
429 f Int --> B.g Int Inlining f
430 --> 1 + f Int Firing RULE
431 and so the simplifier goes into an infinite loop. This
432 would not happen if the RULE was for a local function,
433 because we keep track of dependencies through rules. But
434 that is pretty much impossible to do for imported Ids. Suppose
435 f's definition had been
436 f = /\a. C.h a
437 where (by some long and devious process), C.h eventually inlines to
438 B.g. We could only spot such loops by exhaustively following
439 unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
440 f.
441
442 Note that RULES for imported functions are important in practice; they
443 occur a lot in the libraries.
444
445 We regard this potential infinite loop as a *programmer* error.
446 It's up the programmer not to write silly rules like
447 RULE f x = f x
448 and the example above is just a more complicated version.
449
450 Note [Preventing loops due to imported functions rules]
451 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
452 Consider:
453 import GHC.Base (foldr)
454
455 {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
456 filter p xs = build (\c n -> foldr (filterFB c p) n xs)
457 filterFB c p = ...
458
459 f = filter p xs
460
461 Note that filter is not a loop-breaker, so what happens is:
462 f = filter p xs
463 = {inline} build (\c n -> foldr (filterFB c p) n xs)
464 = {inline} foldr (filterFB (:) p) [] xs
465 = {RULE} filter p xs
466
467 We are in an infinite loop.
468
469 A more elaborate example (that I actually saw in practice when I went to
470 mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
471 {-# LANGUAGE RankNTypes #-}
472 module GHCList where
473
474 import Prelude hiding (filter)
475 import GHC.Base (build)
476
477 {-# INLINABLE filter #-}
478 filter :: (a -> Bool) -> [a] -> [a]
479 filter p [] = []
480 filter p (x:xs) = if p x then x : filter p xs else filter p xs
481
482 {-# NOINLINE [0] filterFB #-}
483 filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
484 filterFB c p x r | p x = x `c` r
485 | otherwise = r
486
487 {-# RULES
488 "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr
489 (filterFB c p) n xs)
490 "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p
491 #-}
492
493 Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
494 are not), the unfolding given to "filter" in the interface file will be:
495 filter p [] = []
496 filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
497 else build (\c n -> foldr (filterFB c p) n xs
498
499 Note that because this unfolding does not mention "filter", filter is not
500 marked as a strong loop breaker. Therefore at a use site in another module:
501 filter p xs
502 = {inline}
503 case xs of [] -> []
504 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
505 else build (\c n -> foldr (filterFB c p) n xs)
506
507 build (\c n -> foldr (filterFB c p) n xs)
508 = {inline} foldr (filterFB (:) p) [] xs
509 = {RULE} filter p xs
510
511 And we are in an infinite loop again, except that this time the loop is producing an
512 infinitely large *term* (an unrolling of filter) and so the simplifier finally
513 dies with "ticks exhausted"
514
515 Because of this problem, we make a small change in the occurrence analyser
516 designed to mark functions like "filter" as strong loop breakers on the basis that:
517 1. The RHS of filter mentions the local function "filterFB"
518 2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS
519
520 So for each RULE for an *imported* function we are going to add
521 dependency edges between the *local* FVS of the rule LHS and the
522 *local* FVS of the rule RHS. We don't do anything special for RULES on
523 local functions because the standard occurrence analysis stuff is
524 pretty good at getting loop-breakerness correct there.
525
526 It is important to note that even with this extra hack we aren't always going to get
527 things right. For example, it might be that the rule LHS mentions an imported Id,
528 and another module has a RULE that can rewrite that imported Id to one of our local
529 Ids.
530
531 Note [Specialising imported functions]
532 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
533 BUT for *automatically-generated* rules, the programmer can't be
534 responsible for the "programmer error" in Note [Rules for imported
535 functions]. In paricular, consider specialising a recursive function
536 defined in another module. If we specialise a recursive function B.g,
537 we get
538 g_spec = .....(B.g Int).....
539 RULE B.g Int = g_spec
540 Here, g_spec doesn't look recursive, but when the rule fires, it
541 becomes so. And if B.g was mutually recursive, the loop might
542 not be as obvious as it is here.
543
544 To avoid this,
545 * When specialising a function that is a loop breaker,
546 give a NOINLINE pragma to the specialised function
547
548 Note [Glomming]
549 ~~~~~~~~~~~~~~~
550 RULES for imported Ids can make something at the top refer to something at the bottom:
551 f = \x -> B.g (q x)
552 h = \y -> 3
553
554 RULE: B.g (q x) = h x
555
556 Applying this rule makes f refer to h, although f doesn't appear to
557 depend on h. (And, as in Note [Rules for imported functions], the
558 dependency might be more indirect. For example, f might mention C.t
559 rather than B.g, where C.t eventually inlines to B.g.)
560
561 NOTICE that this cannot happen for rules whose head is a
562 locally-defined function, because we accurately track dependencies
563 through RULES. It only happens for rules whose head is an imported
564 function (B.g in the example above).
565
566 Solution:
567 - When simplifying, bring all top level identifiers into
568 scope at the start, ignoring the Rec/NonRec structure, so
569 that when 'h' pops up in f's rhs, we find it in the in-scope set
570 (as the simplifier generally expects). This happens in simplTopBinds.
571
572 - In the occurrence analyser, if there are any out-of-scope
573 occurrences that pop out of the top, which will happen after
574 firing the rule: f = \x -> h x
575 h = \y -> 3
576 then just glom all the bindings into a single Rec, so that
577 the *next* iteration of the occurrence analyser will sort
578 them all out. This part happens in occurAnalysePgm.
579
580 ------------------------------------------------------------
581 Note [Inline rules]
582 ~~~~~~~~~~~~~~~~~~~
583 None of the above stuff about RULES applies to Inline Rules,
584 stored in a CoreUnfolding. The unfolding, if any, is simplified
585 at the same time as the regular RHS of the function (ie *not* like
586 Note [Rules are visible in their own rec group]), so it should be
587 treated *exactly* like an extra RHS.
588
589 Or, rather, when computing loop-breaker edges,
590 * If f has an INLINE pragma, and it is active, we treat the
591 INLINE rhs as f's rhs
592 * If it's inactive, we treat f as having no rhs
593 * If it has no INLINE pragma, we look at f's actual rhs
594
595
596 There is a danger that we'll be sub-optimal if we see this
597 f = ...f...
598 [INLINE f = ..no f...]
599 where f is recursive, but the INLINE is not. This can just about
600 happen with a sufficiently odd set of rules; eg
601
602 foo :: Int -> Int
603 {-# INLINE [1] foo #-}
604 foo x = x+1
605
606 bar :: Int -> Int
607 {-# INLINE [1] bar #-}
608 bar x = foo x + 1
609
610 {-# RULES "foo" [~1] forall x. foo x = bar x #-}
611
612 Here the RULE makes bar recursive; but it's INLINE pragma remains
613 non-recursive. It's tempting to then say that 'bar' should not be
614 a loop breaker, but an attempt to do so goes wrong in two ways:
615 a) We may get
616 $df = ...$cfoo...
617 $cfoo = ...$df....
618 [INLINE $cfoo = ...no-$df...]
619 But we want $cfoo to depend on $df explicitly so that we
620 put the bindings in the right order to inline $df in $cfoo
621 and perhaps break the loop altogether. (Maybe this
622 b)
623
624
625 Example [eftInt]
626 ~~~~~~~~~~~~~~~
627 Example (from GHC.Enum):
628
629 eftInt :: Int# -> Int# -> [Int]
630 eftInt x y = ...(non-recursive)...
631
632 {-# INLINE [0] eftIntFB #-}
633 eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
634 eftIntFB c n x y = ...(non-recursive)...
635
636 {-# RULES
637 "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
638 "eftIntList" [1] eftIntFB (:) [] = eftInt
639 #-}
640
641 Note [Specialisation rules]
642 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
643 Consider this group, which is typical of what SpecConstr builds:
644
645 fs a = ....f (C a)....
646 f x = ....f (C a)....
647 {-# RULE f (C a) = fs a #-}
648
649 So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
650
651 But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
652 - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
653 - fs is inlined (say it's small)
654 - now there's another opportunity to apply the RULE
655
656 This showed up when compiling Control.Concurrent.Chan.getChanContents.
657 -}
658
659 type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique,
660 -- which is gotten from the Id.
661 data Details
662 = ND { nd_bndr :: Id -- Binder
663 , nd_rhs :: CoreExpr -- RHS, already occ-analysed
664
665 , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings
666 -- ignoring phase (ie assuming all are active)
667 -- See Note [Forming Rec groups]
668
669 , nd_inl :: IdSet -- Free variables of
670 -- the stable unfolding (if present and active)
671 -- or the RHS (if not)
672 -- but excluding any RULES
673 -- This is the IdSet that may be used if the Id is inlined
674
675 , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds
676 -- but are *not* in nd_inl. These are the ones whose
677 -- dependencies might not be respected by loop_breaker_edges
678 -- See Note [Weak loop breakers]
679
680 , nd_active_rule_fvs :: IdSet -- Free variables of the RHS of active RULES
681 }
682
683 instance Outputable Details where
684 ppr nd = text "ND" <> braces
685 (sep [ text "bndr =" <+> ppr (nd_bndr nd)
686 , text "uds =" <+> ppr (nd_uds nd)
687 , text "inl =" <+> ppr (nd_inl nd)
688 , text "weak =" <+> ppr (nd_weak nd)
689 , text "rule =" <+> ppr (nd_active_rule_fvs nd)
690 ])
691
692 makeNode :: OccEnv -> ImpRuleEdges -> VarSet -> (Var, CoreExpr) -> Node Details
693 makeNode env imp_rule_edges bndr_set (bndr, rhs)
694 = (details, varUnique bndr, keysUFM node_fvs)
695 where
696 details = ND { nd_bndr = bndr
697 , nd_rhs = rhs'
698 , nd_uds = rhs_usage3
699 , nd_weak = node_fvs `minusVarSet` inl_fvs
700 , nd_inl = inl_fvs
701 , nd_active_rule_fvs = active_rule_fvs }
702
703 -- Constructing the edges for the main Rec computation
704 -- See Note [Forming Rec groups]
705 (rhs_usage1, rhs') = occAnalRecRhs env rhs
706 rhs_usage2 = addIdOccs rhs_usage1 all_rule_fvs -- Note [Rules are extra RHSs]
707 -- Note [Rule dependency info]
708 rhs_usage3 = case mb_unf_fvs of
709 Just unf_fvs -> addIdOccs rhs_usage2 unf_fvs
710 Nothing -> rhs_usage2
711 node_fvs = udFreeVars bndr_set rhs_usage3
712
713 -- Finding the free variables of the rules
714 is_active = occ_rule_act env :: Activation -> Bool
715 rules = filterOut isBuiltinRule (idCoreRules bndr)
716 rules_w_fvs :: [(Activation, VarSet)] -- Find the RHS fvs
717 rules_w_fvs = maybe id (\ids -> ((AlwaysActive, ids):)) (lookupVarEnv imp_rule_edges bndr)
718 -- See Note [Preventing loops due to imported functions rules]
719 [ (ru_act rule, fvs)
720 | rule <- rules
721 , let fvs = exprFreeVars (ru_rhs rule)
722 `delVarSetList` ru_bndrs rule
723 , not (isEmptyVarSet fvs) ]
724 all_rule_fvs = rule_lhs_fvs `unionVarSet` rule_rhs_fvs
725 rule_rhs_fvs = mapUnionVarSet snd rules_w_fvs
726 rule_lhs_fvs = mapUnionVarSet (\ru -> exprsFreeVars (ru_args ru)
727 `delVarSetList` ru_bndrs ru) rules
728 active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_fvs, is_active a]
729
730 -- Finding the free variables of the INLINE pragma (if any)
731 unf = realIdUnfolding bndr -- Ignore any current loop-breaker flag
732 mb_unf_fvs = stableUnfoldingVars unf
733
734 -- Find the "nd_inl" free vars; for the loop-breaker phase
735 inl_fvs = case mb_unf_fvs of
736 Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
737 Just unf_fvs -> unf_fvs
738 -- We could check for an *active* INLINE (returning
739 -- emptyVarSet for an inactive one), but is_active
740 -- isn't the right thing (it tells about
741 -- RULE activation), so we'd need more plumbing
742
743 -----------------------------
744 occAnalRec :: SCC (Node Details)
745 -> (UsageDetails, [CoreBind])
746 -> (UsageDetails, [CoreBind])
747
748 -- The NonRec case is just like a Let (NonRec ...) above
749 occAnalRec (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs, nd_uds = rhs_uds}, _, _))
750 (body_uds, binds)
751 | not (bndr `usedIn` body_uds)
752 = (body_uds, binds) -- See Note [Dead code]
753
754 | otherwise -- It's mentioned in the body
755 = (body_uds' +++ rhs_uds,
756 NonRec tagged_bndr rhs : binds)
757 where
758 (body_uds', tagged_bndr) = tagBinder body_uds bndr
759
760 -- The Rec case is the interesting one
761 -- See Note [Loop breaking]
762 occAnalRec (CyclicSCC nodes) (body_uds, binds)
763 | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
764 = (body_uds, binds) -- See Note [Dead code]
765
766 | otherwise -- At this point we always build a single Rec
767 = -- pprTrace "occAnalRec" (vcat
768 -- [ text "tagged nodes" <+> ppr tagged_nodes
769 -- , text "lb edges" <+> ppr loop_breaker_edges])
770 (final_uds, Rec pairs : binds)
771
772 where
773 bndrs = [b | (ND { nd_bndr = b }, _, _) <- nodes]
774 bndr_set = mkVarSet bndrs
775
776 ----------------------------
777 -- Tag the binders with their occurrence info
778 tagged_nodes = map tag_node nodes
779 total_uds = foldl add_uds body_uds nodes
780 final_uds = total_uds `minusVarEnv` bndr_set
781 add_uds usage_so_far (nd, _, _) = usage_so_far +++ nd_uds nd
782
783 tag_node :: Node Details -> Node Details
784 tag_node (details@ND { nd_bndr = bndr }, k, ks)
785 | let bndr1 = setBinderOcc total_uds bndr
786 = (details { nd_bndr = bndr1 }, k, ks)
787
788 ---------------------------
789 -- Now reconstruct the cycle
790 pairs :: [(Id,CoreExpr)]
791 pairs | isEmptyVarSet weak_fvs = reOrderNodes 0 bndr_set weak_fvs tagged_nodes []
792 | otherwise = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_edges []
793 -- If weak_fvs is empty, the loop_breaker_edges will include all
794 -- the edges in tagged_nodes, so there isn't any point in doing
795 -- a fresh SCC computation that will yield a single CyclicSCC result.
796
797 weak_fvs :: VarSet
798 weak_fvs = mapUnionVarSet (nd_weak . fstOf3) nodes
799
800 -- See Note [Choosing loop breakers] for loop_breaker_edges
801 loop_breaker_edges = map mk_node tagged_nodes
802 mk_node (details@(ND { nd_inl = inl_fvs }), k, _)
803 = (details, k, keysUFM (extendFvs_ rule_fv_env inl_fvs))
804
805 ------------------------------------
806 rule_fv_env :: IdEnv IdSet
807 -- Maps a variable f to the variables from this group
808 -- mentioned in RHS of active rules for f
809 -- Domain is *subset* of bound vars (others have no rule fvs)
810 rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
811 init_rule_fvs -- See Note [Finding rule RHS free vars]
812 = [ (b, trimmed_rule_fvs)
813 | (ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs },_,_) <- nodes
814 , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
815 , not (isEmptyVarSet trimmed_rule_fvs)]
816
817 {-
818 @loopBreakSCC@ is applied to the list of (binder,rhs) pairs for a cyclic
819 strongly connected component (there's guaranteed to be a cycle). It returns the
820 same pairs, but
821 a) in a better order,
822 b) with some of the Ids having a IAmALoopBreaker pragma
823
824 The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means
825 that the simplifier can guarantee not to loop provided it never records an inlining
826 for these no-inline guys.
827
828 Furthermore, the order of the binds is such that if we neglect dependencies
829 on the no-inline Ids then the binds are topologically sorted. This means
830 that the simplifier will generally do a good job if it works from top bottom,
831 recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
832 -}
833
834 type Binding = (Id,CoreExpr)
835
836 mk_loop_breaker :: Node Details -> Binding
837 mk_loop_breaker (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
838 = (setIdOccInfo bndr strongLoopBreaker, rhs)
839
840 mk_non_loop_breaker :: VarSet -> Node Details -> Binding
841 -- See Note [Weak loop breakers]
842 mk_non_loop_breaker used_in_rules (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
843 | bndr `elemVarSet` used_in_rules = (setIdOccInfo bndr weakLoopBreaker, rhs)
844 | otherwise = (bndr, rhs)
845
846 udFreeVars :: VarSet -> UsageDetails -> VarSet
847 -- Find the subset of bndrs that are mentioned in uds
848 udFreeVars bndrs uds = intersectUFM_C (\b _ -> b) bndrs uds
849
850 loopBreakNodes :: Int
851 -> VarSet -- All binders
852 -> VarSet -- Binders whose dependencies may be "missing"
853 -- See Note [Weak loop breakers]
854 -> [Node Details]
855 -> [Binding] -- Append these to the end
856 -> [Binding]
857 -- Return the bindings sorted into a plausible order, and marked with loop breakers.
858 loopBreakNodes depth bndr_set weak_fvs nodes binds
859 = go (stronglyConnCompFromEdgedVerticesR nodes) binds
860 where
861 go [] binds = binds
862 go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
863
864 loop_break_scc scc binds
865 = case scc of
866 AcyclicSCC node -> mk_non_loop_breaker weak_fvs node : binds
867 CyclicSCC [node] -> mk_loop_breaker node : binds
868 CyclicSCC nodes -> reOrderNodes depth bndr_set weak_fvs nodes binds
869
870 reOrderNodes :: Int -> VarSet -> VarSet -> [Node Details] -> [Binding] -> [Binding]
871 -- Choose a loop breaker, mark it no-inline,
872 -- do SCC analysis on the rest, and recursively sort them out
873 reOrderNodes _ _ _ [] _ = panic "reOrderNodes"
874 reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
875 = -- pprTrace "reOrderNodes" (text "unchosen" <+> ppr unchosen $$
876 -- text "chosen" <+> ppr chosen_nodes) $
877 loopBreakNodes new_depth bndr_set weak_fvs unchosen $
878 (map mk_loop_breaker chosen_nodes ++ binds)
879 where
880 (chosen_nodes, unchosen) = choose_loop_breaker (score node) [node] [] nodes
881
882 approximate_loop_breaker = depth >= 2
883 new_depth | approximate_loop_breaker = 0
884 | otherwise = depth+1
885 -- After two iterations (d=0, d=1) give up
886 -- and approximate, returning to d=0
887
888 choose_loop_breaker :: Int -- Best score so far
889 -> [Node Details] -- Nodes with this score
890 -> [Node Details] -- Nodes with higher scores
891 -> [Node Details] -- Unprocessed nodes
892 -> ([Node Details], [Node Details])
893 -- This loop looks for the bind with the lowest score
894 -- to pick as the loop breaker. The rest accumulate in
895 choose_loop_breaker _ loop_nodes acc []
896 = (loop_nodes, acc) -- Done
897
898 -- If approximate_loop_breaker is True, we pick *all*
899 -- nodes with lowest score, else just one
900 -- See Note [Complexity of loop breaking]
901 choose_loop_breaker loop_sc loop_nodes acc (node : nodes)
902 | sc < loop_sc -- Lower score so pick this new one
903 = choose_loop_breaker sc [node] (loop_nodes ++ acc) nodes
904
905 | approximate_loop_breaker && sc == loop_sc
906 = choose_loop_breaker loop_sc (node : loop_nodes) acc nodes
907
908 | otherwise -- Higher score so don't pick it
909 = choose_loop_breaker loop_sc loop_nodes (node : acc) nodes
910 where
911 sc = score node
912
913 score :: Node Details -> Int -- Higher score => less likely to be picked as loop breaker
914 score (ND { nd_bndr = bndr, nd_rhs = rhs }, _, _)
915 | not (isId bndr) = 100 -- A type or cercion variable is never a loop breaker
916
917 | isDFunId bndr = 9 -- Never choose a DFun as a loop breaker
918 -- Note [DFuns should not be loop breakers]
919
920 | Just be_very_keen <- hasStableCoreUnfolding_maybe (idUnfolding bndr)
921 = if be_very_keen then 6 -- Note [Loop breakers and INLINE/INLINEABLE pragmas]
922 else 3
923 -- Data structures are more important than INLINE pragmas
924 -- so that dictionary/method recursion unravels
925 -- Note that this case hits all stable unfoldings, so we
926 -- never look at 'rhs' for stable unfoldings. That's right, because
927 -- 'rhs' is irrelevant for inlining things with a stable unfolding
928
929 | is_con_app rhs = 5 -- Data types help with cases: Note [Constructor applications]
930
931 | exprIsTrivial rhs = 10 -- Practically certain to be inlined
932 -- Used to have also: && not (isExportedId bndr)
933 -- But I found this sometimes cost an extra iteration when we have
934 -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }
935 -- where df is the exported dictionary. Then df makes a really
936 -- bad choice for loop breaker
937
938
939 -- If an Id is marked "never inline" then it makes a great loop breaker
940 -- The only reason for not checking that here is that it is rare
941 -- and I've never seen a situation where it makes a difference,
942 -- so it probably isn't worth the time to test on every binder
943 -- | isNeverActive (idInlinePragma bndr) = -10
944
945 | isOneOcc (idOccInfo bndr) = 2 -- Likely to be inlined
946
947 | canUnfold (realIdUnfolding bndr) = 1
948 -- The Id has some kind of unfolding
949 -- Ignore loop-breaker-ness here because that is what we are setting!
950
951 | otherwise = 0
952
953 -- Checking for a constructor application
954 -- Cheap and cheerful; the simplifer moves casts out of the way
955 -- The lambda case is important to spot x = /\a. C (f a)
956 -- which comes up when C is a dictionary constructor and
957 -- f is a default method.
958 -- Example: the instance for Show (ST s a) in GHC.ST
959 --
960 -- However we *also* treat (\x. C p q) as a con-app-like thing,
961 -- Note [Closure conversion]
962 is_con_app (Var v) = isConLikeId v
963 is_con_app (App f _) = is_con_app f
964 is_con_app (Lam _ e) = is_con_app e
965 is_con_app (Tick _ e) = is_con_app e
966 is_con_app _ = False
967
968 {-
969 Note [Complexity of loop breaking]
970 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
971 The loop-breaking algorithm knocks out one binder at a time, and
972 performs a new SCC analysis on the remaining binders. That can
973 behave very badly in tightly-coupled groups of bindings; in the
974 worst case it can be (N**2)*log N, because it does a full SCC
975 on N, then N-1, then N-2 and so on.
976
977 To avoid this, we switch plans after 2 (or whatever) attempts:
978 Plan A: pick one binder with the lowest score, make it
979 a loop breaker, and try again
980 Plan B: pick *all* binders with the lowest score, make them
981 all loop breakers, and try again
982 Since there are only a small finite number of scores, this will
983 terminate in a constant number of iterations, rather than O(N)
984 iterations.
985
986 You might thing that it's very unlikely, but RULES make it much
987 more likely. Here's a real example from Trac #1969:
988 Rec { $dm = \d.\x. op d
989 {-# RULES forall d. $dm Int d = $s$dm1
990 forall d. $dm Bool d = $s$dm2 #-}
991
992 dInt = MkD .... opInt ...
993 dInt = MkD .... opBool ...
994 opInt = $dm dInt
995 opBool = $dm dBool
996
997 $s$dm1 = \x. op dInt
998 $s$dm2 = \x. op dBool }
999 The RULES stuff means that we can't choose $dm as a loop breaker
1000 (Note [Choosing loop breakers]), so we must choose at least (say)
1001 opInt *and* opBool, and so on. The number of loop breakders is
1002 linear in the number of instance declarations.
1003
1004 Note [Loop breakers and INLINE/INLINEABLE pragmas]
1005 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1006 Avoid choosing a function with an INLINE pramga as the loop breaker!
1007 If such a function is mutually-recursive with a non-INLINE thing,
1008 then the latter should be the loop-breaker.
1009
1010 It's vital to distinguish between INLINE and INLINEABLE (the
1011 Bool returned by hasStableCoreUnfolding_maybe). If we start with
1012 Rec { {-# INLINEABLE f #-}
1013 f x = ...f... }
1014 and then worker/wrapper it through strictness analysis, we'll get
1015 Rec { {-# INLINEABLE $wf #-}
1016 $wf p q = let x = (p,q) in ...f...
1017
1018 {-# INLINE f #-}
1019 f x = case x of (p,q) -> $wf p q }
1020
1021 Now it is vital that we choose $wf as the loop breaker, so we can
1022 inline 'f' in '$wf'.
1023
1024 Note [DFuns should not be loop breakers]
1025 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1026 It's particularly bad to make a DFun into a loop breaker. See
1027 Note [How instance declarations are translated] in TcInstDcls
1028
1029 We give DFuns a higher score than ordinary CONLIKE things because
1030 if there's a choice we want the DFun to be the non-looop breker. Eg
1031
1032 rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
1033
1034 $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
1035 {-# DFUN #-}
1036 $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
1037 }
1038
1039 Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
1040 if we can't unravel the DFun first.
1041
1042 Note [Constructor applications]
1043 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1044 It's really really important to inline dictionaries. Real
1045 example (the Enum Ordering instance from GHC.Base):
1046
1047 rec f = \ x -> case d of (p,q,r) -> p x
1048 g = \ x -> case d of (p,q,r) -> q x
1049 d = (v, f, g)
1050
1051 Here, f and g occur just once; but we can't inline them into d.
1052 On the other hand we *could* simplify those case expressions if
1053 we didn't stupidly choose d as the loop breaker.
1054 But we won't because constructor args are marked "Many".
1055 Inlining dictionaries is really essential to unravelling
1056 the loops in static numeric dictionaries, see GHC.Float.
1057
1058 Note [Closure conversion]
1059 ~~~~~~~~~~~~~~~~~~~~~~~~~
1060 We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
1061 The immediate motivation came from the result of a closure-conversion transformation
1062 which generated code like this:
1063
1064 data Clo a b = forall c. Clo (c -> a -> b) c
1065
1066 ($:) :: Clo a b -> a -> b
1067 Clo f env $: x = f env x
1068
1069 rec { plus = Clo plus1 ()
1070
1071 ; plus1 _ n = Clo plus2 n
1072
1073 ; plus2 Zero n = n
1074 ; plus2 (Succ m) n = Succ (plus $: m $: n) }
1075
1076 If we inline 'plus' and 'plus1', everything unravels nicely. But if
1077 we choose 'plus1' as the loop breaker (which is entirely possible
1078 otherwise), the loop does not unravel nicely.
1079
1080
1081 @occAnalRhs@ deals with the question of bindings where the Id is marked
1082 by an INLINE pragma. For these we record that anything which occurs
1083 in its RHS occurs many times. This pessimistically assumes that ths
1084 inlined binder also occurs many times in its scope, but if it doesn't
1085 we'll catch it next time round. At worst this costs an extra simplifier pass.
1086 ToDo: try using the occurrence info for the inline'd binder.
1087
1088 [March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC.
1089 [June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC.
1090 -}
1091
1092 occAnalRecRhs :: OccEnv -> CoreExpr -- Rhs
1093 -> (UsageDetails, CoreExpr)
1094 -- Returned usage details covers only the RHS,
1095 -- and *not* the RULE or INLINE template for the Id
1096 occAnalRecRhs env rhs = occAnal (rhsCtxt env) rhs
1097
1098 occAnalNonRecRhs :: OccEnv
1099 -> Id -> CoreExpr -- Binder and rhs
1100 -- Binder is already tagged with occurrence info
1101 -> (UsageDetails, CoreExpr)
1102 -- Returned usage details covers only the RHS,
1103 -- and *not* the RULE or INLINE template for the Id
1104 occAnalNonRecRhs env bndr rhs
1105 = occAnal rhs_env rhs
1106 where
1107 -- See Note [Cascading inlines]
1108 env1 | certainly_inline = env
1109 | otherwise = rhsCtxt env
1110
1111 -- See Note [Use one-shot info]
1112 rhs_env = env1 { occ_one_shots = argOneShots OneShotLam dmd }
1113
1114
1115 certainly_inline -- See Note [Cascading inlines]
1116 = case idOccInfo bndr of
1117 OneOcc in_lam one_br _ -> not in_lam && one_br && active && not_stable
1118 _ -> False
1119
1120 dmd = idDemandInfo bndr
1121 active = isAlwaysActive (idInlineActivation bndr)
1122 not_stable = not (isStableUnfolding (idUnfolding bndr))
1123
1124 addIdOccs :: UsageDetails -> VarSet -> UsageDetails
1125 addIdOccs usage id_set = foldVarSet addIdOcc usage id_set
1126
1127 addIdOcc :: Id -> UsageDetails -> UsageDetails
1128 addIdOcc v u | isId v = addOneOcc u v NoOccInfo
1129 | otherwise = u
1130 -- Give a non-committal binder info (i.e NoOccInfo) because
1131 -- a) Many copies of the specialised thing can appear
1132 -- b) We don't want to substitute a BIG expression inside a RULE
1133 -- even if that's the only occurrence of the thing
1134 -- (Same goes for INLINE.)
1135
1136 {-
1137 Note [Cascading inlines]
1138 ~~~~~~~~~~~~~~~~~~~~~~~~
1139 By default we use an rhsCtxt for the RHS of a binding. This tells the
1140 occ anal n that it's looking at an RHS, which has an effect in
1141 occAnalApp. In particular, for constructor applications, it makes
1142 the arguments appear to have NoOccInfo, so that we don't inline into
1143 them. Thus x = f y
1144 k = Just x
1145 we do not want to inline x.
1146
1147 But there's a problem. Consider
1148 x1 = a0 : []
1149 x2 = a1 : x1
1150 x3 = a2 : x2
1151 g = f x3
1152 First time round, it looks as if x1 and x2 occur as an arg of a
1153 let-bound constructor ==> give them a many-occurrence.
1154 But then x3 is inlined (unconditionally as it happens) and
1155 next time round, x2 will be, and the next time round x1 will be
1156 Result: multiple simplifier iterations. Sigh.
1157
1158 So, when analysing the RHS of x3 we notice that x3 will itself
1159 definitely inline the next time round, and so we analyse x3's rhs in
1160 an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff.
1161
1162 Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally.
1163 If we say "yes" when preInlineUnconditionally says "no" the simplifier iterates
1164 indefinitely:
1165 x = f y
1166 k = Just x
1167 inline ==>
1168 k = Just (f y)
1169 float ==>
1170 x1 = f y
1171 k = Just x1
1172
1173 This is worse than the slow cascade, so we only want to say "certainly_inline"
1174 if it really is certain. Look at the note with preInlineUnconditionally
1175 for the various clauses.
1176
1177 Expressions
1178 ~~~~~~~~~~~
1179 -}
1180
1181 occAnal :: OccEnv
1182 -> CoreExpr
1183 -> (UsageDetails, -- Gives info only about the "interesting" Ids
1184 CoreExpr)
1185
1186 occAnal _ expr@(Type _) = (emptyDetails, expr)
1187 occAnal _ expr@(Lit _) = (emptyDetails, expr)
1188 occAnal env expr@(Var v) = (mkOneOcc env v False, expr)
1189 -- At one stage, I gathered the idRuleVars for v here too,
1190 -- which in a way is the right thing to do.
1191 -- But that went wrong right after specialisation, when
1192 -- the *occurrences* of the overloaded function didn't have any
1193 -- rules in them, so the *specialised* versions looked as if they
1194 -- weren't used at all.
1195
1196 occAnal _ (Coercion co)
1197 = (addIdOccs emptyDetails (coVarsOfCo co), Coercion co)
1198 -- See Note [Gather occurrences of coercion variables]
1199
1200 {-
1201 Note [Gather occurrences of coercion variables]
1202 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1203 We need to gather info about what coercion variables appear, so that
1204 we can sort them into the right place when doing dependency analysis.
1205 -}
1206
1207 occAnal env (Tick tickish body)
1208 | tickish `tickishScopesLike` SoftScope
1209 = (usage, Tick tickish body')
1210
1211 | Breakpoint _ ids <- tickish
1212 = (usage_lam +++ mkVarEnv (zip ids (repeat NoOccInfo)), Tick tickish body')
1213 -- never substitute for any of the Ids in a Breakpoint
1214
1215 | otherwise
1216 = (usage_lam, Tick tickish body')
1217 where
1218 !(usage,body') = occAnal env body
1219 -- for a non-soft tick scope, we can inline lambdas only
1220 usage_lam = mapVarEnv markInsideLam usage
1221
1222 occAnal env (Cast expr co)
1223 = case occAnal env expr of { (usage, expr') ->
1224 let usage1 = markManyIf (isRhsEnv env) usage
1225 usage2 = addIdOccs usage1 (coVarsOfCo co)
1226 -- See Note [Gather occurrences of coercion variables]
1227 in (usage2, Cast expr' co)
1228 -- If we see let x = y `cast` co
1229 -- then mark y as 'Many' so that we don't
1230 -- immediately inline y again.
1231 }
1232
1233 occAnal env app@(App _ _)
1234 = occAnalApp env (collectArgsTicks tickishFloatable app)
1235
1236 -- Ignore type variables altogether
1237 -- (a) occurrences inside type lambdas only not marked as InsideLam
1238 -- (b) type variables not in environment
1239
1240 occAnal env (Lam x body) | isTyVar x
1241 = case occAnal env body of { (body_usage, body') ->
1242 (body_usage, Lam x body')
1243 }
1244
1245 -- For value lambdas we do a special hack. Consider
1246 -- (\x. \y. ...x...)
1247 -- If we did nothing, x is used inside the \y, so would be marked
1248 -- as dangerous to dup. But in the common case where the abstraction
1249 -- is applied to two arguments this is over-pessimistic.
1250 -- So instead, we just mark each binder with its occurrence
1251 -- info in the *body* of the multiple lambda.
1252 -- Then, the simplifier is careful when partially applying lambdas.
1253
1254 occAnal env expr@(Lam _ _)
1255 = case occAnal env_body body of { (body_usage, body') ->
1256 let
1257 (final_usage, tagged_binders) = tagLamBinders body_usage binders'
1258 -- Use binders' to put one-shot info on the lambdas
1259
1260 really_final_usage
1261 | all isOneShotBndr binders' = final_usage
1262 | otherwise = mapVarEnv markInsideLam final_usage
1263 in
1264 (really_final_usage, mkLams tagged_binders body') }
1265 where
1266 (binders, body) = collectBinders expr
1267 (env_body, binders') = oneShotGroup env binders
1268
1269 occAnal env (Case scrut bndr ty alts)
1270 = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') ->
1271 case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts') ->
1272 let
1273 alts_usage = foldr combineAltsUsageDetails emptyDetails alts_usage_s
1274 (alts_usage1, tagged_bndr) = tag_case_bndr alts_usage bndr
1275 total_usage = scrut_usage +++ alts_usage1
1276 in
1277 total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
1278 where
1279 -- Note [Case binder usage]
1280 -- ~~~~~~~~~~~~~~~~~~~~~~~~
1281 -- The case binder gets a usage of either "many" or "dead", never "one".
1282 -- Reason: we like to inline single occurrences, to eliminate a binding,
1283 -- but inlining a case binder *doesn't* eliminate a binding.
1284 -- We *don't* want to transform
1285 -- case x of w { (p,q) -> f w }
1286 -- into
1287 -- case x of w { (p,q) -> f (p,q) }
1288 tag_case_bndr usage bndr
1289 = case lookupVarEnv usage bndr of
1290 Nothing -> (usage, setIdOccInfo bndr IAmDead)
1291 Just _ -> (usage `delVarEnv` bndr, setIdOccInfo bndr NoOccInfo)
1292
1293 alt_env = mkAltEnv env scrut bndr
1294 occ_anal_alt = occAnalAlt alt_env
1295
1296 occ_anal_scrut (Var v) (alt1 : other_alts)
1297 | not (null other_alts) || not (isDefaultAlt alt1)
1298 = (mkOneOcc env v True, Var v) -- The 'True' says that the variable occurs
1299 -- in an interesting context; the case has
1300 -- at least one non-default alternative
1301 occ_anal_scrut (Tick t e) alts
1302 | t `tickishScopesLike` SoftScope
1303 -- No reason to not look through all ticks here, but only
1304 -- for soft-scoped ticks we can do so without having to
1305 -- update returned occurance info (see occAnal)
1306 = second (Tick t) $ occ_anal_scrut e alts
1307
1308 occ_anal_scrut scrut _alts
1309 = occAnal (vanillaCtxt env) scrut -- No need for rhsCtxt
1310
1311 occAnal env (Let bind body)
1312 = case occAnal env body of { (body_usage, body') ->
1313 case occAnalBind env noImpRuleEdges bind body_usage of { (final_usage, new_binds) ->
1314 (final_usage, mkLets new_binds body') }}
1315
1316 occAnalArgs :: OccEnv -> [CoreExpr] -> [OneShots] -> (UsageDetails, [CoreExpr])
1317 occAnalArgs _ [] _
1318 = (emptyDetails, [])
1319
1320 occAnalArgs env (arg:args) one_shots
1321 | isTypeArg arg
1322 = case occAnalArgs env args one_shots of { (uds, args') ->
1323 (uds, arg:args') }
1324
1325 | otherwise
1326 = case argCtxt env one_shots of { (arg_env, one_shots') ->
1327 case occAnal arg_env arg of { (uds1, arg') ->
1328 case occAnalArgs env args one_shots' of { (uds2, args') ->
1329 (uds1 +++ uds2, arg':args') }}}
1330
1331 {-
1332 Applications are dealt with specially because we want
1333 the "build hack" to work.
1334
1335 Note [Arguments of let-bound constructors]
1336 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1337 Consider
1338 f x = let y = expensive x in
1339 let z = (True,y) in
1340 (case z of {(p,q)->q}, case z of {(p,q)->q})
1341 We feel free to duplicate the WHNF (True,y), but that means
1342 that y may be duplicated thereby.
1343
1344 If we aren't careful we duplicate the (expensive x) call!
1345 Constructors are rather like lambdas in this way.
1346 -}
1347
1348 occAnalApp :: OccEnv
1349 -> (Expr CoreBndr, [Arg CoreBndr], [Tickish Id])
1350 -> (UsageDetails, Expr CoreBndr)
1351 occAnalApp env (Var fun, args, ticks)
1352 | null ticks = (uds, mkApps (Var fun) args')
1353 | otherwise = (uds, mkTicks ticks $ mkApps (Var fun) args')
1354 where
1355 uds = fun_uds +++ final_args_uds
1356
1357 !(args_uds, args') = occAnalArgs env args one_shots
1358 !final_args_uds = markManyIf (isRhsEnv env && is_exp) args_uds
1359 -- We mark the free vars of the argument of a constructor or PAP
1360 -- as "many", if it is the RHS of a let(rec).
1361 -- This means that nothing gets inlined into a constructor argument
1362 -- position, which is what we want. Typically those constructor
1363 -- arguments are just variables, or trivial expressions.
1364 --
1365 -- This is the *whole point* of the isRhsEnv predicate
1366 -- See Note [Arguments of let-bound constructors]
1367
1368 n_val_args = valArgCount args
1369 fun_uds = mkOneOcc env fun (n_val_args > 0)
1370 is_exp = isExpandableApp fun n_val_args
1371 -- See Note [CONLIKE pragma] in BasicTypes
1372 -- The definition of is_exp should match that in
1373 -- Simplify.prepareRhs
1374
1375 one_shots = argsOneShots (idStrictness fun) n_val_args
1376 -- See Note [Use one-shot info]
1377
1378 occAnalApp env (fun, args, ticks)
1379 = (fun_uds +++ args_uds, mkTicks ticks $ mkApps fun' args')
1380 where
1381 !(fun_uds, fun') = occAnal (addAppCtxt env args) fun
1382 -- The addAppCtxt is a bit cunning. One iteration of the simplifier
1383 -- often leaves behind beta redexs like
1384 -- (\x y -> e) a1 a2
1385 -- Here we would like to mark x,y as one-shot, and treat the whole
1386 -- thing much like a let. We do this by pushing some True items
1387 -- onto the context stack.
1388 !(args_uds, args') = occAnalArgs env args []
1389
1390 markManyIf :: Bool -- If this is true
1391 -> UsageDetails -- Then do markMany on this
1392 -> UsageDetails
1393 markManyIf True uds = mapVarEnv markMany uds
1394 markManyIf False uds = uds
1395
1396 {-
1397 Note [Use one-shot information]
1398 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1399 The occurrrence analyser propagates one-shot-lambda information in two
1400 situations:
1401
1402 * Applications: eg build (\c n -> blah)
1403
1404 Propagate one-shot info from the strictness signature of 'build' to
1405 the \c n.
1406
1407 This strictness signature can come from a module interface, in the case of
1408 an imported function, or from a previous run of the demand analyser.
1409
1410 * Let-bindings: eg let f = \c. let ... in \n -> blah
1411 in (build f, build f)
1412
1413 Propagate one-shot info from the demanand-info on 'f' to the
1414 lambdas in its RHS (which may not be syntactically at the top)
1415
1416 This information must have come from a previous run of the demanand
1417 analyser.
1418
1419 Previously, the demand analyser would *also* set the one-shot information, but
1420 that code was buggy (see #11770), so doing it only in on place, namely here, is
1421 saner.
1422
1423 Note [Binders in case alternatives]
1424 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1425 Consider
1426 case x of y { (a,b) -> f y }
1427 We treat 'a', 'b' as dead, because they don't physically occur in the
1428 case alternative. (Indeed, a variable is dead iff it doesn't occur in
1429 its scope in the output of OccAnal.) It really helps to know when
1430 binders are unused. See esp the call to isDeadBinder in
1431 Simplify.mkDupableAlt
1432
1433 In this example, though, the Simplifier will bring 'a' and 'b' back to
1434 life, beause it binds 'y' to (a,b) (imagine got inlined and
1435 scrutinised y).
1436 -}
1437
1438 occAnalAlt :: (OccEnv, Maybe (Id, CoreExpr))
1439 -> CoreAlt
1440 -> (UsageDetails, Alt IdWithOccInfo)
1441 occAnalAlt (env, scrut_bind) (con, bndrs, rhs)
1442 = case occAnal env rhs of { (rhs_usage1, rhs1) ->
1443 let
1444 (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs
1445 -- See Note [Binders in case alternatives]
1446 (alt_usg', rhs2) =
1447 wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1
1448 in
1449 (alt_usg', (con, tagged_bndrs, rhs2)) }
1450
1451 wrapAltRHS :: OccEnv
1452 -> Maybe (Id, CoreExpr) -- proxy mapping generated by mkAltEnv
1453 -> UsageDetails -- usage for entire alt (p -> rhs)
1454 -> [Var] -- alt binders
1455 -> CoreExpr -- alt RHS
1456 -> (UsageDetails, CoreExpr)
1457 wrapAltRHS env (Just (scrut_var, let_rhs)) alt_usg bndrs alt_rhs
1458 | occ_binder_swap env
1459 , scrut_var `usedIn` alt_usg -- bndrs are not be present in alt_usg so this
1460 -- handles condition (a) in Note [Binder swap]
1461 , not captured -- See condition (b) in Note [Binder swap]
1462 = ( alt_usg' +++ let_rhs_usg
1463 , Let (NonRec tagged_scrut_var let_rhs') alt_rhs )
1464 where
1465 captured = any (`usedIn` let_rhs_usg) bndrs
1466 -- The rhs of the let may include coercion variables
1467 -- if the scrutinee was a cast, so we must gather their
1468 -- usage. See Note [Gather occurrences of coercion variables]
1469 (let_rhs_usg, let_rhs') = occAnal env let_rhs
1470 (alt_usg', tagged_scrut_var) = tagBinder alt_usg scrut_var
1471
1472 wrapAltRHS _ _ alt_usg _ alt_rhs
1473 = (alt_usg, alt_rhs)
1474
1475 {-
1476 ************************************************************************
1477 * *
1478 OccEnv
1479 * *
1480 ************************************************************************
1481 -}
1482
1483 data OccEnv
1484 = OccEnv { occ_encl :: !OccEncl -- Enclosing context information
1485 , occ_one_shots :: !OneShots -- Tells about linearity
1486 , occ_gbl_scrut :: GlobalScruts
1487 , occ_rule_act :: Activation -> Bool -- Which rules are active
1488 -- See Note [Finding rule RHS free vars]
1489 , occ_binder_swap :: !Bool -- enable the binder_swap
1490 -- See CorePrep Note [Dead code in CorePrep]
1491 }
1492
1493 type GlobalScruts = IdSet -- See Note [Binder swap on GlobalId scrutinees]
1494
1495 -----------------------------
1496 -- OccEncl is used to control whether to inline into constructor arguments
1497 -- For example:
1498 -- x = (p,q) -- Don't inline p or q
1499 -- y = /\a -> (p a, q a) -- Still don't inline p or q
1500 -- z = f (p,q) -- Do inline p,q; it may make a rule fire
1501 -- So OccEncl tells enought about the context to know what to do when
1502 -- we encounter a contructor application or PAP.
1503
1504 data OccEncl
1505 = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda
1506 -- Don't inline into constructor args here
1507 | OccVanilla -- Argument of function, body of lambda, scruintee of case etc.
1508 -- Do inline into constructor args here
1509
1510 instance Outputable OccEncl where
1511 ppr OccRhs = text "occRhs"
1512 ppr OccVanilla = text "occVanilla"
1513
1514 type OneShots = [OneShotInfo]
1515 -- [] No info
1516 --
1517 -- one_shot_info:ctxt Analysing a function-valued expression that
1518 -- will be applied as described by one_shot_info
1519
1520 initOccEnv :: (Activation -> Bool) -> OccEnv
1521 initOccEnv active_rule
1522 = OccEnv { occ_encl = OccVanilla
1523 , occ_one_shots = []
1524 , occ_gbl_scrut = emptyVarSet
1525 , occ_rule_act = active_rule
1526 , occ_binder_swap = True }
1527
1528 vanillaCtxt :: OccEnv -> OccEnv
1529 vanillaCtxt env = env { occ_encl = OccVanilla, occ_one_shots = [] }
1530
1531 rhsCtxt :: OccEnv -> OccEnv
1532 rhsCtxt env = env { occ_encl = OccRhs, occ_one_shots = [] }
1533
1534 argCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots])
1535 argCtxt env []
1536 = (env { occ_encl = OccVanilla, occ_one_shots = [] }, [])
1537 argCtxt env (one_shots:one_shots_s)
1538 = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s)
1539
1540 isRhsEnv :: OccEnv -> Bool
1541 isRhsEnv (OccEnv { occ_encl = OccRhs }) = True
1542 isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False
1543
1544 oneShotGroup :: OccEnv -> [CoreBndr]
1545 -> ( OccEnv
1546 , [CoreBndr] )
1547 -- The result binders have one-shot-ness set that they might not have had originally.
1548 -- This happens in (build (\c n -> e)). Here the occurrence analyser
1549 -- linearity context knows that c,n are one-shot, and it records that fact in
1550 -- the binder. This is useful to guide subsequent float-in/float-out tranformations
1551
1552 oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs
1553 = go ctxt bndrs []
1554 where
1555 go ctxt [] rev_bndrs
1556 = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla }
1557 , reverse rev_bndrs )
1558
1559 go [] bndrs rev_bndrs
1560 = ( env { occ_one_shots = [], occ_encl = OccVanilla }
1561 , reverse rev_bndrs ++ bndrs )
1562
1563 go ctxt (bndr:bndrs) rev_bndrs
1564 | isId bndr
1565
1566 = case ctxt of
1567 [] -> go [] bndrs (bndr : rev_bndrs)
1568 (one_shot : ctxt) -> go ctxt bndrs (bndr': rev_bndrs)
1569 where
1570 bndr' = updOneShotInfo bndr one_shot
1571 -- Use updOneShotInfo, not setOneShotInfo, as pre-existing
1572 -- one-shot info might be better than what we can infer, e.g.
1573 -- due to explicit use of the magic 'oneShot' function.
1574 -- See Note [The oneShot function]
1575
1576 | otherwise
1577 = go ctxt bndrs (bndr:rev_bndrs)
1578
1579 addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
1580 addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args
1581 = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt }
1582
1583 transClosureFV :: UniqFM VarSet -> UniqFM VarSet
1584 -- If (f,g), (g,h) are in the input, then (f,h) is in the output
1585 -- as well as (f,g), (g,h)
1586 transClosureFV env
1587 | no_change = env
1588 | otherwise = transClosureFV (listToUFM new_fv_list)
1589 where
1590 (no_change, new_fv_list) = mapAccumL bump True (ufmToList env)
1591 bump no_change (b,fvs)
1592 | no_change_here = (no_change, (b,fvs))
1593 | otherwise = (False, (b,new_fvs))
1594 where
1595 (new_fvs, no_change_here) = extendFvs env fvs
1596
1597 -------------
1598 extendFvs_ :: UniqFM VarSet -> VarSet -> VarSet
1599 extendFvs_ env s = fst (extendFvs env s) -- Discard the Bool flag
1600
1601 extendFvs :: UniqFM VarSet -> VarSet -> (VarSet, Bool)
1602 -- (extendFVs env s) returns
1603 -- (s `union` env(s), env(s) `subset` s)
1604 extendFvs env s
1605 | isNullUFM env
1606 = (s, True)
1607 | otherwise
1608 = (s `unionVarSet` extras, extras `subVarSet` s)
1609 where
1610 extras :: VarSet -- env(s)
1611 extras = foldUFM unionVarSet emptyVarSet $
1612 intersectUFM_C (\x _ -> x) env s
1613
1614 {-
1615 ************************************************************************
1616 * *
1617 Binder swap
1618 * *
1619 ************************************************************************
1620
1621 Note [Binder swap]
1622 ~~~~~~~~~~~~~~~~~~
1623 We do these two transformations right here:
1624
1625 (1) case x of b { pi -> ri }
1626 ==>
1627 case x of b { pi -> let x=b in ri }
1628
1629 (2) case (x |> co) of b { pi -> ri }
1630 ==>
1631 case (x |> co) of b { pi -> let x = b |> sym co in ri }
1632
1633 Why (2)? See Note [Case of cast]
1634
1635 In both cases, in a particular alternative (pi -> ri), we only
1636 add the binding if
1637 (a) x occurs free in (pi -> ri)
1638 (ie it occurs in ri, but is not bound in pi)
1639 (b) the pi does not bind b (or the free vars of co)
1640 We need (a) and (b) for the inserted binding to be correct.
1641
1642 For the alternatives where we inject the binding, we can transfer
1643 all x's OccInfo to b. And that is the point.
1644
1645 Notice that
1646 * The deliberate shadowing of 'x'.
1647 * That (a) rapidly becomes false, so no bindings are injected.
1648
1649 The reason for doing these transformations here is because it allows
1650 us to adjust the OccInfo for 'x' and 'b' as we go.
1651
1652 * Suppose the only occurrences of 'x' are the scrutinee and in the
1653 ri; then this transformation makes it occur just once, and hence
1654 get inlined right away.
1655
1656 * If we do this in the Simplifier, we don't know whether 'x' is used
1657 in ri, so we are forced to pessimistically zap b's OccInfo even
1658 though it is typically dead (ie neither it nor x appear in the
1659 ri). There's nothing actually wrong with zapping it, except that
1660 it's kind of nice to know which variables are dead. My nose
1661 tells me to keep this information as robustly as possible.
1662
1663 The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
1664 {x=b}; it's Nothing if the binder-swap doesn't happen.
1665
1666 There is a danger though. Consider
1667 let v = x +# y
1668 in case (f v) of w -> ...v...v...
1669 And suppose that (f v) expands to just v. Then we'd like to
1670 use 'w' instead of 'v' in the alternative. But it may be too
1671 late; we may have substituted the (cheap) x+#y for v in the
1672 same simplifier pass that reduced (f v) to v.
1673
1674 I think this is just too bad. CSE will recover some of it.
1675
1676 Note [Case of cast]
1677 ~~~~~~~~~~~~~~~~~~~
1678 Consider case (x `cast` co) of b { I# ->
1679 ... (case (x `cast` co) of {...}) ...
1680 We'd like to eliminate the inner case. That is the motivation for
1681 equation (2) in Note [Binder swap]. When we get to the inner case, we
1682 inline x, cancel the casts, and away we go.
1683
1684 Note [Binder swap on GlobalId scrutinees]
1685 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1686 When the scrutinee is a GlobalId we must take care in two ways
1687
1688 i) In order to *know* whether 'x' occurs free in the RHS, we need its
1689 occurrence info. BUT, we don't gather occurrence info for
1690 GlobalIds. That's the reason for the (small) occ_gbl_scrut env in
1691 OccEnv is for: it says "gather occurrence info for these".
1692
1693 ii) We must call localiseId on 'x' first, in case it's a GlobalId, or
1694 has an External Name. See, for example, SimplEnv Note [Global Ids in
1695 the substitution].
1696
1697 Note [Zap case binders in proxy bindings]
1698 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1699 From the original
1700 case x of cb(dead) { p -> ...x... }
1701 we will get
1702 case x of cb(live) { p -> let x = cb in ...x... }
1703
1704 Core Lint never expects to find an *occurrence* of an Id marked
1705 as Dead, so we must zap the OccInfo on cb before making the
1706 binding x = cb. See Trac #5028.
1707
1708 Historical note [no-case-of-case]
1709 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1710 We *used* to suppress the binder-swap in case expressions when
1711 -fno-case-of-case is on. Old remarks:
1712 "This happens in the first simplifier pass,
1713 and enhances full laziness. Here's the bad case:
1714 f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
1715 If we eliminate the inner case, we trap it inside the I# v -> arm,
1716 which might prevent some full laziness happening. I've seen this
1717 in action in spectral/cichelli/Prog.hs:
1718 [(m,n) | m <- [1..max], n <- [1..max]]
1719 Hence the check for NoCaseOfCase."
1720 However, now the full-laziness pass itself reverses the binder-swap, so this
1721 check is no longer necessary.
1722
1723 Historical note [Suppressing the case binder-swap]
1724 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1725 This old note describes a problem that is also fixed by doing the
1726 binder-swap in OccAnal:
1727
1728 There is another situation when it might make sense to suppress the
1729 case-expression binde-swap. If we have
1730
1731 case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
1732 ...other cases .... }
1733
1734 We'll perform the binder-swap for the outer case, giving
1735
1736 case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
1737 ...other cases .... }
1738
1739 But there is no point in doing it for the inner case, because w1 can't
1740 be inlined anyway. Furthermore, doing the case-swapping involves
1741 zapping w2's occurrence info (see paragraphs that follow), and that
1742 forces us to bind w2 when doing case merging. So we get
1743
1744 case x of w1 { A -> let w2 = w1 in e1
1745 B -> let w2 = w1 in e2
1746 ...other cases .... }
1747
1748 This is plain silly in the common case where w2 is dead.
1749
1750 Even so, I can't see a good way to implement this idea. I tried
1751 not doing the binder-swap if the scrutinee was already evaluated
1752 but that failed big-time:
1753
1754 data T = MkT !Int
1755
1756 case v of w { MkT x ->
1757 case x of x1 { I# y1 ->
1758 case x of x2 { I# y2 -> ...
1759
1760 Notice that because MkT is strict, x is marked "evaluated". But to
1761 eliminate the last case, we must either make sure that x (as well as
1762 x1) has unfolding MkT y1. The straightforward thing to do is to do
1763 the binder-swap. So this whole note is a no-op.
1764
1765 It's fixed by doing the binder-swap in OccAnal because we can do the
1766 binder-swap unconditionally and still get occurrence analysis
1767 information right.
1768 -}
1769
1770 mkAltEnv :: OccEnv -> CoreExpr -> Id -> (OccEnv, Maybe (Id, CoreExpr))
1771 -- Does two things: a) makes the occ_one_shots = OccVanilla
1772 -- b) extends the GlobalScruts if possible
1773 -- c) returns a proxy mapping, binding the scrutinee
1774 -- to the case binder, if possible
1775 mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr
1776 = case stripTicksTopE (const True) scrut of
1777 Var v -> add_scrut v case_bndr'
1778 Cast (Var v) co -> add_scrut v (Cast case_bndr' (mkSymCo co))
1779 -- See Note [Case of cast]
1780 _ -> (env { occ_encl = OccVanilla }, Nothing)
1781
1782 where
1783 add_scrut v rhs = ( env { occ_encl = OccVanilla, occ_gbl_scrut = pe `extendVarSet` v }
1784 , Just (localise v, rhs) )
1785
1786 case_bndr' = Var (zapIdOccInfo case_bndr) -- See Note [Zap case binders in proxy bindings]
1787 localise scrut_var = mkLocalIdOrCoVar (localiseName (idName scrut_var)) (idType scrut_var)
1788 -- Localise the scrut_var before shadowing it; we're making a
1789 -- new binding for it, and it might have an External Name, or
1790 -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]
1791 -- Also we don't want any INLINE or NOINLINE pragmas!
1792
1793 {-
1794 ************************************************************************
1795 * *
1796 \subsection[OccurAnal-types]{OccEnv}
1797 * *
1798 ************************************************************************
1799 -}
1800
1801 type UsageDetails = IdEnv OccInfo -- A finite map from ids to their usage
1802 -- INVARIANT: never IAmDead
1803 -- (Deadness is signalled by not being in the map at all)
1804
1805 (+++), combineAltsUsageDetails
1806 :: UsageDetails -> UsageDetails -> UsageDetails
1807
1808 (+++) usage1 usage2
1809 = plusVarEnv_C addOccInfo usage1 usage2
1810
1811 combineAltsUsageDetails usage1 usage2
1812 = plusVarEnv_C orOccInfo usage1 usage2
1813
1814 addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
1815 addOneOcc usage id info
1816 = plusVarEnv_C addOccInfo usage (unitVarEnv id info)
1817 -- ToDo: make this more efficient
1818
1819 emptyDetails :: UsageDetails
1820 emptyDetails = (emptyVarEnv :: UsageDetails)
1821
1822 usedIn :: Id -> UsageDetails -> Bool
1823 v `usedIn` details = isExportedId v || v `elemVarEnv` details
1824
1825 type IdWithOccInfo = Id
1826
1827 tagLamBinders :: UsageDetails -- Of scope
1828 -> [Id] -- Binders
1829 -> (UsageDetails, -- Details with binders removed
1830 [IdWithOccInfo]) -- Tagged binders
1831 -- Used for lambda and case binders
1832 -- It copes with the fact that lambda bindings can have a
1833 -- stable unfolding, used for join points
1834 tagLamBinders usage binders = usage' `seq` (usage', bndrs')
1835 where
1836 (usage', bndrs') = mapAccumR tag_lam usage binders
1837 tag_lam usage bndr = (usage2, setBinderOcc usage bndr)
1838 where
1839 usage1 = usage `delVarEnv` bndr
1840 usage2 | isId bndr = addIdOccs usage1 (idUnfoldingVars bndr)
1841 | otherwise = usage1
1842
1843 tagBinder :: UsageDetails -- Of scope
1844 -> Id -- Binders
1845 -> (UsageDetails, -- Details with binders removed
1846 IdWithOccInfo) -- Tagged binders
1847
1848 tagBinder usage binder
1849 = let
1850 usage' = usage `delVarEnv` binder
1851 binder' = setBinderOcc usage binder
1852 in
1853 usage' `seq` (usage', binder')
1854
1855 setBinderOcc :: UsageDetails -> CoreBndr -> CoreBndr
1856 setBinderOcc usage bndr
1857 | isTyVar bndr = bndr
1858 | isExportedId bndr = case idOccInfo bndr of
1859 NoOccInfo -> bndr
1860 _ -> setIdOccInfo bndr NoOccInfo
1861 -- Don't use local usage info for visible-elsewhere things
1862 -- BUT *do* erase any IAmALoopBreaker annotation, because we're
1863 -- about to re-generate it and it shouldn't be "sticky"
1864
1865 | otherwise = setIdOccInfo bndr occ_info
1866 where
1867 occ_info = lookupVarEnv usage bndr `orElse` IAmDead
1868
1869 {-
1870 ************************************************************************
1871 * *
1872 \subsection{Operations over OccInfo}
1873 * *
1874 ************************************************************************
1875 -}
1876
1877 mkOneOcc :: OccEnv -> Id -> InterestingCxt -> UsageDetails
1878 mkOneOcc env id int_cxt
1879 | isLocalId id
1880 = unitVarEnv id (OneOcc False True int_cxt)
1881
1882 | id `elemVarEnv` occ_gbl_scrut env
1883 = unitVarEnv id NoOccInfo
1884
1885 | otherwise
1886 = emptyDetails
1887
1888 markMany, markInsideLam :: OccInfo -> OccInfo
1889
1890 markMany _ = NoOccInfo
1891
1892 markInsideLam (OneOcc _ one_br int_cxt) = OneOcc True one_br int_cxt
1893 markInsideLam occ = occ
1894
1895 addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
1896
1897 addOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
1898 NoOccInfo -- Both branches are at least One
1899 -- (Argument is never IAmDead)
1900
1901 -- (orOccInfo orig new) is used
1902 -- when combining occurrence info from branches of a case
1903
1904 orOccInfo (OneOcc in_lam1 _ int_cxt1)
1905 (OneOcc in_lam2 _ int_cxt2)
1906 = OneOcc (in_lam1 || in_lam2)
1907 False -- False, because it occurs in both branches
1908 (int_cxt1 && int_cxt2)
1909 orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
1910 NoOccInfo