Rename SpecInfo to RuleInfo (upon SPJ's advice).
[ghc.git] / compiler / simplCore / OccurAnal.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 ************************************************************************
5 * *
6 \section[OccurAnal]{Occurrence analysis pass}
7 * *
8 ************************************************************************
9
10 The occurrence analyser re-typechecks a core expression, returning a new
11 core expression with (hopefully) improved usage information.
12 -}
13
14 {-# LANGUAGE CPP, BangPatterns #-}
15
16 module OccurAnal (
17 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
18 ) where
19
20 #include "HsVersions.h"
21
22 import CoreSyn
23 import CoreFVs
24 import CoreUtils ( exprIsTrivial, isDefaultAlt, isExpandableApp,
25 stripTicksTopE, mkTicks )
26 import Id
27 import Name( localiseName )
28 import BasicTypes
29 import Module( Module )
30 import Coercion
31
32 import VarSet
33 import VarEnv
34 import Var
35 import Demand ( argOneShots, argsOneShots )
36 import Maybes ( orElse )
37 import Digraph ( SCC(..), stronglyConnCompFromEdgedVerticesR )
38 import Unique
39 import UniqFM
40 import Util
41 import Outputable
42 import FastString
43 import Data.List
44 import Control.Arrow ( second )
45
46 {-
47 ************************************************************************
48 * *
49 \subsection[OccurAnal-main]{Counting occurrences: main function}
50 * *
51 ************************************************************************
52
53 Here's the externally-callable interface:
54 -}
55
56 occurAnalysePgm :: Module -- Used only in debug output
57 -> (Activation -> Bool)
58 -> [CoreRule] -> [CoreVect] -> VarSet
59 -> CoreProgram -> CoreProgram
60 occurAnalysePgm this_mod active_rule imp_rules vects vectVars binds
61 | isEmptyVarEnv final_usage
62 = occ_anald_binds
63
64 | otherwise -- See Note [Glomming]
65 = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
66 2 (ppr final_usage ) )
67 occ_anald_glommed_binds
68 where
69 init_env = initOccEnv active_rule
70 (final_usage, occ_anald_binds) = go init_env binds
71 (_, occ_anald_glommed_binds) = occAnalRecBind init_env imp_rule_edges
72 (flattenBinds occ_anald_binds)
73 initial_uds
74 -- It's crucial to re-analyse the glommed-together bindings
75 -- so that we establish the right loop breakers. Otherwise
76 -- we can easily create an infinite loop (Trac #9583 is an example)
77
78 initial_uds = addIdOccs emptyDetails
79 (rulesFreeVars imp_rules `unionVarSet`
80 vectsFreeVars vects `unionVarSet`
81 vectVars)
82 -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations,
83 -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are
84 -- reflected in 'vectors' — see Note [Vectorisation declarations and occurrences].)
85
86 -- Note [Preventing loops due to imported functions rules]
87 imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
88 [ mapVarEnv (const maps_to) (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
89 | imp_rule <- imp_rules
90 , let maps_to = exprFreeIds (ru_rhs imp_rule)
91 `delVarSetList` ru_bndrs imp_rule
92 , arg <- ru_args imp_rule ]
93
94 go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
95 go _ []
96 = (initial_uds, [])
97 go env (bind:binds)
98 = (final_usage, bind' ++ binds')
99 where
100 (bs_usage, binds') = go env binds
101 (final_usage, bind') = occAnalBind env imp_rule_edges bind bs_usage
102
103 occurAnalyseExpr :: CoreExpr -> CoreExpr
104 -- Do occurrence analysis, and discard occurrence info returned
105 occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap
106
107 occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
108 occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap
109
110 occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
111 occurAnalyseExpr' enable_binder_swap expr
112 = snd (occAnal env expr)
113 where
114 env = (initOccEnv all_active_rules) {occ_binder_swap = enable_binder_swap}
115 -- To be conservative, we say that all inlines and rules are active
116 all_active_rules = \_ -> True
117
118 {-
119 ************************************************************************
120 * *
121 \subsection[OccurAnal-main]{Counting occurrences: main function}
122 * *
123 ************************************************************************
124
125 Bindings
126 ~~~~~~~~
127 -}
128
129 type ImpRuleEdges = IdEnv IdSet -- Mapping from FVs of imported RULE LHSs to RHS FVs
130
131 noImpRuleEdges :: ImpRuleEdges
132 noImpRuleEdges = emptyVarEnv
133
134 occAnalBind :: OccEnv -- The incoming OccEnv
135 -> ImpRuleEdges
136 -> CoreBind
137 -> UsageDetails -- Usage details of scope
138 -> (UsageDetails, -- Of the whole let(rec)
139 [CoreBind])
140
141 occAnalBind env top_env (NonRec binder rhs) body_usage
142 = occAnalNonRecBind env top_env binder rhs body_usage
143 occAnalBind env top_env (Rec pairs) body_usage
144 = occAnalRecBind env top_env pairs body_usage
145
146 -----------------
147 occAnalNonRecBind :: OccEnv -> ImpRuleEdges -> Var -> CoreExpr
148 -> UsageDetails -> (UsageDetails, [CoreBind])
149 occAnalNonRecBind env imp_rule_edges binder rhs body_usage
150 | isTyVar binder -- A type let; we don't gather usage info
151 = (body_usage, [NonRec binder rhs])
152
153 | not (binder `usedIn` body_usage) -- It's not mentioned
154 = (body_usage, [])
155
156 | otherwise -- It's mentioned in the body
157 = (body_usage' +++ rhs_usage4, [NonRec tagged_binder rhs'])
158 where
159 (body_usage', tagged_binder) = tagBinder body_usage binder
160 (rhs_usage1, rhs') = occAnalNonRecRhs env tagged_binder rhs
161 rhs_usage2 = addIdOccs rhs_usage1 (idUnfoldingVars binder)
162
163 rhs_usage3 = addIdOccs rhs_usage2 (idRuleVars binder)
164 -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
165
166 rhs_usage4 = maybe rhs_usage3 (addIdOccs rhs_usage3) $
167 lookupVarEnv imp_rule_edges binder
168 -- See Note [Preventing loops due to imported functions rules]
169
170 -----------------
171 occAnalRecBind :: OccEnv -> ImpRuleEdges -> [(Var,CoreExpr)]
172 -> UsageDetails -> (UsageDetails, [CoreBind])
173 occAnalRecBind env imp_rule_edges pairs body_usage
174 = foldr occAnalRec (body_usage, []) sccs
175 -- For a recursive group, we
176 -- * occ-analyse all the RHSs
177 -- * compute strongly-connected components
178 -- * feed those components to occAnalRec
179 where
180 bndr_set = mkVarSet (map fst pairs)
181
182 sccs :: [SCC (Node Details)]
183 sccs = {-# SCC "occAnalBind.scc" #-} stronglyConnCompFromEdgedVerticesR nodes
184
185 nodes :: [Node Details]
186 nodes = {-# SCC "occAnalBind.assoc" #-} map (makeNode env imp_rule_edges bndr_set) pairs
187
188 {-
189 Note [Dead code]
190 ~~~~~~~~~~~~~~~~
191 Dropping dead code for a cyclic Strongly Connected Component is done
192 in a very simple way:
193
194 the entire SCC is dropped if none of its binders are mentioned
195 in the body; otherwise the whole thing is kept.
196
197 The key observation is that dead code elimination happens after
198 dependency analysis: so 'occAnalBind' processes SCCs instead of the
199 original term's binding groups.
200
201 Thus 'occAnalBind' does indeed drop 'f' in an example like
202
203 letrec f = ...g...
204 g = ...(...g...)...
205 in
206 ...g...
207
208 when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
209 'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
210 'AcyclicSCC f', where 'body_usage' won't contain 'f'.
211
212 ------------------------------------------------------------
213 Note [Forming Rec groups]
214 ~~~~~~~~~~~~~~~~~~~~~~~~~
215 We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
216 and "g uses f", no matter how indirectly. We do a SCC analysis
217 with an edge f -> g if "f uses g".
218
219 More precisely, "f uses g" iff g should be in scope wherever f is.
220 That is, g is free in:
221 a) the rhs 'ef'
222 b) or the RHS of a rule for f (Note [Rules are extra RHSs])
223 c) or the LHS or a rule for f (Note [Rule dependency info])
224
225 These conditions apply regardless of the activation of the RULE (eg it might be
226 inactive in this phase but become active later). Once a Rec is broken up
227 it can never be put back together, so we must be conservative.
228
229 The principle is that, regardless of rule firings, every variable is
230 always in scope.
231
232 * Note [Rules are extra RHSs]
233 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
234 A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
235 keeps the specialised "children" alive. If the parent dies
236 (because it isn't referenced any more), then the children will die
237 too (unless they are already referenced directly).
238
239 To that end, we build a Rec group for each cyclic strongly
240 connected component,
241 *treating f's rules as extra RHSs for 'f'*.
242 More concretely, the SCC analysis runs on a graph with an edge
243 from f -> g iff g is mentioned in
244 (a) f's rhs
245 (b) f's RULES
246 These are rec_edges.
247
248 Under (b) we include variables free in *either* LHS *or* RHS of
249 the rule. The former might seems silly, but see Note [Rule
250 dependency info]. So in Example [eftInt], eftInt and eftIntFB
251 will be put in the same Rec, even though their 'main' RHSs are
252 both non-recursive.
253
254 * Note [Rule dependency info]
255 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
256 The VarSet in a RuleInfo is used for dependency analysis in the
257 occurrence analyser. We must track free vars in *both* lhs and rhs.
258 Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
259 Why both? Consider
260 x = y
261 RULE f x = v+4
262 Then if we substitute y for x, we'd better do so in the
263 rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
264 as well as 'v'
265
266 * Note [Rules are visible in their own rec group]
267 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
268 We want the rules for 'f' to be visible in f's right-hand side.
269 And we'd like them to be visible in other functions in f's Rec
270 group. E.g. in Note [Specialisation rules] we want f' rule
271 to be visible in both f's RHS, and fs's RHS.
272
273 This means that we must simplify the RULEs first, before looking
274 at any of the definitions. This is done by Simplify.simplRecBind,
275 when it calls addLetIdInfo.
276
277 ------------------------------------------------------------
278 Note [Choosing loop breakers]
279 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
280 Loop breaking is surprisingly subtle. First read the section 4 of
281 "Secrets of the GHC inliner". This describes our basic plan.
282 We avoid infinite inlinings by choosing loop breakers, and
283 ensuring that a loop breaker cuts each loop.
284
285 Fundamentally, we do SCC analysis on a graph. For each recursive
286 group we choose a loop breaker, delete all edges to that node,
287 re-analyse the SCC, and iterate.
288
289 But what is the graph? NOT the same graph as was used for Note
290 [Forming Rec groups]! In particular, a RULE is like an equation for
291 'f' that is *always* inlined if it is applicable. We do *not* disable
292 rules for loop-breakers. It's up to whoever makes the rules to make
293 sure that the rules themselves always terminate. See Note [Rules for
294 recursive functions] in Simplify.hs
295
296 Hence, if
297 f's RHS (or its INLINE template if it has one) mentions g, and
298 g has a RULE that mentions h, and
299 h has a RULE that mentions f
300
301 then we *must* choose f to be a loop breaker. Example: see Note
302 [Specialisation rules].
303
304 In general, take the free variables of f's RHS, and augment it with
305 all the variables reachable by RULES from those starting points. That
306 is the whole reason for computing rule_fv_env in occAnalBind. (Of
307 course we only consider free vars that are also binders in this Rec
308 group.) See also Note [Finding rule RHS free vars]
309
310 Note that when we compute this rule_fv_env, we only consider variables
311 free in the *RHS* of the rule, in contrast to the way we build the
312 Rec group in the first place (Note [Rule dependency info])
313
314 Note that if 'g' has RHS that mentions 'w', we should add w to
315 g's loop-breaker edges. More concretely there is an edge from f -> g
316 iff
317 (a) g is mentioned in f's RHS `xor` f's INLINE rhs
318 (see Note [Inline rules])
319 (b) or h is mentioned in f's RHS, and
320 g appears in the RHS of an active RULE of h
321 or a transitive sequence of active rules starting with h
322
323 Why "active rules"? See Note [Finding rule RHS free vars]
324
325 Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
326 chosen as a loop breaker, because their RHSs don't mention each other.
327 And indeed both can be inlined safely.
328
329 Note again that the edges of the graph we use for computing loop breakers
330 are not the same as the edges we use for computing the Rec blocks.
331 That's why we compute
332
333 - rec_edges for the Rec block analysis
334 - loop_breaker_edges for the loop breaker analysis
335
336 * Note [Finding rule RHS free vars]
337 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
338 Consider this real example from Data Parallel Haskell
339 tagZero :: Array Int -> Array Tag
340 {-# INLINE [1] tagZeroes #-}
341 tagZero xs = pmap (\x -> fromBool (x==0)) xs
342
343 {-# RULES "tagZero" [~1] forall xs n.
344 pmap fromBool <blah blah> = tagZero xs #-}
345 So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
346 However, tagZero can only be inlined in phase 1 and later, while
347 the RULE is only active *before* phase 1. So there's no problem.
348
349 To make this work, we look for the RHS free vars only for
350 *active* rules. That's the reason for the occ_rule_act field
351 of the OccEnv.
352
353 * Note [Weak loop breakers]
354 ~~~~~~~~~~~~~~~~~~~~~~~~~
355 There is a last nasty wrinkle. Suppose we have
356
357 Rec { f = f_rhs
358 RULE f [] = g
359
360 h = h_rhs
361 g = h
362 ...more...
363 }
364
365 Remember that we simplify the RULES before any RHS (see Note
366 [Rules are visible in their own rec group] above).
367
368 So we must *not* postInlineUnconditionally 'g', even though
369 its RHS turns out to be trivial. (I'm assuming that 'g' is
370 not choosen as a loop breaker.) Why not? Because then we
371 drop the binding for 'g', which leaves it out of scope in the
372 RULE!
373
374 Here's a somewhat different example of the same thing
375 Rec { g = h
376 ; h = ...f...
377 ; f = f_rhs
378 RULE f [] = g }
379 Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
380 g, because the RULE for f is active throughout. So the RHS of h
381 might rewrite to h = ...g...
382 So g must remain in scope in the output program!
383
384 We "solve" this by:
385
386 Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
387 iff g is a "missing free variable" of the Rec group
388
389 A "missing free variable" x is one that is mentioned in an RHS or
390 INLINE or RULE of a binding in the Rec group, but where the
391 dependency on x may not show up in the loop_breaker_edges (see
392 note [Choosing loop breakers} above).
393
394 A normal "strong" loop breaker has IAmLoopBreaker False. So
395
396 Inline postInlineUnconditionally
397 strong IAmLoopBreaker False no no
398 weak IAmLoopBreaker True yes no
399 other yes yes
400
401 The **sole** reason for this kind of loop breaker is so that
402 postInlineUnconditionally does not fire. Ugh. (Typically it'll
403 inline via the usual callSiteInline stuff, so it'll be dead in the
404 next pass, so the main Ugh is the tiresome complication.)
405
406 Note [Rules for imported functions]
407 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
408 Consider this
409 f = /\a. B.g a
410 RULE B.g Int = 1 + f Int
411 Note that
412 * The RULE is for an imported function.
413 * f is non-recursive
414 Now we
415 can get
416 f Int --> B.g Int Inlining f
417 --> 1 + f Int Firing RULE
418 and so the simplifier goes into an infinite loop. This
419 would not happen if the RULE was for a local function,
420 because we keep track of dependencies through rules. But
421 that is pretty much impossible to do for imported Ids. Suppose
422 f's definition had been
423 f = /\a. C.h a
424 where (by some long and devious process), C.h eventually inlines to
425 B.g. We could only spot such loops by exhaustively following
426 unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
427 f.
428
429 Note that RULES for imported functions are important in practice; they
430 occur a lot in the libraries.
431
432 We regard this potential infinite loop as a *programmer* error.
433 It's up the programmer not to write silly rules like
434 RULE f x = f x
435 and the example above is just a more complicated version.
436
437 Note [Preventing loops due to imported functions rules]
438 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
439 Consider:
440 import GHC.Base (foldr)
441
442 {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
443 filter p xs = build (\c n -> foldr (filterFB c p) n xs)
444 filterFB c p = ...
445
446 f = filter p xs
447
448 Note that filter is not a loop-breaker, so what happens is:
449 f = filter p xs
450 = {inline} build (\c n -> foldr (filterFB c p) n xs)
451 = {inline} foldr (filterFB (:) p) [] xs
452 = {RULE} filter p xs
453
454 We are in an infinite loop.
455
456 A more elaborate example (that I actually saw in practice when I went to
457 mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
458 {-# LANGUAGE RankNTypes #-}
459 module GHCList where
460
461 import Prelude hiding (filter)
462 import GHC.Base (build)
463
464 {-# INLINABLE filter #-}
465 filter :: (a -> Bool) -> [a] -> [a]
466 filter p [] = []
467 filter p (x:xs) = if p x then x : filter p xs else filter p xs
468
469 {-# NOINLINE [0] filterFB #-}
470 filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
471 filterFB c p x r | p x = x `c` r
472 | otherwise = r
473
474 {-# RULES
475 "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr
476 (filterFB c p) n xs)
477 "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p
478 #-}
479
480 Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
481 are not), the unfolding given to "filter" in the interface file will be:
482 filter p [] = []
483 filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
484 else build (\c n -> foldr (filterFB c p) n xs
485
486 Note that because this unfolding does not mention "filter", filter is not
487 marked as a strong loop breaker. Therefore at a use site in another module:
488 filter p xs
489 = {inline}
490 case xs of [] -> []
491 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
492 else build (\c n -> foldr (filterFB c p) n xs)
493
494 build (\c n -> foldr (filterFB c p) n xs)
495 = {inline} foldr (filterFB (:) p) [] xs
496 = {RULE} filter p xs
497
498 And we are in an infinite loop again, except that this time the loop is producing an
499 infinitely large *term* (an unrolling of filter) and so the simplifier finally
500 dies with "ticks exhausted"
501
502 Because of this problem, we make a small change in the occurrence analyser
503 designed to mark functions like "filter" as strong loop breakers on the basis that:
504 1. The RHS of filter mentions the local function "filterFB"
505 2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS
506
507 So for each RULE for an *imported* function we are going to add
508 dependency edges between the *local* FVS of the rule LHS and the
509 *local* FVS of the rule RHS. We don't do anything special for RULES on
510 local functions because the standard occurrence analysis stuff is
511 pretty good at getting loop-breakerness correct there.
512
513 It is important to note that even with this extra hack we aren't always going to get
514 things right. For example, it might be that the rule LHS mentions an imported Id,
515 and another module has a RULE that can rewrite that imported Id to one of our local
516 Ids.
517
518 Note [Specialising imported functions]
519 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
520 BUT for *automatically-generated* rules, the programmer can't be
521 responsible for the "programmer error" in Note [Rules for imported
522 functions]. In paricular, consider specialising a recursive function
523 defined in another module. If we specialise a recursive function B.g,
524 we get
525 g_spec = .....(B.g Int).....
526 RULE B.g Int = g_spec
527 Here, g_spec doesn't look recursive, but when the rule fires, it
528 becomes so. And if B.g was mutually recursive, the loop might
529 not be as obvious as it is here.
530
531 To avoid this,
532 * When specialising a function that is a loop breaker,
533 give a NOINLINE pragma to the specialised function
534
535 Note [Glomming]
536 ~~~~~~~~~~~~~~~
537 RULES for imported Ids can make something at the top refer to something at the bottom:
538 f = \x -> B.g (q x)
539 h = \y -> 3
540
541 RULE: B.g (q x) = h x
542
543 Applying this rule makes f refer to h, although f doesn't appear to
544 depend on h. (And, as in Note [Rules for imported functions], the
545 dependency might be more indirect. For example, f might mention C.t
546 rather than B.g, where C.t eventually inlines to B.g.)
547
548 NOTICE that this cannot happen for rules whose head is a
549 locally-defined function, because we accurately track dependencies
550 through RULES. It only happens for rules whose head is an imported
551 function (B.g in the example above).
552
553 Solution:
554 - When simplifying, bring all top level identifiers into
555 scope at the start, ignoring the Rec/NonRec structure, so
556 that when 'h' pops up in f's rhs, we find it in the in-scope set
557 (as the simplifier generally expects). This happens in simplTopBinds.
558
559 - In the occurrence analyser, if there are any out-of-scope
560 occurrences that pop out of the top, which will happen after
561 firing the rule: f = \x -> h x
562 h = \y -> 3
563 then just glom all the bindings into a single Rec, so that
564 the *next* iteration of the occurrence analyser will sort
565 them all out. This part happens in occurAnalysePgm.
566
567 ------------------------------------------------------------
568 Note [Inline rules]
569 ~~~~~~~~~~~~~~~~~~~
570 None of the above stuff about RULES applies to Inline Rules,
571 stored in a CoreUnfolding. The unfolding, if any, is simplified
572 at the same time as the regular RHS of the function (ie *not* like
573 Note [Rules are visible in their own rec group]), so it should be
574 treated *exactly* like an extra RHS.
575
576 Or, rather, when computing loop-breaker edges,
577 * If f has an INLINE pragma, and it is active, we treat the
578 INLINE rhs as f's rhs
579 * If it's inactive, we treat f as having no rhs
580 * If it has no INLINE pragma, we look at f's actual rhs
581
582
583 There is a danger that we'll be sub-optimal if we see this
584 f = ...f...
585 [INLINE f = ..no f...]
586 where f is recursive, but the INLINE is not. This can just about
587 happen with a sufficiently odd set of rules; eg
588
589 foo :: Int -> Int
590 {-# INLINE [1] foo #-}
591 foo x = x+1
592
593 bar :: Int -> Int
594 {-# INLINE [1] bar #-}
595 bar x = foo x + 1
596
597 {-# RULES "foo" [~1] forall x. foo x = bar x #-}
598
599 Here the RULE makes bar recursive; but it's INLINE pragma remains
600 non-recursive. It's tempting to then say that 'bar' should not be
601 a loop breaker, but an attempt to do so goes wrong in two ways:
602 a) We may get
603 $df = ...$cfoo...
604 $cfoo = ...$df....
605 [INLINE $cfoo = ...no-$df...]
606 But we want $cfoo to depend on $df explicitly so that we
607 put the bindings in the right order to inline $df in $cfoo
608 and perhaps break the loop altogether. (Maybe this
609 b)
610
611
612 Example [eftInt]
613 ~~~~~~~~~~~~~~~
614 Example (from GHC.Enum):
615
616 eftInt :: Int# -> Int# -> [Int]
617 eftInt x y = ...(non-recursive)...
618
619 {-# INLINE [0] eftIntFB #-}
620 eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
621 eftIntFB c n x y = ...(non-recursive)...
622
623 {-# RULES
624 "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
625 "eftIntList" [1] eftIntFB (:) [] = eftInt
626 #-}
627
628 Note [Specialisation rules]
629 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
630 Consider this group, which is typical of what SpecConstr builds:
631
632 fs a = ....f (C a)....
633 f x = ....f (C a)....
634 {-# RULE f (C a) = fs a #-}
635
636 So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
637
638 But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
639 - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
640 - fs is inlined (say it's small)
641 - now there's another opportunity to apply the RULE
642
643 This showed up when compiling Control.Concurrent.Chan.getChanContents.
644 -}
645
646 type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique,
647 -- which is gotten from the Id.
648 data Details
649 = ND { nd_bndr :: Id -- Binder
650 , nd_rhs :: CoreExpr -- RHS, already occ-analysed
651
652 , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings
653 -- ignoring phase (ie assuming all are active)
654 -- See Note [Forming Rec groups]
655
656 , nd_inl :: IdSet -- Free variables of
657 -- the stable unfolding (if present and active)
658 -- or the RHS (if not)
659 -- but excluding any RULES
660 -- This is the IdSet that may be used if the Id is inlined
661
662 , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds
663 -- but are *not* in nd_inl. These are the ones whose
664 -- dependencies might not be respected by loop_breaker_edges
665 -- See Note [Weak loop breakers]
666
667 , nd_active_rule_fvs :: IdSet -- Free variables of the RHS of active RULES
668 }
669
670 instance Outputable Details where
671 ppr nd = ptext (sLit "ND") <> braces
672 (sep [ ptext (sLit "bndr =") <+> ppr (nd_bndr nd)
673 , ptext (sLit "uds =") <+> ppr (nd_uds nd)
674 , ptext (sLit "inl =") <+> ppr (nd_inl nd)
675 , ptext (sLit "weak =") <+> ppr (nd_weak nd)
676 , ptext (sLit "rule =") <+> ppr (nd_active_rule_fvs nd)
677 ])
678
679 makeNode :: OccEnv -> ImpRuleEdges -> VarSet -> (Var, CoreExpr) -> Node Details
680 makeNode env imp_rule_edges bndr_set (bndr, rhs)
681 = (details, varUnique bndr, keysUFM node_fvs)
682 where
683 details = ND { nd_bndr = bndr
684 , nd_rhs = rhs'
685 , nd_uds = rhs_usage3
686 , nd_weak = node_fvs `minusVarSet` inl_fvs
687 , nd_inl = inl_fvs
688 , nd_active_rule_fvs = active_rule_fvs }
689
690 -- Constructing the edges for the main Rec computation
691 -- See Note [Forming Rec groups]
692 (rhs_usage1, rhs') = occAnalRecRhs env rhs
693 rhs_usage2 = addIdOccs rhs_usage1 all_rule_fvs -- Note [Rules are extra RHSs]
694 -- Note [Rule dependency info]
695 rhs_usage3 = case mb_unf_fvs of
696 Just unf_fvs -> addIdOccs rhs_usage2 unf_fvs
697 Nothing -> rhs_usage2
698 node_fvs = udFreeVars bndr_set rhs_usage3
699
700 -- Finding the free variables of the rules
701 is_active = occ_rule_act env :: Activation -> Bool
702 rules = filterOut isBuiltinRule (idCoreRules bndr)
703 rules_w_fvs :: [(Activation, VarSet)] -- Find the RHS fvs
704 rules_w_fvs = maybe id (\ids -> ((AlwaysActive, ids):)) (lookupVarEnv imp_rule_edges bndr)
705 -- See Note [Preventing loops due to imported functions rules]
706 [ (ru_act rule, fvs)
707 | rule <- rules
708 , let fvs = exprFreeVars (ru_rhs rule)
709 `delVarSetList` ru_bndrs rule
710 , not (isEmptyVarSet fvs) ]
711 all_rule_fvs = rule_lhs_fvs `unionVarSet` rule_rhs_fvs
712 rule_rhs_fvs = mapUnionVarSet snd rules_w_fvs
713 rule_lhs_fvs = mapUnionVarSet (\ru -> exprsFreeVars (ru_args ru)
714 `delVarSetList` ru_bndrs ru) rules
715 active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_fvs, is_active a]
716
717 -- Finding the free variables of the INLINE pragma (if any)
718 unf = realIdUnfolding bndr -- Ignore any current loop-breaker flag
719 mb_unf_fvs = stableUnfoldingVars unf
720
721 -- Find the "nd_inl" free vars; for the loop-breaker phase
722 inl_fvs = case mb_unf_fvs of
723 Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
724 Just unf_fvs -> unf_fvs
725 -- We could check for an *active* INLINE (returning
726 -- emptyVarSet for an inactive one), but is_active
727 -- isn't the right thing (it tells about
728 -- RULE activation), so we'd need more plumbing
729
730 -----------------------------
731 occAnalRec :: SCC (Node Details)
732 -> (UsageDetails, [CoreBind])
733 -> (UsageDetails, [CoreBind])
734
735 -- The NonRec case is just like a Let (NonRec ...) above
736 occAnalRec (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs, nd_uds = rhs_uds}, _, _))
737 (body_uds, binds)
738 | not (bndr `usedIn` body_uds)
739 = (body_uds, binds) -- See Note [Dead code]
740
741 | otherwise -- It's mentioned in the body
742 = (body_uds' +++ rhs_uds,
743 NonRec tagged_bndr rhs : binds)
744 where
745 (body_uds', tagged_bndr) = tagBinder body_uds bndr
746
747 -- The Rec case is the interesting one
748 -- See Note [Loop breaking]
749 occAnalRec (CyclicSCC nodes) (body_uds, binds)
750 | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
751 = (body_uds, binds) -- See Note [Dead code]
752
753 | otherwise -- At this point we always build a single Rec
754 = -- pprTrace "occAnalRec" (vcat
755 -- [ text "tagged nodes" <+> ppr tagged_nodes
756 -- , text "lb edges" <+> ppr loop_breaker_edges])
757 (final_uds, Rec pairs : binds)
758
759 where
760 bndrs = [b | (ND { nd_bndr = b }, _, _) <- nodes]
761 bndr_set = mkVarSet bndrs
762
763 ----------------------------
764 -- Tag the binders with their occurrence info
765 tagged_nodes = map tag_node nodes
766 total_uds = foldl add_uds body_uds nodes
767 final_uds = total_uds `minusVarEnv` bndr_set
768 add_uds usage_so_far (nd, _, _) = usage_so_far +++ nd_uds nd
769
770 tag_node :: Node Details -> Node Details
771 tag_node (details@ND { nd_bndr = bndr }, k, ks)
772 | let bndr1 = setBinderOcc total_uds bndr
773 = (details { nd_bndr = bndr1 }, k, ks)
774
775 ---------------------------
776 -- Now reconstruct the cycle
777 pairs :: [(Id,CoreExpr)]
778 pairs | isEmptyVarSet weak_fvs = reOrderNodes 0 bndr_set weak_fvs tagged_nodes []
779 | otherwise = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_edges []
780 -- If weak_fvs is empty, the loop_breaker_edges will include all
781 -- the edges in tagged_nodes, so there isn't any point in doing
782 -- a fresh SCC computation that will yield a single CyclicSCC result.
783
784 weak_fvs :: VarSet
785 weak_fvs = mapUnionVarSet (nd_weak . fstOf3) nodes
786
787 -- See Note [Choosing loop breakers] for loop_breaker_edges
788 loop_breaker_edges = map mk_node tagged_nodes
789 mk_node (details@(ND { nd_inl = inl_fvs }), k, _)
790 = (details, k, keysUFM (extendFvs_ rule_fv_env inl_fvs))
791
792 ------------------------------------
793 rule_fv_env :: IdEnv IdSet
794 -- Maps a variable f to the variables from this group
795 -- mentioned in RHS of active rules for f
796 -- Domain is *subset* of bound vars (others have no rule fvs)
797 rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
798 init_rule_fvs -- See Note [Finding rule RHS free vars]
799 = [ (b, trimmed_rule_fvs)
800 | (ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs },_,_) <- nodes
801 , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
802 , not (isEmptyVarSet trimmed_rule_fvs)]
803
804 {-
805 @loopBreakSCC@ is applied to the list of (binder,rhs) pairs for a cyclic
806 strongly connected component (there's guaranteed to be a cycle). It returns the
807 same pairs, but
808 a) in a better order,
809 b) with some of the Ids having a IAmALoopBreaker pragma
810
811 The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means
812 that the simplifier can guarantee not to loop provided it never records an inlining
813 for these no-inline guys.
814
815 Furthermore, the order of the binds is such that if we neglect dependencies
816 on the no-inline Ids then the binds are topologically sorted. This means
817 that the simplifier will generally do a good job if it works from top bottom,
818 recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
819 -}
820
821 type Binding = (Id,CoreExpr)
822
823 mk_loop_breaker :: Node Details -> Binding
824 mk_loop_breaker (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
825 = (setIdOccInfo bndr strongLoopBreaker, rhs)
826
827 mk_non_loop_breaker :: VarSet -> Node Details -> Binding
828 -- See Note [Weak loop breakers]
829 mk_non_loop_breaker used_in_rules (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
830 | bndr `elemVarSet` used_in_rules = (setIdOccInfo bndr weakLoopBreaker, rhs)
831 | otherwise = (bndr, rhs)
832
833 udFreeVars :: VarSet -> UsageDetails -> VarSet
834 -- Find the subset of bndrs that are mentioned in uds
835 udFreeVars bndrs uds = intersectUFM_C (\b _ -> b) bndrs uds
836
837 loopBreakNodes :: Int
838 -> VarSet -- All binders
839 -> VarSet -- Binders whose dependencies may be "missing"
840 -- See Note [Weak loop breakers]
841 -> [Node Details]
842 -> [Binding] -- Append these to the end
843 -> [Binding]
844 -- Return the bindings sorted into a plausible order, and marked with loop breakers.
845 loopBreakNodes depth bndr_set weak_fvs nodes binds
846 = go (stronglyConnCompFromEdgedVerticesR nodes) binds
847 where
848 go [] binds = binds
849 go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
850
851 loop_break_scc scc binds
852 = case scc of
853 AcyclicSCC node -> mk_non_loop_breaker weak_fvs node : binds
854 CyclicSCC [node] -> mk_loop_breaker node : binds
855 CyclicSCC nodes -> reOrderNodes depth bndr_set weak_fvs nodes binds
856
857 reOrderNodes :: Int -> VarSet -> VarSet -> [Node Details] -> [Binding] -> [Binding]
858 -- Choose a loop breaker, mark it no-inline,
859 -- do SCC analysis on the rest, and recursively sort them out
860 reOrderNodes _ _ _ [] _ = panic "reOrderNodes"
861 reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
862 = -- pprTrace "reOrderNodes" (text "unchosen" <+> ppr unchosen $$
863 -- text "chosen" <+> ppr chosen_nodes) $
864 loopBreakNodes new_depth bndr_set weak_fvs unchosen $
865 (map mk_loop_breaker chosen_nodes ++ binds)
866 where
867 (chosen_nodes, unchosen) = choose_loop_breaker (score node) [node] [] nodes
868
869 approximate_loop_breaker = depth >= 2
870 new_depth | approximate_loop_breaker = 0
871 | otherwise = depth+1
872 -- After two iterations (d=0, d=1) give up
873 -- and approximate, returning to d=0
874
875 choose_loop_breaker :: Int -- Best score so far
876 -> [Node Details] -- Nodes with this score
877 -> [Node Details] -- Nodes with higher scores
878 -> [Node Details] -- Unprocessed nodes
879 -> ([Node Details], [Node Details])
880 -- This loop looks for the bind with the lowest score
881 -- to pick as the loop breaker. The rest accumulate in
882 choose_loop_breaker _ loop_nodes acc []
883 = (loop_nodes, acc) -- Done
884
885 -- If approximate_loop_breaker is True, we pick *all*
886 -- nodes with lowest score, else just one
887 -- See Note [Complexity of loop breaking]
888 choose_loop_breaker loop_sc loop_nodes acc (node : nodes)
889 | sc < loop_sc -- Lower score so pick this new one
890 = choose_loop_breaker sc [node] (loop_nodes ++ acc) nodes
891
892 | approximate_loop_breaker && sc == loop_sc
893 = choose_loop_breaker loop_sc (node : loop_nodes) acc nodes
894
895 | otherwise -- Higher score so don't pick it
896 = choose_loop_breaker loop_sc loop_nodes (node : acc) nodes
897 where
898 sc = score node
899
900 score :: Node Details -> Int -- Higher score => less likely to be picked as loop breaker
901 score (ND { nd_bndr = bndr, nd_rhs = rhs }, _, _)
902 | not (isId bndr) = 100 -- A type or cercion variable is never a loop breaker
903
904 | isDFunId bndr = 9 -- Never choose a DFun as a loop breaker
905 -- Note [DFuns should not be loop breakers]
906
907 | Just be_very_keen <- hasStableCoreUnfolding_maybe (idUnfolding bndr)
908 = if be_very_keen then 6 -- Note [Loop breakers and INLINE/INLINEABLE pragmas]
909 else 3
910 -- Data structures are more important than INLINE pragmas
911 -- so that dictionary/method recursion unravels
912 -- Note that this case hits all stable unfoldings, so we
913 -- never look at 'rhs' for stable unfoldings. That's right, because
914 -- 'rhs' is irrelevant for inlining things with a stable unfolding
915
916 | is_con_app rhs = 5 -- Data types help with cases: Note [Constructor applications]
917
918 | exprIsTrivial rhs = 10 -- Practically certain to be inlined
919 -- Used to have also: && not (isExportedId bndr)
920 -- But I found this sometimes cost an extra iteration when we have
921 -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }
922 -- where df is the exported dictionary. Then df makes a really
923 -- bad choice for loop breaker
924
925
926 -- If an Id is marked "never inline" then it makes a great loop breaker
927 -- The only reason for not checking that here is that it is rare
928 -- and I've never seen a situation where it makes a difference,
929 -- so it probably isn't worth the time to test on every binder
930 -- | isNeverActive (idInlinePragma bndr) = -10
931
932 | isOneOcc (idOccInfo bndr) = 2 -- Likely to be inlined
933
934 | canUnfold (realIdUnfolding bndr) = 1
935 -- The Id has some kind of unfolding
936 -- Ignore loop-breaker-ness here because that is what we are setting!
937
938 | otherwise = 0
939
940 -- Checking for a constructor application
941 -- Cheap and cheerful; the simplifer moves casts out of the way
942 -- The lambda case is important to spot x = /\a. C (f a)
943 -- which comes up when C is a dictionary constructor and
944 -- f is a default method.
945 -- Example: the instance for Show (ST s a) in GHC.ST
946 --
947 -- However we *also* treat (\x. C p q) as a con-app-like thing,
948 -- Note [Closure conversion]
949 is_con_app (Var v) = isConLikeId v
950 is_con_app (App f _) = is_con_app f
951 is_con_app (Lam _ e) = is_con_app e
952 is_con_app (Tick _ e) = is_con_app e
953 is_con_app _ = False
954
955 {-
956 Note [Complexity of loop breaking]
957 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
958 The loop-breaking algorithm knocks out one binder at a time, and
959 performs a new SCC analysis on the remaining binders. That can
960 behave very badly in tightly-coupled groups of bindings; in the
961 worst case it can be (N**2)*log N, because it does a full SCC
962 on N, then N-1, then N-2 and so on.
963
964 To avoid this, we switch plans after 2 (or whatever) attempts:
965 Plan A: pick one binder with the lowest score, make it
966 a loop breaker, and try again
967 Plan B: pick *all* binders with the lowest score, make them
968 all loop breakers, and try again
969 Since there are only a small finite number of scores, this will
970 terminate in a constant number of iterations, rather than O(N)
971 iterations.
972
973 You might thing that it's very unlikely, but RULES make it much
974 more likely. Here's a real example from Trac #1969:
975 Rec { $dm = \d.\x. op d
976 {-# RULES forall d. $dm Int d = $s$dm1
977 forall d. $dm Bool d = $s$dm2 #-}
978
979 dInt = MkD .... opInt ...
980 dInt = MkD .... opBool ...
981 opInt = $dm dInt
982 opBool = $dm dBool
983
984 $s$dm1 = \x. op dInt
985 $s$dm2 = \x. op dBool }
986 The RULES stuff means that we can't choose $dm as a loop breaker
987 (Note [Choosing loop breakers]), so we must choose at least (say)
988 opInt *and* opBool, and so on. The number of loop breakders is
989 linear in the number of instance declarations.
990
991 Note [Loop breakers and INLINE/INLINEABLE pragmas]
992 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
993 Avoid choosing a function with an INLINE pramga as the loop breaker!
994 If such a function is mutually-recursive with a non-INLINE thing,
995 then the latter should be the loop-breaker.
996
997 It's vital to distinguish between INLINE and INLINEABLE (the
998 Bool returned by hasStableCoreUnfolding_maybe). If we start with
999 Rec { {-# INLINEABLE f #-}
1000 f x = ...f... }
1001 and then worker/wrapper it through strictness analysis, we'll get
1002 Rec { {-# INLINEABLE $wf #-}
1003 $wf p q = let x = (p,q) in ...f...
1004
1005 {-# INLINE f #-}
1006 f x = case x of (p,q) -> $wf p q }
1007
1008 Now it is vital that we choose $wf as the loop breaker, so we can
1009 inline 'f' in '$wf'.
1010
1011 Note [DFuns should not be loop breakers]
1012 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1013 It's particularly bad to make a DFun into a loop breaker. See
1014 Note [How instance declarations are translated] in TcInstDcls
1015
1016 We give DFuns a higher score than ordinary CONLIKE things because
1017 if there's a choice we want the DFun to be the non-looop breker. Eg
1018
1019 rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
1020
1021 $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
1022 {-# DFUN #-}
1023 $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
1024 }
1025
1026 Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
1027 if we can't unravel the DFun first.
1028
1029 Note [Constructor applications]
1030 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1031 It's really really important to inline dictionaries. Real
1032 example (the Enum Ordering instance from GHC.Base):
1033
1034 rec f = \ x -> case d of (p,q,r) -> p x
1035 g = \ x -> case d of (p,q,r) -> q x
1036 d = (v, f, g)
1037
1038 Here, f and g occur just once; but we can't inline them into d.
1039 On the other hand we *could* simplify those case expressions if
1040 we didn't stupidly choose d as the loop breaker.
1041 But we won't because constructor args are marked "Many".
1042 Inlining dictionaries is really essential to unravelling
1043 the loops in static numeric dictionaries, see GHC.Float.
1044
1045 Note [Closure conversion]
1046 ~~~~~~~~~~~~~~~~~~~~~~~~~
1047 We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
1048 The immediate motivation came from the result of a closure-conversion transformation
1049 which generated code like this:
1050
1051 data Clo a b = forall c. Clo (c -> a -> b) c
1052
1053 ($:) :: Clo a b -> a -> b
1054 Clo f env $: x = f env x
1055
1056 rec { plus = Clo plus1 ()
1057
1058 ; plus1 _ n = Clo plus2 n
1059
1060 ; plus2 Zero n = n
1061 ; plus2 (Succ m) n = Succ (plus $: m $: n) }
1062
1063 If we inline 'plus' and 'plus1', everything unravels nicely. But if
1064 we choose 'plus1' as the loop breaker (which is entirely possible
1065 otherwise), the loop does not unravel nicely.
1066
1067
1068 @occAnalRhs@ deals with the question of bindings where the Id is marked
1069 by an INLINE pragma. For these we record that anything which occurs
1070 in its RHS occurs many times. This pessimistically assumes that ths
1071 inlined binder also occurs many times in its scope, but if it doesn't
1072 we'll catch it next time round. At worst this costs an extra simplifier pass.
1073 ToDo: try using the occurrence info for the inline'd binder.
1074
1075 [March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC.
1076 [June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC.
1077 -}
1078
1079 occAnalRecRhs :: OccEnv -> CoreExpr -- Rhs
1080 -> (UsageDetails, CoreExpr)
1081 -- Returned usage details covers only the RHS,
1082 -- and *not* the RULE or INLINE template for the Id
1083 occAnalRecRhs env rhs = occAnal (rhsCtxt env) rhs
1084
1085 occAnalNonRecRhs :: OccEnv
1086 -> Id -> CoreExpr -- Binder and rhs
1087 -- Binder is already tagged with occurrence info
1088 -> (UsageDetails, CoreExpr)
1089 -- Returned usage details covers only the RHS,
1090 -- and *not* the RULE or INLINE template for the Id
1091 occAnalNonRecRhs env bndr rhs
1092 = occAnal rhs_env rhs
1093 where
1094 -- See Note [Use one-shot info]
1095 env1 = env { occ_one_shots = argOneShots OneShotLam dmd }
1096
1097 -- See Note [Cascading inlines]
1098 rhs_env | certainly_inline = env1
1099 | otherwise = rhsCtxt env1
1100
1101 certainly_inline -- See Note [Cascading inlines]
1102 = case idOccInfo bndr of
1103 OneOcc in_lam one_br _ -> not in_lam && one_br && active && not_stable
1104 _ -> False
1105
1106 dmd = idDemandInfo bndr
1107 active = isAlwaysActive (idInlineActivation bndr)
1108 not_stable = not (isStableUnfolding (idUnfolding bndr))
1109
1110 addIdOccs :: UsageDetails -> VarSet -> UsageDetails
1111 addIdOccs usage id_set = foldVarSet addIdOcc usage id_set
1112
1113 addIdOcc :: Id -> UsageDetails -> UsageDetails
1114 addIdOcc v u | isId v = addOneOcc u v NoOccInfo
1115 | otherwise = u
1116 -- Give a non-committal binder info (i.e NoOccInfo) because
1117 -- a) Many copies of the specialised thing can appear
1118 -- b) We don't want to substitute a BIG expression inside a RULE
1119 -- even if that's the only occurrence of the thing
1120 -- (Same goes for INLINE.)
1121
1122 {-
1123 Note [Cascading inlines]
1124 ~~~~~~~~~~~~~~~~~~~~~~~~
1125 By default we use an rhsCtxt for the RHS of a binding. This tells the
1126 occ anal n that it's looking at an RHS, which has an effect in
1127 occAnalApp. In particular, for constructor applications, it makes
1128 the arguments appear to have NoOccInfo, so that we don't inline into
1129 them. Thus x = f y
1130 k = Just x
1131 we do not want to inline x.
1132
1133 But there's a problem. Consider
1134 x1 = a0 : []
1135 x2 = a1 : x1
1136 x3 = a2 : x2
1137 g = f x3
1138 First time round, it looks as if x1 and x2 occur as an arg of a
1139 let-bound constructor ==> give them a many-occurrence.
1140 But then x3 is inlined (unconditionally as it happens) and
1141 next time round, x2 will be, and the next time round x1 will be
1142 Result: multiple simplifier iterations. Sigh.
1143
1144 So, when analysing the RHS of x3 we notice that x3 will itself
1145 definitely inline the next time round, and so we analyse x3's rhs in
1146 an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff.
1147
1148 Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally.
1149 If we say "yes" when preInlineUnconditionally says "no" the simplifier iterates
1150 indefinitely:
1151 x = f y
1152 k = Just x
1153 inline ==>
1154 k = Just (f y)
1155 float ==>
1156 x1 = f y
1157 k = Just x1
1158
1159 This is worse than the slow cascade, so we only want to say "certainly_inline"
1160 if it really is certain. Look at the note with preInlineUnconditionally
1161 for the various clauses.
1162
1163 Expressions
1164 ~~~~~~~~~~~
1165 -}
1166
1167 occAnal :: OccEnv
1168 -> CoreExpr
1169 -> (UsageDetails, -- Gives info only about the "interesting" Ids
1170 CoreExpr)
1171
1172 occAnal _ expr@(Type _) = (emptyDetails, expr)
1173 occAnal _ expr@(Lit _) = (emptyDetails, expr)
1174 occAnal env expr@(Var v) = (mkOneOcc env v False, expr)
1175 -- At one stage, I gathered the idRuleVars for v here too,
1176 -- which in a way is the right thing to do.
1177 -- But that went wrong right after specialisation, when
1178 -- the *occurrences* of the overloaded function didn't have any
1179 -- rules in them, so the *specialised* versions looked as if they
1180 -- weren't used at all.
1181
1182 occAnal _ (Coercion co)
1183 = (addIdOccs emptyDetails (coVarsOfCo co), Coercion co)
1184 -- See Note [Gather occurrences of coercion variables]
1185
1186 {-
1187 Note [Gather occurrences of coercion variables]
1188 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1189 We need to gather info about what coercion variables appear, so that
1190 we can sort them into the right place when doing dependency analysis.
1191 -}
1192
1193 occAnal env (Tick tickish body)
1194 | tickish `tickishScopesLike` SoftScope
1195 = (usage, Tick tickish body')
1196
1197 | Breakpoint _ ids <- tickish
1198 = (usage_lam +++ mkVarEnv (zip ids (repeat NoOccInfo)), Tick tickish body')
1199 -- never substitute for any of the Ids in a Breakpoint
1200
1201 | otherwise
1202 = (usage_lam, Tick tickish body')
1203 where
1204 !(usage,body') = occAnal env body
1205 -- for a non-soft tick scope, we can inline lambdas only
1206 usage_lam = mapVarEnv markInsideLam usage
1207
1208 occAnal env (Cast expr co)
1209 = case occAnal env expr of { (usage, expr') ->
1210 let usage1 = markManyIf (isRhsEnv env) usage
1211 usage2 = addIdOccs usage1 (coVarsOfCo co)
1212 -- See Note [Gather occurrences of coercion variables]
1213 in (usage2, Cast expr' co)
1214 -- If we see let x = y `cast` co
1215 -- then mark y as 'Many' so that we don't
1216 -- immediately inline y again.
1217 }
1218
1219 occAnal env app@(App _ _)
1220 = occAnalApp env (collectArgsTicks tickishFloatable app)
1221
1222 -- Ignore type variables altogether
1223 -- (a) occurrences inside type lambdas only not marked as InsideLam
1224 -- (b) type variables not in environment
1225
1226 occAnal env (Lam x body) | isTyVar x
1227 = case occAnal env body of { (body_usage, body') ->
1228 (body_usage, Lam x body')
1229 }
1230
1231 -- For value lambdas we do a special hack. Consider
1232 -- (\x. \y. ...x...)
1233 -- If we did nothing, x is used inside the \y, so would be marked
1234 -- as dangerous to dup. But in the common case where the abstraction
1235 -- is applied to two arguments this is over-pessimistic.
1236 -- So instead, we just mark each binder with its occurrence
1237 -- info in the *body* of the multiple lambda.
1238 -- Then, the simplifier is careful when partially applying lambdas.
1239
1240 occAnal env expr@(Lam _ _)
1241 = case occAnal env_body body of { (body_usage, body') ->
1242 let
1243 (final_usage, tagged_binders) = tagLamBinders body_usage binders'
1244 -- Use binders' to put one-shot info on the lambdas
1245
1246 really_final_usage
1247 | all isOneShotBndr binders' = final_usage
1248 | otherwise = mapVarEnv markInsideLam final_usage
1249 in
1250 (really_final_usage, mkLams tagged_binders body') }
1251 where
1252 (binders, body) = collectBinders expr
1253 (env_body, binders') = oneShotGroup env binders
1254
1255 occAnal env (Case scrut bndr ty alts)
1256 = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') ->
1257 case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts') ->
1258 let
1259 alts_usage = foldr combineAltsUsageDetails emptyDetails alts_usage_s
1260 (alts_usage1, tagged_bndr) = tag_case_bndr alts_usage bndr
1261 total_usage = scrut_usage +++ alts_usage1
1262 in
1263 total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
1264 where
1265 -- Note [Case binder usage]
1266 -- ~~~~~~~~~~~~~~~~~~~~~~~~
1267 -- The case binder gets a usage of either "many" or "dead", never "one".
1268 -- Reason: we like to inline single occurrences, to eliminate a binding,
1269 -- but inlining a case binder *doesn't* eliminate a binding.
1270 -- We *don't* want to transform
1271 -- case x of w { (p,q) -> f w }
1272 -- into
1273 -- case x of w { (p,q) -> f (p,q) }
1274 tag_case_bndr usage bndr
1275 = case lookupVarEnv usage bndr of
1276 Nothing -> (usage, setIdOccInfo bndr IAmDead)
1277 Just _ -> (usage `delVarEnv` bndr, setIdOccInfo bndr NoOccInfo)
1278
1279 alt_env = mkAltEnv env scrut bndr
1280 occ_anal_alt = occAnalAlt alt_env
1281
1282 occ_anal_scrut (Var v) (alt1 : other_alts)
1283 | not (null other_alts) || not (isDefaultAlt alt1)
1284 = (mkOneOcc env v True, Var v) -- The 'True' says that the variable occurs
1285 -- in an interesting context; the case has
1286 -- at least one non-default alternative
1287 occ_anal_scrut (Tick t e) alts
1288 | t `tickishScopesLike` SoftScope
1289 -- No reason to not look through all ticks here, but only
1290 -- for soft-scoped ticks we can do so without having to
1291 -- update returned occurance info (see occAnal)
1292 = second (Tick t) $ occ_anal_scrut e alts
1293
1294 occ_anal_scrut scrut _alts
1295 = occAnal (vanillaCtxt env) scrut -- No need for rhsCtxt
1296
1297 occAnal env (Let bind body)
1298 = case occAnal env body of { (body_usage, body') ->
1299 case occAnalBind env noImpRuleEdges bind body_usage of { (final_usage, new_binds) ->
1300 (final_usage, mkLets new_binds body') }}
1301
1302 occAnalArgs :: OccEnv -> [CoreExpr] -> [OneShots] -> (UsageDetails, [CoreExpr])
1303 occAnalArgs _ [] _
1304 = (emptyDetails, [])
1305
1306 occAnalArgs env (arg:args) one_shots
1307 | isTypeArg arg
1308 = case occAnalArgs env args one_shots of { (uds, args') ->
1309 (uds, arg:args') }
1310
1311 | otherwise
1312 = case argCtxt env one_shots of { (arg_env, one_shots') ->
1313 case occAnal arg_env arg of { (uds1, arg') ->
1314 case occAnalArgs env args one_shots' of { (uds2, args') ->
1315 (uds1 +++ uds2, arg':args') }}}
1316
1317 {-
1318 Applications are dealt with specially because we want
1319 the "build hack" to work.
1320
1321 Note [Arguments of let-bound constructors]
1322 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1323 Consider
1324 f x = let y = expensive x in
1325 let z = (True,y) in
1326 (case z of {(p,q)->q}, case z of {(p,q)->q})
1327 We feel free to duplicate the WHNF (True,y), but that means
1328 that y may be duplicated thereby.
1329
1330 If we aren't careful we duplicate the (expensive x) call!
1331 Constructors are rather like lambdas in this way.
1332 -}
1333
1334 occAnalApp :: OccEnv
1335 -> (Expr CoreBndr, [Arg CoreBndr], [Tickish Id])
1336 -> (UsageDetails, Expr CoreBndr)
1337 occAnalApp env (Var fun, args, ticks)
1338 | null ticks = (uds, mkApps (Var fun) args')
1339 | otherwise = (uds, mkTicks ticks $ mkApps (Var fun) args')
1340 where
1341 uds = fun_uds +++ final_args_uds
1342
1343 !(args_uds, args') = occAnalArgs env args one_shots
1344 !final_args_uds = markManyIf (isRhsEnv env && is_exp) args_uds
1345 -- We mark the free vars of the argument of a constructor or PAP
1346 -- as "many", if it is the RHS of a let(rec).
1347 -- This means that nothing gets inlined into a constructor argument
1348 -- position, which is what we want. Typically those constructor
1349 -- arguments are just variables, or trivial expressions.
1350 --
1351 -- This is the *whole point* of the isRhsEnv predicate
1352 -- See Note [Arguments of let-bound constructors]
1353
1354 n_val_args = valArgCount args
1355 fun_uds = mkOneOcc env fun (n_val_args > 0)
1356 is_exp = isExpandableApp fun n_val_args
1357 -- See Note [CONLIKE pragma] in BasicTypes
1358 -- The definition of is_exp should match that in
1359 -- Simplify.prepareRhs
1360
1361 one_shots = argsOneShots (idStrictness fun) n_val_args
1362 -- See Note [Use one-shot info]
1363
1364 occAnalApp env (fun, args, ticks)
1365 = (fun_uds +++ args_uds, mkTicks ticks $ mkApps fun' args')
1366 where
1367 !(fun_uds, fun') = occAnal (addAppCtxt env args) fun
1368 -- The addAppCtxt is a bit cunning. One iteration of the simplifier
1369 -- often leaves behind beta redexs like
1370 -- (\x y -> e) a1 a2
1371 -- Here we would like to mark x,y as one-shot, and treat the whole
1372 -- thing much like a let. We do this by pushing some True items
1373 -- onto the context stack.
1374 !(args_uds, args') = occAnalArgs env args []
1375
1376 markManyIf :: Bool -- If this is true
1377 -> UsageDetails -- Then do markMany on this
1378 -> UsageDetails
1379 markManyIf True uds = mapVarEnv markMany uds
1380 markManyIf False uds = uds
1381
1382 {-
1383 Note [Use one-shot information]
1384 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1385 The occurrrence analyser propagates one-shot-lambda information in two situation
1386 * Applications: eg build (\cn -> blah)
1387 Propagate one-shot info from the strictness signature of 'build' to
1388 the \cn
1389
1390 * Let-bindings: eg let f = \c. let ... in \n -> blah
1391 in (build f, build f)
1392 Propagate one-shot info from the demanand-info on 'f' to the
1393 lambdas in its RHS (which may not be syntactically at the top)
1394
1395 Some of this is done by the demand analyser, but this way it happens
1396 much earlier, taking advantage of the strictness signature of
1397 imported functions.
1398
1399 Note [Binders in case alternatives]
1400 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1401 Consider
1402 case x of y { (a,b) -> f y }
1403 We treat 'a', 'b' as dead, because they don't physically occur in the
1404 case alternative. (Indeed, a variable is dead iff it doesn't occur in
1405 its scope in the output of OccAnal.) It really helps to know when
1406 binders are unused. See esp the call to isDeadBinder in
1407 Simplify.mkDupableAlt
1408
1409 In this example, though, the Simplifier will bring 'a' and 'b' back to
1410 life, beause it binds 'y' to (a,b) (imagine got inlined and
1411 scrutinised y).
1412 -}
1413
1414 occAnalAlt :: (OccEnv, Maybe (Id, CoreExpr))
1415 -> CoreAlt
1416 -> (UsageDetails, Alt IdWithOccInfo)
1417 occAnalAlt (env, scrut_bind) (con, bndrs, rhs)
1418 = case occAnal env rhs of { (rhs_usage1, rhs1) ->
1419 let
1420 (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs
1421 -- See Note [Binders in case alternatives]
1422 (alt_usg', rhs2) =
1423 wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1
1424 in
1425 (alt_usg', (con, tagged_bndrs, rhs2)) }
1426
1427 wrapAltRHS :: OccEnv
1428 -> Maybe (Id, CoreExpr) -- proxy mapping generated by mkAltEnv
1429 -> UsageDetails -- usage for entire alt (p -> rhs)
1430 -> [Var] -- alt binders
1431 -> CoreExpr -- alt RHS
1432 -> (UsageDetails, CoreExpr)
1433 wrapAltRHS env (Just (scrut_var, let_rhs)) alt_usg bndrs alt_rhs
1434 | occ_binder_swap env
1435 , scrut_var `usedIn` alt_usg -- bndrs are not be present in alt_usg so this
1436 -- handles condition (a) in Note [Binder swap]
1437 , not captured -- See condition (b) in Note [Binder swap]
1438 = ( alt_usg' +++ let_rhs_usg
1439 , Let (NonRec tagged_scrut_var let_rhs') alt_rhs )
1440 where
1441 captured = any (`usedIn` let_rhs_usg) bndrs
1442 -- The rhs of the let may include coercion variables
1443 -- if the scrutinee was a cast, so we must gather their
1444 -- usage. See Note [Gather occurrences of coercion variables]
1445 (let_rhs_usg, let_rhs') = occAnal env let_rhs
1446 (alt_usg', tagged_scrut_var) = tagBinder alt_usg scrut_var
1447
1448 wrapAltRHS _ _ alt_usg _ alt_rhs
1449 = (alt_usg, alt_rhs)
1450
1451 {-
1452 ************************************************************************
1453 * *
1454 OccEnv
1455 * *
1456 ************************************************************************
1457 -}
1458
1459 data OccEnv
1460 = OccEnv { occ_encl :: !OccEncl -- Enclosing context information
1461 , occ_one_shots :: !OneShots -- Tells about linearity
1462 , occ_gbl_scrut :: GlobalScruts
1463 , occ_rule_act :: Activation -> Bool -- Which rules are active
1464 -- See Note [Finding rule RHS free vars]
1465 , occ_binder_swap :: !Bool -- enable the binder_swap
1466 -- See CorePrep Note [Dead code in CorePrep]
1467 }
1468
1469 type GlobalScruts = IdSet -- See Note [Binder swap on GlobalId scrutinees]
1470
1471 -----------------------------
1472 -- OccEncl is used to control whether to inline into constructor arguments
1473 -- For example:
1474 -- x = (p,q) -- Don't inline p or q
1475 -- y = /\a -> (p a, q a) -- Still don't inline p or q
1476 -- z = f (p,q) -- Do inline p,q; it may make a rule fire
1477 -- So OccEncl tells enought about the context to know what to do when
1478 -- we encounter a contructor application or PAP.
1479
1480 data OccEncl
1481 = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda
1482 -- Don't inline into constructor args here
1483 | OccVanilla -- Argument of function, body of lambda, scruintee of case etc.
1484 -- Do inline into constructor args here
1485
1486 instance Outputable OccEncl where
1487 ppr OccRhs = ptext (sLit "occRhs")
1488 ppr OccVanilla = ptext (sLit "occVanilla")
1489
1490 type OneShots = [OneShotInfo]
1491 -- [] No info
1492 --
1493 -- one_shot_info:ctxt Analysing a function-valued expression that
1494 -- will be applied as described by one_shot_info
1495
1496 initOccEnv :: (Activation -> Bool) -> OccEnv
1497 initOccEnv active_rule
1498 = OccEnv { occ_encl = OccVanilla
1499 , occ_one_shots = []
1500 , occ_gbl_scrut = emptyVarSet
1501 , occ_rule_act = active_rule
1502 , occ_binder_swap = True }
1503
1504 vanillaCtxt :: OccEnv -> OccEnv
1505 vanillaCtxt env = env { occ_encl = OccVanilla, occ_one_shots = [] }
1506
1507 rhsCtxt :: OccEnv -> OccEnv
1508 rhsCtxt env = env { occ_encl = OccRhs, occ_one_shots = [] }
1509
1510 argCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots])
1511 argCtxt env []
1512 = (env { occ_encl = OccVanilla, occ_one_shots = [] }, [])
1513 argCtxt env (one_shots:one_shots_s)
1514 = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s)
1515
1516 isRhsEnv :: OccEnv -> Bool
1517 isRhsEnv (OccEnv { occ_encl = OccRhs }) = True
1518 isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False
1519
1520 oneShotGroup :: OccEnv -> [CoreBndr]
1521 -> ( OccEnv
1522 , [CoreBndr] )
1523 -- The result binders have one-shot-ness set that they might not have had originally.
1524 -- This happens in (build (\cn -> e)). Here the occurrence analyser
1525 -- linearity context knows that c,n are one-shot, and it records that fact in
1526 -- the binder. This is useful to guide subsequent float-in/float-out tranformations
1527
1528 oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs
1529 = go ctxt bndrs []
1530 where
1531 go ctxt [] rev_bndrs
1532 = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla }
1533 , reverse rev_bndrs )
1534
1535 go [] bndrs rev_bndrs
1536 = ( env { occ_one_shots = [], occ_encl = OccVanilla }
1537 , reverse rev_bndrs ++ bndrs )
1538
1539 go ctxt (bndr:bndrs) rev_bndrs
1540 | isId bndr
1541
1542 = case ctxt of
1543 [] -> go [] bndrs (bndr : rev_bndrs)
1544 (one_shot : ctxt) -> go ctxt bndrs (bndr': rev_bndrs)
1545 where
1546 bndr' = updOneShotInfo bndr one_shot
1547 | otherwise
1548 = go ctxt bndrs (bndr:rev_bndrs)
1549
1550 addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
1551 addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args
1552 = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt }
1553
1554 transClosureFV :: UniqFM VarSet -> UniqFM VarSet
1555 -- If (f,g), (g,h) are in the input, then (f,h) is in the output
1556 -- as well as (f,g), (g,h)
1557 transClosureFV env
1558 | no_change = env
1559 | otherwise = transClosureFV (listToUFM new_fv_list)
1560 where
1561 (no_change, new_fv_list) = mapAccumL bump True (ufmToList env)
1562 bump no_change (b,fvs)
1563 | no_change_here = (no_change, (b,fvs))
1564 | otherwise = (False, (b,new_fvs))
1565 where
1566 (new_fvs, no_change_here) = extendFvs env fvs
1567
1568 -------------
1569 extendFvs_ :: UniqFM VarSet -> VarSet -> VarSet
1570 extendFvs_ env s = fst (extendFvs env s) -- Discard the Bool flag
1571
1572 extendFvs :: UniqFM VarSet -> VarSet -> (VarSet, Bool)
1573 -- (extendFVs env s) returns
1574 -- (s `union` env(s), env(s) `subset` s)
1575 extendFvs env s
1576 | isNullUFM env
1577 = (s, True)
1578 | otherwise
1579 = (s `unionVarSet` extras, extras `subVarSet` s)
1580 where
1581 extras :: VarSet -- env(s)
1582 extras = foldUFM unionVarSet emptyVarSet $
1583 intersectUFM_C (\x _ -> x) env s
1584
1585 {-
1586 ************************************************************************
1587 * *
1588 Binder swap
1589 * *
1590 ************************************************************************
1591
1592 Note [Binder swap]
1593 ~~~~~~~~~~~~~~~~~~
1594 We do these two transformations right here:
1595
1596 (1) case x of b { pi -> ri }
1597 ==>
1598 case x of b { pi -> let x=b in ri }
1599
1600 (2) case (x |> co) of b { pi -> ri }
1601 ==>
1602 case (x |> co) of b { pi -> let x = b |> sym co in ri }
1603
1604 Why (2)? See Note [Case of cast]
1605
1606 In both cases, in a particular alternative (pi -> ri), we only
1607 add the binding if
1608 (a) x occurs free in (pi -> ri)
1609 (ie it occurs in ri, but is not bound in pi)
1610 (b) the pi does not bind b (or the free vars of co)
1611 We need (a) and (b) for the inserted binding to be correct.
1612
1613 For the alternatives where we inject the binding, we can transfer
1614 all x's OccInfo to b. And that is the point.
1615
1616 Notice that
1617 * The deliberate shadowing of 'x'.
1618 * That (a) rapidly becomes false, so no bindings are injected.
1619
1620 The reason for doing these transformations here is because it allows
1621 us to adjust the OccInfo for 'x' and 'b' as we go.
1622
1623 * Suppose the only occurrences of 'x' are the scrutinee and in the
1624 ri; then this transformation makes it occur just once, and hence
1625 get inlined right away.
1626
1627 * If we do this in the Simplifier, we don't know whether 'x' is used
1628 in ri, so we are forced to pessimistically zap b's OccInfo even
1629 though it is typically dead (ie neither it nor x appear in the
1630 ri). There's nothing actually wrong with zapping it, except that
1631 it's kind of nice to know which variables are dead. My nose
1632 tells me to keep this information as robustly as possible.
1633
1634 The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
1635 {x=b}; it's Nothing if the binder-swap doesn't happen.
1636
1637 There is a danger though. Consider
1638 let v = x +# y
1639 in case (f v) of w -> ...v...v...
1640 And suppose that (f v) expands to just v. Then we'd like to
1641 use 'w' instead of 'v' in the alternative. But it may be too
1642 late; we may have substituted the (cheap) x+#y for v in the
1643 same simplifier pass that reduced (f v) to v.
1644
1645 I think this is just too bad. CSE will recover some of it.
1646
1647 Note [Case of cast]
1648 ~~~~~~~~~~~~~~~~~~~
1649 Consider case (x `cast` co) of b { I# ->
1650 ... (case (x `cast` co) of {...}) ...
1651 We'd like to eliminate the inner case. That is the motivation for
1652 equation (2) in Note [Binder swap]. When we get to the inner case, we
1653 inline x, cancel the casts, and away we go.
1654
1655 Note [Binder swap on GlobalId scrutinees]
1656 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1657 When the scrutinee is a GlobalId we must take care in two ways
1658
1659 i) In order to *know* whether 'x' occurs free in the RHS, we need its
1660 occurrence info. BUT, we don't gather occurrence info for
1661 GlobalIds. That's the reason for the (small) occ_gbl_scrut env in
1662 OccEnv is for: it says "gather occurrence info for these".
1663
1664 ii) We must call localiseId on 'x' first, in case it's a GlobalId, or
1665 has an External Name. See, for example, SimplEnv Note [Global Ids in
1666 the substitution].
1667
1668 Note [Zap case binders in proxy bindings]
1669 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1670 From the original
1671 case x of cb(dead) { p -> ...x... }
1672 we will get
1673 case x of cb(live) { p -> let x = cb in ...x... }
1674
1675 Core Lint never expects to find an *occurrence* of an Id marked
1676 as Dead, so we must zap the OccInfo on cb before making the
1677 binding x = cb. See Trac #5028.
1678
1679 Historical note [no-case-of-case]
1680 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1681 We *used* to suppress the binder-swap in case expressions when
1682 -fno-case-of-case is on. Old remarks:
1683 "This happens in the first simplifier pass,
1684 and enhances full laziness. Here's the bad case:
1685 f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
1686 If we eliminate the inner case, we trap it inside the I# v -> arm,
1687 which might prevent some full laziness happening. I've seen this
1688 in action in spectral/cichelli/Prog.hs:
1689 [(m,n) | m <- [1..max], n <- [1..max]]
1690 Hence the check for NoCaseOfCase."
1691 However, now the full-laziness pass itself reverses the binder-swap, so this
1692 check is no longer necessary.
1693
1694 Historical note [Suppressing the case binder-swap]
1695 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1696 This old note describes a problem that is also fixed by doing the
1697 binder-swap in OccAnal:
1698
1699 There is another situation when it might make sense to suppress the
1700 case-expression binde-swap. If we have
1701
1702 case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
1703 ...other cases .... }
1704
1705 We'll perform the binder-swap for the outer case, giving
1706
1707 case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
1708 ...other cases .... }
1709
1710 But there is no point in doing it for the inner case, because w1 can't
1711 be inlined anyway. Furthermore, doing the case-swapping involves
1712 zapping w2's occurrence info (see paragraphs that follow), and that
1713 forces us to bind w2 when doing case merging. So we get
1714
1715 case x of w1 { A -> let w2 = w1 in e1
1716 B -> let w2 = w1 in e2
1717 ...other cases .... }
1718
1719 This is plain silly in the common case where w2 is dead.
1720
1721 Even so, I can't see a good way to implement this idea. I tried
1722 not doing the binder-swap if the scrutinee was already evaluated
1723 but that failed big-time:
1724
1725 data T = MkT !Int
1726
1727 case v of w { MkT x ->
1728 case x of x1 { I# y1 ->
1729 case x of x2 { I# y2 -> ...
1730
1731 Notice that because MkT is strict, x is marked "evaluated". But to
1732 eliminate the last case, we must either make sure that x (as well as
1733 x1) has unfolding MkT y1. The straightforward thing to do is to do
1734 the binder-swap. So this whole note is a no-op.
1735
1736 It's fixed by doing the binder-swap in OccAnal because we can do the
1737 binder-swap unconditionally and still get occurrence analysis
1738 information right.
1739 -}
1740
1741 mkAltEnv :: OccEnv -> CoreExpr -> Id -> (OccEnv, Maybe (Id, CoreExpr))
1742 -- Does two things: a) makes the occ_one_shots = OccVanilla
1743 -- b) extends the GlobalScruts if possible
1744 -- c) returns a proxy mapping, binding the scrutinee
1745 -- to the case binder, if possible
1746 mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr
1747 = case stripTicksTopE (const True) scrut of
1748 Var v -> add_scrut v case_bndr'
1749 Cast (Var v) co -> add_scrut v (Cast case_bndr' (mkSymCo co))
1750 -- See Note [Case of cast]
1751 _ -> (env { occ_encl = OccVanilla }, Nothing)
1752
1753 where
1754 add_scrut v rhs = ( env { occ_encl = OccVanilla, occ_gbl_scrut = pe `extendVarSet` v }
1755 , Just (localise v, rhs) )
1756
1757 case_bndr' = Var (zapIdOccInfo case_bndr) -- See Note [Zap case binders in proxy bindings]
1758 localise scrut_var = mkLocalId (localiseName (idName scrut_var)) (idType scrut_var)
1759 -- Localise the scrut_var before shadowing it; we're making a
1760 -- new binding for it, and it might have an External Name, or
1761 -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]
1762 -- Also we don't want any INLINE or NOINLINE pragmas!
1763
1764 {-
1765 ************************************************************************
1766 * *
1767 \subsection[OccurAnal-types]{OccEnv}
1768 * *
1769 ************************************************************************
1770 -}
1771
1772 type UsageDetails = IdEnv OccInfo -- A finite map from ids to their usage
1773 -- INVARIANT: never IAmDead
1774 -- (Deadness is signalled by not being in the map at all)
1775
1776 (+++), combineAltsUsageDetails
1777 :: UsageDetails -> UsageDetails -> UsageDetails
1778
1779 (+++) usage1 usage2
1780 = plusVarEnv_C addOccInfo usage1 usage2
1781
1782 combineAltsUsageDetails usage1 usage2
1783 = plusVarEnv_C orOccInfo usage1 usage2
1784
1785 addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
1786 addOneOcc usage id info
1787 = plusVarEnv_C addOccInfo usage (unitVarEnv id info)
1788 -- ToDo: make this more efficient
1789
1790 emptyDetails :: UsageDetails
1791 emptyDetails = (emptyVarEnv :: UsageDetails)
1792
1793 usedIn :: Id -> UsageDetails -> Bool
1794 v `usedIn` details = isExportedId v || v `elemVarEnv` details
1795
1796 type IdWithOccInfo = Id
1797
1798 tagLamBinders :: UsageDetails -- Of scope
1799 -> [Id] -- Binders
1800 -> (UsageDetails, -- Details with binders removed
1801 [IdWithOccInfo]) -- Tagged binders
1802 -- Used for lambda and case binders
1803 -- It copes with the fact that lambda bindings can have a
1804 -- stable unfolding, used for join points
1805 tagLamBinders usage binders = usage' `seq` (usage', bndrs')
1806 where
1807 (usage', bndrs') = mapAccumR tag_lam usage binders
1808 tag_lam usage bndr = (usage2, setBinderOcc usage bndr)
1809 where
1810 usage1 = usage `delVarEnv` bndr
1811 usage2 | isId bndr = addIdOccs usage1 (idUnfoldingVars bndr)
1812 | otherwise = usage1
1813
1814 tagBinder :: UsageDetails -- Of scope
1815 -> Id -- Binders
1816 -> (UsageDetails, -- Details with binders removed
1817 IdWithOccInfo) -- Tagged binders
1818
1819 tagBinder usage binder
1820 = let
1821 usage' = usage `delVarEnv` binder
1822 binder' = setBinderOcc usage binder
1823 in
1824 usage' `seq` (usage', binder')
1825
1826 setBinderOcc :: UsageDetails -> CoreBndr -> CoreBndr
1827 setBinderOcc usage bndr
1828 | isTyVar bndr = bndr
1829 | isExportedId bndr = case idOccInfo bndr of
1830 NoOccInfo -> bndr
1831 _ -> setIdOccInfo bndr NoOccInfo
1832 -- Don't use local usage info for visible-elsewhere things
1833 -- BUT *do* erase any IAmALoopBreaker annotation, because we're
1834 -- about to re-generate it and it shouldn't be "sticky"
1835
1836 | otherwise = setIdOccInfo bndr occ_info
1837 where
1838 occ_info = lookupVarEnv usage bndr `orElse` IAmDead
1839
1840 {-
1841 ************************************************************************
1842 * *
1843 \subsection{Operations over OccInfo}
1844 * *
1845 ************************************************************************
1846 -}
1847
1848 mkOneOcc :: OccEnv -> Id -> InterestingCxt -> UsageDetails
1849 mkOneOcc env id int_cxt
1850 | isLocalId id
1851 = unitVarEnv id (OneOcc False True int_cxt)
1852
1853 | id `elemVarEnv` occ_gbl_scrut env
1854 = unitVarEnv id NoOccInfo
1855
1856 | otherwise
1857 = emptyDetails
1858
1859 markMany, markInsideLam :: OccInfo -> OccInfo
1860
1861 markMany _ = NoOccInfo
1862
1863 markInsideLam (OneOcc _ one_br int_cxt) = OneOcc True one_br int_cxt
1864 markInsideLam occ = occ
1865
1866 addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
1867
1868 addOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
1869 NoOccInfo -- Both branches are at least One
1870 -- (Argument is never IAmDead)
1871
1872 -- (orOccInfo orig new) is used
1873 -- when combining occurrence info from branches of a case
1874
1875 orOccInfo (OneOcc in_lam1 _ int_cxt1)
1876 (OneOcc in_lam2 _ int_cxt2)
1877 = OneOcc (in_lam1 || in_lam2)
1878 False -- False, because it occurs in both branches
1879 (int_cxt1 && int_cxt2)
1880 orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
1881 NoOccInfo