b14dbd972471b63991bac05d78c05952d203b8e6
[ghc.git] / compiler / simplCore / OccurAnal.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 ************************************************************************
5 * *
6 \section[OccurAnal]{Occurrence analysis pass}
7 * *
8 ************************************************************************
9
10 The occurrence analyser re-typechecks a core expression, returning a new
11 core expression with (hopefully) improved usage information.
12 -}
13
14 {-# LANGUAGE CPP, BangPatterns, MultiWayIf #-}
15
16 module OccurAnal (
17 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
18 ) where
19
20 #include "HsVersions.h"
21
22 import CoreSyn
23 import CoreFVs
24 import CoreUtils ( exprIsTrivial, isDefaultAlt, isExpandableApp,
25 stripTicksTopE, mkTicks )
26 import Id
27 import IdInfo
28 import Name( localiseName )
29 import BasicTypes
30 import Module( Module )
31 import Coercion
32 import Type
33
34 import VarSet
35 import VarEnv
36 import Var
37 import Demand ( argOneShots, argsOneShots )
38 import Digraph ( SCC(..), Node
39 , stronglyConnCompFromEdgedVerticesUniq
40 , stronglyConnCompFromEdgedVerticesUniqR )
41 import Unique
42 import UniqFM
43 import UniqSet
44 import Util
45 import Outputable
46 import Data.List
47 import Control.Arrow ( second )
48
49 {-
50 ************************************************************************
51 * *
52 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
53 * *
54 ************************************************************************
55
56 Here's the externally-callable interface:
57 -}
58
59 occurAnalysePgm :: Module -- Used only in debug output
60 -> (Activation -> Bool)
61 -> [CoreRule] -> [CoreVect] -> VarSet
62 -> CoreProgram -> CoreProgram
63 occurAnalysePgm this_mod active_rule imp_rules vects vectVars binds
64 | isEmptyDetails final_usage
65 = occ_anald_binds
66
67 | otherwise -- See Note [Glomming]
68 = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
69 2 (ppr final_usage ) )
70 occ_anald_glommed_binds
71 where
72 init_env = initOccEnv active_rule
73 (final_usage, occ_anald_binds) = go init_env binds
74 (_, occ_anald_glommed_binds) = occAnalRecBind init_env TopLevel
75 imp_rule_edges
76 (flattenBinds occ_anald_binds)
77 initial_uds
78 -- It's crucial to re-analyse the glommed-together bindings
79 -- so that we establish the right loop breakers. Otherwise
80 -- we can easily create an infinite loop (Trac #9583 is an example)
81
82 initial_uds = addManyOccsSet emptyDetails
83 (rulesFreeVars imp_rules `unionVarSet`
84 vectsFreeVars vects `unionVarSet`
85 vectVars)
86 -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations,
87 -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are
88 -- reflected in 'vectors' — see Note [Vectorisation declarations and occurrences].)
89
90 -- Note [Preventing loops due to imported functions rules]
91 imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
92 [ mapVarEnv (const maps_to) $
93 getUniqSet (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
94 | imp_rule <- imp_rules
95 , not (isBuiltinRule imp_rule) -- See Note [Plugin rules]
96 , let maps_to = exprFreeIds (ru_rhs imp_rule)
97 `delVarSetList` ru_bndrs imp_rule
98 , arg <- ru_args imp_rule ]
99
100 go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
101 go _ []
102 = (initial_uds, [])
103 go env (bind:binds)
104 = (final_usage, bind' ++ binds')
105 where
106 (bs_usage, binds') = go env binds
107 (final_usage, bind') = occAnalBind env TopLevel imp_rule_edges bind
108 bs_usage
109
110 occurAnalyseExpr :: CoreExpr -> CoreExpr
111 -- Do occurrence analysis, and discard occurrence info returned
112 occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap
113
114 occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
115 occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap
116
117 occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
118 occurAnalyseExpr' enable_binder_swap expr
119 = snd (occAnal env expr)
120 where
121 env = (initOccEnv all_active_rules) {occ_binder_swap = enable_binder_swap}
122 -- To be conservative, we say that all inlines and rules are active
123 all_active_rules = \_ -> True
124
125 {- Note [Plugin rules]
126 ~~~~~~~~~~~~~~~~~~~~~~
127 Conal Elliott (Trac #11651) built a GHC plugin that added some
128 BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
129 do some domain-specific transformations that could not be expressed
130 with an ordinary pattern-matching CoreRule. But then we can't extract
131 the dependencies (in imp_rule_edges) from ru_rhs etc, because a
132 BuiltinRule doesn't have any of that stuff.
133
134 So we simply assume that BuiltinRules have no dependencies, and filter
135 them out from the imp_rule_edges comprehension.
136 -}
137
138 {-
139 ************************************************************************
140 * *
141 Bindings
142 * *
143 ************************************************************************
144
145 Note [Recursive bindings: the grand plan]
146 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
147 When we come across a binding group
148 Rec { x1 = r1; ...; xn = rn }
149 we treat it like this (occAnalRecBind):
150
151 1. Occurrence-analyse each right hand side, and build a
152 "Details" for each binding to capture the results.
153
154 Wrap the details in a Node (details, node-id, dep-node-ids),
155 where node-id is just the unique of the binder, and
156 dep-node-ids lists all binders on which this binding depends.
157 We'll call these the "scope edges".
158 See Note [Forming the Rec groups].
159
160 All this is done by makeNode.
161
162 2. Do SCC-analysis on these Nodes. Each SCC will become a new Rec or
163 NonRec. The key property is that every free variable of a binding
164 is accounted for by the scope edges, so that when we are done
165 everything is still in scope.
166
167 3. For each Cyclic SCC of the scope-edge SCC-analysis in (2), we
168 identify suitable loop-breakers to ensure that inlining terminates.
169 This is done by occAnalRec.
170
171 4. To do so we form a new set of Nodes, with the same details, but
172 different edges, the "loop-breaker nodes". The loop-breaker nodes
173 have both more and fewer depedencies than the scope edges
174 (see Note [Choosing loop breakers])
175
176 More edges: if f calls g, and g has an active rule that mentions h
177 then we add an edge from f -> h
178
179 Fewer edges: we only include dependencies on active rules, on rule
180 RHSs (not LHSs) and if there is an INLINE pragma only
181 on the stable unfolding (and vice versa). The scope
182 edges must be much more inclusive.
183
184 5. The "weak fvs" of a node are, by definition:
185 the scope fvs - the loop-breaker fvs
186 See Note [Weak loop breakers], and the nd_weak field of Details
187
188 6. Having formed the loop-breaker nodes
189
190 Note [Dead code]
191 ~~~~~~~~~~~~~~~~
192 Dropping dead code for a cyclic Strongly Connected Component is done
193 in a very simple way:
194
195 the entire SCC is dropped if none of its binders are mentioned
196 in the body; otherwise the whole thing is kept.
197
198 The key observation is that dead code elimination happens after
199 dependency analysis: so 'occAnalBind' processes SCCs instead of the
200 original term's binding groups.
201
202 Thus 'occAnalBind' does indeed drop 'f' in an example like
203
204 letrec f = ...g...
205 g = ...(...g...)...
206 in
207 ...g...
208
209 when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
210 'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
211 'AcyclicSCC f', where 'body_usage' won't contain 'f'.
212
213 ------------------------------------------------------------
214 Note [Forming Rec groups]
215 ~~~~~~~~~~~~~~~~~~~~~~~~~
216 We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
217 and "g uses f", no matter how indirectly. We do a SCC analysis
218 with an edge f -> g if "f uses g".
219
220 More precisely, "f uses g" iff g should be in scope wherever f is.
221 That is, g is free in:
222 a) the rhs 'ef'
223 b) or the RHS of a rule for f (Note [Rules are extra RHSs])
224 c) or the LHS or a rule for f (Note [Rule dependency info])
225
226 These conditions apply regardless of the activation of the RULE (eg it might be
227 inactive in this phase but become active later). Once a Rec is broken up
228 it can never be put back together, so we must be conservative.
229
230 The principle is that, regardless of rule firings, every variable is
231 always in scope.
232
233 * Note [Rules are extra RHSs]
234 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
235 A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
236 keeps the specialised "children" alive. If the parent dies
237 (because it isn't referenced any more), then the children will die
238 too (unless they are already referenced directly).
239
240 To that end, we build a Rec group for each cyclic strongly
241 connected component,
242 *treating f's rules as extra RHSs for 'f'*.
243 More concretely, the SCC analysis runs on a graph with an edge
244 from f -> g iff g is mentioned in
245 (a) f's rhs
246 (b) f's RULES
247 These are rec_edges.
248
249 Under (b) we include variables free in *either* LHS *or* RHS of
250 the rule. The former might seems silly, but see Note [Rule
251 dependency info]. So in Example [eftInt], eftInt and eftIntFB
252 will be put in the same Rec, even though their 'main' RHSs are
253 both non-recursive.
254
255 * Note [Rule dependency info]
256 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
257 The VarSet in a RuleInfo is used for dependency analysis in the
258 occurrence analyser. We must track free vars in *both* lhs and rhs.
259 Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
260 Why both? Consider
261 x = y
262 RULE f x = v+4
263 Then if we substitute y for x, we'd better do so in the
264 rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
265 as well as 'v'
266
267 * Note [Rules are visible in their own rec group]
268 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
269 We want the rules for 'f' to be visible in f's right-hand side.
270 And we'd like them to be visible in other functions in f's Rec
271 group. E.g. in Note [Specialisation rules] we want f' rule
272 to be visible in both f's RHS, and fs's RHS.
273
274 This means that we must simplify the RULEs first, before looking
275 at any of the definitions. This is done by Simplify.simplRecBind,
276 when it calls addLetIdInfo.
277
278 ------------------------------------------------------------
279 Note [Choosing loop breakers]
280 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
281 Loop breaking is surprisingly subtle. First read the section 4 of
282 "Secrets of the GHC inliner". This describes our basic plan.
283 We avoid infinite inlinings by choosing loop breakers, and
284 ensuring that a loop breaker cuts each loop.
285
286 See also Note [Inlining and hs-boot files] in ToIface, which deals
287 with a closely related source of infinite loops.
288
289 Fundamentally, we do SCC analysis on a graph. For each recursive
290 group we choose a loop breaker, delete all edges to that node,
291 re-analyse the SCC, and iterate.
292
293 But what is the graph? NOT the same graph as was used for Note
294 [Forming Rec groups]! In particular, a RULE is like an equation for
295 'f' that is *always* inlined if it is applicable. We do *not* disable
296 rules for loop-breakers. It's up to whoever makes the rules to make
297 sure that the rules themselves always terminate. See Note [Rules for
298 recursive functions] in Simplify.hs
299
300 Hence, if
301 f's RHS (or its INLINE template if it has one) mentions g, and
302 g has a RULE that mentions h, and
303 h has a RULE that mentions f
304
305 then we *must* choose f to be a loop breaker. Example: see Note
306 [Specialisation rules].
307
308 In general, take the free variables of f's RHS, and augment it with
309 all the variables reachable by RULES from those starting points. That
310 is the whole reason for computing rule_fv_env in occAnalBind. (Of
311 course we only consider free vars that are also binders in this Rec
312 group.) See also Note [Finding rule RHS free vars]
313
314 Note that when we compute this rule_fv_env, we only consider variables
315 free in the *RHS* of the rule, in contrast to the way we build the
316 Rec group in the first place (Note [Rule dependency info])
317
318 Note that if 'g' has RHS that mentions 'w', we should add w to
319 g's loop-breaker edges. More concretely there is an edge from f -> g
320 iff
321 (a) g is mentioned in f's RHS `xor` f's INLINE rhs
322 (see Note [Inline rules])
323 (b) or h is mentioned in f's RHS, and
324 g appears in the RHS of an active RULE of h
325 or a transitive sequence of active rules starting with h
326
327 Why "active rules"? See Note [Finding rule RHS free vars]
328
329 Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
330 chosen as a loop breaker, because their RHSs don't mention each other.
331 And indeed both can be inlined safely.
332
333 Note again that the edges of the graph we use for computing loop breakers
334 are not the same as the edges we use for computing the Rec blocks.
335 That's why we compute
336
337 - rec_edges for the Rec block analysis
338 - loop_breaker_nodes for the loop breaker analysis
339
340 * Note [Finding rule RHS free vars]
341 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
342 Consider this real example from Data Parallel Haskell
343 tagZero :: Array Int -> Array Tag
344 {-# INLINE [1] tagZeroes #-}
345 tagZero xs = pmap (\x -> fromBool (x==0)) xs
346
347 {-# RULES "tagZero" [~1] forall xs n.
348 pmap fromBool <blah blah> = tagZero xs #-}
349 So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
350 However, tagZero can only be inlined in phase 1 and later, while
351 the RULE is only active *before* phase 1. So there's no problem.
352
353 To make this work, we look for the RHS free vars only for
354 *active* rules. That's the reason for the occ_rule_act field
355 of the OccEnv.
356
357 * Note [Weak loop breakers]
358 ~~~~~~~~~~~~~~~~~~~~~~~~~
359 There is a last nasty wrinkle. Suppose we have
360
361 Rec { f = f_rhs
362 RULE f [] = g
363
364 h = h_rhs
365 g = h
366 ...more...
367 }
368
369 Remember that we simplify the RULES before any RHS (see Note
370 [Rules are visible in their own rec group] above).
371
372 So we must *not* postInlineUnconditionally 'g', even though
373 its RHS turns out to be trivial. (I'm assuming that 'g' is
374 not choosen as a loop breaker.) Why not? Because then we
375 drop the binding for 'g', which leaves it out of scope in the
376 RULE!
377
378 Here's a somewhat different example of the same thing
379 Rec { g = h
380 ; h = ...f...
381 ; f = f_rhs
382 RULE f [] = g }
383 Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
384 g, because the RULE for f is active throughout. So the RHS of h
385 might rewrite to h = ...g...
386 So g must remain in scope in the output program!
387
388 We "solve" this by:
389
390 Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
391 iff g is a "missing free variable" of the Rec group
392
393 A "missing free variable" x is one that is mentioned in an RHS or
394 INLINE or RULE of a binding in the Rec group, but where the
395 dependency on x may not show up in the loop_breaker_nodes (see
396 note [Choosing loop breakers} above).
397
398 A normal "strong" loop breaker has IAmLoopBreaker False. So
399
400 Inline postInlineUnconditionally
401 strong IAmLoopBreaker False no no
402 weak IAmLoopBreaker True yes no
403 other yes yes
404
405 The **sole** reason for this kind of loop breaker is so that
406 postInlineUnconditionally does not fire. Ugh. (Typically it'll
407 inline via the usual callSiteInline stuff, so it'll be dead in the
408 next pass, so the main Ugh is the tiresome complication.)
409
410 Note [Rules for imported functions]
411 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
412 Consider this
413 f = /\a. B.g a
414 RULE B.g Int = 1 + f Int
415 Note that
416 * The RULE is for an imported function.
417 * f is non-recursive
418 Now we
419 can get
420 f Int --> B.g Int Inlining f
421 --> 1 + f Int Firing RULE
422 and so the simplifier goes into an infinite loop. This
423 would not happen if the RULE was for a local function,
424 because we keep track of dependencies through rules. But
425 that is pretty much impossible to do for imported Ids. Suppose
426 f's definition had been
427 f = /\a. C.h a
428 where (by some long and devious process), C.h eventually inlines to
429 B.g. We could only spot such loops by exhaustively following
430 unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
431 f.
432
433 Note that RULES for imported functions are important in practice; they
434 occur a lot in the libraries.
435
436 We regard this potential infinite loop as a *programmer* error.
437 It's up the programmer not to write silly rules like
438 RULE f x = f x
439 and the example above is just a more complicated version.
440
441 Note [Preventing loops due to imported functions rules]
442 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
443 Consider:
444 import GHC.Base (foldr)
445
446 {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
447 filter p xs = build (\c n -> foldr (filterFB c p) n xs)
448 filterFB c p = ...
449
450 f = filter p xs
451
452 Note that filter is not a loop-breaker, so what happens is:
453 f = filter p xs
454 = {inline} build (\c n -> foldr (filterFB c p) n xs)
455 = {inline} foldr (filterFB (:) p) [] xs
456 = {RULE} filter p xs
457
458 We are in an infinite loop.
459
460 A more elaborate example (that I actually saw in practice when I went to
461 mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
462 {-# LANGUAGE RankNTypes #-}
463 module GHCList where
464
465 import Prelude hiding (filter)
466 import GHC.Base (build)
467
468 {-# INLINABLE filter #-}
469 filter :: (a -> Bool) -> [a] -> [a]
470 filter p [] = []
471 filter p (x:xs) = if p x then x : filter p xs else filter p xs
472
473 {-# NOINLINE [0] filterFB #-}
474 filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
475 filterFB c p x r | p x = x `c` r
476 | otherwise = r
477
478 {-# RULES
479 "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr
480 (filterFB c p) n xs)
481 "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p
482 #-}
483
484 Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
485 are not), the unfolding given to "filter" in the interface file will be:
486 filter p [] = []
487 filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
488 else build (\c n -> foldr (filterFB c p) n xs
489
490 Note that because this unfolding does not mention "filter", filter is not
491 marked as a strong loop breaker. Therefore at a use site in another module:
492 filter p xs
493 = {inline}
494 case xs of [] -> []
495 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
496 else build (\c n -> foldr (filterFB c p) n xs)
497
498 build (\c n -> foldr (filterFB c p) n xs)
499 = {inline} foldr (filterFB (:) p) [] xs
500 = {RULE} filter p xs
501
502 And we are in an infinite loop again, except that this time the loop is producing an
503 infinitely large *term* (an unrolling of filter) and so the simplifier finally
504 dies with "ticks exhausted"
505
506 Because of this problem, we make a small change in the occurrence analyser
507 designed to mark functions like "filter" as strong loop breakers on the basis that:
508 1. The RHS of filter mentions the local function "filterFB"
509 2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS
510
511 So for each RULE for an *imported* function we are going to add
512 dependency edges between the *local* FVS of the rule LHS and the
513 *local* FVS of the rule RHS. We don't do anything special for RULES on
514 local functions because the standard occurrence analysis stuff is
515 pretty good at getting loop-breakerness correct there.
516
517 It is important to note that even with this extra hack we aren't always going to get
518 things right. For example, it might be that the rule LHS mentions an imported Id,
519 and another module has a RULE that can rewrite that imported Id to one of our local
520 Ids.
521
522 Note [Specialising imported functions] (referred to from Specialise)
523 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
524 BUT for *automatically-generated* rules, the programmer can't be
525 responsible for the "programmer error" in Note [Rules for imported
526 functions]. In paricular, consider specialising a recursive function
527 defined in another module. If we specialise a recursive function B.g,
528 we get
529 g_spec = .....(B.g Int).....
530 RULE B.g Int = g_spec
531 Here, g_spec doesn't look recursive, but when the rule fires, it
532 becomes so. And if B.g was mutually recursive, the loop might
533 not be as obvious as it is here.
534
535 To avoid this,
536 * When specialising a function that is a loop breaker,
537 give a NOINLINE pragma to the specialised function
538
539 Note [Glomming]
540 ~~~~~~~~~~~~~~~
541 RULES for imported Ids can make something at the top refer to something at the bottom:
542 f = \x -> B.g (q x)
543 h = \y -> 3
544
545 RULE: B.g (q x) = h x
546
547 Applying this rule makes f refer to h, although f doesn't appear to
548 depend on h. (And, as in Note [Rules for imported functions], the
549 dependency might be more indirect. For example, f might mention C.t
550 rather than B.g, where C.t eventually inlines to B.g.)
551
552 NOTICE that this cannot happen for rules whose head is a
553 locally-defined function, because we accurately track dependencies
554 through RULES. It only happens for rules whose head is an imported
555 function (B.g in the example above).
556
557 Solution:
558 - When simplifying, bring all top level identifiers into
559 scope at the start, ignoring the Rec/NonRec structure, so
560 that when 'h' pops up in f's rhs, we find it in the in-scope set
561 (as the simplifier generally expects). This happens in simplTopBinds.
562
563 - In the occurrence analyser, if there are any out-of-scope
564 occurrences that pop out of the top, which will happen after
565 firing the rule: f = \x -> h x
566 h = \y -> 3
567 then just glom all the bindings into a single Rec, so that
568 the *next* iteration of the occurrence analyser will sort
569 them all out. This part happens in occurAnalysePgm.
570
571 ------------------------------------------------------------
572 Note [Inline rules]
573 ~~~~~~~~~~~~~~~~~~~
574 None of the above stuff about RULES applies to Inline Rules,
575 stored in a CoreUnfolding. The unfolding, if any, is simplified
576 at the same time as the regular RHS of the function (ie *not* like
577 Note [Rules are visible in their own rec group]), so it should be
578 treated *exactly* like an extra RHS.
579
580 Or, rather, when computing loop-breaker edges,
581 * If f has an INLINE pragma, and it is active, we treat the
582 INLINE rhs as f's rhs
583 * If it's inactive, we treat f as having no rhs
584 * If it has no INLINE pragma, we look at f's actual rhs
585
586
587 There is a danger that we'll be sub-optimal if we see this
588 f = ...f...
589 [INLINE f = ..no f...]
590 where f is recursive, but the INLINE is not. This can just about
591 happen with a sufficiently odd set of rules; eg
592
593 foo :: Int -> Int
594 {-# INLINE [1] foo #-}
595 foo x = x+1
596
597 bar :: Int -> Int
598 {-# INLINE [1] bar #-}
599 bar x = foo x + 1
600
601 {-# RULES "foo" [~1] forall x. foo x = bar x #-}
602
603 Here the RULE makes bar recursive; but it's INLINE pragma remains
604 non-recursive. It's tempting to then say that 'bar' should not be
605 a loop breaker, but an attempt to do so goes wrong in two ways:
606 a) We may get
607 $df = ...$cfoo...
608 $cfoo = ...$df....
609 [INLINE $cfoo = ...no-$df...]
610 But we want $cfoo to depend on $df explicitly so that we
611 put the bindings in the right order to inline $df in $cfoo
612 and perhaps break the loop altogether. (Maybe this
613 b)
614
615
616 Example [eftInt]
617 ~~~~~~~~~~~~~~~
618 Example (from GHC.Enum):
619
620 eftInt :: Int# -> Int# -> [Int]
621 eftInt x y = ...(non-recursive)...
622
623 {-# INLINE [0] eftIntFB #-}
624 eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
625 eftIntFB c n x y = ...(non-recursive)...
626
627 {-# RULES
628 "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
629 "eftIntList" [1] eftIntFB (:) [] = eftInt
630 #-}
631
632 Note [Specialisation rules]
633 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
634 Consider this group, which is typical of what SpecConstr builds:
635
636 fs a = ....f (C a)....
637 f x = ....f (C a)....
638 {-# RULE f (C a) = fs a #-}
639
640 So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
641
642 But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
643 - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
644 - fs is inlined (say it's small)
645 - now there's another opportunity to apply the RULE
646
647 This showed up when compiling Control.Concurrent.Chan.getChanContents.
648
649 ------------------------------------------------------------
650 Note [Finding join points]
651 ~~~~~~~~~~~~~~~~~~~~~~~~~~
652 It's the occurrence analyser's job to find bindings that we can turn into join
653 points, but it doesn't perform that transformation right away. Rather, it marks
654 the eligible bindings as part of their occurrence data, leaving it to the
655 simplifier (or to simpleOptPgm) to actually change the binder's 'IdDetails'.
656 The simplifier then eta-expands the RHS if needed and then updates the
657 occurrence sites. Dividing the work this way means that the occurrence analyser
658 still only takes one pass, yet one can always tell the difference between a
659 function call and a jump by looking at the occurrence (because the same pass
660 changes the 'IdDetails' and propagates the binders to their occurrence sites).
661
662 To track potential join points, we use the 'occ_tail' field of OccInfo. A value
663 of `AlwaysTailCalled n` indicates that every occurrence of the variable is a
664 tail call with `n` arguments (counting both value and type arguments). Otherwise
665 'occ_tail' will be 'NoTailCallInfo'. The tail call info flows bottom-up with the
666 rest of 'OccInfo' until it goes on the binder.
667
668 Note [Rules and join points]
669 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
670
671 Things get fiddly with rules. Suppose we have:
672
673 let j :: Int -> Int
674 j y = 2 * y
675 k :: Int -> Int -> Int
676 {-# RULES "SPEC k 0" k 0 = j #-}
677 k x y = x + 2 * y
678 in ...
679
680 Now suppose that both j and k appear only as saturated tail calls in the body.
681 Thus we would like to make them both join points. The rule complicates matters,
682 though, as its RHS has an unapplied occurrence of j. *However*, if we were to
683 eta-expand the rule, all would be well:
684
685 {-# RULES "SPEC k 0" forall a. k 0 a = j a #-}
686
687 So conceivably we could notice that a potential join point would have an
688 "undersaturated" rule and account for it. This would mean we could make
689 something that's been specialised a join point, for instance. But local bindings
690 are rarely specialised, and being overly cautious about rules only
691 costs us anything when, for some `j`:
692
693 * Before specialisation, `j` has non-tail calls, so it can't be a join point.
694 * During specialisation, `j` gets specialised and thus acquires rules.
695 * Sometime afterward, the non-tail calls to `j` disappear (as dead code, say),
696 and so now `j` *could* become a join point.
697
698 This appears to be very rare in practice. TODO Perhaps we should gather
699 statistics to be sure.
700
701 Note [Excess polymorphism and join points]
702 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
703
704 In principle, if a function would be a join point except that it fails
705 the polymorphism rule (see Note [The polymorphism rule of join points] in
706 CoreSyn), it can still be made a join point with some effort. This is because
707 all tail calls must return the same type (they return to the same context!), and
708 thus if the return type depends on an argument, that argument must always be the
709 same.
710
711 For instance, consider:
712
713 let f :: forall a. a -> Char -> [a]
714 f @a x c = ... f @a x 'a' ...
715 in ... f @Int 1 'b' ... f @Int 2 'c' ...
716
717 (where the calls are tail calls). `f` fails the polymorphism rule because its
718 return type is [a], where [a] is bound. But since the type argument is always
719 'Int', we can rewrite it as:
720
721 let f' :: Int -> Char -> [Int]
722 f' x c = ... f' x 'a' ...
723 in ... f' 1 'b' ... f 2 'c' ...
724
725 and now we can make f' a join point:
726
727 join f' :: Int -> Char -> [Int]
728 f' x c = ... jump f' x 'a' ...
729 in ... jump f' 1 'b' ... jump f' 2 'c' ...
730
731 It's not clear that this comes up often, however. TODO: Measure how often and
732 add this analysis if necessary.
733
734 ------------------------------------------------------------
735 Note [Adjusting right-hand sides]
736 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
737 There's a bit of a dance we need to do after analysing a lambda expression or
738 a right-hand side. In particular, we need to
739
740 a) call 'markAllInsideLam' *unless* the binding is for a thunk, a one-shot
741 lambda, or a non-recursive join point; and
742 b) call 'markAllNonTailCalled' *unless* the binding is for a join point.
743
744 Some examples, with how the free occurrences in e (assumed not to be a value
745 lambda) get marked:
746
747 inside lam non-tail-called
748 ------------------------------------------------------------
749 let x = e No Yes
750 let f = \x -> e Yes Yes
751 let f = \x{OneShot} -> e No Yes
752 \x -> e Yes Yes
753 join j x = e No No
754 joinrec j x = e Yes No
755
756 There are a few other caveats; most importantly, if we're marking a binding as
757 'AlwaysTailCalled', it's *going* to be a join point, so we treat it as one so
758 that the effect cascades properly. Consequently, at the time the RHS is
759 analysed, we won't know what adjustments to make; thus 'occAnalLamOrRhs' must
760 return the unadjusted 'UsageDetails', to be adjusted by 'adjustRhsUsage' once
761 join-point-hood has been decided.
762
763 Thus the overall sequence taking place in 'occAnalNonRecBind' and
764 'occAnalRecBind' is as follows:
765
766 1. Call 'occAnalLamOrRhs' to find usage information for the RHS.
767 2. Call 'tagNonRecBinder' or 'tagRecBinders', which decides whether to make
768 the binding a join point.
769 3. Call 'adjustRhsUsage' accordingly. (Done as part of 'tagRecBinders' when
770 recursive.)
771
772 (In the recursive case, this logic is spread between 'makeNode' and
773 'occAnalRec'.)
774 -}
775
776 ------------------------------------------------------------------
777 -- occAnalBind
778 ------------------------------------------------------------------
779
780 occAnalBind :: OccEnv -- The incoming OccEnv
781 -> TopLevelFlag
782 -> ImpRuleEdges
783 -> CoreBind
784 -> UsageDetails -- Usage details of scope
785 -> (UsageDetails, -- Of the whole let(rec)
786 [CoreBind])
787
788 occAnalBind env lvl top_env (NonRec binder rhs) body_usage
789 = occAnalNonRecBind env lvl top_env binder rhs body_usage
790 occAnalBind env lvl top_env (Rec pairs) body_usage
791 = occAnalRecBind env lvl top_env pairs body_usage
792
793 -----------------
794 occAnalNonRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> Var -> CoreExpr
795 -> UsageDetails -> (UsageDetails, [CoreBind])
796 occAnalNonRecBind env lvl imp_rule_edges binder rhs body_usage
797 | isTyVar binder -- A type let; we don't gather usage info
798 = (body_usage, [NonRec binder rhs])
799
800 | not (binder `usedIn` body_usage) -- It's not mentioned
801 = (body_usage, [])
802
803 | otherwise -- It's mentioned in the body
804 = (body_usage' +++ rhs_usage', [NonRec tagged_binder rhs'])
805 where
806 (body_usage', tagged_binder) = tagNonRecBinder lvl body_usage binder
807 mb_join_arity = willBeJoinId_maybe tagged_binder
808
809 (bndrs, body) = collectBinders rhs
810
811 (rhs_usage1, bndrs', body') = occAnalNonRecRhs env tagged_binder bndrs body
812 rhs' = mkLams (markJoinOneShots mb_join_arity bndrs') body'
813 -- For a /non-recursive/ join point we can mark all
814 -- its join-lambda as one-shot; and it's a good idea to do so
815
816 -- Unfoldings
817 -- See Note [Unfoldings and join points]
818 rhs_usage2 = case occAnalUnfolding env NonRecursive binder of
819 Just unf_usage -> rhs_usage1 +++ unf_usage
820 Nothing -> rhs_usage1
821
822 -- Rules
823 -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
824 rules_w_uds = occAnalRules env mb_join_arity NonRecursive tagged_binder
825 rhs_usage3 = rhs_usage2 +++ combineUsageDetailsList
826 (map (\(_, l, r) -> l +++ r) rules_w_uds)
827 rhs_usage4 = maybe rhs_usage3 (addManyOccsSet rhs_usage3) $
828 lookupVarEnv imp_rule_edges binder
829 -- See Note [Preventing loops due to imported functions rules]
830
831 -- Final adjustment
832 rhs_usage' = adjustRhsUsage mb_join_arity NonRecursive bndrs' rhs_usage4
833
834 -----------------
835 occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)]
836 -> UsageDetails -> (UsageDetails, [CoreBind])
837 occAnalRecBind env lvl imp_rule_edges pairs body_usage
838 = foldr (occAnalRec lvl) (body_usage, []) sccs
839 -- For a recursive group, we
840 -- * occ-analyse all the RHSs
841 -- * compute strongly-connected components
842 -- * feed those components to occAnalRec
843 -- See Note [Recursive bindings: the grand plan]
844 where
845 sccs :: [SCC Details]
846 sccs = {-# SCC "occAnalBind.scc" #-}
847 stronglyConnCompFromEdgedVerticesUniq nodes
848
849 nodes :: [LetrecNode]
850 nodes = {-# SCC "occAnalBind.assoc" #-}
851 map (makeNode env imp_rule_edges bndr_set) pairs
852
853 bndr_set = mkVarSet (map fst pairs)
854
855 {-
856 Note [Unfoldings and join points]
857 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
858
859 We assume that anything in an unfolding occurs multiple times, since unfoldings
860 are often copied (that's the whole point!). But we still need to track tail
861 calls for the purpose of finding join points.
862 -}
863
864 -----------------------------
865 occAnalRec :: TopLevelFlag
866 -> SCC Details
867 -> (UsageDetails, [CoreBind])
868 -> (UsageDetails, [CoreBind])
869
870 -- The NonRec case is just like a Let (NonRec ...) above
871 occAnalRec lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs
872 , nd_uds = rhs_uds, nd_rhs_bndrs = rhs_bndrs }))
873 (body_uds, binds)
874 | not (bndr `usedIn` body_uds)
875 = (body_uds, binds) -- See Note [Dead code]
876
877 | otherwise -- It's mentioned in the body
878 = (body_uds' +++ rhs_uds',
879 NonRec tagged_bndr rhs : binds)
880 where
881 (body_uds', tagged_bndr) = tagNonRecBinder lvl body_uds bndr
882 rhs_uds' = adjustRhsUsage (willBeJoinId_maybe tagged_bndr) NonRecursive
883 rhs_bndrs rhs_uds
884
885 -- The Rec case is the interesting one
886 -- See Note [Recursive bindings: the grand plan]
887 -- See Note [Loop breaking]
888 occAnalRec lvl (CyclicSCC details_s) (body_uds, binds)
889 | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
890 = (body_uds, binds) -- See Note [Dead code]
891
892 | otherwise -- At this point we always build a single Rec
893 = -- pprTrace "occAnalRec" (vcat
894 -- [ text "weak_fvs" <+> ppr weak_fvs
895 -- , text "lb nodes" <+> ppr loop_breaker_nodes])
896 (final_uds, Rec pairs : binds)
897
898 where
899 bndrs = map nd_bndr details_s
900 bndr_set = mkVarSet bndrs
901
902 ------------------------------
903 -- See Note [Choosing loop breakers] for loop_breaker_nodes
904 final_uds :: UsageDetails
905 loop_breaker_nodes :: [LetrecNode]
906 (final_uds, loop_breaker_nodes)
907 = mkLoopBreakerNodes lvl bndr_set body_uds details_s
908
909 ------------------------------
910 weak_fvs :: VarSet
911 weak_fvs = mapUnionVarSet nd_weak details_s
912
913 ---------------------------
914 -- Now reconstruct the cycle
915 pairs :: [(Id,CoreExpr)]
916 pairs | isEmptyVarSet weak_fvs = reOrderNodes 0 bndr_set weak_fvs loop_breaker_nodes []
917 | otherwise = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_nodes []
918 -- If weak_fvs is empty, the loop_breaker_nodes will include
919 -- all the edges in the original scope edges [remember,
920 -- weak_fvs is the difference between scope edges and
921 -- lb-edges], so a fresh SCC computation would yield a
922 -- single CyclicSCC result; and reOrderNodes deals with
923 -- exactly that case
924
925
926 ------------------------------------------------------------------
927 -- Loop breaking
928 ------------------------------------------------------------------
929
930 type Binding = (Id,CoreExpr)
931
932 loopBreakNodes :: Int
933 -> VarSet -- All binders
934 -> VarSet -- Binders whose dependencies may be "missing"
935 -- See Note [Weak loop breakers]
936 -> [LetrecNode]
937 -> [Binding] -- Append these to the end
938 -> [Binding]
939 {-
940 loopBreakNodes is applied to the list of nodes for a cyclic strongly
941 connected component (there's guaranteed to be a cycle). It returns
942 the same nodes, but
943 a) in a better order,
944 b) with some of the Ids having a IAmALoopBreaker pragma
945
946 The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means
947 that the simplifier can guarantee not to loop provided it never records an inlining
948 for these no-inline guys.
949
950 Furthermore, the order of the binds is such that if we neglect dependencies
951 on the no-inline Ids then the binds are topologically sorted. This means
952 that the simplifier will generally do a good job if it works from top bottom,
953 recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
954 -}
955
956 -- Return the bindings sorted into a plausible order, and marked with loop breakers.
957 loopBreakNodes depth bndr_set weak_fvs nodes binds
958 = go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds
959 where
960 go [] binds = binds
961 go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
962
963 loop_break_scc scc binds
964 = case scc of
965 AcyclicSCC node -> mk_non_loop_breaker weak_fvs node : binds
966 CyclicSCC nodes -> reOrderNodes depth bndr_set weak_fvs nodes binds
967
968 ----------------------------------
969 reOrderNodes :: Int -> VarSet -> VarSet -> [LetrecNode] -> [Binding] -> [Binding]
970 -- Choose a loop breaker, mark it no-inline,
971 -- and call loopBreakNodes on the rest
972 reOrderNodes _ _ _ [] _ = panic "reOrderNodes"
973 reOrderNodes _ _ _ [node] binds = mk_loop_breaker node : binds
974 reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
975 = -- pprTrace "reOrderNodes" (text "unchosen" <+> ppr unchosen $$
976 -- text "chosen" <+> ppr chosen_nodes) $
977 loopBreakNodes new_depth bndr_set weak_fvs unchosen $
978 (map mk_loop_breaker chosen_nodes ++ binds)
979 where
980 (chosen_nodes, unchosen) = chooseLoopBreaker approximate_lb
981 (nd_score (fstOf3 node))
982 [node] [] nodes
983
984 approximate_lb = depth >= 2
985 new_depth | approximate_lb = 0
986 | otherwise = depth+1
987 -- After two iterations (d=0, d=1) give up
988 -- and approximate, returning to d=0
989
990 mk_loop_breaker :: LetrecNode -> Binding
991 mk_loop_breaker (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
992 = (bndr `setIdOccInfo` strongLoopBreaker { occ_tail = tail_info }, rhs)
993 where
994 tail_info = tailCallInfo (idOccInfo bndr)
995
996 mk_non_loop_breaker :: VarSet -> LetrecNode -> Binding
997 -- See Note [Weak loop breakers]
998 mk_non_loop_breaker weak_fvs (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
999 | bndr `elemVarSet` weak_fvs = (setIdOccInfo bndr occ', rhs)
1000 | otherwise = (bndr, rhs)
1001 where
1002 occ' = weakLoopBreaker { occ_tail = tail_info }
1003 tail_info = tailCallInfo (idOccInfo bndr)
1004
1005 ----------------------------------
1006 chooseLoopBreaker :: Bool -- True <=> Too many iterations,
1007 -- so approximate
1008 -> NodeScore -- Best score so far
1009 -> [LetrecNode] -- Nodes with this score
1010 -> [LetrecNode] -- Nodes with higher scores
1011 -> [LetrecNode] -- Unprocessed nodes
1012 -> ([LetrecNode], [LetrecNode])
1013 -- This loop looks for the bind with the lowest score
1014 -- to pick as the loop breaker. The rest accumulate in
1015 chooseLoopBreaker _ _ loop_nodes acc []
1016 = (loop_nodes, acc) -- Done
1017
1018 -- If approximate_loop_breaker is True, we pick *all*
1019 -- nodes with lowest score, else just one
1020 -- See Note [Complexity of loop breaking]
1021 chooseLoopBreaker approx_lb loop_sc loop_nodes acc (node : nodes)
1022 | approx_lb
1023 , rank sc == rank loop_sc
1024 = chooseLoopBreaker approx_lb loop_sc (node : loop_nodes) acc nodes
1025
1026 | sc `betterLB` loop_sc -- Better score so pick this new one
1027 = chooseLoopBreaker approx_lb sc [node] (loop_nodes ++ acc) nodes
1028
1029 | otherwise -- Worse score so don't pick it
1030 = chooseLoopBreaker approx_lb loop_sc loop_nodes (node : acc) nodes
1031 where
1032 sc = nd_score (fstOf3 node)
1033
1034 {-
1035 Note [Complexity of loop breaking]
1036 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1037 The loop-breaking algorithm knocks out one binder at a time, and
1038 performs a new SCC analysis on the remaining binders. That can
1039 behave very badly in tightly-coupled groups of bindings; in the
1040 worst case it can be (N**2)*log N, because it does a full SCC
1041 on N, then N-1, then N-2 and so on.
1042
1043 To avoid this, we switch plans after 2 (or whatever) attempts:
1044 Plan A: pick one binder with the lowest score, make it
1045 a loop breaker, and try again
1046 Plan B: pick *all* binders with the lowest score, make them
1047 all loop breakers, and try again
1048 Since there are only a small finite number of scores, this will
1049 terminate in a constant number of iterations, rather than O(N)
1050 iterations.
1051
1052 You might thing that it's very unlikely, but RULES make it much
1053 more likely. Here's a real example from Trac #1969:
1054 Rec { $dm = \d.\x. op d
1055 {-# RULES forall d. $dm Int d = $s$dm1
1056 forall d. $dm Bool d = $s$dm2 #-}
1057
1058 dInt = MkD .... opInt ...
1059 dInt = MkD .... opBool ...
1060 opInt = $dm dInt
1061 opBool = $dm dBool
1062
1063 $s$dm1 = \x. op dInt
1064 $s$dm2 = \x. op dBool }
1065 The RULES stuff means that we can't choose $dm as a loop breaker
1066 (Note [Choosing loop breakers]), so we must choose at least (say)
1067 opInt *and* opBool, and so on. The number of loop breakders is
1068 linear in the number of instance declarations.
1069
1070 Note [Loop breakers and INLINE/INLINABLE pragmas]
1071 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1072 Avoid choosing a function with an INLINE pramga as the loop breaker!
1073 If such a function is mutually-recursive with a non-INLINE thing,
1074 then the latter should be the loop-breaker.
1075
1076 It's vital to distinguish between INLINE and INLINABLE (the
1077 Bool returned by hasStableCoreUnfolding_maybe). If we start with
1078 Rec { {-# INLINABLE f #-}
1079 f x = ...f... }
1080 and then worker/wrapper it through strictness analysis, we'll get
1081 Rec { {-# INLINABLE $wf #-}
1082 $wf p q = let x = (p,q) in ...f...
1083
1084 {-# INLINE f #-}
1085 f x = case x of (p,q) -> $wf p q }
1086
1087 Now it is vital that we choose $wf as the loop breaker, so we can
1088 inline 'f' in '$wf'.
1089
1090 Note [DFuns should not be loop breakers]
1091 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1092 It's particularly bad to make a DFun into a loop breaker. See
1093 Note [How instance declarations are translated] in TcInstDcls
1094
1095 We give DFuns a higher score than ordinary CONLIKE things because
1096 if there's a choice we want the DFun to be the non-loop breaker. Eg
1097
1098 rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
1099
1100 $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
1101 {-# DFUN #-}
1102 $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
1103 }
1104
1105 Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
1106 if we can't unravel the DFun first.
1107
1108 Note [Constructor applications]
1109 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1110 It's really really important to inline dictionaries. Real
1111 example (the Enum Ordering instance from GHC.Base):
1112
1113 rec f = \ x -> case d of (p,q,r) -> p x
1114 g = \ x -> case d of (p,q,r) -> q x
1115 d = (v, f, g)
1116
1117 Here, f and g occur just once; but we can't inline them into d.
1118 On the other hand we *could* simplify those case expressions if
1119 we didn't stupidly choose d as the loop breaker.
1120 But we won't because constructor args are marked "Many".
1121 Inlining dictionaries is really essential to unravelling
1122 the loops in static numeric dictionaries, see GHC.Float.
1123
1124 Note [Closure conversion]
1125 ~~~~~~~~~~~~~~~~~~~~~~~~~
1126 We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
1127 The immediate motivation came from the result of a closure-conversion transformation
1128 which generated code like this:
1129
1130 data Clo a b = forall c. Clo (c -> a -> b) c
1131
1132 ($:) :: Clo a b -> a -> b
1133 Clo f env $: x = f env x
1134
1135 rec { plus = Clo plus1 ()
1136
1137 ; plus1 _ n = Clo plus2 n
1138
1139 ; plus2 Zero n = n
1140 ; plus2 (Succ m) n = Succ (plus $: m $: n) }
1141
1142 If we inline 'plus' and 'plus1', everything unravels nicely. But if
1143 we choose 'plus1' as the loop breaker (which is entirely possible
1144 otherwise), the loop does not unravel nicely.
1145
1146
1147 @occAnalUnfolding@ deals with the question of bindings where the Id is marked
1148 by an INLINE pragma. For these we record that anything which occurs
1149 in its RHS occurs many times. This pessimistically assumes that this
1150 inlined binder also occurs many times in its scope, but if it doesn't
1151 we'll catch it next time round. At worst this costs an extra simplifier pass.
1152 ToDo: try using the occurrence info for the inline'd binder.
1153
1154 [March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC.
1155 [June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC.
1156
1157
1158 ************************************************************************
1159 * *
1160 Making nodes
1161 * *
1162 ************************************************************************
1163 -}
1164
1165 type ImpRuleEdges = IdEnv IdSet -- Mapping from FVs of imported RULE LHSs to RHS FVs
1166
1167 noImpRuleEdges :: ImpRuleEdges
1168 noImpRuleEdges = emptyVarEnv
1169
1170 type LetrecNode = Node Unique Details -- Node comes from Digraph
1171 -- The Unique key is gotten from the Id
1172 data Details
1173 = ND { nd_bndr :: Id -- Binder
1174 , nd_rhs :: CoreExpr -- RHS, already occ-analysed
1175 , nd_rhs_bndrs :: [CoreBndr] -- Outer lambdas of RHS
1176 -- INVARIANT: (nd_rhs_bndrs nd, _) ==
1177 -- collectBinders (nd_rhs nd)
1178
1179 , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings
1180 -- ignoring phase (ie assuming all are active)
1181 -- See Note [Forming Rec groups]
1182
1183 , nd_inl :: IdSet -- Free variables of
1184 -- the stable unfolding (if present and active)
1185 -- or the RHS (if not)
1186 -- but excluding any RULES
1187 -- This is the IdSet that may be used if the Id is inlined
1188
1189 , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds
1190 -- but are *not* in nd_inl. These are the ones whose
1191 -- dependencies might not be respected by loop_breaker_nodes
1192 -- See Note [Weak loop breakers]
1193
1194 , nd_active_rule_fvs :: IdSet -- Free variables of the RHS of active RULES
1195
1196 , nd_score :: NodeScore
1197 }
1198
1199 instance Outputable Details where
1200 ppr nd = text "ND" <> braces
1201 (sep [ text "bndr =" <+> ppr (nd_bndr nd)
1202 , text "uds =" <+> ppr (nd_uds nd)
1203 , text "inl =" <+> ppr (nd_inl nd)
1204 , text "weak =" <+> ppr (nd_weak nd)
1205 , text "rule =" <+> ppr (nd_active_rule_fvs nd)
1206 , text "score =" <+> ppr (nd_score nd)
1207 ])
1208
1209 -- The NodeScore is compared lexicographically;
1210 -- e.g. lower rank wins regardless of size
1211 type NodeScore = ( Int -- Rank: lower => more likely to be picked as loop breaker
1212 , Int -- Size of rhs: higher => more likely to be picked as LB
1213 -- Maxes out at maxExprSize; we just use it to prioritise
1214 -- small functions
1215 , Bool ) -- Was it a loop breaker before?
1216 -- True => more likely to be picked
1217 -- Note [Loop breakers, node scoring, and stability]
1218
1219 rank :: NodeScore -> Int
1220 rank (r, _, _) = r
1221
1222 makeNode :: OccEnv -> ImpRuleEdges -> VarSet
1223 -> (Var, CoreExpr) -> LetrecNode
1224 -- See Note [Recursive bindings: the grand plan]
1225 makeNode env imp_rule_edges bndr_set (bndr, rhs)
1226 = (details, varUnique bndr, nonDetKeysUniqSet node_fvs)
1227 -- It's OK to use nonDetKeysUniqSet here as stronglyConnCompFromEdgedVerticesR
1228 -- is still deterministic with edges in nondeterministic order as
1229 -- explained in Note [Deterministic SCC] in Digraph.
1230 where
1231 details = ND { nd_bndr = bndr
1232 , nd_rhs = rhs'
1233 , nd_rhs_bndrs = bndrs'
1234 , nd_uds = rhs_usage3
1235 , nd_inl = inl_fvs
1236 , nd_weak = node_fvs `minusVarSet` inl_fvs
1237 , nd_active_rule_fvs = active_rule_fvs
1238 , nd_score = pprPanic "makeNodeDetails" (ppr bndr) }
1239
1240 -- Constructing the edges for the main Rec computation
1241 -- See Note [Forming Rec groups]
1242 (bndrs, body) = collectBinders rhs
1243 (rhs_usage1, bndrs', body') = occAnalRecRhs env bndrs body
1244 rhs' = mkLams bndrs' body'
1245 rhs_usage2 = rhs_usage1 +++ all_rule_uds
1246 -- Note [Rules are extra RHSs]
1247 -- Note [Rule dependency info]
1248 rhs_usage3 = case mb_unf_uds of
1249 Just unf_uds -> rhs_usage2 +++ unf_uds
1250 Nothing -> rhs_usage2
1251 node_fvs = udFreeVars bndr_set rhs_usage3
1252
1253 -- Finding the free variables of the rules
1254 is_active = occ_rule_act env :: Activation -> Bool
1255
1256 rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)]
1257 rules_w_uds = occAnalRules env (Just (length bndrs)) Recursive bndr
1258
1259 rules_w_rhs_fvs :: [(Activation, VarSet)] -- Find the RHS fvs
1260 rules_w_rhs_fvs = maybe id (\ids -> ((AlwaysActive, ids):))
1261 (lookupVarEnv imp_rule_edges bndr)
1262 -- See Note [Preventing loops due to imported functions rules]
1263 [ (ru_act rule, udFreeVars bndr_set rhs_uds)
1264 | (rule, _, rhs_uds) <- rules_w_uds ]
1265 all_rule_uds = combineUsageDetailsList $
1266 concatMap (\(_, l, r) -> [l, r]) rules_w_uds
1267 active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_rhs_fvs
1268 , is_active a]
1269
1270 -- Finding the usage details of the INLINE pragma (if any)
1271 mb_unf_uds = occAnalUnfolding env Recursive bndr
1272
1273 -- Find the "nd_inl" free vars; for the loop-breaker phase
1274 inl_fvs = case mb_unf_uds of
1275 Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
1276 Just unf_uds -> udFreeVars bndr_set unf_uds
1277 -- We could check for an *active* INLINE (returning
1278 -- emptyVarSet for an inactive one), but is_active
1279 -- isn't the right thing (it tells about
1280 -- RULE activation), so we'd need more plumbing
1281
1282 mkLoopBreakerNodes :: TopLevelFlag
1283 -> VarSet
1284 -> UsageDetails -- for BODY of let
1285 -> [Details]
1286 -> (UsageDetails, -- adjusted
1287 [LetrecNode])
1288 -- Does four things
1289 -- a) tag each binder with its occurrence info
1290 -- b) add a NodeScore to each node
1291 -- c) make a Node with the right dependency edges for
1292 -- the loop-breaker SCC analysis
1293 -- d) adjust each RHS's usage details according to
1294 -- the binder's (new) shotness and join-point-hood
1295 mkLoopBreakerNodes lvl bndr_set body_uds details_s
1296 = (final_uds, zipWith mk_lb_node details_s bndrs')
1297 where
1298 (final_uds, bndrs') = tagRecBinders lvl body_uds
1299 [ (nd_bndr nd, nd_uds nd, nd_rhs_bndrs nd)
1300 | nd <- details_s ]
1301 mk_lb_node nd@(ND { nd_bndr = bndr, nd_rhs = rhs, nd_inl = inl_fvs }) bndr'
1302 = (nd', varUnique bndr, nonDetKeysUniqSet lb_deps)
1303 -- It's OK to use nonDetKeysUniqSet here as
1304 -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges
1305 -- in nondeterministic order as explained in
1306 -- Note [Deterministic SCC] in Digraph.
1307 where
1308 nd' = nd { nd_bndr = bndr', nd_score = score }
1309 score = nodeScore bndr bndr' rhs lb_deps
1310 lb_deps = extendFvs_ rule_fv_env inl_fvs
1311
1312 rule_fv_env :: IdEnv IdSet
1313 -- Maps a variable f to the variables from this group
1314 -- mentioned in RHS of active rules for f
1315 -- Domain is *subset* of bound vars (others have no rule fvs)
1316 rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
1317 init_rule_fvs -- See Note [Finding rule RHS free vars]
1318 = [ (b, trimmed_rule_fvs)
1319 | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s
1320 , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
1321 , not (isEmptyVarSet trimmed_rule_fvs) ]
1322
1323
1324 ------------------------------------------
1325 nodeScore :: Id -- Binder has old occ-info (just for loop-breaker-ness)
1326 -> Id -- Binder with new occ-info
1327 -> CoreExpr -- RHS
1328 -> VarSet -- Loop-breaker dependencies
1329 -> NodeScore
1330 nodeScore old_bndr new_bndr bind_rhs lb_deps
1331 | not (isId old_bndr) -- A type or cercion variable is never a loop breaker
1332 = (100, 0, False)
1333
1334 | old_bndr `elemVarSet` lb_deps -- Self-recursive things are great loop breakers
1335 = (0, 0, True) -- See Note [Self-recursion and loop breakers]
1336
1337 | exprIsTrivial rhs
1338 = mk_score 10 -- Practically certain to be inlined
1339 -- Used to have also: && not (isExportedId bndr)
1340 -- But I found this sometimes cost an extra iteration when we have
1341 -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }
1342 -- where df is the exported dictionary. Then df makes a really
1343 -- bad choice for loop breaker
1344
1345 | DFunUnfolding { df_args = args } <- id_unfolding
1346 -- Never choose a DFun as a loop breaker
1347 -- Note [DFuns should not be loop breakers]
1348 = (9, length args, is_lb)
1349
1350 -- Data structures are more important than INLINE pragmas
1351 -- so that dictionary/method recursion unravels
1352
1353 | CoreUnfolding { uf_guidance = UnfWhen {} } <- id_unfolding
1354 = mk_score 6
1355
1356 | is_con_app rhs -- Data types help with cases:
1357 = mk_score 5 -- Note [Constructor applications]
1358
1359 | isStableUnfolding id_unfolding
1360 , can_unfold
1361 = mk_score 3
1362
1363 | isOneOcc (idOccInfo new_bndr)
1364 = mk_score 2 -- Likely to be inlined
1365
1366 | can_unfold -- The Id has some kind of unfolding
1367 = mk_score 1
1368
1369 | otherwise
1370 = (0, 0, is_lb)
1371
1372 where
1373 mk_score :: Int -> NodeScore
1374 mk_score rank = (rank, rhs_size, is_lb)
1375
1376 is_lb = isStrongLoopBreaker (idOccInfo old_bndr)
1377 rhs = case id_unfolding of
1378 CoreUnfolding { uf_src = src, uf_tmpl = unf_rhs }
1379 | isStableSource src
1380 -> unf_rhs
1381 _ -> bind_rhs
1382 -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding
1383 rhs_size = case id_unfolding of
1384 CoreUnfolding { uf_guidance = guidance }
1385 | UnfIfGoodArgs { ug_size = size } <- guidance
1386 -> size
1387 _ -> cheapExprSize rhs
1388
1389 can_unfold = canUnfold id_unfolding
1390 id_unfolding = realIdUnfolding old_bndr
1391 -- realIdUnfolding: Ignore loop-breaker-ness here because
1392 -- that is what we are setting!
1393
1394 -- Checking for a constructor application
1395 -- Cheap and cheerful; the simplifier moves casts out of the way
1396 -- The lambda case is important to spot x = /\a. C (f a)
1397 -- which comes up when C is a dictionary constructor and
1398 -- f is a default method.
1399 -- Example: the instance for Show (ST s a) in GHC.ST
1400 --
1401 -- However we *also* treat (\x. C p q) as a con-app-like thing,
1402 -- Note [Closure conversion]
1403 is_con_app (Var v) = isConLikeId v
1404 is_con_app (App f _) = is_con_app f
1405 is_con_app (Lam _ e) = is_con_app e
1406 is_con_app (Tick _ e) = is_con_app e
1407 is_con_app _ = False
1408
1409 maxExprSize :: Int
1410 maxExprSize = 20 -- Rather arbitrary
1411
1412 cheapExprSize :: CoreExpr -> Int
1413 -- Maxes out at maxExprSize
1414 cheapExprSize e
1415 = go 0 e
1416 where
1417 go n e | n >= maxExprSize = n
1418 | otherwise = go1 n e
1419
1420 go1 n (Var {}) = n+1
1421 go1 n (Lit {}) = n+1
1422 go1 n (Type {}) = n
1423 go1 n (Coercion {}) = n
1424 go1 n (Tick _ e) = go1 n e
1425 go1 n (Cast e _) = go1 n e
1426 go1 n (App f a) = go (go1 n f) a
1427 go1 n (Lam b e)
1428 | isTyVar b = go1 n e
1429 | otherwise = go (n+1) e
1430 go1 n (Let b e) = gos (go1 n e) (rhssOfBind b)
1431 go1 n (Case e _ _ as) = gos (go1 n e) (rhssOfAlts as)
1432
1433 gos n [] = n
1434 gos n (e:es) | n >= maxExprSize = n
1435 | otherwise = gos (go1 n e) es
1436
1437 betterLB :: NodeScore -> NodeScore -> Bool
1438 -- If n1 `betterLB` n2 then choose n1 as the loop breaker
1439 betterLB (rank1, size1, lb1) (rank2, size2, _)
1440 | rank1 < rank2 = True
1441 | rank1 > rank2 = False
1442 | size1 < size2 = False -- Make the bigger n2 into the loop breaker
1443 | size1 > size2 = True
1444 | lb1 = True -- Tie-break: if n1 was a loop breaker before, choose it
1445 | otherwise = False -- See Note [Loop breakers, node scoring, and stability]
1446
1447 {- Note [Self-recursion and loop breakers]
1448 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1449 If we have
1450 rec { f = ...f...g...
1451 ; g = .....f... }
1452 then 'f' has to be a loop breaker anyway, so we may as well choose it
1453 right away, so that g can inline freely.
1454
1455 This is really just a cheap hack. Consider
1456 rec { f = ...g...
1457 ; g = ..f..h...
1458 ; h = ...f....}
1459 Here f or g are better loop breakers than h; but we might accidentally
1460 choose h. Finding the minimal set of loop breakers is hard.
1461
1462 Note [Loop breakers, node scoring, and stability]
1463 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1464 To choose a loop breaker, we give a NodeScore to each node in the SCC,
1465 and pick the one with the best score (according to 'betterLB').
1466
1467 We need to be jolly careful (Trac #12425, #12234) about the stability
1468 of this choice. Suppose we have
1469
1470 let rec { f = ...g...g...
1471 ; g = ...f...f... }
1472 in
1473 case x of
1474 True -> ...f..
1475 False -> ..f...
1476
1477 In each iteration of the simplifier the occurrence analyser OccAnal
1478 chooses a loop breaker. Suppose in iteration 1 it choose g as the loop
1479 breaker. That means it is free to inline f.
1480
1481 Suppose that GHC decides to inline f in the branches of the case, but
1482 (for some reason; eg it is not saturated) in the rhs of g. So we get
1483
1484 let rec { f = ...g...g...
1485 ; g = ...f...f... }
1486 in
1487 case x of
1488 True -> ...g...g.....
1489 False -> ..g..g....
1490
1491 Now suppose that, for some reason, in the next iteration the occurrence
1492 analyser chooses f as the loop breaker, so it can freely inline g. And
1493 again for some reason the simplifier inlines g at its calls in the case
1494 branches, but not in the RHS of f. Then we get
1495
1496 let rec { f = ...g...g...
1497 ; g = ...f...f... }
1498 in
1499 case x of
1500 True -> ...(...f...f...)...(...f..f..).....
1501 False -> ..(...f...f...)...(..f..f...)....
1502
1503 You can see where this is going! Each iteration of the simplifier
1504 doubles the number of calls to f or g. No wonder GHC is slow!
1505
1506 (In the particular example in comment:3 of #12425, f and g are the two
1507 mutually recursive fmap instances for CondT and Result. They are both
1508 marked INLINE which, oddly, is why they don't inline in each other's
1509 RHS, because the call there is not saturated.)
1510
1511 The root cause is that we flip-flop on our choice of loop breaker. I
1512 always thought it didn't matter, and indeed for any single iteration
1513 to terminate, it doesn't matter. But when we iterate, it matters a
1514 lot!!
1515
1516 So The Plan is this:
1517 If there is a tie, choose the node that
1518 was a loop breaker last time round
1519
1520 Hence the is_lb field of NodeScore
1521
1522 ************************************************************************
1523 * *
1524 Right hand sides
1525 * *
1526 ************************************************************************
1527 -}
1528
1529 occAnalRhs :: OccEnv -> RecFlag -> Id -> [CoreBndr] -> CoreExpr
1530 -> (UsageDetails, [CoreBndr], CoreExpr)
1531 -- Returned usage details covers only the RHS,
1532 -- and *not* the RULE or INLINE template for the Id
1533 occAnalRhs env Recursive _ bndrs body
1534 = occAnalRecRhs env bndrs body
1535 occAnalRhs env NonRecursive id bndrs body
1536 = occAnalNonRecRhs env id bndrs body
1537
1538 occAnalRecRhs :: OccEnv -> [CoreBndr] -> CoreExpr -- Rhs lambdas, body
1539 -> (UsageDetails, [CoreBndr], CoreExpr)
1540 -- Returned usage details covers only the RHS,
1541 -- and *not* the RULE or INLINE template for the Id
1542 occAnalRecRhs env bndrs body = occAnalLamOrRhs (rhsCtxt env) bndrs body
1543
1544 occAnalNonRecRhs :: OccEnv
1545 -> Id -> [CoreBndr] -> CoreExpr -- Binder; rhs lams, body
1546 -- Binder is already tagged with occurrence info
1547 -> (UsageDetails, [CoreBndr], CoreExpr)
1548 -- Returned usage details covers only the RHS,
1549 -- and *not* the RULE or INLINE template for the Id
1550 occAnalNonRecRhs env bndr bndrs body
1551 = occAnalLamOrRhs rhs_env bndrs body
1552 where
1553 -- See Note [Cascading inlines]
1554 env1 | certainly_inline = env
1555 | otherwise = rhsCtxt env
1556
1557 -- See Note [Sources of one-shot information]
1558 rhs_env = env1 { occ_one_shots = argOneShots dmd }
1559
1560 certainly_inline -- See Note [Cascading inlines]
1561 = case idOccInfo bndr of
1562 OneOcc { occ_in_lam = in_lam, occ_one_br = one_br }
1563 -> not in_lam && one_br && active && not_stable
1564 _ -> False
1565
1566 dmd = idDemandInfo bndr
1567 active = isAlwaysActive (idInlineActivation bndr)
1568 not_stable = not (isStableUnfolding (idUnfolding bndr))
1569
1570 occAnalUnfolding :: OccEnv
1571 -> RecFlag
1572 -> Id
1573 -> Maybe UsageDetails
1574 -- Just the analysis, not a new unfolding. The unfolding
1575 -- got analysed when it was created and we don't need to
1576 -- update it.
1577 occAnalUnfolding env rec_flag id
1578 = case realIdUnfolding id of -- ignore previous loop-breaker flag
1579 CoreUnfolding { uf_tmpl = rhs, uf_src = src }
1580 | not (isStableSource src)
1581 -> Nothing
1582 | otherwise
1583 -> Just $ markAllMany usage
1584 where
1585 (bndrs, body) = collectBinders rhs
1586 (usage, _, _) = occAnalRhs env rec_flag id bndrs body
1587
1588 DFunUnfolding { df_bndrs = bndrs, df_args = args }
1589 -> Just $ zapDetails (delDetailsList usage bndrs)
1590 where
1591 usage = foldr (+++) emptyDetails (map (fst . occAnal env) args)
1592
1593 _ -> Nothing
1594
1595 occAnalRules :: OccEnv
1596 -> Maybe JoinArity -- If the binder is (or MAY become) a join
1597 -- point, what its join arity is (or WOULD
1598 -- become). See Note [Rules and join points].
1599 -> RecFlag
1600 -> Id
1601 -> [(CoreRule, -- Each (non-built-in) rule
1602 UsageDetails, -- Usage details for LHS
1603 UsageDetails)] -- Usage details for RHS
1604 occAnalRules env mb_expected_join_arity rec_flag id
1605 = [ (rule, lhs_uds, rhs_uds) | rule@Rule {} <- idCoreRules id
1606 , let (lhs_uds, rhs_uds) = occ_anal_rule rule ]
1607 where
1608 occ_anal_rule (Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs })
1609 = (lhs_uds, final_rhs_uds)
1610 where
1611 lhs_uds = addManyOccsSet emptyDetails $
1612 (exprsFreeVars args `delVarSetList` bndrs)
1613 (rhs_bndrs, rhs_body) = collectBinders rhs
1614 (rhs_uds, _, _) = occAnalRhs env rec_flag id rhs_bndrs rhs_body
1615 -- Note [Rules are extra RHSs]
1616 -- Note [Rule dependency info]
1617 final_rhs_uds = adjust_tail_info args $ markAllMany $
1618 (rhs_uds `delDetailsList` bndrs)
1619 occ_anal_rule _
1620 = (emptyDetails, emptyDetails)
1621
1622 adjust_tail_info args uds -- see Note [Rules and join points]
1623 = case mb_expected_join_arity of
1624 Just ar | args `lengthIs` ar -> uds
1625 _ -> markAllNonTailCalled uds
1626 {-
1627 Note [Cascading inlines]
1628 ~~~~~~~~~~~~~~~~~~~~~~~~
1629 By default we use an rhsCtxt for the RHS of a binding. This tells the
1630 occ anal n that it's looking at an RHS, which has an effect in
1631 occAnalApp. In particular, for constructor applications, it makes
1632 the arguments appear to have NoOccInfo, so that we don't inline into
1633 them. Thus x = f y
1634 k = Just x
1635 we do not want to inline x.
1636
1637 But there's a problem. Consider
1638 x1 = a0 : []
1639 x2 = a1 : x1
1640 x3 = a2 : x2
1641 g = f x3
1642 First time round, it looks as if x1 and x2 occur as an arg of a
1643 let-bound constructor ==> give them a many-occurrence.
1644 But then x3 is inlined (unconditionally as it happens) and
1645 next time round, x2 will be, and the next time round x1 will be
1646 Result: multiple simplifier iterations. Sigh.
1647
1648 So, when analysing the RHS of x3 we notice that x3 will itself
1649 definitely inline the next time round, and so we analyse x3's rhs in
1650 an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff.
1651
1652 Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally.
1653 If we say "yes" when preInlineUnconditionally says "no" the simplifier iterates
1654 indefinitely:
1655 x = f y
1656 k = Just x
1657 inline ==>
1658 k = Just (f y)
1659 float ==>
1660 x1 = f y
1661 k = Just x1
1662
1663 This is worse than the slow cascade, so we only want to say "certainly_inline"
1664 if it really is certain. Look at the note with preInlineUnconditionally
1665 for the various clauses.
1666
1667
1668 ************************************************************************
1669 * *
1670 Expressions
1671 * *
1672 ************************************************************************
1673 -}
1674
1675 occAnal :: OccEnv
1676 -> CoreExpr
1677 -> (UsageDetails, -- Gives info only about the "interesting" Ids
1678 CoreExpr)
1679
1680 occAnal _ expr@(Type _) = (emptyDetails, expr)
1681 occAnal _ expr@(Lit _) = (emptyDetails, expr)
1682 occAnal env expr@(Var _) = occAnalApp env (expr, [], [])
1683 -- At one stage, I gathered the idRuleVars for the variable here too,
1684 -- which in a way is the right thing to do.
1685 -- But that went wrong right after specialisation, when
1686 -- the *occurrences* of the overloaded function didn't have any
1687 -- rules in them, so the *specialised* versions looked as if they
1688 -- weren't used at all.
1689
1690 occAnal _ (Coercion co)
1691 = (addManyOccsSet emptyDetails (coVarsOfCo co), Coercion co)
1692 -- See Note [Gather occurrences of coercion variables]
1693
1694 {-
1695 Note [Gather occurrences of coercion variables]
1696 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1697 We need to gather info about what coercion variables appear, so that
1698 we can sort them into the right place when doing dependency analysis.
1699 -}
1700
1701 occAnal env (Tick tickish body)
1702 | tickish `tickishScopesLike` SoftScope
1703 = (markAllNonTailCalled usage, Tick tickish body')
1704
1705 | Breakpoint _ ids <- tickish
1706 = (usage_lam +++ foldr addManyOccs emptyDetails ids, Tick tickish body')
1707 -- never substitute for any of the Ids in a Breakpoint
1708
1709 | otherwise
1710 = (usage_lam, Tick tickish body')
1711 where
1712 !(usage,body') = occAnal env body
1713 -- for a non-soft tick scope, we can inline lambdas only
1714 usage_lam = markAllNonTailCalled (markAllInsideLam usage)
1715 -- TODO There may be ways to make ticks and join points play
1716 -- nicer together, but right now there are problems:
1717 -- let j x = ... in tick<t> (j 1)
1718 -- Making j a join point may cause the simplifier to drop t
1719 -- (if the tick is put into the continuation). So we don't
1720 -- count j 1 as a tail call.
1721
1722 occAnal env (Cast expr co)
1723 = case occAnal env expr of { (usage, expr') ->
1724 let usage1 = zapDetailsIf (isRhsEnv env) usage
1725 usage2 = addManyOccsSet usage1 (coVarsOfCo co)
1726 -- See Note [Gather occurrences of coercion variables]
1727 in (markAllNonTailCalled usage2, Cast expr' co)
1728 -- If we see let x = y `cast` co
1729 -- then mark y as 'Many' so that we don't
1730 -- immediately inline y again.
1731 }
1732
1733 occAnal env app@(App _ _)
1734 = occAnalApp env (collectArgsTicks tickishFloatable app)
1735
1736 -- Ignore type variables altogether
1737 -- (a) occurrences inside type lambdas only not marked as InsideLam
1738 -- (b) type variables not in environment
1739
1740 occAnal env (Lam x body)
1741 | isTyVar x
1742 = case occAnal env body of { (body_usage, body') ->
1743 (markAllNonTailCalled body_usage, Lam x body')
1744 }
1745
1746 -- For value lambdas we do a special hack. Consider
1747 -- (\x. \y. ...x...)
1748 -- If we did nothing, x is used inside the \y, so would be marked
1749 -- as dangerous to dup. But in the common case where the abstraction
1750 -- is applied to two arguments this is over-pessimistic.
1751 -- So instead, we just mark each binder with its occurrence
1752 -- info in the *body* of the multiple lambda.
1753 -- Then, the simplifier is careful when partially applying lambdas.
1754
1755 occAnal env expr@(Lam _ _)
1756 = case occAnalLamOrRhs env binders body of { (usage, tagged_binders, body') ->
1757 let
1758 expr' = mkLams tagged_binders body'
1759 usage1 = markAllNonTailCalled usage
1760 one_shot_gp = all isOneShotBndr tagged_binders
1761 final_usage | one_shot_gp = usage1
1762 | otherwise = markAllInsideLam usage1
1763 in
1764 (final_usage, expr') }
1765 where
1766 (binders, body) = collectBinders expr
1767
1768 occAnal env (Case scrut bndr ty alts)
1769 = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') ->
1770 case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts') ->
1771 let
1772 alts_usage = foldr combineAltsUsageDetails emptyDetails alts_usage_s
1773 (alts_usage1, tagged_bndr) = tag_case_bndr alts_usage bndr
1774 total_usage = markAllNonTailCalled scrut_usage +++ alts_usage1
1775 -- Alts can have tail calls, but the scrutinee can't
1776 in
1777 total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
1778 where
1779 -- Note [Case binder usage]
1780 -- ~~~~~~~~~~~~~~~~~~~~~~~~
1781 -- The case binder gets a usage of either "many" or "dead", never "one".
1782 -- Reason: we like to inline single occurrences, to eliminate a binding,
1783 -- but inlining a case binder *doesn't* eliminate a binding.
1784 -- We *don't* want to transform
1785 -- case x of w { (p,q) -> f w }
1786 -- into
1787 -- case x of w { (p,q) -> f (p,q) }
1788 tag_case_bndr usage bndr
1789 = (usage', setIdOccInfo bndr final_occ_info)
1790 where
1791 occ_info = lookupDetails usage bndr
1792 usage' = usage `delDetails` bndr
1793 final_occ_info = case occ_info of IAmDead -> IAmDead
1794 _ -> noOccInfo
1795
1796 alt_env = mkAltEnv env scrut bndr
1797 occ_anal_alt = occAnalAlt alt_env
1798
1799 occ_anal_scrut (Var v) (alt1 : other_alts)
1800 | not (null other_alts) || not (isDefaultAlt alt1)
1801 = (mkOneOcc env v True 0, Var v)
1802 -- The 'True' says that the variable occurs in an interesting
1803 -- context; the case has at least one non-default alternative
1804 occ_anal_scrut (Tick t e) alts
1805 | t `tickishScopesLike` SoftScope
1806 -- No reason to not look through all ticks here, but only
1807 -- for soft-scoped ticks we can do so without having to
1808 -- update returned occurance info (see occAnal)
1809 = second (Tick t) $ occ_anal_scrut e alts
1810
1811 occ_anal_scrut scrut _alts
1812 = occAnal (vanillaCtxt env) scrut -- No need for rhsCtxt
1813
1814 occAnal env (Let bind body)
1815 = case occAnal env body of { (body_usage, body') ->
1816 case occAnalBind env NotTopLevel
1817 noImpRuleEdges bind
1818 body_usage of { (final_usage, new_binds) ->
1819 (final_usage, mkLets new_binds body') }}
1820
1821 occAnalArgs :: OccEnv -> [CoreExpr] -> [OneShots] -> (UsageDetails, [CoreExpr])
1822 occAnalArgs _ [] _
1823 = (emptyDetails, [])
1824
1825 occAnalArgs env (arg:args) one_shots
1826 | isTypeArg arg
1827 = case occAnalArgs env args one_shots of { (uds, args') ->
1828 (uds, arg:args') }
1829
1830 | otherwise
1831 = case argCtxt env one_shots of { (arg_env, one_shots') ->
1832 case occAnal arg_env arg of { (uds1, arg') ->
1833 case occAnalArgs env args one_shots' of { (uds2, args') ->
1834 (uds1 +++ uds2, arg':args') }}}
1835
1836 {-
1837 Applications are dealt with specially because we want
1838 the "build hack" to work.
1839
1840 Note [Arguments of let-bound constructors]
1841 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1842 Consider
1843 f x = let y = expensive x in
1844 let z = (True,y) in
1845 (case z of {(p,q)->q}, case z of {(p,q)->q})
1846 We feel free to duplicate the WHNF (True,y), but that means
1847 that y may be duplicated thereby.
1848
1849 If we aren't careful we duplicate the (expensive x) call!
1850 Constructors are rather like lambdas in this way.
1851 -}
1852
1853 occAnalApp :: OccEnv
1854 -> (Expr CoreBndr, [Arg CoreBndr], [Tickish Id])
1855 -> (UsageDetails, Expr CoreBndr)
1856 occAnalApp env (Var fun, args, ticks)
1857 | null ticks = (uds, mkApps (Var fun) args')
1858 | otherwise = (uds, mkTicks ticks $ mkApps (Var fun) args')
1859 where
1860 uds = fun_uds +++ final_args_uds
1861
1862 !(args_uds, args') = occAnalArgs env args one_shots
1863 !final_args_uds
1864 | isRhsEnv env && is_exp = markAllNonTailCalled $
1865 markAllInsideLam args_uds
1866 | otherwise = markAllNonTailCalled args_uds
1867 -- We mark the free vars of the argument of a constructor or PAP
1868 -- as "inside-lambda", if it is the RHS of a let(rec).
1869 -- This means that nothing gets inlined into a constructor or PAP
1870 -- argument position, which is what we want. Typically those
1871 -- constructor arguments are just variables, or trivial expressions.
1872 -- We use inside-lam because it's like eta-expanding the PAP.
1873 --
1874 -- This is the *whole point* of the isRhsEnv predicate
1875 -- See Note [Arguments of let-bound constructors]
1876
1877 n_val_args = valArgCount args
1878 n_args = length args
1879 fun_uds = mkOneOcc env fun (n_val_args > 0) n_args
1880 is_exp = isExpandableApp fun n_val_args
1881 -- See Note [CONLIKE pragma] in BasicTypes
1882 -- The definition of is_exp should match that in Simplify.prepareRhs
1883
1884 one_shots = argsOneShots (idStrictness fun) guaranteed_val_args
1885 guaranteed_val_args = n_val_args + length (takeWhile isOneShotInfo
1886 (occ_one_shots env))
1887 -- See Note [Sources of one-shot information], bullet point A']
1888
1889 occAnalApp env (fun, args, ticks)
1890 = (markAllNonTailCalled (fun_uds +++ args_uds),
1891 mkTicks ticks $ mkApps fun' args')
1892 where
1893 !(fun_uds, fun') = occAnal (addAppCtxt env args) fun
1894 -- The addAppCtxt is a bit cunning. One iteration of the simplifier
1895 -- often leaves behind beta redexs like
1896 -- (\x y -> e) a1 a2
1897 -- Here we would like to mark x,y as one-shot, and treat the whole
1898 -- thing much like a let. We do this by pushing some True items
1899 -- onto the context stack.
1900 !(args_uds, args') = occAnalArgs env args []
1901
1902 zapDetailsIf :: Bool -- If this is true
1903 -> UsageDetails -- Then do zapDetails on this
1904 -> UsageDetails
1905 zapDetailsIf True uds = zapDetails uds
1906 zapDetailsIf False uds = uds
1907
1908 {-
1909 Note [Sources of one-shot information]
1910 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1911 The occurrence analyser obtains one-shot-lambda information from two sources:
1912
1913 A: Saturated applications: eg f e1 .. en
1914
1915 In general, given a call (f e1 .. en) we can propagate one-shot info from
1916 f's strictness signature into e1 .. en, but /only/ if n is enough to
1917 saturate the strictness signature. A strictness signature like
1918
1919 f :: C1(C1(L))LS
1920
1921 means that *if f is applied to three arguments* then it will guarantee to
1922 call its first argument at most once, and to call the result of that at
1923 most once. But if f has fewer than three arguments, all bets are off; e.g.
1924
1925 map (f (\x y. expensive) e2) xs
1926
1927 Here the \x y abstraction may be called many times (once for each element of
1928 xs) so we should not mark x and y as one-shot. But if it was
1929
1930 map (f (\x y. expensive) 3 2) xs
1931
1932 then the first argument of f will be called at most once.
1933
1934 The one-shot info, derived from f's strictness signature, is
1935 computed by 'argsOneShots', called in occAnalApp.
1936
1937 A': Non-obviously saturated applications: eg build (f (\x y -> expensive))
1938 where f is as above.
1939
1940 In this case, f is only manifestly applied to one argument, so it does not
1941 look saturated. So by the previous point, we should not use its strictness
1942 signature to learn about the one-shotness of \x y. But in this case we can:
1943 build is fully applied, so we may use its strictness signature; and from
1944 that we learn that build calls its argument with two arguments *at most once*.
1945
1946 So there is really only one call to f, and it will have three arguments. In
1947 that sense, f is saturated, and we may proceed as described above.
1948
1949 Hence the computation of 'guaranteed_val_args' in occAnalApp, using
1950 '(occ_one_shots env)'. See also Trac #13227, comment:9
1951
1952 B: Let-bindings: eg let f = \c. let ... in \n -> blah
1953 in (build f, build f)
1954
1955 Propagate one-shot info from the demanand-info on 'f' to the
1956 lambdas in its RHS (which may not be syntactically at the top)
1957
1958 This information must have come from a previous run of the demanand
1959 analyser.
1960
1961 Previously, the demand analyser would *also* set the one-shot information, but
1962 that code was buggy (see #11770), so doing it only in on place, namely here, is
1963 saner.
1964
1965 Note [OneShots]
1966 ~~~~~~~~~~~~~~~
1967 When analysing an expression, the occ_one_shots argument contains information
1968 about how the function is being used. The length of the list indicates
1969 how many arguments will eventually be passed to the analysed expression,
1970 and the OneShotInfo indicates whether this application is once or multiple times.
1971
1972 Example:
1973
1974 Context of f occ_one_shots when analysing f
1975
1976 f 1 2 [OneShot, OneShot]
1977 map (f 1) [OneShot, NoOneShotInfo]
1978 build f [OneShot, OneShot]
1979 f 1 2 `seq` f 2 1 [NoOneShotInfo, OneShot]
1980
1981 Note [Binders in case alternatives]
1982 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1983 Consider
1984 case x of y { (a,b) -> f y }
1985 We treat 'a', 'b' as dead, because they don't physically occur in the
1986 case alternative. (Indeed, a variable is dead iff it doesn't occur in
1987 its scope in the output of OccAnal.) It really helps to know when
1988 binders are unused. See esp the call to isDeadBinder in
1989 Simplify.mkDupableAlt
1990
1991 In this example, though, the Simplifier will bring 'a' and 'b' back to
1992 life, beause it binds 'y' to (a,b) (imagine got inlined and
1993 scrutinised y).
1994 -}
1995
1996 occAnalLamOrRhs :: OccEnv -> [CoreBndr] -> CoreExpr
1997 -> (UsageDetails, [CoreBndr], CoreExpr)
1998 occAnalLamOrRhs env [] body
1999 = case occAnal env body of (body_usage, body') -> (body_usage, [], body')
2000 -- RHS of thunk or nullary join point
2001 occAnalLamOrRhs env (bndr:bndrs) body
2002 | isTyVar bndr
2003 = -- Important: Keep the environment so that we don't inline into an RHS like
2004 -- \(@ x) -> C @x (f @x)
2005 -- (see the beginning of Note [Cascading inlines]).
2006 case occAnalLamOrRhs env bndrs body of
2007 (body_usage, bndrs', body') -> (body_usage, bndr:bndrs', body')
2008 occAnalLamOrRhs env binders body
2009 = case occAnal env_body body of { (body_usage, body') ->
2010 let
2011 (final_usage, tagged_binders) = tagLamBinders body_usage binders'
2012 -- Use binders' to put one-shot info on the lambdas
2013 in
2014 (final_usage, tagged_binders, body') }
2015 where
2016 (env_body, binders') = oneShotGroup env binders
2017
2018 occAnalAlt :: (OccEnv, Maybe (Id, CoreExpr))
2019 -> CoreAlt
2020 -> (UsageDetails, Alt IdWithOccInfo)
2021 occAnalAlt (env, scrut_bind) (con, bndrs, rhs)
2022 = case occAnal env rhs of { (rhs_usage1, rhs1) ->
2023 let
2024 (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs
2025 -- See Note [Binders in case alternatives]
2026 (alt_usg', rhs2) =
2027 wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1
2028 in
2029 (alt_usg', (con, tagged_bndrs, rhs2)) }
2030
2031 wrapAltRHS :: OccEnv
2032 -> Maybe (Id, CoreExpr) -- proxy mapping generated by mkAltEnv
2033 -> UsageDetails -- usage for entire alt (p -> rhs)
2034 -> [Var] -- alt binders
2035 -> CoreExpr -- alt RHS
2036 -> (UsageDetails, CoreExpr)
2037 wrapAltRHS env (Just (scrut_var, let_rhs)) alt_usg bndrs alt_rhs
2038 | occ_binder_swap env
2039 , scrut_var `usedIn` alt_usg -- bndrs are not be present in alt_usg so this
2040 -- handles condition (a) in Note [Binder swap]
2041 , not captured -- See condition (b) in Note [Binder swap]
2042 = ( alt_usg' +++ let_rhs_usg
2043 , Let (NonRec tagged_scrut_var let_rhs') alt_rhs )
2044 where
2045 captured = any (`usedIn` let_rhs_usg) bndrs
2046 -- The rhs of the let may include coercion variables
2047 -- if the scrutinee was a cast, so we must gather their
2048 -- usage. See Note [Gather occurrences of coercion variables]
2049 (let_rhs_usg, let_rhs') = occAnal env let_rhs
2050 (alt_usg', [tagged_scrut_var]) = tagLamBinders alt_usg [scrut_var]
2051
2052 wrapAltRHS _ _ alt_usg _ alt_rhs
2053 = (alt_usg, alt_rhs)
2054
2055 {-
2056 ************************************************************************
2057 * *
2058 OccEnv
2059 * *
2060 ************************************************************************
2061 -}
2062
2063 data OccEnv
2064 = OccEnv { occ_encl :: !OccEncl -- Enclosing context information
2065 , occ_one_shots :: !OneShots -- See Note [OneShots]
2066 , occ_gbl_scrut :: GlobalScruts
2067 , occ_rule_act :: Activation -> Bool -- Which rules are active
2068 -- See Note [Finding rule RHS free vars]
2069 , occ_binder_swap :: !Bool -- enable the binder_swap
2070 -- See CorePrep Note [Dead code in CorePrep]
2071 }
2072
2073 type GlobalScruts = IdSet -- See Note [Binder swap on GlobalId scrutinees]
2074
2075 -----------------------------
2076 -- OccEncl is used to control whether to inline into constructor arguments
2077 -- For example:
2078 -- x = (p,q) -- Don't inline p or q
2079 -- y = /\a -> (p a, q a) -- Still don't inline p or q
2080 -- z = f (p,q) -- Do inline p,q; it may make a rule fire
2081 -- So OccEncl tells enought about the context to know what to do when
2082 -- we encounter a constructor application or PAP.
2083
2084 data OccEncl
2085 = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda
2086 -- Don't inline into constructor args here
2087 | OccVanilla -- Argument of function, body of lambda, scruintee of case etc.
2088 -- Do inline into constructor args here
2089
2090 instance Outputable OccEncl where
2091 ppr OccRhs = text "occRhs"
2092 ppr OccVanilla = text "occVanilla"
2093
2094 -- See note [OneShots]
2095 type OneShots = [OneShotInfo]
2096
2097 initOccEnv :: (Activation -> Bool) -> OccEnv
2098 initOccEnv active_rule
2099 = OccEnv { occ_encl = OccVanilla
2100 , occ_one_shots = []
2101 , occ_gbl_scrut = emptyVarSet
2102 , occ_rule_act = active_rule
2103 , occ_binder_swap = True }
2104
2105 vanillaCtxt :: OccEnv -> OccEnv
2106 vanillaCtxt env = env { occ_encl = OccVanilla, occ_one_shots = [] }
2107
2108 rhsCtxt :: OccEnv -> OccEnv
2109 rhsCtxt env = env { occ_encl = OccRhs, occ_one_shots = [] }
2110
2111 argCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots])
2112 argCtxt env []
2113 = (env { occ_encl = OccVanilla, occ_one_shots = [] }, [])
2114 argCtxt env (one_shots:one_shots_s)
2115 = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s)
2116
2117 isRhsEnv :: OccEnv -> Bool
2118 isRhsEnv (OccEnv { occ_encl = OccRhs }) = True
2119 isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False
2120
2121 oneShotGroup :: OccEnv -> [CoreBndr]
2122 -> ( OccEnv
2123 , [CoreBndr] )
2124 -- The result binders have one-shot-ness set that they might not have had originally.
2125 -- This happens in (build (\c n -> e)). Here the occurrence analyser
2126 -- linearity context knows that c,n are one-shot, and it records that fact in
2127 -- the binder. This is useful to guide subsequent float-in/float-out tranformations
2128
2129 oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs
2130 = go ctxt bndrs []
2131 where
2132 go ctxt [] rev_bndrs
2133 = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla }
2134 , reverse rev_bndrs )
2135
2136 go [] bndrs rev_bndrs
2137 = ( env { occ_one_shots = [], occ_encl = OccVanilla }
2138 , reverse rev_bndrs ++ bndrs )
2139
2140 go ctxt@(one_shot : ctxt') (bndr : bndrs) rev_bndrs
2141 | isId bndr = go ctxt' bndrs (bndr': rev_bndrs)
2142 | otherwise = go ctxt bndrs (bndr : rev_bndrs)
2143 where
2144 bndr' = updOneShotInfo bndr one_shot
2145 -- Use updOneShotInfo, not setOneShotInfo, as pre-existing
2146 -- one-shot info might be better than what we can infer, e.g.
2147 -- due to explicit use of the magic 'oneShot' function.
2148 -- See Note [The oneShot function]
2149
2150
2151 markJoinOneShots :: Maybe JoinArity -> [Var] -> [Var]
2152 -- Mark the lambdas of a non-recursive join point as one-shot.
2153 -- This is good to prevent gratuitous float-out etc
2154 markJoinOneShots mb_join_arity bndrs
2155 = case mb_join_arity of
2156 Nothing -> bndrs
2157 Just n -> go n bndrs
2158 where
2159 go 0 bndrs = bndrs
2160 go _ [] = WARN( True, ppr mb_join_arity <+> ppr bndrs ) []
2161 go n (b:bs) = b' : go (n-1) bs
2162 where
2163 b' | isId b = setOneShotLambda b
2164 | otherwise = b
2165
2166 addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
2167 addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args
2168 = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt }
2169
2170 transClosureFV :: UniqFM VarSet -> UniqFM VarSet
2171 -- If (f,g), (g,h) are in the input, then (f,h) is in the output
2172 -- as well as (f,g), (g,h)
2173 transClosureFV env
2174 | no_change = env
2175 | otherwise = transClosureFV (listToUFM new_fv_list)
2176 where
2177 (no_change, new_fv_list) = mapAccumL bump True (nonDetUFMToList env)
2178 -- It's OK to use nonDetUFMToList here because we'll forget the
2179 -- ordering by creating a new set with listToUFM
2180 bump no_change (b,fvs)
2181 | no_change_here = (no_change, (b,fvs))
2182 | otherwise = (False, (b,new_fvs))
2183 where
2184 (new_fvs, no_change_here) = extendFvs env fvs
2185
2186 -------------
2187 extendFvs_ :: UniqFM VarSet -> VarSet -> VarSet
2188 extendFvs_ env s = fst (extendFvs env s) -- Discard the Bool flag
2189
2190 extendFvs :: UniqFM VarSet -> VarSet -> (VarSet, Bool)
2191 -- (extendFVs env s) returns
2192 -- (s `union` env(s), env(s) `subset` s)
2193 extendFvs env s
2194 | isNullUFM env
2195 = (s, True)
2196 | otherwise
2197 = (s `unionVarSet` extras, extras `subVarSet` s)
2198 where
2199 extras :: VarSet -- env(s)
2200 extras = nonDetFoldUFM unionVarSet emptyVarSet $
2201 -- It's OK to use nonDetFoldUFM here because unionVarSet commutes
2202 intersectUFM_C (\x _ -> x) env (getUniqSet s)
2203
2204 {-
2205 ************************************************************************
2206 * *
2207 Binder swap
2208 * *
2209 ************************************************************************
2210
2211 Note [Binder swap]
2212 ~~~~~~~~~~~~~~~~~~
2213 We do these two transformations right here:
2214
2215 (1) case x of b { pi -> ri }
2216 ==>
2217 case x of b { pi -> let x=b in ri }
2218
2219 (2) case (x |> co) of b { pi -> ri }
2220 ==>
2221 case (x |> co) of b { pi -> let x = b |> sym co in ri }
2222
2223 Why (2)? See Note [Case of cast]
2224
2225 In both cases, in a particular alternative (pi -> ri), we only
2226 add the binding if
2227 (a) x occurs free in (pi -> ri)
2228 (ie it occurs in ri, but is not bound in pi)
2229 (b) the pi does not bind b (or the free vars of co)
2230 We need (a) and (b) for the inserted binding to be correct.
2231
2232 For the alternatives where we inject the binding, we can transfer
2233 all x's OccInfo to b. And that is the point.
2234
2235 Notice that
2236 * The deliberate shadowing of 'x'.
2237 * That (a) rapidly becomes false, so no bindings are injected.
2238
2239 The reason for doing these transformations here is because it allows
2240 us to adjust the OccInfo for 'x' and 'b' as we go.
2241
2242 * Suppose the only occurrences of 'x' are the scrutinee and in the
2243 ri; then this transformation makes it occur just once, and hence
2244 get inlined right away.
2245
2246 * If we do this in the Simplifier, we don't know whether 'x' is used
2247 in ri, so we are forced to pessimistically zap b's OccInfo even
2248 though it is typically dead (ie neither it nor x appear in the
2249 ri). There's nothing actually wrong with zapping it, except that
2250 it's kind of nice to know which variables are dead. My nose
2251 tells me to keep this information as robustly as possible.
2252
2253 The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
2254 {x=b}; it's Nothing if the binder-swap doesn't happen.
2255
2256 There is a danger though. Consider
2257 let v = x +# y
2258 in case (f v) of w -> ...v...v...
2259 And suppose that (f v) expands to just v. Then we'd like to
2260 use 'w' instead of 'v' in the alternative. But it may be too
2261 late; we may have substituted the (cheap) x+#y for v in the
2262 same simplifier pass that reduced (f v) to v.
2263
2264 I think this is just too bad. CSE will recover some of it.
2265
2266 Note [Case of cast]
2267 ~~~~~~~~~~~~~~~~~~~
2268 Consider case (x `cast` co) of b { I# ->
2269 ... (case (x `cast` co) of {...}) ...
2270 We'd like to eliminate the inner case. That is the motivation for
2271 equation (2) in Note [Binder swap]. When we get to the inner case, we
2272 inline x, cancel the casts, and away we go.
2273
2274 Note [Binder swap on GlobalId scrutinees]
2275 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2276 When the scrutinee is a GlobalId we must take care in two ways
2277
2278 i) In order to *know* whether 'x' occurs free in the RHS, we need its
2279 occurrence info. BUT, we don't gather occurrence info for
2280 GlobalIds. That's the reason for the (small) occ_gbl_scrut env in
2281 OccEnv is for: it says "gather occurrence info for these".
2282
2283 ii) We must call localiseId on 'x' first, in case it's a GlobalId, or
2284 has an External Name. See, for example, SimplEnv Note [Global Ids in
2285 the substitution].
2286
2287 Note [Zap case binders in proxy bindings]
2288 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2289 From the original
2290 case x of cb(dead) { p -> ...x... }
2291 we will get
2292 case x of cb(live) { p -> let x = cb in ...x... }
2293
2294 Core Lint never expects to find an *occurrence* of an Id marked
2295 as Dead, so we must zap the OccInfo on cb before making the
2296 binding x = cb. See Trac #5028.
2297
2298 Historical note [no-case-of-case]
2299 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2300 We *used* to suppress the binder-swap in case expressions when
2301 -fno-case-of-case is on. Old remarks:
2302 "This happens in the first simplifier pass,
2303 and enhances full laziness. Here's the bad case:
2304 f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
2305 If we eliminate the inner case, we trap it inside the I# v -> arm,
2306 which might prevent some full laziness happening. I've seen this
2307 in action in spectral/cichelli/Prog.hs:
2308 [(m,n) | m <- [1..max], n <- [1..max]]
2309 Hence the check for NoCaseOfCase."
2310 However, now the full-laziness pass itself reverses the binder-swap, so this
2311 check is no longer necessary.
2312
2313 Historical note [Suppressing the case binder-swap]
2314 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2315 This old note describes a problem that is also fixed by doing the
2316 binder-swap in OccAnal:
2317
2318 There is another situation when it might make sense to suppress the
2319 case-expression binde-swap. If we have
2320
2321 case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
2322 ...other cases .... }
2323
2324 We'll perform the binder-swap for the outer case, giving
2325
2326 case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
2327 ...other cases .... }
2328
2329 But there is no point in doing it for the inner case, because w1 can't
2330 be inlined anyway. Furthermore, doing the case-swapping involves
2331 zapping w2's occurrence info (see paragraphs that follow), and that
2332 forces us to bind w2 when doing case merging. So we get
2333
2334 case x of w1 { A -> let w2 = w1 in e1
2335 B -> let w2 = w1 in e2
2336 ...other cases .... }
2337
2338 This is plain silly in the common case where w2 is dead.
2339
2340 Even so, I can't see a good way to implement this idea. I tried
2341 not doing the binder-swap if the scrutinee was already evaluated
2342 but that failed big-time:
2343
2344 data T = MkT !Int
2345
2346 case v of w { MkT x ->
2347 case x of x1 { I# y1 ->
2348 case x of x2 { I# y2 -> ...
2349
2350 Notice that because MkT is strict, x is marked "evaluated". But to
2351 eliminate the last case, we must either make sure that x (as well as
2352 x1) has unfolding MkT y1. The straightforward thing to do is to do
2353 the binder-swap. So this whole note is a no-op.
2354
2355 It's fixed by doing the binder-swap in OccAnal because we can do the
2356 binder-swap unconditionally and still get occurrence analysis
2357 information right.
2358 -}
2359
2360 mkAltEnv :: OccEnv -> CoreExpr -> Id -> (OccEnv, Maybe (Id, CoreExpr))
2361 -- Does two things: a) makes the occ_one_shots = OccVanilla
2362 -- b) extends the GlobalScruts if possible
2363 -- c) returns a proxy mapping, binding the scrutinee
2364 -- to the case binder, if possible
2365 mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr
2366 = case stripTicksTopE (const True) scrut of
2367 Var v -> add_scrut v case_bndr'
2368 Cast (Var v) co -> add_scrut v (Cast case_bndr' (mkSymCo co))
2369 -- See Note [Case of cast]
2370 _ -> (env { occ_encl = OccVanilla }, Nothing)
2371
2372 where
2373 add_scrut v rhs = ( env { occ_encl = OccVanilla, occ_gbl_scrut = pe `extendVarSet` v }
2374 , Just (localise v, rhs) )
2375
2376 case_bndr' = Var (zapIdOccInfo case_bndr) -- See Note [Zap case binders in proxy bindings]
2377 localise scrut_var = mkLocalIdOrCoVar (localiseName (idName scrut_var)) (idType scrut_var)
2378 -- Localise the scrut_var before shadowing it; we're making a
2379 -- new binding for it, and it might have an External Name, or
2380 -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]
2381 -- Also we don't want any INLINE or NOINLINE pragmas!
2382
2383 {-
2384 ************************************************************************
2385 * *
2386 \subsection[OccurAnal-types]{OccEnv}
2387 * *
2388 ************************************************************************
2389
2390 Note [UsageDetails and zapping]
2391 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2392
2393 On many occasions, we must modify all gathered occurrence data at once. For
2394 instance, all occurrences underneath a (non-one-shot) lambda set the
2395 'occ_in_lam' flag to become 'True'. We could use 'mapVarEnv' to do this, but
2396 that takes O(n) time and we will do this often---in particular, there are many
2397 places where tail calls are not allowed, and each of these causes all variables
2398 to get marked with 'NoTailCallInfo'.
2399
2400 Instead of relying on `mapVarEnv`, then, we carry three 'IdEnv's around along
2401 with the 'OccInfoEnv'. Each of these extra environments is a "zapped set"
2402 recording which variables have been zapped in some way. Zapping all occurrence
2403 info then simply means setting the corresponding zapped set to the whole
2404 'OccInfoEnv', a fast O(1) operation.
2405 -}
2406
2407 type OccInfoEnv = IdEnv OccInfo -- A finite map from ids to their usage
2408 -- INVARIANT: never IAmDead
2409 -- (Deadness is signalled by not being in the map at all)
2410
2411 type ZappedSet = OccInfoEnv -- Values are ignored
2412
2413 data UsageDetails
2414 = UD { ud_env :: !OccInfoEnv
2415 , ud_z_many :: ZappedSet -- apply 'markMany' to these
2416 , ud_z_in_lam :: ZappedSet -- apply 'markInsideLam' to these
2417 , ud_z_no_tail :: ZappedSet } -- apply 'markNonTailCalled' to these
2418 -- INVARIANT: All three zapped sets are subsets of the OccInfoEnv
2419
2420 instance Outputable UsageDetails where
2421 ppr ud = ppr (ud_env (flattenUsageDetails ud))
2422
2423 -------------------
2424 -- UsageDetails API
2425
2426 (+++), combineAltsUsageDetails
2427 :: UsageDetails -> UsageDetails -> UsageDetails
2428 (+++) = combineUsageDetailsWith addOccInfo
2429 combineAltsUsageDetails = combineUsageDetailsWith orOccInfo
2430
2431 combineUsageDetailsList :: [UsageDetails] -> UsageDetails
2432 combineUsageDetailsList = foldl (+++) emptyDetails
2433
2434 mkOneOcc :: OccEnv -> Id -> InterestingCxt -> JoinArity -> UsageDetails
2435 mkOneOcc env id int_cxt arity
2436 | isLocalId id
2437 = singleton $ OneOcc { occ_in_lam = False
2438 , occ_one_br = True
2439 , occ_int_cxt = int_cxt
2440 , occ_tail = AlwaysTailCalled arity }
2441 | id `elemVarSet` occ_gbl_scrut env
2442 = singleton noOccInfo
2443
2444 | otherwise
2445 = emptyDetails
2446 where
2447 singleton info = emptyDetails { ud_env = unitVarEnv id info }
2448
2449 addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
2450 addOneOcc ud id info
2451 = ud { ud_env = extendVarEnv_C plus_zapped (ud_env ud) id info }
2452 `alterZappedSets` (`delVarEnv` id)
2453 where
2454 plus_zapped old new = doZapping ud id old `addOccInfo` new
2455
2456 addManyOccsSet :: UsageDetails -> VarSet -> UsageDetails
2457 addManyOccsSet usage id_set = nonDetFoldUniqSet addManyOccs usage id_set
2458 -- It's OK to use nonDetFoldUFM here because addManyOccs commutes
2459
2460 -- Add several occurrences, assumed not to be tail calls
2461 addManyOccs :: Var -> UsageDetails -> UsageDetails
2462 addManyOccs v u | isId v = addOneOcc u v noOccInfo
2463 | otherwise = u
2464 -- Give a non-committal binder info (i.e noOccInfo) because
2465 -- a) Many copies of the specialised thing can appear
2466 -- b) We don't want to substitute a BIG expression inside a RULE
2467 -- even if that's the only occurrence of the thing
2468 -- (Same goes for INLINE.)
2469
2470 delDetails :: UsageDetails -> Id -> UsageDetails
2471 delDetails ud bndr
2472 = ud `alterUsageDetails` (`delVarEnv` bndr)
2473
2474 delDetailsList :: UsageDetails -> [Id] -> UsageDetails
2475 delDetailsList ud bndrs
2476 = ud `alterUsageDetails` (`delVarEnvList` bndrs)
2477
2478 emptyDetails :: UsageDetails
2479 emptyDetails = UD { ud_env = emptyVarEnv
2480 , ud_z_many = emptyVarEnv
2481 , ud_z_in_lam = emptyVarEnv
2482 , ud_z_no_tail = emptyVarEnv }
2483
2484 isEmptyDetails :: UsageDetails -> Bool
2485 isEmptyDetails = isEmptyVarEnv . ud_env
2486
2487 markAllMany, markAllInsideLam, markAllNonTailCalled, zapDetails
2488 :: UsageDetails -> UsageDetails
2489 markAllMany ud = ud { ud_z_many = ud_env ud }
2490 markAllInsideLam ud = ud { ud_z_in_lam = ud_env ud }
2491 markAllNonTailCalled ud = ud { ud_z_no_tail = ud_env ud }
2492
2493 zapDetails = markAllMany . markAllNonTailCalled -- effectively sets to noOccInfo
2494
2495 lookupDetails :: UsageDetails -> Id -> OccInfo
2496 lookupDetails ud id
2497 = case lookupVarEnv (ud_env ud) id of
2498 Just occ -> doZapping ud id occ
2499 Nothing -> IAmDead
2500
2501 usedIn :: Id -> UsageDetails -> Bool
2502 v `usedIn` ud = isExportedId v || v `elemVarEnv` ud_env ud
2503
2504 udFreeVars :: VarSet -> UsageDetails -> VarSet
2505 -- Find the subset of bndrs that are mentioned in uds
2506 udFreeVars bndrs ud = restrictUniqSetToUFM bndrs (ud_env ud)
2507
2508 -------------------
2509 -- Auxiliary functions for UsageDetails implementation
2510
2511 combineUsageDetailsWith :: (OccInfo -> OccInfo -> OccInfo)
2512 -> UsageDetails -> UsageDetails -> UsageDetails
2513 combineUsageDetailsWith plus_occ_info ud1 ud2
2514 | isEmptyDetails ud1 = ud2
2515 | isEmptyDetails ud2 = ud1
2516 | otherwise
2517 = UD { ud_env = plusVarEnv_C plus_occ_info (ud_env ud1) (ud_env ud2)
2518 , ud_z_many = plusVarEnv (ud_z_many ud1) (ud_z_many ud2)
2519 , ud_z_in_lam = plusVarEnv (ud_z_in_lam ud1) (ud_z_in_lam ud2)
2520 , ud_z_no_tail = plusVarEnv (ud_z_no_tail ud1) (ud_z_no_tail ud2) }
2521
2522 doZapping :: UsageDetails -> Var -> OccInfo -> OccInfo
2523 doZapping ud var occ
2524 = doZappingByUnique ud (varUnique var) occ
2525
2526 doZappingByUnique :: UsageDetails -> Unique -> OccInfo -> OccInfo
2527 doZappingByUnique ud uniq
2528 = (if | in_subset ud_z_many -> markMany
2529 | in_subset ud_z_in_lam -> markInsideLam
2530 | otherwise -> id) .
2531 (if | in_subset ud_z_no_tail -> markNonTailCalled
2532 | otherwise -> id)
2533 where
2534 in_subset field = uniq `elemVarEnvByKey` field ud
2535
2536 alterZappedSets :: UsageDetails -> (ZappedSet -> ZappedSet) -> UsageDetails
2537 alterZappedSets ud f
2538 = ud { ud_z_many = f (ud_z_many ud)
2539 , ud_z_in_lam = f (ud_z_in_lam ud)
2540 , ud_z_no_tail = f (ud_z_no_tail ud) }
2541
2542 alterUsageDetails :: UsageDetails -> (OccInfoEnv -> OccInfoEnv) -> UsageDetails
2543 alterUsageDetails ud f
2544 = ud { ud_env = f (ud_env ud) }
2545 `alterZappedSets` f
2546
2547 flattenUsageDetails :: UsageDetails -> UsageDetails
2548 flattenUsageDetails ud
2549 = ud { ud_env = mapUFM_Directly (doZappingByUnique ud) (ud_env ud) }
2550 `alterZappedSets` const emptyVarEnv
2551
2552 -------------------
2553 -- See Note [Adjusting right-hand sides]
2554 adjustRhsUsage :: Maybe JoinArity -> RecFlag
2555 -> [CoreBndr] -- Outer lambdas, AFTER occ anal
2556 -> UsageDetails -> UsageDetails
2557 adjustRhsUsage mb_join_arity rec_flag bndrs usage
2558 = maybe_mark_lam (maybe_drop_tails usage)
2559 where
2560 maybe_mark_lam ud | one_shot = ud
2561 | otherwise = markAllInsideLam ud
2562 maybe_drop_tails ud | exact_join = ud
2563 | otherwise = markAllNonTailCalled ud
2564
2565 one_shot = case mb_join_arity of
2566 Just join_arity
2567 | isRec rec_flag -> False
2568 | otherwise -> all isOneShotBndr (drop join_arity bndrs)
2569 Nothing -> all isOneShotBndr bndrs
2570
2571 exact_join = case mb_join_arity of
2572 Just join_arity -> join_arity == length bndrs
2573 _ -> False
2574
2575 type IdWithOccInfo = Id
2576
2577 tagLamBinders :: UsageDetails -- Of scope
2578 -> [Id] -- Binders
2579 -> (UsageDetails, -- Details with binders removed
2580 [IdWithOccInfo]) -- Tagged binders
2581 -- Used for lambda and case binders
2582 -- It copes with the fact that lambda bindings can have a
2583 -- stable unfolding, used for join points
2584 tagLamBinders usage binders = usage' `seq` (usage', bndrs')
2585 where
2586 (usage', bndrs') = mapAccumR tag_lam usage binders
2587 tag_lam usage bndr = (usage2, bndr')
2588 where
2589 occ = lookupDetails usage bndr
2590 bndr' = setBinderOcc (markNonTailCalled occ) bndr
2591 -- Don't try to make an argument into a join point
2592 usage1 = usage `delDetails` bndr
2593 usage2 | isId bndr = addManyOccsSet usage1 (idUnfoldingVars bndr)
2594 -- This is effectively the RHS of a
2595 -- non-join-point binding, so it's okay to use
2596 -- addManyOccsSet, which assumes no tail calls
2597 | otherwise = usage1
2598
2599 tagNonRecBinder :: TopLevelFlag -- At top level?
2600 -> UsageDetails -- Of scope
2601 -> CoreBndr -- Binder
2602 -> (UsageDetails, -- Details with binder removed
2603 IdWithOccInfo) -- Tagged binder
2604
2605 tagNonRecBinder lvl usage binder
2606 = let
2607 occ = lookupDetails usage binder
2608 will_be_join = decideJoinPointHood lvl usage [binder]
2609 occ' | will_be_join = -- must already be marked AlwaysTailCalled
2610 ASSERT(isAlwaysTailCalled occ) occ
2611 | otherwise = markNonTailCalled occ
2612 binder' = setBinderOcc occ' binder
2613 usage' = usage `delDetails` binder
2614 in
2615 usage' `seq` (usage', binder')
2616
2617 tagRecBinders :: TopLevelFlag -- At top level?
2618 -> UsageDetails -- Of body of let ONLY
2619 -> [(CoreBndr, -- Binder
2620 UsageDetails, -- RHS usage details
2621 [CoreBndr])] -- Lambdas in new RHS
2622 -> (UsageDetails, -- Adjusted details for whole scope,
2623 -- with binders removed
2624 [IdWithOccInfo]) -- Tagged binders
2625 -- Substantially more complicated than non-recursive case. Need to adjust RHS
2626 -- details *before* tagging binders (because the tags depend on the RHSes).
2627 tagRecBinders lvl body_uds triples
2628 = let
2629 (bndrs, rhs_udss, _) = unzip3 triples
2630
2631 -- 1. Determine join-point-hood of whole group, as determined by
2632 -- the *unadjusted* usage details
2633 unadj_uds = body_uds +++ combineUsageDetailsList rhs_udss
2634 will_be_joins = decideJoinPointHood lvl unadj_uds bndrs
2635
2636 -- 2. Adjust usage details of each RHS, taking into account the
2637 -- join-point-hood decision
2638 rhs_udss' = map adjust triples
2639 adjust (bndr, rhs_uds, rhs_bndrs)
2640 = adjustRhsUsage mb_join_arity Recursive rhs_bndrs rhs_uds
2641 where
2642 -- Can't use willBeJoinId_maybe here because we haven't tagged the
2643 -- binder yet (the tag depends on these adjustments!)
2644 mb_join_arity
2645 | will_be_joins
2646 , let occ = lookupDetails unadj_uds bndr
2647 , AlwaysTailCalled arity <- tailCallInfo occ
2648 = Just arity
2649 | otherwise
2650 = ASSERT(not will_be_joins) -- Should be AlwaysTailCalled if we're
2651 -- making join points!
2652 Nothing
2653
2654 -- 3. Compute final usage details from adjusted RHS details
2655 adj_uds = body_uds +++ combineUsageDetailsList rhs_udss'
2656
2657 -- 4. Tag each binder with its adjusted details modulo the
2658 -- join-point-hood decision
2659 occs = map (lookupDetails adj_uds) bndrs
2660 occs' | will_be_joins = occs
2661 | otherwise = map markNonTailCalled occs
2662 bndrs' = zipWith setBinderOcc occs' bndrs
2663
2664 -- 5. Drop the binders from the adjusted details and return
2665 usage' = adj_uds `delDetailsList` bndrs
2666 in
2667 (usage', bndrs')
2668
2669 setBinderOcc :: OccInfo -> CoreBndr -> CoreBndr
2670 setBinderOcc occ_info bndr
2671 | isTyVar bndr = bndr
2672 | isExportedId bndr = if isManyOccs (idOccInfo bndr)
2673 then bndr
2674 else setIdOccInfo bndr noOccInfo
2675 -- Don't use local usage info for visible-elsewhere things
2676 -- BUT *do* erase any IAmALoopBreaker annotation, because we're
2677 -- about to re-generate it and it shouldn't be "sticky"
2678
2679 | otherwise = setIdOccInfo bndr occ_info
2680
2681 -- | Decide whether some bindings should be made into join points or not.
2682 -- Returns `False` if they can't be join points. Note that it's an
2683 -- all-or-nothing decision, as if multiple binders are given, they're assumed to
2684 -- be mutually recursive.
2685 --
2686 -- See Note [Invariants for join points] in CoreSyn.
2687 decideJoinPointHood :: TopLevelFlag -> UsageDetails
2688 -> [CoreBndr]
2689 -> Bool
2690 decideJoinPointHood TopLevel _ _
2691 = False
2692 decideJoinPointHood NotTopLevel usage bndrs
2693 | isJoinId (head bndrs)
2694 = WARN(not all_ok, text "OccurAnal failed to rediscover join point(s):" <+>
2695 ppr bndrs)
2696 all_ok
2697 | otherwise
2698 = all_ok
2699 where
2700 -- See Note [Invariants on join points]; invariants cited by number below.
2701 -- Invariant 2 is always satisfiable by the simplifier by eta expansion.
2702 all_ok = -- Invariant 3: Either all are join points or none are
2703 all ok bndrs
2704
2705 ok bndr
2706 | -- Invariant 1: Only tail calls, all same join arity
2707 AlwaysTailCalled arity <- tailCallInfo (lookupDetails usage bndr)
2708 , -- Invariant 1 as applied to LHSes of rules
2709 all (ok_rule arity) (idCoreRules bndr)
2710 -- Invariant 4: Satisfies polymorphism rule
2711 , isValidJoinPointType arity (idType bndr)
2712 = True
2713 | otherwise
2714 = False
2715
2716 ok_rule _ BuiltinRule{} = False -- only possible with plugin shenanigans
2717 ok_rule join_arity (Rule { ru_args = args })
2718 = length args == join_arity
2719 -- Invariant 1 as applied to LHSes of rules
2720
2721 willBeJoinId_maybe :: CoreBndr -> Maybe JoinArity
2722 willBeJoinId_maybe bndr
2723 | AlwaysTailCalled arity <- tailCallInfo (idOccInfo bndr)
2724 = Just arity
2725 | otherwise
2726 = isJoinId_maybe bndr
2727
2728 {-
2729 ************************************************************************
2730 * *
2731 \subsection{Operations over OccInfo}
2732 * *
2733 ************************************************************************
2734 -}
2735
2736 markMany, markInsideLam, markNonTailCalled :: OccInfo -> OccInfo
2737
2738 markMany IAmDead = IAmDead
2739 markMany occ = ManyOccs { occ_tail = occ_tail occ }
2740
2741 markInsideLam occ@(OneOcc {}) = occ { occ_in_lam = True }
2742 markInsideLam occ = occ
2743
2744 markNonTailCalled IAmDead = IAmDead
2745 markNonTailCalled occ = occ { occ_tail = NoTailCallInfo }
2746
2747 addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
2748
2749 addOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2750 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2751 tailCallInfo a2 }
2752 -- Both branches are at least One
2753 -- (Argument is never IAmDead)
2754
2755 -- (orOccInfo orig new) is used
2756 -- when combining occurrence info from branches of a case
2757
2758 orOccInfo (OneOcc { occ_in_lam = in_lam1, occ_int_cxt = int_cxt1
2759 , occ_tail = tail1 })
2760 (OneOcc { occ_in_lam = in_lam2, occ_int_cxt = int_cxt2
2761 , occ_tail = tail2 })
2762 = OneOcc { occ_in_lam = in_lam1 || in_lam2
2763 , occ_one_br = False -- False, because it occurs in both branches
2764 , occ_int_cxt = int_cxt1 && int_cxt2
2765 , occ_tail = tail1 `andTailCallInfo` tail2 }
2766 orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2767 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2768 tailCallInfo a2 }
2769
2770 andTailCallInfo :: TailCallInfo -> TailCallInfo -> TailCallInfo
2771 andTailCallInfo info@(AlwaysTailCalled arity1) (AlwaysTailCalled arity2)
2772 | arity1 == arity2 = info
2773 andTailCallInfo _ _ = NoTailCallInfo