Comments about join point types
[ghc.git] / compiler / simplCore / OccurAnal.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 ************************************************************************
5 * *
6 \section[OccurAnal]{Occurrence analysis pass}
7 * *
8 ************************************************************************
9
10 The occurrence analyser re-typechecks a core expression, returning a new
11 core expression with (hopefully) improved usage information.
12 -}
13
14 {-# LANGUAGE CPP, BangPatterns, MultiWayIf, ViewPatterns #-}
15
16 module OccurAnal (
17 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
18 ) where
19
20 #include "HsVersions.h"
21
22 import GhcPrelude
23
24 import CoreSyn
25 import CoreFVs
26 import CoreUtils ( exprIsTrivial, isDefaultAlt, isExpandableApp,
27 stripTicksTopE, mkTicks )
28 import Id
29 import IdInfo
30 import Name( localiseName )
31 import BasicTypes
32 import Module( Module )
33 import Coercion
34 import Type
35
36 import VarSet
37 import VarEnv
38 import Var
39 import Demand ( argOneShots, argsOneShots )
40 import Digraph ( SCC(..), Node(..)
41 , stronglyConnCompFromEdgedVerticesUniq
42 , stronglyConnCompFromEdgedVerticesUniqR )
43 import Unique
44 import UniqFM
45 import UniqSet
46 import Util
47 import Outputable
48 import Data.List
49 import Control.Arrow ( second )
50
51 {-
52 ************************************************************************
53 * *
54 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
55 * *
56 ************************************************************************
57
58 Here's the externally-callable interface:
59 -}
60
61 occurAnalysePgm :: Module -- Used only in debug output
62 -> (Id -> Bool) -- Active unfoldings
63 -> (Activation -> Bool) -- Active rules
64 -> [CoreRule] -> [CoreVect] -> VarSet
65 -> CoreProgram -> CoreProgram
66 occurAnalysePgm this_mod active_unf active_rule imp_rules vects vectVars binds
67 | isEmptyDetails final_usage
68 = occ_anald_binds
69
70 | otherwise -- See Note [Glomming]
71 = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
72 2 (ppr final_usage ) )
73 occ_anald_glommed_binds
74 where
75 init_env = initOccEnv { occ_rule_act = active_rule
76 , occ_unf_act = active_unf }
77
78 (final_usage, occ_anald_binds) = go init_env binds
79 (_, occ_anald_glommed_binds) = occAnalRecBind init_env TopLevel
80 imp_rule_edges
81 (flattenBinds occ_anald_binds)
82 initial_uds
83 -- It's crucial to re-analyse the glommed-together bindings
84 -- so that we establish the right loop breakers. Otherwise
85 -- we can easily create an infinite loop (Trac #9583 is an example)
86
87 initial_uds = addManyOccsSet emptyDetails
88 (rulesFreeVars imp_rules `unionVarSet`
89 vectsFreeVars vects `unionVarSet`
90 vectVars)
91 -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations,
92 -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are
93 -- reflected in 'vectors' — see Note [Vectorisation declarations and occurrences].)
94
95 -- Note [Preventing loops due to imported functions rules]
96 imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
97 [ mapVarEnv (const maps_to) $
98 getUniqSet (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
99 | imp_rule <- imp_rules
100 , not (isBuiltinRule imp_rule) -- See Note [Plugin rules]
101 , let maps_to = exprFreeIds (ru_rhs imp_rule)
102 `delVarSetList` ru_bndrs imp_rule
103 , arg <- ru_args imp_rule ]
104
105 go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
106 go _ []
107 = (initial_uds, [])
108 go env (bind:binds)
109 = (final_usage, bind' ++ binds')
110 where
111 (bs_usage, binds') = go env binds
112 (final_usage, bind') = occAnalBind env TopLevel imp_rule_edges bind
113 bs_usage
114
115 occurAnalyseExpr :: CoreExpr -> CoreExpr
116 -- Do occurrence analysis, and discard occurrence info returned
117 occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap
118
119 occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
120 occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap
121
122 occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
123 occurAnalyseExpr' enable_binder_swap expr
124 = snd (occAnal env expr)
125 where
126 env = initOccEnv { occ_binder_swap = enable_binder_swap }
127
128 {- Note [Plugin rules]
129 ~~~~~~~~~~~~~~~~~~~~~~
130 Conal Elliott (Trac #11651) built a GHC plugin that added some
131 BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
132 do some domain-specific transformations that could not be expressed
133 with an ordinary pattern-matching CoreRule. But then we can't extract
134 the dependencies (in imp_rule_edges) from ru_rhs etc, because a
135 BuiltinRule doesn't have any of that stuff.
136
137 So we simply assume that BuiltinRules have no dependencies, and filter
138 them out from the imp_rule_edges comprehension.
139 -}
140
141 {-
142 ************************************************************************
143 * *
144 Bindings
145 * *
146 ************************************************************************
147
148 Note [Recursive bindings: the grand plan]
149 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
150 When we come across a binding group
151 Rec { x1 = r1; ...; xn = rn }
152 we treat it like this (occAnalRecBind):
153
154 1. Occurrence-analyse each right hand side, and build a
155 "Details" for each binding to capture the results.
156
157 Wrap the details in a Node (details, node-id, dep-node-ids),
158 where node-id is just the unique of the binder, and
159 dep-node-ids lists all binders on which this binding depends.
160 We'll call these the "scope edges".
161 See Note [Forming the Rec groups].
162
163 All this is done by makeNode.
164
165 2. Do SCC-analysis on these Nodes. Each SCC will become a new Rec or
166 NonRec. The key property is that every free variable of a binding
167 is accounted for by the scope edges, so that when we are done
168 everything is still in scope.
169
170 3. For each Cyclic SCC of the scope-edge SCC-analysis in (2), we
171 identify suitable loop-breakers to ensure that inlining terminates.
172 This is done by occAnalRec.
173
174 4. To do so we form a new set of Nodes, with the same details, but
175 different edges, the "loop-breaker nodes". The loop-breaker nodes
176 have both more and fewer dependencies than the scope edges
177 (see Note [Choosing loop breakers])
178
179 More edges: if f calls g, and g has an active rule that mentions h
180 then we add an edge from f -> h
181
182 Fewer edges: we only include dependencies on active rules, on rule
183 RHSs (not LHSs) and if there is an INLINE pragma only
184 on the stable unfolding (and vice versa). The scope
185 edges must be much more inclusive.
186
187 5. The "weak fvs" of a node are, by definition:
188 the scope fvs - the loop-breaker fvs
189 See Note [Weak loop breakers], and the nd_weak field of Details
190
191 6. Having formed the loop-breaker nodes
192
193 Note [Dead code]
194 ~~~~~~~~~~~~~~~~
195 Dropping dead code for a cyclic Strongly Connected Component is done
196 in a very simple way:
197
198 the entire SCC is dropped if none of its binders are mentioned
199 in the body; otherwise the whole thing is kept.
200
201 The key observation is that dead code elimination happens after
202 dependency analysis: so 'occAnalBind' processes SCCs instead of the
203 original term's binding groups.
204
205 Thus 'occAnalBind' does indeed drop 'f' in an example like
206
207 letrec f = ...g...
208 g = ...(...g...)...
209 in
210 ...g...
211
212 when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
213 'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
214 'AcyclicSCC f', where 'body_usage' won't contain 'f'.
215
216 ------------------------------------------------------------
217 Note [Forming Rec groups]
218 ~~~~~~~~~~~~~~~~~~~~~~~~~
219 We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
220 and "g uses f", no matter how indirectly. We do a SCC analysis
221 with an edge f -> g if "f uses g".
222
223 More precisely, "f uses g" iff g should be in scope wherever f is.
224 That is, g is free in:
225 a) the rhs 'ef'
226 b) or the RHS of a rule for f (Note [Rules are extra RHSs])
227 c) or the LHS or a rule for f (Note [Rule dependency info])
228
229 These conditions apply regardless of the activation of the RULE (eg it might be
230 inactive in this phase but become active later). Once a Rec is broken up
231 it can never be put back together, so we must be conservative.
232
233 The principle is that, regardless of rule firings, every variable is
234 always in scope.
235
236 * Note [Rules are extra RHSs]
237 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
238 A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
239 keeps the specialised "children" alive. If the parent dies
240 (because it isn't referenced any more), then the children will die
241 too (unless they are already referenced directly).
242
243 To that end, we build a Rec group for each cyclic strongly
244 connected component,
245 *treating f's rules as extra RHSs for 'f'*.
246 More concretely, the SCC analysis runs on a graph with an edge
247 from f -> g iff g is mentioned in
248 (a) f's rhs
249 (b) f's RULES
250 These are rec_edges.
251
252 Under (b) we include variables free in *either* LHS *or* RHS of
253 the rule. The former might seems silly, but see Note [Rule
254 dependency info]. So in Example [eftInt], eftInt and eftIntFB
255 will be put in the same Rec, even though their 'main' RHSs are
256 both non-recursive.
257
258 * Note [Rule dependency info]
259 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
260 The VarSet in a RuleInfo is used for dependency analysis in the
261 occurrence analyser. We must track free vars in *both* lhs and rhs.
262 Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
263 Why both? Consider
264 x = y
265 RULE f x = v+4
266 Then if we substitute y for x, we'd better do so in the
267 rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
268 as well as 'v'
269
270 * Note [Rules are visible in their own rec group]
271 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
272 We want the rules for 'f' to be visible in f's right-hand side.
273 And we'd like them to be visible in other functions in f's Rec
274 group. E.g. in Note [Specialisation rules] we want f' rule
275 to be visible in both f's RHS, and fs's RHS.
276
277 This means that we must simplify the RULEs first, before looking
278 at any of the definitions. This is done by Simplify.simplRecBind,
279 when it calls addLetIdInfo.
280
281 ------------------------------------------------------------
282 Note [Choosing loop breakers]
283 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
284 Loop breaking is surprisingly subtle. First read the section 4 of
285 "Secrets of the GHC inliner". This describes our basic plan.
286 We avoid infinite inlinings by choosing loop breakers, and
287 ensuring that a loop breaker cuts each loop.
288
289 See also Note [Inlining and hs-boot files] in ToIface, which deals
290 with a closely related source of infinite loops.
291
292 Fundamentally, we do SCC analysis on a graph. For each recursive
293 group we choose a loop breaker, delete all edges to that node,
294 re-analyse the SCC, and iterate.
295
296 But what is the graph? NOT the same graph as was used for Note
297 [Forming Rec groups]! In particular, a RULE is like an equation for
298 'f' that is *always* inlined if it is applicable. We do *not* disable
299 rules for loop-breakers. It's up to whoever makes the rules to make
300 sure that the rules themselves always terminate. See Note [Rules for
301 recursive functions] in Simplify.hs
302
303 Hence, if
304 f's RHS (or its INLINE template if it has one) mentions g, and
305 g has a RULE that mentions h, and
306 h has a RULE that mentions f
307
308 then we *must* choose f to be a loop breaker. Example: see Note
309 [Specialisation rules].
310
311 In general, take the free variables of f's RHS, and augment it with
312 all the variables reachable by RULES from those starting points. That
313 is the whole reason for computing rule_fv_env in occAnalBind. (Of
314 course we only consider free vars that are also binders in this Rec
315 group.) See also Note [Finding rule RHS free vars]
316
317 Note that when we compute this rule_fv_env, we only consider variables
318 free in the *RHS* of the rule, in contrast to the way we build the
319 Rec group in the first place (Note [Rule dependency info])
320
321 Note that if 'g' has RHS that mentions 'w', we should add w to
322 g's loop-breaker edges. More concretely there is an edge from f -> g
323 iff
324 (a) g is mentioned in f's RHS `xor` f's INLINE rhs
325 (see Note [Inline rules])
326 (b) or h is mentioned in f's RHS, and
327 g appears in the RHS of an active RULE of h
328 or a transitive sequence of active rules starting with h
329
330 Why "active rules"? See Note [Finding rule RHS free vars]
331
332 Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
333 chosen as a loop breaker, because their RHSs don't mention each other.
334 And indeed both can be inlined safely.
335
336 Note again that the edges of the graph we use for computing loop breakers
337 are not the same as the edges we use for computing the Rec blocks.
338 That's why we compute
339
340 - rec_edges for the Rec block analysis
341 - loop_breaker_nodes for the loop breaker analysis
342
343 * Note [Finding rule RHS free vars]
344 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
345 Consider this real example from Data Parallel Haskell
346 tagZero :: Array Int -> Array Tag
347 {-# INLINE [1] tagZeroes #-}
348 tagZero xs = pmap (\x -> fromBool (x==0)) xs
349
350 {-# RULES "tagZero" [~1] forall xs n.
351 pmap fromBool <blah blah> = tagZero xs #-}
352 So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
353 However, tagZero can only be inlined in phase 1 and later, while
354 the RULE is only active *before* phase 1. So there's no problem.
355
356 To make this work, we look for the RHS free vars only for
357 *active* rules. That's the reason for the occ_rule_act field
358 of the OccEnv.
359
360 * Note [Weak loop breakers]
361 ~~~~~~~~~~~~~~~~~~~~~~~~~
362 There is a last nasty wrinkle. Suppose we have
363
364 Rec { f = f_rhs
365 RULE f [] = g
366
367 h = h_rhs
368 g = h
369 ...more...
370 }
371
372 Remember that we simplify the RULES before any RHS (see Note
373 [Rules are visible in their own rec group] above).
374
375 So we must *not* postInlineUnconditionally 'g', even though
376 its RHS turns out to be trivial. (I'm assuming that 'g' is
377 not choosen as a loop breaker.) Why not? Because then we
378 drop the binding for 'g', which leaves it out of scope in the
379 RULE!
380
381 Here's a somewhat different example of the same thing
382 Rec { g = h
383 ; h = ...f...
384 ; f = f_rhs
385 RULE f [] = g }
386 Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
387 g, because the RULE for f is active throughout. So the RHS of h
388 might rewrite to h = ...g...
389 So g must remain in scope in the output program!
390
391 We "solve" this by:
392
393 Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
394 iff g is a "missing free variable" of the Rec group
395
396 A "missing free variable" x is one that is mentioned in an RHS or
397 INLINE or RULE of a binding in the Rec group, but where the
398 dependency on x may not show up in the loop_breaker_nodes (see
399 note [Choosing loop breakers} above).
400
401 A normal "strong" loop breaker has IAmLoopBreaker False. So
402
403 Inline postInlineUnconditionally
404 strong IAmLoopBreaker False no no
405 weak IAmLoopBreaker True yes no
406 other yes yes
407
408 The **sole** reason for this kind of loop breaker is so that
409 postInlineUnconditionally does not fire. Ugh. (Typically it'll
410 inline via the usual callSiteInline stuff, so it'll be dead in the
411 next pass, so the main Ugh is the tiresome complication.)
412
413 Note [Rules for imported functions]
414 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
415 Consider this
416 f = /\a. B.g a
417 RULE B.g Int = 1 + f Int
418 Note that
419 * The RULE is for an imported function.
420 * f is non-recursive
421 Now we
422 can get
423 f Int --> B.g Int Inlining f
424 --> 1 + f Int Firing RULE
425 and so the simplifier goes into an infinite loop. This
426 would not happen if the RULE was for a local function,
427 because we keep track of dependencies through rules. But
428 that is pretty much impossible to do for imported Ids. Suppose
429 f's definition had been
430 f = /\a. C.h a
431 where (by some long and devious process), C.h eventually inlines to
432 B.g. We could only spot such loops by exhaustively following
433 unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
434 f.
435
436 Note that RULES for imported functions are important in practice; they
437 occur a lot in the libraries.
438
439 We regard this potential infinite loop as a *programmer* error.
440 It's up the programmer not to write silly rules like
441 RULE f x = f x
442 and the example above is just a more complicated version.
443
444 Note [Preventing loops due to imported functions rules]
445 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
446 Consider:
447 import GHC.Base (foldr)
448
449 {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
450 filter p xs = build (\c n -> foldr (filterFB c p) n xs)
451 filterFB c p = ...
452
453 f = filter p xs
454
455 Note that filter is not a loop-breaker, so what happens is:
456 f = filter p xs
457 = {inline} build (\c n -> foldr (filterFB c p) n xs)
458 = {inline} foldr (filterFB (:) p) [] xs
459 = {RULE} filter p xs
460
461 We are in an infinite loop.
462
463 A more elaborate example (that I actually saw in practice when I went to
464 mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
465 {-# LANGUAGE RankNTypes #-}
466 module GHCList where
467
468 import Prelude hiding (filter)
469 import GHC.Base (build)
470
471 {-# INLINABLE filter #-}
472 filter :: (a -> Bool) -> [a] -> [a]
473 filter p [] = []
474 filter p (x:xs) = if p x then x : filter p xs else filter p xs
475
476 {-# NOINLINE [0] filterFB #-}
477 filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
478 filterFB c p x r | p x = x `c` r
479 | otherwise = r
480
481 {-# RULES
482 "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr
483 (filterFB c p) n xs)
484 "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p
485 #-}
486
487 Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
488 are not), the unfolding given to "filter" in the interface file will be:
489 filter p [] = []
490 filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
491 else build (\c n -> foldr (filterFB c p) n xs
492
493 Note that because this unfolding does not mention "filter", filter is not
494 marked as a strong loop breaker. Therefore at a use site in another module:
495 filter p xs
496 = {inline}
497 case xs of [] -> []
498 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
499 else build (\c n -> foldr (filterFB c p) n xs)
500
501 build (\c n -> foldr (filterFB c p) n xs)
502 = {inline} foldr (filterFB (:) p) [] xs
503 = {RULE} filter p xs
504
505 And we are in an infinite loop again, except that this time the loop is producing an
506 infinitely large *term* (an unrolling of filter) and so the simplifier finally
507 dies with "ticks exhausted"
508
509 Because of this problem, we make a small change in the occurrence analyser
510 designed to mark functions like "filter" as strong loop breakers on the basis that:
511 1. The RHS of filter mentions the local function "filterFB"
512 2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS
513
514 So for each RULE for an *imported* function we are going to add
515 dependency edges between the *local* FVS of the rule LHS and the
516 *local* FVS of the rule RHS. We don't do anything special for RULES on
517 local functions because the standard occurrence analysis stuff is
518 pretty good at getting loop-breakerness correct there.
519
520 It is important to note that even with this extra hack we aren't always going to get
521 things right. For example, it might be that the rule LHS mentions an imported Id,
522 and another module has a RULE that can rewrite that imported Id to one of our local
523 Ids.
524
525 Note [Specialising imported functions] (referred to from Specialise)
526 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
527 BUT for *automatically-generated* rules, the programmer can't be
528 responsible for the "programmer error" in Note [Rules for imported
529 functions]. In paricular, consider specialising a recursive function
530 defined in another module. If we specialise a recursive function B.g,
531 we get
532 g_spec = .....(B.g Int).....
533 RULE B.g Int = g_spec
534 Here, g_spec doesn't look recursive, but when the rule fires, it
535 becomes so. And if B.g was mutually recursive, the loop might
536 not be as obvious as it is here.
537
538 To avoid this,
539 * When specialising a function that is a loop breaker,
540 give a NOINLINE pragma to the specialised function
541
542 Note [Glomming]
543 ~~~~~~~~~~~~~~~
544 RULES for imported Ids can make something at the top refer to something at the bottom:
545 f = \x -> B.g (q x)
546 h = \y -> 3
547
548 RULE: B.g (q x) = h x
549
550 Applying this rule makes f refer to h, although f doesn't appear to
551 depend on h. (And, as in Note [Rules for imported functions], the
552 dependency might be more indirect. For example, f might mention C.t
553 rather than B.g, where C.t eventually inlines to B.g.)
554
555 NOTICE that this cannot happen for rules whose head is a
556 locally-defined function, because we accurately track dependencies
557 through RULES. It only happens for rules whose head is an imported
558 function (B.g in the example above).
559
560 Solution:
561 - When simplifying, bring all top level identifiers into
562 scope at the start, ignoring the Rec/NonRec structure, so
563 that when 'h' pops up in f's rhs, we find it in the in-scope set
564 (as the simplifier generally expects). This happens in simplTopBinds.
565
566 - In the occurrence analyser, if there are any out-of-scope
567 occurrences that pop out of the top, which will happen after
568 firing the rule: f = \x -> h x
569 h = \y -> 3
570 then just glom all the bindings into a single Rec, so that
571 the *next* iteration of the occurrence analyser will sort
572 them all out. This part happens in occurAnalysePgm.
573
574 ------------------------------------------------------------
575 Note [Inline rules]
576 ~~~~~~~~~~~~~~~~~~~
577 None of the above stuff about RULES applies to Inline Rules,
578 stored in a CoreUnfolding. The unfolding, if any, is simplified
579 at the same time as the regular RHS of the function (ie *not* like
580 Note [Rules are visible in their own rec group]), so it should be
581 treated *exactly* like an extra RHS.
582
583 Or, rather, when computing loop-breaker edges,
584 * If f has an INLINE pragma, and it is active, we treat the
585 INLINE rhs as f's rhs
586 * If it's inactive, we treat f as having no rhs
587 * If it has no INLINE pragma, we look at f's actual rhs
588
589
590 There is a danger that we'll be sub-optimal if we see this
591 f = ...f...
592 [INLINE f = ..no f...]
593 where f is recursive, but the INLINE is not. This can just about
594 happen with a sufficiently odd set of rules; eg
595
596 foo :: Int -> Int
597 {-# INLINE [1] foo #-}
598 foo x = x+1
599
600 bar :: Int -> Int
601 {-# INLINE [1] bar #-}
602 bar x = foo x + 1
603
604 {-# RULES "foo" [~1] forall x. foo x = bar x #-}
605
606 Here the RULE makes bar recursive; but it's INLINE pragma remains
607 non-recursive. It's tempting to then say that 'bar' should not be
608 a loop breaker, but an attempt to do so goes wrong in two ways:
609 a) We may get
610 $df = ...$cfoo...
611 $cfoo = ...$df....
612 [INLINE $cfoo = ...no-$df...]
613 But we want $cfoo to depend on $df explicitly so that we
614 put the bindings in the right order to inline $df in $cfoo
615 and perhaps break the loop altogether. (Maybe this
616 b)
617
618
619 Example [eftInt]
620 ~~~~~~~~~~~~~~~
621 Example (from GHC.Enum):
622
623 eftInt :: Int# -> Int# -> [Int]
624 eftInt x y = ...(non-recursive)...
625
626 {-# INLINE [0] eftIntFB #-}
627 eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
628 eftIntFB c n x y = ...(non-recursive)...
629
630 {-# RULES
631 "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
632 "eftIntList" [1] eftIntFB (:) [] = eftInt
633 #-}
634
635 Note [Specialisation rules]
636 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
637 Consider this group, which is typical of what SpecConstr builds:
638
639 fs a = ....f (C a)....
640 f x = ....f (C a)....
641 {-# RULE f (C a) = fs a #-}
642
643 So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
644
645 But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
646 - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
647 - fs is inlined (say it's small)
648 - now there's another opportunity to apply the RULE
649
650 This showed up when compiling Control.Concurrent.Chan.getChanContents.
651
652 ------------------------------------------------------------
653 Note [Finding join points]
654 ~~~~~~~~~~~~~~~~~~~~~~~~~~
655 It's the occurrence analyser's job to find bindings that we can turn into join
656 points, but it doesn't perform that transformation right away. Rather, it marks
657 the eligible bindings as part of their occurrence data, leaving it to the
658 simplifier (or to simpleOptPgm) to actually change the binder's 'IdDetails'.
659 The simplifier then eta-expands the RHS if needed and then updates the
660 occurrence sites. Dividing the work this way means that the occurrence analyser
661 still only takes one pass, yet one can always tell the difference between a
662 function call and a jump by looking at the occurrence (because the same pass
663 changes the 'IdDetails' and propagates the binders to their occurrence sites).
664
665 To track potential join points, we use the 'occ_tail' field of OccInfo. A value
666 of `AlwaysTailCalled n` indicates that every occurrence of the variable is a
667 tail call with `n` arguments (counting both value and type arguments). Otherwise
668 'occ_tail' will be 'NoTailCallInfo'. The tail call info flows bottom-up with the
669 rest of 'OccInfo' until it goes on the binder.
670
671 Note [Rules and join points]
672 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
673
674 Things get fiddly with rules. Suppose we have:
675
676 let j :: Int -> Int
677 j y = 2 * y
678 k :: Int -> Int -> Int
679 {-# RULES "SPEC k 0" k 0 = j #-}
680 k x y = x + 2 * y
681 in ...
682
683 Now suppose that both j and k appear only as saturated tail calls in the body.
684 Thus we would like to make them both join points. The rule complicates matters,
685 though, as its RHS has an unapplied occurrence of j. *However*, if we were to
686 eta-expand the rule, all would be well:
687
688 {-# RULES "SPEC k 0" forall a. k 0 a = j a #-}
689
690 So conceivably we could notice that a potential join point would have an
691 "undersaturated" rule and account for it. This would mean we could make
692 something that's been specialised a join point, for instance. But local bindings
693 are rarely specialised, and being overly cautious about rules only
694 costs us anything when, for some `j`:
695
696 * Before specialisation, `j` has non-tail calls, so it can't be a join point.
697 * During specialisation, `j` gets specialised and thus acquires rules.
698 * Sometime afterward, the non-tail calls to `j` disappear (as dead code, say),
699 and so now `j` *could* become a join point.
700
701 This appears to be very rare in practice. TODO Perhaps we should gather
702 statistics to be sure.
703
704 ------------------------------------------------------------
705 Note [Adjusting right-hand sides]
706 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
707 There's a bit of a dance we need to do after analysing a lambda expression or
708 a right-hand side. In particular, we need to
709
710 a) call 'markAllInsideLam' *unless* the binding is for a thunk, a one-shot
711 lambda, or a non-recursive join point; and
712 b) call 'markAllNonTailCalled' *unless* the binding is for a join point.
713
714 Some examples, with how the free occurrences in e (assumed not to be a value
715 lambda) get marked:
716
717 inside lam non-tail-called
718 ------------------------------------------------------------
719 let x = e No Yes
720 let f = \x -> e Yes Yes
721 let f = \x{OneShot} -> e No Yes
722 \x -> e Yes Yes
723 join j x = e No No
724 joinrec j x = e Yes No
725
726 There are a few other caveats; most importantly, if we're marking a binding as
727 'AlwaysTailCalled', it's *going* to be a join point, so we treat it as one so
728 that the effect cascades properly. Consequently, at the time the RHS is
729 analysed, we won't know what adjustments to make; thus 'occAnalLamOrRhs' must
730 return the unadjusted 'UsageDetails', to be adjusted by 'adjustRhsUsage' once
731 join-point-hood has been decided.
732
733 Thus the overall sequence taking place in 'occAnalNonRecBind' and
734 'occAnalRecBind' is as follows:
735
736 1. Call 'occAnalLamOrRhs' to find usage information for the RHS.
737 2. Call 'tagNonRecBinder' or 'tagRecBinders', which decides whether to make
738 the binding a join point.
739 3. Call 'adjustRhsUsage' accordingly. (Done as part of 'tagRecBinders' when
740 recursive.)
741
742 (In the recursive case, this logic is spread between 'makeNode' and
743 'occAnalRec'.)
744 -}
745
746 ------------------------------------------------------------------
747 -- occAnalBind
748 ------------------------------------------------------------------
749
750 occAnalBind :: OccEnv -- The incoming OccEnv
751 -> TopLevelFlag
752 -> ImpRuleEdges
753 -> CoreBind
754 -> UsageDetails -- Usage details of scope
755 -> (UsageDetails, -- Of the whole let(rec)
756 [CoreBind])
757
758 occAnalBind env lvl top_env (NonRec binder rhs) body_usage
759 = occAnalNonRecBind env lvl top_env binder rhs body_usage
760 occAnalBind env lvl top_env (Rec pairs) body_usage
761 = occAnalRecBind env lvl top_env pairs body_usage
762
763 -----------------
764 occAnalNonRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> Var -> CoreExpr
765 -> UsageDetails -> (UsageDetails, [CoreBind])
766 occAnalNonRecBind env lvl imp_rule_edges binder rhs body_usage
767 | isTyVar binder -- A type let; we don't gather usage info
768 = (body_usage, [NonRec binder rhs])
769
770 | not (binder `usedIn` body_usage) -- It's not mentioned
771 = (body_usage, [])
772
773 | otherwise -- It's mentioned in the body
774 = (body_usage' +++ rhs_usage', [NonRec tagged_binder rhs'])
775 where
776 (body_usage', tagged_binder) = tagNonRecBinder lvl body_usage binder
777 mb_join_arity = willBeJoinId_maybe tagged_binder
778
779 (bndrs, body) = collectBinders rhs
780
781 (rhs_usage1, bndrs', body') = occAnalNonRecRhs env tagged_binder bndrs body
782 rhs' = mkLams (markJoinOneShots mb_join_arity bndrs') body'
783 -- For a /non-recursive/ join point we can mark all
784 -- its join-lambda as one-shot; and it's a good idea to do so
785
786 -- Unfoldings
787 -- See Note [Unfoldings and join points]
788 rhs_usage2 = case occAnalUnfolding env NonRecursive binder of
789 Just unf_usage -> rhs_usage1 +++ unf_usage
790 Nothing -> rhs_usage1
791
792 -- Rules
793 -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
794 rules_w_uds = occAnalRules env mb_join_arity NonRecursive tagged_binder
795 rhs_usage3 = rhs_usage2 +++ combineUsageDetailsList
796 (map (\(_, l, r) -> l +++ r) rules_w_uds)
797 rhs_usage4 = maybe rhs_usage3 (addManyOccsSet rhs_usage3) $
798 lookupVarEnv imp_rule_edges binder
799 -- See Note [Preventing loops due to imported functions rules]
800
801 -- Final adjustment
802 rhs_usage' = adjustRhsUsage mb_join_arity NonRecursive bndrs' rhs_usage4
803
804 -----------------
805 occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)]
806 -> UsageDetails -> (UsageDetails, [CoreBind])
807 occAnalRecBind env lvl imp_rule_edges pairs body_usage
808 = foldr (occAnalRec env lvl) (body_usage, []) sccs
809 -- For a recursive group, we
810 -- * occ-analyse all the RHSs
811 -- * compute strongly-connected components
812 -- * feed those components to occAnalRec
813 -- See Note [Recursive bindings: the grand plan]
814 where
815 sccs :: [SCC Details]
816 sccs = {-# SCC "occAnalBind.scc" #-}
817 stronglyConnCompFromEdgedVerticesUniq nodes
818
819 nodes :: [LetrecNode]
820 nodes = {-# SCC "occAnalBind.assoc" #-}
821 map (makeNode env imp_rule_edges bndr_set) pairs
822
823 bndr_set = mkVarSet (map fst pairs)
824
825 {-
826 Note [Unfoldings and join points]
827 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
828
829 We assume that anything in an unfolding occurs multiple times, since unfoldings
830 are often copied (that's the whole point!). But we still need to track tail
831 calls for the purpose of finding join points.
832 -}
833
834 -----------------------------
835 occAnalRec :: OccEnv -> TopLevelFlag
836 -> SCC Details
837 -> (UsageDetails, [CoreBind])
838 -> (UsageDetails, [CoreBind])
839
840 -- The NonRec case is just like a Let (NonRec ...) above
841 occAnalRec _ lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs
842 , nd_uds = rhs_uds, nd_rhs_bndrs = rhs_bndrs }))
843 (body_uds, binds)
844 | not (bndr `usedIn` body_uds)
845 = (body_uds, binds) -- See Note [Dead code]
846
847 | otherwise -- It's mentioned in the body
848 = (body_uds' +++ rhs_uds',
849 NonRec tagged_bndr rhs : binds)
850 where
851 (body_uds', tagged_bndr) = tagNonRecBinder lvl body_uds bndr
852 rhs_uds' = adjustRhsUsage (willBeJoinId_maybe tagged_bndr) NonRecursive
853 rhs_bndrs rhs_uds
854
855 -- The Rec case is the interesting one
856 -- See Note [Recursive bindings: the grand plan]
857 -- See Note [Loop breaking]
858 occAnalRec env lvl (CyclicSCC details_s) (body_uds, binds)
859 | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
860 = (body_uds, binds) -- See Note [Dead code]
861
862 | otherwise -- At this point we always build a single Rec
863 = -- pprTrace "occAnalRec" (vcat
864 -- [ text "weak_fvs" <+> ppr weak_fvs
865 -- , text "lb nodes" <+> ppr loop_breaker_nodes])
866 (final_uds, Rec pairs : binds)
867
868 where
869 bndrs = map nd_bndr details_s
870 bndr_set = mkVarSet bndrs
871
872 ------------------------------
873 -- See Note [Choosing loop breakers] for loop_breaker_nodes
874 final_uds :: UsageDetails
875 loop_breaker_nodes :: [LetrecNode]
876 (final_uds, loop_breaker_nodes)
877 = mkLoopBreakerNodes env lvl bndr_set body_uds details_s
878
879 ------------------------------
880 weak_fvs :: VarSet
881 weak_fvs = mapUnionVarSet nd_weak details_s
882
883 ---------------------------
884 -- Now reconstruct the cycle
885 pairs :: [(Id,CoreExpr)]
886 pairs | isEmptyVarSet weak_fvs = reOrderNodes 0 bndr_set weak_fvs loop_breaker_nodes []
887 | otherwise = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_nodes []
888 -- If weak_fvs is empty, the loop_breaker_nodes will include
889 -- all the edges in the original scope edges [remember,
890 -- weak_fvs is the difference between scope edges and
891 -- lb-edges], so a fresh SCC computation would yield a
892 -- single CyclicSCC result; and reOrderNodes deals with
893 -- exactly that case
894
895
896 ------------------------------------------------------------------
897 -- Loop breaking
898 ------------------------------------------------------------------
899
900 type Binding = (Id,CoreExpr)
901
902 loopBreakNodes :: Int
903 -> VarSet -- All binders
904 -> VarSet -- Binders whose dependencies may be "missing"
905 -- See Note [Weak loop breakers]
906 -> [LetrecNode]
907 -> [Binding] -- Append these to the end
908 -> [Binding]
909 {-
910 loopBreakNodes is applied to the list of nodes for a cyclic strongly
911 connected component (there's guaranteed to be a cycle). It returns
912 the same nodes, but
913 a) in a better order,
914 b) with some of the Ids having a IAmALoopBreaker pragma
915
916 The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means
917 that the simplifier can guarantee not to loop provided it never records an inlining
918 for these no-inline guys.
919
920 Furthermore, the order of the binds is such that if we neglect dependencies
921 on the no-inline Ids then the binds are topologically sorted. This means
922 that the simplifier will generally do a good job if it works from top bottom,
923 recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
924 -}
925
926 -- Return the bindings sorted into a plausible order, and marked with loop breakers.
927 loopBreakNodes depth bndr_set weak_fvs nodes binds
928 = -- pprTrace "loopBreakNodes" (ppr nodes) $
929 go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds
930 where
931 go [] binds = binds
932 go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
933
934 loop_break_scc scc binds
935 = case scc of
936 AcyclicSCC node -> mk_non_loop_breaker weak_fvs node : binds
937 CyclicSCC nodes -> reOrderNodes depth bndr_set weak_fvs nodes binds
938
939 ----------------------------------
940 reOrderNodes :: Int -> VarSet -> VarSet -> [LetrecNode] -> [Binding] -> [Binding]
941 -- Choose a loop breaker, mark it no-inline,
942 -- and call loopBreakNodes on the rest
943 reOrderNodes _ _ _ [] _ = panic "reOrderNodes"
944 reOrderNodes _ _ _ [node] binds = mk_loop_breaker node : binds
945 reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
946 = -- pprTrace "reOrderNodes" (vcat [ text "unchosen" <+> ppr unchosen
947 -- , text "chosen" <+> ppr chosen_nodes ]) $
948 loopBreakNodes new_depth bndr_set weak_fvs unchosen $
949 (map mk_loop_breaker chosen_nodes ++ binds)
950 where
951 (chosen_nodes, unchosen) = chooseLoopBreaker approximate_lb
952 (nd_score (node_payload node))
953 [node] [] nodes
954
955 approximate_lb = depth >= 2
956 new_depth | approximate_lb = 0
957 | otherwise = depth+1
958 -- After two iterations (d=0, d=1) give up
959 -- and approximate, returning to d=0
960
961 mk_loop_breaker :: LetrecNode -> Binding
962 mk_loop_breaker (node_payload -> ND { nd_bndr = bndr, nd_rhs = rhs})
963 = (bndr `setIdOccInfo` strongLoopBreaker { occ_tail = tail_info }, rhs)
964 where
965 tail_info = tailCallInfo (idOccInfo bndr)
966
967 mk_non_loop_breaker :: VarSet -> LetrecNode -> Binding
968 -- See Note [Weak loop breakers]
969 mk_non_loop_breaker weak_fvs (node_payload -> ND { nd_bndr = bndr
970 , nd_rhs = rhs})
971 | bndr `elemVarSet` weak_fvs = (setIdOccInfo bndr occ', rhs)
972 | otherwise = (bndr, rhs)
973 where
974 occ' = weakLoopBreaker { occ_tail = tail_info }
975 tail_info = tailCallInfo (idOccInfo bndr)
976
977 ----------------------------------
978 chooseLoopBreaker :: Bool -- True <=> Too many iterations,
979 -- so approximate
980 -> NodeScore -- Best score so far
981 -> [LetrecNode] -- Nodes with this score
982 -> [LetrecNode] -- Nodes with higher scores
983 -> [LetrecNode] -- Unprocessed nodes
984 -> ([LetrecNode], [LetrecNode])
985 -- This loop looks for the bind with the lowest score
986 -- to pick as the loop breaker. The rest accumulate in
987 chooseLoopBreaker _ _ loop_nodes acc []
988 = (loop_nodes, acc) -- Done
989
990 -- If approximate_loop_breaker is True, we pick *all*
991 -- nodes with lowest score, else just one
992 -- See Note [Complexity of loop breaking]
993 chooseLoopBreaker approx_lb loop_sc loop_nodes acc (node : nodes)
994 | approx_lb
995 , rank sc == rank loop_sc
996 = chooseLoopBreaker approx_lb loop_sc (node : loop_nodes) acc nodes
997
998 | sc `betterLB` loop_sc -- Better score so pick this new one
999 = chooseLoopBreaker approx_lb sc [node] (loop_nodes ++ acc) nodes
1000
1001 | otherwise -- Worse score so don't pick it
1002 = chooseLoopBreaker approx_lb loop_sc loop_nodes (node : acc) nodes
1003 where
1004 sc = nd_score (node_payload node)
1005
1006 {-
1007 Note [Complexity of loop breaking]
1008 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1009 The loop-breaking algorithm knocks out one binder at a time, and
1010 performs a new SCC analysis on the remaining binders. That can
1011 behave very badly in tightly-coupled groups of bindings; in the
1012 worst case it can be (N**2)*log N, because it does a full SCC
1013 on N, then N-1, then N-2 and so on.
1014
1015 To avoid this, we switch plans after 2 (or whatever) attempts:
1016 Plan A: pick one binder with the lowest score, make it
1017 a loop breaker, and try again
1018 Plan B: pick *all* binders with the lowest score, make them
1019 all loop breakers, and try again
1020 Since there are only a small finite number of scores, this will
1021 terminate in a constant number of iterations, rather than O(N)
1022 iterations.
1023
1024 You might thing that it's very unlikely, but RULES make it much
1025 more likely. Here's a real example from Trac #1969:
1026 Rec { $dm = \d.\x. op d
1027 {-# RULES forall d. $dm Int d = $s$dm1
1028 forall d. $dm Bool d = $s$dm2 #-}
1029
1030 dInt = MkD .... opInt ...
1031 dInt = MkD .... opBool ...
1032 opInt = $dm dInt
1033 opBool = $dm dBool
1034
1035 $s$dm1 = \x. op dInt
1036 $s$dm2 = \x. op dBool }
1037 The RULES stuff means that we can't choose $dm as a loop breaker
1038 (Note [Choosing loop breakers]), so we must choose at least (say)
1039 opInt *and* opBool, and so on. The number of loop breakders is
1040 linear in the number of instance declarations.
1041
1042 Note [Loop breakers and INLINE/INLINABLE pragmas]
1043 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1044 Avoid choosing a function with an INLINE pramga as the loop breaker!
1045 If such a function is mutually-recursive with a non-INLINE thing,
1046 then the latter should be the loop-breaker.
1047
1048 It's vital to distinguish between INLINE and INLINABLE (the
1049 Bool returned by hasStableCoreUnfolding_maybe). If we start with
1050 Rec { {-# INLINABLE f #-}
1051 f x = ...f... }
1052 and then worker/wrapper it through strictness analysis, we'll get
1053 Rec { {-# INLINABLE $wf #-}
1054 $wf p q = let x = (p,q) in ...f...
1055
1056 {-# INLINE f #-}
1057 f x = case x of (p,q) -> $wf p q }
1058
1059 Now it is vital that we choose $wf as the loop breaker, so we can
1060 inline 'f' in '$wf'.
1061
1062 Note [DFuns should not be loop breakers]
1063 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1064 It's particularly bad to make a DFun into a loop breaker. See
1065 Note [How instance declarations are translated] in TcInstDcls
1066
1067 We give DFuns a higher score than ordinary CONLIKE things because
1068 if there's a choice we want the DFun to be the non-loop breaker. Eg
1069
1070 rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
1071
1072 $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
1073 {-# DFUN #-}
1074 $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
1075 }
1076
1077 Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
1078 if we can't unravel the DFun first.
1079
1080 Note [Constructor applications]
1081 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1082 It's really really important to inline dictionaries. Real
1083 example (the Enum Ordering instance from GHC.Base):
1084
1085 rec f = \ x -> case d of (p,q,r) -> p x
1086 g = \ x -> case d of (p,q,r) -> q x
1087 d = (v, f, g)
1088
1089 Here, f and g occur just once; but we can't inline them into d.
1090 On the other hand we *could* simplify those case expressions if
1091 we didn't stupidly choose d as the loop breaker.
1092 But we won't because constructor args are marked "Many".
1093 Inlining dictionaries is really essential to unravelling
1094 the loops in static numeric dictionaries, see GHC.Float.
1095
1096 Note [Closure conversion]
1097 ~~~~~~~~~~~~~~~~~~~~~~~~~
1098 We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
1099 The immediate motivation came from the result of a closure-conversion transformation
1100 which generated code like this:
1101
1102 data Clo a b = forall c. Clo (c -> a -> b) c
1103
1104 ($:) :: Clo a b -> a -> b
1105 Clo f env $: x = f env x
1106
1107 rec { plus = Clo plus1 ()
1108
1109 ; plus1 _ n = Clo plus2 n
1110
1111 ; plus2 Zero n = n
1112 ; plus2 (Succ m) n = Succ (plus $: m $: n) }
1113
1114 If we inline 'plus' and 'plus1', everything unravels nicely. But if
1115 we choose 'plus1' as the loop breaker (which is entirely possible
1116 otherwise), the loop does not unravel nicely.
1117
1118
1119 @occAnalUnfolding@ deals with the question of bindings where the Id is marked
1120 by an INLINE pragma. For these we record that anything which occurs
1121 in its RHS occurs many times. This pessimistically assumes that this
1122 inlined binder also occurs many times in its scope, but if it doesn't
1123 we'll catch it next time round. At worst this costs an extra simplifier pass.
1124 ToDo: try using the occurrence info for the inline'd binder.
1125
1126 [March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC.
1127 [June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC.
1128
1129
1130 ************************************************************************
1131 * *
1132 Making nodes
1133 * *
1134 ************************************************************************
1135 -}
1136
1137 type ImpRuleEdges = IdEnv IdSet -- Mapping from FVs of imported RULE LHSs to RHS FVs
1138
1139 noImpRuleEdges :: ImpRuleEdges
1140 noImpRuleEdges = emptyVarEnv
1141
1142 type LetrecNode = Node Unique Details -- Node comes from Digraph
1143 -- The Unique key is gotten from the Id
1144 data Details
1145 = ND { nd_bndr :: Id -- Binder
1146 , nd_rhs :: CoreExpr -- RHS, already occ-analysed
1147 , nd_rhs_bndrs :: [CoreBndr] -- Outer lambdas of RHS
1148 -- INVARIANT: (nd_rhs_bndrs nd, _) ==
1149 -- collectBinders (nd_rhs nd)
1150
1151 , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings
1152 -- ignoring phase (ie assuming all are active)
1153 -- See Note [Forming Rec groups]
1154
1155 , nd_inl :: IdSet -- Free variables of
1156 -- the stable unfolding (if present and active)
1157 -- or the RHS (if not)
1158 -- but excluding any RULES
1159 -- This is the IdSet that may be used if the Id is inlined
1160
1161 , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds
1162 -- but are *not* in nd_inl. These are the ones whose
1163 -- dependencies might not be respected by loop_breaker_nodes
1164 -- See Note [Weak loop breakers]
1165
1166 , nd_active_rule_fvs :: IdSet -- Free variables of the RHS of active RULES
1167
1168 , nd_score :: NodeScore
1169 }
1170
1171 instance Outputable Details where
1172 ppr nd = text "ND" <> braces
1173 (sep [ text "bndr =" <+> ppr (nd_bndr nd)
1174 , text "uds =" <+> ppr (nd_uds nd)
1175 , text "inl =" <+> ppr (nd_inl nd)
1176 , text "weak =" <+> ppr (nd_weak nd)
1177 , text "rule =" <+> ppr (nd_active_rule_fvs nd)
1178 , text "score =" <+> ppr (nd_score nd)
1179 ])
1180
1181 -- The NodeScore is compared lexicographically;
1182 -- e.g. lower rank wins regardless of size
1183 type NodeScore = ( Int -- Rank: lower => more likely to be picked as loop breaker
1184 , Int -- Size of rhs: higher => more likely to be picked as LB
1185 -- Maxes out at maxExprSize; we just use it to prioritise
1186 -- small functions
1187 , Bool ) -- Was it a loop breaker before?
1188 -- True => more likely to be picked
1189 -- Note [Loop breakers, node scoring, and stability]
1190
1191 rank :: NodeScore -> Int
1192 rank (r, _, _) = r
1193
1194 makeNode :: OccEnv -> ImpRuleEdges -> VarSet
1195 -> (Var, CoreExpr) -> LetrecNode
1196 -- See Note [Recursive bindings: the grand plan]
1197 makeNode env imp_rule_edges bndr_set (bndr, rhs)
1198 = DigraphNode details (varUnique bndr) (nonDetKeysUniqSet node_fvs)
1199 -- It's OK to use nonDetKeysUniqSet here as stronglyConnCompFromEdgedVerticesR
1200 -- is still deterministic with edges in nondeterministic order as
1201 -- explained in Note [Deterministic SCC] in Digraph.
1202 where
1203 details = ND { nd_bndr = bndr
1204 , nd_rhs = rhs'
1205 , nd_rhs_bndrs = bndrs'
1206 , nd_uds = rhs_usage3
1207 , nd_inl = inl_fvs
1208 , nd_weak = node_fvs `minusVarSet` inl_fvs
1209 , nd_active_rule_fvs = active_rule_fvs
1210 , nd_score = pprPanic "makeNodeDetails" (ppr bndr) }
1211
1212 -- Constructing the edges for the main Rec computation
1213 -- See Note [Forming Rec groups]
1214 (bndrs, body) = collectBinders rhs
1215 (rhs_usage1, bndrs', body') = occAnalRecRhs env bndrs body
1216 rhs' = mkLams bndrs' body'
1217 rhs_usage2 = rhs_usage1 +++ all_rule_uds
1218 -- Note [Rules are extra RHSs]
1219 -- Note [Rule dependency info]
1220 rhs_usage3 = case mb_unf_uds of
1221 Just unf_uds -> rhs_usage2 +++ unf_uds
1222 Nothing -> rhs_usage2
1223 node_fvs = udFreeVars bndr_set rhs_usage3
1224
1225 -- Finding the free variables of the rules
1226 is_active = occ_rule_act env :: Activation -> Bool
1227
1228 rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)]
1229 rules_w_uds = occAnalRules env (Just (length bndrs)) Recursive bndr
1230
1231 rules_w_rhs_fvs :: [(Activation, VarSet)] -- Find the RHS fvs
1232 rules_w_rhs_fvs = maybe id (\ids -> ((AlwaysActive, ids):))
1233 (lookupVarEnv imp_rule_edges bndr)
1234 -- See Note [Preventing loops due to imported functions rules]
1235 [ (ru_act rule, udFreeVars bndr_set rhs_uds)
1236 | (rule, _, rhs_uds) <- rules_w_uds ]
1237 all_rule_uds = combineUsageDetailsList $
1238 concatMap (\(_, l, r) -> [l, r]) rules_w_uds
1239 active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_rhs_fvs
1240 , is_active a]
1241
1242 -- Finding the usage details of the INLINE pragma (if any)
1243 mb_unf_uds = occAnalUnfolding env Recursive bndr
1244
1245 -- Find the "nd_inl" free vars; for the loop-breaker phase
1246 inl_fvs = case mb_unf_uds of
1247 Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
1248 Just unf_uds -> udFreeVars bndr_set unf_uds
1249 -- We could check for an *active* INLINE (returning
1250 -- emptyVarSet for an inactive one), but is_active
1251 -- isn't the right thing (it tells about
1252 -- RULE activation), so we'd need more plumbing
1253
1254 mkLoopBreakerNodes :: OccEnv -> TopLevelFlag
1255 -> VarSet
1256 -> UsageDetails -- for BODY of let
1257 -> [Details]
1258 -> (UsageDetails, -- adjusted
1259 [LetrecNode])
1260 -- Does four things
1261 -- a) tag each binder with its occurrence info
1262 -- b) add a NodeScore to each node
1263 -- c) make a Node with the right dependency edges for
1264 -- the loop-breaker SCC analysis
1265 -- d) adjust each RHS's usage details according to
1266 -- the binder's (new) shotness and join-point-hood
1267 mkLoopBreakerNodes env lvl bndr_set body_uds details_s
1268 = (final_uds, zipWith mk_lb_node details_s bndrs')
1269 where
1270 (final_uds, bndrs') = tagRecBinders lvl body_uds
1271 [ ((nd_bndr nd)
1272 ,(nd_uds nd)
1273 ,(nd_rhs_bndrs nd))
1274 | nd <- details_s ]
1275 mk_lb_node nd@(ND { nd_bndr = bndr, nd_rhs = rhs, nd_inl = inl_fvs }) bndr'
1276 = DigraphNode nd' (varUnique bndr) (nonDetKeysUniqSet lb_deps)
1277 -- It's OK to use nonDetKeysUniqSet here as
1278 -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges
1279 -- in nondeterministic order as explained in
1280 -- Note [Deterministic SCC] in Digraph.
1281 where
1282 nd' = nd { nd_bndr = bndr', nd_score = score }
1283 score = nodeScore env bndr bndr' rhs lb_deps
1284 lb_deps = extendFvs_ rule_fv_env inl_fvs
1285
1286 rule_fv_env :: IdEnv IdSet
1287 -- Maps a variable f to the variables from this group
1288 -- mentioned in RHS of active rules for f
1289 -- Domain is *subset* of bound vars (others have no rule fvs)
1290 rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
1291 init_rule_fvs -- See Note [Finding rule RHS free vars]
1292 = [ (b, trimmed_rule_fvs)
1293 | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s
1294 , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
1295 , not (isEmptyVarSet trimmed_rule_fvs) ]
1296
1297
1298 ------------------------------------------
1299 nodeScore :: OccEnv
1300 -> Id -- Binder has old occ-info (just for loop-breaker-ness)
1301 -> Id -- Binder with new occ-info
1302 -> CoreExpr -- RHS
1303 -> VarSet -- Loop-breaker dependencies
1304 -> NodeScore
1305 nodeScore env old_bndr new_bndr bind_rhs lb_deps
1306 | not (isId old_bndr) -- A type or cercion variable is never a loop breaker
1307 = (100, 0, False)
1308
1309 | old_bndr `elemVarSet` lb_deps -- Self-recursive things are great loop breakers
1310 = (0, 0, True) -- See Note [Self-recursion and loop breakers]
1311
1312 | not (occ_unf_act env old_bndr) -- A binder whose inlining is inactive (e.g. has
1313 = (0, 0, True) -- a NOINLINE pragam) makes a great loop breaker
1314
1315 | exprIsTrivial rhs
1316 = mk_score 10 -- Practically certain to be inlined
1317 -- Used to have also: && not (isExportedId bndr)
1318 -- But I found this sometimes cost an extra iteration when we have
1319 -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }
1320 -- where df is the exported dictionary. Then df makes a really
1321 -- bad choice for loop breaker
1322
1323 | DFunUnfolding { df_args = args } <- id_unfolding
1324 -- Never choose a DFun as a loop breaker
1325 -- Note [DFuns should not be loop breakers]
1326 = (9, length args, is_lb)
1327
1328 -- Data structures are more important than INLINE pragmas
1329 -- so that dictionary/method recursion unravels
1330
1331 | CoreUnfolding { uf_guidance = UnfWhen {} } <- id_unfolding
1332 = mk_score 6
1333
1334 | is_con_app rhs -- Data types help with cases:
1335 = mk_score 5 -- Note [Constructor applications]
1336
1337 | isStableUnfolding id_unfolding
1338 , can_unfold
1339 = mk_score 3
1340
1341 | isOneOcc (idOccInfo new_bndr)
1342 = mk_score 2 -- Likely to be inlined
1343
1344 | can_unfold -- The Id has some kind of unfolding
1345 = mk_score 1
1346
1347 | otherwise
1348 = (0, 0, is_lb)
1349
1350 where
1351 mk_score :: Int -> NodeScore
1352 mk_score rank = (rank, rhs_size, is_lb)
1353
1354 is_lb = isStrongLoopBreaker (idOccInfo old_bndr)
1355 rhs = case id_unfolding of
1356 CoreUnfolding { uf_src = src, uf_tmpl = unf_rhs }
1357 | isStableSource src
1358 -> unf_rhs
1359 _ -> bind_rhs
1360 -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding
1361 rhs_size = case id_unfolding of
1362 CoreUnfolding { uf_guidance = guidance }
1363 | UnfIfGoodArgs { ug_size = size } <- guidance
1364 -> size
1365 _ -> cheapExprSize rhs
1366
1367 can_unfold = canUnfold id_unfolding
1368 id_unfolding = realIdUnfolding old_bndr
1369 -- realIdUnfolding: Ignore loop-breaker-ness here because
1370 -- that is what we are setting!
1371
1372 -- Checking for a constructor application
1373 -- Cheap and cheerful; the simplifier moves casts out of the way
1374 -- The lambda case is important to spot x = /\a. C (f a)
1375 -- which comes up when C is a dictionary constructor and
1376 -- f is a default method.
1377 -- Example: the instance for Show (ST s a) in GHC.ST
1378 --
1379 -- However we *also* treat (\x. C p q) as a con-app-like thing,
1380 -- Note [Closure conversion]
1381 is_con_app (Var v) = isConLikeId v
1382 is_con_app (App f _) = is_con_app f
1383 is_con_app (Lam _ e) = is_con_app e
1384 is_con_app (Tick _ e) = is_con_app e
1385 is_con_app _ = False
1386
1387 maxExprSize :: Int
1388 maxExprSize = 20 -- Rather arbitrary
1389
1390 cheapExprSize :: CoreExpr -> Int
1391 -- Maxes out at maxExprSize
1392 cheapExprSize e
1393 = go 0 e
1394 where
1395 go n e | n >= maxExprSize = n
1396 | otherwise = go1 n e
1397
1398 go1 n (Var {}) = n+1
1399 go1 n (Lit {}) = n+1
1400 go1 n (Type {}) = n
1401 go1 n (Coercion {}) = n
1402 go1 n (Tick _ e) = go1 n e
1403 go1 n (Cast e _) = go1 n e
1404 go1 n (App f a) = go (go1 n f) a
1405 go1 n (Lam b e)
1406 | isTyVar b = go1 n e
1407 | otherwise = go (n+1) e
1408 go1 n (Let b e) = gos (go1 n e) (rhssOfBind b)
1409 go1 n (Case e _ _ as) = gos (go1 n e) (rhssOfAlts as)
1410
1411 gos n [] = n
1412 gos n (e:es) | n >= maxExprSize = n
1413 | otherwise = gos (go1 n e) es
1414
1415 betterLB :: NodeScore -> NodeScore -> Bool
1416 -- If n1 `betterLB` n2 then choose n1 as the loop breaker
1417 betterLB (rank1, size1, lb1) (rank2, size2, _)
1418 | rank1 < rank2 = True
1419 | rank1 > rank2 = False
1420 | size1 < size2 = False -- Make the bigger n2 into the loop breaker
1421 | size1 > size2 = True
1422 | lb1 = True -- Tie-break: if n1 was a loop breaker before, choose it
1423 | otherwise = False -- See Note [Loop breakers, node scoring, and stability]
1424
1425 {- Note [Self-recursion and loop breakers]
1426 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1427 If we have
1428 rec { f = ...f...g...
1429 ; g = .....f... }
1430 then 'f' has to be a loop breaker anyway, so we may as well choose it
1431 right away, so that g can inline freely.
1432
1433 This is really just a cheap hack. Consider
1434 rec { f = ...g...
1435 ; g = ..f..h...
1436 ; h = ...f....}
1437 Here f or g are better loop breakers than h; but we might accidentally
1438 choose h. Finding the minimal set of loop breakers is hard.
1439
1440 Note [Loop breakers, node scoring, and stability]
1441 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1442 To choose a loop breaker, we give a NodeScore to each node in the SCC,
1443 and pick the one with the best score (according to 'betterLB').
1444
1445 We need to be jolly careful (Trac #12425, #12234) about the stability
1446 of this choice. Suppose we have
1447
1448 let rec { f = ...g...g...
1449 ; g = ...f...f... }
1450 in
1451 case x of
1452 True -> ...f..
1453 False -> ..f...
1454
1455 In each iteration of the simplifier the occurrence analyser OccAnal
1456 chooses a loop breaker. Suppose in iteration 1 it choose g as the loop
1457 breaker. That means it is free to inline f.
1458
1459 Suppose that GHC decides to inline f in the branches of the case, but
1460 (for some reason; eg it is not saturated) in the rhs of g. So we get
1461
1462 let rec { f = ...g...g...
1463 ; g = ...f...f... }
1464 in
1465 case x of
1466 True -> ...g...g.....
1467 False -> ..g..g....
1468
1469 Now suppose that, for some reason, in the next iteration the occurrence
1470 analyser chooses f as the loop breaker, so it can freely inline g. And
1471 again for some reason the simplifier inlines g at its calls in the case
1472 branches, but not in the RHS of f. Then we get
1473
1474 let rec { f = ...g...g...
1475 ; g = ...f...f... }
1476 in
1477 case x of
1478 True -> ...(...f...f...)...(...f..f..).....
1479 False -> ..(...f...f...)...(..f..f...)....
1480
1481 You can see where this is going! Each iteration of the simplifier
1482 doubles the number of calls to f or g. No wonder GHC is slow!
1483
1484 (In the particular example in comment:3 of #12425, f and g are the two
1485 mutually recursive fmap instances for CondT and Result. They are both
1486 marked INLINE which, oddly, is why they don't inline in each other's
1487 RHS, because the call there is not saturated.)
1488
1489 The root cause is that we flip-flop on our choice of loop breaker. I
1490 always thought it didn't matter, and indeed for any single iteration
1491 to terminate, it doesn't matter. But when we iterate, it matters a
1492 lot!!
1493
1494 So The Plan is this:
1495 If there is a tie, choose the node that
1496 was a loop breaker last time round
1497
1498 Hence the is_lb field of NodeScore
1499
1500 ************************************************************************
1501 * *
1502 Right hand sides
1503 * *
1504 ************************************************************************
1505 -}
1506
1507 occAnalRhs :: OccEnv -> RecFlag -> Id -> [CoreBndr] -> CoreExpr
1508 -> (UsageDetails, [CoreBndr], CoreExpr)
1509 -- Returned usage details covers only the RHS,
1510 -- and *not* the RULE or INLINE template for the Id
1511 occAnalRhs env Recursive _ bndrs body
1512 = occAnalRecRhs env bndrs body
1513 occAnalRhs env NonRecursive id bndrs body
1514 = occAnalNonRecRhs env id bndrs body
1515
1516 occAnalRecRhs :: OccEnv -> [CoreBndr] -> CoreExpr -- Rhs lambdas, body
1517 -> (UsageDetails, [CoreBndr], CoreExpr)
1518 -- Returned usage details covers only the RHS,
1519 -- and *not* the RULE or INLINE template for the Id
1520 occAnalRecRhs env bndrs body = occAnalLamOrRhs (rhsCtxt env) bndrs body
1521
1522 occAnalNonRecRhs :: OccEnv
1523 -> Id -> [CoreBndr] -> CoreExpr -- Binder; rhs lams, body
1524 -- Binder is already tagged with occurrence info
1525 -> (UsageDetails, [CoreBndr], CoreExpr)
1526 -- Returned usage details covers only the RHS,
1527 -- and *not* the RULE or INLINE template for the Id
1528 occAnalNonRecRhs env bndr bndrs body
1529 = occAnalLamOrRhs rhs_env bndrs body
1530 where
1531 env1 | is_join_point = env -- See Note [Join point RHSs]
1532 | certainly_inline = env -- See Note [Cascading inlines]
1533 | otherwise = rhsCtxt env
1534
1535 -- See Note [Sources of one-shot information]
1536 rhs_env = env1 { occ_one_shots = argOneShots dmd }
1537
1538 certainly_inline -- See Note [Cascading inlines]
1539 = case occ of
1540 OneOcc { occ_in_lam = in_lam, occ_one_br = one_br }
1541 -> not in_lam && one_br && active && not_stable
1542 _ -> False
1543
1544 is_join_point = isAlwaysTailCalled occ
1545 -- Like (isJoinId bndr) but happens one step earlier
1546 -- c.f. willBeJoinId_maybe
1547
1548 occ = idOccInfo bndr
1549 dmd = idDemandInfo bndr
1550 active = isAlwaysActive (idInlineActivation bndr)
1551 not_stable = not (isStableUnfolding (idUnfolding bndr))
1552
1553 occAnalUnfolding :: OccEnv
1554 -> RecFlag
1555 -> Id
1556 -> Maybe UsageDetails
1557 -- Just the analysis, not a new unfolding. The unfolding
1558 -- got analysed when it was created and we don't need to
1559 -- update it.
1560 occAnalUnfolding env rec_flag id
1561 = case realIdUnfolding id of -- ignore previous loop-breaker flag
1562 CoreUnfolding { uf_tmpl = rhs, uf_src = src }
1563 | not (isStableSource src)
1564 -> Nothing
1565 | otherwise
1566 -> Just $ markAllMany usage
1567 where
1568 (bndrs, body) = collectBinders rhs
1569 (usage, _, _) = occAnalRhs env rec_flag id bndrs body
1570
1571 DFunUnfolding { df_bndrs = bndrs, df_args = args }
1572 -> Just $ zapDetails (delDetailsList usage bndrs)
1573 where
1574 usage = foldr (+++) emptyDetails (map (fst . occAnal env) args)
1575
1576 _ -> Nothing
1577
1578 occAnalRules :: OccEnv
1579 -> Maybe JoinArity -- If the binder is (or MAY become) a join
1580 -- point, what its join arity is (or WOULD
1581 -- become). See Note [Rules and join points].
1582 -> RecFlag
1583 -> Id
1584 -> [(CoreRule, -- Each (non-built-in) rule
1585 UsageDetails, -- Usage details for LHS
1586 UsageDetails)] -- Usage details for RHS
1587 occAnalRules env mb_expected_join_arity rec_flag id
1588 = [ (rule, lhs_uds, rhs_uds) | rule@Rule {} <- idCoreRules id
1589 , let (lhs_uds, rhs_uds) = occ_anal_rule rule ]
1590 where
1591 occ_anal_rule (Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs })
1592 = (lhs_uds, final_rhs_uds)
1593 where
1594 lhs_uds = addManyOccsSet emptyDetails $
1595 (exprsFreeVars args `delVarSetList` bndrs)
1596 (rhs_bndrs, rhs_body) = collectBinders rhs
1597 (rhs_uds, _, _) = occAnalRhs env rec_flag id rhs_bndrs rhs_body
1598 -- Note [Rules are extra RHSs]
1599 -- Note [Rule dependency info]
1600 final_rhs_uds = adjust_tail_info args $ markAllMany $
1601 (rhs_uds `delDetailsList` bndrs)
1602 occ_anal_rule _
1603 = (emptyDetails, emptyDetails)
1604
1605 adjust_tail_info args uds -- see Note [Rules and join points]
1606 = case mb_expected_join_arity of
1607 Just ar | args `lengthIs` ar -> uds
1608 _ -> markAllNonTailCalled uds
1609 {- Note [Join point RHSs]
1610 ~~~~~~~~~~~~~~~~~~~~~~~~~
1611 Consider
1612 x = e
1613 join j = Just x
1614
1615 We want to inline x into j right away, so we don't want to give
1616 the join point a RhsCtxt (Trac #14137). It's not a huge deal, because
1617 the FloatIn pass knows to float into join point RHSs; and the simplifier
1618 does not float things out of join point RHSs. But it's a simple, cheap
1619 thing to do. See Trac #14137.
1620
1621 Note [Cascading inlines]
1622 ~~~~~~~~~~~~~~~~~~~~~~~~
1623 By default we use an rhsCtxt for the RHS of a binding. This tells the
1624 occ anal n that it's looking at an RHS, which has an effect in
1625 occAnalApp. In particular, for constructor applications, it makes
1626 the arguments appear to have NoOccInfo, so that we don't inline into
1627 them. Thus x = f y
1628 k = Just x
1629 we do not want to inline x.
1630
1631 But there's a problem. Consider
1632 x1 = a0 : []
1633 x2 = a1 : x1
1634 x3 = a2 : x2
1635 g = f x3
1636 First time round, it looks as if x1 and x2 occur as an arg of a
1637 let-bound constructor ==> give them a many-occurrence.
1638 But then x3 is inlined (unconditionally as it happens) and
1639 next time round, x2 will be, and the next time round x1 will be
1640 Result: multiple simplifier iterations. Sigh.
1641
1642 So, when analysing the RHS of x3 we notice that x3 will itself
1643 definitely inline the next time round, and so we analyse x3's rhs in
1644 an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff.
1645
1646 Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally.
1647 If (a) the RHS is expandable (see isExpandableApp in occAnalApp), and
1648 (b) certainly_inline says "yes" when preInlineUnconditionally says "no"
1649 then the simplifier iterates indefinitely:
1650 x = f y
1651 k = Just x -- We decide that k is 'certainly_inline'
1652 v = ...k... -- but preInlineUnconditionally doesn't inline it
1653 inline ==>
1654 k = Just (f y)
1655 v = ...k...
1656 float ==>
1657 x1 = f y
1658 k = Just x1
1659 v = ...k...
1660
1661 This is worse than the slow cascade, so we only want to say "certainly_inline"
1662 if it really is certain. Look at the note with preInlineUnconditionally
1663 for the various clauses.
1664
1665
1666 ************************************************************************
1667 * *
1668 Expressions
1669 * *
1670 ************************************************************************
1671 -}
1672
1673 occAnal :: OccEnv
1674 -> CoreExpr
1675 -> (UsageDetails, -- Gives info only about the "interesting" Ids
1676 CoreExpr)
1677
1678 occAnal _ expr@(Type _) = (emptyDetails, expr)
1679 occAnal _ expr@(Lit _) = (emptyDetails, expr)
1680 occAnal env expr@(Var _) = occAnalApp env (expr, [], [])
1681 -- At one stage, I gathered the idRuleVars for the variable here too,
1682 -- which in a way is the right thing to do.
1683 -- But that went wrong right after specialisation, when
1684 -- the *occurrences* of the overloaded function didn't have any
1685 -- rules in them, so the *specialised* versions looked as if they
1686 -- weren't used at all.
1687
1688 occAnal _ (Coercion co)
1689 = (addManyOccsSet emptyDetails (coVarsOfCo co), Coercion co)
1690 -- See Note [Gather occurrences of coercion variables]
1691
1692 {-
1693 Note [Gather occurrences of coercion variables]
1694 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1695 We need to gather info about what coercion variables appear, so that
1696 we can sort them into the right place when doing dependency analysis.
1697 -}
1698
1699 occAnal env (Tick tickish body)
1700 | SourceNote{} <- tickish
1701 = (usage, Tick tickish body')
1702 -- SourceNotes are best-effort; so we just proceed as usual.
1703 -- If we drop a tick due to the issues described below it's
1704 -- not the end of the world.
1705
1706 | tickish `tickishScopesLike` SoftScope
1707 = (markAllNonTailCalled usage, Tick tickish body')
1708
1709 | Breakpoint _ ids <- tickish
1710 = (usage_lam +++ foldr addManyOccs emptyDetails ids, Tick tickish body')
1711 -- never substitute for any of the Ids in a Breakpoint
1712
1713 | otherwise
1714 = (usage_lam, Tick tickish body')
1715 where
1716 !(usage,body') = occAnal env body
1717 -- for a non-soft tick scope, we can inline lambdas only
1718 usage_lam = markAllNonTailCalled (markAllInsideLam usage)
1719 -- TODO There may be ways to make ticks and join points play
1720 -- nicer together, but right now there are problems:
1721 -- let j x = ... in tick<t> (j 1)
1722 -- Making j a join point may cause the simplifier to drop t
1723 -- (if the tick is put into the continuation). So we don't
1724 -- count j 1 as a tail call.
1725 -- See #14242.
1726
1727 occAnal env (Cast expr co)
1728 = case occAnal env expr of { (usage, expr') ->
1729 let usage1 = zapDetailsIf (isRhsEnv env) usage
1730 -- usage1: if we see let x = y `cast` co
1731 -- then mark y as 'Many' so that we don't
1732 -- immediately inline y again.
1733 usage2 = addManyOccsSet usage1 (coVarsOfCo co)
1734 -- usage2: see Note [Gather occurrences of coercion variables]
1735 in (markAllNonTailCalled usage2, Cast expr' co)
1736 }
1737
1738 occAnal env app@(App _ _)
1739 = occAnalApp env (collectArgsTicks tickishFloatable app)
1740
1741 -- Ignore type variables altogether
1742 -- (a) occurrences inside type lambdas only not marked as InsideLam
1743 -- (b) type variables not in environment
1744
1745 occAnal env (Lam x body)
1746 | isTyVar x
1747 = case occAnal env body of { (body_usage, body') ->
1748 (markAllNonTailCalled body_usage, Lam x body')
1749 }
1750
1751 -- For value lambdas we do a special hack. Consider
1752 -- (\x. \y. ...x...)
1753 -- If we did nothing, x is used inside the \y, so would be marked
1754 -- as dangerous to dup. But in the common case where the abstraction
1755 -- is applied to two arguments this is over-pessimistic.
1756 -- So instead, we just mark each binder with its occurrence
1757 -- info in the *body* of the multiple lambda.
1758 -- Then, the simplifier is careful when partially applying lambdas.
1759
1760 occAnal env expr@(Lam _ _)
1761 = case occAnalLamOrRhs env binders body of { (usage, tagged_binders, body') ->
1762 let
1763 expr' = mkLams tagged_binders body'
1764 usage1 = markAllNonTailCalled usage
1765 one_shot_gp = all isOneShotBndr tagged_binders
1766 final_usage | one_shot_gp = usage1
1767 | otherwise = markAllInsideLam usage1
1768 in
1769 (final_usage, expr') }
1770 where
1771 (binders, body) = collectBinders expr
1772
1773 occAnal env (Case scrut bndr ty alts)
1774 = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') ->
1775 case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts') ->
1776 let
1777 alts_usage = foldr combineAltsUsageDetails emptyDetails alts_usage_s
1778 (alts_usage1, tagged_bndr) = tag_case_bndr alts_usage bndr
1779 total_usage = markAllNonTailCalled scrut_usage +++ alts_usage1
1780 -- Alts can have tail calls, but the scrutinee can't
1781 in
1782 total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
1783 where
1784 -- Note [Case binder usage]
1785 -- ~~~~~~~~~~~~~~~~~~~~~~~~
1786 -- The case binder gets a usage of either "many" or "dead", never "one".
1787 -- Reason: we like to inline single occurrences, to eliminate a binding,
1788 -- but inlining a case binder *doesn't* eliminate a binding.
1789 -- We *don't* want to transform
1790 -- case x of w { (p,q) -> f w }
1791 -- into
1792 -- case x of w { (p,q) -> f (p,q) }
1793 tag_case_bndr usage bndr
1794 = (usage', setIdOccInfo bndr final_occ_info)
1795 where
1796 occ_info = lookupDetails usage bndr
1797 usage' = usage `delDetails` bndr
1798 final_occ_info = case occ_info of IAmDead -> IAmDead
1799 _ -> noOccInfo
1800
1801 alt_env = mkAltEnv env scrut bndr
1802 occ_anal_alt = occAnalAlt alt_env
1803
1804 occ_anal_scrut (Var v) (alt1 : other_alts)
1805 | not (null other_alts) || not (isDefaultAlt alt1)
1806 = (mkOneOcc env v True 0, Var v)
1807 -- The 'True' says that the variable occurs in an interesting
1808 -- context; the case has at least one non-default alternative
1809 occ_anal_scrut (Tick t e) alts
1810 | t `tickishScopesLike` SoftScope
1811 -- No reason to not look through all ticks here, but only
1812 -- for soft-scoped ticks we can do so without having to
1813 -- update returned occurance info (see occAnal)
1814 = second (Tick t) $ occ_anal_scrut e alts
1815
1816 occ_anal_scrut scrut _alts
1817 = occAnal (vanillaCtxt env) scrut -- No need for rhsCtxt
1818
1819 occAnal env (Let bind body)
1820 = case occAnal env body of { (body_usage, body') ->
1821 case occAnalBind env NotTopLevel
1822 noImpRuleEdges bind
1823 body_usage of { (final_usage, new_binds) ->
1824 (final_usage, mkLets new_binds body') }}
1825
1826 occAnalArgs :: OccEnv -> [CoreExpr] -> [OneShots] -> (UsageDetails, [CoreExpr])
1827 occAnalArgs _ [] _
1828 = (emptyDetails, [])
1829
1830 occAnalArgs env (arg:args) one_shots
1831 | isTypeArg arg
1832 = case occAnalArgs env args one_shots of { (uds, args') ->
1833 (uds, arg:args') }
1834
1835 | otherwise
1836 = case argCtxt env one_shots of { (arg_env, one_shots') ->
1837 case occAnal arg_env arg of { (uds1, arg') ->
1838 case occAnalArgs env args one_shots' of { (uds2, args') ->
1839 (uds1 +++ uds2, arg':args') }}}
1840
1841 {-
1842 Applications are dealt with specially because we want
1843 the "build hack" to work.
1844
1845 Note [Arguments of let-bound constructors]
1846 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1847 Consider
1848 f x = let y = expensive x in
1849 let z = (True,y) in
1850 (case z of {(p,q)->q}, case z of {(p,q)->q})
1851 We feel free to duplicate the WHNF (True,y), but that means
1852 that y may be duplicated thereby.
1853
1854 If we aren't careful we duplicate the (expensive x) call!
1855 Constructors are rather like lambdas in this way.
1856 -}
1857
1858 occAnalApp :: OccEnv
1859 -> (Expr CoreBndr, [Arg CoreBndr], [Tickish Id])
1860 -> (UsageDetails, Expr CoreBndr)
1861 occAnalApp env (Var fun, args, ticks)
1862 | null ticks = (uds, mkApps (Var fun) args')
1863 | otherwise = (uds, mkTicks ticks $ mkApps (Var fun) args')
1864 where
1865 uds = fun_uds +++ final_args_uds
1866
1867 !(args_uds, args') = occAnalArgs env args one_shots
1868 !final_args_uds
1869 | isRhsEnv env && is_exp = markAllNonTailCalled $
1870 markAllInsideLam args_uds
1871 | otherwise = markAllNonTailCalled args_uds
1872 -- We mark the free vars of the argument of a constructor or PAP
1873 -- as "inside-lambda", if it is the RHS of a let(rec).
1874 -- This means that nothing gets inlined into a constructor or PAP
1875 -- argument position, which is what we want. Typically those
1876 -- constructor arguments are just variables, or trivial expressions.
1877 -- We use inside-lam because it's like eta-expanding the PAP.
1878 --
1879 -- This is the *whole point* of the isRhsEnv predicate
1880 -- See Note [Arguments of let-bound constructors]
1881
1882 n_val_args = valArgCount args
1883 n_args = length args
1884 fun_uds = mkOneOcc env fun (n_val_args > 0) n_args
1885 is_exp = isExpandableApp fun n_val_args
1886 -- See Note [CONLIKE pragma] in BasicTypes
1887 -- The definition of is_exp should match that in Simplify.prepareRhs
1888
1889 one_shots = argsOneShots (idStrictness fun) guaranteed_val_args
1890 guaranteed_val_args = n_val_args + length (takeWhile isOneShotInfo
1891 (occ_one_shots env))
1892 -- See Note [Sources of one-shot information], bullet point A']
1893
1894 occAnalApp env (fun, args, ticks)
1895 = (markAllNonTailCalled (fun_uds +++ args_uds),
1896 mkTicks ticks $ mkApps fun' args')
1897 where
1898 !(fun_uds, fun') = occAnal (addAppCtxt env args) fun
1899 -- The addAppCtxt is a bit cunning. One iteration of the simplifier
1900 -- often leaves behind beta redexs like
1901 -- (\x y -> e) a1 a2
1902 -- Here we would like to mark x,y as one-shot, and treat the whole
1903 -- thing much like a let. We do this by pushing some True items
1904 -- onto the context stack.
1905 !(args_uds, args') = occAnalArgs env args []
1906
1907 zapDetailsIf :: Bool -- If this is true
1908 -> UsageDetails -- Then do zapDetails on this
1909 -> UsageDetails
1910 zapDetailsIf True uds = zapDetails uds
1911 zapDetailsIf False uds = uds
1912
1913 {-
1914 Note [Sources of one-shot information]
1915 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1916 The occurrence analyser obtains one-shot-lambda information from two sources:
1917
1918 A: Saturated applications: eg f e1 .. en
1919
1920 In general, given a call (f e1 .. en) we can propagate one-shot info from
1921 f's strictness signature into e1 .. en, but /only/ if n is enough to
1922 saturate the strictness signature. A strictness signature like
1923
1924 f :: C1(C1(L))LS
1925
1926 means that *if f is applied to three arguments* then it will guarantee to
1927 call its first argument at most once, and to call the result of that at
1928 most once. But if f has fewer than three arguments, all bets are off; e.g.
1929
1930 map (f (\x y. expensive) e2) xs
1931
1932 Here the \x y abstraction may be called many times (once for each element of
1933 xs) so we should not mark x and y as one-shot. But if it was
1934
1935 map (f (\x y. expensive) 3 2) xs
1936
1937 then the first argument of f will be called at most once.
1938
1939 The one-shot info, derived from f's strictness signature, is
1940 computed by 'argsOneShots', called in occAnalApp.
1941
1942 A': Non-obviously saturated applications: eg build (f (\x y -> expensive))
1943 where f is as above.
1944
1945 In this case, f is only manifestly applied to one argument, so it does not
1946 look saturated. So by the previous point, we should not use its strictness
1947 signature to learn about the one-shotness of \x y. But in this case we can:
1948 build is fully applied, so we may use its strictness signature; and from
1949 that we learn that build calls its argument with two arguments *at most once*.
1950
1951 So there is really only one call to f, and it will have three arguments. In
1952 that sense, f is saturated, and we may proceed as described above.
1953
1954 Hence the computation of 'guaranteed_val_args' in occAnalApp, using
1955 '(occ_one_shots env)'. See also Trac #13227, comment:9
1956
1957 B: Let-bindings: eg let f = \c. let ... in \n -> blah
1958 in (build f, build f)
1959
1960 Propagate one-shot info from the demanand-info on 'f' to the
1961 lambdas in its RHS (which may not be syntactically at the top)
1962
1963 This information must have come from a previous run of the demanand
1964 analyser.
1965
1966 Previously, the demand analyser would *also* set the one-shot information, but
1967 that code was buggy (see #11770), so doing it only in on place, namely here, is
1968 saner.
1969
1970 Note [OneShots]
1971 ~~~~~~~~~~~~~~~
1972 When analysing an expression, the occ_one_shots argument contains information
1973 about how the function is being used. The length of the list indicates
1974 how many arguments will eventually be passed to the analysed expression,
1975 and the OneShotInfo indicates whether this application is once or multiple times.
1976
1977 Example:
1978
1979 Context of f occ_one_shots when analysing f
1980
1981 f 1 2 [OneShot, OneShot]
1982 map (f 1) [OneShot, NoOneShotInfo]
1983 build f [OneShot, OneShot]
1984 f 1 2 `seq` f 2 1 [NoOneShotInfo, OneShot]
1985
1986 Note [Binders in case alternatives]
1987 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1988 Consider
1989 case x of y { (a,b) -> f y }
1990 We treat 'a', 'b' as dead, because they don't physically occur in the
1991 case alternative. (Indeed, a variable is dead iff it doesn't occur in
1992 its scope in the output of OccAnal.) It really helps to know when
1993 binders are unused. See esp the call to isDeadBinder in
1994 Simplify.mkDupableAlt
1995
1996 In this example, though, the Simplifier will bring 'a' and 'b' back to
1997 life, beause it binds 'y' to (a,b) (imagine got inlined and
1998 scrutinised y).
1999 -}
2000
2001 occAnalLamOrRhs :: OccEnv -> [CoreBndr] -> CoreExpr
2002 -> (UsageDetails, [CoreBndr], CoreExpr)
2003 occAnalLamOrRhs env [] body
2004 = case occAnal env body of (body_usage, body') -> (body_usage, [], body')
2005 -- RHS of thunk or nullary join point
2006 occAnalLamOrRhs env (bndr:bndrs) body
2007 | isTyVar bndr
2008 = -- Important: Keep the environment so that we don't inline into an RHS like
2009 -- \(@ x) -> C @x (f @x)
2010 -- (see the beginning of Note [Cascading inlines]).
2011 case occAnalLamOrRhs env bndrs body of
2012 (body_usage, bndrs', body') -> (body_usage, bndr:bndrs', body')
2013 occAnalLamOrRhs env binders body
2014 = case occAnal env_body body of { (body_usage, body') ->
2015 let
2016 (final_usage, tagged_binders) = tagLamBinders body_usage binders'
2017 -- Use binders' to put one-shot info on the lambdas
2018 in
2019 (final_usage, tagged_binders, body') }
2020 where
2021 (env_body, binders') = oneShotGroup env binders
2022
2023 occAnalAlt :: (OccEnv, Maybe (Id, CoreExpr))
2024 -> CoreAlt
2025 -> (UsageDetails, Alt IdWithOccInfo)
2026 occAnalAlt (env, scrut_bind) (con, bndrs, rhs)
2027 = case occAnal env rhs of { (rhs_usage1, rhs1) ->
2028 let
2029 (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs
2030 -- See Note [Binders in case alternatives]
2031 (alt_usg', rhs2) =
2032 wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1
2033 in
2034 (alt_usg', (con, tagged_bndrs, rhs2)) }
2035
2036 wrapAltRHS :: OccEnv
2037 -> Maybe (Id, CoreExpr) -- proxy mapping generated by mkAltEnv
2038 -> UsageDetails -- usage for entire alt (p -> rhs)
2039 -> [Var] -- alt binders
2040 -> CoreExpr -- alt RHS
2041 -> (UsageDetails, CoreExpr)
2042 wrapAltRHS env (Just (scrut_var, let_rhs)) alt_usg bndrs alt_rhs
2043 | occ_binder_swap env
2044 , scrut_var `usedIn` alt_usg -- bndrs are not be present in alt_usg so this
2045 -- handles condition (a) in Note [Binder swap]
2046 , not captured -- See condition (b) in Note [Binder swap]
2047 = ( alt_usg' +++ let_rhs_usg
2048 , Let (NonRec tagged_scrut_var let_rhs') alt_rhs )
2049 where
2050 captured = any (`usedIn` let_rhs_usg) bndrs
2051 -- The rhs of the let may include coercion variables
2052 -- if the scrutinee was a cast, so we must gather their
2053 -- usage. See Note [Gather occurrences of coercion variables]
2054 (let_rhs_usg, let_rhs') = occAnal env let_rhs
2055 (alt_usg', [tagged_scrut_var]) = tagLamBinders alt_usg [scrut_var]
2056
2057 wrapAltRHS _ _ alt_usg _ alt_rhs
2058 = (alt_usg, alt_rhs)
2059
2060 {-
2061 ************************************************************************
2062 * *
2063 OccEnv
2064 * *
2065 ************************************************************************
2066 -}
2067
2068 data OccEnv
2069 = OccEnv { occ_encl :: !OccEncl -- Enclosing context information
2070 , occ_one_shots :: !OneShots -- See Note [OneShots]
2071 , occ_gbl_scrut :: GlobalScruts
2072
2073 , occ_unf_act :: Id -> Bool -- Which Id unfoldings are active
2074
2075 , occ_rule_act :: Activation -> Bool -- Which rules are active
2076 -- See Note [Finding rule RHS free vars]
2077
2078 , occ_binder_swap :: !Bool -- enable the binder_swap
2079 -- See CorePrep Note [Dead code in CorePrep]
2080 }
2081
2082 type GlobalScruts = IdSet -- See Note [Binder swap on GlobalId scrutinees]
2083
2084 -----------------------------
2085 -- OccEncl is used to control whether to inline into constructor arguments
2086 -- For example:
2087 -- x = (p,q) -- Don't inline p or q
2088 -- y = /\a -> (p a, q a) -- Still don't inline p or q
2089 -- z = f (p,q) -- Do inline p,q; it may make a rule fire
2090 -- So OccEncl tells enought about the context to know what to do when
2091 -- we encounter a constructor application or PAP.
2092
2093 data OccEncl
2094 = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda
2095 -- Don't inline into constructor args here
2096 | OccVanilla -- Argument of function, body of lambda, scruintee of case etc.
2097 -- Do inline into constructor args here
2098
2099 instance Outputable OccEncl where
2100 ppr OccRhs = text "occRhs"
2101 ppr OccVanilla = text "occVanilla"
2102
2103 -- See note [OneShots]
2104 type OneShots = [OneShotInfo]
2105
2106 initOccEnv :: OccEnv
2107 initOccEnv
2108 = OccEnv { occ_encl = OccVanilla
2109 , occ_one_shots = []
2110 , occ_gbl_scrut = emptyVarSet
2111 -- To be conservative, we say that all
2112 -- inlines and rules are active
2113 , occ_unf_act = \_ -> True
2114 , occ_rule_act = \_ -> True
2115 , occ_binder_swap = True }
2116
2117 vanillaCtxt :: OccEnv -> OccEnv
2118 vanillaCtxt env = env { occ_encl = OccVanilla, occ_one_shots = [] }
2119
2120 rhsCtxt :: OccEnv -> OccEnv
2121 rhsCtxt env = env { occ_encl = OccRhs, occ_one_shots = [] }
2122
2123 argCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots])
2124 argCtxt env []
2125 = (env { occ_encl = OccVanilla, occ_one_shots = [] }, [])
2126 argCtxt env (one_shots:one_shots_s)
2127 = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s)
2128
2129 isRhsEnv :: OccEnv -> Bool
2130 isRhsEnv (OccEnv { occ_encl = OccRhs }) = True
2131 isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False
2132
2133 oneShotGroup :: OccEnv -> [CoreBndr]
2134 -> ( OccEnv
2135 , [CoreBndr] )
2136 -- The result binders have one-shot-ness set that they might not have had originally.
2137 -- This happens in (build (\c n -> e)). Here the occurrence analyser
2138 -- linearity context knows that c,n are one-shot, and it records that fact in
2139 -- the binder. This is useful to guide subsequent float-in/float-out tranformations
2140
2141 oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs
2142 = go ctxt bndrs []
2143 where
2144 go ctxt [] rev_bndrs
2145 = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla }
2146 , reverse rev_bndrs )
2147
2148 go [] bndrs rev_bndrs
2149 = ( env { occ_one_shots = [], occ_encl = OccVanilla }
2150 , reverse rev_bndrs ++ bndrs )
2151
2152 go ctxt@(one_shot : ctxt') (bndr : bndrs) rev_bndrs
2153 | isId bndr = go ctxt' bndrs (bndr': rev_bndrs)
2154 | otherwise = go ctxt bndrs (bndr : rev_bndrs)
2155 where
2156 bndr' = updOneShotInfo bndr one_shot
2157 -- Use updOneShotInfo, not setOneShotInfo, as pre-existing
2158 -- one-shot info might be better than what we can infer, e.g.
2159 -- due to explicit use of the magic 'oneShot' function.
2160 -- See Note [The oneShot function]
2161
2162
2163 markJoinOneShots :: Maybe JoinArity -> [Var] -> [Var]
2164 -- Mark the lambdas of a non-recursive join point as one-shot.
2165 -- This is good to prevent gratuitous float-out etc
2166 markJoinOneShots mb_join_arity bndrs
2167 = case mb_join_arity of
2168 Nothing -> bndrs
2169 Just n -> go n bndrs
2170 where
2171 go 0 bndrs = bndrs
2172 go _ [] = WARN( True, ppr mb_join_arity <+> ppr bndrs ) []
2173 go n (b:bs) = b' : go (n-1) bs
2174 where
2175 b' | isId b = setOneShotLambda b
2176 | otherwise = b
2177
2178 addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
2179 addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args
2180 = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt }
2181
2182 transClosureFV :: UniqFM VarSet -> UniqFM VarSet
2183 -- If (f,g), (g,h) are in the input, then (f,h) is in the output
2184 -- as well as (f,g), (g,h)
2185 transClosureFV env
2186 | no_change = env
2187 | otherwise = transClosureFV (listToUFM new_fv_list)
2188 where
2189 (no_change, new_fv_list) = mapAccumL bump True (nonDetUFMToList env)
2190 -- It's OK to use nonDetUFMToList here because we'll forget the
2191 -- ordering by creating a new set with listToUFM
2192 bump no_change (b,fvs)
2193 | no_change_here = (no_change, (b,fvs))
2194 | otherwise = (False, (b,new_fvs))
2195 where
2196 (new_fvs, no_change_here) = extendFvs env fvs
2197
2198 -------------
2199 extendFvs_ :: UniqFM VarSet -> VarSet -> VarSet
2200 extendFvs_ env s = fst (extendFvs env s) -- Discard the Bool flag
2201
2202 extendFvs :: UniqFM VarSet -> VarSet -> (VarSet, Bool)
2203 -- (extendFVs env s) returns
2204 -- (s `union` env(s), env(s) `subset` s)
2205 extendFvs env s
2206 | isNullUFM env
2207 = (s, True)
2208 | otherwise
2209 = (s `unionVarSet` extras, extras `subVarSet` s)
2210 where
2211 extras :: VarSet -- env(s)
2212 extras = nonDetFoldUFM unionVarSet emptyVarSet $
2213 -- It's OK to use nonDetFoldUFM here because unionVarSet commutes
2214 intersectUFM_C (\x _ -> x) env (getUniqSet s)
2215
2216 {-
2217 ************************************************************************
2218 * *
2219 Binder swap
2220 * *
2221 ************************************************************************
2222
2223 Note [Binder swap]
2224 ~~~~~~~~~~~~~~~~~~
2225 We do these two transformations right here:
2226
2227 (1) case x of b { pi -> ri }
2228 ==>
2229 case x of b { pi -> let x=b in ri }
2230
2231 (2) case (x |> co) of b { pi -> ri }
2232 ==>
2233 case (x |> co) of b { pi -> let x = b |> sym co in ri }
2234
2235 Why (2)? See Note [Case of cast]
2236
2237 In both cases, in a particular alternative (pi -> ri), we only
2238 add the binding if
2239 (a) x occurs free in (pi -> ri)
2240 (ie it occurs in ri, but is not bound in pi)
2241 (b) the pi does not bind b (or the free vars of co)
2242 We need (a) and (b) for the inserted binding to be correct.
2243
2244 For the alternatives where we inject the binding, we can transfer
2245 all x's OccInfo to b. And that is the point.
2246
2247 Notice that
2248 * The deliberate shadowing of 'x'.
2249 * That (a) rapidly becomes false, so no bindings are injected.
2250
2251 The reason for doing these transformations here is because it allows
2252 us to adjust the OccInfo for 'x' and 'b' as we go.
2253
2254 * Suppose the only occurrences of 'x' are the scrutinee and in the
2255 ri; then this transformation makes it occur just once, and hence
2256 get inlined right away.
2257
2258 * If we do this in the Simplifier, we don't know whether 'x' is used
2259 in ri, so we are forced to pessimistically zap b's OccInfo even
2260 though it is typically dead (ie neither it nor x appear in the
2261 ri). There's nothing actually wrong with zapping it, except that
2262 it's kind of nice to know which variables are dead. My nose
2263 tells me to keep this information as robustly as possible.
2264
2265 The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
2266 {x=b}; it's Nothing if the binder-swap doesn't happen.
2267
2268 There is a danger though. Consider
2269 let v = x +# y
2270 in case (f v) of w -> ...v...v...
2271 And suppose that (f v) expands to just v. Then we'd like to
2272 use 'w' instead of 'v' in the alternative. But it may be too
2273 late; we may have substituted the (cheap) x+#y for v in the
2274 same simplifier pass that reduced (f v) to v.
2275
2276 I think this is just too bad. CSE will recover some of it.
2277
2278 Note [Case of cast]
2279 ~~~~~~~~~~~~~~~~~~~
2280 Consider case (x `cast` co) of b { I# ->
2281 ... (case (x `cast` co) of {...}) ...
2282 We'd like to eliminate the inner case. That is the motivation for
2283 equation (2) in Note [Binder swap]. When we get to the inner case, we
2284 inline x, cancel the casts, and away we go.
2285
2286 Note [Binder swap on GlobalId scrutinees]
2287 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2288 When the scrutinee is a GlobalId we must take care in two ways
2289
2290 i) In order to *know* whether 'x' occurs free in the RHS, we need its
2291 occurrence info. BUT, we don't gather occurrence info for
2292 GlobalIds. That's the reason for the (small) occ_gbl_scrut env in
2293 OccEnv is for: it says "gather occurrence info for these".
2294
2295 ii) We must call localiseId on 'x' first, in case it's a GlobalId, or
2296 has an External Name. See, for example, SimplEnv Note [Global Ids in
2297 the substitution].
2298
2299 Note [Zap case binders in proxy bindings]
2300 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2301 From the original
2302 case x of cb(dead) { p -> ...x... }
2303 we will get
2304 case x of cb(live) { p -> let x = cb in ...x... }
2305
2306 Core Lint never expects to find an *occurrence* of an Id marked
2307 as Dead, so we must zap the OccInfo on cb before making the
2308 binding x = cb. See Trac #5028.
2309
2310 Historical note [no-case-of-case]
2311 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2312 We *used* to suppress the binder-swap in case expressions when
2313 -fno-case-of-case is on. Old remarks:
2314 "This happens in the first simplifier pass,
2315 and enhances full laziness. Here's the bad case:
2316 f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
2317 If we eliminate the inner case, we trap it inside the I# v -> arm,
2318 which might prevent some full laziness happening. I've seen this
2319 in action in spectral/cichelli/Prog.hs:
2320 [(m,n) | m <- [1..max], n <- [1..max]]
2321 Hence the check for NoCaseOfCase."
2322 However, now the full-laziness pass itself reverses the binder-swap, so this
2323 check is no longer necessary.
2324
2325 Historical note [Suppressing the case binder-swap]
2326 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2327 This old note describes a problem that is also fixed by doing the
2328 binder-swap in OccAnal:
2329
2330 There is another situation when it might make sense to suppress the
2331 case-expression binde-swap. If we have
2332
2333 case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
2334 ...other cases .... }
2335
2336 We'll perform the binder-swap for the outer case, giving
2337
2338 case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
2339 ...other cases .... }
2340
2341 But there is no point in doing it for the inner case, because w1 can't
2342 be inlined anyway. Furthermore, doing the case-swapping involves
2343 zapping w2's occurrence info (see paragraphs that follow), and that
2344 forces us to bind w2 when doing case merging. So we get
2345
2346 case x of w1 { A -> let w2 = w1 in e1
2347 B -> let w2 = w1 in e2
2348 ...other cases .... }
2349
2350 This is plain silly in the common case where w2 is dead.
2351
2352 Even so, I can't see a good way to implement this idea. I tried
2353 not doing the binder-swap if the scrutinee was already evaluated
2354 but that failed big-time:
2355
2356 data T = MkT !Int
2357
2358 case v of w { MkT x ->
2359 case x of x1 { I# y1 ->
2360 case x of x2 { I# y2 -> ...
2361
2362 Notice that because MkT is strict, x is marked "evaluated". But to
2363 eliminate the last case, we must either make sure that x (as well as
2364 x1) has unfolding MkT y1. The straightforward thing to do is to do
2365 the binder-swap. So this whole note is a no-op.
2366
2367 It's fixed by doing the binder-swap in OccAnal because we can do the
2368 binder-swap unconditionally and still get occurrence analysis
2369 information right.
2370 -}
2371
2372 mkAltEnv :: OccEnv -> CoreExpr -> Id -> (OccEnv, Maybe (Id, CoreExpr))
2373 -- Does two things: a) makes the occ_one_shots = OccVanilla
2374 -- b) extends the GlobalScruts if possible
2375 -- c) returns a proxy mapping, binding the scrutinee
2376 -- to the case binder, if possible
2377 mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr
2378 = case stripTicksTopE (const True) scrut of
2379 Var v -> add_scrut v case_bndr'
2380 Cast (Var v) co -> add_scrut v (Cast case_bndr' (mkSymCo co))
2381 -- See Note [Case of cast]
2382 _ -> (env { occ_encl = OccVanilla }, Nothing)
2383
2384 where
2385 add_scrut v rhs = ( env { occ_encl = OccVanilla, occ_gbl_scrut = pe `extendVarSet` v }
2386 , Just (localise v, rhs) )
2387
2388 case_bndr' = Var (zapIdOccInfo case_bndr) -- See Note [Zap case binders in proxy bindings]
2389 localise scrut_var = mkLocalIdOrCoVar (localiseName (idName scrut_var)) (idType scrut_var)
2390 -- Localise the scrut_var before shadowing it; we're making a
2391 -- new binding for it, and it might have an External Name, or
2392 -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]
2393 -- Also we don't want any INLINE or NOINLINE pragmas!
2394
2395 {-
2396 ************************************************************************
2397 * *
2398 \subsection[OccurAnal-types]{OccEnv}
2399 * *
2400 ************************************************************************
2401
2402 Note [UsageDetails and zapping]
2403 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2404
2405 On many occasions, we must modify all gathered occurrence data at once. For
2406 instance, all occurrences underneath a (non-one-shot) lambda set the
2407 'occ_in_lam' flag to become 'True'. We could use 'mapVarEnv' to do this, but
2408 that takes O(n) time and we will do this often---in particular, there are many
2409 places where tail calls are not allowed, and each of these causes all variables
2410 to get marked with 'NoTailCallInfo'.
2411
2412 Instead of relying on `mapVarEnv`, then, we carry three 'IdEnv's around along
2413 with the 'OccInfoEnv'. Each of these extra environments is a "zapped set"
2414 recording which variables have been zapped in some way. Zapping all occurrence
2415 info then simply means setting the corresponding zapped set to the whole
2416 'OccInfoEnv', a fast O(1) operation.
2417 -}
2418
2419 type OccInfoEnv = IdEnv OccInfo -- A finite map from ids to their usage
2420 -- INVARIANT: never IAmDead
2421 -- (Deadness is signalled by not being in the map at all)
2422
2423 type ZappedSet = OccInfoEnv -- Values are ignored
2424
2425 data UsageDetails
2426 = UD { ud_env :: !OccInfoEnv
2427 , ud_z_many :: ZappedSet -- apply 'markMany' to these
2428 , ud_z_in_lam :: ZappedSet -- apply 'markInsideLam' to these
2429 , ud_z_no_tail :: ZappedSet } -- apply 'markNonTailCalled' to these
2430 -- INVARIANT: All three zapped sets are subsets of the OccInfoEnv
2431
2432 instance Outputable UsageDetails where
2433 ppr ud = ppr (ud_env (flattenUsageDetails ud))
2434
2435 -------------------
2436 -- UsageDetails API
2437
2438 (+++), combineAltsUsageDetails
2439 :: UsageDetails -> UsageDetails -> UsageDetails
2440 (+++) = combineUsageDetailsWith addOccInfo
2441 combineAltsUsageDetails = combineUsageDetailsWith orOccInfo
2442
2443 combineUsageDetailsList :: [UsageDetails] -> UsageDetails
2444 combineUsageDetailsList = foldl (+++) emptyDetails
2445
2446 mkOneOcc :: OccEnv -> Id -> InterestingCxt -> JoinArity -> UsageDetails
2447 mkOneOcc env id int_cxt arity
2448 | isLocalId id
2449 = singleton $ OneOcc { occ_in_lam = False
2450 , occ_one_br = True
2451 , occ_int_cxt = int_cxt
2452 , occ_tail = AlwaysTailCalled arity }
2453 | id `elemVarSet` occ_gbl_scrut env
2454 = singleton noOccInfo
2455
2456 | otherwise
2457 = emptyDetails
2458 where
2459 singleton info = emptyDetails { ud_env = unitVarEnv id info }
2460
2461 addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
2462 addOneOcc ud id info
2463 = ud { ud_env = extendVarEnv_C plus_zapped (ud_env ud) id info }
2464 `alterZappedSets` (`delVarEnv` id)
2465 where
2466 plus_zapped old new = doZapping ud id old `addOccInfo` new
2467
2468 addManyOccsSet :: UsageDetails -> VarSet -> UsageDetails
2469 addManyOccsSet usage id_set = nonDetFoldUniqSet addManyOccs usage id_set
2470 -- It's OK to use nonDetFoldUFM here because addManyOccs commutes
2471
2472 -- Add several occurrences, assumed not to be tail calls
2473 addManyOccs :: Var -> UsageDetails -> UsageDetails
2474 addManyOccs v u | isId v = addOneOcc u v noOccInfo
2475 | otherwise = u
2476 -- Give a non-committal binder info (i.e noOccInfo) because
2477 -- a) Many copies of the specialised thing can appear
2478 -- b) We don't want to substitute a BIG expression inside a RULE
2479 -- even if that's the only occurrence of the thing
2480 -- (Same goes for INLINE.)
2481
2482 delDetails :: UsageDetails -> Id -> UsageDetails
2483 delDetails ud bndr
2484 = ud `alterUsageDetails` (`delVarEnv` bndr)
2485
2486 delDetailsList :: UsageDetails -> [Id] -> UsageDetails
2487 delDetailsList ud bndrs
2488 = ud `alterUsageDetails` (`delVarEnvList` bndrs)
2489
2490 emptyDetails :: UsageDetails
2491 emptyDetails = UD { ud_env = emptyVarEnv
2492 , ud_z_many = emptyVarEnv
2493 , ud_z_in_lam = emptyVarEnv
2494 , ud_z_no_tail = emptyVarEnv }
2495
2496 isEmptyDetails :: UsageDetails -> Bool
2497 isEmptyDetails = isEmptyVarEnv . ud_env
2498
2499 markAllMany, markAllInsideLam, markAllNonTailCalled, zapDetails
2500 :: UsageDetails -> UsageDetails
2501 markAllMany ud = ud { ud_z_many = ud_env ud }
2502 markAllInsideLam ud = ud { ud_z_in_lam = ud_env ud }
2503 markAllNonTailCalled ud = ud { ud_z_no_tail = ud_env ud }
2504
2505 zapDetails = markAllMany . markAllNonTailCalled -- effectively sets to noOccInfo
2506
2507 lookupDetails :: UsageDetails -> Id -> OccInfo
2508 lookupDetails ud id
2509 = case lookupVarEnv (ud_env ud) id of
2510 Just occ -> doZapping ud id occ
2511 Nothing -> IAmDead
2512
2513 usedIn :: Id -> UsageDetails -> Bool
2514 v `usedIn` ud = isExportedId v || v `elemVarEnv` ud_env ud
2515
2516 udFreeVars :: VarSet -> UsageDetails -> VarSet
2517 -- Find the subset of bndrs that are mentioned in uds
2518 udFreeVars bndrs ud = restrictUniqSetToUFM bndrs (ud_env ud)
2519
2520 -------------------
2521 -- Auxiliary functions for UsageDetails implementation
2522
2523 combineUsageDetailsWith :: (OccInfo -> OccInfo -> OccInfo)
2524 -> UsageDetails -> UsageDetails -> UsageDetails
2525 combineUsageDetailsWith plus_occ_info ud1 ud2
2526 | isEmptyDetails ud1 = ud2
2527 | isEmptyDetails ud2 = ud1
2528 | otherwise
2529 = UD { ud_env = plusVarEnv_C plus_occ_info (ud_env ud1) (ud_env ud2)
2530 , ud_z_many = plusVarEnv (ud_z_many ud1) (ud_z_many ud2)
2531 , ud_z_in_lam = plusVarEnv (ud_z_in_lam ud1) (ud_z_in_lam ud2)
2532 , ud_z_no_tail = plusVarEnv (ud_z_no_tail ud1) (ud_z_no_tail ud2) }
2533
2534 doZapping :: UsageDetails -> Var -> OccInfo -> OccInfo
2535 doZapping ud var occ
2536 = doZappingByUnique ud (varUnique var) occ
2537
2538 doZappingByUnique :: UsageDetails -> Unique -> OccInfo -> OccInfo
2539 doZappingByUnique ud uniq
2540 = (if | in_subset ud_z_many -> markMany
2541 | in_subset ud_z_in_lam -> markInsideLam
2542 | otherwise -> id) .
2543 (if | in_subset ud_z_no_tail -> markNonTailCalled
2544 | otherwise -> id)
2545 where
2546 in_subset field = uniq `elemVarEnvByKey` field ud
2547
2548 alterZappedSets :: UsageDetails -> (ZappedSet -> ZappedSet) -> UsageDetails
2549 alterZappedSets ud f
2550 = ud { ud_z_many = f (ud_z_many ud)
2551 , ud_z_in_lam = f (ud_z_in_lam ud)
2552 , ud_z_no_tail = f (ud_z_no_tail ud) }
2553
2554 alterUsageDetails :: UsageDetails -> (OccInfoEnv -> OccInfoEnv) -> UsageDetails
2555 alterUsageDetails ud f
2556 = ud { ud_env = f (ud_env ud) }
2557 `alterZappedSets` f
2558
2559 flattenUsageDetails :: UsageDetails -> UsageDetails
2560 flattenUsageDetails ud
2561 = ud { ud_env = mapUFM_Directly (doZappingByUnique ud) (ud_env ud) }
2562 `alterZappedSets` const emptyVarEnv
2563
2564 -------------------
2565 -- See Note [Adjusting right-hand sides]
2566 adjustRhsUsage :: Maybe JoinArity -> RecFlag
2567 -> [CoreBndr] -- Outer lambdas, AFTER occ anal
2568 -> UsageDetails -> UsageDetails
2569 adjustRhsUsage mb_join_arity rec_flag bndrs usage
2570 = maybe_mark_lam (maybe_drop_tails usage)
2571 where
2572 maybe_mark_lam ud | one_shot = ud
2573 | otherwise = markAllInsideLam ud
2574 maybe_drop_tails ud | exact_join = ud
2575 | otherwise = markAllNonTailCalled ud
2576
2577 one_shot = case mb_join_arity of
2578 Just join_arity
2579 | isRec rec_flag -> False
2580 | otherwise -> all isOneShotBndr (drop join_arity bndrs)
2581 Nothing -> all isOneShotBndr bndrs
2582
2583 exact_join = case mb_join_arity of
2584 Just join_arity -> bndrs `lengthIs` join_arity
2585 _ -> False
2586
2587 type IdWithOccInfo = Id
2588
2589 tagLamBinders :: UsageDetails -- Of scope
2590 -> [Id] -- Binders
2591 -> (UsageDetails, -- Details with binders removed
2592 [IdWithOccInfo]) -- Tagged binders
2593 -- Used for lambda and case binders
2594 -- It copes with the fact that lambda bindings can have a
2595 -- stable unfolding, used for join points
2596 tagLamBinders usage binders = usage' `seq` (usage', bndrs')
2597 where
2598 (usage', bndrs') = mapAccumR tag_lam usage binders
2599 tag_lam usage bndr = (usage2, bndr')
2600 where
2601 occ = lookupDetails usage bndr
2602 bndr' = setBinderOcc (markNonTailCalled occ) bndr
2603 -- Don't try to make an argument into a join point
2604 usage1 = usage `delDetails` bndr
2605 usage2 | isId bndr = addManyOccsSet usage1 (idUnfoldingVars bndr)
2606 -- This is effectively the RHS of a
2607 -- non-join-point binding, so it's okay to use
2608 -- addManyOccsSet, which assumes no tail calls
2609 | otherwise = usage1
2610
2611 tagNonRecBinder :: TopLevelFlag -- At top level?
2612 -> UsageDetails -- Of scope
2613 -> CoreBndr -- Binder
2614 -> (UsageDetails, -- Details with binder removed
2615 IdWithOccInfo) -- Tagged binder
2616
2617 tagNonRecBinder lvl usage binder
2618 = let
2619 occ = lookupDetails usage binder
2620 will_be_join = decideJoinPointHood lvl usage [binder]
2621 occ' | will_be_join = -- must already be marked AlwaysTailCalled
2622 ASSERT(isAlwaysTailCalled occ) occ
2623 | otherwise = markNonTailCalled occ
2624 binder' = setBinderOcc occ' binder
2625 usage' = usage `delDetails` binder
2626 in
2627 usage' `seq` (usage', binder')
2628
2629 tagRecBinders :: TopLevelFlag -- At top level?
2630 -> UsageDetails -- Of body of let ONLY
2631 -> [(CoreBndr, -- Binder
2632 UsageDetails, -- RHS usage details
2633 [CoreBndr])] -- Lambdas in new RHS
2634 -> (UsageDetails, -- Adjusted details for whole scope,
2635 -- with binders removed
2636 [IdWithOccInfo]) -- Tagged binders
2637 -- Substantially more complicated than non-recursive case. Need to adjust RHS
2638 -- details *before* tagging binders (because the tags depend on the RHSes).
2639 tagRecBinders lvl body_uds triples
2640 = let
2641 (bndrs, rhs_udss, _) = unzip3 triples
2642
2643 -- 1. Determine join-point-hood of whole group, as determined by
2644 -- the *unadjusted* usage details
2645 unadj_uds = body_uds +++ combineUsageDetailsList rhs_udss
2646 will_be_joins = decideJoinPointHood lvl unadj_uds bndrs
2647
2648 -- 2. Adjust usage details of each RHS, taking into account the
2649 -- join-point-hood decision
2650 rhs_udss' = map adjust triples
2651 adjust (bndr, rhs_uds, rhs_bndrs)
2652 = adjustRhsUsage mb_join_arity Recursive rhs_bndrs rhs_uds
2653 where
2654 -- Can't use willBeJoinId_maybe here because we haven't tagged the
2655 -- binder yet (the tag depends on these adjustments!)
2656 mb_join_arity
2657 | will_be_joins
2658 , let occ = lookupDetails unadj_uds bndr
2659 , AlwaysTailCalled arity <- tailCallInfo occ
2660 = Just arity
2661 | otherwise
2662 = ASSERT(not will_be_joins) -- Should be AlwaysTailCalled if we're
2663 -- making join points!
2664 Nothing
2665
2666 -- 3. Compute final usage details from adjusted RHS details
2667 adj_uds = body_uds +++ combineUsageDetailsList rhs_udss'
2668
2669 -- 4. Tag each binder with its adjusted details
2670 bndrs' = [ setBinderOcc (lookupDetails adj_uds bndr) bndr
2671 | bndr <- bndrs ]
2672
2673 -- 5. Drop the binders from the adjusted details and return
2674 usage' = adj_uds `delDetailsList` bndrs
2675 in
2676 (usage', bndrs')
2677
2678 setBinderOcc :: OccInfo -> CoreBndr -> CoreBndr
2679 setBinderOcc occ_info bndr
2680 | isTyVar bndr = bndr
2681 | isExportedId bndr = if isManyOccs (idOccInfo bndr)
2682 then bndr
2683 else setIdOccInfo bndr noOccInfo
2684 -- Don't use local usage info for visible-elsewhere things
2685 -- BUT *do* erase any IAmALoopBreaker annotation, because we're
2686 -- about to re-generate it and it shouldn't be "sticky"
2687
2688 | otherwise = setIdOccInfo bndr occ_info
2689
2690 -- | Decide whether some bindings should be made into join points or not.
2691 -- Returns `False` if they can't be join points. Note that it's an
2692 -- all-or-nothing decision, as if multiple binders are given, they're assumed to
2693 -- be mutually recursive.
2694 --
2695 -- See Note [Invariants for join points] in CoreSyn.
2696 decideJoinPointHood :: TopLevelFlag -> UsageDetails
2697 -> [CoreBndr]
2698 -> Bool
2699 decideJoinPointHood TopLevel _ _
2700 = False
2701 decideJoinPointHood NotTopLevel usage bndrs
2702 | isJoinId (head bndrs)
2703 = WARN(not all_ok, text "OccurAnal failed to rediscover join point(s):" <+>
2704 ppr bndrs)
2705 all_ok
2706 | otherwise
2707 = all_ok
2708 where
2709 -- See Note [Invariants on join points]; invariants cited by number below.
2710 -- Invariant 2 is always satisfiable by the simplifier by eta expansion.
2711 all_ok = -- Invariant 3: Either all are join points or none are
2712 all ok bndrs
2713
2714 ok bndr
2715 | -- Invariant 1: Only tail calls, all same join arity
2716 AlwaysTailCalled arity <- tailCallInfo (lookupDetails usage bndr)
2717 , -- Invariant 1 as applied to LHSes of rules
2718 all (ok_rule arity) (idCoreRules bndr)
2719 -- Invariant 4: Satisfies polymorphism rule
2720 , isValidJoinPointType arity (idType bndr)
2721 = True
2722 | otherwise
2723 = False
2724
2725 ok_rule _ BuiltinRule{} = False -- only possible with plugin shenanigans
2726 ok_rule join_arity (Rule { ru_args = args })
2727 = args `lengthIs` join_arity
2728 -- Invariant 1 as applied to LHSes of rules
2729
2730 willBeJoinId_maybe :: CoreBndr -> Maybe JoinArity
2731 willBeJoinId_maybe bndr
2732 | AlwaysTailCalled arity <- tailCallInfo (idOccInfo bndr)
2733 = Just arity
2734 | otherwise
2735 = isJoinId_maybe bndr
2736
2737 {-
2738 ************************************************************************
2739 * *
2740 \subsection{Operations over OccInfo}
2741 * *
2742 ************************************************************************
2743 -}
2744
2745 markMany, markInsideLam, markNonTailCalled :: OccInfo -> OccInfo
2746
2747 markMany IAmDead = IAmDead
2748 markMany occ = ManyOccs { occ_tail = occ_tail occ }
2749
2750 markInsideLam occ@(OneOcc {}) = occ { occ_in_lam = True }
2751 markInsideLam occ = occ
2752
2753 markNonTailCalled IAmDead = IAmDead
2754 markNonTailCalled occ = occ { occ_tail = NoTailCallInfo }
2755
2756 addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
2757
2758 addOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2759 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2760 tailCallInfo a2 }
2761 -- Both branches are at least One
2762 -- (Argument is never IAmDead)
2763
2764 -- (orOccInfo orig new) is used
2765 -- when combining occurrence info from branches of a case
2766
2767 orOccInfo (OneOcc { occ_in_lam = in_lam1, occ_int_cxt = int_cxt1
2768 , occ_tail = tail1 })
2769 (OneOcc { occ_in_lam = in_lam2, occ_int_cxt = int_cxt2
2770 , occ_tail = tail2 })
2771 = OneOcc { occ_in_lam = in_lam1 || in_lam2
2772 , occ_one_br = False -- False, because it occurs in both branches
2773 , occ_int_cxt = int_cxt1 && int_cxt2
2774 , occ_tail = tail1 `andTailCallInfo` tail2 }
2775 orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2776 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2777 tailCallInfo a2 }
2778
2779 andTailCallInfo :: TailCallInfo -> TailCallInfo -> TailCallInfo
2780 andTailCallInfo info@(AlwaysTailCalled arity1) (AlwaysTailCalled arity2)
2781 | arity1 == arity2 = info
2782 andTailCallInfo _ _ = NoTailCallInfo