Allow CSE'ing of work-wrapped bindings (#14186)
[ghc.git] / compiler / simplCore / OccurAnal.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 ************************************************************************
5 * *
6 \section[OccurAnal]{Occurrence analysis pass}
7 * *
8 ************************************************************************
9
10 The occurrence analyser re-typechecks a core expression, returning a new
11 core expression with (hopefully) improved usage information.
12 -}
13
14 {-# LANGUAGE CPP, BangPatterns, MultiWayIf, ViewPatterns #-}
15
16 module OccurAnal (
17 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
18 ) where
19
20 #include "HsVersions.h"
21
22 import CoreSyn
23 import CoreFVs
24 import CoreUtils ( exprIsTrivial, isDefaultAlt, isExpandableApp,
25 stripTicksTopE, mkTicks )
26 import Id
27 import IdInfo
28 import Name( localiseName )
29 import BasicTypes
30 import Module( Module )
31 import Coercion
32 import Type
33
34 import VarSet
35 import VarEnv
36 import Var
37 import Demand ( argOneShots, argsOneShots )
38 import Digraph ( SCC(..), Node(..)
39 , stronglyConnCompFromEdgedVerticesUniq
40 , stronglyConnCompFromEdgedVerticesUniqR )
41 import Unique
42 import UniqFM
43 import UniqSet
44 import Util
45 import Outputable
46 import Data.List
47 import Control.Arrow ( second )
48
49 {-
50 ************************************************************************
51 * *
52 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
53 * *
54 ************************************************************************
55
56 Here's the externally-callable interface:
57 -}
58
59 occurAnalysePgm :: Module -- Used only in debug output
60 -> (Activation -> Bool)
61 -> [CoreRule] -> [CoreVect] -> VarSet
62 -> CoreProgram -> CoreProgram
63 occurAnalysePgm this_mod active_rule imp_rules vects vectVars binds
64 | isEmptyDetails final_usage
65 = occ_anald_binds
66
67 | otherwise -- See Note [Glomming]
68 = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
69 2 (ppr final_usage ) )
70 occ_anald_glommed_binds
71 where
72 init_env = initOccEnv active_rule
73 (final_usage, occ_anald_binds) = go init_env binds
74 (_, occ_anald_glommed_binds) = occAnalRecBind init_env TopLevel
75 imp_rule_edges
76 (flattenBinds occ_anald_binds)
77 initial_uds
78 -- It's crucial to re-analyse the glommed-together bindings
79 -- so that we establish the right loop breakers. Otherwise
80 -- we can easily create an infinite loop (Trac #9583 is an example)
81
82 initial_uds = addManyOccsSet emptyDetails
83 (rulesFreeVars imp_rules `unionVarSet`
84 vectsFreeVars vects `unionVarSet`
85 vectVars)
86 -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations,
87 -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are
88 -- reflected in 'vectors' — see Note [Vectorisation declarations and occurrences].)
89
90 -- Note [Preventing loops due to imported functions rules]
91 imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
92 [ mapVarEnv (const maps_to) $
93 getUniqSet (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
94 | imp_rule <- imp_rules
95 , not (isBuiltinRule imp_rule) -- See Note [Plugin rules]
96 , let maps_to = exprFreeIds (ru_rhs imp_rule)
97 `delVarSetList` ru_bndrs imp_rule
98 , arg <- ru_args imp_rule ]
99
100 go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
101 go _ []
102 = (initial_uds, [])
103 go env (bind:binds)
104 = (final_usage, bind' ++ binds')
105 where
106 (bs_usage, binds') = go env binds
107 (final_usage, bind') = occAnalBind env TopLevel imp_rule_edges bind
108 bs_usage
109
110 occurAnalyseExpr :: CoreExpr -> CoreExpr
111 -- Do occurrence analysis, and discard occurrence info returned
112 occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap
113
114 occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
115 occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap
116
117 occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
118 occurAnalyseExpr' enable_binder_swap expr
119 = snd (occAnal env expr)
120 where
121 env = (initOccEnv all_active_rules) {occ_binder_swap = enable_binder_swap}
122 -- To be conservative, we say that all inlines and rules are active
123 all_active_rules = \_ -> True
124
125 {- Note [Plugin rules]
126 ~~~~~~~~~~~~~~~~~~~~~~
127 Conal Elliott (Trac #11651) built a GHC plugin that added some
128 BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
129 do some domain-specific transformations that could not be expressed
130 with an ordinary pattern-matching CoreRule. But then we can't extract
131 the dependencies (in imp_rule_edges) from ru_rhs etc, because a
132 BuiltinRule doesn't have any of that stuff.
133
134 So we simply assume that BuiltinRules have no dependencies, and filter
135 them out from the imp_rule_edges comprehension.
136 -}
137
138 {-
139 ************************************************************************
140 * *
141 Bindings
142 * *
143 ************************************************************************
144
145 Note [Recursive bindings: the grand plan]
146 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
147 When we come across a binding group
148 Rec { x1 = r1; ...; xn = rn }
149 we treat it like this (occAnalRecBind):
150
151 1. Occurrence-analyse each right hand side, and build a
152 "Details" for each binding to capture the results.
153
154 Wrap the details in a Node (details, node-id, dep-node-ids),
155 where node-id is just the unique of the binder, and
156 dep-node-ids lists all binders on which this binding depends.
157 We'll call these the "scope edges".
158 See Note [Forming the Rec groups].
159
160 All this is done by makeNode.
161
162 2. Do SCC-analysis on these Nodes. Each SCC will become a new Rec or
163 NonRec. The key property is that every free variable of a binding
164 is accounted for by the scope edges, so that when we are done
165 everything is still in scope.
166
167 3. For each Cyclic SCC of the scope-edge SCC-analysis in (2), we
168 identify suitable loop-breakers to ensure that inlining terminates.
169 This is done by occAnalRec.
170
171 4. To do so we form a new set of Nodes, with the same details, but
172 different edges, the "loop-breaker nodes". The loop-breaker nodes
173 have both more and fewer dependencies than the scope edges
174 (see Note [Choosing loop breakers])
175
176 More edges: if f calls g, and g has an active rule that mentions h
177 then we add an edge from f -> h
178
179 Fewer edges: we only include dependencies on active rules, on rule
180 RHSs (not LHSs) and if there is an INLINE pragma only
181 on the stable unfolding (and vice versa). The scope
182 edges must be much more inclusive.
183
184 5. The "weak fvs" of a node are, by definition:
185 the scope fvs - the loop-breaker fvs
186 See Note [Weak loop breakers], and the nd_weak field of Details
187
188 6. Having formed the loop-breaker nodes
189
190 Note [Dead code]
191 ~~~~~~~~~~~~~~~~
192 Dropping dead code for a cyclic Strongly Connected Component is done
193 in a very simple way:
194
195 the entire SCC is dropped if none of its binders are mentioned
196 in the body; otherwise the whole thing is kept.
197
198 The key observation is that dead code elimination happens after
199 dependency analysis: so 'occAnalBind' processes SCCs instead of the
200 original term's binding groups.
201
202 Thus 'occAnalBind' does indeed drop 'f' in an example like
203
204 letrec f = ...g...
205 g = ...(...g...)...
206 in
207 ...g...
208
209 when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
210 'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
211 'AcyclicSCC f', where 'body_usage' won't contain 'f'.
212
213 ------------------------------------------------------------
214 Note [Forming Rec groups]
215 ~~~~~~~~~~~~~~~~~~~~~~~~~
216 We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
217 and "g uses f", no matter how indirectly. We do a SCC analysis
218 with an edge f -> g if "f uses g".
219
220 More precisely, "f uses g" iff g should be in scope wherever f is.
221 That is, g is free in:
222 a) the rhs 'ef'
223 b) or the RHS of a rule for f (Note [Rules are extra RHSs])
224 c) or the LHS or a rule for f (Note [Rule dependency info])
225
226 These conditions apply regardless of the activation of the RULE (eg it might be
227 inactive in this phase but become active later). Once a Rec is broken up
228 it can never be put back together, so we must be conservative.
229
230 The principle is that, regardless of rule firings, every variable is
231 always in scope.
232
233 * Note [Rules are extra RHSs]
234 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
235 A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
236 keeps the specialised "children" alive. If the parent dies
237 (because it isn't referenced any more), then the children will die
238 too (unless they are already referenced directly).
239
240 To that end, we build a Rec group for each cyclic strongly
241 connected component,
242 *treating f's rules as extra RHSs for 'f'*.
243 More concretely, the SCC analysis runs on a graph with an edge
244 from f -> g iff g is mentioned in
245 (a) f's rhs
246 (b) f's RULES
247 These are rec_edges.
248
249 Under (b) we include variables free in *either* LHS *or* RHS of
250 the rule. The former might seems silly, but see Note [Rule
251 dependency info]. So in Example [eftInt], eftInt and eftIntFB
252 will be put in the same Rec, even though their 'main' RHSs are
253 both non-recursive.
254
255 * Note [Rule dependency info]
256 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
257 The VarSet in a RuleInfo is used for dependency analysis in the
258 occurrence analyser. We must track free vars in *both* lhs and rhs.
259 Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
260 Why both? Consider
261 x = y
262 RULE f x = v+4
263 Then if we substitute y for x, we'd better do so in the
264 rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
265 as well as 'v'
266
267 * Note [Rules are visible in their own rec group]
268 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
269 We want the rules for 'f' to be visible in f's right-hand side.
270 And we'd like them to be visible in other functions in f's Rec
271 group. E.g. in Note [Specialisation rules] we want f' rule
272 to be visible in both f's RHS, and fs's RHS.
273
274 This means that we must simplify the RULEs first, before looking
275 at any of the definitions. This is done by Simplify.simplRecBind,
276 when it calls addLetIdInfo.
277
278 ------------------------------------------------------------
279 Note [Choosing loop breakers]
280 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
281 Loop breaking is surprisingly subtle. First read the section 4 of
282 "Secrets of the GHC inliner". This describes our basic plan.
283 We avoid infinite inlinings by choosing loop breakers, and
284 ensuring that a loop breaker cuts each loop.
285
286 See also Note [Inlining and hs-boot files] in ToIface, which deals
287 with a closely related source of infinite loops.
288
289 Fundamentally, we do SCC analysis on a graph. For each recursive
290 group we choose a loop breaker, delete all edges to that node,
291 re-analyse the SCC, and iterate.
292
293 But what is the graph? NOT the same graph as was used for Note
294 [Forming Rec groups]! In particular, a RULE is like an equation for
295 'f' that is *always* inlined if it is applicable. We do *not* disable
296 rules for loop-breakers. It's up to whoever makes the rules to make
297 sure that the rules themselves always terminate. See Note [Rules for
298 recursive functions] in Simplify.hs
299
300 Hence, if
301 f's RHS (or its INLINE template if it has one) mentions g, and
302 g has a RULE that mentions h, and
303 h has a RULE that mentions f
304
305 then we *must* choose f to be a loop breaker. Example: see Note
306 [Specialisation rules].
307
308 In general, take the free variables of f's RHS, and augment it with
309 all the variables reachable by RULES from those starting points. That
310 is the whole reason for computing rule_fv_env in occAnalBind. (Of
311 course we only consider free vars that are also binders in this Rec
312 group.) See also Note [Finding rule RHS free vars]
313
314 Note that when we compute this rule_fv_env, we only consider variables
315 free in the *RHS* of the rule, in contrast to the way we build the
316 Rec group in the first place (Note [Rule dependency info])
317
318 Note that if 'g' has RHS that mentions 'w', we should add w to
319 g's loop-breaker edges. More concretely there is an edge from f -> g
320 iff
321 (a) g is mentioned in f's RHS `xor` f's INLINE rhs
322 (see Note [Inline rules])
323 (b) or h is mentioned in f's RHS, and
324 g appears in the RHS of an active RULE of h
325 or a transitive sequence of active rules starting with h
326
327 Why "active rules"? See Note [Finding rule RHS free vars]
328
329 Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
330 chosen as a loop breaker, because their RHSs don't mention each other.
331 And indeed both can be inlined safely.
332
333 Note again that the edges of the graph we use for computing loop breakers
334 are not the same as the edges we use for computing the Rec blocks.
335 That's why we compute
336
337 - rec_edges for the Rec block analysis
338 - loop_breaker_nodes for the loop breaker analysis
339
340 * Note [Finding rule RHS free vars]
341 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
342 Consider this real example from Data Parallel Haskell
343 tagZero :: Array Int -> Array Tag
344 {-# INLINE [1] tagZeroes #-}
345 tagZero xs = pmap (\x -> fromBool (x==0)) xs
346
347 {-# RULES "tagZero" [~1] forall xs n.
348 pmap fromBool <blah blah> = tagZero xs #-}
349 So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
350 However, tagZero can only be inlined in phase 1 and later, while
351 the RULE is only active *before* phase 1. So there's no problem.
352
353 To make this work, we look for the RHS free vars only for
354 *active* rules. That's the reason for the occ_rule_act field
355 of the OccEnv.
356
357 * Note [Weak loop breakers]
358 ~~~~~~~~~~~~~~~~~~~~~~~~~
359 There is a last nasty wrinkle. Suppose we have
360
361 Rec { f = f_rhs
362 RULE f [] = g
363
364 h = h_rhs
365 g = h
366 ...more...
367 }
368
369 Remember that we simplify the RULES before any RHS (see Note
370 [Rules are visible in their own rec group] above).
371
372 So we must *not* postInlineUnconditionally 'g', even though
373 its RHS turns out to be trivial. (I'm assuming that 'g' is
374 not choosen as a loop breaker.) Why not? Because then we
375 drop the binding for 'g', which leaves it out of scope in the
376 RULE!
377
378 Here's a somewhat different example of the same thing
379 Rec { g = h
380 ; h = ...f...
381 ; f = f_rhs
382 RULE f [] = g }
383 Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
384 g, because the RULE for f is active throughout. So the RHS of h
385 might rewrite to h = ...g...
386 So g must remain in scope in the output program!
387
388 We "solve" this by:
389
390 Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
391 iff g is a "missing free variable" of the Rec group
392
393 A "missing free variable" x is one that is mentioned in an RHS or
394 INLINE or RULE of a binding in the Rec group, but where the
395 dependency on x may not show up in the loop_breaker_nodes (see
396 note [Choosing loop breakers} above).
397
398 A normal "strong" loop breaker has IAmLoopBreaker False. So
399
400 Inline postInlineUnconditionally
401 strong IAmLoopBreaker False no no
402 weak IAmLoopBreaker True yes no
403 other yes yes
404
405 The **sole** reason for this kind of loop breaker is so that
406 postInlineUnconditionally does not fire. Ugh. (Typically it'll
407 inline via the usual callSiteInline stuff, so it'll be dead in the
408 next pass, so the main Ugh is the tiresome complication.)
409
410 Note [Rules for imported functions]
411 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
412 Consider this
413 f = /\a. B.g a
414 RULE B.g Int = 1 + f Int
415 Note that
416 * The RULE is for an imported function.
417 * f is non-recursive
418 Now we
419 can get
420 f Int --> B.g Int Inlining f
421 --> 1 + f Int Firing RULE
422 and so the simplifier goes into an infinite loop. This
423 would not happen if the RULE was for a local function,
424 because we keep track of dependencies through rules. But
425 that is pretty much impossible to do for imported Ids. Suppose
426 f's definition had been
427 f = /\a. C.h a
428 where (by some long and devious process), C.h eventually inlines to
429 B.g. We could only spot such loops by exhaustively following
430 unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
431 f.
432
433 Note that RULES for imported functions are important in practice; they
434 occur a lot in the libraries.
435
436 We regard this potential infinite loop as a *programmer* error.
437 It's up the programmer not to write silly rules like
438 RULE f x = f x
439 and the example above is just a more complicated version.
440
441 Note [Preventing loops due to imported functions rules]
442 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
443 Consider:
444 import GHC.Base (foldr)
445
446 {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
447 filter p xs = build (\c n -> foldr (filterFB c p) n xs)
448 filterFB c p = ...
449
450 f = filter p xs
451
452 Note that filter is not a loop-breaker, so what happens is:
453 f = filter p xs
454 = {inline} build (\c n -> foldr (filterFB c p) n xs)
455 = {inline} foldr (filterFB (:) p) [] xs
456 = {RULE} filter p xs
457
458 We are in an infinite loop.
459
460 A more elaborate example (that I actually saw in practice when I went to
461 mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
462 {-# LANGUAGE RankNTypes #-}
463 module GHCList where
464
465 import Prelude hiding (filter)
466 import GHC.Base (build)
467
468 {-# INLINABLE filter #-}
469 filter :: (a -> Bool) -> [a] -> [a]
470 filter p [] = []
471 filter p (x:xs) = if p x then x : filter p xs else filter p xs
472
473 {-# NOINLINE [0] filterFB #-}
474 filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
475 filterFB c p x r | p x = x `c` r
476 | otherwise = r
477
478 {-# RULES
479 "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr
480 (filterFB c p) n xs)
481 "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p
482 #-}
483
484 Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
485 are not), the unfolding given to "filter" in the interface file will be:
486 filter p [] = []
487 filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
488 else build (\c n -> foldr (filterFB c p) n xs
489
490 Note that because this unfolding does not mention "filter", filter is not
491 marked as a strong loop breaker. Therefore at a use site in another module:
492 filter p xs
493 = {inline}
494 case xs of [] -> []
495 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
496 else build (\c n -> foldr (filterFB c p) n xs)
497
498 build (\c n -> foldr (filterFB c p) n xs)
499 = {inline} foldr (filterFB (:) p) [] xs
500 = {RULE} filter p xs
501
502 And we are in an infinite loop again, except that this time the loop is producing an
503 infinitely large *term* (an unrolling of filter) and so the simplifier finally
504 dies with "ticks exhausted"
505
506 Because of this problem, we make a small change in the occurrence analyser
507 designed to mark functions like "filter" as strong loop breakers on the basis that:
508 1. The RHS of filter mentions the local function "filterFB"
509 2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS
510
511 So for each RULE for an *imported* function we are going to add
512 dependency edges between the *local* FVS of the rule LHS and the
513 *local* FVS of the rule RHS. We don't do anything special for RULES on
514 local functions because the standard occurrence analysis stuff is
515 pretty good at getting loop-breakerness correct there.
516
517 It is important to note that even with this extra hack we aren't always going to get
518 things right. For example, it might be that the rule LHS mentions an imported Id,
519 and another module has a RULE that can rewrite that imported Id to one of our local
520 Ids.
521
522 Note [Specialising imported functions] (referred to from Specialise)
523 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
524 BUT for *automatically-generated* rules, the programmer can't be
525 responsible for the "programmer error" in Note [Rules for imported
526 functions]. In paricular, consider specialising a recursive function
527 defined in another module. If we specialise a recursive function B.g,
528 we get
529 g_spec = .....(B.g Int).....
530 RULE B.g Int = g_spec
531 Here, g_spec doesn't look recursive, but when the rule fires, it
532 becomes so. And if B.g was mutually recursive, the loop might
533 not be as obvious as it is here.
534
535 To avoid this,
536 * When specialising a function that is a loop breaker,
537 give a NOINLINE pragma to the specialised function
538
539 Note [Glomming]
540 ~~~~~~~~~~~~~~~
541 RULES for imported Ids can make something at the top refer to something at the bottom:
542 f = \x -> B.g (q x)
543 h = \y -> 3
544
545 RULE: B.g (q x) = h x
546
547 Applying this rule makes f refer to h, although f doesn't appear to
548 depend on h. (And, as in Note [Rules for imported functions], the
549 dependency might be more indirect. For example, f might mention C.t
550 rather than B.g, where C.t eventually inlines to B.g.)
551
552 NOTICE that this cannot happen for rules whose head is a
553 locally-defined function, because we accurately track dependencies
554 through RULES. It only happens for rules whose head is an imported
555 function (B.g in the example above).
556
557 Solution:
558 - When simplifying, bring all top level identifiers into
559 scope at the start, ignoring the Rec/NonRec structure, so
560 that when 'h' pops up in f's rhs, we find it in the in-scope set
561 (as the simplifier generally expects). This happens in simplTopBinds.
562
563 - In the occurrence analyser, if there are any out-of-scope
564 occurrences that pop out of the top, which will happen after
565 firing the rule: f = \x -> h x
566 h = \y -> 3
567 then just glom all the bindings into a single Rec, so that
568 the *next* iteration of the occurrence analyser will sort
569 them all out. This part happens in occurAnalysePgm.
570
571 ------------------------------------------------------------
572 Note [Inline rules]
573 ~~~~~~~~~~~~~~~~~~~
574 None of the above stuff about RULES applies to Inline Rules,
575 stored in a CoreUnfolding. The unfolding, if any, is simplified
576 at the same time as the regular RHS of the function (ie *not* like
577 Note [Rules are visible in their own rec group]), so it should be
578 treated *exactly* like an extra RHS.
579
580 Or, rather, when computing loop-breaker edges,
581 * If f has an INLINE pragma, and it is active, we treat the
582 INLINE rhs as f's rhs
583 * If it's inactive, we treat f as having no rhs
584 * If it has no INLINE pragma, we look at f's actual rhs
585
586
587 There is a danger that we'll be sub-optimal if we see this
588 f = ...f...
589 [INLINE f = ..no f...]
590 where f is recursive, but the INLINE is not. This can just about
591 happen with a sufficiently odd set of rules; eg
592
593 foo :: Int -> Int
594 {-# INLINE [1] foo #-}
595 foo x = x+1
596
597 bar :: Int -> Int
598 {-# INLINE [1] bar #-}
599 bar x = foo x + 1
600
601 {-# RULES "foo" [~1] forall x. foo x = bar x #-}
602
603 Here the RULE makes bar recursive; but it's INLINE pragma remains
604 non-recursive. It's tempting to then say that 'bar' should not be
605 a loop breaker, but an attempt to do so goes wrong in two ways:
606 a) We may get
607 $df = ...$cfoo...
608 $cfoo = ...$df....
609 [INLINE $cfoo = ...no-$df...]
610 But we want $cfoo to depend on $df explicitly so that we
611 put the bindings in the right order to inline $df in $cfoo
612 and perhaps break the loop altogether. (Maybe this
613 b)
614
615
616 Example [eftInt]
617 ~~~~~~~~~~~~~~~
618 Example (from GHC.Enum):
619
620 eftInt :: Int# -> Int# -> [Int]
621 eftInt x y = ...(non-recursive)...
622
623 {-# INLINE [0] eftIntFB #-}
624 eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
625 eftIntFB c n x y = ...(non-recursive)...
626
627 {-# RULES
628 "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
629 "eftIntList" [1] eftIntFB (:) [] = eftInt
630 #-}
631
632 Note [Specialisation rules]
633 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
634 Consider this group, which is typical of what SpecConstr builds:
635
636 fs a = ....f (C a)....
637 f x = ....f (C a)....
638 {-# RULE f (C a) = fs a #-}
639
640 So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
641
642 But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
643 - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
644 - fs is inlined (say it's small)
645 - now there's another opportunity to apply the RULE
646
647 This showed up when compiling Control.Concurrent.Chan.getChanContents.
648
649 ------------------------------------------------------------
650 Note [Finding join points]
651 ~~~~~~~~~~~~~~~~~~~~~~~~~~
652 It's the occurrence analyser's job to find bindings that we can turn into join
653 points, but it doesn't perform that transformation right away. Rather, it marks
654 the eligible bindings as part of their occurrence data, leaving it to the
655 simplifier (or to simpleOptPgm) to actually change the binder's 'IdDetails'.
656 The simplifier then eta-expands the RHS if needed and then updates the
657 occurrence sites. Dividing the work this way means that the occurrence analyser
658 still only takes one pass, yet one can always tell the difference between a
659 function call and a jump by looking at the occurrence (because the same pass
660 changes the 'IdDetails' and propagates the binders to their occurrence sites).
661
662 To track potential join points, we use the 'occ_tail' field of OccInfo. A value
663 of `AlwaysTailCalled n` indicates that every occurrence of the variable is a
664 tail call with `n` arguments (counting both value and type arguments). Otherwise
665 'occ_tail' will be 'NoTailCallInfo'. The tail call info flows bottom-up with the
666 rest of 'OccInfo' until it goes on the binder.
667
668 Note [Rules and join points]
669 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
670
671 Things get fiddly with rules. Suppose we have:
672
673 let j :: Int -> Int
674 j y = 2 * y
675 k :: Int -> Int -> Int
676 {-# RULES "SPEC k 0" k 0 = j #-}
677 k x y = x + 2 * y
678 in ...
679
680 Now suppose that both j and k appear only as saturated tail calls in the body.
681 Thus we would like to make them both join points. The rule complicates matters,
682 though, as its RHS has an unapplied occurrence of j. *However*, if we were to
683 eta-expand the rule, all would be well:
684
685 {-# RULES "SPEC k 0" forall a. k 0 a = j a #-}
686
687 So conceivably we could notice that a potential join point would have an
688 "undersaturated" rule and account for it. This would mean we could make
689 something that's been specialised a join point, for instance. But local bindings
690 are rarely specialised, and being overly cautious about rules only
691 costs us anything when, for some `j`:
692
693 * Before specialisation, `j` has non-tail calls, so it can't be a join point.
694 * During specialisation, `j` gets specialised and thus acquires rules.
695 * Sometime afterward, the non-tail calls to `j` disappear (as dead code, say),
696 and so now `j` *could* become a join point.
697
698 This appears to be very rare in practice. TODO Perhaps we should gather
699 statistics to be sure.
700
701 Note [Excess polymorphism and join points]
702 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
703
704 In principle, if a function would be a join point except that it fails
705 the polymorphism rule (see Note [The polymorphism rule of join points] in
706 CoreSyn), it can still be made a join point with some effort. This is because
707 all tail calls must return the same type (they return to the same context!), and
708 thus if the return type depends on an argument, that argument must always be the
709 same.
710
711 For instance, consider:
712
713 let f :: forall a. a -> Char -> [a]
714 f @a x c = ... f @a x 'a' ...
715 in ... f @Int 1 'b' ... f @Int 2 'c' ...
716
717 (where the calls are tail calls). `f` fails the polymorphism rule because its
718 return type is [a], where [a] is bound. But since the type argument is always
719 'Int', we can rewrite it as:
720
721 let f' :: Int -> Char -> [Int]
722 f' x c = ... f' x 'a' ...
723 in ... f' 1 'b' ... f 2 'c' ...
724
725 and now we can make f' a join point:
726
727 join f' :: Int -> Char -> [Int]
728 f' x c = ... jump f' x 'a' ...
729 in ... jump f' 1 'b' ... jump f' 2 'c' ...
730
731 It's not clear that this comes up often, however. TODO: Measure how often and
732 add this analysis if necessary.
733
734 ------------------------------------------------------------
735 Note [Adjusting right-hand sides]
736 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
737 There's a bit of a dance we need to do after analysing a lambda expression or
738 a right-hand side. In particular, we need to
739
740 a) call 'markAllInsideLam' *unless* the binding is for a thunk, a one-shot
741 lambda, or a non-recursive join point; and
742 b) call 'markAllNonTailCalled' *unless* the binding is for a join point.
743
744 Some examples, with how the free occurrences in e (assumed not to be a value
745 lambda) get marked:
746
747 inside lam non-tail-called
748 ------------------------------------------------------------
749 let x = e No Yes
750 let f = \x -> e Yes Yes
751 let f = \x{OneShot} -> e No Yes
752 \x -> e Yes Yes
753 join j x = e No No
754 joinrec j x = e Yes No
755
756 There are a few other caveats; most importantly, if we're marking a binding as
757 'AlwaysTailCalled', it's *going* to be a join point, so we treat it as one so
758 that the effect cascades properly. Consequently, at the time the RHS is
759 analysed, we won't know what adjustments to make; thus 'occAnalLamOrRhs' must
760 return the unadjusted 'UsageDetails', to be adjusted by 'adjustRhsUsage' once
761 join-point-hood has been decided.
762
763 Thus the overall sequence taking place in 'occAnalNonRecBind' and
764 'occAnalRecBind' is as follows:
765
766 1. Call 'occAnalLamOrRhs' to find usage information for the RHS.
767 2. Call 'tagNonRecBinder' or 'tagRecBinders', which decides whether to make
768 the binding a join point.
769 3. Call 'adjustRhsUsage' accordingly. (Done as part of 'tagRecBinders' when
770 recursive.)
771
772 (In the recursive case, this logic is spread between 'makeNode' and
773 'occAnalRec'.)
774 -}
775
776 ------------------------------------------------------------------
777 -- occAnalBind
778 ------------------------------------------------------------------
779
780 occAnalBind :: OccEnv -- The incoming OccEnv
781 -> TopLevelFlag
782 -> ImpRuleEdges
783 -> CoreBind
784 -> UsageDetails -- Usage details of scope
785 -> (UsageDetails, -- Of the whole let(rec)
786 [CoreBind])
787
788 occAnalBind env lvl top_env (NonRec binder rhs) body_usage
789 = occAnalNonRecBind env lvl top_env binder rhs body_usage
790 occAnalBind env lvl top_env (Rec pairs) body_usage
791 = occAnalRecBind env lvl top_env pairs body_usage
792
793 -----------------
794 occAnalNonRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> Var -> CoreExpr
795 -> UsageDetails -> (UsageDetails, [CoreBind])
796 occAnalNonRecBind env lvl imp_rule_edges binder rhs body_usage
797 | isTyVar binder -- A type let; we don't gather usage info
798 = (body_usage, [NonRec binder rhs])
799
800 | not (binder `usedIn` body_usage) -- It's not mentioned
801 = (body_usage, [])
802
803 | otherwise -- It's mentioned in the body
804 = (body_usage' +++ rhs_usage', [NonRec tagged_binder rhs'])
805 where
806 (body_usage', tagged_binder) = tagNonRecBinder lvl body_usage binder
807 mb_join_arity = willBeJoinId_maybe tagged_binder
808
809 (bndrs, body) = collectBinders rhs
810
811 (rhs_usage1, bndrs', body') = occAnalNonRecRhs env tagged_binder bndrs body
812 rhs' = mkLams (markJoinOneShots mb_join_arity bndrs') body'
813 -- For a /non-recursive/ join point we can mark all
814 -- its join-lambda as one-shot; and it's a good idea to do so
815
816 -- Unfoldings
817 -- See Note [Unfoldings and join points]
818 rhs_usage2 = case occAnalUnfolding env NonRecursive binder of
819 Just unf_usage -> rhs_usage1 +++ unf_usage
820 Nothing -> rhs_usage1
821
822 -- Rules
823 -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
824 rules_w_uds = occAnalRules env mb_join_arity NonRecursive tagged_binder
825 rhs_usage3 = rhs_usage2 +++ combineUsageDetailsList
826 (map (\(_, l, r) -> l +++ r) rules_w_uds)
827 rhs_usage4 = maybe rhs_usage3 (addManyOccsSet rhs_usage3) $
828 lookupVarEnv imp_rule_edges binder
829 -- See Note [Preventing loops due to imported functions rules]
830
831 -- Final adjustment
832 rhs_usage' = adjustRhsUsage mb_join_arity NonRecursive bndrs' rhs_usage4
833
834 -----------------
835 occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)]
836 -> UsageDetails -> (UsageDetails, [CoreBind])
837 occAnalRecBind env lvl imp_rule_edges pairs body_usage
838 = foldr (occAnalRec lvl) (body_usage, []) sccs
839 -- For a recursive group, we
840 -- * occ-analyse all the RHSs
841 -- * compute strongly-connected components
842 -- * feed those components to occAnalRec
843 -- See Note [Recursive bindings: the grand plan]
844 where
845 sccs :: [SCC Details]
846 sccs = {-# SCC "occAnalBind.scc" #-}
847 stronglyConnCompFromEdgedVerticesUniq nodes
848
849 nodes :: [LetrecNode]
850 nodes = {-# SCC "occAnalBind.assoc" #-}
851 map (makeNode env imp_rule_edges bndr_set) pairs
852
853 bndr_set = mkVarSet (map fst pairs)
854
855 {-
856 Note [Unfoldings and join points]
857 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
858
859 We assume that anything in an unfolding occurs multiple times, since unfoldings
860 are often copied (that's the whole point!). But we still need to track tail
861 calls for the purpose of finding join points.
862 -}
863
864 -----------------------------
865 occAnalRec :: TopLevelFlag
866 -> SCC Details
867 -> (UsageDetails, [CoreBind])
868 -> (UsageDetails, [CoreBind])
869
870 -- The NonRec case is just like a Let (NonRec ...) above
871 occAnalRec lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs
872 , nd_uds = rhs_uds, nd_rhs_bndrs = rhs_bndrs }))
873 (body_uds, binds)
874 | not (bndr `usedIn` body_uds)
875 = (body_uds, binds) -- See Note [Dead code]
876
877 | otherwise -- It's mentioned in the body
878 = (body_uds' +++ rhs_uds',
879 NonRec tagged_bndr rhs : binds)
880 where
881 (body_uds', tagged_bndr) = tagNonRecBinder lvl body_uds bndr
882 rhs_uds' = adjustRhsUsage (willBeJoinId_maybe tagged_bndr) NonRecursive
883 rhs_bndrs rhs_uds
884
885 -- The Rec case is the interesting one
886 -- See Note [Recursive bindings: the grand plan]
887 -- See Note [Loop breaking]
888 occAnalRec lvl (CyclicSCC details_s) (body_uds, binds)
889 | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
890 = (body_uds, binds) -- See Note [Dead code]
891
892 | otherwise -- At this point we always build a single Rec
893 = -- pprTrace "occAnalRec" (vcat
894 -- [ text "weak_fvs" <+> ppr weak_fvs
895 -- , text "lb nodes" <+> ppr loop_breaker_nodes])
896 (final_uds, Rec pairs : binds)
897
898 where
899 bndrs = map nd_bndr details_s
900 bndr_set = mkVarSet bndrs
901
902 ------------------------------
903 -- See Note [Choosing loop breakers] for loop_breaker_nodes
904 final_uds :: UsageDetails
905 loop_breaker_nodes :: [LetrecNode]
906 (final_uds, loop_breaker_nodes)
907 = mkLoopBreakerNodes lvl bndr_set body_uds details_s
908
909 ------------------------------
910 weak_fvs :: VarSet
911 weak_fvs = mapUnionVarSet nd_weak details_s
912
913 ---------------------------
914 -- Now reconstruct the cycle
915 pairs :: [(Id,CoreExpr)]
916 pairs | isEmptyVarSet weak_fvs = reOrderNodes 0 bndr_set weak_fvs loop_breaker_nodes []
917 | otherwise = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_nodes []
918 -- If weak_fvs is empty, the loop_breaker_nodes will include
919 -- all the edges in the original scope edges [remember,
920 -- weak_fvs is the difference between scope edges and
921 -- lb-edges], so a fresh SCC computation would yield a
922 -- single CyclicSCC result; and reOrderNodes deals with
923 -- exactly that case
924
925
926 ------------------------------------------------------------------
927 -- Loop breaking
928 ------------------------------------------------------------------
929
930 type Binding = (Id,CoreExpr)
931
932 loopBreakNodes :: Int
933 -> VarSet -- All binders
934 -> VarSet -- Binders whose dependencies may be "missing"
935 -- See Note [Weak loop breakers]
936 -> [LetrecNode]
937 -> [Binding] -- Append these to the end
938 -> [Binding]
939 {-
940 loopBreakNodes is applied to the list of nodes for a cyclic strongly
941 connected component (there's guaranteed to be a cycle). It returns
942 the same nodes, but
943 a) in a better order,
944 b) with some of the Ids having a IAmALoopBreaker pragma
945
946 The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means
947 that the simplifier can guarantee not to loop provided it never records an inlining
948 for these no-inline guys.
949
950 Furthermore, the order of the binds is such that if we neglect dependencies
951 on the no-inline Ids then the binds are topologically sorted. This means
952 that the simplifier will generally do a good job if it works from top bottom,
953 recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
954 -}
955
956 -- Return the bindings sorted into a plausible order, and marked with loop breakers.
957 loopBreakNodes depth bndr_set weak_fvs nodes binds
958 = -- pprTrace "loopBreakNodes" (ppr nodes) $
959 go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds
960 where
961 go [] binds = binds
962 go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
963
964 loop_break_scc scc binds
965 = case scc of
966 AcyclicSCC node -> mk_non_loop_breaker weak_fvs node : binds
967 CyclicSCC nodes -> reOrderNodes depth bndr_set weak_fvs nodes binds
968
969 ----------------------------------
970 reOrderNodes :: Int -> VarSet -> VarSet -> [LetrecNode] -> [Binding] -> [Binding]
971 -- Choose a loop breaker, mark it no-inline,
972 -- and call loopBreakNodes on the rest
973 reOrderNodes _ _ _ [] _ = panic "reOrderNodes"
974 reOrderNodes _ _ _ [node] binds = mk_loop_breaker node : binds
975 reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
976 = -- pprTrace "reOrderNodes" (vcat [ text "unchosen" <+> ppr unchosen
977 -- , text "chosen" <+> ppr chosen_nodes ]) $
978 loopBreakNodes new_depth bndr_set weak_fvs unchosen $
979 (map mk_loop_breaker chosen_nodes ++ binds)
980 where
981 (chosen_nodes, unchosen) = chooseLoopBreaker approximate_lb
982 (nd_score (node_payload node))
983 [node] [] nodes
984
985 approximate_lb = depth >= 2
986 new_depth | approximate_lb = 0
987 | otherwise = depth+1
988 -- After two iterations (d=0, d=1) give up
989 -- and approximate, returning to d=0
990
991 mk_loop_breaker :: LetrecNode -> Binding
992 mk_loop_breaker (node_payload -> ND { nd_bndr = bndr, nd_rhs = rhs})
993 = (bndr `setIdOccInfo` strongLoopBreaker { occ_tail = tail_info }, rhs)
994 where
995 tail_info = tailCallInfo (idOccInfo bndr)
996
997 mk_non_loop_breaker :: VarSet -> LetrecNode -> Binding
998 -- See Note [Weak loop breakers]
999 mk_non_loop_breaker weak_fvs (node_payload -> ND { nd_bndr = bndr
1000 , nd_rhs = rhs})
1001 | bndr `elemVarSet` weak_fvs = (setIdOccInfo bndr occ', rhs)
1002 | otherwise = (bndr, rhs)
1003 where
1004 occ' = weakLoopBreaker { occ_tail = tail_info }
1005 tail_info = tailCallInfo (idOccInfo bndr)
1006
1007 ----------------------------------
1008 chooseLoopBreaker :: Bool -- True <=> Too many iterations,
1009 -- so approximate
1010 -> NodeScore -- Best score so far
1011 -> [LetrecNode] -- Nodes with this score
1012 -> [LetrecNode] -- Nodes with higher scores
1013 -> [LetrecNode] -- Unprocessed nodes
1014 -> ([LetrecNode], [LetrecNode])
1015 -- This loop looks for the bind with the lowest score
1016 -- to pick as the loop breaker. The rest accumulate in
1017 chooseLoopBreaker _ _ loop_nodes acc []
1018 = (loop_nodes, acc) -- Done
1019
1020 -- If approximate_loop_breaker is True, we pick *all*
1021 -- nodes with lowest score, else just one
1022 -- See Note [Complexity of loop breaking]
1023 chooseLoopBreaker approx_lb loop_sc loop_nodes acc (node : nodes)
1024 | approx_lb
1025 , rank sc == rank loop_sc
1026 = chooseLoopBreaker approx_lb loop_sc (node : loop_nodes) acc nodes
1027
1028 | sc `betterLB` loop_sc -- Better score so pick this new one
1029 = chooseLoopBreaker approx_lb sc [node] (loop_nodes ++ acc) nodes
1030
1031 | otherwise -- Worse score so don't pick it
1032 = chooseLoopBreaker approx_lb loop_sc loop_nodes (node : acc) nodes
1033 where
1034 sc = nd_score (node_payload node)
1035
1036 {-
1037 Note [Complexity of loop breaking]
1038 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1039 The loop-breaking algorithm knocks out one binder at a time, and
1040 performs a new SCC analysis on the remaining binders. That can
1041 behave very badly in tightly-coupled groups of bindings; in the
1042 worst case it can be (N**2)*log N, because it does a full SCC
1043 on N, then N-1, then N-2 and so on.
1044
1045 To avoid this, we switch plans after 2 (or whatever) attempts:
1046 Plan A: pick one binder with the lowest score, make it
1047 a loop breaker, and try again
1048 Plan B: pick *all* binders with the lowest score, make them
1049 all loop breakers, and try again
1050 Since there are only a small finite number of scores, this will
1051 terminate in a constant number of iterations, rather than O(N)
1052 iterations.
1053
1054 You might thing that it's very unlikely, but RULES make it much
1055 more likely. Here's a real example from Trac #1969:
1056 Rec { $dm = \d.\x. op d
1057 {-# RULES forall d. $dm Int d = $s$dm1
1058 forall d. $dm Bool d = $s$dm2 #-}
1059
1060 dInt = MkD .... opInt ...
1061 dInt = MkD .... opBool ...
1062 opInt = $dm dInt
1063 opBool = $dm dBool
1064
1065 $s$dm1 = \x. op dInt
1066 $s$dm2 = \x. op dBool }
1067 The RULES stuff means that we can't choose $dm as a loop breaker
1068 (Note [Choosing loop breakers]), so we must choose at least (say)
1069 opInt *and* opBool, and so on. The number of loop breakders is
1070 linear in the number of instance declarations.
1071
1072 Note [Loop breakers and INLINE/INLINABLE pragmas]
1073 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1074 Avoid choosing a function with an INLINE pramga as the loop breaker!
1075 If such a function is mutually-recursive with a non-INLINE thing,
1076 then the latter should be the loop-breaker.
1077
1078 It's vital to distinguish between INLINE and INLINABLE (the
1079 Bool returned by hasStableCoreUnfolding_maybe). If we start with
1080 Rec { {-# INLINABLE f #-}
1081 f x = ...f... }
1082 and then worker/wrapper it through strictness analysis, we'll get
1083 Rec { {-# INLINABLE $wf #-}
1084 $wf p q = let x = (p,q) in ...f...
1085
1086 {-# INLINE f #-}
1087 f x = case x of (p,q) -> $wf p q }
1088
1089 Now it is vital that we choose $wf as the loop breaker, so we can
1090 inline 'f' in '$wf'.
1091
1092 Note [DFuns should not be loop breakers]
1093 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1094 It's particularly bad to make a DFun into a loop breaker. See
1095 Note [How instance declarations are translated] in TcInstDcls
1096
1097 We give DFuns a higher score than ordinary CONLIKE things because
1098 if there's a choice we want the DFun to be the non-loop breaker. Eg
1099
1100 rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
1101
1102 $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
1103 {-# DFUN #-}
1104 $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
1105 }
1106
1107 Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
1108 if we can't unravel the DFun first.
1109
1110 Note [Constructor applications]
1111 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1112 It's really really important to inline dictionaries. Real
1113 example (the Enum Ordering instance from GHC.Base):
1114
1115 rec f = \ x -> case d of (p,q,r) -> p x
1116 g = \ x -> case d of (p,q,r) -> q x
1117 d = (v, f, g)
1118
1119 Here, f and g occur just once; but we can't inline them into d.
1120 On the other hand we *could* simplify those case expressions if
1121 we didn't stupidly choose d as the loop breaker.
1122 But we won't because constructor args are marked "Many".
1123 Inlining dictionaries is really essential to unravelling
1124 the loops in static numeric dictionaries, see GHC.Float.
1125
1126 Note [Closure conversion]
1127 ~~~~~~~~~~~~~~~~~~~~~~~~~
1128 We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
1129 The immediate motivation came from the result of a closure-conversion transformation
1130 which generated code like this:
1131
1132 data Clo a b = forall c. Clo (c -> a -> b) c
1133
1134 ($:) :: Clo a b -> a -> b
1135 Clo f env $: x = f env x
1136
1137 rec { plus = Clo plus1 ()
1138
1139 ; plus1 _ n = Clo plus2 n
1140
1141 ; plus2 Zero n = n
1142 ; plus2 (Succ m) n = Succ (plus $: m $: n) }
1143
1144 If we inline 'plus' and 'plus1', everything unravels nicely. But if
1145 we choose 'plus1' as the loop breaker (which is entirely possible
1146 otherwise), the loop does not unravel nicely.
1147
1148
1149 @occAnalUnfolding@ deals with the question of bindings where the Id is marked
1150 by an INLINE pragma. For these we record that anything which occurs
1151 in its RHS occurs many times. This pessimistically assumes that this
1152 inlined binder also occurs many times in its scope, but if it doesn't
1153 we'll catch it next time round. At worst this costs an extra simplifier pass.
1154 ToDo: try using the occurrence info for the inline'd binder.
1155
1156 [March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC.
1157 [June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC.
1158
1159
1160 ************************************************************************
1161 * *
1162 Making nodes
1163 * *
1164 ************************************************************************
1165 -}
1166
1167 type ImpRuleEdges = IdEnv IdSet -- Mapping from FVs of imported RULE LHSs to RHS FVs
1168
1169 noImpRuleEdges :: ImpRuleEdges
1170 noImpRuleEdges = emptyVarEnv
1171
1172 type LetrecNode = Node Unique Details -- Node comes from Digraph
1173 -- The Unique key is gotten from the Id
1174 data Details
1175 = ND { nd_bndr :: Id -- Binder
1176 , nd_rhs :: CoreExpr -- RHS, already occ-analysed
1177 , nd_rhs_bndrs :: [CoreBndr] -- Outer lambdas of RHS
1178 -- INVARIANT: (nd_rhs_bndrs nd, _) ==
1179 -- collectBinders (nd_rhs nd)
1180
1181 , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings
1182 -- ignoring phase (ie assuming all are active)
1183 -- See Note [Forming Rec groups]
1184
1185 , nd_inl :: IdSet -- Free variables of
1186 -- the stable unfolding (if present and active)
1187 -- or the RHS (if not)
1188 -- but excluding any RULES
1189 -- This is the IdSet that may be used if the Id is inlined
1190
1191 , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds
1192 -- but are *not* in nd_inl. These are the ones whose
1193 -- dependencies might not be respected by loop_breaker_nodes
1194 -- See Note [Weak loop breakers]
1195
1196 , nd_active_rule_fvs :: IdSet -- Free variables of the RHS of active RULES
1197
1198 , nd_score :: NodeScore
1199 }
1200
1201 instance Outputable Details where
1202 ppr nd = text "ND" <> braces
1203 (sep [ text "bndr =" <+> ppr (nd_bndr nd)
1204 , text "uds =" <+> ppr (nd_uds nd)
1205 , text "inl =" <+> ppr (nd_inl nd)
1206 , text "weak =" <+> ppr (nd_weak nd)
1207 , text "rule =" <+> ppr (nd_active_rule_fvs nd)
1208 , text "score =" <+> ppr (nd_score nd)
1209 ])
1210
1211 -- The NodeScore is compared lexicographically;
1212 -- e.g. lower rank wins regardless of size
1213 type NodeScore = ( Int -- Rank: lower => more likely to be picked as loop breaker
1214 , Int -- Size of rhs: higher => more likely to be picked as LB
1215 -- Maxes out at maxExprSize; we just use it to prioritise
1216 -- small functions
1217 , Bool ) -- Was it a loop breaker before?
1218 -- True => more likely to be picked
1219 -- Note [Loop breakers, node scoring, and stability]
1220
1221 rank :: NodeScore -> Int
1222 rank (r, _, _) = r
1223
1224 makeNode :: OccEnv -> ImpRuleEdges -> VarSet
1225 -> (Var, CoreExpr) -> LetrecNode
1226 -- See Note [Recursive bindings: the grand plan]
1227 makeNode env imp_rule_edges bndr_set (bndr, rhs)
1228 = DigraphNode details (varUnique bndr) (nonDetKeysUniqSet node_fvs)
1229 -- It's OK to use nonDetKeysUniqSet here as stronglyConnCompFromEdgedVerticesR
1230 -- is still deterministic with edges in nondeterministic order as
1231 -- explained in Note [Deterministic SCC] in Digraph.
1232 where
1233 details = ND { nd_bndr = bndr
1234 , nd_rhs = rhs'
1235 , nd_rhs_bndrs = bndrs'
1236 , nd_uds = rhs_usage3
1237 , nd_inl = inl_fvs
1238 , nd_weak = node_fvs `minusVarSet` inl_fvs
1239 , nd_active_rule_fvs = active_rule_fvs
1240 , nd_score = pprPanic "makeNodeDetails" (ppr bndr) }
1241
1242 -- Constructing the edges for the main Rec computation
1243 -- See Note [Forming Rec groups]
1244 (bndrs, body) = collectBinders rhs
1245 (rhs_usage1, bndrs', body') = occAnalRecRhs env bndrs body
1246 rhs' = mkLams bndrs' body'
1247 rhs_usage2 = rhs_usage1 +++ all_rule_uds
1248 -- Note [Rules are extra RHSs]
1249 -- Note [Rule dependency info]
1250 rhs_usage3 = case mb_unf_uds of
1251 Just unf_uds -> rhs_usage2 +++ unf_uds
1252 Nothing -> rhs_usage2
1253 node_fvs = udFreeVars bndr_set rhs_usage3
1254
1255 -- Finding the free variables of the rules
1256 is_active = occ_rule_act env :: Activation -> Bool
1257
1258 rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)]
1259 rules_w_uds = occAnalRules env (Just (length bndrs)) Recursive bndr
1260
1261 rules_w_rhs_fvs :: [(Activation, VarSet)] -- Find the RHS fvs
1262 rules_w_rhs_fvs = maybe id (\ids -> ((AlwaysActive, ids):))
1263 (lookupVarEnv imp_rule_edges bndr)
1264 -- See Note [Preventing loops due to imported functions rules]
1265 [ (ru_act rule, udFreeVars bndr_set rhs_uds)
1266 | (rule, _, rhs_uds) <- rules_w_uds ]
1267 all_rule_uds = combineUsageDetailsList $
1268 concatMap (\(_, l, r) -> [l, r]) rules_w_uds
1269 active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_rhs_fvs
1270 , is_active a]
1271
1272 -- Finding the usage details of the INLINE pragma (if any)
1273 mb_unf_uds = occAnalUnfolding env Recursive bndr
1274
1275 -- Find the "nd_inl" free vars; for the loop-breaker phase
1276 inl_fvs = case mb_unf_uds of
1277 Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
1278 Just unf_uds -> udFreeVars bndr_set unf_uds
1279 -- We could check for an *active* INLINE (returning
1280 -- emptyVarSet for an inactive one), but is_active
1281 -- isn't the right thing (it tells about
1282 -- RULE activation), so we'd need more plumbing
1283
1284 mkLoopBreakerNodes :: TopLevelFlag
1285 -> VarSet
1286 -> UsageDetails -- for BODY of let
1287 -> [Details]
1288 -> (UsageDetails, -- adjusted
1289 [LetrecNode])
1290 -- Does four things
1291 -- a) tag each binder with its occurrence info
1292 -- b) add a NodeScore to each node
1293 -- c) make a Node with the right dependency edges for
1294 -- the loop-breaker SCC analysis
1295 -- d) adjust each RHS's usage details according to
1296 -- the binder's (new) shotness and join-point-hood
1297 mkLoopBreakerNodes lvl bndr_set body_uds details_s
1298 = (final_uds, zipWith mk_lb_node details_s bndrs')
1299 where
1300 (final_uds, bndrs') = tagRecBinders lvl body_uds
1301 [ ((nd_bndr nd)
1302 ,(nd_uds nd)
1303 ,(nd_rhs_bndrs nd))
1304 | nd <- details_s ]
1305 mk_lb_node nd@(ND { nd_bndr = bndr, nd_rhs = rhs, nd_inl = inl_fvs }) bndr'
1306 = DigraphNode nd' (varUnique bndr) (nonDetKeysUniqSet lb_deps)
1307 -- It's OK to use nonDetKeysUniqSet here as
1308 -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges
1309 -- in nondeterministic order as explained in
1310 -- Note [Deterministic SCC] in Digraph.
1311 where
1312 nd' = nd { nd_bndr = bndr', nd_score = score }
1313 score = nodeScore bndr bndr' rhs lb_deps
1314 lb_deps = extendFvs_ rule_fv_env inl_fvs
1315
1316 rule_fv_env :: IdEnv IdSet
1317 -- Maps a variable f to the variables from this group
1318 -- mentioned in RHS of active rules for f
1319 -- Domain is *subset* of bound vars (others have no rule fvs)
1320 rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
1321 init_rule_fvs -- See Note [Finding rule RHS free vars]
1322 = [ (b, trimmed_rule_fvs)
1323 | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s
1324 , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
1325 , not (isEmptyVarSet trimmed_rule_fvs) ]
1326
1327
1328 ------------------------------------------
1329 nodeScore :: Id -- Binder has old occ-info (just for loop-breaker-ness)
1330 -> Id -- Binder with new occ-info
1331 -> CoreExpr -- RHS
1332 -> VarSet -- Loop-breaker dependencies
1333 -> NodeScore
1334 nodeScore old_bndr new_bndr bind_rhs lb_deps
1335 | not (isId old_bndr) -- A type or cercion variable is never a loop breaker
1336 = (100, 0, False)
1337
1338 | old_bndr `elemVarSet` lb_deps -- Self-recursive things are great loop breakers
1339 = (0, 0, True) -- See Note [Self-recursion and loop breakers]
1340
1341 | exprIsTrivial rhs
1342 = mk_score 10 -- Practically certain to be inlined
1343 -- Used to have also: && not (isExportedId bndr)
1344 -- But I found this sometimes cost an extra iteration when we have
1345 -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }
1346 -- where df is the exported dictionary. Then df makes a really
1347 -- bad choice for loop breaker
1348
1349 | DFunUnfolding { df_args = args } <- id_unfolding
1350 -- Never choose a DFun as a loop breaker
1351 -- Note [DFuns should not be loop breakers]
1352 = (9, length args, is_lb)
1353
1354 -- Data structures are more important than INLINE pragmas
1355 -- so that dictionary/method recursion unravels
1356
1357 | CoreUnfolding { uf_guidance = UnfWhen {} } <- id_unfolding
1358 = mk_score 6
1359
1360 | is_con_app rhs -- Data types help with cases:
1361 = mk_score 5 -- Note [Constructor applications]
1362
1363 | isStableUnfolding id_unfolding
1364 , can_unfold
1365 = mk_score 3
1366
1367 | isOneOcc (idOccInfo new_bndr)
1368 = mk_score 2 -- Likely to be inlined
1369
1370 | can_unfold -- The Id has some kind of unfolding
1371 = mk_score 1
1372
1373 | otherwise
1374 = (0, 0, is_lb)
1375
1376 where
1377 mk_score :: Int -> NodeScore
1378 mk_score rank = (rank, rhs_size, is_lb)
1379
1380 is_lb = isStrongLoopBreaker (idOccInfo old_bndr)
1381 rhs = case id_unfolding of
1382 CoreUnfolding { uf_src = src, uf_tmpl = unf_rhs }
1383 | isStableSource src
1384 -> unf_rhs
1385 _ -> bind_rhs
1386 -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding
1387 rhs_size = case id_unfolding of
1388 CoreUnfolding { uf_guidance = guidance }
1389 | UnfIfGoodArgs { ug_size = size } <- guidance
1390 -> size
1391 _ -> cheapExprSize rhs
1392
1393 can_unfold = canUnfold id_unfolding
1394 id_unfolding = realIdUnfolding old_bndr
1395 -- realIdUnfolding: Ignore loop-breaker-ness here because
1396 -- that is what we are setting!
1397
1398 -- Checking for a constructor application
1399 -- Cheap and cheerful; the simplifier moves casts out of the way
1400 -- The lambda case is important to spot x = /\a. C (f a)
1401 -- which comes up when C is a dictionary constructor and
1402 -- f is a default method.
1403 -- Example: the instance for Show (ST s a) in GHC.ST
1404 --
1405 -- However we *also* treat (\x. C p q) as a con-app-like thing,
1406 -- Note [Closure conversion]
1407 is_con_app (Var v) = isConLikeId v
1408 is_con_app (App f _) = is_con_app f
1409 is_con_app (Lam _ e) = is_con_app e
1410 is_con_app (Tick _ e) = is_con_app e
1411 is_con_app _ = False
1412
1413 maxExprSize :: Int
1414 maxExprSize = 20 -- Rather arbitrary
1415
1416 cheapExprSize :: CoreExpr -> Int
1417 -- Maxes out at maxExprSize
1418 cheapExprSize e
1419 = go 0 e
1420 where
1421 go n e | n >= maxExprSize = n
1422 | otherwise = go1 n e
1423
1424 go1 n (Var {}) = n+1
1425 go1 n (Lit {}) = n+1
1426 go1 n (Type {}) = n
1427 go1 n (Coercion {}) = n
1428 go1 n (Tick _ e) = go1 n e
1429 go1 n (Cast e _) = go1 n e
1430 go1 n (App f a) = go (go1 n f) a
1431 go1 n (Lam b e)
1432 | isTyVar b = go1 n e
1433 | otherwise = go (n+1) e
1434 go1 n (Let b e) = gos (go1 n e) (rhssOfBind b)
1435 go1 n (Case e _ _ as) = gos (go1 n e) (rhssOfAlts as)
1436
1437 gos n [] = n
1438 gos n (e:es) | n >= maxExprSize = n
1439 | otherwise = gos (go1 n e) es
1440
1441 betterLB :: NodeScore -> NodeScore -> Bool
1442 -- If n1 `betterLB` n2 then choose n1 as the loop breaker
1443 betterLB (rank1, size1, lb1) (rank2, size2, _)
1444 | rank1 < rank2 = True
1445 | rank1 > rank2 = False
1446 | size1 < size2 = False -- Make the bigger n2 into the loop breaker
1447 | size1 > size2 = True
1448 | lb1 = True -- Tie-break: if n1 was a loop breaker before, choose it
1449 | otherwise = False -- See Note [Loop breakers, node scoring, and stability]
1450
1451 {- Note [Self-recursion and loop breakers]
1452 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1453 If we have
1454 rec { f = ...f...g...
1455 ; g = .....f... }
1456 then 'f' has to be a loop breaker anyway, so we may as well choose it
1457 right away, so that g can inline freely.
1458
1459 This is really just a cheap hack. Consider
1460 rec { f = ...g...
1461 ; g = ..f..h...
1462 ; h = ...f....}
1463 Here f or g are better loop breakers than h; but we might accidentally
1464 choose h. Finding the minimal set of loop breakers is hard.
1465
1466 Note [Loop breakers, node scoring, and stability]
1467 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1468 To choose a loop breaker, we give a NodeScore to each node in the SCC,
1469 and pick the one with the best score (according to 'betterLB').
1470
1471 We need to be jolly careful (Trac #12425, #12234) about the stability
1472 of this choice. Suppose we have
1473
1474 let rec { f = ...g...g...
1475 ; g = ...f...f... }
1476 in
1477 case x of
1478 True -> ...f..
1479 False -> ..f...
1480
1481 In each iteration of the simplifier the occurrence analyser OccAnal
1482 chooses a loop breaker. Suppose in iteration 1 it choose g as the loop
1483 breaker. That means it is free to inline f.
1484
1485 Suppose that GHC decides to inline f in the branches of the case, but
1486 (for some reason; eg it is not saturated) in the rhs of g. So we get
1487
1488 let rec { f = ...g...g...
1489 ; g = ...f...f... }
1490 in
1491 case x of
1492 True -> ...g...g.....
1493 False -> ..g..g....
1494
1495 Now suppose that, for some reason, in the next iteration the occurrence
1496 analyser chooses f as the loop breaker, so it can freely inline g. And
1497 again for some reason the simplifier inlines g at its calls in the case
1498 branches, but not in the RHS of f. Then we get
1499
1500 let rec { f = ...g...g...
1501 ; g = ...f...f... }
1502 in
1503 case x of
1504 True -> ...(...f...f...)...(...f..f..).....
1505 False -> ..(...f...f...)...(..f..f...)....
1506
1507 You can see where this is going! Each iteration of the simplifier
1508 doubles the number of calls to f or g. No wonder GHC is slow!
1509
1510 (In the particular example in comment:3 of #12425, f and g are the two
1511 mutually recursive fmap instances for CondT and Result. They are both
1512 marked INLINE which, oddly, is why they don't inline in each other's
1513 RHS, because the call there is not saturated.)
1514
1515 The root cause is that we flip-flop on our choice of loop breaker. I
1516 always thought it didn't matter, and indeed for any single iteration
1517 to terminate, it doesn't matter. But when we iterate, it matters a
1518 lot!!
1519
1520 So The Plan is this:
1521 If there is a tie, choose the node that
1522 was a loop breaker last time round
1523
1524 Hence the is_lb field of NodeScore
1525
1526 ************************************************************************
1527 * *
1528 Right hand sides
1529 * *
1530 ************************************************************************
1531 -}
1532
1533 occAnalRhs :: OccEnv -> RecFlag -> Id -> [CoreBndr] -> CoreExpr
1534 -> (UsageDetails, [CoreBndr], CoreExpr)
1535 -- Returned usage details covers only the RHS,
1536 -- and *not* the RULE or INLINE template for the Id
1537 occAnalRhs env Recursive _ bndrs body
1538 = occAnalRecRhs env bndrs body
1539 occAnalRhs env NonRecursive id bndrs body
1540 = occAnalNonRecRhs env id bndrs body
1541
1542 occAnalRecRhs :: OccEnv -> [CoreBndr] -> CoreExpr -- Rhs lambdas, body
1543 -> (UsageDetails, [CoreBndr], CoreExpr)
1544 -- Returned usage details covers only the RHS,
1545 -- and *not* the RULE or INLINE template for the Id
1546 occAnalRecRhs env bndrs body = occAnalLamOrRhs (rhsCtxt env) bndrs body
1547
1548 occAnalNonRecRhs :: OccEnv
1549 -> Id -> [CoreBndr] -> CoreExpr -- Binder; rhs lams, body
1550 -- Binder is already tagged with occurrence info
1551 -> (UsageDetails, [CoreBndr], CoreExpr)
1552 -- Returned usage details covers only the RHS,
1553 -- and *not* the RULE or INLINE template for the Id
1554 occAnalNonRecRhs env bndr bndrs body
1555 = occAnalLamOrRhs rhs_env bndrs body
1556 where
1557 env1 | is_join_point = env -- See Note [Join point RHSs]
1558 | certainly_inline = env -- See Note [Cascading inlines]
1559 | otherwise = rhsCtxt env
1560
1561 -- See Note [Sources of one-shot information]
1562 rhs_env = env1 { occ_one_shots = argOneShots dmd }
1563
1564 certainly_inline -- See Note [Cascading inlines]
1565 = case occ of
1566 OneOcc { occ_in_lam = in_lam, occ_one_br = one_br }
1567 -> not in_lam && one_br && active && not_stable
1568 _ -> False
1569
1570 is_join_point = isAlwaysTailCalled occ
1571 -- Like (isJoinId bndr) but happens one step earlier
1572 -- c.f. willBeJoinId_maybe
1573
1574 occ = idOccInfo bndr
1575 dmd = idDemandInfo bndr
1576 active = isAlwaysActive (idInlineActivation bndr)
1577 not_stable = not (isStableUnfolding (idUnfolding bndr))
1578
1579 occAnalUnfolding :: OccEnv
1580 -> RecFlag
1581 -> Id
1582 -> Maybe UsageDetails
1583 -- Just the analysis, not a new unfolding. The unfolding
1584 -- got analysed when it was created and we don't need to
1585 -- update it.
1586 occAnalUnfolding env rec_flag id
1587 = case realIdUnfolding id of -- ignore previous loop-breaker flag
1588 CoreUnfolding { uf_tmpl = rhs, uf_src = src }
1589 | not (isStableSource src)
1590 -> Nothing
1591 | otherwise
1592 -> Just $ markAllMany usage
1593 where
1594 (bndrs, body) = collectBinders rhs
1595 (usage, _, _) = occAnalRhs env rec_flag id bndrs body
1596
1597 DFunUnfolding { df_bndrs = bndrs, df_args = args }
1598 -> Just $ zapDetails (delDetailsList usage bndrs)
1599 where
1600 usage = foldr (+++) emptyDetails (map (fst . occAnal env) args)
1601
1602 _ -> Nothing
1603
1604 occAnalRules :: OccEnv
1605 -> Maybe JoinArity -- If the binder is (or MAY become) a join
1606 -- point, what its join arity is (or WOULD
1607 -- become). See Note [Rules and join points].
1608 -> RecFlag
1609 -> Id
1610 -> [(CoreRule, -- Each (non-built-in) rule
1611 UsageDetails, -- Usage details for LHS
1612 UsageDetails)] -- Usage details for RHS
1613 occAnalRules env mb_expected_join_arity rec_flag id
1614 = [ (rule, lhs_uds, rhs_uds) | rule@Rule {} <- idCoreRules id
1615 , let (lhs_uds, rhs_uds) = occ_anal_rule rule ]
1616 where
1617 occ_anal_rule (Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs })
1618 = (lhs_uds, final_rhs_uds)
1619 where
1620 lhs_uds = addManyOccsSet emptyDetails $
1621 (exprsFreeVars args `delVarSetList` bndrs)
1622 (rhs_bndrs, rhs_body) = collectBinders rhs
1623 (rhs_uds, _, _) = occAnalRhs env rec_flag id rhs_bndrs rhs_body
1624 -- Note [Rules are extra RHSs]
1625 -- Note [Rule dependency info]
1626 final_rhs_uds = adjust_tail_info args $ markAllMany $
1627 (rhs_uds `delDetailsList` bndrs)
1628 occ_anal_rule _
1629 = (emptyDetails, emptyDetails)
1630
1631 adjust_tail_info args uds -- see Note [Rules and join points]
1632 = case mb_expected_join_arity of
1633 Just ar | args `lengthIs` ar -> uds
1634 _ -> markAllNonTailCalled uds
1635 {- Note [Join point RHSs]
1636 ~~~~~~~~~~~~~~~~~~~~~~~~~
1637 Consider
1638 x = e
1639 join j = Just x
1640
1641 We want to inline x into j right away, so we don't want to give
1642 the join point a RhsCtxt (Trac #14137). It's not a huge deal, because
1643 the FloatIn pass knows to float into join point RHSs; and the simplifier
1644 does not float things out of join point RHSs. But it's a simple, cheap
1645 thing to do. See Trac #14137.
1646
1647 Note [Cascading inlines]
1648 ~~~~~~~~~~~~~~~~~~~~~~~~
1649 By default we use an rhsCtxt for the RHS of a binding. This tells the
1650 occ anal n that it's looking at an RHS, which has an effect in
1651 occAnalApp. In particular, for constructor applications, it makes
1652 the arguments appear to have NoOccInfo, so that we don't inline into
1653 them. Thus x = f y
1654 k = Just x
1655 we do not want to inline x.
1656
1657 But there's a problem. Consider
1658 x1 = a0 : []
1659 x2 = a1 : x1
1660 x3 = a2 : x2
1661 g = f x3
1662 First time round, it looks as if x1 and x2 occur as an arg of a
1663 let-bound constructor ==> give them a many-occurrence.
1664 But then x3 is inlined (unconditionally as it happens) and
1665 next time round, x2 will be, and the next time round x1 will be
1666 Result: multiple simplifier iterations. Sigh.
1667
1668 So, when analysing the RHS of x3 we notice that x3 will itself
1669 definitely inline the next time round, and so we analyse x3's rhs in
1670 an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff.
1671
1672 Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally.
1673 If (a) the RHS is expandable (see isExpandableApp in occAnalApp), and
1674 (b) certainly_inline says "yes" when preInlineUnconditionally says "no"
1675 then the simplifier iterates indefinitely:
1676 x = f y
1677 k = Just x -- We decide that k is 'certainly_inline'
1678 v = ...k... -- but preInlineUnconditionally doesn't inline it
1679 inline ==>
1680 k = Just (f y)
1681 v = ...k...
1682 float ==>
1683 x1 = f y
1684 k = Just x1
1685 v = ...k...
1686
1687 This is worse than the slow cascade, so we only want to say "certainly_inline"
1688 if it really is certain. Look at the note with preInlineUnconditionally
1689 for the various clauses.
1690
1691
1692 ************************************************************************
1693 * *
1694 Expressions
1695 * *
1696 ************************************************************************
1697 -}
1698
1699 occAnal :: OccEnv
1700 -> CoreExpr
1701 -> (UsageDetails, -- Gives info only about the "interesting" Ids
1702 CoreExpr)
1703
1704 occAnal _ expr@(Type _) = (emptyDetails, expr)
1705 occAnal _ expr@(Lit _) = (emptyDetails, expr)
1706 occAnal env expr@(Var _) = occAnalApp env (expr, [], [])
1707 -- At one stage, I gathered the idRuleVars for the variable here too,
1708 -- which in a way is the right thing to do.
1709 -- But that went wrong right after specialisation, when
1710 -- the *occurrences* of the overloaded function didn't have any
1711 -- rules in them, so the *specialised* versions looked as if they
1712 -- weren't used at all.
1713
1714 occAnal _ (Coercion co)
1715 = (addManyOccsSet emptyDetails (coVarsOfCo co), Coercion co)
1716 -- See Note [Gather occurrences of coercion variables]
1717
1718 {-
1719 Note [Gather occurrences of coercion variables]
1720 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1721 We need to gather info about what coercion variables appear, so that
1722 we can sort them into the right place when doing dependency analysis.
1723 -}
1724
1725 occAnal env (Tick tickish body)
1726 | tickish `tickishScopesLike` SoftScope
1727 = (markAllNonTailCalled usage, Tick tickish body')
1728
1729 | Breakpoint _ ids <- tickish
1730 = (usage_lam +++ foldr addManyOccs emptyDetails ids, Tick tickish body')
1731 -- never substitute for any of the Ids in a Breakpoint
1732
1733 | otherwise
1734 = (usage_lam, Tick tickish body')
1735 where
1736 !(usage,body') = occAnal env body
1737 -- for a non-soft tick scope, we can inline lambdas only
1738 usage_lam = markAllNonTailCalled (markAllInsideLam usage)
1739 -- TODO There may be ways to make ticks and join points play
1740 -- nicer together, but right now there are problems:
1741 -- let j x = ... in tick<t> (j 1)
1742 -- Making j a join point may cause the simplifier to drop t
1743 -- (if the tick is put into the continuation). So we don't
1744 -- count j 1 as a tail call.
1745
1746 occAnal env (Cast expr co)
1747 = case occAnal env expr of { (usage, expr') ->
1748 let usage1 = zapDetailsIf (isRhsEnv env) usage
1749 usage2 = addManyOccsSet usage1 (coVarsOfCo co)
1750 -- See Note [Gather occurrences of coercion variables]
1751 in (markAllNonTailCalled usage2, Cast expr' co)
1752 -- If we see let x = y `cast` co
1753 -- then mark y as 'Many' so that we don't
1754 -- immediately inline y again.
1755 }
1756
1757 occAnal env app@(App _ _)
1758 = occAnalApp env (collectArgsTicks tickishFloatable app)
1759
1760 -- Ignore type variables altogether
1761 -- (a) occurrences inside type lambdas only not marked as InsideLam
1762 -- (b) type variables not in environment
1763
1764 occAnal env (Lam x body)
1765 | isTyVar x
1766 = case occAnal env body of { (body_usage, body') ->
1767 (markAllNonTailCalled body_usage, Lam x body')
1768 }
1769
1770 -- For value lambdas we do a special hack. Consider
1771 -- (\x. \y. ...x...)
1772 -- If we did nothing, x is used inside the \y, so would be marked
1773 -- as dangerous to dup. But in the common case where the abstraction
1774 -- is applied to two arguments this is over-pessimistic.
1775 -- So instead, we just mark each binder with its occurrence
1776 -- info in the *body* of the multiple lambda.
1777 -- Then, the simplifier is careful when partially applying lambdas.
1778
1779 occAnal env expr@(Lam _ _)
1780 = case occAnalLamOrRhs env binders body of { (usage, tagged_binders, body') ->
1781 let
1782 expr' = mkLams tagged_binders body'
1783 usage1 = markAllNonTailCalled usage
1784 one_shot_gp = all isOneShotBndr tagged_binders
1785 final_usage | one_shot_gp = usage1
1786 | otherwise = markAllInsideLam usage1
1787 in
1788 (final_usage, expr') }
1789 where
1790 (binders, body) = collectBinders expr
1791
1792 occAnal env (Case scrut bndr ty alts)
1793 = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') ->
1794 case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts') ->
1795 let
1796 alts_usage = foldr combineAltsUsageDetails emptyDetails alts_usage_s
1797 (alts_usage1, tagged_bndr) = tag_case_bndr alts_usage bndr
1798 total_usage = markAllNonTailCalled scrut_usage +++ alts_usage1
1799 -- Alts can have tail calls, but the scrutinee can't
1800 in
1801 total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
1802 where
1803 -- Note [Case binder usage]
1804 -- ~~~~~~~~~~~~~~~~~~~~~~~~
1805 -- The case binder gets a usage of either "many" or "dead", never "one".
1806 -- Reason: we like to inline single occurrences, to eliminate a binding,
1807 -- but inlining a case binder *doesn't* eliminate a binding.
1808 -- We *don't* want to transform
1809 -- case x of w { (p,q) -> f w }
1810 -- into
1811 -- case x of w { (p,q) -> f (p,q) }
1812 tag_case_bndr usage bndr
1813 = (usage', setIdOccInfo bndr final_occ_info)
1814 where
1815 occ_info = lookupDetails usage bndr
1816 usage' = usage `delDetails` bndr
1817 final_occ_info = case occ_info of IAmDead -> IAmDead
1818 _ -> noOccInfo
1819
1820 alt_env = mkAltEnv env scrut bndr
1821 occ_anal_alt = occAnalAlt alt_env
1822
1823 occ_anal_scrut (Var v) (alt1 : other_alts)
1824 | not (null other_alts) || not (isDefaultAlt alt1)
1825 = (mkOneOcc env v True 0, Var v)
1826 -- The 'True' says that the variable occurs in an interesting
1827 -- context; the case has at least one non-default alternative
1828 occ_anal_scrut (Tick t e) alts
1829 | t `tickishScopesLike` SoftScope
1830 -- No reason to not look through all ticks here, but only
1831 -- for soft-scoped ticks we can do so without having to
1832 -- update returned occurance info (see occAnal)
1833 = second (Tick t) $ occ_anal_scrut e alts
1834
1835 occ_anal_scrut scrut _alts
1836 = occAnal (vanillaCtxt env) scrut -- No need for rhsCtxt
1837
1838 occAnal env (Let bind body)
1839 = case occAnal env body of { (body_usage, body') ->
1840 case occAnalBind env NotTopLevel
1841 noImpRuleEdges bind
1842 body_usage of { (final_usage, new_binds) ->
1843 (final_usage, mkLets new_binds body') }}
1844
1845 occAnalArgs :: OccEnv -> [CoreExpr] -> [OneShots] -> (UsageDetails, [CoreExpr])
1846 occAnalArgs _ [] _
1847 = (emptyDetails, [])
1848
1849 occAnalArgs env (arg:args) one_shots
1850 | isTypeArg arg
1851 = case occAnalArgs env args one_shots of { (uds, args') ->
1852 (uds, arg:args') }
1853
1854 | otherwise
1855 = case argCtxt env one_shots of { (arg_env, one_shots') ->
1856 case occAnal arg_env arg of { (uds1, arg') ->
1857 case occAnalArgs env args one_shots' of { (uds2, args') ->
1858 (uds1 +++ uds2, arg':args') }}}
1859
1860 {-
1861 Applications are dealt with specially because we want
1862 the "build hack" to work.
1863
1864 Note [Arguments of let-bound constructors]
1865 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1866 Consider
1867 f x = let y = expensive x in
1868 let z = (True,y) in
1869 (case z of {(p,q)->q}, case z of {(p,q)->q})
1870 We feel free to duplicate the WHNF (True,y), but that means
1871 that y may be duplicated thereby.
1872
1873 If we aren't careful we duplicate the (expensive x) call!
1874 Constructors are rather like lambdas in this way.
1875 -}
1876
1877 occAnalApp :: OccEnv
1878 -> (Expr CoreBndr, [Arg CoreBndr], [Tickish Id])
1879 -> (UsageDetails, Expr CoreBndr)
1880 occAnalApp env (Var fun, args, ticks)
1881 | null ticks = (uds, mkApps (Var fun) args')
1882 | otherwise = (uds, mkTicks ticks $ mkApps (Var fun) args')
1883 where
1884 uds = fun_uds +++ final_args_uds
1885
1886 !(args_uds, args') = occAnalArgs env args one_shots
1887 !final_args_uds
1888 | isRhsEnv env && is_exp = markAllNonTailCalled $
1889 markAllInsideLam args_uds
1890 | otherwise = markAllNonTailCalled args_uds
1891 -- We mark the free vars of the argument of a constructor or PAP
1892 -- as "inside-lambda", if it is the RHS of a let(rec).
1893 -- This means that nothing gets inlined into a constructor or PAP
1894 -- argument position, which is what we want. Typically those
1895 -- constructor arguments are just variables, or trivial expressions.
1896 -- We use inside-lam because it's like eta-expanding the PAP.
1897 --
1898 -- This is the *whole point* of the isRhsEnv predicate
1899 -- See Note [Arguments of let-bound constructors]
1900
1901 n_val_args = valArgCount args
1902 n_args = length args
1903 fun_uds = mkOneOcc env fun (n_val_args > 0) n_args
1904 is_exp = isExpandableApp fun n_val_args
1905 -- See Note [CONLIKE pragma] in BasicTypes
1906 -- The definition of is_exp should match that in Simplify.prepareRhs
1907
1908 one_shots = argsOneShots (idStrictness fun) guaranteed_val_args
1909 guaranteed_val_args = n_val_args + length (takeWhile isOneShotInfo
1910 (occ_one_shots env))
1911 -- See Note [Sources of one-shot information], bullet point A']
1912
1913 occAnalApp env (fun, args, ticks)
1914 = (markAllNonTailCalled (fun_uds +++ args_uds),
1915 mkTicks ticks $ mkApps fun' args')
1916 where
1917 !(fun_uds, fun') = occAnal (addAppCtxt env args) fun
1918 -- The addAppCtxt is a bit cunning. One iteration of the simplifier
1919 -- often leaves behind beta redexs like
1920 -- (\x y -> e) a1 a2
1921 -- Here we would like to mark x,y as one-shot, and treat the whole
1922 -- thing much like a let. We do this by pushing some True items
1923 -- onto the context stack.
1924 !(args_uds, args') = occAnalArgs env args []
1925
1926 zapDetailsIf :: Bool -- If this is true
1927 -> UsageDetails -- Then do zapDetails on this
1928 -> UsageDetails
1929 zapDetailsIf True uds = zapDetails uds
1930 zapDetailsIf False uds = uds
1931
1932 {-
1933 Note [Sources of one-shot information]
1934 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1935 The occurrence analyser obtains one-shot-lambda information from two sources:
1936
1937 A: Saturated applications: eg f e1 .. en
1938
1939 In general, given a call (f e1 .. en) we can propagate one-shot info from
1940 f's strictness signature into e1 .. en, but /only/ if n is enough to
1941 saturate the strictness signature. A strictness signature like
1942
1943 f :: C1(C1(L))LS
1944
1945 means that *if f is applied to three arguments* then it will guarantee to
1946 call its first argument at most once, and to call the result of that at
1947 most once. But if f has fewer than three arguments, all bets are off; e.g.
1948
1949 map (f (\x y. expensive) e2) xs
1950
1951 Here the \x y abstraction may be called many times (once for each element of
1952 xs) so we should not mark x and y as one-shot. But if it was
1953
1954 map (f (\x y. expensive) 3 2) xs
1955
1956 then the first argument of f will be called at most once.
1957
1958 The one-shot info, derived from f's strictness signature, is
1959 computed by 'argsOneShots', called in occAnalApp.
1960
1961 A': Non-obviously saturated applications: eg build (f (\x y -> expensive))
1962 where f is as above.
1963
1964 In this case, f is only manifestly applied to one argument, so it does not
1965 look saturated. So by the previous point, we should not use its strictness
1966 signature to learn about the one-shotness of \x y. But in this case we can:
1967 build is fully applied, so we may use its strictness signature; and from
1968 that we learn that build calls its argument with two arguments *at most once*.
1969
1970 So there is really only one call to f, and it will have three arguments. In
1971 that sense, f is saturated, and we may proceed as described above.
1972
1973 Hence the computation of 'guaranteed_val_args' in occAnalApp, using
1974 '(occ_one_shots env)'. See also Trac #13227, comment:9
1975
1976 B: Let-bindings: eg let f = \c. let ... in \n -> blah
1977 in (build f, build f)
1978
1979 Propagate one-shot info from the demanand-info on 'f' to the
1980 lambdas in its RHS (which may not be syntactically at the top)
1981
1982 This information must have come from a previous run of the demanand
1983 analyser.
1984
1985 Previously, the demand analyser would *also* set the one-shot information, but
1986 that code was buggy (see #11770), so doing it only in on place, namely here, is
1987 saner.
1988
1989 Note [OneShots]
1990 ~~~~~~~~~~~~~~~
1991 When analysing an expression, the occ_one_shots argument contains information
1992 about how the function is being used. The length of the list indicates
1993 how many arguments will eventually be passed to the analysed expression,
1994 and the OneShotInfo indicates whether this application is once or multiple times.
1995
1996 Example:
1997
1998 Context of f occ_one_shots when analysing f
1999
2000 f 1 2 [OneShot, OneShot]
2001 map (f 1) [OneShot, NoOneShotInfo]
2002 build f [OneShot, OneShot]
2003 f 1 2 `seq` f 2 1 [NoOneShotInfo, OneShot]
2004
2005 Note [Binders in case alternatives]
2006 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2007 Consider
2008 case x of y { (a,b) -> f y }
2009 We treat 'a', 'b' as dead, because they don't physically occur in the
2010 case alternative. (Indeed, a variable is dead iff it doesn't occur in
2011 its scope in the output of OccAnal.) It really helps to know when
2012 binders are unused. See esp the call to isDeadBinder in
2013 Simplify.mkDupableAlt
2014
2015 In this example, though, the Simplifier will bring 'a' and 'b' back to
2016 life, beause it binds 'y' to (a,b) (imagine got inlined and
2017 scrutinised y).
2018 -}
2019
2020 occAnalLamOrRhs :: OccEnv -> [CoreBndr] -> CoreExpr
2021 -> (UsageDetails, [CoreBndr], CoreExpr)
2022 occAnalLamOrRhs env [] body
2023 = case occAnal env body of (body_usage, body') -> (body_usage, [], body')
2024 -- RHS of thunk or nullary join point
2025 occAnalLamOrRhs env (bndr:bndrs) body
2026 | isTyVar bndr
2027 = -- Important: Keep the environment so that we don't inline into an RHS like
2028 -- \(@ x) -> C @x (f @x)
2029 -- (see the beginning of Note [Cascading inlines]).
2030 case occAnalLamOrRhs env bndrs body of
2031 (body_usage, bndrs', body') -> (body_usage, bndr:bndrs', body')
2032 occAnalLamOrRhs env binders body
2033 = case occAnal env_body body of { (body_usage, body') ->
2034 let
2035 (final_usage, tagged_binders) = tagLamBinders body_usage binders'
2036 -- Use binders' to put one-shot info on the lambdas
2037 in
2038 (final_usage, tagged_binders, body') }
2039 where
2040 (env_body, binders') = oneShotGroup env binders
2041
2042 occAnalAlt :: (OccEnv, Maybe (Id, CoreExpr))
2043 -> CoreAlt
2044 -> (UsageDetails, Alt IdWithOccInfo)
2045 occAnalAlt (env, scrut_bind) (con, bndrs, rhs)
2046 = case occAnal env rhs of { (rhs_usage1, rhs1) ->
2047 let
2048 (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs
2049 -- See Note [Binders in case alternatives]
2050 (alt_usg', rhs2) =
2051 wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1
2052 in
2053 (alt_usg', (con, tagged_bndrs, rhs2)) }
2054
2055 wrapAltRHS :: OccEnv
2056 -> Maybe (Id, CoreExpr) -- proxy mapping generated by mkAltEnv
2057 -> UsageDetails -- usage for entire alt (p -> rhs)
2058 -> [Var] -- alt binders
2059 -> CoreExpr -- alt RHS
2060 -> (UsageDetails, CoreExpr)
2061 wrapAltRHS env (Just (scrut_var, let_rhs)) alt_usg bndrs alt_rhs
2062 | occ_binder_swap env
2063 , scrut_var `usedIn` alt_usg -- bndrs are not be present in alt_usg so this
2064 -- handles condition (a) in Note [Binder swap]
2065 , not captured -- See condition (b) in Note [Binder swap]
2066 = ( alt_usg' +++ let_rhs_usg
2067 , Let (NonRec tagged_scrut_var let_rhs') alt_rhs )
2068 where
2069 captured = any (`usedIn` let_rhs_usg) bndrs
2070 -- The rhs of the let may include coercion variables
2071 -- if the scrutinee was a cast, so we must gather their
2072 -- usage. See Note [Gather occurrences of coercion variables]
2073 (let_rhs_usg, let_rhs') = occAnal env let_rhs
2074 (alt_usg', [tagged_scrut_var]) = tagLamBinders alt_usg [scrut_var]
2075
2076 wrapAltRHS _ _ alt_usg _ alt_rhs
2077 = (alt_usg, alt_rhs)
2078
2079 {-
2080 ************************************************************************
2081 * *
2082 OccEnv
2083 * *
2084 ************************************************************************
2085 -}
2086
2087 data OccEnv
2088 = OccEnv { occ_encl :: !OccEncl -- Enclosing context information
2089 , occ_one_shots :: !OneShots -- See Note [OneShots]
2090 , occ_gbl_scrut :: GlobalScruts
2091 , occ_rule_act :: Activation -> Bool -- Which rules are active
2092 -- See Note [Finding rule RHS free vars]
2093 , occ_binder_swap :: !Bool -- enable the binder_swap
2094 -- See CorePrep Note [Dead code in CorePrep]
2095 }
2096
2097 type GlobalScruts = IdSet -- See Note [Binder swap on GlobalId scrutinees]
2098
2099 -----------------------------
2100 -- OccEncl is used to control whether to inline into constructor arguments
2101 -- For example:
2102 -- x = (p,q) -- Don't inline p or q
2103 -- y = /\a -> (p a, q a) -- Still don't inline p or q
2104 -- z = f (p,q) -- Do inline p,q; it may make a rule fire
2105 -- So OccEncl tells enought about the context to know what to do when
2106 -- we encounter a constructor application or PAP.
2107
2108 data OccEncl
2109 = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda
2110 -- Don't inline into constructor args here
2111 | OccVanilla -- Argument of function, body of lambda, scruintee of case etc.
2112 -- Do inline into constructor args here
2113
2114 instance Outputable OccEncl where
2115 ppr OccRhs = text "occRhs"
2116 ppr OccVanilla = text "occVanilla"
2117
2118 -- See note [OneShots]
2119 type OneShots = [OneShotInfo]
2120
2121 initOccEnv :: (Activation -> Bool) -> OccEnv
2122 initOccEnv active_rule
2123 = OccEnv { occ_encl = OccVanilla
2124 , occ_one_shots = []
2125 , occ_gbl_scrut = emptyVarSet
2126 , occ_rule_act = active_rule
2127 , occ_binder_swap = True }
2128
2129 vanillaCtxt :: OccEnv -> OccEnv
2130 vanillaCtxt env = env { occ_encl = OccVanilla, occ_one_shots = [] }
2131
2132 rhsCtxt :: OccEnv -> OccEnv
2133 rhsCtxt env = env { occ_encl = OccRhs, occ_one_shots = [] }
2134
2135 argCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots])
2136 argCtxt env []
2137 = (env { occ_encl = OccVanilla, occ_one_shots = [] }, [])
2138 argCtxt env (one_shots:one_shots_s)
2139 = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s)
2140
2141 isRhsEnv :: OccEnv -> Bool
2142 isRhsEnv (OccEnv { occ_encl = OccRhs }) = True
2143 isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False
2144
2145 oneShotGroup :: OccEnv -> [CoreBndr]
2146 -> ( OccEnv
2147 , [CoreBndr] )
2148 -- The result binders have one-shot-ness set that they might not have had originally.
2149 -- This happens in (build (\c n -> e)). Here the occurrence analyser
2150 -- linearity context knows that c,n are one-shot, and it records that fact in
2151 -- the binder. This is useful to guide subsequent float-in/float-out tranformations
2152
2153 oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs
2154 = go ctxt bndrs []
2155 where
2156 go ctxt [] rev_bndrs
2157 = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla }
2158 , reverse rev_bndrs )
2159
2160 go [] bndrs rev_bndrs
2161 = ( env { occ_one_shots = [], occ_encl = OccVanilla }
2162 , reverse rev_bndrs ++ bndrs )
2163
2164 go ctxt@(one_shot : ctxt') (bndr : bndrs) rev_bndrs
2165 | isId bndr = go ctxt' bndrs (bndr': rev_bndrs)
2166 | otherwise = go ctxt bndrs (bndr : rev_bndrs)
2167 where
2168 bndr' = updOneShotInfo bndr one_shot
2169 -- Use updOneShotInfo, not setOneShotInfo, as pre-existing
2170 -- one-shot info might be better than what we can infer, e.g.
2171 -- due to explicit use of the magic 'oneShot' function.
2172 -- See Note [The oneShot function]
2173
2174
2175 markJoinOneShots :: Maybe JoinArity -> [Var] -> [Var]
2176 -- Mark the lambdas of a non-recursive join point as one-shot.
2177 -- This is good to prevent gratuitous float-out etc
2178 markJoinOneShots mb_join_arity bndrs
2179 = case mb_join_arity of
2180 Nothing -> bndrs
2181 Just n -> go n bndrs
2182 where
2183 go 0 bndrs = bndrs
2184 go _ [] = WARN( True, ppr mb_join_arity <+> ppr bndrs ) []
2185 go n (b:bs) = b' : go (n-1) bs
2186 where
2187 b' | isId b = setOneShotLambda b
2188 | otherwise = b
2189
2190 addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
2191 addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args
2192 = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt }
2193
2194 transClosureFV :: UniqFM VarSet -> UniqFM VarSet
2195 -- If (f,g), (g,h) are in the input, then (f,h) is in the output
2196 -- as well as (f,g), (g,h)
2197 transClosureFV env
2198 | no_change = env
2199 | otherwise = transClosureFV (listToUFM new_fv_list)
2200 where
2201 (no_change, new_fv_list) = mapAccumL bump True (nonDetUFMToList env)
2202 -- It's OK to use nonDetUFMToList here because we'll forget the
2203 -- ordering by creating a new set with listToUFM
2204 bump no_change (b,fvs)
2205 | no_change_here = (no_change, (b,fvs))
2206 | otherwise = (False, (b,new_fvs))
2207 where
2208 (new_fvs, no_change_here) = extendFvs env fvs
2209
2210 -------------
2211 extendFvs_ :: UniqFM VarSet -> VarSet -> VarSet
2212 extendFvs_ env s = fst (extendFvs env s) -- Discard the Bool flag
2213
2214 extendFvs :: UniqFM VarSet -> VarSet -> (VarSet, Bool)
2215 -- (extendFVs env s) returns
2216 -- (s `union` env(s), env(s) `subset` s)
2217 extendFvs env s
2218 | isNullUFM env
2219 = (s, True)
2220 | otherwise
2221 = (s `unionVarSet` extras, extras `subVarSet` s)
2222 where
2223 extras :: VarSet -- env(s)
2224 extras = nonDetFoldUFM unionVarSet emptyVarSet $
2225 -- It's OK to use nonDetFoldUFM here because unionVarSet commutes
2226 intersectUFM_C (\x _ -> x) env (getUniqSet s)
2227
2228 {-
2229 ************************************************************************
2230 * *
2231 Binder swap
2232 * *
2233 ************************************************************************
2234
2235 Note [Binder swap]
2236 ~~~~~~~~~~~~~~~~~~
2237 We do these two transformations right here:
2238
2239 (1) case x of b { pi -> ri }
2240 ==>
2241 case x of b { pi -> let x=b in ri }
2242
2243 (2) case (x |> co) of b { pi -> ri }
2244 ==>
2245 case (x |> co) of b { pi -> let x = b |> sym co in ri }
2246
2247 Why (2)? See Note [Case of cast]
2248
2249 In both cases, in a particular alternative (pi -> ri), we only
2250 add the binding if
2251 (a) x occurs free in (pi -> ri)
2252 (ie it occurs in ri, but is not bound in pi)
2253 (b) the pi does not bind b (or the free vars of co)
2254 We need (a) and (b) for the inserted binding to be correct.
2255
2256 For the alternatives where we inject the binding, we can transfer
2257 all x's OccInfo to b. And that is the point.
2258
2259 Notice that
2260 * The deliberate shadowing of 'x'.
2261 * That (a) rapidly becomes false, so no bindings are injected.
2262
2263 The reason for doing these transformations here is because it allows
2264 us to adjust the OccInfo for 'x' and 'b' as we go.
2265
2266 * Suppose the only occurrences of 'x' are the scrutinee and in the
2267 ri; then this transformation makes it occur just once, and hence
2268 get inlined right away.
2269
2270 * If we do this in the Simplifier, we don't know whether 'x' is used
2271 in ri, so we are forced to pessimistically zap b's OccInfo even
2272 though it is typically dead (ie neither it nor x appear in the
2273 ri). There's nothing actually wrong with zapping it, except that
2274 it's kind of nice to know which variables are dead. My nose
2275 tells me to keep this information as robustly as possible.
2276
2277 The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
2278 {x=b}; it's Nothing if the binder-swap doesn't happen.
2279
2280 There is a danger though. Consider
2281 let v = x +# y
2282 in case (f v) of w -> ...v...v...
2283 And suppose that (f v) expands to just v. Then we'd like to
2284 use 'w' instead of 'v' in the alternative. But it may be too
2285 late; we may have substituted the (cheap) x+#y for v in the
2286 same simplifier pass that reduced (f v) to v.
2287
2288 I think this is just too bad. CSE will recover some of it.
2289
2290 Note [Case of cast]
2291 ~~~~~~~~~~~~~~~~~~~
2292 Consider case (x `cast` co) of b { I# ->
2293 ... (case (x `cast` co) of {...}) ...
2294 We'd like to eliminate the inner case. That is the motivation for
2295 equation (2) in Note [Binder swap]. When we get to the inner case, we
2296 inline x, cancel the casts, and away we go.
2297
2298 Note [Binder swap on GlobalId scrutinees]
2299 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2300 When the scrutinee is a GlobalId we must take care in two ways
2301
2302 i) In order to *know* whether 'x' occurs free in the RHS, we need its
2303 occurrence info. BUT, we don't gather occurrence info for
2304 GlobalIds. That's the reason for the (small) occ_gbl_scrut env in
2305 OccEnv is for: it says "gather occurrence info for these".
2306
2307 ii) We must call localiseId on 'x' first, in case it's a GlobalId, or
2308 has an External Name. See, for example, SimplEnv Note [Global Ids in
2309 the substitution].
2310
2311 Note [Zap case binders in proxy bindings]
2312 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2313 From the original
2314 case x of cb(dead) { p -> ...x... }
2315 we will get
2316 case x of cb(live) { p -> let x = cb in ...x... }
2317
2318 Core Lint never expects to find an *occurrence* of an Id marked
2319 as Dead, so we must zap the OccInfo on cb before making the
2320 binding x = cb. See Trac #5028.
2321
2322 Historical note [no-case-of-case]
2323 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2324 We *used* to suppress the binder-swap in case expressions when
2325 -fno-case-of-case is on. Old remarks:
2326 "This happens in the first simplifier pass,
2327 and enhances full laziness. Here's the bad case:
2328 f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
2329 If we eliminate the inner case, we trap it inside the I# v -> arm,
2330 which might prevent some full laziness happening. I've seen this
2331 in action in spectral/cichelli/Prog.hs:
2332 [(m,n) | m <- [1..max], n <- [1..max]]
2333 Hence the check for NoCaseOfCase."
2334 However, now the full-laziness pass itself reverses the binder-swap, so this
2335 check is no longer necessary.
2336
2337 Historical note [Suppressing the case binder-swap]
2338 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2339 This old note describes a problem that is also fixed by doing the
2340 binder-swap in OccAnal:
2341
2342 There is another situation when it might make sense to suppress the
2343 case-expression binde-swap. If we have
2344
2345 case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
2346 ...other cases .... }
2347
2348 We'll perform the binder-swap for the outer case, giving
2349
2350 case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
2351 ...other cases .... }
2352
2353 But there is no point in doing it for the inner case, because w1 can't
2354 be inlined anyway. Furthermore, doing the case-swapping involves
2355 zapping w2's occurrence info (see paragraphs that follow), and that
2356 forces us to bind w2 when doing case merging. So we get
2357
2358 case x of w1 { A -> let w2 = w1 in e1
2359 B -> let w2 = w1 in e2
2360 ...other cases .... }
2361
2362 This is plain silly in the common case where w2 is dead.
2363
2364 Even so, I can't see a good way to implement this idea. I tried
2365 not doing the binder-swap if the scrutinee was already evaluated
2366 but that failed big-time:
2367
2368 data T = MkT !Int
2369
2370 case v of w { MkT x ->
2371 case x of x1 { I# y1 ->
2372 case x of x2 { I# y2 -> ...
2373
2374 Notice that because MkT is strict, x is marked "evaluated". But to
2375 eliminate the last case, we must either make sure that x (as well as
2376 x1) has unfolding MkT y1. The straightforward thing to do is to do
2377 the binder-swap. So this whole note is a no-op.
2378
2379 It's fixed by doing the binder-swap in OccAnal because we can do the
2380 binder-swap unconditionally and still get occurrence analysis
2381 information right.
2382 -}
2383
2384 mkAltEnv :: OccEnv -> CoreExpr -> Id -> (OccEnv, Maybe (Id, CoreExpr))
2385 -- Does two things: a) makes the occ_one_shots = OccVanilla
2386 -- b) extends the GlobalScruts if possible
2387 -- c) returns a proxy mapping, binding the scrutinee
2388 -- to the case binder, if possible
2389 mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr
2390 = case stripTicksTopE (const True) scrut of
2391 Var v -> add_scrut v case_bndr'
2392 Cast (Var v) co -> add_scrut v (Cast case_bndr' (mkSymCo co))
2393 -- See Note [Case of cast]
2394 _ -> (env { occ_encl = OccVanilla }, Nothing)
2395
2396 where
2397 add_scrut v rhs = ( env { occ_encl = OccVanilla, occ_gbl_scrut = pe `extendVarSet` v }
2398 , Just (localise v, rhs) )
2399
2400 case_bndr' = Var (zapIdOccInfo case_bndr) -- See Note [Zap case binders in proxy bindings]
2401 localise scrut_var = mkLocalIdOrCoVar (localiseName (idName scrut_var)) (idType scrut_var)
2402 -- Localise the scrut_var before shadowing it; we're making a
2403 -- new binding for it, and it might have an External Name, or
2404 -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]
2405 -- Also we don't want any INLINE or NOINLINE pragmas!
2406
2407 {-
2408 ************************************************************************
2409 * *
2410 \subsection[OccurAnal-types]{OccEnv}
2411 * *
2412 ************************************************************************
2413
2414 Note [UsageDetails and zapping]
2415 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2416
2417 On many occasions, we must modify all gathered occurrence data at once. For
2418 instance, all occurrences underneath a (non-one-shot) lambda set the
2419 'occ_in_lam' flag to become 'True'. We could use 'mapVarEnv' to do this, but
2420 that takes O(n) time and we will do this often---in particular, there are many
2421 places where tail calls are not allowed, and each of these causes all variables
2422 to get marked with 'NoTailCallInfo'.
2423
2424 Instead of relying on `mapVarEnv`, then, we carry three 'IdEnv's around along
2425 with the 'OccInfoEnv'. Each of these extra environments is a "zapped set"
2426 recording which variables have been zapped in some way. Zapping all occurrence
2427 info then simply means setting the corresponding zapped set to the whole
2428 'OccInfoEnv', a fast O(1) operation.
2429 -}
2430
2431 type OccInfoEnv = IdEnv OccInfo -- A finite map from ids to their usage
2432 -- INVARIANT: never IAmDead
2433 -- (Deadness is signalled by not being in the map at all)
2434
2435 type ZappedSet = OccInfoEnv -- Values are ignored
2436
2437 data UsageDetails
2438 = UD { ud_env :: !OccInfoEnv
2439 , ud_z_many :: ZappedSet -- apply 'markMany' to these
2440 , ud_z_in_lam :: ZappedSet -- apply 'markInsideLam' to these
2441 , ud_z_no_tail :: ZappedSet } -- apply 'markNonTailCalled' to these
2442 -- INVARIANT: All three zapped sets are subsets of the OccInfoEnv
2443
2444 instance Outputable UsageDetails where
2445 ppr ud = ppr (ud_env (flattenUsageDetails ud))
2446
2447 -------------------
2448 -- UsageDetails API
2449
2450 (+++), combineAltsUsageDetails
2451 :: UsageDetails -> UsageDetails -> UsageDetails
2452 (+++) = combineUsageDetailsWith addOccInfo
2453 combineAltsUsageDetails = combineUsageDetailsWith orOccInfo
2454
2455 combineUsageDetailsList :: [UsageDetails] -> UsageDetails
2456 combineUsageDetailsList = foldl (+++) emptyDetails
2457
2458 mkOneOcc :: OccEnv -> Id -> InterestingCxt -> JoinArity -> UsageDetails
2459 mkOneOcc env id int_cxt arity
2460 | isLocalId id
2461 = singleton $ OneOcc { occ_in_lam = False
2462 , occ_one_br = True
2463 , occ_int_cxt = int_cxt
2464 , occ_tail = AlwaysTailCalled arity }
2465 | id `elemVarSet` occ_gbl_scrut env
2466 = singleton noOccInfo
2467
2468 | otherwise
2469 = emptyDetails
2470 where
2471 singleton info = emptyDetails { ud_env = unitVarEnv id info }
2472
2473 addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
2474 addOneOcc ud id info
2475 = ud { ud_env = extendVarEnv_C plus_zapped (ud_env ud) id info }
2476 `alterZappedSets` (`delVarEnv` id)
2477 where
2478 plus_zapped old new = doZapping ud id old `addOccInfo` new
2479
2480 addManyOccsSet :: UsageDetails -> VarSet -> UsageDetails
2481 addManyOccsSet usage id_set = nonDetFoldUniqSet addManyOccs usage id_set
2482 -- It's OK to use nonDetFoldUFM here because addManyOccs commutes
2483
2484 -- Add several occurrences, assumed not to be tail calls
2485 addManyOccs :: Var -> UsageDetails -> UsageDetails
2486 addManyOccs v u | isId v = addOneOcc u v noOccInfo
2487 | otherwise = u
2488 -- Give a non-committal binder info (i.e noOccInfo) because
2489 -- a) Many copies of the specialised thing can appear
2490 -- b) We don't want to substitute a BIG expression inside a RULE
2491 -- even if that's the only occurrence of the thing
2492 -- (Same goes for INLINE.)
2493
2494 delDetails :: UsageDetails -> Id -> UsageDetails
2495 delDetails ud bndr
2496 = ud `alterUsageDetails` (`delVarEnv` bndr)
2497
2498 delDetailsList :: UsageDetails -> [Id] -> UsageDetails
2499 delDetailsList ud bndrs
2500 = ud `alterUsageDetails` (`delVarEnvList` bndrs)
2501
2502 emptyDetails :: UsageDetails
2503 emptyDetails = UD { ud_env = emptyVarEnv
2504 , ud_z_many = emptyVarEnv
2505 , ud_z_in_lam = emptyVarEnv
2506 , ud_z_no_tail = emptyVarEnv }
2507
2508 isEmptyDetails :: UsageDetails -> Bool
2509 isEmptyDetails = isEmptyVarEnv . ud_env
2510
2511 markAllMany, markAllInsideLam, markAllNonTailCalled, zapDetails
2512 :: UsageDetails -> UsageDetails
2513 markAllMany ud = ud { ud_z_many = ud_env ud }
2514 markAllInsideLam ud = ud { ud_z_in_lam = ud_env ud }
2515 markAllNonTailCalled ud = ud { ud_z_no_tail = ud_env ud }
2516
2517 zapDetails = markAllMany . markAllNonTailCalled -- effectively sets to noOccInfo
2518
2519 lookupDetails :: UsageDetails -> Id -> OccInfo
2520 lookupDetails ud id
2521 = case lookupVarEnv (ud_env ud) id of
2522 Just occ -> doZapping ud id occ
2523 Nothing -> IAmDead
2524
2525 usedIn :: Id -> UsageDetails -> Bool
2526 v `usedIn` ud = isExportedId v || v `elemVarEnv` ud_env ud
2527
2528 udFreeVars :: VarSet -> UsageDetails -> VarSet
2529 -- Find the subset of bndrs that are mentioned in uds
2530 udFreeVars bndrs ud = restrictUniqSetToUFM bndrs (ud_env ud)
2531
2532 -------------------
2533 -- Auxiliary functions for UsageDetails implementation
2534
2535 combineUsageDetailsWith :: (OccInfo -> OccInfo -> OccInfo)
2536 -> UsageDetails -> UsageDetails -> UsageDetails
2537 combineUsageDetailsWith plus_occ_info ud1 ud2
2538 | isEmptyDetails ud1 = ud2
2539 | isEmptyDetails ud2 = ud1
2540 | otherwise
2541 = UD { ud_env = plusVarEnv_C plus_occ_info (ud_env ud1) (ud_env ud2)
2542 , ud_z_many = plusVarEnv (ud_z_many ud1) (ud_z_many ud2)
2543 , ud_z_in_lam = plusVarEnv (ud_z_in_lam ud1) (ud_z_in_lam ud2)
2544 , ud_z_no_tail = plusVarEnv (ud_z_no_tail ud1) (ud_z_no_tail ud2) }
2545
2546 doZapping :: UsageDetails -> Var -> OccInfo -> OccInfo
2547 doZapping ud var occ
2548 = doZappingByUnique ud (varUnique var) occ
2549
2550 doZappingByUnique :: UsageDetails -> Unique -> OccInfo -> OccInfo
2551 doZappingByUnique ud uniq
2552 = (if | in_subset ud_z_many -> markMany
2553 | in_subset ud_z_in_lam -> markInsideLam
2554 | otherwise -> id) .
2555 (if | in_subset ud_z_no_tail -> markNonTailCalled
2556 | otherwise -> id)
2557 where
2558 in_subset field = uniq `elemVarEnvByKey` field ud
2559
2560 alterZappedSets :: UsageDetails -> (ZappedSet -> ZappedSet) -> UsageDetails
2561 alterZappedSets ud f
2562 = ud { ud_z_many = f (ud_z_many ud)
2563 , ud_z_in_lam = f (ud_z_in_lam ud)
2564 , ud_z_no_tail = f (ud_z_no_tail ud) }
2565
2566 alterUsageDetails :: UsageDetails -> (OccInfoEnv -> OccInfoEnv) -> UsageDetails
2567 alterUsageDetails ud f
2568 = ud { ud_env = f (ud_env ud) }
2569 `alterZappedSets` f
2570
2571 flattenUsageDetails :: UsageDetails -> UsageDetails
2572 flattenUsageDetails ud
2573 = ud { ud_env = mapUFM_Directly (doZappingByUnique ud) (ud_env ud) }
2574 `alterZappedSets` const emptyVarEnv
2575
2576 -------------------
2577 -- See Note [Adjusting right-hand sides]
2578 adjustRhsUsage :: Maybe JoinArity -> RecFlag
2579 -> [CoreBndr] -- Outer lambdas, AFTER occ anal
2580 -> UsageDetails -> UsageDetails
2581 adjustRhsUsage mb_join_arity rec_flag bndrs usage
2582 = maybe_mark_lam (maybe_drop_tails usage)
2583 where
2584 maybe_mark_lam ud | one_shot = ud
2585 | otherwise = markAllInsideLam ud
2586 maybe_drop_tails ud | exact_join = ud
2587 | otherwise = markAllNonTailCalled ud
2588
2589 one_shot = case mb_join_arity of
2590 Just join_arity
2591 | isRec rec_flag -> False
2592 | otherwise -> all isOneShotBndr (drop join_arity bndrs)
2593 Nothing -> all isOneShotBndr bndrs
2594
2595 exact_join = case mb_join_arity of
2596 Just join_arity -> bndrs `lengthIs` join_arity
2597 _ -> False
2598
2599 type IdWithOccInfo = Id
2600
2601 tagLamBinders :: UsageDetails -- Of scope
2602 -> [Id] -- Binders
2603 -> (UsageDetails, -- Details with binders removed
2604 [IdWithOccInfo]) -- Tagged binders
2605 -- Used for lambda and case binders
2606 -- It copes with the fact that lambda bindings can have a
2607 -- stable unfolding, used for join points
2608 tagLamBinders usage binders = usage' `seq` (usage', bndrs')
2609 where
2610 (usage', bndrs') = mapAccumR tag_lam usage binders
2611 tag_lam usage bndr = (usage2, bndr')
2612 where
2613 occ = lookupDetails usage bndr
2614 bndr' = setBinderOcc (markNonTailCalled occ) bndr
2615 -- Don't try to make an argument into a join point
2616 usage1 = usage `delDetails` bndr
2617 usage2 | isId bndr = addManyOccsSet usage1 (idUnfoldingVars bndr)
2618 -- This is effectively the RHS of a
2619 -- non-join-point binding, so it's okay to use
2620 -- addManyOccsSet, which assumes no tail calls
2621 | otherwise = usage1
2622
2623 tagNonRecBinder :: TopLevelFlag -- At top level?
2624 -> UsageDetails -- Of scope
2625 -> CoreBndr -- Binder
2626 -> (UsageDetails, -- Details with binder removed
2627 IdWithOccInfo) -- Tagged binder
2628
2629 tagNonRecBinder lvl usage binder
2630 = let
2631 occ = lookupDetails usage binder
2632 will_be_join = decideJoinPointHood lvl usage [binder]
2633 occ' | will_be_join = -- must already be marked AlwaysTailCalled
2634 ASSERT(isAlwaysTailCalled occ) occ
2635 | otherwise = markNonTailCalled occ
2636 binder' = setBinderOcc occ' binder
2637 usage' = usage `delDetails` binder
2638 in
2639 usage' `seq` (usage', binder')
2640
2641 tagRecBinders :: TopLevelFlag -- At top level?
2642 -> UsageDetails -- Of body of let ONLY
2643 -> [(CoreBndr, -- Binder
2644 UsageDetails, -- RHS usage details
2645 [CoreBndr])] -- Lambdas in new RHS
2646 -> (UsageDetails, -- Adjusted details for whole scope,
2647 -- with binders removed
2648 [IdWithOccInfo]) -- Tagged binders
2649 -- Substantially more complicated than non-recursive case. Need to adjust RHS
2650 -- details *before* tagging binders (because the tags depend on the RHSes).
2651 tagRecBinders lvl body_uds triples
2652 = let
2653 (bndrs, rhs_udss, _) = unzip3 triples
2654
2655 -- 1. Determine join-point-hood of whole group, as determined by
2656 -- the *unadjusted* usage details
2657 unadj_uds = body_uds +++ combineUsageDetailsList rhs_udss
2658 will_be_joins = decideJoinPointHood lvl unadj_uds bndrs
2659
2660 -- 2. Adjust usage details of each RHS, taking into account the
2661 -- join-point-hood decision
2662 rhs_udss' = map adjust triples
2663 adjust (bndr, rhs_uds, rhs_bndrs)
2664 = adjustRhsUsage mb_join_arity Recursive rhs_bndrs rhs_uds
2665 where
2666 -- Can't use willBeJoinId_maybe here because we haven't tagged the
2667 -- binder yet (the tag depends on these adjustments!)
2668 mb_join_arity
2669 | will_be_joins
2670 , let occ = lookupDetails unadj_uds bndr
2671 , AlwaysTailCalled arity <- tailCallInfo occ
2672 = Just arity
2673 | otherwise
2674 = ASSERT(not will_be_joins) -- Should be AlwaysTailCalled if we're
2675 -- making join points!
2676 Nothing
2677
2678 -- 3. Compute final usage details from adjusted RHS details
2679 adj_uds = body_uds +++ combineUsageDetailsList rhs_udss'
2680
2681 -- 4. Tag each binder with its adjusted details
2682 bndrs' = [ setBinderOcc (lookupDetails adj_uds bndr) bndr
2683 | bndr <- bndrs ]
2684
2685 -- 5. Drop the binders from the adjusted details and return
2686 usage' = adj_uds `delDetailsList` bndrs
2687 in
2688 (usage', bndrs')
2689
2690 setBinderOcc :: OccInfo -> CoreBndr -> CoreBndr
2691 setBinderOcc occ_info bndr
2692 | isTyVar bndr = bndr
2693 | isExportedId bndr = if isManyOccs (idOccInfo bndr)
2694 then bndr
2695 else setIdOccInfo bndr noOccInfo
2696 -- Don't use local usage info for visible-elsewhere things
2697 -- BUT *do* erase any IAmALoopBreaker annotation, because we're
2698 -- about to re-generate it and it shouldn't be "sticky"
2699
2700 | otherwise = setIdOccInfo bndr occ_info
2701
2702 -- | Decide whether some bindings should be made into join points or not.
2703 -- Returns `False` if they can't be join points. Note that it's an
2704 -- all-or-nothing decision, as if multiple binders are given, they're assumed to
2705 -- be mutually recursive.
2706 --
2707 -- See Note [Invariants for join points] in CoreSyn.
2708 decideJoinPointHood :: TopLevelFlag -> UsageDetails
2709 -> [CoreBndr]
2710 -> Bool
2711 decideJoinPointHood TopLevel _ _
2712 = False
2713 decideJoinPointHood NotTopLevel usage bndrs
2714 | isJoinId (head bndrs)
2715 = WARN(not all_ok, text "OccurAnal failed to rediscover join point(s):" <+>
2716 ppr bndrs)
2717 all_ok
2718 | otherwise
2719 = all_ok
2720 where
2721 -- See Note [Invariants on join points]; invariants cited by number below.
2722 -- Invariant 2 is always satisfiable by the simplifier by eta expansion.
2723 all_ok = -- Invariant 3: Either all are join points or none are
2724 all ok bndrs
2725
2726 ok bndr
2727 | -- Invariant 1: Only tail calls, all same join arity
2728 AlwaysTailCalled arity <- tailCallInfo (lookupDetails usage bndr)
2729 , -- Invariant 1 as applied to LHSes of rules
2730 all (ok_rule arity) (idCoreRules bndr)
2731 -- Invariant 4: Satisfies polymorphism rule
2732 , isValidJoinPointType arity (idType bndr)
2733 = True
2734 | otherwise
2735 = False
2736
2737 ok_rule _ BuiltinRule{} = False -- only possible with plugin shenanigans
2738 ok_rule join_arity (Rule { ru_args = args })
2739 = args `lengthIs` join_arity
2740 -- Invariant 1 as applied to LHSes of rules
2741
2742 willBeJoinId_maybe :: CoreBndr -> Maybe JoinArity
2743 willBeJoinId_maybe bndr
2744 | AlwaysTailCalled arity <- tailCallInfo (idOccInfo bndr)
2745 = Just arity
2746 | otherwise
2747 = isJoinId_maybe bndr
2748
2749 {-
2750 ************************************************************************
2751 * *
2752 \subsection{Operations over OccInfo}
2753 * *
2754 ************************************************************************
2755 -}
2756
2757 markMany, markInsideLam, markNonTailCalled :: OccInfo -> OccInfo
2758
2759 markMany IAmDead = IAmDead
2760 markMany occ = ManyOccs { occ_tail = occ_tail occ }
2761
2762 markInsideLam occ@(OneOcc {}) = occ { occ_in_lam = True }
2763 markInsideLam occ = occ
2764
2765 markNonTailCalled IAmDead = IAmDead
2766 markNonTailCalled occ = occ { occ_tail = NoTailCallInfo }
2767
2768 addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
2769
2770 addOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2771 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2772 tailCallInfo a2 }
2773 -- Both branches are at least One
2774 -- (Argument is never IAmDead)
2775
2776 -- (orOccInfo orig new) is used
2777 -- when combining occurrence info from branches of a case
2778
2779 orOccInfo (OneOcc { occ_in_lam = in_lam1, occ_int_cxt = int_cxt1
2780 , occ_tail = tail1 })
2781 (OneOcc { occ_in_lam = in_lam2, occ_int_cxt = int_cxt2
2782 , occ_tail = tail2 })
2783 = OneOcc { occ_in_lam = in_lam1 || in_lam2
2784 , occ_one_br = False -- False, because it occurs in both branches
2785 , occ_int_cxt = int_cxt1 && int_cxt2
2786 , occ_tail = tail1 `andTailCallInfo` tail2 }
2787 orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2788 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2789 tailCallInfo a2 }
2790
2791 andTailCallInfo :: TailCallInfo -> TailCallInfo -> TailCallInfo
2792 andTailCallInfo info@(AlwaysTailCalled arity1) (AlwaysTailCalled arity2)
2793 | arity1 == arity2 = info
2794 andTailCallInfo _ _ = NoTailCallInfo