e2beb742a85a97561de2c0442af4f258c61e9f01
[ghc.git] / compiler / simplCore / OccurAnal.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 ************************************************************************
5 * *
6 \section[OccurAnal]{Occurrence analysis pass}
7 * *
8 ************************************************************************
9
10 The occurrence analyser re-typechecks a core expression, returning a new
11 core expression with (hopefully) improved usage information.
12 -}
13
14 {-# LANGUAGE CPP, BangPatterns, MultiWayIf, ViewPatterns #-}
15
16 module OccurAnal (
17 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
18 ) where
19
20 #include "HsVersions.h"
21
22 import GhcPrelude
23
24 import CoreSyn
25 import CoreFVs
26 import CoreUtils ( exprIsTrivial, isDefaultAlt, isExpandableApp,
27 stripTicksTopE, mkTicks )
28 import Id
29 import IdInfo
30 import Name( localiseName )
31 import BasicTypes
32 import Module( Module )
33 import Coercion
34 import Type
35
36 import VarSet
37 import VarEnv
38 import Var
39 import Demand ( argOneShots, argsOneShots )
40 import Digraph ( SCC(..), Node(..)
41 , stronglyConnCompFromEdgedVerticesUniq
42 , stronglyConnCompFromEdgedVerticesUniqR )
43 import Unique
44 import UniqFM
45 import UniqSet
46 import Util
47 import Outputable
48 import Data.List
49 import Control.Arrow ( second )
50
51 {-
52 ************************************************************************
53 * *
54 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
55 * *
56 ************************************************************************
57
58 Here's the externally-callable interface:
59 -}
60
61 occurAnalysePgm :: Module -- Used only in debug output
62 -> (Id -> Bool) -- Active unfoldings
63 -> (Activation -> Bool) -- Active rules
64 -> [CoreRule] -> [CoreVect] -> VarSet
65 -> CoreProgram -> CoreProgram
66 occurAnalysePgm this_mod active_unf active_rule imp_rules vects vectVars binds
67 | isEmptyDetails final_usage
68 = occ_anald_binds
69
70 | otherwise -- See Note [Glomming]
71 = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
72 2 (ppr final_usage ) )
73 occ_anald_glommed_binds
74 where
75 init_env = initOccEnv { occ_rule_act = active_rule
76 , occ_unf_act = active_unf }
77
78 (final_usage, occ_anald_binds) = go init_env binds
79 (_, occ_anald_glommed_binds) = occAnalRecBind init_env TopLevel
80 imp_rule_edges
81 (flattenBinds occ_anald_binds)
82 initial_uds
83 -- It's crucial to re-analyse the glommed-together bindings
84 -- so that we establish the right loop breakers. Otherwise
85 -- we can easily create an infinite loop (Trac #9583 is an example)
86
87 initial_uds = addManyOccsSet emptyDetails
88 (rulesFreeVars imp_rules `unionVarSet`
89 vectsFreeVars vects `unionVarSet`
90 vectVars)
91 -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations,
92 -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are
93 -- reflected in 'vectors' — see Note [Vectorisation declarations and occurrences].)
94
95 -- Note [Preventing loops due to imported functions rules]
96 imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
97 [ mapVarEnv (const maps_to) $
98 getUniqSet (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
99 | imp_rule <- imp_rules
100 , not (isBuiltinRule imp_rule) -- See Note [Plugin rules]
101 , let maps_to = exprFreeIds (ru_rhs imp_rule)
102 `delVarSetList` ru_bndrs imp_rule
103 , arg <- ru_args imp_rule ]
104
105 go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
106 go _ []
107 = (initial_uds, [])
108 go env (bind:binds)
109 = (final_usage, bind' ++ binds')
110 where
111 (bs_usage, binds') = go env binds
112 (final_usage, bind') = occAnalBind env TopLevel imp_rule_edges bind
113 bs_usage
114
115 occurAnalyseExpr :: CoreExpr -> CoreExpr
116 -- Do occurrence analysis, and discard occurrence info returned
117 occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap
118
119 occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
120 occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap
121
122 occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
123 occurAnalyseExpr' enable_binder_swap expr
124 = snd (occAnal env expr)
125 where
126 env = initOccEnv { occ_binder_swap = enable_binder_swap }
127
128 {- Note [Plugin rules]
129 ~~~~~~~~~~~~~~~~~~~~~~
130 Conal Elliott (Trac #11651) built a GHC plugin that added some
131 BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
132 do some domain-specific transformations that could not be expressed
133 with an ordinary pattern-matching CoreRule. But then we can't extract
134 the dependencies (in imp_rule_edges) from ru_rhs etc, because a
135 BuiltinRule doesn't have any of that stuff.
136
137 So we simply assume that BuiltinRules have no dependencies, and filter
138 them out from the imp_rule_edges comprehension.
139 -}
140
141 {-
142 ************************************************************************
143 * *
144 Bindings
145 * *
146 ************************************************************************
147
148 Note [Recursive bindings: the grand plan]
149 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
150 When we come across a binding group
151 Rec { x1 = r1; ...; xn = rn }
152 we treat it like this (occAnalRecBind):
153
154 1. Occurrence-analyse each right hand side, and build a
155 "Details" for each binding to capture the results.
156
157 Wrap the details in a Node (details, node-id, dep-node-ids),
158 where node-id is just the unique of the binder, and
159 dep-node-ids lists all binders on which this binding depends.
160 We'll call these the "scope edges".
161 See Note [Forming the Rec groups].
162
163 All this is done by makeNode.
164
165 2. Do SCC-analysis on these Nodes. Each SCC will become a new Rec or
166 NonRec. The key property is that every free variable of a binding
167 is accounted for by the scope edges, so that when we are done
168 everything is still in scope.
169
170 3. For each Cyclic SCC of the scope-edge SCC-analysis in (2), we
171 identify suitable loop-breakers to ensure that inlining terminates.
172 This is done by occAnalRec.
173
174 4. To do so we form a new set of Nodes, with the same details, but
175 different edges, the "loop-breaker nodes". The loop-breaker nodes
176 have both more and fewer dependencies than the scope edges
177 (see Note [Choosing loop breakers])
178
179 More edges: if f calls g, and g has an active rule that mentions h
180 then we add an edge from f -> h
181
182 Fewer edges: we only include dependencies on active rules, on rule
183 RHSs (not LHSs) and if there is an INLINE pragma only
184 on the stable unfolding (and vice versa). The scope
185 edges must be much more inclusive.
186
187 5. The "weak fvs" of a node are, by definition:
188 the scope fvs - the loop-breaker fvs
189 See Note [Weak loop breakers], and the nd_weak field of Details
190
191 6. Having formed the loop-breaker nodes
192
193 Note [Dead code]
194 ~~~~~~~~~~~~~~~~
195 Dropping dead code for a cyclic Strongly Connected Component is done
196 in a very simple way:
197
198 the entire SCC is dropped if none of its binders are mentioned
199 in the body; otherwise the whole thing is kept.
200
201 The key observation is that dead code elimination happens after
202 dependency analysis: so 'occAnalBind' processes SCCs instead of the
203 original term's binding groups.
204
205 Thus 'occAnalBind' does indeed drop 'f' in an example like
206
207 letrec f = ...g...
208 g = ...(...g...)...
209 in
210 ...g...
211
212 when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
213 'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
214 'AcyclicSCC f', where 'body_usage' won't contain 'f'.
215
216 ------------------------------------------------------------
217 Note [Forming Rec groups]
218 ~~~~~~~~~~~~~~~~~~~~~~~~~
219 We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
220 and "g uses f", no matter how indirectly. We do a SCC analysis
221 with an edge f -> g if "f uses g".
222
223 More precisely, "f uses g" iff g should be in scope wherever f is.
224 That is, g is free in:
225 a) the rhs 'ef'
226 b) or the RHS of a rule for f (Note [Rules are extra RHSs])
227 c) or the LHS or a rule for f (Note [Rule dependency info])
228
229 These conditions apply regardless of the activation of the RULE (eg it might be
230 inactive in this phase but become active later). Once a Rec is broken up
231 it can never be put back together, so we must be conservative.
232
233 The principle is that, regardless of rule firings, every variable is
234 always in scope.
235
236 * Note [Rules are extra RHSs]
237 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
238 A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
239 keeps the specialised "children" alive. If the parent dies
240 (because it isn't referenced any more), then the children will die
241 too (unless they are already referenced directly).
242
243 To that end, we build a Rec group for each cyclic strongly
244 connected component,
245 *treating f's rules as extra RHSs for 'f'*.
246 More concretely, the SCC analysis runs on a graph with an edge
247 from f -> g iff g is mentioned in
248 (a) f's rhs
249 (b) f's RULES
250 These are rec_edges.
251
252 Under (b) we include variables free in *either* LHS *or* RHS of
253 the rule. The former might seems silly, but see Note [Rule
254 dependency info]. So in Example [eftInt], eftInt and eftIntFB
255 will be put in the same Rec, even though their 'main' RHSs are
256 both non-recursive.
257
258 * Note [Rule dependency info]
259 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
260 The VarSet in a RuleInfo is used for dependency analysis in the
261 occurrence analyser. We must track free vars in *both* lhs and rhs.
262 Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
263 Why both? Consider
264 x = y
265 RULE f x = v+4
266 Then if we substitute y for x, we'd better do so in the
267 rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
268 as well as 'v'
269
270 * Note [Rules are visible in their own rec group]
271 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
272 We want the rules for 'f' to be visible in f's right-hand side.
273 And we'd like them to be visible in other functions in f's Rec
274 group. E.g. in Note [Specialisation rules] we want f' rule
275 to be visible in both f's RHS, and fs's RHS.
276
277 This means that we must simplify the RULEs first, before looking
278 at any of the definitions. This is done by Simplify.simplRecBind,
279 when it calls addLetIdInfo.
280
281 ------------------------------------------------------------
282 Note [Choosing loop breakers]
283 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
284 Loop breaking is surprisingly subtle. First read the section 4 of
285 "Secrets of the GHC inliner". This describes our basic plan.
286 We avoid infinite inlinings by choosing loop breakers, and
287 ensuring that a loop breaker cuts each loop.
288
289 See also Note [Inlining and hs-boot files] in ToIface, which deals
290 with a closely related source of infinite loops.
291
292 Fundamentally, we do SCC analysis on a graph. For each recursive
293 group we choose a loop breaker, delete all edges to that node,
294 re-analyse the SCC, and iterate.
295
296 But what is the graph? NOT the same graph as was used for Note
297 [Forming Rec groups]! In particular, a RULE is like an equation for
298 'f' that is *always* inlined if it is applicable. We do *not* disable
299 rules for loop-breakers. It's up to whoever makes the rules to make
300 sure that the rules themselves always terminate. See Note [Rules for
301 recursive functions] in Simplify.hs
302
303 Hence, if
304 f's RHS (or its INLINE template if it has one) mentions g, and
305 g has a RULE that mentions h, and
306 h has a RULE that mentions f
307
308 then we *must* choose f to be a loop breaker. Example: see Note
309 [Specialisation rules].
310
311 In general, take the free variables of f's RHS, and augment it with
312 all the variables reachable by RULES from those starting points. That
313 is the whole reason for computing rule_fv_env in occAnalBind. (Of
314 course we only consider free vars that are also binders in this Rec
315 group.) See also Note [Finding rule RHS free vars]
316
317 Note that when we compute this rule_fv_env, we only consider variables
318 free in the *RHS* of the rule, in contrast to the way we build the
319 Rec group in the first place (Note [Rule dependency info])
320
321 Note that if 'g' has RHS that mentions 'w', we should add w to
322 g's loop-breaker edges. More concretely there is an edge from f -> g
323 iff
324 (a) g is mentioned in f's RHS `xor` f's INLINE rhs
325 (see Note [Inline rules])
326 (b) or h is mentioned in f's RHS, and
327 g appears in the RHS of an active RULE of h
328 or a transitive sequence of active rules starting with h
329
330 Why "active rules"? See Note [Finding rule RHS free vars]
331
332 Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
333 chosen as a loop breaker, because their RHSs don't mention each other.
334 And indeed both can be inlined safely.
335
336 Note again that the edges of the graph we use for computing loop breakers
337 are not the same as the edges we use for computing the Rec blocks.
338 That's why we compute
339
340 - rec_edges for the Rec block analysis
341 - loop_breaker_nodes for the loop breaker analysis
342
343 * Note [Finding rule RHS free vars]
344 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
345 Consider this real example from Data Parallel Haskell
346 tagZero :: Array Int -> Array Tag
347 {-# INLINE [1] tagZeroes #-}
348 tagZero xs = pmap (\x -> fromBool (x==0)) xs
349
350 {-# RULES "tagZero" [~1] forall xs n.
351 pmap fromBool <blah blah> = tagZero xs #-}
352 So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
353 However, tagZero can only be inlined in phase 1 and later, while
354 the RULE is only active *before* phase 1. So there's no problem.
355
356 To make this work, we look for the RHS free vars only for
357 *active* rules. That's the reason for the occ_rule_act field
358 of the OccEnv.
359
360 * Note [Weak loop breakers]
361 ~~~~~~~~~~~~~~~~~~~~~~~~~
362 There is a last nasty wrinkle. Suppose we have
363
364 Rec { f = f_rhs
365 RULE f [] = g
366
367 h = h_rhs
368 g = h
369 ...more...
370 }
371
372 Remember that we simplify the RULES before any RHS (see Note
373 [Rules are visible in their own rec group] above).
374
375 So we must *not* postInlineUnconditionally 'g', even though
376 its RHS turns out to be trivial. (I'm assuming that 'g' is
377 not choosen as a loop breaker.) Why not? Because then we
378 drop the binding for 'g', which leaves it out of scope in the
379 RULE!
380
381 Here's a somewhat different example of the same thing
382 Rec { g = h
383 ; h = ...f...
384 ; f = f_rhs
385 RULE f [] = g }
386 Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
387 g, because the RULE for f is active throughout. So the RHS of h
388 might rewrite to h = ...g...
389 So g must remain in scope in the output program!
390
391 We "solve" this by:
392
393 Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
394 iff g is a "missing free variable" of the Rec group
395
396 A "missing free variable" x is one that is mentioned in an RHS or
397 INLINE or RULE of a binding in the Rec group, but where the
398 dependency on x may not show up in the loop_breaker_nodes (see
399 note [Choosing loop breakers} above).
400
401 A normal "strong" loop breaker has IAmLoopBreaker False. So
402
403 Inline postInlineUnconditionally
404 strong IAmLoopBreaker False no no
405 weak IAmLoopBreaker True yes no
406 other yes yes
407
408 The **sole** reason for this kind of loop breaker is so that
409 postInlineUnconditionally does not fire. Ugh. (Typically it'll
410 inline via the usual callSiteInline stuff, so it'll be dead in the
411 next pass, so the main Ugh is the tiresome complication.)
412
413 Note [Rules for imported functions]
414 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
415 Consider this
416 f = /\a. B.g a
417 RULE B.g Int = 1 + f Int
418 Note that
419 * The RULE is for an imported function.
420 * f is non-recursive
421 Now we
422 can get
423 f Int --> B.g Int Inlining f
424 --> 1 + f Int Firing RULE
425 and so the simplifier goes into an infinite loop. This
426 would not happen if the RULE was for a local function,
427 because we keep track of dependencies through rules. But
428 that is pretty much impossible to do for imported Ids. Suppose
429 f's definition had been
430 f = /\a. C.h a
431 where (by some long and devious process), C.h eventually inlines to
432 B.g. We could only spot such loops by exhaustively following
433 unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
434 f.
435
436 Note that RULES for imported functions are important in practice; they
437 occur a lot in the libraries.
438
439 We regard this potential infinite loop as a *programmer* error.
440 It's up the programmer not to write silly rules like
441 RULE f x = f x
442 and the example above is just a more complicated version.
443
444 Note [Preventing loops due to imported functions rules]
445 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
446 Consider:
447 import GHC.Base (foldr)
448
449 {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
450 filter p xs = build (\c n -> foldr (filterFB c p) n xs)
451 filterFB c p = ...
452
453 f = filter p xs
454
455 Note that filter is not a loop-breaker, so what happens is:
456 f = filter p xs
457 = {inline} build (\c n -> foldr (filterFB c p) n xs)
458 = {inline} foldr (filterFB (:) p) [] xs
459 = {RULE} filter p xs
460
461 We are in an infinite loop.
462
463 A more elaborate example (that I actually saw in practice when I went to
464 mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
465 {-# LANGUAGE RankNTypes #-}
466 module GHCList where
467
468 import Prelude hiding (filter)
469 import GHC.Base (build)
470
471 {-# INLINABLE filter #-}
472 filter :: (a -> Bool) -> [a] -> [a]
473 filter p [] = []
474 filter p (x:xs) = if p x then x : filter p xs else filter p xs
475
476 {-# NOINLINE [0] filterFB #-}
477 filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
478 filterFB c p x r | p x = x `c` r
479 | otherwise = r
480
481 {-# RULES
482 "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr
483 (filterFB c p) n xs)
484 "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p
485 #-}
486
487 Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
488 are not), the unfolding given to "filter" in the interface file will be:
489 filter p [] = []
490 filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
491 else build (\c n -> foldr (filterFB c p) n xs
492
493 Note that because this unfolding does not mention "filter", filter is not
494 marked as a strong loop breaker. Therefore at a use site in another module:
495 filter p xs
496 = {inline}
497 case xs of [] -> []
498 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
499 else build (\c n -> foldr (filterFB c p) n xs)
500
501 build (\c n -> foldr (filterFB c p) n xs)
502 = {inline} foldr (filterFB (:) p) [] xs
503 = {RULE} filter p xs
504
505 And we are in an infinite loop again, except that this time the loop is producing an
506 infinitely large *term* (an unrolling of filter) and so the simplifier finally
507 dies with "ticks exhausted"
508
509 Because of this problem, we make a small change in the occurrence analyser
510 designed to mark functions like "filter" as strong loop breakers on the basis that:
511 1. The RHS of filter mentions the local function "filterFB"
512 2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS
513
514 So for each RULE for an *imported* function we are going to add
515 dependency edges between the *local* FVS of the rule LHS and the
516 *local* FVS of the rule RHS. We don't do anything special for RULES on
517 local functions because the standard occurrence analysis stuff is
518 pretty good at getting loop-breakerness correct there.
519
520 It is important to note that even with this extra hack we aren't always going to get
521 things right. For example, it might be that the rule LHS mentions an imported Id,
522 and another module has a RULE that can rewrite that imported Id to one of our local
523 Ids.
524
525 Note [Specialising imported functions] (referred to from Specialise)
526 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
527 BUT for *automatically-generated* rules, the programmer can't be
528 responsible for the "programmer error" in Note [Rules for imported
529 functions]. In paricular, consider specialising a recursive function
530 defined in another module. If we specialise a recursive function B.g,
531 we get
532 g_spec = .....(B.g Int).....
533 RULE B.g Int = g_spec
534 Here, g_spec doesn't look recursive, but when the rule fires, it
535 becomes so. And if B.g was mutually recursive, the loop might
536 not be as obvious as it is here.
537
538 To avoid this,
539 * When specialising a function that is a loop breaker,
540 give a NOINLINE pragma to the specialised function
541
542 Note [Glomming]
543 ~~~~~~~~~~~~~~~
544 RULES for imported Ids can make something at the top refer to something at the bottom:
545 f = \x -> B.g (q x)
546 h = \y -> 3
547
548 RULE: B.g (q x) = h x
549
550 Applying this rule makes f refer to h, although f doesn't appear to
551 depend on h. (And, as in Note [Rules for imported functions], the
552 dependency might be more indirect. For example, f might mention C.t
553 rather than B.g, where C.t eventually inlines to B.g.)
554
555 NOTICE that this cannot happen for rules whose head is a
556 locally-defined function, because we accurately track dependencies
557 through RULES. It only happens for rules whose head is an imported
558 function (B.g in the example above).
559
560 Solution:
561 - When simplifying, bring all top level identifiers into
562 scope at the start, ignoring the Rec/NonRec structure, so
563 that when 'h' pops up in f's rhs, we find it in the in-scope set
564 (as the simplifier generally expects). This happens in simplTopBinds.
565
566 - In the occurrence analyser, if there are any out-of-scope
567 occurrences that pop out of the top, which will happen after
568 firing the rule: f = \x -> h x
569 h = \y -> 3
570 then just glom all the bindings into a single Rec, so that
571 the *next* iteration of the occurrence analyser will sort
572 them all out. This part happens in occurAnalysePgm.
573
574 ------------------------------------------------------------
575 Note [Inline rules]
576 ~~~~~~~~~~~~~~~~~~~
577 None of the above stuff about RULES applies to Inline Rules,
578 stored in a CoreUnfolding. The unfolding, if any, is simplified
579 at the same time as the regular RHS of the function (ie *not* like
580 Note [Rules are visible in their own rec group]), so it should be
581 treated *exactly* like an extra RHS.
582
583 Or, rather, when computing loop-breaker edges,
584 * If f has an INLINE pragma, and it is active, we treat the
585 INLINE rhs as f's rhs
586 * If it's inactive, we treat f as having no rhs
587 * If it has no INLINE pragma, we look at f's actual rhs
588
589
590 There is a danger that we'll be sub-optimal if we see this
591 f = ...f...
592 [INLINE f = ..no f...]
593 where f is recursive, but the INLINE is not. This can just about
594 happen with a sufficiently odd set of rules; eg
595
596 foo :: Int -> Int
597 {-# INLINE [1] foo #-}
598 foo x = x+1
599
600 bar :: Int -> Int
601 {-# INLINE [1] bar #-}
602 bar x = foo x + 1
603
604 {-# RULES "foo" [~1] forall x. foo x = bar x #-}
605
606 Here the RULE makes bar recursive; but it's INLINE pragma remains
607 non-recursive. It's tempting to then say that 'bar' should not be
608 a loop breaker, but an attempt to do so goes wrong in two ways:
609 a) We may get
610 $df = ...$cfoo...
611 $cfoo = ...$df....
612 [INLINE $cfoo = ...no-$df...]
613 But we want $cfoo to depend on $df explicitly so that we
614 put the bindings in the right order to inline $df in $cfoo
615 and perhaps break the loop altogether. (Maybe this
616 b)
617
618
619 Example [eftInt]
620 ~~~~~~~~~~~~~~~
621 Example (from GHC.Enum):
622
623 eftInt :: Int# -> Int# -> [Int]
624 eftInt x y = ...(non-recursive)...
625
626 {-# INLINE [0] eftIntFB #-}
627 eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
628 eftIntFB c n x y = ...(non-recursive)...
629
630 {-# RULES
631 "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
632 "eftIntList" [1] eftIntFB (:) [] = eftInt
633 #-}
634
635 Note [Specialisation rules]
636 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
637 Consider this group, which is typical of what SpecConstr builds:
638
639 fs a = ....f (C a)....
640 f x = ....f (C a)....
641 {-# RULE f (C a) = fs a #-}
642
643 So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
644
645 But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
646 - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
647 - fs is inlined (say it's small)
648 - now there's another opportunity to apply the RULE
649
650 This showed up when compiling Control.Concurrent.Chan.getChanContents.
651
652 ------------------------------------------------------------
653 Note [Finding join points]
654 ~~~~~~~~~~~~~~~~~~~~~~~~~~
655 It's the occurrence analyser's job to find bindings that we can turn into join
656 points, but it doesn't perform that transformation right away. Rather, it marks
657 the eligible bindings as part of their occurrence data, leaving it to the
658 simplifier (or to simpleOptPgm) to actually change the binder's 'IdDetails'.
659 The simplifier then eta-expands the RHS if needed and then updates the
660 occurrence sites. Dividing the work this way means that the occurrence analyser
661 still only takes one pass, yet one can always tell the difference between a
662 function call and a jump by looking at the occurrence (because the same pass
663 changes the 'IdDetails' and propagates the binders to their occurrence sites).
664
665 To track potential join points, we use the 'occ_tail' field of OccInfo. A value
666 of `AlwaysTailCalled n` indicates that every occurrence of the variable is a
667 tail call with `n` arguments (counting both value and type arguments). Otherwise
668 'occ_tail' will be 'NoTailCallInfo'. The tail call info flows bottom-up with the
669 rest of 'OccInfo' until it goes on the binder.
670
671 Note [Rules and join points]
672 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
673
674 Things get fiddly with rules. Suppose we have:
675
676 let j :: Int -> Int
677 j y = 2 * y
678 k :: Int -> Int -> Int
679 {-# RULES "SPEC k 0" k 0 = j #-}
680 k x y = x + 2 * y
681 in ...
682
683 Now suppose that both j and k appear only as saturated tail calls in the body.
684 Thus we would like to make them both join points. The rule complicates matters,
685 though, as its RHS has an unapplied occurrence of j. *However*, if we were to
686 eta-expand the rule, all would be well:
687
688 {-# RULES "SPEC k 0" forall a. k 0 a = j a #-}
689
690 So conceivably we could notice that a potential join point would have an
691 "undersaturated" rule and account for it. This would mean we could make
692 something that's been specialised a join point, for instance. But local bindings
693 are rarely specialised, and being overly cautious about rules only
694 costs us anything when, for some `j`:
695
696 * Before specialisation, `j` has non-tail calls, so it can't be a join point.
697 * During specialisation, `j` gets specialised and thus acquires rules.
698 * Sometime afterward, the non-tail calls to `j` disappear (as dead code, say),
699 and so now `j` *could* become a join point.
700
701 This appears to be very rare in practice. TODO Perhaps we should gather
702 statistics to be sure.
703
704 Note [Excess polymorphism and join points]
705 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
706
707 In principle, if a function would be a join point except that it fails
708 the polymorphism rule (see Note [The polymorphism rule of join points] in
709 CoreSyn), it can still be made a join point with some effort. This is because
710 all tail calls must return the same type (they return to the same context!), and
711 thus if the return type depends on an argument, that argument must always be the
712 same.
713
714 For instance, consider:
715
716 let f :: forall a. a -> Char -> [a]
717 f @a x c = ... f @a x 'a' ...
718 in ... f @Int 1 'b' ... f @Int 2 'c' ...
719
720 (where the calls are tail calls). `f` fails the polymorphism rule because its
721 return type is [a], where [a] is bound. But since the type argument is always
722 'Int', we can rewrite it as:
723
724 let f' :: Int -> Char -> [Int]
725 f' x c = ... f' x 'a' ...
726 in ... f' 1 'b' ... f 2 'c' ...
727
728 and now we can make f' a join point:
729
730 join f' :: Int -> Char -> [Int]
731 f' x c = ... jump f' x 'a' ...
732 in ... jump f' 1 'b' ... jump f' 2 'c' ...
733
734 It's not clear that this comes up often, however. TODO: Measure how often and
735 add this analysis if necessary.
736
737 ------------------------------------------------------------
738 Note [Adjusting right-hand sides]
739 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
740 There's a bit of a dance we need to do after analysing a lambda expression or
741 a right-hand side. In particular, we need to
742
743 a) call 'markAllInsideLam' *unless* the binding is for a thunk, a one-shot
744 lambda, or a non-recursive join point; and
745 b) call 'markAllNonTailCalled' *unless* the binding is for a join point.
746
747 Some examples, with how the free occurrences in e (assumed not to be a value
748 lambda) get marked:
749
750 inside lam non-tail-called
751 ------------------------------------------------------------
752 let x = e No Yes
753 let f = \x -> e Yes Yes
754 let f = \x{OneShot} -> e No Yes
755 \x -> e Yes Yes
756 join j x = e No No
757 joinrec j x = e Yes No
758
759 There are a few other caveats; most importantly, if we're marking a binding as
760 'AlwaysTailCalled', it's *going* to be a join point, so we treat it as one so
761 that the effect cascades properly. Consequently, at the time the RHS is
762 analysed, we won't know what adjustments to make; thus 'occAnalLamOrRhs' must
763 return the unadjusted 'UsageDetails', to be adjusted by 'adjustRhsUsage' once
764 join-point-hood has been decided.
765
766 Thus the overall sequence taking place in 'occAnalNonRecBind' and
767 'occAnalRecBind' is as follows:
768
769 1. Call 'occAnalLamOrRhs' to find usage information for the RHS.
770 2. Call 'tagNonRecBinder' or 'tagRecBinders', which decides whether to make
771 the binding a join point.
772 3. Call 'adjustRhsUsage' accordingly. (Done as part of 'tagRecBinders' when
773 recursive.)
774
775 (In the recursive case, this logic is spread between 'makeNode' and
776 'occAnalRec'.)
777 -}
778
779 ------------------------------------------------------------------
780 -- occAnalBind
781 ------------------------------------------------------------------
782
783 occAnalBind :: OccEnv -- The incoming OccEnv
784 -> TopLevelFlag
785 -> ImpRuleEdges
786 -> CoreBind
787 -> UsageDetails -- Usage details of scope
788 -> (UsageDetails, -- Of the whole let(rec)
789 [CoreBind])
790
791 occAnalBind env lvl top_env (NonRec binder rhs) body_usage
792 = occAnalNonRecBind env lvl top_env binder rhs body_usage
793 occAnalBind env lvl top_env (Rec pairs) body_usage
794 = occAnalRecBind env lvl top_env pairs body_usage
795
796 -----------------
797 occAnalNonRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> Var -> CoreExpr
798 -> UsageDetails -> (UsageDetails, [CoreBind])
799 occAnalNonRecBind env lvl imp_rule_edges binder rhs body_usage
800 | isTyVar binder -- A type let; we don't gather usage info
801 = (body_usage, [NonRec binder rhs])
802
803 | not (binder `usedIn` body_usage) -- It's not mentioned
804 = (body_usage, [])
805
806 | otherwise -- It's mentioned in the body
807 = (body_usage' +++ rhs_usage', [NonRec tagged_binder rhs'])
808 where
809 (body_usage', tagged_binder) = tagNonRecBinder lvl body_usage binder
810 mb_join_arity = willBeJoinId_maybe tagged_binder
811
812 (bndrs, body) = collectBinders rhs
813
814 (rhs_usage1, bndrs', body') = occAnalNonRecRhs env tagged_binder bndrs body
815 rhs' = mkLams (markJoinOneShots mb_join_arity bndrs') body'
816 -- For a /non-recursive/ join point we can mark all
817 -- its join-lambda as one-shot; and it's a good idea to do so
818
819 -- Unfoldings
820 -- See Note [Unfoldings and join points]
821 rhs_usage2 = case occAnalUnfolding env NonRecursive binder of
822 Just unf_usage -> rhs_usage1 +++ unf_usage
823 Nothing -> rhs_usage1
824
825 -- Rules
826 -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
827 rules_w_uds = occAnalRules env mb_join_arity NonRecursive tagged_binder
828 rhs_usage3 = rhs_usage2 +++ combineUsageDetailsList
829 (map (\(_, l, r) -> l +++ r) rules_w_uds)
830 rhs_usage4 = maybe rhs_usage3 (addManyOccsSet rhs_usage3) $
831 lookupVarEnv imp_rule_edges binder
832 -- See Note [Preventing loops due to imported functions rules]
833
834 -- Final adjustment
835 rhs_usage' = adjustRhsUsage mb_join_arity NonRecursive bndrs' rhs_usage4
836
837 -----------------
838 occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)]
839 -> UsageDetails -> (UsageDetails, [CoreBind])
840 occAnalRecBind env lvl imp_rule_edges pairs body_usage
841 = foldr (occAnalRec env lvl) (body_usage, []) sccs
842 -- For a recursive group, we
843 -- * occ-analyse all the RHSs
844 -- * compute strongly-connected components
845 -- * feed those components to occAnalRec
846 -- See Note [Recursive bindings: the grand plan]
847 where
848 sccs :: [SCC Details]
849 sccs = {-# SCC "occAnalBind.scc" #-}
850 stronglyConnCompFromEdgedVerticesUniq nodes
851
852 nodes :: [LetrecNode]
853 nodes = {-# SCC "occAnalBind.assoc" #-}
854 map (makeNode env imp_rule_edges bndr_set) pairs
855
856 bndr_set = mkVarSet (map fst pairs)
857
858 {-
859 Note [Unfoldings and join points]
860 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
861
862 We assume that anything in an unfolding occurs multiple times, since unfoldings
863 are often copied (that's the whole point!). But we still need to track tail
864 calls for the purpose of finding join points.
865 -}
866
867 -----------------------------
868 occAnalRec :: OccEnv -> TopLevelFlag
869 -> SCC Details
870 -> (UsageDetails, [CoreBind])
871 -> (UsageDetails, [CoreBind])
872
873 -- The NonRec case is just like a Let (NonRec ...) above
874 occAnalRec _ lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs
875 , nd_uds = rhs_uds, nd_rhs_bndrs = rhs_bndrs }))
876 (body_uds, binds)
877 | not (bndr `usedIn` body_uds)
878 = (body_uds, binds) -- See Note [Dead code]
879
880 | otherwise -- It's mentioned in the body
881 = (body_uds' +++ rhs_uds',
882 NonRec tagged_bndr rhs : binds)
883 where
884 (body_uds', tagged_bndr) = tagNonRecBinder lvl body_uds bndr
885 rhs_uds' = adjustRhsUsage (willBeJoinId_maybe tagged_bndr) NonRecursive
886 rhs_bndrs rhs_uds
887
888 -- The Rec case is the interesting one
889 -- See Note [Recursive bindings: the grand plan]
890 -- See Note [Loop breaking]
891 occAnalRec env lvl (CyclicSCC details_s) (body_uds, binds)
892 | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
893 = (body_uds, binds) -- See Note [Dead code]
894
895 | otherwise -- At this point we always build a single Rec
896 = -- pprTrace "occAnalRec" (vcat
897 -- [ text "weak_fvs" <+> ppr weak_fvs
898 -- , text "lb nodes" <+> ppr loop_breaker_nodes])
899 (final_uds, Rec pairs : binds)
900
901 where
902 bndrs = map nd_bndr details_s
903 bndr_set = mkVarSet bndrs
904
905 ------------------------------
906 -- See Note [Choosing loop breakers] for loop_breaker_nodes
907 final_uds :: UsageDetails
908 loop_breaker_nodes :: [LetrecNode]
909 (final_uds, loop_breaker_nodes)
910 = mkLoopBreakerNodes env lvl bndr_set body_uds details_s
911
912 ------------------------------
913 weak_fvs :: VarSet
914 weak_fvs = mapUnionVarSet nd_weak details_s
915
916 ---------------------------
917 -- Now reconstruct the cycle
918 pairs :: [(Id,CoreExpr)]
919 pairs | isEmptyVarSet weak_fvs = reOrderNodes 0 bndr_set weak_fvs loop_breaker_nodes []
920 | otherwise = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_nodes []
921 -- If weak_fvs is empty, the loop_breaker_nodes will include
922 -- all the edges in the original scope edges [remember,
923 -- weak_fvs is the difference between scope edges and
924 -- lb-edges], so a fresh SCC computation would yield a
925 -- single CyclicSCC result; and reOrderNodes deals with
926 -- exactly that case
927
928
929 ------------------------------------------------------------------
930 -- Loop breaking
931 ------------------------------------------------------------------
932
933 type Binding = (Id,CoreExpr)
934
935 loopBreakNodes :: Int
936 -> VarSet -- All binders
937 -> VarSet -- Binders whose dependencies may be "missing"
938 -- See Note [Weak loop breakers]
939 -> [LetrecNode]
940 -> [Binding] -- Append these to the end
941 -> [Binding]
942 {-
943 loopBreakNodes is applied to the list of nodes for a cyclic strongly
944 connected component (there's guaranteed to be a cycle). It returns
945 the same nodes, but
946 a) in a better order,
947 b) with some of the Ids having a IAmALoopBreaker pragma
948
949 The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means
950 that the simplifier can guarantee not to loop provided it never records an inlining
951 for these no-inline guys.
952
953 Furthermore, the order of the binds is such that if we neglect dependencies
954 on the no-inline Ids then the binds are topologically sorted. This means
955 that the simplifier will generally do a good job if it works from top bottom,
956 recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
957 -}
958
959 -- Return the bindings sorted into a plausible order, and marked with loop breakers.
960 loopBreakNodes depth bndr_set weak_fvs nodes binds
961 = -- pprTrace "loopBreakNodes" (ppr nodes) $
962 go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds
963 where
964 go [] binds = binds
965 go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
966
967 loop_break_scc scc binds
968 = case scc of
969 AcyclicSCC node -> mk_non_loop_breaker weak_fvs node : binds
970 CyclicSCC nodes -> reOrderNodes depth bndr_set weak_fvs nodes binds
971
972 ----------------------------------
973 reOrderNodes :: Int -> VarSet -> VarSet -> [LetrecNode] -> [Binding] -> [Binding]
974 -- Choose a loop breaker, mark it no-inline,
975 -- and call loopBreakNodes on the rest
976 reOrderNodes _ _ _ [] _ = panic "reOrderNodes"
977 reOrderNodes _ _ _ [node] binds = mk_loop_breaker node : binds
978 reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
979 = -- pprTrace "reOrderNodes" (vcat [ text "unchosen" <+> ppr unchosen
980 -- , text "chosen" <+> ppr chosen_nodes ]) $
981 loopBreakNodes new_depth bndr_set weak_fvs unchosen $
982 (map mk_loop_breaker chosen_nodes ++ binds)
983 where
984 (chosen_nodes, unchosen) = chooseLoopBreaker approximate_lb
985 (nd_score (node_payload node))
986 [node] [] nodes
987
988 approximate_lb = depth >= 2
989 new_depth | approximate_lb = 0
990 | otherwise = depth+1
991 -- After two iterations (d=0, d=1) give up
992 -- and approximate, returning to d=0
993
994 mk_loop_breaker :: LetrecNode -> Binding
995 mk_loop_breaker (node_payload -> ND { nd_bndr = bndr, nd_rhs = rhs})
996 = (bndr `setIdOccInfo` strongLoopBreaker { occ_tail = tail_info }, rhs)
997 where
998 tail_info = tailCallInfo (idOccInfo bndr)
999
1000 mk_non_loop_breaker :: VarSet -> LetrecNode -> Binding
1001 -- See Note [Weak loop breakers]
1002 mk_non_loop_breaker weak_fvs (node_payload -> ND { nd_bndr = bndr
1003 , nd_rhs = rhs})
1004 | bndr `elemVarSet` weak_fvs = (setIdOccInfo bndr occ', rhs)
1005 | otherwise = (bndr, rhs)
1006 where
1007 occ' = weakLoopBreaker { occ_tail = tail_info }
1008 tail_info = tailCallInfo (idOccInfo bndr)
1009
1010 ----------------------------------
1011 chooseLoopBreaker :: Bool -- True <=> Too many iterations,
1012 -- so approximate
1013 -> NodeScore -- Best score so far
1014 -> [LetrecNode] -- Nodes with this score
1015 -> [LetrecNode] -- Nodes with higher scores
1016 -> [LetrecNode] -- Unprocessed nodes
1017 -> ([LetrecNode], [LetrecNode])
1018 -- This loop looks for the bind with the lowest score
1019 -- to pick as the loop breaker. The rest accumulate in
1020 chooseLoopBreaker _ _ loop_nodes acc []
1021 = (loop_nodes, acc) -- Done
1022
1023 -- If approximate_loop_breaker is True, we pick *all*
1024 -- nodes with lowest score, else just one
1025 -- See Note [Complexity of loop breaking]
1026 chooseLoopBreaker approx_lb loop_sc loop_nodes acc (node : nodes)
1027 | approx_lb
1028 , rank sc == rank loop_sc
1029 = chooseLoopBreaker approx_lb loop_sc (node : loop_nodes) acc nodes
1030
1031 | sc `betterLB` loop_sc -- Better score so pick this new one
1032 = chooseLoopBreaker approx_lb sc [node] (loop_nodes ++ acc) nodes
1033
1034 | otherwise -- Worse score so don't pick it
1035 = chooseLoopBreaker approx_lb loop_sc loop_nodes (node : acc) nodes
1036 where
1037 sc = nd_score (node_payload node)
1038
1039 {-
1040 Note [Complexity of loop breaking]
1041 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1042 The loop-breaking algorithm knocks out one binder at a time, and
1043 performs a new SCC analysis on the remaining binders. That can
1044 behave very badly in tightly-coupled groups of bindings; in the
1045 worst case it can be (N**2)*log N, because it does a full SCC
1046 on N, then N-1, then N-2 and so on.
1047
1048 To avoid this, we switch plans after 2 (or whatever) attempts:
1049 Plan A: pick one binder with the lowest score, make it
1050 a loop breaker, and try again
1051 Plan B: pick *all* binders with the lowest score, make them
1052 all loop breakers, and try again
1053 Since there are only a small finite number of scores, this will
1054 terminate in a constant number of iterations, rather than O(N)
1055 iterations.
1056
1057 You might thing that it's very unlikely, but RULES make it much
1058 more likely. Here's a real example from Trac #1969:
1059 Rec { $dm = \d.\x. op d
1060 {-# RULES forall d. $dm Int d = $s$dm1
1061 forall d. $dm Bool d = $s$dm2 #-}
1062
1063 dInt = MkD .... opInt ...
1064 dInt = MkD .... opBool ...
1065 opInt = $dm dInt
1066 opBool = $dm dBool
1067
1068 $s$dm1 = \x. op dInt
1069 $s$dm2 = \x. op dBool }
1070 The RULES stuff means that we can't choose $dm as a loop breaker
1071 (Note [Choosing loop breakers]), so we must choose at least (say)
1072 opInt *and* opBool, and so on. The number of loop breakders is
1073 linear in the number of instance declarations.
1074
1075 Note [Loop breakers and INLINE/INLINABLE pragmas]
1076 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1077 Avoid choosing a function with an INLINE pramga as the loop breaker!
1078 If such a function is mutually-recursive with a non-INLINE thing,
1079 then the latter should be the loop-breaker.
1080
1081 It's vital to distinguish between INLINE and INLINABLE (the
1082 Bool returned by hasStableCoreUnfolding_maybe). If we start with
1083 Rec { {-# INLINABLE f #-}
1084 f x = ...f... }
1085 and then worker/wrapper it through strictness analysis, we'll get
1086 Rec { {-# INLINABLE $wf #-}
1087 $wf p q = let x = (p,q) in ...f...
1088
1089 {-# INLINE f #-}
1090 f x = case x of (p,q) -> $wf p q }
1091
1092 Now it is vital that we choose $wf as the loop breaker, so we can
1093 inline 'f' in '$wf'.
1094
1095 Note [DFuns should not be loop breakers]
1096 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1097 It's particularly bad to make a DFun into a loop breaker. See
1098 Note [How instance declarations are translated] in TcInstDcls
1099
1100 We give DFuns a higher score than ordinary CONLIKE things because
1101 if there's a choice we want the DFun to be the non-loop breaker. Eg
1102
1103 rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
1104
1105 $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
1106 {-# DFUN #-}
1107 $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
1108 }
1109
1110 Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
1111 if we can't unravel the DFun first.
1112
1113 Note [Constructor applications]
1114 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1115 It's really really important to inline dictionaries. Real
1116 example (the Enum Ordering instance from GHC.Base):
1117
1118 rec f = \ x -> case d of (p,q,r) -> p x
1119 g = \ x -> case d of (p,q,r) -> q x
1120 d = (v, f, g)
1121
1122 Here, f and g occur just once; but we can't inline them into d.
1123 On the other hand we *could* simplify those case expressions if
1124 we didn't stupidly choose d as the loop breaker.
1125 But we won't because constructor args are marked "Many".
1126 Inlining dictionaries is really essential to unravelling
1127 the loops in static numeric dictionaries, see GHC.Float.
1128
1129 Note [Closure conversion]
1130 ~~~~~~~~~~~~~~~~~~~~~~~~~
1131 We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
1132 The immediate motivation came from the result of a closure-conversion transformation
1133 which generated code like this:
1134
1135 data Clo a b = forall c. Clo (c -> a -> b) c
1136
1137 ($:) :: Clo a b -> a -> b
1138 Clo f env $: x = f env x
1139
1140 rec { plus = Clo plus1 ()
1141
1142 ; plus1 _ n = Clo plus2 n
1143
1144 ; plus2 Zero n = n
1145 ; plus2 (Succ m) n = Succ (plus $: m $: n) }
1146
1147 If we inline 'plus' and 'plus1', everything unravels nicely. But if
1148 we choose 'plus1' as the loop breaker (which is entirely possible
1149 otherwise), the loop does not unravel nicely.
1150
1151
1152 @occAnalUnfolding@ deals with the question of bindings where the Id is marked
1153 by an INLINE pragma. For these we record that anything which occurs
1154 in its RHS occurs many times. This pessimistically assumes that this
1155 inlined binder also occurs many times in its scope, but if it doesn't
1156 we'll catch it next time round. At worst this costs an extra simplifier pass.
1157 ToDo: try using the occurrence info for the inline'd binder.
1158
1159 [March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC.
1160 [June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC.
1161
1162
1163 ************************************************************************
1164 * *
1165 Making nodes
1166 * *
1167 ************************************************************************
1168 -}
1169
1170 type ImpRuleEdges = IdEnv IdSet -- Mapping from FVs of imported RULE LHSs to RHS FVs
1171
1172 noImpRuleEdges :: ImpRuleEdges
1173 noImpRuleEdges = emptyVarEnv
1174
1175 type LetrecNode = Node Unique Details -- Node comes from Digraph
1176 -- The Unique key is gotten from the Id
1177 data Details
1178 = ND { nd_bndr :: Id -- Binder
1179 , nd_rhs :: CoreExpr -- RHS, already occ-analysed
1180 , nd_rhs_bndrs :: [CoreBndr] -- Outer lambdas of RHS
1181 -- INVARIANT: (nd_rhs_bndrs nd, _) ==
1182 -- collectBinders (nd_rhs nd)
1183
1184 , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings
1185 -- ignoring phase (ie assuming all are active)
1186 -- See Note [Forming Rec groups]
1187
1188 , nd_inl :: IdSet -- Free variables of
1189 -- the stable unfolding (if present and active)
1190 -- or the RHS (if not)
1191 -- but excluding any RULES
1192 -- This is the IdSet that may be used if the Id is inlined
1193
1194 , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds
1195 -- but are *not* in nd_inl. These are the ones whose
1196 -- dependencies might not be respected by loop_breaker_nodes
1197 -- See Note [Weak loop breakers]
1198
1199 , nd_active_rule_fvs :: IdSet -- Free variables of the RHS of active RULES
1200
1201 , nd_score :: NodeScore
1202 }
1203
1204 instance Outputable Details where
1205 ppr nd = text "ND" <> braces
1206 (sep [ text "bndr =" <+> ppr (nd_bndr nd)
1207 , text "uds =" <+> ppr (nd_uds nd)
1208 , text "inl =" <+> ppr (nd_inl nd)
1209 , text "weak =" <+> ppr (nd_weak nd)
1210 , text "rule =" <+> ppr (nd_active_rule_fvs nd)
1211 , text "score =" <+> ppr (nd_score nd)
1212 ])
1213
1214 -- The NodeScore is compared lexicographically;
1215 -- e.g. lower rank wins regardless of size
1216 type NodeScore = ( Int -- Rank: lower => more likely to be picked as loop breaker
1217 , Int -- Size of rhs: higher => more likely to be picked as LB
1218 -- Maxes out at maxExprSize; we just use it to prioritise
1219 -- small functions
1220 , Bool ) -- Was it a loop breaker before?
1221 -- True => more likely to be picked
1222 -- Note [Loop breakers, node scoring, and stability]
1223
1224 rank :: NodeScore -> Int
1225 rank (r, _, _) = r
1226
1227 makeNode :: OccEnv -> ImpRuleEdges -> VarSet
1228 -> (Var, CoreExpr) -> LetrecNode
1229 -- See Note [Recursive bindings: the grand plan]
1230 makeNode env imp_rule_edges bndr_set (bndr, rhs)
1231 = DigraphNode details (varUnique bndr) (nonDetKeysUniqSet node_fvs)
1232 -- It's OK to use nonDetKeysUniqSet here as stronglyConnCompFromEdgedVerticesR
1233 -- is still deterministic with edges in nondeterministic order as
1234 -- explained in Note [Deterministic SCC] in Digraph.
1235 where
1236 details = ND { nd_bndr = bndr
1237 , nd_rhs = rhs'
1238 , nd_rhs_bndrs = bndrs'
1239 , nd_uds = rhs_usage3
1240 , nd_inl = inl_fvs
1241 , nd_weak = node_fvs `minusVarSet` inl_fvs
1242 , nd_active_rule_fvs = active_rule_fvs
1243 , nd_score = pprPanic "makeNodeDetails" (ppr bndr) }
1244
1245 -- Constructing the edges for the main Rec computation
1246 -- See Note [Forming Rec groups]
1247 (bndrs, body) = collectBinders rhs
1248 (rhs_usage1, bndrs', body') = occAnalRecRhs env bndrs body
1249 rhs' = mkLams bndrs' body'
1250 rhs_usage2 = rhs_usage1 +++ all_rule_uds
1251 -- Note [Rules are extra RHSs]
1252 -- Note [Rule dependency info]
1253 rhs_usage3 = case mb_unf_uds of
1254 Just unf_uds -> rhs_usage2 +++ unf_uds
1255 Nothing -> rhs_usage2
1256 node_fvs = udFreeVars bndr_set rhs_usage3
1257
1258 -- Finding the free variables of the rules
1259 is_active = occ_rule_act env :: Activation -> Bool
1260
1261 rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)]
1262 rules_w_uds = occAnalRules env (Just (length bndrs)) Recursive bndr
1263
1264 rules_w_rhs_fvs :: [(Activation, VarSet)] -- Find the RHS fvs
1265 rules_w_rhs_fvs = maybe id (\ids -> ((AlwaysActive, ids):))
1266 (lookupVarEnv imp_rule_edges bndr)
1267 -- See Note [Preventing loops due to imported functions rules]
1268 [ (ru_act rule, udFreeVars bndr_set rhs_uds)
1269 | (rule, _, rhs_uds) <- rules_w_uds ]
1270 all_rule_uds = combineUsageDetailsList $
1271 concatMap (\(_, l, r) -> [l, r]) rules_w_uds
1272 active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_rhs_fvs
1273 , is_active a]
1274
1275 -- Finding the usage details of the INLINE pragma (if any)
1276 mb_unf_uds = occAnalUnfolding env Recursive bndr
1277
1278 -- Find the "nd_inl" free vars; for the loop-breaker phase
1279 inl_fvs = case mb_unf_uds of
1280 Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
1281 Just unf_uds -> udFreeVars bndr_set unf_uds
1282 -- We could check for an *active* INLINE (returning
1283 -- emptyVarSet for an inactive one), but is_active
1284 -- isn't the right thing (it tells about
1285 -- RULE activation), so we'd need more plumbing
1286
1287 mkLoopBreakerNodes :: OccEnv -> TopLevelFlag
1288 -> VarSet
1289 -> UsageDetails -- for BODY of let
1290 -> [Details]
1291 -> (UsageDetails, -- adjusted
1292 [LetrecNode])
1293 -- Does four things
1294 -- a) tag each binder with its occurrence info
1295 -- b) add a NodeScore to each node
1296 -- c) make a Node with the right dependency edges for
1297 -- the loop-breaker SCC analysis
1298 -- d) adjust each RHS's usage details according to
1299 -- the binder's (new) shotness and join-point-hood
1300 mkLoopBreakerNodes env lvl bndr_set body_uds details_s
1301 = (final_uds, zipWith mk_lb_node details_s bndrs')
1302 where
1303 (final_uds, bndrs') = tagRecBinders lvl body_uds
1304 [ ((nd_bndr nd)
1305 ,(nd_uds nd)
1306 ,(nd_rhs_bndrs nd))
1307 | nd <- details_s ]
1308 mk_lb_node nd@(ND { nd_bndr = bndr, nd_rhs = rhs, nd_inl = inl_fvs }) bndr'
1309 = DigraphNode nd' (varUnique bndr) (nonDetKeysUniqSet lb_deps)
1310 -- It's OK to use nonDetKeysUniqSet here as
1311 -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges
1312 -- in nondeterministic order as explained in
1313 -- Note [Deterministic SCC] in Digraph.
1314 where
1315 nd' = nd { nd_bndr = bndr', nd_score = score }
1316 score = nodeScore env bndr bndr' rhs lb_deps
1317 lb_deps = extendFvs_ rule_fv_env inl_fvs
1318
1319 rule_fv_env :: IdEnv IdSet
1320 -- Maps a variable f to the variables from this group
1321 -- mentioned in RHS of active rules for f
1322 -- Domain is *subset* of bound vars (others have no rule fvs)
1323 rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
1324 init_rule_fvs -- See Note [Finding rule RHS free vars]
1325 = [ (b, trimmed_rule_fvs)
1326 | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s
1327 , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
1328 , not (isEmptyVarSet trimmed_rule_fvs) ]
1329
1330
1331 ------------------------------------------
1332 nodeScore :: OccEnv
1333 -> Id -- Binder has old occ-info (just for loop-breaker-ness)
1334 -> Id -- Binder with new occ-info
1335 -> CoreExpr -- RHS
1336 -> VarSet -- Loop-breaker dependencies
1337 -> NodeScore
1338 nodeScore env old_bndr new_bndr bind_rhs lb_deps
1339 | not (isId old_bndr) -- A type or cercion variable is never a loop breaker
1340 = (100, 0, False)
1341
1342 | old_bndr `elemVarSet` lb_deps -- Self-recursive things are great loop breakers
1343 = (0, 0, True) -- See Note [Self-recursion and loop breakers]
1344
1345 | not (occ_unf_act env old_bndr) -- A binder whose inlining is inactive (e.g. has
1346 = (0, 0, True) -- a NOINLINE pragam) makes a great loop breaker
1347
1348 | exprIsTrivial rhs
1349 = mk_score 10 -- Practically certain to be inlined
1350 -- Used to have also: && not (isExportedId bndr)
1351 -- But I found this sometimes cost an extra iteration when we have
1352 -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }
1353 -- where df is the exported dictionary. Then df makes a really
1354 -- bad choice for loop breaker
1355
1356 | DFunUnfolding { df_args = args } <- id_unfolding
1357 -- Never choose a DFun as a loop breaker
1358 -- Note [DFuns should not be loop breakers]
1359 = (9, length args, is_lb)
1360
1361 -- Data structures are more important than INLINE pragmas
1362 -- so that dictionary/method recursion unravels
1363
1364 | CoreUnfolding { uf_guidance = UnfWhen {} } <- id_unfolding
1365 = mk_score 6
1366
1367 | is_con_app rhs -- Data types help with cases:
1368 = mk_score 5 -- Note [Constructor applications]
1369
1370 | isStableUnfolding id_unfolding
1371 , can_unfold
1372 = mk_score 3
1373
1374 | isOneOcc (idOccInfo new_bndr)
1375 = mk_score 2 -- Likely to be inlined
1376
1377 | can_unfold -- The Id has some kind of unfolding
1378 = mk_score 1
1379
1380 | otherwise
1381 = (0, 0, is_lb)
1382
1383 where
1384 mk_score :: Int -> NodeScore
1385 mk_score rank = (rank, rhs_size, is_lb)
1386
1387 is_lb = isStrongLoopBreaker (idOccInfo old_bndr)
1388 rhs = case id_unfolding of
1389 CoreUnfolding { uf_src = src, uf_tmpl = unf_rhs }
1390 | isStableSource src
1391 -> unf_rhs
1392 _ -> bind_rhs
1393 -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding
1394 rhs_size = case id_unfolding of
1395 CoreUnfolding { uf_guidance = guidance }
1396 | UnfIfGoodArgs { ug_size = size } <- guidance
1397 -> size
1398 _ -> cheapExprSize rhs
1399
1400 can_unfold = canUnfold id_unfolding
1401 id_unfolding = realIdUnfolding old_bndr
1402 -- realIdUnfolding: Ignore loop-breaker-ness here because
1403 -- that is what we are setting!
1404
1405 -- Checking for a constructor application
1406 -- Cheap and cheerful; the simplifier moves casts out of the way
1407 -- The lambda case is important to spot x = /\a. C (f a)
1408 -- which comes up when C is a dictionary constructor and
1409 -- f is a default method.
1410 -- Example: the instance for Show (ST s a) in GHC.ST
1411 --
1412 -- However we *also* treat (\x. C p q) as a con-app-like thing,
1413 -- Note [Closure conversion]
1414 is_con_app (Var v) = isConLikeId v
1415 is_con_app (App f _) = is_con_app f
1416 is_con_app (Lam _ e) = is_con_app e
1417 is_con_app (Tick _ e) = is_con_app e
1418 is_con_app _ = False
1419
1420 maxExprSize :: Int
1421 maxExprSize = 20 -- Rather arbitrary
1422
1423 cheapExprSize :: CoreExpr -> Int
1424 -- Maxes out at maxExprSize
1425 cheapExprSize e
1426 = go 0 e
1427 where
1428 go n e | n >= maxExprSize = n
1429 | otherwise = go1 n e
1430
1431 go1 n (Var {}) = n+1
1432 go1 n (Lit {}) = n+1
1433 go1 n (Type {}) = n
1434 go1 n (Coercion {}) = n
1435 go1 n (Tick _ e) = go1 n e
1436 go1 n (Cast e _) = go1 n e
1437 go1 n (App f a) = go (go1 n f) a
1438 go1 n (Lam b e)
1439 | isTyVar b = go1 n e
1440 | otherwise = go (n+1) e
1441 go1 n (Let b e) = gos (go1 n e) (rhssOfBind b)
1442 go1 n (Case e _ _ as) = gos (go1 n e) (rhssOfAlts as)
1443
1444 gos n [] = n
1445 gos n (e:es) | n >= maxExprSize = n
1446 | otherwise = gos (go1 n e) es
1447
1448 betterLB :: NodeScore -> NodeScore -> Bool
1449 -- If n1 `betterLB` n2 then choose n1 as the loop breaker
1450 betterLB (rank1, size1, lb1) (rank2, size2, _)
1451 | rank1 < rank2 = True
1452 | rank1 > rank2 = False
1453 | size1 < size2 = False -- Make the bigger n2 into the loop breaker
1454 | size1 > size2 = True
1455 | lb1 = True -- Tie-break: if n1 was a loop breaker before, choose it
1456 | otherwise = False -- See Note [Loop breakers, node scoring, and stability]
1457
1458 {- Note [Self-recursion and loop breakers]
1459 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1460 If we have
1461 rec { f = ...f...g...
1462 ; g = .....f... }
1463 then 'f' has to be a loop breaker anyway, so we may as well choose it
1464 right away, so that g can inline freely.
1465
1466 This is really just a cheap hack. Consider
1467 rec { f = ...g...
1468 ; g = ..f..h...
1469 ; h = ...f....}
1470 Here f or g are better loop breakers than h; but we might accidentally
1471 choose h. Finding the minimal set of loop breakers is hard.
1472
1473 Note [Loop breakers, node scoring, and stability]
1474 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1475 To choose a loop breaker, we give a NodeScore to each node in the SCC,
1476 and pick the one with the best score (according to 'betterLB').
1477
1478 We need to be jolly careful (Trac #12425, #12234) about the stability
1479 of this choice. Suppose we have
1480
1481 let rec { f = ...g...g...
1482 ; g = ...f...f... }
1483 in
1484 case x of
1485 True -> ...f..
1486 False -> ..f...
1487
1488 In each iteration of the simplifier the occurrence analyser OccAnal
1489 chooses a loop breaker. Suppose in iteration 1 it choose g as the loop
1490 breaker. That means it is free to inline f.
1491
1492 Suppose that GHC decides to inline f in the branches of the case, but
1493 (for some reason; eg it is not saturated) in the rhs of g. So we get
1494
1495 let rec { f = ...g...g...
1496 ; g = ...f...f... }
1497 in
1498 case x of
1499 True -> ...g...g.....
1500 False -> ..g..g....
1501
1502 Now suppose that, for some reason, in the next iteration the occurrence
1503 analyser chooses f as the loop breaker, so it can freely inline g. And
1504 again for some reason the simplifier inlines g at its calls in the case
1505 branches, but not in the RHS of f. Then we get
1506
1507 let rec { f = ...g...g...
1508 ; g = ...f...f... }
1509 in
1510 case x of
1511 True -> ...(...f...f...)...(...f..f..).....
1512 False -> ..(...f...f...)...(..f..f...)....
1513
1514 You can see where this is going! Each iteration of the simplifier
1515 doubles the number of calls to f or g. No wonder GHC is slow!
1516
1517 (In the particular example in comment:3 of #12425, f and g are the two
1518 mutually recursive fmap instances for CondT and Result. They are both
1519 marked INLINE which, oddly, is why they don't inline in each other's
1520 RHS, because the call there is not saturated.)
1521
1522 The root cause is that we flip-flop on our choice of loop breaker. I
1523 always thought it didn't matter, and indeed for any single iteration
1524 to terminate, it doesn't matter. But when we iterate, it matters a
1525 lot!!
1526
1527 So The Plan is this:
1528 If there is a tie, choose the node that
1529 was a loop breaker last time round
1530
1531 Hence the is_lb field of NodeScore
1532
1533 ************************************************************************
1534 * *
1535 Right hand sides
1536 * *
1537 ************************************************************************
1538 -}
1539
1540 occAnalRhs :: OccEnv -> RecFlag -> Id -> [CoreBndr] -> CoreExpr
1541 -> (UsageDetails, [CoreBndr], CoreExpr)
1542 -- Returned usage details covers only the RHS,
1543 -- and *not* the RULE or INLINE template for the Id
1544 occAnalRhs env Recursive _ bndrs body
1545 = occAnalRecRhs env bndrs body
1546 occAnalRhs env NonRecursive id bndrs body
1547 = occAnalNonRecRhs env id bndrs body
1548
1549 occAnalRecRhs :: OccEnv -> [CoreBndr] -> CoreExpr -- Rhs lambdas, body
1550 -> (UsageDetails, [CoreBndr], CoreExpr)
1551 -- Returned usage details covers only the RHS,
1552 -- and *not* the RULE or INLINE template for the Id
1553 occAnalRecRhs env bndrs body = occAnalLamOrRhs (rhsCtxt env) bndrs body
1554
1555 occAnalNonRecRhs :: OccEnv
1556 -> Id -> [CoreBndr] -> CoreExpr -- Binder; rhs lams, body
1557 -- Binder is already tagged with occurrence info
1558 -> (UsageDetails, [CoreBndr], CoreExpr)
1559 -- Returned usage details covers only the RHS,
1560 -- and *not* the RULE or INLINE template for the Id
1561 occAnalNonRecRhs env bndr bndrs body
1562 = occAnalLamOrRhs rhs_env bndrs body
1563 where
1564 env1 | is_join_point = env -- See Note [Join point RHSs]
1565 | certainly_inline = env -- See Note [Cascading inlines]
1566 | otherwise = rhsCtxt env
1567
1568 -- See Note [Sources of one-shot information]
1569 rhs_env = env1 { occ_one_shots = argOneShots dmd }
1570
1571 certainly_inline -- See Note [Cascading inlines]
1572 = case occ of
1573 OneOcc { occ_in_lam = in_lam, occ_one_br = one_br }
1574 -> not in_lam && one_br && active && not_stable
1575 _ -> False
1576
1577 is_join_point = isAlwaysTailCalled occ
1578 -- Like (isJoinId bndr) but happens one step earlier
1579 -- c.f. willBeJoinId_maybe
1580
1581 occ = idOccInfo bndr
1582 dmd = idDemandInfo bndr
1583 active = isAlwaysActive (idInlineActivation bndr)
1584 not_stable = not (isStableUnfolding (idUnfolding bndr))
1585
1586 occAnalUnfolding :: OccEnv
1587 -> RecFlag
1588 -> Id
1589 -> Maybe UsageDetails
1590 -- Just the analysis, not a new unfolding. The unfolding
1591 -- got analysed when it was created and we don't need to
1592 -- update it.
1593 occAnalUnfolding env rec_flag id
1594 = case realIdUnfolding id of -- ignore previous loop-breaker flag
1595 CoreUnfolding { uf_tmpl = rhs, uf_src = src }
1596 | not (isStableSource src)
1597 -> Nothing
1598 | otherwise
1599 -> Just $ markAllMany usage
1600 where
1601 (bndrs, body) = collectBinders rhs
1602 (usage, _, _) = occAnalRhs env rec_flag id bndrs body
1603
1604 DFunUnfolding { df_bndrs = bndrs, df_args = args }
1605 -> Just $ zapDetails (delDetailsList usage bndrs)
1606 where
1607 usage = foldr (+++) emptyDetails (map (fst . occAnal env) args)
1608
1609 _ -> Nothing
1610
1611 occAnalRules :: OccEnv
1612 -> Maybe JoinArity -- If the binder is (or MAY become) a join
1613 -- point, what its join arity is (or WOULD
1614 -- become). See Note [Rules and join points].
1615 -> RecFlag
1616 -> Id
1617 -> [(CoreRule, -- Each (non-built-in) rule
1618 UsageDetails, -- Usage details for LHS
1619 UsageDetails)] -- Usage details for RHS
1620 occAnalRules env mb_expected_join_arity rec_flag id
1621 = [ (rule, lhs_uds, rhs_uds) | rule@Rule {} <- idCoreRules id
1622 , let (lhs_uds, rhs_uds) = occ_anal_rule rule ]
1623 where
1624 occ_anal_rule (Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs })
1625 = (lhs_uds, final_rhs_uds)
1626 where
1627 lhs_uds = addManyOccsSet emptyDetails $
1628 (exprsFreeVars args `delVarSetList` bndrs)
1629 (rhs_bndrs, rhs_body) = collectBinders rhs
1630 (rhs_uds, _, _) = occAnalRhs env rec_flag id rhs_bndrs rhs_body
1631 -- Note [Rules are extra RHSs]
1632 -- Note [Rule dependency info]
1633 final_rhs_uds = adjust_tail_info args $ markAllMany $
1634 (rhs_uds `delDetailsList` bndrs)
1635 occ_anal_rule _
1636 = (emptyDetails, emptyDetails)
1637
1638 adjust_tail_info args uds -- see Note [Rules and join points]
1639 = case mb_expected_join_arity of
1640 Just ar | args `lengthIs` ar -> uds
1641 _ -> markAllNonTailCalled uds
1642 {- Note [Join point RHSs]
1643 ~~~~~~~~~~~~~~~~~~~~~~~~~
1644 Consider
1645 x = e
1646 join j = Just x
1647
1648 We want to inline x into j right away, so we don't want to give
1649 the join point a RhsCtxt (Trac #14137). It's not a huge deal, because
1650 the FloatIn pass knows to float into join point RHSs; and the simplifier
1651 does not float things out of join point RHSs. But it's a simple, cheap
1652 thing to do. See Trac #14137.
1653
1654 Note [Cascading inlines]
1655 ~~~~~~~~~~~~~~~~~~~~~~~~
1656 By default we use an rhsCtxt for the RHS of a binding. This tells the
1657 occ anal n that it's looking at an RHS, which has an effect in
1658 occAnalApp. In particular, for constructor applications, it makes
1659 the arguments appear to have NoOccInfo, so that we don't inline into
1660 them. Thus x = f y
1661 k = Just x
1662 we do not want to inline x.
1663
1664 But there's a problem. Consider
1665 x1 = a0 : []
1666 x2 = a1 : x1
1667 x3 = a2 : x2
1668 g = f x3
1669 First time round, it looks as if x1 and x2 occur as an arg of a
1670 let-bound constructor ==> give them a many-occurrence.
1671 But then x3 is inlined (unconditionally as it happens) and
1672 next time round, x2 will be, and the next time round x1 will be
1673 Result: multiple simplifier iterations. Sigh.
1674
1675 So, when analysing the RHS of x3 we notice that x3 will itself
1676 definitely inline the next time round, and so we analyse x3's rhs in
1677 an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff.
1678
1679 Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally.
1680 If (a) the RHS is expandable (see isExpandableApp in occAnalApp), and
1681 (b) certainly_inline says "yes" when preInlineUnconditionally says "no"
1682 then the simplifier iterates indefinitely:
1683 x = f y
1684 k = Just x -- We decide that k is 'certainly_inline'
1685 v = ...k... -- but preInlineUnconditionally doesn't inline it
1686 inline ==>
1687 k = Just (f y)
1688 v = ...k...
1689 float ==>
1690 x1 = f y
1691 k = Just x1
1692 v = ...k...
1693
1694 This is worse than the slow cascade, so we only want to say "certainly_inline"
1695 if it really is certain. Look at the note with preInlineUnconditionally
1696 for the various clauses.
1697
1698
1699 ************************************************************************
1700 * *
1701 Expressions
1702 * *
1703 ************************************************************************
1704 -}
1705
1706 occAnal :: OccEnv
1707 -> CoreExpr
1708 -> (UsageDetails, -- Gives info only about the "interesting" Ids
1709 CoreExpr)
1710
1711 occAnal _ expr@(Type _) = (emptyDetails, expr)
1712 occAnal _ expr@(Lit _) = (emptyDetails, expr)
1713 occAnal env expr@(Var _) = occAnalApp env (expr, [], [])
1714 -- At one stage, I gathered the idRuleVars for the variable here too,
1715 -- which in a way is the right thing to do.
1716 -- But that went wrong right after specialisation, when
1717 -- the *occurrences* of the overloaded function didn't have any
1718 -- rules in them, so the *specialised* versions looked as if they
1719 -- weren't used at all.
1720
1721 occAnal _ (Coercion co)
1722 = (addManyOccsSet emptyDetails (coVarsOfCo co), Coercion co)
1723 -- See Note [Gather occurrences of coercion variables]
1724
1725 {-
1726 Note [Gather occurrences of coercion variables]
1727 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1728 We need to gather info about what coercion variables appear, so that
1729 we can sort them into the right place when doing dependency analysis.
1730 -}
1731
1732 occAnal env (Tick tickish body)
1733 | SourceNote{} <- tickish
1734 = (usage, Tick tickish body')
1735 -- SourceNotes are best-effort; so we just proceed as usual.
1736 -- If we drop a tick due to the issues described below it's
1737 -- not the end of the world.
1738
1739 | tickish `tickishScopesLike` SoftScope
1740 = (markAllNonTailCalled usage, Tick tickish body')
1741
1742 | Breakpoint _ ids <- tickish
1743 = (usage_lam +++ foldr addManyOccs emptyDetails ids, Tick tickish body')
1744 -- never substitute for any of the Ids in a Breakpoint
1745
1746 | otherwise
1747 = (usage_lam, Tick tickish body')
1748 where
1749 !(usage,body') = occAnal env body
1750 -- for a non-soft tick scope, we can inline lambdas only
1751 usage_lam = markAllNonTailCalled (markAllInsideLam usage)
1752 -- TODO There may be ways to make ticks and join points play
1753 -- nicer together, but right now there are problems:
1754 -- let j x = ... in tick<t> (j 1)
1755 -- Making j a join point may cause the simplifier to drop t
1756 -- (if the tick is put into the continuation). So we don't
1757 -- count j 1 as a tail call.
1758 -- See #14242.
1759
1760 occAnal env (Cast expr co)
1761 = case occAnal env expr of { (usage, expr') ->
1762 let usage1 = zapDetailsIf (isRhsEnv env) usage
1763 usage2 = addManyOccsSet usage1 (coVarsOfCo co)
1764 -- See Note [Gather occurrences of coercion variables]
1765 in (markAllNonTailCalled usage2, Cast expr' co)
1766 -- If we see let x = y `cast` co
1767 -- then mark y as 'Many' so that we don't
1768 -- immediately inline y again.
1769 }
1770
1771 occAnal env app@(App _ _)
1772 = occAnalApp env (collectArgsTicks tickishFloatable app)
1773
1774 -- Ignore type variables altogether
1775 -- (a) occurrences inside type lambdas only not marked as InsideLam
1776 -- (b) type variables not in environment
1777
1778 occAnal env (Lam x body)
1779 | isTyVar x
1780 = case occAnal env body of { (body_usage, body') ->
1781 (markAllNonTailCalled body_usage, Lam x body')
1782 }
1783
1784 -- For value lambdas we do a special hack. Consider
1785 -- (\x. \y. ...x...)
1786 -- If we did nothing, x is used inside the \y, so would be marked
1787 -- as dangerous to dup. But in the common case where the abstraction
1788 -- is applied to two arguments this is over-pessimistic.
1789 -- So instead, we just mark each binder with its occurrence
1790 -- info in the *body* of the multiple lambda.
1791 -- Then, the simplifier is careful when partially applying lambdas.
1792
1793 occAnal env expr@(Lam _ _)
1794 = case occAnalLamOrRhs env binders body of { (usage, tagged_binders, body') ->
1795 let
1796 expr' = mkLams tagged_binders body'
1797 usage1 = markAllNonTailCalled usage
1798 one_shot_gp = all isOneShotBndr tagged_binders
1799 final_usage | one_shot_gp = usage1
1800 | otherwise = markAllInsideLam usage1
1801 in
1802 (final_usage, expr') }
1803 where
1804 (binders, body) = collectBinders expr
1805
1806 occAnal env (Case scrut bndr ty alts)
1807 = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') ->
1808 case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts') ->
1809 let
1810 alts_usage = foldr combineAltsUsageDetails emptyDetails alts_usage_s
1811 (alts_usage1, tagged_bndr) = tag_case_bndr alts_usage bndr
1812 total_usage = markAllNonTailCalled scrut_usage +++ alts_usage1
1813 -- Alts can have tail calls, but the scrutinee can't
1814 in
1815 total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
1816 where
1817 -- Note [Case binder usage]
1818 -- ~~~~~~~~~~~~~~~~~~~~~~~~
1819 -- The case binder gets a usage of either "many" or "dead", never "one".
1820 -- Reason: we like to inline single occurrences, to eliminate a binding,
1821 -- but inlining a case binder *doesn't* eliminate a binding.
1822 -- We *don't* want to transform
1823 -- case x of w { (p,q) -> f w }
1824 -- into
1825 -- case x of w { (p,q) -> f (p,q) }
1826 tag_case_bndr usage bndr
1827 = (usage', setIdOccInfo bndr final_occ_info)
1828 where
1829 occ_info = lookupDetails usage bndr
1830 usage' = usage `delDetails` bndr
1831 final_occ_info = case occ_info of IAmDead -> IAmDead
1832 _ -> noOccInfo
1833
1834 alt_env = mkAltEnv env scrut bndr
1835 occ_anal_alt = occAnalAlt alt_env
1836
1837 occ_anal_scrut (Var v) (alt1 : other_alts)
1838 | not (null other_alts) || not (isDefaultAlt alt1)
1839 = (mkOneOcc env v True 0, Var v)
1840 -- The 'True' says that the variable occurs in an interesting
1841 -- context; the case has at least one non-default alternative
1842 occ_anal_scrut (Tick t e) alts
1843 | t `tickishScopesLike` SoftScope
1844 -- No reason to not look through all ticks here, but only
1845 -- for soft-scoped ticks we can do so without having to
1846 -- update returned occurance info (see occAnal)
1847 = second (Tick t) $ occ_anal_scrut e alts
1848
1849 occ_anal_scrut scrut _alts
1850 = occAnal (vanillaCtxt env) scrut -- No need for rhsCtxt
1851
1852 occAnal env (Let bind body)
1853 = case occAnal env body of { (body_usage, body') ->
1854 case occAnalBind env NotTopLevel
1855 noImpRuleEdges bind
1856 body_usage of { (final_usage, new_binds) ->
1857 (final_usage, mkLets new_binds body') }}
1858
1859 occAnalArgs :: OccEnv -> [CoreExpr] -> [OneShots] -> (UsageDetails, [CoreExpr])
1860 occAnalArgs _ [] _
1861 = (emptyDetails, [])
1862
1863 occAnalArgs env (arg:args) one_shots
1864 | isTypeArg arg
1865 = case occAnalArgs env args one_shots of { (uds, args') ->
1866 (uds, arg:args') }
1867
1868 | otherwise
1869 = case argCtxt env one_shots of { (arg_env, one_shots') ->
1870 case occAnal arg_env arg of { (uds1, arg') ->
1871 case occAnalArgs env args one_shots' of { (uds2, args') ->
1872 (uds1 +++ uds2, arg':args') }}}
1873
1874 {-
1875 Applications are dealt with specially because we want
1876 the "build hack" to work.
1877
1878 Note [Arguments of let-bound constructors]
1879 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1880 Consider
1881 f x = let y = expensive x in
1882 let z = (True,y) in
1883 (case z of {(p,q)->q}, case z of {(p,q)->q})
1884 We feel free to duplicate the WHNF (True,y), but that means
1885 that y may be duplicated thereby.
1886
1887 If we aren't careful we duplicate the (expensive x) call!
1888 Constructors are rather like lambdas in this way.
1889 -}
1890
1891 occAnalApp :: OccEnv
1892 -> (Expr CoreBndr, [Arg CoreBndr], [Tickish Id])
1893 -> (UsageDetails, Expr CoreBndr)
1894 occAnalApp env (Var fun, args, ticks)
1895 | null ticks = (uds, mkApps (Var fun) args')
1896 | otherwise = (uds, mkTicks ticks $ mkApps (Var fun) args')
1897 where
1898 uds = fun_uds +++ final_args_uds
1899
1900 !(args_uds, args') = occAnalArgs env args one_shots
1901 !final_args_uds
1902 | isRhsEnv env && is_exp = markAllNonTailCalled $
1903 markAllInsideLam args_uds
1904 | otherwise = markAllNonTailCalled args_uds
1905 -- We mark the free vars of the argument of a constructor or PAP
1906 -- as "inside-lambda", if it is the RHS of a let(rec).
1907 -- This means that nothing gets inlined into a constructor or PAP
1908 -- argument position, which is what we want. Typically those
1909 -- constructor arguments are just variables, or trivial expressions.
1910 -- We use inside-lam because it's like eta-expanding the PAP.
1911 --
1912 -- This is the *whole point* of the isRhsEnv predicate
1913 -- See Note [Arguments of let-bound constructors]
1914
1915 n_val_args = valArgCount args
1916 n_args = length args
1917 fun_uds = mkOneOcc env fun (n_val_args > 0) n_args
1918 is_exp = isExpandableApp fun n_val_args
1919 -- See Note [CONLIKE pragma] in BasicTypes
1920 -- The definition of is_exp should match that in Simplify.prepareRhs
1921
1922 one_shots = argsOneShots (idStrictness fun) guaranteed_val_args
1923 guaranteed_val_args = n_val_args + length (takeWhile isOneShotInfo
1924 (occ_one_shots env))
1925 -- See Note [Sources of one-shot information], bullet point A']
1926
1927 occAnalApp env (fun, args, ticks)
1928 = (markAllNonTailCalled (fun_uds +++ args_uds),
1929 mkTicks ticks $ mkApps fun' args')
1930 where
1931 !(fun_uds, fun') = occAnal (addAppCtxt env args) fun
1932 -- The addAppCtxt is a bit cunning. One iteration of the simplifier
1933 -- often leaves behind beta redexs like
1934 -- (\x y -> e) a1 a2
1935 -- Here we would like to mark x,y as one-shot, and treat the whole
1936 -- thing much like a let. We do this by pushing some True items
1937 -- onto the context stack.
1938 !(args_uds, args') = occAnalArgs env args []
1939
1940 zapDetailsIf :: Bool -- If this is true
1941 -> UsageDetails -- Then do zapDetails on this
1942 -> UsageDetails
1943 zapDetailsIf True uds = zapDetails uds
1944 zapDetailsIf False uds = uds
1945
1946 {-
1947 Note [Sources of one-shot information]
1948 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1949 The occurrence analyser obtains one-shot-lambda information from two sources:
1950
1951 A: Saturated applications: eg f e1 .. en
1952
1953 In general, given a call (f e1 .. en) we can propagate one-shot info from
1954 f's strictness signature into e1 .. en, but /only/ if n is enough to
1955 saturate the strictness signature. A strictness signature like
1956
1957 f :: C1(C1(L))LS
1958
1959 means that *if f is applied to three arguments* then it will guarantee to
1960 call its first argument at most once, and to call the result of that at
1961 most once. But if f has fewer than three arguments, all bets are off; e.g.
1962
1963 map (f (\x y. expensive) e2) xs
1964
1965 Here the \x y abstraction may be called many times (once for each element of
1966 xs) so we should not mark x and y as one-shot. But if it was
1967
1968 map (f (\x y. expensive) 3 2) xs
1969
1970 then the first argument of f will be called at most once.
1971
1972 The one-shot info, derived from f's strictness signature, is
1973 computed by 'argsOneShots', called in occAnalApp.
1974
1975 A': Non-obviously saturated applications: eg build (f (\x y -> expensive))
1976 where f is as above.
1977
1978 In this case, f is only manifestly applied to one argument, so it does not
1979 look saturated. So by the previous point, we should not use its strictness
1980 signature to learn about the one-shotness of \x y. But in this case we can:
1981 build is fully applied, so we may use its strictness signature; and from
1982 that we learn that build calls its argument with two arguments *at most once*.
1983
1984 So there is really only one call to f, and it will have three arguments. In
1985 that sense, f is saturated, and we may proceed as described above.
1986
1987 Hence the computation of 'guaranteed_val_args' in occAnalApp, using
1988 '(occ_one_shots env)'. See also Trac #13227, comment:9
1989
1990 B: Let-bindings: eg let f = \c. let ... in \n -> blah
1991 in (build f, build f)
1992
1993 Propagate one-shot info from the demanand-info on 'f' to the
1994 lambdas in its RHS (which may not be syntactically at the top)
1995
1996 This information must have come from a previous run of the demanand
1997 analyser.
1998
1999 Previously, the demand analyser would *also* set the one-shot information, but
2000 that code was buggy (see #11770), so doing it only in on place, namely here, is
2001 saner.
2002
2003 Note [OneShots]
2004 ~~~~~~~~~~~~~~~
2005 When analysing an expression, the occ_one_shots argument contains information
2006 about how the function is being used. The length of the list indicates
2007 how many arguments will eventually be passed to the analysed expression,
2008 and the OneShotInfo indicates whether this application is once or multiple times.
2009
2010 Example:
2011
2012 Context of f occ_one_shots when analysing f
2013
2014 f 1 2 [OneShot, OneShot]
2015 map (f 1) [OneShot, NoOneShotInfo]
2016 build f [OneShot, OneShot]
2017 f 1 2 `seq` f 2 1 [NoOneShotInfo, OneShot]
2018
2019 Note [Binders in case alternatives]
2020 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2021 Consider
2022 case x of y { (a,b) -> f y }
2023 We treat 'a', 'b' as dead, because they don't physically occur in the
2024 case alternative. (Indeed, a variable is dead iff it doesn't occur in
2025 its scope in the output of OccAnal.) It really helps to know when
2026 binders are unused. See esp the call to isDeadBinder in
2027 Simplify.mkDupableAlt
2028
2029 In this example, though, the Simplifier will bring 'a' and 'b' back to
2030 life, beause it binds 'y' to (a,b) (imagine got inlined and
2031 scrutinised y).
2032 -}
2033
2034 occAnalLamOrRhs :: OccEnv -> [CoreBndr] -> CoreExpr
2035 -> (UsageDetails, [CoreBndr], CoreExpr)
2036 occAnalLamOrRhs env [] body
2037 = case occAnal env body of (body_usage, body') -> (body_usage, [], body')
2038 -- RHS of thunk or nullary join point
2039 occAnalLamOrRhs env (bndr:bndrs) body
2040 | isTyVar bndr
2041 = -- Important: Keep the environment so that we don't inline into an RHS like
2042 -- \(@ x) -> C @x (f @x)
2043 -- (see the beginning of Note [Cascading inlines]).
2044 case occAnalLamOrRhs env bndrs body of
2045 (body_usage, bndrs', body') -> (body_usage, bndr:bndrs', body')
2046 occAnalLamOrRhs env binders body
2047 = case occAnal env_body body of { (body_usage, body') ->
2048 let
2049 (final_usage, tagged_binders) = tagLamBinders body_usage binders'
2050 -- Use binders' to put one-shot info on the lambdas
2051 in
2052 (final_usage, tagged_binders, body') }
2053 where
2054 (env_body, binders') = oneShotGroup env binders
2055
2056 occAnalAlt :: (OccEnv, Maybe (Id, CoreExpr))
2057 -> CoreAlt
2058 -> (UsageDetails, Alt IdWithOccInfo)
2059 occAnalAlt (env, scrut_bind) (con, bndrs, rhs)
2060 = case occAnal env rhs of { (rhs_usage1, rhs1) ->
2061 let
2062 (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs
2063 -- See Note [Binders in case alternatives]
2064 (alt_usg', rhs2) =
2065 wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1
2066 in
2067 (alt_usg', (con, tagged_bndrs, rhs2)) }
2068
2069 wrapAltRHS :: OccEnv
2070 -> Maybe (Id, CoreExpr) -- proxy mapping generated by mkAltEnv
2071 -> UsageDetails -- usage for entire alt (p -> rhs)
2072 -> [Var] -- alt binders
2073 -> CoreExpr -- alt RHS
2074 -> (UsageDetails, CoreExpr)
2075 wrapAltRHS env (Just (scrut_var, let_rhs)) alt_usg bndrs alt_rhs
2076 | occ_binder_swap env
2077 , scrut_var `usedIn` alt_usg -- bndrs are not be present in alt_usg so this
2078 -- handles condition (a) in Note [Binder swap]
2079 , not captured -- See condition (b) in Note [Binder swap]
2080 = ( alt_usg' +++ let_rhs_usg
2081 , Let (NonRec tagged_scrut_var let_rhs') alt_rhs )
2082 where
2083 captured = any (`usedIn` let_rhs_usg) bndrs
2084 -- The rhs of the let may include coercion variables
2085 -- if the scrutinee was a cast, so we must gather their
2086 -- usage. See Note [Gather occurrences of coercion variables]
2087 (let_rhs_usg, let_rhs') = occAnal env let_rhs
2088 (alt_usg', [tagged_scrut_var]) = tagLamBinders alt_usg [scrut_var]
2089
2090 wrapAltRHS _ _ alt_usg _ alt_rhs
2091 = (alt_usg, alt_rhs)
2092
2093 {-
2094 ************************************************************************
2095 * *
2096 OccEnv
2097 * *
2098 ************************************************************************
2099 -}
2100
2101 data OccEnv
2102 = OccEnv { occ_encl :: !OccEncl -- Enclosing context information
2103 , occ_one_shots :: !OneShots -- See Note [OneShots]
2104 , occ_gbl_scrut :: GlobalScruts
2105
2106 , occ_unf_act :: Id -> Bool -- Which Id unfoldings are active
2107
2108 , occ_rule_act :: Activation -> Bool -- Which rules are active
2109 -- See Note [Finding rule RHS free vars]
2110
2111 , occ_binder_swap :: !Bool -- enable the binder_swap
2112 -- See CorePrep Note [Dead code in CorePrep]
2113 }
2114
2115 type GlobalScruts = IdSet -- See Note [Binder swap on GlobalId scrutinees]
2116
2117 -----------------------------
2118 -- OccEncl is used to control whether to inline into constructor arguments
2119 -- For example:
2120 -- x = (p,q) -- Don't inline p or q
2121 -- y = /\a -> (p a, q a) -- Still don't inline p or q
2122 -- z = f (p,q) -- Do inline p,q; it may make a rule fire
2123 -- So OccEncl tells enought about the context to know what to do when
2124 -- we encounter a constructor application or PAP.
2125
2126 data OccEncl
2127 = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda
2128 -- Don't inline into constructor args here
2129 | OccVanilla -- Argument of function, body of lambda, scruintee of case etc.
2130 -- Do inline into constructor args here
2131
2132 instance Outputable OccEncl where
2133 ppr OccRhs = text "occRhs"
2134 ppr OccVanilla = text "occVanilla"
2135
2136 -- See note [OneShots]
2137 type OneShots = [OneShotInfo]
2138
2139 initOccEnv :: OccEnv
2140 initOccEnv
2141 = OccEnv { occ_encl = OccVanilla
2142 , occ_one_shots = []
2143 , occ_gbl_scrut = emptyVarSet
2144 -- To be conservative, we say that all
2145 -- inlines and rules are active
2146 , occ_unf_act = \_ -> True
2147 , occ_rule_act = \_ -> True
2148 , occ_binder_swap = True }
2149
2150 vanillaCtxt :: OccEnv -> OccEnv
2151 vanillaCtxt env = env { occ_encl = OccVanilla, occ_one_shots = [] }
2152
2153 rhsCtxt :: OccEnv -> OccEnv
2154 rhsCtxt env = env { occ_encl = OccRhs, occ_one_shots = [] }
2155
2156 argCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots])
2157 argCtxt env []
2158 = (env { occ_encl = OccVanilla, occ_one_shots = [] }, [])
2159 argCtxt env (one_shots:one_shots_s)
2160 = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s)
2161
2162 isRhsEnv :: OccEnv -> Bool
2163 isRhsEnv (OccEnv { occ_encl = OccRhs }) = True
2164 isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False
2165
2166 oneShotGroup :: OccEnv -> [CoreBndr]
2167 -> ( OccEnv
2168 , [CoreBndr] )
2169 -- The result binders have one-shot-ness set that they might not have had originally.
2170 -- This happens in (build (\c n -> e)). Here the occurrence analyser
2171 -- linearity context knows that c,n are one-shot, and it records that fact in
2172 -- the binder. This is useful to guide subsequent float-in/float-out tranformations
2173
2174 oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs
2175 = go ctxt bndrs []
2176 where
2177 go ctxt [] rev_bndrs
2178 = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla }
2179 , reverse rev_bndrs )
2180
2181 go [] bndrs rev_bndrs
2182 = ( env { occ_one_shots = [], occ_encl = OccVanilla }
2183 , reverse rev_bndrs ++ bndrs )
2184
2185 go ctxt@(one_shot : ctxt') (bndr : bndrs) rev_bndrs
2186 | isId bndr = go ctxt' bndrs (bndr': rev_bndrs)
2187 | otherwise = go ctxt bndrs (bndr : rev_bndrs)
2188 where
2189 bndr' = updOneShotInfo bndr one_shot
2190 -- Use updOneShotInfo, not setOneShotInfo, as pre-existing
2191 -- one-shot info might be better than what we can infer, e.g.
2192 -- due to explicit use of the magic 'oneShot' function.
2193 -- See Note [The oneShot function]
2194
2195
2196 markJoinOneShots :: Maybe JoinArity -> [Var] -> [Var]
2197 -- Mark the lambdas of a non-recursive join point as one-shot.
2198 -- This is good to prevent gratuitous float-out etc
2199 markJoinOneShots mb_join_arity bndrs
2200 = case mb_join_arity of
2201 Nothing -> bndrs
2202 Just n -> go n bndrs
2203 where
2204 go 0 bndrs = bndrs
2205 go _ [] = WARN( True, ppr mb_join_arity <+> ppr bndrs ) []
2206 go n (b:bs) = b' : go (n-1) bs
2207 where
2208 b' | isId b = setOneShotLambda b
2209 | otherwise = b
2210
2211 addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
2212 addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args
2213 = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt }
2214
2215 transClosureFV :: UniqFM VarSet -> UniqFM VarSet
2216 -- If (f,g), (g,h) are in the input, then (f,h) is in the output
2217 -- as well as (f,g), (g,h)
2218 transClosureFV env
2219 | no_change = env
2220 | otherwise = transClosureFV (listToUFM new_fv_list)
2221 where
2222 (no_change, new_fv_list) = mapAccumL bump True (nonDetUFMToList env)
2223 -- It's OK to use nonDetUFMToList here because we'll forget the
2224 -- ordering by creating a new set with listToUFM
2225 bump no_change (b,fvs)
2226 | no_change_here = (no_change, (b,fvs))
2227 | otherwise = (False, (b,new_fvs))
2228 where
2229 (new_fvs, no_change_here) = extendFvs env fvs
2230
2231 -------------
2232 extendFvs_ :: UniqFM VarSet -> VarSet -> VarSet
2233 extendFvs_ env s = fst (extendFvs env s) -- Discard the Bool flag
2234
2235 extendFvs :: UniqFM VarSet -> VarSet -> (VarSet, Bool)
2236 -- (extendFVs env s) returns
2237 -- (s `union` env(s), env(s) `subset` s)
2238 extendFvs env s
2239 | isNullUFM env
2240 = (s, True)
2241 | otherwise
2242 = (s `unionVarSet` extras, extras `subVarSet` s)
2243 where
2244 extras :: VarSet -- env(s)
2245 extras = nonDetFoldUFM unionVarSet emptyVarSet $
2246 -- It's OK to use nonDetFoldUFM here because unionVarSet commutes
2247 intersectUFM_C (\x _ -> x) env (getUniqSet s)
2248
2249 {-
2250 ************************************************************************
2251 * *
2252 Binder swap
2253 * *
2254 ************************************************************************
2255
2256 Note [Binder swap]
2257 ~~~~~~~~~~~~~~~~~~
2258 We do these two transformations right here:
2259
2260 (1) case x of b { pi -> ri }
2261 ==>
2262 case x of b { pi -> let x=b in ri }
2263
2264 (2) case (x |> co) of b { pi -> ri }
2265 ==>
2266 case (x |> co) of b { pi -> let x = b |> sym co in ri }
2267
2268 Why (2)? See Note [Case of cast]
2269
2270 In both cases, in a particular alternative (pi -> ri), we only
2271 add the binding if
2272 (a) x occurs free in (pi -> ri)
2273 (ie it occurs in ri, but is not bound in pi)
2274 (b) the pi does not bind b (or the free vars of co)
2275 We need (a) and (b) for the inserted binding to be correct.
2276
2277 For the alternatives where we inject the binding, we can transfer
2278 all x's OccInfo to b. And that is the point.
2279
2280 Notice that
2281 * The deliberate shadowing of 'x'.
2282 * That (a) rapidly becomes false, so no bindings are injected.
2283
2284 The reason for doing these transformations here is because it allows
2285 us to adjust the OccInfo for 'x' and 'b' as we go.
2286
2287 * Suppose the only occurrences of 'x' are the scrutinee and in the
2288 ri; then this transformation makes it occur just once, and hence
2289 get inlined right away.
2290
2291 * If we do this in the Simplifier, we don't know whether 'x' is used
2292 in ri, so we are forced to pessimistically zap b's OccInfo even
2293 though it is typically dead (ie neither it nor x appear in the
2294 ri). There's nothing actually wrong with zapping it, except that
2295 it's kind of nice to know which variables are dead. My nose
2296 tells me to keep this information as robustly as possible.
2297
2298 The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
2299 {x=b}; it's Nothing if the binder-swap doesn't happen.
2300
2301 There is a danger though. Consider
2302 let v = x +# y
2303 in case (f v) of w -> ...v...v...
2304 And suppose that (f v) expands to just v. Then we'd like to
2305 use 'w' instead of 'v' in the alternative. But it may be too
2306 late; we may have substituted the (cheap) x+#y for v in the
2307 same simplifier pass that reduced (f v) to v.
2308
2309 I think this is just too bad. CSE will recover some of it.
2310
2311 Note [Case of cast]
2312 ~~~~~~~~~~~~~~~~~~~
2313 Consider case (x `cast` co) of b { I# ->
2314 ... (case (x `cast` co) of {...}) ...
2315 We'd like to eliminate the inner case. That is the motivation for
2316 equation (2) in Note [Binder swap]. When we get to the inner case, we
2317 inline x, cancel the casts, and away we go.
2318
2319 Note [Binder swap on GlobalId scrutinees]
2320 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2321 When the scrutinee is a GlobalId we must take care in two ways
2322
2323 i) In order to *know* whether 'x' occurs free in the RHS, we need its
2324 occurrence info. BUT, we don't gather occurrence info for
2325 GlobalIds. That's the reason for the (small) occ_gbl_scrut env in
2326 OccEnv is for: it says "gather occurrence info for these".
2327
2328 ii) We must call localiseId on 'x' first, in case it's a GlobalId, or
2329 has an External Name. See, for example, SimplEnv Note [Global Ids in
2330 the substitution].
2331
2332 Note [Zap case binders in proxy bindings]
2333 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2334 From the original
2335 case x of cb(dead) { p -> ...x... }
2336 we will get
2337 case x of cb(live) { p -> let x = cb in ...x... }
2338
2339 Core Lint never expects to find an *occurrence* of an Id marked
2340 as Dead, so we must zap the OccInfo on cb before making the
2341 binding x = cb. See Trac #5028.
2342
2343 Historical note [no-case-of-case]
2344 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2345 We *used* to suppress the binder-swap in case expressions when
2346 -fno-case-of-case is on. Old remarks:
2347 "This happens in the first simplifier pass,
2348 and enhances full laziness. Here's the bad case:
2349 f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
2350 If we eliminate the inner case, we trap it inside the I# v -> arm,
2351 which might prevent some full laziness happening. I've seen this
2352 in action in spectral/cichelli/Prog.hs:
2353 [(m,n) | m <- [1..max], n <- [1..max]]
2354 Hence the check for NoCaseOfCase."
2355 However, now the full-laziness pass itself reverses the binder-swap, so this
2356 check is no longer necessary.
2357
2358 Historical note [Suppressing the case binder-swap]
2359 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2360 This old note describes a problem that is also fixed by doing the
2361 binder-swap in OccAnal:
2362
2363 There is another situation when it might make sense to suppress the
2364 case-expression binde-swap. If we have
2365
2366 case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
2367 ...other cases .... }
2368
2369 We'll perform the binder-swap for the outer case, giving
2370
2371 case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
2372 ...other cases .... }
2373
2374 But there is no point in doing it for the inner case, because w1 can't
2375 be inlined anyway. Furthermore, doing the case-swapping involves
2376 zapping w2's occurrence info (see paragraphs that follow), and that
2377 forces us to bind w2 when doing case merging. So we get
2378
2379 case x of w1 { A -> let w2 = w1 in e1
2380 B -> let w2 = w1 in e2
2381 ...other cases .... }
2382
2383 This is plain silly in the common case where w2 is dead.
2384
2385 Even so, I can't see a good way to implement this idea. I tried
2386 not doing the binder-swap if the scrutinee was already evaluated
2387 but that failed big-time:
2388
2389 data T = MkT !Int
2390
2391 case v of w { MkT x ->
2392 case x of x1 { I# y1 ->
2393 case x of x2 { I# y2 -> ...
2394
2395 Notice that because MkT is strict, x is marked "evaluated". But to
2396 eliminate the last case, we must either make sure that x (as well as
2397 x1) has unfolding MkT y1. The straightforward thing to do is to do
2398 the binder-swap. So this whole note is a no-op.
2399
2400 It's fixed by doing the binder-swap in OccAnal because we can do the
2401 binder-swap unconditionally and still get occurrence analysis
2402 information right.
2403 -}
2404
2405 mkAltEnv :: OccEnv -> CoreExpr -> Id -> (OccEnv, Maybe (Id, CoreExpr))
2406 -- Does two things: a) makes the occ_one_shots = OccVanilla
2407 -- b) extends the GlobalScruts if possible
2408 -- c) returns a proxy mapping, binding the scrutinee
2409 -- to the case binder, if possible
2410 mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr
2411 = case stripTicksTopE (const True) scrut of
2412 Var v -> add_scrut v case_bndr'
2413 Cast (Var v) co -> add_scrut v (Cast case_bndr' (mkSymCo co))
2414 -- See Note [Case of cast]
2415 _ -> (env { occ_encl = OccVanilla }, Nothing)
2416
2417 where
2418 add_scrut v rhs = ( env { occ_encl = OccVanilla, occ_gbl_scrut = pe `extendVarSet` v }
2419 , Just (localise v, rhs) )
2420
2421 case_bndr' = Var (zapIdOccInfo case_bndr) -- See Note [Zap case binders in proxy bindings]
2422 localise scrut_var = mkLocalIdOrCoVar (localiseName (idName scrut_var)) (idType scrut_var)
2423 -- Localise the scrut_var before shadowing it; we're making a
2424 -- new binding for it, and it might have an External Name, or
2425 -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]
2426 -- Also we don't want any INLINE or NOINLINE pragmas!
2427
2428 {-
2429 ************************************************************************
2430 * *
2431 \subsection[OccurAnal-types]{OccEnv}
2432 * *
2433 ************************************************************************
2434
2435 Note [UsageDetails and zapping]
2436 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2437
2438 On many occasions, we must modify all gathered occurrence data at once. For
2439 instance, all occurrences underneath a (non-one-shot) lambda set the
2440 'occ_in_lam' flag to become 'True'. We could use 'mapVarEnv' to do this, but
2441 that takes O(n) time and we will do this often---in particular, there are many
2442 places where tail calls are not allowed, and each of these causes all variables
2443 to get marked with 'NoTailCallInfo'.
2444
2445 Instead of relying on `mapVarEnv`, then, we carry three 'IdEnv's around along
2446 with the 'OccInfoEnv'. Each of these extra environments is a "zapped set"
2447 recording which variables have been zapped in some way. Zapping all occurrence
2448 info then simply means setting the corresponding zapped set to the whole
2449 'OccInfoEnv', a fast O(1) operation.
2450 -}
2451
2452 type OccInfoEnv = IdEnv OccInfo -- A finite map from ids to their usage
2453 -- INVARIANT: never IAmDead
2454 -- (Deadness is signalled by not being in the map at all)
2455
2456 type ZappedSet = OccInfoEnv -- Values are ignored
2457
2458 data UsageDetails
2459 = UD { ud_env :: !OccInfoEnv
2460 , ud_z_many :: ZappedSet -- apply 'markMany' to these
2461 , ud_z_in_lam :: ZappedSet -- apply 'markInsideLam' to these
2462 , ud_z_no_tail :: ZappedSet } -- apply 'markNonTailCalled' to these
2463 -- INVARIANT: All three zapped sets are subsets of the OccInfoEnv
2464
2465 instance Outputable UsageDetails where
2466 ppr ud = ppr (ud_env (flattenUsageDetails ud))
2467
2468 -------------------
2469 -- UsageDetails API
2470
2471 (+++), combineAltsUsageDetails
2472 :: UsageDetails -> UsageDetails -> UsageDetails
2473 (+++) = combineUsageDetailsWith addOccInfo
2474 combineAltsUsageDetails = combineUsageDetailsWith orOccInfo
2475
2476 combineUsageDetailsList :: [UsageDetails] -> UsageDetails
2477 combineUsageDetailsList = foldl (+++) emptyDetails
2478
2479 mkOneOcc :: OccEnv -> Id -> InterestingCxt -> JoinArity -> UsageDetails
2480 mkOneOcc env id int_cxt arity
2481 | isLocalId id
2482 = singleton $ OneOcc { occ_in_lam = False
2483 , occ_one_br = True
2484 , occ_int_cxt = int_cxt
2485 , occ_tail = AlwaysTailCalled arity }
2486 | id `elemVarSet` occ_gbl_scrut env
2487 = singleton noOccInfo
2488
2489 | otherwise
2490 = emptyDetails
2491 where
2492 singleton info = emptyDetails { ud_env = unitVarEnv id info }
2493
2494 addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
2495 addOneOcc ud id info
2496 = ud { ud_env = extendVarEnv_C plus_zapped (ud_env ud) id info }
2497 `alterZappedSets` (`delVarEnv` id)
2498 where
2499 plus_zapped old new = doZapping ud id old `addOccInfo` new
2500
2501 addManyOccsSet :: UsageDetails -> VarSet -> UsageDetails
2502 addManyOccsSet usage id_set = nonDetFoldUniqSet addManyOccs usage id_set
2503 -- It's OK to use nonDetFoldUFM here because addManyOccs commutes
2504
2505 -- Add several occurrences, assumed not to be tail calls
2506 addManyOccs :: Var -> UsageDetails -> UsageDetails
2507 addManyOccs v u | isId v = addOneOcc u v noOccInfo
2508 | otherwise = u
2509 -- Give a non-committal binder info (i.e noOccInfo) because
2510 -- a) Many copies of the specialised thing can appear
2511 -- b) We don't want to substitute a BIG expression inside a RULE
2512 -- even if that's the only occurrence of the thing
2513 -- (Same goes for INLINE.)
2514
2515 delDetails :: UsageDetails -> Id -> UsageDetails
2516 delDetails ud bndr
2517 = ud `alterUsageDetails` (`delVarEnv` bndr)
2518
2519 delDetailsList :: UsageDetails -> [Id] -> UsageDetails
2520 delDetailsList ud bndrs
2521 = ud `alterUsageDetails` (`delVarEnvList` bndrs)
2522
2523 emptyDetails :: UsageDetails
2524 emptyDetails = UD { ud_env = emptyVarEnv
2525 , ud_z_many = emptyVarEnv
2526 , ud_z_in_lam = emptyVarEnv
2527 , ud_z_no_tail = emptyVarEnv }
2528
2529 isEmptyDetails :: UsageDetails -> Bool
2530 isEmptyDetails = isEmptyVarEnv . ud_env
2531
2532 markAllMany, markAllInsideLam, markAllNonTailCalled, zapDetails
2533 :: UsageDetails -> UsageDetails
2534 markAllMany ud = ud { ud_z_many = ud_env ud }
2535 markAllInsideLam ud = ud { ud_z_in_lam = ud_env ud }
2536 markAllNonTailCalled ud = ud { ud_z_no_tail = ud_env ud }
2537
2538 zapDetails = markAllMany . markAllNonTailCalled -- effectively sets to noOccInfo
2539
2540 lookupDetails :: UsageDetails -> Id -> OccInfo
2541 lookupDetails ud id
2542 = case lookupVarEnv (ud_env ud) id of
2543 Just occ -> doZapping ud id occ
2544 Nothing -> IAmDead
2545
2546 usedIn :: Id -> UsageDetails -> Bool
2547 v `usedIn` ud = isExportedId v || v `elemVarEnv` ud_env ud
2548
2549 udFreeVars :: VarSet -> UsageDetails -> VarSet
2550 -- Find the subset of bndrs that are mentioned in uds
2551 udFreeVars bndrs ud = restrictUniqSetToUFM bndrs (ud_env ud)
2552
2553 -------------------
2554 -- Auxiliary functions for UsageDetails implementation
2555
2556 combineUsageDetailsWith :: (OccInfo -> OccInfo -> OccInfo)
2557 -> UsageDetails -> UsageDetails -> UsageDetails
2558 combineUsageDetailsWith plus_occ_info ud1 ud2
2559 | isEmptyDetails ud1 = ud2
2560 | isEmptyDetails ud2 = ud1
2561 | otherwise
2562 = UD { ud_env = plusVarEnv_C plus_occ_info (ud_env ud1) (ud_env ud2)
2563 , ud_z_many = plusVarEnv (ud_z_many ud1) (ud_z_many ud2)
2564 , ud_z_in_lam = plusVarEnv (ud_z_in_lam ud1) (ud_z_in_lam ud2)
2565 , ud_z_no_tail = plusVarEnv (ud_z_no_tail ud1) (ud_z_no_tail ud2) }
2566
2567 doZapping :: UsageDetails -> Var -> OccInfo -> OccInfo
2568 doZapping ud var occ
2569 = doZappingByUnique ud (varUnique var) occ
2570
2571 doZappingByUnique :: UsageDetails -> Unique -> OccInfo -> OccInfo
2572 doZappingByUnique ud uniq
2573 = (if | in_subset ud_z_many -> markMany
2574 | in_subset ud_z_in_lam -> markInsideLam
2575 | otherwise -> id) .
2576 (if | in_subset ud_z_no_tail -> markNonTailCalled
2577 | otherwise -> id)
2578 where
2579 in_subset field = uniq `elemVarEnvByKey` field ud
2580
2581 alterZappedSets :: UsageDetails -> (ZappedSet -> ZappedSet) -> UsageDetails
2582 alterZappedSets ud f
2583 = ud { ud_z_many = f (ud_z_many ud)
2584 , ud_z_in_lam = f (ud_z_in_lam ud)
2585 , ud_z_no_tail = f (ud_z_no_tail ud) }
2586
2587 alterUsageDetails :: UsageDetails -> (OccInfoEnv -> OccInfoEnv) -> UsageDetails
2588 alterUsageDetails ud f
2589 = ud { ud_env = f (ud_env ud) }
2590 `alterZappedSets` f
2591
2592 flattenUsageDetails :: UsageDetails -> UsageDetails
2593 flattenUsageDetails ud
2594 = ud { ud_env = mapUFM_Directly (doZappingByUnique ud) (ud_env ud) }
2595 `alterZappedSets` const emptyVarEnv
2596
2597 -------------------
2598 -- See Note [Adjusting right-hand sides]
2599 adjustRhsUsage :: Maybe JoinArity -> RecFlag
2600 -> [CoreBndr] -- Outer lambdas, AFTER occ anal
2601 -> UsageDetails -> UsageDetails
2602 adjustRhsUsage mb_join_arity rec_flag bndrs usage
2603 = maybe_mark_lam (maybe_drop_tails usage)
2604 where
2605 maybe_mark_lam ud | one_shot = ud
2606 | otherwise = markAllInsideLam ud
2607 maybe_drop_tails ud | exact_join = ud
2608 | otherwise = markAllNonTailCalled ud
2609
2610 one_shot = case mb_join_arity of
2611 Just join_arity
2612 | isRec rec_flag -> False
2613 | otherwise -> all isOneShotBndr (drop join_arity bndrs)
2614 Nothing -> all isOneShotBndr bndrs
2615
2616 exact_join = case mb_join_arity of
2617 Just join_arity -> bndrs `lengthIs` join_arity
2618 _ -> False
2619
2620 type IdWithOccInfo = Id
2621
2622 tagLamBinders :: UsageDetails -- Of scope
2623 -> [Id] -- Binders
2624 -> (UsageDetails, -- Details with binders removed
2625 [IdWithOccInfo]) -- Tagged binders
2626 -- Used for lambda and case binders
2627 -- It copes with the fact that lambda bindings can have a
2628 -- stable unfolding, used for join points
2629 tagLamBinders usage binders = usage' `seq` (usage', bndrs')
2630 where
2631 (usage', bndrs') = mapAccumR tag_lam usage binders
2632 tag_lam usage bndr = (usage2, bndr')
2633 where
2634 occ = lookupDetails usage bndr
2635 bndr' = setBinderOcc (markNonTailCalled occ) bndr
2636 -- Don't try to make an argument into a join point
2637 usage1 = usage `delDetails` bndr
2638 usage2 | isId bndr = addManyOccsSet usage1 (idUnfoldingVars bndr)
2639 -- This is effectively the RHS of a
2640 -- non-join-point binding, so it's okay to use
2641 -- addManyOccsSet, which assumes no tail calls
2642 | otherwise = usage1
2643
2644 tagNonRecBinder :: TopLevelFlag -- At top level?
2645 -> UsageDetails -- Of scope
2646 -> CoreBndr -- Binder
2647 -> (UsageDetails, -- Details with binder removed
2648 IdWithOccInfo) -- Tagged binder
2649
2650 tagNonRecBinder lvl usage binder
2651 = let
2652 occ = lookupDetails usage binder
2653 will_be_join = decideJoinPointHood lvl usage [binder]
2654 occ' | will_be_join = -- must already be marked AlwaysTailCalled
2655 ASSERT(isAlwaysTailCalled occ) occ
2656 | otherwise = markNonTailCalled occ
2657 binder' = setBinderOcc occ' binder
2658 usage' = usage `delDetails` binder
2659 in
2660 usage' `seq` (usage', binder')
2661
2662 tagRecBinders :: TopLevelFlag -- At top level?
2663 -> UsageDetails -- Of body of let ONLY
2664 -> [(CoreBndr, -- Binder
2665 UsageDetails, -- RHS usage details
2666 [CoreBndr])] -- Lambdas in new RHS
2667 -> (UsageDetails, -- Adjusted details for whole scope,
2668 -- with binders removed
2669 [IdWithOccInfo]) -- Tagged binders
2670 -- Substantially more complicated than non-recursive case. Need to adjust RHS
2671 -- details *before* tagging binders (because the tags depend on the RHSes).
2672 tagRecBinders lvl body_uds triples
2673 = let
2674 (bndrs, rhs_udss, _) = unzip3 triples
2675
2676 -- 1. Determine join-point-hood of whole group, as determined by
2677 -- the *unadjusted* usage details
2678 unadj_uds = body_uds +++ combineUsageDetailsList rhs_udss
2679 will_be_joins = decideJoinPointHood lvl unadj_uds bndrs
2680
2681 -- 2. Adjust usage details of each RHS, taking into account the
2682 -- join-point-hood decision
2683 rhs_udss' = map adjust triples
2684 adjust (bndr, rhs_uds, rhs_bndrs)
2685 = adjustRhsUsage mb_join_arity Recursive rhs_bndrs rhs_uds
2686 where
2687 -- Can't use willBeJoinId_maybe here because we haven't tagged the
2688 -- binder yet (the tag depends on these adjustments!)
2689 mb_join_arity
2690 | will_be_joins
2691 , let occ = lookupDetails unadj_uds bndr
2692 , AlwaysTailCalled arity <- tailCallInfo occ
2693 = Just arity
2694 | otherwise
2695 = ASSERT(not will_be_joins) -- Should be AlwaysTailCalled if we're
2696 -- making join points!
2697 Nothing
2698
2699 -- 3. Compute final usage details from adjusted RHS details
2700 adj_uds = body_uds +++ combineUsageDetailsList rhs_udss'
2701
2702 -- 4. Tag each binder with its adjusted details
2703 bndrs' = [ setBinderOcc (lookupDetails adj_uds bndr) bndr
2704 | bndr <- bndrs ]
2705
2706 -- 5. Drop the binders from the adjusted details and return
2707 usage' = adj_uds `delDetailsList` bndrs
2708 in
2709 (usage', bndrs')
2710
2711 setBinderOcc :: OccInfo -> CoreBndr -> CoreBndr
2712 setBinderOcc occ_info bndr
2713 | isTyVar bndr = bndr
2714 | isExportedId bndr = if isManyOccs (idOccInfo bndr)
2715 then bndr
2716 else setIdOccInfo bndr noOccInfo
2717 -- Don't use local usage info for visible-elsewhere things
2718 -- BUT *do* erase any IAmALoopBreaker annotation, because we're
2719 -- about to re-generate it and it shouldn't be "sticky"
2720
2721 | otherwise = setIdOccInfo bndr occ_info
2722
2723 -- | Decide whether some bindings should be made into join points or not.
2724 -- Returns `False` if they can't be join points. Note that it's an
2725 -- all-or-nothing decision, as if multiple binders are given, they're assumed to
2726 -- be mutually recursive.
2727 --
2728 -- See Note [Invariants for join points] in CoreSyn.
2729 decideJoinPointHood :: TopLevelFlag -> UsageDetails
2730 -> [CoreBndr]
2731 -> Bool
2732 decideJoinPointHood TopLevel _ _
2733 = False
2734 decideJoinPointHood NotTopLevel usage bndrs
2735 | isJoinId (head bndrs)
2736 = WARN(not all_ok, text "OccurAnal failed to rediscover join point(s):" <+>
2737 ppr bndrs)
2738 all_ok
2739 | otherwise
2740 = all_ok
2741 where
2742 -- See Note [Invariants on join points]; invariants cited by number below.
2743 -- Invariant 2 is always satisfiable by the simplifier by eta expansion.
2744 all_ok = -- Invariant 3: Either all are join points or none are
2745 all ok bndrs
2746
2747 ok bndr
2748 | -- Invariant 1: Only tail calls, all same join arity
2749 AlwaysTailCalled arity <- tailCallInfo (lookupDetails usage bndr)
2750 , -- Invariant 1 as applied to LHSes of rules
2751 all (ok_rule arity) (idCoreRules bndr)
2752 -- Invariant 4: Satisfies polymorphism rule
2753 , isValidJoinPointType arity (idType bndr)
2754 = True
2755 | otherwise
2756 = False
2757
2758 ok_rule _ BuiltinRule{} = False -- only possible with plugin shenanigans
2759 ok_rule join_arity (Rule { ru_args = args })
2760 = args `lengthIs` join_arity
2761 -- Invariant 1 as applied to LHSes of rules
2762
2763 willBeJoinId_maybe :: CoreBndr -> Maybe JoinArity
2764 willBeJoinId_maybe bndr
2765 | AlwaysTailCalled arity <- tailCallInfo (idOccInfo bndr)
2766 = Just arity
2767 | otherwise
2768 = isJoinId_maybe bndr
2769
2770 {-
2771 ************************************************************************
2772 * *
2773 \subsection{Operations over OccInfo}
2774 * *
2775 ************************************************************************
2776 -}
2777
2778 markMany, markInsideLam, markNonTailCalled :: OccInfo -> OccInfo
2779
2780 markMany IAmDead = IAmDead
2781 markMany occ = ManyOccs { occ_tail = occ_tail occ }
2782
2783 markInsideLam occ@(OneOcc {}) = occ { occ_in_lam = True }
2784 markInsideLam occ = occ
2785
2786 markNonTailCalled IAmDead = IAmDead
2787 markNonTailCalled occ = occ { occ_tail = NoTailCallInfo }
2788
2789 addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
2790
2791 addOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2792 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2793 tailCallInfo a2 }
2794 -- Both branches are at least One
2795 -- (Argument is never IAmDead)
2796
2797 -- (orOccInfo orig new) is used
2798 -- when combining occurrence info from branches of a case
2799
2800 orOccInfo (OneOcc { occ_in_lam = in_lam1, occ_int_cxt = int_cxt1
2801 , occ_tail = tail1 })
2802 (OneOcc { occ_in_lam = in_lam2, occ_int_cxt = int_cxt2
2803 , occ_tail = tail2 })
2804 = OneOcc { occ_in_lam = in_lam1 || in_lam2
2805 , occ_one_br = False -- False, because it occurs in both branches
2806 , occ_int_cxt = int_cxt1 && int_cxt2
2807 , occ_tail = tail1 `andTailCallInfo` tail2 }
2808 orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2809 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2810 tailCallInfo a2 }
2811
2812 andTailCallInfo :: TailCallInfo -> TailCallInfo -> TailCallInfo
2813 andTailCallInfo info@(AlwaysTailCalled arity1) (AlwaysTailCalled arity2)
2814 | arity1 == arity2 = info
2815 andTailCallInfo _ _ = NoTailCallInfo