98c81ce0268e6cc9c92fe9a4fe14dcdfabcf714b
[ghc.git] / compiler / simplCore / OccurAnal.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 ************************************************************************
5 * *
6 \section[OccurAnal]{Occurrence analysis pass}
7 * *
8 ************************************************************************
9
10 The occurrence analyser re-typechecks a core expression, returning a new
11 core expression with (hopefully) improved usage information.
12 -}
13
14 {-# LANGUAGE CPP, BangPatterns, MultiWayIf, ViewPatterns #-}
15
16 module OccurAnal (
17 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
18 ) where
19
20 #include "HsVersions.h"
21
22 import CoreSyn
23 import CoreFVs
24 import CoreUtils ( exprIsTrivial, isDefaultAlt, isExpandableApp,
25 stripTicksTopE, mkTicks )
26 import Id
27 import IdInfo
28 import Name( localiseName )
29 import BasicTypes
30 import Module( Module )
31 import Coercion
32 import Type
33
34 import VarSet
35 import VarEnv
36 import Var
37 import Demand ( argOneShots, argsOneShots )
38 import Digraph ( SCC(..), Node(..)
39 , stronglyConnCompFromEdgedVerticesUniq
40 , stronglyConnCompFromEdgedVerticesUniqR )
41 import Unique
42 import UniqFM
43 import UniqSet
44 import Util
45 import Outputable
46 import Data.List
47 import Control.Arrow ( second )
48
49 {-
50 ************************************************************************
51 * *
52 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
53 * *
54 ************************************************************************
55
56 Here's the externally-callable interface:
57 -}
58
59 occurAnalysePgm :: Module -- Used only in debug output
60 -> (Activation -> Bool)
61 -> [CoreRule] -> [CoreVect] -> VarSet
62 -> CoreProgram -> CoreProgram
63 occurAnalysePgm this_mod active_rule imp_rules vects vectVars binds
64 | isEmptyDetails final_usage
65 = occ_anald_binds
66
67 | otherwise -- See Note [Glomming]
68 = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
69 2 (ppr final_usage ) )
70 occ_anald_glommed_binds
71 where
72 init_env = initOccEnv active_rule
73 (final_usage, occ_anald_binds) = go init_env binds
74 (_, occ_anald_glommed_binds) = occAnalRecBind init_env TopLevel
75 imp_rule_edges
76 (flattenBinds occ_anald_binds)
77 initial_uds
78 -- It's crucial to re-analyse the glommed-together bindings
79 -- so that we establish the right loop breakers. Otherwise
80 -- we can easily create an infinite loop (Trac #9583 is an example)
81
82 initial_uds = addManyOccsSet emptyDetails
83 (rulesFreeVars imp_rules `unionVarSet`
84 vectsFreeVars vects `unionVarSet`
85 vectVars)
86 -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations,
87 -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are
88 -- reflected in 'vectors' — see Note [Vectorisation declarations and occurrences].)
89
90 -- Note [Preventing loops due to imported functions rules]
91 imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
92 [ mapVarEnv (const maps_to) $
93 getUniqSet (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
94 | imp_rule <- imp_rules
95 , not (isBuiltinRule imp_rule) -- See Note [Plugin rules]
96 , let maps_to = exprFreeIds (ru_rhs imp_rule)
97 `delVarSetList` ru_bndrs imp_rule
98 , arg <- ru_args imp_rule ]
99
100 go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
101 go _ []
102 = (initial_uds, [])
103 go env (bind:binds)
104 = (final_usage, bind' ++ binds')
105 where
106 (bs_usage, binds') = go env binds
107 (final_usage, bind') = occAnalBind env TopLevel imp_rule_edges bind
108 bs_usage
109
110 occurAnalyseExpr :: CoreExpr -> CoreExpr
111 -- Do occurrence analysis, and discard occurrence info returned
112 occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap
113
114 occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
115 occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap
116
117 occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
118 occurAnalyseExpr' enable_binder_swap expr
119 = snd (occAnal env expr)
120 where
121 env = (initOccEnv all_active_rules) {occ_binder_swap = enable_binder_swap}
122 -- To be conservative, we say that all inlines and rules are active
123 all_active_rules = \_ -> True
124
125 {- Note [Plugin rules]
126 ~~~~~~~~~~~~~~~~~~~~~~
127 Conal Elliott (Trac #11651) built a GHC plugin that added some
128 BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
129 do some domain-specific transformations that could not be expressed
130 with an ordinary pattern-matching CoreRule. But then we can't extract
131 the dependencies (in imp_rule_edges) from ru_rhs etc, because a
132 BuiltinRule doesn't have any of that stuff.
133
134 So we simply assume that BuiltinRules have no dependencies, and filter
135 them out from the imp_rule_edges comprehension.
136 -}
137
138 {-
139 ************************************************************************
140 * *
141 Bindings
142 * *
143 ************************************************************************
144
145 Note [Recursive bindings: the grand plan]
146 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
147 When we come across a binding group
148 Rec { x1 = r1; ...; xn = rn }
149 we treat it like this (occAnalRecBind):
150
151 1. Occurrence-analyse each right hand side, and build a
152 "Details" for each binding to capture the results.
153
154 Wrap the details in a Node (details, node-id, dep-node-ids),
155 where node-id is just the unique of the binder, and
156 dep-node-ids lists all binders on which this binding depends.
157 We'll call these the "scope edges".
158 See Note [Forming the Rec groups].
159
160 All this is done by makeNode.
161
162 2. Do SCC-analysis on these Nodes. Each SCC will become a new Rec or
163 NonRec. The key property is that every free variable of a binding
164 is accounted for by the scope edges, so that when we are done
165 everything is still in scope.
166
167 3. For each Cyclic SCC of the scope-edge SCC-analysis in (2), we
168 identify suitable loop-breakers to ensure that inlining terminates.
169 This is done by occAnalRec.
170
171 4. To do so we form a new set of Nodes, with the same details, but
172 different edges, the "loop-breaker nodes". The loop-breaker nodes
173 have both more and fewer depedencies than the scope edges
174 (see Note [Choosing loop breakers])
175
176 More edges: if f calls g, and g has an active rule that mentions h
177 then we add an edge from f -> h
178
179 Fewer edges: we only include dependencies on active rules, on rule
180 RHSs (not LHSs) and if there is an INLINE pragma only
181 on the stable unfolding (and vice versa). The scope
182 edges must be much more inclusive.
183
184 5. The "weak fvs" of a node are, by definition:
185 the scope fvs - the loop-breaker fvs
186 See Note [Weak loop breakers], and the nd_weak field of Details
187
188 6. Having formed the loop-breaker nodes
189
190 Note [Dead code]
191 ~~~~~~~~~~~~~~~~
192 Dropping dead code for a cyclic Strongly Connected Component is done
193 in a very simple way:
194
195 the entire SCC is dropped if none of its binders are mentioned
196 in the body; otherwise the whole thing is kept.
197
198 The key observation is that dead code elimination happens after
199 dependency analysis: so 'occAnalBind' processes SCCs instead of the
200 original term's binding groups.
201
202 Thus 'occAnalBind' does indeed drop 'f' in an example like
203
204 letrec f = ...g...
205 g = ...(...g...)...
206 in
207 ...g...
208
209 when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
210 'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
211 'AcyclicSCC f', where 'body_usage' won't contain 'f'.
212
213 ------------------------------------------------------------
214 Note [Forming Rec groups]
215 ~~~~~~~~~~~~~~~~~~~~~~~~~
216 We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
217 and "g uses f", no matter how indirectly. We do a SCC analysis
218 with an edge f -> g if "f uses g".
219
220 More precisely, "f uses g" iff g should be in scope wherever f is.
221 That is, g is free in:
222 a) the rhs 'ef'
223 b) or the RHS of a rule for f (Note [Rules are extra RHSs])
224 c) or the LHS or a rule for f (Note [Rule dependency info])
225
226 These conditions apply regardless of the activation of the RULE (eg it might be
227 inactive in this phase but become active later). Once a Rec is broken up
228 it can never be put back together, so we must be conservative.
229
230 The principle is that, regardless of rule firings, every variable is
231 always in scope.
232
233 * Note [Rules are extra RHSs]
234 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
235 A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
236 keeps the specialised "children" alive. If the parent dies
237 (because it isn't referenced any more), then the children will die
238 too (unless they are already referenced directly).
239
240 To that end, we build a Rec group for each cyclic strongly
241 connected component,
242 *treating f's rules as extra RHSs for 'f'*.
243 More concretely, the SCC analysis runs on a graph with an edge
244 from f -> g iff g is mentioned in
245 (a) f's rhs
246 (b) f's RULES
247 These are rec_edges.
248
249 Under (b) we include variables free in *either* LHS *or* RHS of
250 the rule. The former might seems silly, but see Note [Rule
251 dependency info]. So in Example [eftInt], eftInt and eftIntFB
252 will be put in the same Rec, even though their 'main' RHSs are
253 both non-recursive.
254
255 * Note [Rule dependency info]
256 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
257 The VarSet in a RuleInfo is used for dependency analysis in the
258 occurrence analyser. We must track free vars in *both* lhs and rhs.
259 Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
260 Why both? Consider
261 x = y
262 RULE f x = v+4
263 Then if we substitute y for x, we'd better do so in the
264 rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
265 as well as 'v'
266
267 * Note [Rules are visible in their own rec group]
268 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
269 We want the rules for 'f' to be visible in f's right-hand side.
270 And we'd like them to be visible in other functions in f's Rec
271 group. E.g. in Note [Specialisation rules] we want f' rule
272 to be visible in both f's RHS, and fs's RHS.
273
274 This means that we must simplify the RULEs first, before looking
275 at any of the definitions. This is done by Simplify.simplRecBind,
276 when it calls addLetIdInfo.
277
278 ------------------------------------------------------------
279 Note [Choosing loop breakers]
280 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
281 Loop breaking is surprisingly subtle. First read the section 4 of
282 "Secrets of the GHC inliner". This describes our basic plan.
283 We avoid infinite inlinings by choosing loop breakers, and
284 ensuring that a loop breaker cuts each loop.
285
286 See also Note [Inlining and hs-boot files] in ToIface, which deals
287 with a closely related source of infinite loops.
288
289 Fundamentally, we do SCC analysis on a graph. For each recursive
290 group we choose a loop breaker, delete all edges to that node,
291 re-analyse the SCC, and iterate.
292
293 But what is the graph? NOT the same graph as was used for Note
294 [Forming Rec groups]! In particular, a RULE is like an equation for
295 'f' that is *always* inlined if it is applicable. We do *not* disable
296 rules for loop-breakers. It's up to whoever makes the rules to make
297 sure that the rules themselves always terminate. See Note [Rules for
298 recursive functions] in Simplify.hs
299
300 Hence, if
301 f's RHS (or its INLINE template if it has one) mentions g, and
302 g has a RULE that mentions h, and
303 h has a RULE that mentions f
304
305 then we *must* choose f to be a loop breaker. Example: see Note
306 [Specialisation rules].
307
308 In general, take the free variables of f's RHS, and augment it with
309 all the variables reachable by RULES from those starting points. That
310 is the whole reason for computing rule_fv_env in occAnalBind. (Of
311 course we only consider free vars that are also binders in this Rec
312 group.) See also Note [Finding rule RHS free vars]
313
314 Note that when we compute this rule_fv_env, we only consider variables
315 free in the *RHS* of the rule, in contrast to the way we build the
316 Rec group in the first place (Note [Rule dependency info])
317
318 Note that if 'g' has RHS that mentions 'w', we should add w to
319 g's loop-breaker edges. More concretely there is an edge from f -> g
320 iff
321 (a) g is mentioned in f's RHS `xor` f's INLINE rhs
322 (see Note [Inline rules])
323 (b) or h is mentioned in f's RHS, and
324 g appears in the RHS of an active RULE of h
325 or a transitive sequence of active rules starting with h
326
327 Why "active rules"? See Note [Finding rule RHS free vars]
328
329 Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
330 chosen as a loop breaker, because their RHSs don't mention each other.
331 And indeed both can be inlined safely.
332
333 Note again that the edges of the graph we use for computing loop breakers
334 are not the same as the edges we use for computing the Rec blocks.
335 That's why we compute
336
337 - rec_edges for the Rec block analysis
338 - loop_breaker_nodes for the loop breaker analysis
339
340 * Note [Finding rule RHS free vars]
341 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
342 Consider this real example from Data Parallel Haskell
343 tagZero :: Array Int -> Array Tag
344 {-# INLINE [1] tagZeroes #-}
345 tagZero xs = pmap (\x -> fromBool (x==0)) xs
346
347 {-# RULES "tagZero" [~1] forall xs n.
348 pmap fromBool <blah blah> = tagZero xs #-}
349 So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
350 However, tagZero can only be inlined in phase 1 and later, while
351 the RULE is only active *before* phase 1. So there's no problem.
352
353 To make this work, we look for the RHS free vars only for
354 *active* rules. That's the reason for the occ_rule_act field
355 of the OccEnv.
356
357 * Note [Weak loop breakers]
358 ~~~~~~~~~~~~~~~~~~~~~~~~~
359 There is a last nasty wrinkle. Suppose we have
360
361 Rec { f = f_rhs
362 RULE f [] = g
363
364 h = h_rhs
365 g = h
366 ...more...
367 }
368
369 Remember that we simplify the RULES before any RHS (see Note
370 [Rules are visible in their own rec group] above).
371
372 So we must *not* postInlineUnconditionally 'g', even though
373 its RHS turns out to be trivial. (I'm assuming that 'g' is
374 not choosen as a loop breaker.) Why not? Because then we
375 drop the binding for 'g', which leaves it out of scope in the
376 RULE!
377
378 Here's a somewhat different example of the same thing
379 Rec { g = h
380 ; h = ...f...
381 ; f = f_rhs
382 RULE f [] = g }
383 Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
384 g, because the RULE for f is active throughout. So the RHS of h
385 might rewrite to h = ...g...
386 So g must remain in scope in the output program!
387
388 We "solve" this by:
389
390 Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
391 iff g is a "missing free variable" of the Rec group
392
393 A "missing free variable" x is one that is mentioned in an RHS or
394 INLINE or RULE of a binding in the Rec group, but where the
395 dependency on x may not show up in the loop_breaker_nodes (see
396 note [Choosing loop breakers} above).
397
398 A normal "strong" loop breaker has IAmLoopBreaker False. So
399
400 Inline postInlineUnconditionally
401 strong IAmLoopBreaker False no no
402 weak IAmLoopBreaker True yes no
403 other yes yes
404
405 The **sole** reason for this kind of loop breaker is so that
406 postInlineUnconditionally does not fire. Ugh. (Typically it'll
407 inline via the usual callSiteInline stuff, so it'll be dead in the
408 next pass, so the main Ugh is the tiresome complication.)
409
410 Note [Rules for imported functions]
411 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
412 Consider this
413 f = /\a. B.g a
414 RULE B.g Int = 1 + f Int
415 Note that
416 * The RULE is for an imported function.
417 * f is non-recursive
418 Now we
419 can get
420 f Int --> B.g Int Inlining f
421 --> 1 + f Int Firing RULE
422 and so the simplifier goes into an infinite loop. This
423 would not happen if the RULE was for a local function,
424 because we keep track of dependencies through rules. But
425 that is pretty much impossible to do for imported Ids. Suppose
426 f's definition had been
427 f = /\a. C.h a
428 where (by some long and devious process), C.h eventually inlines to
429 B.g. We could only spot such loops by exhaustively following
430 unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
431 f.
432
433 Note that RULES for imported functions are important in practice; they
434 occur a lot in the libraries.
435
436 We regard this potential infinite loop as a *programmer* error.
437 It's up the programmer not to write silly rules like
438 RULE f x = f x
439 and the example above is just a more complicated version.
440
441 Note [Preventing loops due to imported functions rules]
442 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
443 Consider:
444 import GHC.Base (foldr)
445
446 {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
447 filter p xs = build (\c n -> foldr (filterFB c p) n xs)
448 filterFB c p = ...
449
450 f = filter p xs
451
452 Note that filter is not a loop-breaker, so what happens is:
453 f = filter p xs
454 = {inline} build (\c n -> foldr (filterFB c p) n xs)
455 = {inline} foldr (filterFB (:) p) [] xs
456 = {RULE} filter p xs
457
458 We are in an infinite loop.
459
460 A more elaborate example (that I actually saw in practice when I went to
461 mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
462 {-# LANGUAGE RankNTypes #-}
463 module GHCList where
464
465 import Prelude hiding (filter)
466 import GHC.Base (build)
467
468 {-# INLINABLE filter #-}
469 filter :: (a -> Bool) -> [a] -> [a]
470 filter p [] = []
471 filter p (x:xs) = if p x then x : filter p xs else filter p xs
472
473 {-# NOINLINE [0] filterFB #-}
474 filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
475 filterFB c p x r | p x = x `c` r
476 | otherwise = r
477
478 {-# RULES
479 "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr
480 (filterFB c p) n xs)
481 "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p
482 #-}
483
484 Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
485 are not), the unfolding given to "filter" in the interface file will be:
486 filter p [] = []
487 filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
488 else build (\c n -> foldr (filterFB c p) n xs
489
490 Note that because this unfolding does not mention "filter", filter is not
491 marked as a strong loop breaker. Therefore at a use site in another module:
492 filter p xs
493 = {inline}
494 case xs of [] -> []
495 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
496 else build (\c n -> foldr (filterFB c p) n xs)
497
498 build (\c n -> foldr (filterFB c p) n xs)
499 = {inline} foldr (filterFB (:) p) [] xs
500 = {RULE} filter p xs
501
502 And we are in an infinite loop again, except that this time the loop is producing an
503 infinitely large *term* (an unrolling of filter) and so the simplifier finally
504 dies with "ticks exhausted"
505
506 Because of this problem, we make a small change in the occurrence analyser
507 designed to mark functions like "filter" as strong loop breakers on the basis that:
508 1. The RHS of filter mentions the local function "filterFB"
509 2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS
510
511 So for each RULE for an *imported* function we are going to add
512 dependency edges between the *local* FVS of the rule LHS and the
513 *local* FVS of the rule RHS. We don't do anything special for RULES on
514 local functions because the standard occurrence analysis stuff is
515 pretty good at getting loop-breakerness correct there.
516
517 It is important to note that even with this extra hack we aren't always going to get
518 things right. For example, it might be that the rule LHS mentions an imported Id,
519 and another module has a RULE that can rewrite that imported Id to one of our local
520 Ids.
521
522 Note [Specialising imported functions] (referred to from Specialise)
523 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
524 BUT for *automatically-generated* rules, the programmer can't be
525 responsible for the "programmer error" in Note [Rules for imported
526 functions]. In paricular, consider specialising a recursive function
527 defined in another module. If we specialise a recursive function B.g,
528 we get
529 g_spec = .....(B.g Int).....
530 RULE B.g Int = g_spec
531 Here, g_spec doesn't look recursive, but when the rule fires, it
532 becomes so. And if B.g was mutually recursive, the loop might
533 not be as obvious as it is here.
534
535 To avoid this,
536 * When specialising a function that is a loop breaker,
537 give a NOINLINE pragma to the specialised function
538
539 Note [Glomming]
540 ~~~~~~~~~~~~~~~
541 RULES for imported Ids can make something at the top refer to something at the bottom:
542 f = \x -> B.g (q x)
543 h = \y -> 3
544
545 RULE: B.g (q x) = h x
546
547 Applying this rule makes f refer to h, although f doesn't appear to
548 depend on h. (And, as in Note [Rules for imported functions], the
549 dependency might be more indirect. For example, f might mention C.t
550 rather than B.g, where C.t eventually inlines to B.g.)
551
552 NOTICE that this cannot happen for rules whose head is a
553 locally-defined function, because we accurately track dependencies
554 through RULES. It only happens for rules whose head is an imported
555 function (B.g in the example above).
556
557 Solution:
558 - When simplifying, bring all top level identifiers into
559 scope at the start, ignoring the Rec/NonRec structure, so
560 that when 'h' pops up in f's rhs, we find it in the in-scope set
561 (as the simplifier generally expects). This happens in simplTopBinds.
562
563 - In the occurrence analyser, if there are any out-of-scope
564 occurrences that pop out of the top, which will happen after
565 firing the rule: f = \x -> h x
566 h = \y -> 3
567 then just glom all the bindings into a single Rec, so that
568 the *next* iteration of the occurrence analyser will sort
569 them all out. This part happens in occurAnalysePgm.
570
571 ------------------------------------------------------------
572 Note [Inline rules]
573 ~~~~~~~~~~~~~~~~~~~
574 None of the above stuff about RULES applies to Inline Rules,
575 stored in a CoreUnfolding. The unfolding, if any, is simplified
576 at the same time as the regular RHS of the function (ie *not* like
577 Note [Rules are visible in their own rec group]), so it should be
578 treated *exactly* like an extra RHS.
579
580 Or, rather, when computing loop-breaker edges,
581 * If f has an INLINE pragma, and it is active, we treat the
582 INLINE rhs as f's rhs
583 * If it's inactive, we treat f as having no rhs
584 * If it has no INLINE pragma, we look at f's actual rhs
585
586
587 There is a danger that we'll be sub-optimal if we see this
588 f = ...f...
589 [INLINE f = ..no f...]
590 where f is recursive, but the INLINE is not. This can just about
591 happen with a sufficiently odd set of rules; eg
592
593 foo :: Int -> Int
594 {-# INLINE [1] foo #-}
595 foo x = x+1
596
597 bar :: Int -> Int
598 {-# INLINE [1] bar #-}
599 bar x = foo x + 1
600
601 {-# RULES "foo" [~1] forall x. foo x = bar x #-}
602
603 Here the RULE makes bar recursive; but it's INLINE pragma remains
604 non-recursive. It's tempting to then say that 'bar' should not be
605 a loop breaker, but an attempt to do so goes wrong in two ways:
606 a) We may get
607 $df = ...$cfoo...
608 $cfoo = ...$df....
609 [INLINE $cfoo = ...no-$df...]
610 But we want $cfoo to depend on $df explicitly so that we
611 put the bindings in the right order to inline $df in $cfoo
612 and perhaps break the loop altogether. (Maybe this
613 b)
614
615
616 Example [eftInt]
617 ~~~~~~~~~~~~~~~
618 Example (from GHC.Enum):
619
620 eftInt :: Int# -> Int# -> [Int]
621 eftInt x y = ...(non-recursive)...
622
623 {-# INLINE [0] eftIntFB #-}
624 eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
625 eftIntFB c n x y = ...(non-recursive)...
626
627 {-# RULES
628 "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
629 "eftIntList" [1] eftIntFB (:) [] = eftInt
630 #-}
631
632 Note [Specialisation rules]
633 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
634 Consider this group, which is typical of what SpecConstr builds:
635
636 fs a = ....f (C a)....
637 f x = ....f (C a)....
638 {-# RULE f (C a) = fs a #-}
639
640 So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
641
642 But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
643 - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
644 - fs is inlined (say it's small)
645 - now there's another opportunity to apply the RULE
646
647 This showed up when compiling Control.Concurrent.Chan.getChanContents.
648
649 ------------------------------------------------------------
650 Note [Finding join points]
651 ~~~~~~~~~~~~~~~~~~~~~~~~~~
652 It's the occurrence analyser's job to find bindings that we can turn into join
653 points, but it doesn't perform that transformation right away. Rather, it marks
654 the eligible bindings as part of their occurrence data, leaving it to the
655 simplifier (or to simpleOptPgm) to actually change the binder's 'IdDetails'.
656 The simplifier then eta-expands the RHS if needed and then updates the
657 occurrence sites. Dividing the work this way means that the occurrence analyser
658 still only takes one pass, yet one can always tell the difference between a
659 function call and a jump by looking at the occurrence (because the same pass
660 changes the 'IdDetails' and propagates the binders to their occurrence sites).
661
662 To track potential join points, we use the 'occ_tail' field of OccInfo. A value
663 of `AlwaysTailCalled n` indicates that every occurrence of the variable is a
664 tail call with `n` arguments (counting both value and type arguments). Otherwise
665 'occ_tail' will be 'NoTailCallInfo'. The tail call info flows bottom-up with the
666 rest of 'OccInfo' until it goes on the binder.
667
668 Note [Rules and join points]
669 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
670
671 Things get fiddly with rules. Suppose we have:
672
673 let j :: Int -> Int
674 j y = 2 * y
675 k :: Int -> Int -> Int
676 {-# RULES "SPEC k 0" k 0 = j #-}
677 k x y = x + 2 * y
678 in ...
679
680 Now suppose that both j and k appear only as saturated tail calls in the body.
681 Thus we would like to make them both join points. The rule complicates matters,
682 though, as its RHS has an unapplied occurrence of j. *However*, if we were to
683 eta-expand the rule, all would be well:
684
685 {-# RULES "SPEC k 0" forall a. k 0 a = j a #-}
686
687 So conceivably we could notice that a potential join point would have an
688 "undersaturated" rule and account for it. This would mean we could make
689 something that's been specialised a join point, for instance. But local bindings
690 are rarely specialised, and being overly cautious about rules only
691 costs us anything when, for some `j`:
692
693 * Before specialisation, `j` has non-tail calls, so it can't be a join point.
694 * During specialisation, `j` gets specialised and thus acquires rules.
695 * Sometime afterward, the non-tail calls to `j` disappear (as dead code, say),
696 and so now `j` *could* become a join point.
697
698 This appears to be very rare in practice. TODO Perhaps we should gather
699 statistics to be sure.
700
701 Note [Excess polymorphism and join points]
702 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
703
704 In principle, if a function would be a join point except that it fails
705 the polymorphism rule (see Note [The polymorphism rule of join points] in
706 CoreSyn), it can still be made a join point with some effort. This is because
707 all tail calls must return the same type (they return to the same context!), and
708 thus if the return type depends on an argument, that argument must always be the
709 same.
710
711 For instance, consider:
712
713 let f :: forall a. a -> Char -> [a]
714 f @a x c = ... f @a x 'a' ...
715 in ... f @Int 1 'b' ... f @Int 2 'c' ...
716
717 (where the calls are tail calls). `f` fails the polymorphism rule because its
718 return type is [a], where [a] is bound. But since the type argument is always
719 'Int', we can rewrite it as:
720
721 let f' :: Int -> Char -> [Int]
722 f' x c = ... f' x 'a' ...
723 in ... f' 1 'b' ... f 2 'c' ...
724
725 and now we can make f' a join point:
726
727 join f' :: Int -> Char -> [Int]
728 f' x c = ... jump f' x 'a' ...
729 in ... jump f' 1 'b' ... jump f' 2 'c' ...
730
731 It's not clear that this comes up often, however. TODO: Measure how often and
732 add this analysis if necessary.
733
734 ------------------------------------------------------------
735 Note [Adjusting right-hand sides]
736 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
737 There's a bit of a dance we need to do after analysing a lambda expression or
738 a right-hand side. In particular, we need to
739
740 a) call 'markAllInsideLam' *unless* the binding is for a thunk, a one-shot
741 lambda, or a non-recursive join point; and
742 b) call 'markAllNonTailCalled' *unless* the binding is for a join point.
743
744 Some examples, with how the free occurrences in e (assumed not to be a value
745 lambda) get marked:
746
747 inside lam non-tail-called
748 ------------------------------------------------------------
749 let x = e No Yes
750 let f = \x -> e Yes Yes
751 let f = \x{OneShot} -> e No Yes
752 \x -> e Yes Yes
753 join j x = e No No
754 joinrec j x = e Yes No
755
756 There are a few other caveats; most importantly, if we're marking a binding as
757 'AlwaysTailCalled', it's *going* to be a join point, so we treat it as one so
758 that the effect cascades properly. Consequently, at the time the RHS is
759 analysed, we won't know what adjustments to make; thus 'occAnalLamOrRhs' must
760 return the unadjusted 'UsageDetails', to be adjusted by 'adjustRhsUsage' once
761 join-point-hood has been decided.
762
763 Thus the overall sequence taking place in 'occAnalNonRecBind' and
764 'occAnalRecBind' is as follows:
765
766 1. Call 'occAnalLamOrRhs' to find usage information for the RHS.
767 2. Call 'tagNonRecBinder' or 'tagRecBinders', which decides whether to make
768 the binding a join point.
769 3. Call 'adjustRhsUsage' accordingly. (Done as part of 'tagRecBinders' when
770 recursive.)
771
772 (In the recursive case, this logic is spread between 'makeNode' and
773 'occAnalRec'.)
774 -}
775
776 ------------------------------------------------------------------
777 -- occAnalBind
778 ------------------------------------------------------------------
779
780 occAnalBind :: OccEnv -- The incoming OccEnv
781 -> TopLevelFlag
782 -> ImpRuleEdges
783 -> CoreBind
784 -> UsageDetails -- Usage details of scope
785 -> (UsageDetails, -- Of the whole let(rec)
786 [CoreBind])
787
788 occAnalBind env lvl top_env (NonRec binder rhs) body_usage
789 = occAnalNonRecBind env lvl top_env binder rhs body_usage
790 occAnalBind env lvl top_env (Rec pairs) body_usage
791 = occAnalRecBind env lvl top_env pairs body_usage
792
793 -----------------
794 occAnalNonRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> Var -> CoreExpr
795 -> UsageDetails -> (UsageDetails, [CoreBind])
796 occAnalNonRecBind env lvl imp_rule_edges binder rhs body_usage
797 | isTyVar binder -- A type let; we don't gather usage info
798 = (body_usage, [NonRec binder rhs])
799
800 | not (binder `usedIn` body_usage) -- It's not mentioned
801 = (body_usage, [])
802
803 | otherwise -- It's mentioned in the body
804 = (body_usage' +++ rhs_usage', [NonRec tagged_binder rhs'])
805 where
806 (body_usage', tagged_binder) = tagNonRecBinder lvl body_usage binder
807 mb_join_arity = willBeJoinId_maybe tagged_binder
808
809 (bndrs, body) = collectBinders rhs
810
811 (rhs_usage1, bndrs', body') = occAnalNonRecRhs env tagged_binder bndrs body
812 rhs' = mkLams (markJoinOneShots mb_join_arity bndrs') body'
813 -- For a /non-recursive/ join point we can mark all
814 -- its join-lambda as one-shot; and it's a good idea to do so
815
816 -- Unfoldings
817 -- See Note [Unfoldings and join points]
818 rhs_usage2 = case occAnalUnfolding env NonRecursive binder of
819 Just unf_usage -> rhs_usage1 +++ unf_usage
820 Nothing -> rhs_usage1
821
822 -- Rules
823 -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
824 rules_w_uds = occAnalRules env mb_join_arity NonRecursive tagged_binder
825 rhs_usage3 = rhs_usage2 +++ combineUsageDetailsList
826 (map (\(_, l, r) -> l +++ r) rules_w_uds)
827 rhs_usage4 = maybe rhs_usage3 (addManyOccsSet rhs_usage3) $
828 lookupVarEnv imp_rule_edges binder
829 -- See Note [Preventing loops due to imported functions rules]
830
831 -- Final adjustment
832 rhs_usage' = adjustRhsUsage mb_join_arity NonRecursive bndrs' rhs_usage4
833
834 -----------------
835 occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)]
836 -> UsageDetails -> (UsageDetails, [CoreBind])
837 occAnalRecBind env lvl imp_rule_edges pairs body_usage
838 = foldr (occAnalRec lvl) (body_usage, []) sccs
839 -- For a recursive group, we
840 -- * occ-analyse all the RHSs
841 -- * compute strongly-connected components
842 -- * feed those components to occAnalRec
843 -- See Note [Recursive bindings: the grand plan]
844 where
845 sccs :: [SCC Details]
846 sccs = {-# SCC "occAnalBind.scc" #-}
847 stronglyConnCompFromEdgedVerticesUniq nodes
848
849 nodes :: [LetrecNode]
850 nodes = {-# SCC "occAnalBind.assoc" #-}
851 map (makeNode env imp_rule_edges bndr_set) pairs
852
853 bndr_set = mkVarSet (map fst pairs)
854
855 {-
856 Note [Unfoldings and join points]
857 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
858
859 We assume that anything in an unfolding occurs multiple times, since unfoldings
860 are often copied (that's the whole point!). But we still need to track tail
861 calls for the purpose of finding join points.
862 -}
863
864 -----------------------------
865 occAnalRec :: TopLevelFlag
866 -> SCC Details
867 -> (UsageDetails, [CoreBind])
868 -> (UsageDetails, [CoreBind])
869
870 -- The NonRec case is just like a Let (NonRec ...) above
871 occAnalRec lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs
872 , nd_uds = rhs_uds, nd_rhs_bndrs = rhs_bndrs }))
873 (body_uds, binds)
874 | not (bndr `usedIn` body_uds)
875 = (body_uds, binds) -- See Note [Dead code]
876
877 | otherwise -- It's mentioned in the body
878 = (body_uds' +++ rhs_uds',
879 NonRec tagged_bndr rhs : binds)
880 where
881 (body_uds', tagged_bndr) = tagNonRecBinder lvl body_uds bndr
882 rhs_uds' = adjustRhsUsage (willBeJoinId_maybe tagged_bndr) NonRecursive
883 rhs_bndrs rhs_uds
884
885 -- The Rec case is the interesting one
886 -- See Note [Recursive bindings: the grand plan]
887 -- See Note [Loop breaking]
888 occAnalRec lvl (CyclicSCC details_s) (body_uds, binds)
889 | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
890 = (body_uds, binds) -- See Note [Dead code]
891
892 | otherwise -- At this point we always build a single Rec
893 = -- pprTrace "occAnalRec" (vcat
894 -- [ text "weak_fvs" <+> ppr weak_fvs
895 -- , text "lb nodes" <+> ppr loop_breaker_nodes])
896 (final_uds, Rec pairs : binds)
897
898 where
899 bndrs = map nd_bndr details_s
900 bndr_set = mkVarSet bndrs
901
902 ------------------------------
903 -- See Note [Choosing loop breakers] for loop_breaker_nodes
904 final_uds :: UsageDetails
905 loop_breaker_nodes :: [LetrecNode]
906 (final_uds, loop_breaker_nodes)
907 = mkLoopBreakerNodes lvl bndr_set body_uds details_s
908
909 ------------------------------
910 weak_fvs :: VarSet
911 weak_fvs = mapUnionVarSet nd_weak details_s
912
913 ---------------------------
914 -- Now reconstruct the cycle
915 pairs :: [(Id,CoreExpr)]
916 pairs | isEmptyVarSet weak_fvs = reOrderNodes 0 bndr_set weak_fvs loop_breaker_nodes []
917 | otherwise = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_nodes []
918 -- If weak_fvs is empty, the loop_breaker_nodes will include
919 -- all the edges in the original scope edges [remember,
920 -- weak_fvs is the difference between scope edges and
921 -- lb-edges], so a fresh SCC computation would yield a
922 -- single CyclicSCC result; and reOrderNodes deals with
923 -- exactly that case
924
925
926 ------------------------------------------------------------------
927 -- Loop breaking
928 ------------------------------------------------------------------
929
930 type Binding = (Id,CoreExpr)
931
932 loopBreakNodes :: Int
933 -> VarSet -- All binders
934 -> VarSet -- Binders whose dependencies may be "missing"
935 -- See Note [Weak loop breakers]
936 -> [LetrecNode]
937 -> [Binding] -- Append these to the end
938 -> [Binding]
939 {-
940 loopBreakNodes is applied to the list of nodes for a cyclic strongly
941 connected component (there's guaranteed to be a cycle). It returns
942 the same nodes, but
943 a) in a better order,
944 b) with some of the Ids having a IAmALoopBreaker pragma
945
946 The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means
947 that the simplifier can guarantee not to loop provided it never records an inlining
948 for these no-inline guys.
949
950 Furthermore, the order of the binds is such that if we neglect dependencies
951 on the no-inline Ids then the binds are topologically sorted. This means
952 that the simplifier will generally do a good job if it works from top bottom,
953 recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
954 -}
955
956 -- Return the bindings sorted into a plausible order, and marked with loop breakers.
957 loopBreakNodes depth bndr_set weak_fvs nodes binds
958 = go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds
959 where
960 go [] binds = binds
961 go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
962
963 loop_break_scc scc binds
964 = case scc of
965 AcyclicSCC node -> mk_non_loop_breaker weak_fvs node : binds
966 CyclicSCC nodes -> reOrderNodes depth bndr_set weak_fvs nodes binds
967
968 ----------------------------------
969 reOrderNodes :: Int -> VarSet -> VarSet -> [LetrecNode] -> [Binding] -> [Binding]
970 -- Choose a loop breaker, mark it no-inline,
971 -- and call loopBreakNodes on the rest
972 reOrderNodes _ _ _ [] _ = panic "reOrderNodes"
973 reOrderNodes _ _ _ [node] binds = mk_loop_breaker node : binds
974 reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
975 = -- pprTrace "reOrderNodes" (text "unchosen" <+> ppr unchosen $$
976 -- text "chosen" <+> ppr chosen_nodes) $
977 loopBreakNodes new_depth bndr_set weak_fvs unchosen $
978 (map mk_loop_breaker chosen_nodes ++ binds)
979 where
980 (chosen_nodes, unchosen) = chooseLoopBreaker approximate_lb
981 (nd_score (node_payload node))
982 [node] [] nodes
983
984 approximate_lb = depth >= 2
985 new_depth | approximate_lb = 0
986 | otherwise = depth+1
987 -- After two iterations (d=0, d=1) give up
988 -- and approximate, returning to d=0
989
990 mk_loop_breaker :: LetrecNode -> Binding
991 mk_loop_breaker (node_payload -> ND { nd_bndr = bndr, nd_rhs = rhs})
992 = (bndr `setIdOccInfo` strongLoopBreaker { occ_tail = tail_info }, rhs)
993 where
994 tail_info = tailCallInfo (idOccInfo bndr)
995
996 mk_non_loop_breaker :: VarSet -> LetrecNode -> Binding
997 -- See Note [Weak loop breakers]
998 mk_non_loop_breaker weak_fvs (node_payload -> ND { nd_bndr = bndr
999 , nd_rhs = rhs})
1000 | bndr `elemVarSet` weak_fvs = (setIdOccInfo bndr occ', rhs)
1001 | otherwise = (bndr, rhs)
1002 where
1003 occ' = weakLoopBreaker { occ_tail = tail_info }
1004 tail_info = tailCallInfo (idOccInfo bndr)
1005
1006 ----------------------------------
1007 chooseLoopBreaker :: Bool -- True <=> Too many iterations,
1008 -- so approximate
1009 -> NodeScore -- Best score so far
1010 -> [LetrecNode] -- Nodes with this score
1011 -> [LetrecNode] -- Nodes with higher scores
1012 -> [LetrecNode] -- Unprocessed nodes
1013 -> ([LetrecNode], [LetrecNode])
1014 -- This loop looks for the bind with the lowest score
1015 -- to pick as the loop breaker. The rest accumulate in
1016 chooseLoopBreaker _ _ loop_nodes acc []
1017 = (loop_nodes, acc) -- Done
1018
1019 -- If approximate_loop_breaker is True, we pick *all*
1020 -- nodes with lowest score, else just one
1021 -- See Note [Complexity of loop breaking]
1022 chooseLoopBreaker approx_lb loop_sc loop_nodes acc (node : nodes)
1023 | approx_lb
1024 , rank sc == rank loop_sc
1025 = chooseLoopBreaker approx_lb loop_sc (node : loop_nodes) acc nodes
1026
1027 | sc `betterLB` loop_sc -- Better score so pick this new one
1028 = chooseLoopBreaker approx_lb sc [node] (loop_nodes ++ acc) nodes
1029
1030 | otherwise -- Worse score so don't pick it
1031 = chooseLoopBreaker approx_lb loop_sc loop_nodes (node : acc) nodes
1032 where
1033 sc = nd_score (node_payload node)
1034
1035 {-
1036 Note [Complexity of loop breaking]
1037 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1038 The loop-breaking algorithm knocks out one binder at a time, and
1039 performs a new SCC analysis on the remaining binders. That can
1040 behave very badly in tightly-coupled groups of bindings; in the
1041 worst case it can be (N**2)*log N, because it does a full SCC
1042 on N, then N-1, then N-2 and so on.
1043
1044 To avoid this, we switch plans after 2 (or whatever) attempts:
1045 Plan A: pick one binder with the lowest score, make it
1046 a loop breaker, and try again
1047 Plan B: pick *all* binders with the lowest score, make them
1048 all loop breakers, and try again
1049 Since there are only a small finite number of scores, this will
1050 terminate in a constant number of iterations, rather than O(N)
1051 iterations.
1052
1053 You might thing that it's very unlikely, but RULES make it much
1054 more likely. Here's a real example from Trac #1969:
1055 Rec { $dm = \d.\x. op d
1056 {-# RULES forall d. $dm Int d = $s$dm1
1057 forall d. $dm Bool d = $s$dm2 #-}
1058
1059 dInt = MkD .... opInt ...
1060 dInt = MkD .... opBool ...
1061 opInt = $dm dInt
1062 opBool = $dm dBool
1063
1064 $s$dm1 = \x. op dInt
1065 $s$dm2 = \x. op dBool }
1066 The RULES stuff means that we can't choose $dm as a loop breaker
1067 (Note [Choosing loop breakers]), so we must choose at least (say)
1068 opInt *and* opBool, and so on. The number of loop breakders is
1069 linear in the number of instance declarations.
1070
1071 Note [Loop breakers and INLINE/INLINABLE pragmas]
1072 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1073 Avoid choosing a function with an INLINE pramga as the loop breaker!
1074 If such a function is mutually-recursive with a non-INLINE thing,
1075 then the latter should be the loop-breaker.
1076
1077 It's vital to distinguish between INLINE and INLINABLE (the
1078 Bool returned by hasStableCoreUnfolding_maybe). If we start with
1079 Rec { {-# INLINABLE f #-}
1080 f x = ...f... }
1081 and then worker/wrapper it through strictness analysis, we'll get
1082 Rec { {-# INLINABLE $wf #-}
1083 $wf p q = let x = (p,q) in ...f...
1084
1085 {-# INLINE f #-}
1086 f x = case x of (p,q) -> $wf p q }
1087
1088 Now it is vital that we choose $wf as the loop breaker, so we can
1089 inline 'f' in '$wf'.
1090
1091 Note [DFuns should not be loop breakers]
1092 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1093 It's particularly bad to make a DFun into a loop breaker. See
1094 Note [How instance declarations are translated] in TcInstDcls
1095
1096 We give DFuns a higher score than ordinary CONLIKE things because
1097 if there's a choice we want the DFun to be the non-loop breaker. Eg
1098
1099 rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
1100
1101 $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
1102 {-# DFUN #-}
1103 $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
1104 }
1105
1106 Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
1107 if we can't unravel the DFun first.
1108
1109 Note [Constructor applications]
1110 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1111 It's really really important to inline dictionaries. Real
1112 example (the Enum Ordering instance from GHC.Base):
1113
1114 rec f = \ x -> case d of (p,q,r) -> p x
1115 g = \ x -> case d of (p,q,r) -> q x
1116 d = (v, f, g)
1117
1118 Here, f and g occur just once; but we can't inline them into d.
1119 On the other hand we *could* simplify those case expressions if
1120 we didn't stupidly choose d as the loop breaker.
1121 But we won't because constructor args are marked "Many".
1122 Inlining dictionaries is really essential to unravelling
1123 the loops in static numeric dictionaries, see GHC.Float.
1124
1125 Note [Closure conversion]
1126 ~~~~~~~~~~~~~~~~~~~~~~~~~
1127 We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
1128 The immediate motivation came from the result of a closure-conversion transformation
1129 which generated code like this:
1130
1131 data Clo a b = forall c. Clo (c -> a -> b) c
1132
1133 ($:) :: Clo a b -> a -> b
1134 Clo f env $: x = f env x
1135
1136 rec { plus = Clo plus1 ()
1137
1138 ; plus1 _ n = Clo plus2 n
1139
1140 ; plus2 Zero n = n
1141 ; plus2 (Succ m) n = Succ (plus $: m $: n) }
1142
1143 If we inline 'plus' and 'plus1', everything unravels nicely. But if
1144 we choose 'plus1' as the loop breaker (which is entirely possible
1145 otherwise), the loop does not unravel nicely.
1146
1147
1148 @occAnalUnfolding@ deals with the question of bindings where the Id is marked
1149 by an INLINE pragma. For these we record that anything which occurs
1150 in its RHS occurs many times. This pessimistically assumes that this
1151 inlined binder also occurs many times in its scope, but if it doesn't
1152 we'll catch it next time round. At worst this costs an extra simplifier pass.
1153 ToDo: try using the occurrence info for the inline'd binder.
1154
1155 [March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC.
1156 [June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC.
1157
1158
1159 ************************************************************************
1160 * *
1161 Making nodes
1162 * *
1163 ************************************************************************
1164 -}
1165
1166 type ImpRuleEdges = IdEnv IdSet -- Mapping from FVs of imported RULE LHSs to RHS FVs
1167
1168 noImpRuleEdges :: ImpRuleEdges
1169 noImpRuleEdges = emptyVarEnv
1170
1171 type LetrecNode = Node Unique Details -- Node comes from Digraph
1172 -- The Unique key is gotten from the Id
1173 data Details
1174 = ND { nd_bndr :: Id -- Binder
1175 , nd_rhs :: CoreExpr -- RHS, already occ-analysed
1176 , nd_rhs_bndrs :: [CoreBndr] -- Outer lambdas of RHS
1177 -- INVARIANT: (nd_rhs_bndrs nd, _) ==
1178 -- collectBinders (nd_rhs nd)
1179
1180 , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings
1181 -- ignoring phase (ie assuming all are active)
1182 -- See Note [Forming Rec groups]
1183
1184 , nd_inl :: IdSet -- Free variables of
1185 -- the stable unfolding (if present and active)
1186 -- or the RHS (if not)
1187 -- but excluding any RULES
1188 -- This is the IdSet that may be used if the Id is inlined
1189
1190 , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds
1191 -- but are *not* in nd_inl. These are the ones whose
1192 -- dependencies might not be respected by loop_breaker_nodes
1193 -- See Note [Weak loop breakers]
1194
1195 , nd_active_rule_fvs :: IdSet -- Free variables of the RHS of active RULES
1196
1197 , nd_score :: NodeScore
1198 }
1199
1200 instance Outputable Details where
1201 ppr nd = text "ND" <> braces
1202 (sep [ text "bndr =" <+> ppr (nd_bndr nd)
1203 , text "uds =" <+> ppr (nd_uds nd)
1204 , text "inl =" <+> ppr (nd_inl nd)
1205 , text "weak =" <+> ppr (nd_weak nd)
1206 , text "rule =" <+> ppr (nd_active_rule_fvs nd)
1207 , text "score =" <+> ppr (nd_score nd)
1208 ])
1209
1210 -- The NodeScore is compared lexicographically;
1211 -- e.g. lower rank wins regardless of size
1212 type NodeScore = ( Int -- Rank: lower => more likely to be picked as loop breaker
1213 , Int -- Size of rhs: higher => more likely to be picked as LB
1214 -- Maxes out at maxExprSize; we just use it to prioritise
1215 -- small functions
1216 , Bool ) -- Was it a loop breaker before?
1217 -- True => more likely to be picked
1218 -- Note [Loop breakers, node scoring, and stability]
1219
1220 rank :: NodeScore -> Int
1221 rank (r, _, _) = r
1222
1223 makeNode :: OccEnv -> ImpRuleEdges -> VarSet
1224 -> (Var, CoreExpr) -> LetrecNode
1225 -- See Note [Recursive bindings: the grand plan]
1226 makeNode env imp_rule_edges bndr_set (bndr, rhs)
1227 = DigraphNode details (varUnique bndr) (nonDetKeysUniqSet node_fvs)
1228 -- It's OK to use nonDetKeysUniqSet here as stronglyConnCompFromEdgedVerticesR
1229 -- is still deterministic with edges in nondeterministic order as
1230 -- explained in Note [Deterministic SCC] in Digraph.
1231 where
1232 details = ND { nd_bndr = bndr
1233 , nd_rhs = rhs'
1234 , nd_rhs_bndrs = bndrs'
1235 , nd_uds = rhs_usage3
1236 , nd_inl = inl_fvs
1237 , nd_weak = node_fvs `minusVarSet` inl_fvs
1238 , nd_active_rule_fvs = active_rule_fvs
1239 , nd_score = pprPanic "makeNodeDetails" (ppr bndr) }
1240
1241 -- Constructing the edges for the main Rec computation
1242 -- See Note [Forming Rec groups]
1243 (bndrs, body) = collectBinders rhs
1244 (rhs_usage1, bndrs', body') = occAnalRecRhs env bndrs body
1245 rhs' = mkLams bndrs' body'
1246 rhs_usage2 = rhs_usage1 +++ all_rule_uds
1247 -- Note [Rules are extra RHSs]
1248 -- Note [Rule dependency info]
1249 rhs_usage3 = case mb_unf_uds of
1250 Just unf_uds -> rhs_usage2 +++ unf_uds
1251 Nothing -> rhs_usage2
1252 node_fvs = udFreeVars bndr_set rhs_usage3
1253
1254 -- Finding the free variables of the rules
1255 is_active = occ_rule_act env :: Activation -> Bool
1256
1257 rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)]
1258 rules_w_uds = occAnalRules env (Just (length bndrs)) Recursive bndr
1259
1260 rules_w_rhs_fvs :: [(Activation, VarSet)] -- Find the RHS fvs
1261 rules_w_rhs_fvs = maybe id (\ids -> ((AlwaysActive, ids):))
1262 (lookupVarEnv imp_rule_edges bndr)
1263 -- See Note [Preventing loops due to imported functions rules]
1264 [ (ru_act rule, udFreeVars bndr_set rhs_uds)
1265 | (rule, _, rhs_uds) <- rules_w_uds ]
1266 all_rule_uds = combineUsageDetailsList $
1267 concatMap (\(_, l, r) -> [l, r]) rules_w_uds
1268 active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_rhs_fvs
1269 , is_active a]
1270
1271 -- Finding the usage details of the INLINE pragma (if any)
1272 mb_unf_uds = occAnalUnfolding env Recursive bndr
1273
1274 -- Find the "nd_inl" free vars; for the loop-breaker phase
1275 inl_fvs = case mb_unf_uds of
1276 Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
1277 Just unf_uds -> udFreeVars bndr_set unf_uds
1278 -- We could check for an *active* INLINE (returning
1279 -- emptyVarSet for an inactive one), but is_active
1280 -- isn't the right thing (it tells about
1281 -- RULE activation), so we'd need more plumbing
1282
1283 mkLoopBreakerNodes :: TopLevelFlag
1284 -> VarSet
1285 -> UsageDetails -- for BODY of let
1286 -> [Details]
1287 -> (UsageDetails, -- adjusted
1288 [LetrecNode])
1289 -- Does four things
1290 -- a) tag each binder with its occurrence info
1291 -- b) add a NodeScore to each node
1292 -- c) make a Node with the right dependency edges for
1293 -- the loop-breaker SCC analysis
1294 -- d) adjust each RHS's usage details according to
1295 -- the binder's (new) shotness and join-point-hood
1296 mkLoopBreakerNodes lvl bndr_set body_uds details_s
1297 = (final_uds, zipWith mk_lb_node details_s bndrs')
1298 where
1299 (final_uds, bndrs') = tagRecBinders lvl body_uds
1300 [ ((nd_bndr nd)
1301 ,(nd_uds nd)
1302 ,(nd_rhs_bndrs nd))
1303 | nd <- details_s ]
1304 mk_lb_node nd@(ND { nd_bndr = bndr, nd_rhs = rhs, nd_inl = inl_fvs }) bndr'
1305 = DigraphNode nd' (varUnique bndr) (nonDetKeysUniqSet lb_deps)
1306 -- It's OK to use nonDetKeysUniqSet here as
1307 -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges
1308 -- in nondeterministic order as explained in
1309 -- Note [Deterministic SCC] in Digraph.
1310 where
1311 nd' = nd { nd_bndr = bndr', nd_score = score }
1312 score = nodeScore bndr bndr' rhs lb_deps
1313 lb_deps = extendFvs_ rule_fv_env inl_fvs
1314
1315 rule_fv_env :: IdEnv IdSet
1316 -- Maps a variable f to the variables from this group
1317 -- mentioned in RHS of active rules for f
1318 -- Domain is *subset* of bound vars (others have no rule fvs)
1319 rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
1320 init_rule_fvs -- See Note [Finding rule RHS free vars]
1321 = [ (b, trimmed_rule_fvs)
1322 | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s
1323 , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
1324 , not (isEmptyVarSet trimmed_rule_fvs) ]
1325
1326
1327 ------------------------------------------
1328 nodeScore :: Id -- Binder has old occ-info (just for loop-breaker-ness)
1329 -> Id -- Binder with new occ-info
1330 -> CoreExpr -- RHS
1331 -> VarSet -- Loop-breaker dependencies
1332 -> NodeScore
1333 nodeScore old_bndr new_bndr bind_rhs lb_deps
1334 | not (isId old_bndr) -- A type or cercion variable is never a loop breaker
1335 = (100, 0, False)
1336
1337 | old_bndr `elemVarSet` lb_deps -- Self-recursive things are great loop breakers
1338 = (0, 0, True) -- See Note [Self-recursion and loop breakers]
1339
1340 | exprIsTrivial rhs
1341 = mk_score 10 -- Practically certain to be inlined
1342 -- Used to have also: && not (isExportedId bndr)
1343 -- But I found this sometimes cost an extra iteration when we have
1344 -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }
1345 -- where df is the exported dictionary. Then df makes a really
1346 -- bad choice for loop breaker
1347
1348 | DFunUnfolding { df_args = args } <- id_unfolding
1349 -- Never choose a DFun as a loop breaker
1350 -- Note [DFuns should not be loop breakers]
1351 = (9, length args, is_lb)
1352
1353 -- Data structures are more important than INLINE pragmas
1354 -- so that dictionary/method recursion unravels
1355
1356 | CoreUnfolding { uf_guidance = UnfWhen {} } <- id_unfolding
1357 = mk_score 6
1358
1359 | is_con_app rhs -- Data types help with cases:
1360 = mk_score 5 -- Note [Constructor applications]
1361
1362 | isStableUnfolding id_unfolding
1363 , can_unfold
1364 = mk_score 3
1365
1366 | isOneOcc (idOccInfo new_bndr)
1367 = mk_score 2 -- Likely to be inlined
1368
1369 | can_unfold -- The Id has some kind of unfolding
1370 = mk_score 1
1371
1372 | otherwise
1373 = (0, 0, is_lb)
1374
1375 where
1376 mk_score :: Int -> NodeScore
1377 mk_score rank = (rank, rhs_size, is_lb)
1378
1379 is_lb = isStrongLoopBreaker (idOccInfo old_bndr)
1380 rhs = case id_unfolding of
1381 CoreUnfolding { uf_src = src, uf_tmpl = unf_rhs }
1382 | isStableSource src
1383 -> unf_rhs
1384 _ -> bind_rhs
1385 -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding
1386 rhs_size = case id_unfolding of
1387 CoreUnfolding { uf_guidance = guidance }
1388 | UnfIfGoodArgs { ug_size = size } <- guidance
1389 -> size
1390 _ -> cheapExprSize rhs
1391
1392 can_unfold = canUnfold id_unfolding
1393 id_unfolding = realIdUnfolding old_bndr
1394 -- realIdUnfolding: Ignore loop-breaker-ness here because
1395 -- that is what we are setting!
1396
1397 -- Checking for a constructor application
1398 -- Cheap and cheerful; the simplifier moves casts out of the way
1399 -- The lambda case is important to spot x = /\a. C (f a)
1400 -- which comes up when C is a dictionary constructor and
1401 -- f is a default method.
1402 -- Example: the instance for Show (ST s a) in GHC.ST
1403 --
1404 -- However we *also* treat (\x. C p q) as a con-app-like thing,
1405 -- Note [Closure conversion]
1406 is_con_app (Var v) = isConLikeId v
1407 is_con_app (App f _) = is_con_app f
1408 is_con_app (Lam _ e) = is_con_app e
1409 is_con_app (Tick _ e) = is_con_app e
1410 is_con_app _ = False
1411
1412 maxExprSize :: Int
1413 maxExprSize = 20 -- Rather arbitrary
1414
1415 cheapExprSize :: CoreExpr -> Int
1416 -- Maxes out at maxExprSize
1417 cheapExprSize e
1418 = go 0 e
1419 where
1420 go n e | n >= maxExprSize = n
1421 | otherwise = go1 n e
1422
1423 go1 n (Var {}) = n+1
1424 go1 n (Lit {}) = n+1
1425 go1 n (Type {}) = n
1426 go1 n (Coercion {}) = n
1427 go1 n (Tick _ e) = go1 n e
1428 go1 n (Cast e _) = go1 n e
1429 go1 n (App f a) = go (go1 n f) a
1430 go1 n (Lam b e)
1431 | isTyVar b = go1 n e
1432 | otherwise = go (n+1) e
1433 go1 n (Let b e) = gos (go1 n e) (rhssOfBind b)
1434 go1 n (Case e _ _ as) = gos (go1 n e) (rhssOfAlts as)
1435
1436 gos n [] = n
1437 gos n (e:es) | n >= maxExprSize = n
1438 | otherwise = gos (go1 n e) es
1439
1440 betterLB :: NodeScore -> NodeScore -> Bool
1441 -- If n1 `betterLB` n2 then choose n1 as the loop breaker
1442 betterLB (rank1, size1, lb1) (rank2, size2, _)
1443 | rank1 < rank2 = True
1444 | rank1 > rank2 = False
1445 | size1 < size2 = False -- Make the bigger n2 into the loop breaker
1446 | size1 > size2 = True
1447 | lb1 = True -- Tie-break: if n1 was a loop breaker before, choose it
1448 | otherwise = False -- See Note [Loop breakers, node scoring, and stability]
1449
1450 {- Note [Self-recursion and loop breakers]
1451 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1452 If we have
1453 rec { f = ...f...g...
1454 ; g = .....f... }
1455 then 'f' has to be a loop breaker anyway, so we may as well choose it
1456 right away, so that g can inline freely.
1457
1458 This is really just a cheap hack. Consider
1459 rec { f = ...g...
1460 ; g = ..f..h...
1461 ; h = ...f....}
1462 Here f or g are better loop breakers than h; but we might accidentally
1463 choose h. Finding the minimal set of loop breakers is hard.
1464
1465 Note [Loop breakers, node scoring, and stability]
1466 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1467 To choose a loop breaker, we give a NodeScore to each node in the SCC,
1468 and pick the one with the best score (according to 'betterLB').
1469
1470 We need to be jolly careful (Trac #12425, #12234) about the stability
1471 of this choice. Suppose we have
1472
1473 let rec { f = ...g...g...
1474 ; g = ...f...f... }
1475 in
1476 case x of
1477 True -> ...f..
1478 False -> ..f...
1479
1480 In each iteration of the simplifier the occurrence analyser OccAnal
1481 chooses a loop breaker. Suppose in iteration 1 it choose g as the loop
1482 breaker. That means it is free to inline f.
1483
1484 Suppose that GHC decides to inline f in the branches of the case, but
1485 (for some reason; eg it is not saturated) in the rhs of g. So we get
1486
1487 let rec { f = ...g...g...
1488 ; g = ...f...f... }
1489 in
1490 case x of
1491 True -> ...g...g.....
1492 False -> ..g..g....
1493
1494 Now suppose that, for some reason, in the next iteration the occurrence
1495 analyser chooses f as the loop breaker, so it can freely inline g. And
1496 again for some reason the simplifier inlines g at its calls in the case
1497 branches, but not in the RHS of f. Then we get
1498
1499 let rec { f = ...g...g...
1500 ; g = ...f...f... }
1501 in
1502 case x of
1503 True -> ...(...f...f...)...(...f..f..).....
1504 False -> ..(...f...f...)...(..f..f...)....
1505
1506 You can see where this is going! Each iteration of the simplifier
1507 doubles the number of calls to f or g. No wonder GHC is slow!
1508
1509 (In the particular example in comment:3 of #12425, f and g are the two
1510 mutually recursive fmap instances for CondT and Result. They are both
1511 marked INLINE which, oddly, is why they don't inline in each other's
1512 RHS, because the call there is not saturated.)
1513
1514 The root cause is that we flip-flop on our choice of loop breaker. I
1515 always thought it didn't matter, and indeed for any single iteration
1516 to terminate, it doesn't matter. But when we iterate, it matters a
1517 lot!!
1518
1519 So The Plan is this:
1520 If there is a tie, choose the node that
1521 was a loop breaker last time round
1522
1523 Hence the is_lb field of NodeScore
1524
1525 ************************************************************************
1526 * *
1527 Right hand sides
1528 * *
1529 ************************************************************************
1530 -}
1531
1532 occAnalRhs :: OccEnv -> RecFlag -> Id -> [CoreBndr] -> CoreExpr
1533 -> (UsageDetails, [CoreBndr], CoreExpr)
1534 -- Returned usage details covers only the RHS,
1535 -- and *not* the RULE or INLINE template for the Id
1536 occAnalRhs env Recursive _ bndrs body
1537 = occAnalRecRhs env bndrs body
1538 occAnalRhs env NonRecursive id bndrs body
1539 = occAnalNonRecRhs env id bndrs body
1540
1541 occAnalRecRhs :: OccEnv -> [CoreBndr] -> CoreExpr -- Rhs lambdas, body
1542 -> (UsageDetails, [CoreBndr], CoreExpr)
1543 -- Returned usage details covers only the RHS,
1544 -- and *not* the RULE or INLINE template for the Id
1545 occAnalRecRhs env bndrs body = occAnalLamOrRhs (rhsCtxt env) bndrs body
1546
1547 occAnalNonRecRhs :: OccEnv
1548 -> Id -> [CoreBndr] -> CoreExpr -- Binder; rhs lams, body
1549 -- Binder is already tagged with occurrence info
1550 -> (UsageDetails, [CoreBndr], CoreExpr)
1551 -- Returned usage details covers only the RHS,
1552 -- and *not* the RULE or INLINE template for the Id
1553 occAnalNonRecRhs env bndr bndrs body
1554 = occAnalLamOrRhs rhs_env bndrs body
1555 where
1556 -- See Note [Cascading inlines]
1557 env1 | certainly_inline = env
1558 | otherwise = rhsCtxt env
1559
1560 -- See Note [Sources of one-shot information]
1561 rhs_env = env1 { occ_one_shots = argOneShots dmd }
1562
1563 certainly_inline -- See Note [Cascading inlines]
1564 = case idOccInfo bndr of
1565 OneOcc { occ_in_lam = in_lam, occ_one_br = one_br }
1566 -> not in_lam && one_br && active && not_stable
1567 _ -> False
1568
1569 dmd = idDemandInfo bndr
1570 active = isAlwaysActive (idInlineActivation bndr)
1571 not_stable = not (isStableUnfolding (idUnfolding bndr))
1572
1573 occAnalUnfolding :: OccEnv
1574 -> RecFlag
1575 -> Id
1576 -> Maybe UsageDetails
1577 -- Just the analysis, not a new unfolding. The unfolding
1578 -- got analysed when it was created and we don't need to
1579 -- update it.
1580 occAnalUnfolding env rec_flag id
1581 = case realIdUnfolding id of -- ignore previous loop-breaker flag
1582 CoreUnfolding { uf_tmpl = rhs, uf_src = src }
1583 | not (isStableSource src)
1584 -> Nothing
1585 | otherwise
1586 -> Just $ markAllMany usage
1587 where
1588 (bndrs, body) = collectBinders rhs
1589 (usage, _, _) = occAnalRhs env rec_flag id bndrs body
1590
1591 DFunUnfolding { df_bndrs = bndrs, df_args = args }
1592 -> Just $ zapDetails (delDetailsList usage bndrs)
1593 where
1594 usage = foldr (+++) emptyDetails (map (fst . occAnal env) args)
1595
1596 _ -> Nothing
1597
1598 occAnalRules :: OccEnv
1599 -> Maybe JoinArity -- If the binder is (or MAY become) a join
1600 -- point, what its join arity is (or WOULD
1601 -- become). See Note [Rules and join points].
1602 -> RecFlag
1603 -> Id
1604 -> [(CoreRule, -- Each (non-built-in) rule
1605 UsageDetails, -- Usage details for LHS
1606 UsageDetails)] -- Usage details for RHS
1607 occAnalRules env mb_expected_join_arity rec_flag id
1608 = [ (rule, lhs_uds, rhs_uds) | rule@Rule {} <- idCoreRules id
1609 , let (lhs_uds, rhs_uds) = occ_anal_rule rule ]
1610 where
1611 occ_anal_rule (Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs })
1612 = (lhs_uds, final_rhs_uds)
1613 where
1614 lhs_uds = addManyOccsSet emptyDetails $
1615 (exprsFreeVars args `delVarSetList` bndrs)
1616 (rhs_bndrs, rhs_body) = collectBinders rhs
1617 (rhs_uds, _, _) = occAnalRhs env rec_flag id rhs_bndrs rhs_body
1618 -- Note [Rules are extra RHSs]
1619 -- Note [Rule dependency info]
1620 final_rhs_uds = adjust_tail_info args $ markAllMany $
1621 (rhs_uds `delDetailsList` bndrs)
1622 occ_anal_rule _
1623 = (emptyDetails, emptyDetails)
1624
1625 adjust_tail_info args uds -- see Note [Rules and join points]
1626 = case mb_expected_join_arity of
1627 Just ar | args `lengthIs` ar -> uds
1628 _ -> markAllNonTailCalled uds
1629 {-
1630 Note [Cascading inlines]
1631 ~~~~~~~~~~~~~~~~~~~~~~~~
1632 By default we use an rhsCtxt for the RHS of a binding. This tells the
1633 occ anal n that it's looking at an RHS, which has an effect in
1634 occAnalApp. In particular, for constructor applications, it makes
1635 the arguments appear to have NoOccInfo, so that we don't inline into
1636 them. Thus x = f y
1637 k = Just x
1638 we do not want to inline x.
1639
1640 But there's a problem. Consider
1641 x1 = a0 : []
1642 x2 = a1 : x1
1643 x3 = a2 : x2
1644 g = f x3
1645 First time round, it looks as if x1 and x2 occur as an arg of a
1646 let-bound constructor ==> give them a many-occurrence.
1647 But then x3 is inlined (unconditionally as it happens) and
1648 next time round, x2 will be, and the next time round x1 will be
1649 Result: multiple simplifier iterations. Sigh.
1650
1651 So, when analysing the RHS of x3 we notice that x3 will itself
1652 definitely inline the next time round, and so we analyse x3's rhs in
1653 an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff.
1654
1655 Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally.
1656 If we say "yes" when preInlineUnconditionally says "no" the simplifier iterates
1657 indefinitely:
1658 x = f y
1659 k = Just x
1660 inline ==>
1661 k = Just (f y)
1662 float ==>
1663 x1 = f y
1664 k = Just x1
1665
1666 This is worse than the slow cascade, so we only want to say "certainly_inline"
1667 if it really is certain. Look at the note with preInlineUnconditionally
1668 for the various clauses.
1669
1670
1671 ************************************************************************
1672 * *
1673 Expressions
1674 * *
1675 ************************************************************************
1676 -}
1677
1678 occAnal :: OccEnv
1679 -> CoreExpr
1680 -> (UsageDetails, -- Gives info only about the "interesting" Ids
1681 CoreExpr)
1682
1683 occAnal _ expr@(Type _) = (emptyDetails, expr)
1684 occAnal _ expr@(Lit _) = (emptyDetails, expr)
1685 occAnal env expr@(Var _) = occAnalApp env (expr, [], [])
1686 -- At one stage, I gathered the idRuleVars for the variable here too,
1687 -- which in a way is the right thing to do.
1688 -- But that went wrong right after specialisation, when
1689 -- the *occurrences* of the overloaded function didn't have any
1690 -- rules in them, so the *specialised* versions looked as if they
1691 -- weren't used at all.
1692
1693 occAnal _ (Coercion co)
1694 = (addManyOccsSet emptyDetails (coVarsOfCo co), Coercion co)
1695 -- See Note [Gather occurrences of coercion variables]
1696
1697 {-
1698 Note [Gather occurrences of coercion variables]
1699 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1700 We need to gather info about what coercion variables appear, so that
1701 we can sort them into the right place when doing dependency analysis.
1702 -}
1703
1704 occAnal env (Tick tickish body)
1705 | tickish `tickishScopesLike` SoftScope
1706 = (markAllNonTailCalled usage, Tick tickish body')
1707
1708 | Breakpoint _ ids <- tickish
1709 = (usage_lam +++ foldr addManyOccs emptyDetails ids, Tick tickish body')
1710 -- never substitute for any of the Ids in a Breakpoint
1711
1712 | otherwise
1713 = (usage_lam, Tick tickish body')
1714 where
1715 !(usage,body') = occAnal env body
1716 -- for a non-soft tick scope, we can inline lambdas only
1717 usage_lam = markAllNonTailCalled (markAllInsideLam usage)
1718 -- TODO There may be ways to make ticks and join points play
1719 -- nicer together, but right now there are problems:
1720 -- let j x = ... in tick<t> (j 1)
1721 -- Making j a join point may cause the simplifier to drop t
1722 -- (if the tick is put into the continuation). So we don't
1723 -- count j 1 as a tail call.
1724
1725 occAnal env (Cast expr co)
1726 = case occAnal env expr of { (usage, expr') ->
1727 let usage1 = zapDetailsIf (isRhsEnv env) usage
1728 usage2 = addManyOccsSet usage1 (coVarsOfCo co)
1729 -- See Note [Gather occurrences of coercion variables]
1730 in (markAllNonTailCalled usage2, Cast expr' co)
1731 -- If we see let x = y `cast` co
1732 -- then mark y as 'Many' so that we don't
1733 -- immediately inline y again.
1734 }
1735
1736 occAnal env app@(App _ _)
1737 = occAnalApp env (collectArgsTicks tickishFloatable app)
1738
1739 -- Ignore type variables altogether
1740 -- (a) occurrences inside type lambdas only not marked as InsideLam
1741 -- (b) type variables not in environment
1742
1743 occAnal env (Lam x body)
1744 | isTyVar x
1745 = case occAnal env body of { (body_usage, body') ->
1746 (markAllNonTailCalled body_usage, Lam x body')
1747 }
1748
1749 -- For value lambdas we do a special hack. Consider
1750 -- (\x. \y. ...x...)
1751 -- If we did nothing, x is used inside the \y, so would be marked
1752 -- as dangerous to dup. But in the common case where the abstraction
1753 -- is applied to two arguments this is over-pessimistic.
1754 -- So instead, we just mark each binder with its occurrence
1755 -- info in the *body* of the multiple lambda.
1756 -- Then, the simplifier is careful when partially applying lambdas.
1757
1758 occAnal env expr@(Lam _ _)
1759 = case occAnalLamOrRhs env binders body of { (usage, tagged_binders, body') ->
1760 let
1761 expr' = mkLams tagged_binders body'
1762 usage1 = markAllNonTailCalled usage
1763 one_shot_gp = all isOneShotBndr tagged_binders
1764 final_usage | one_shot_gp = usage1
1765 | otherwise = markAllInsideLam usage1
1766 in
1767 (final_usage, expr') }
1768 where
1769 (binders, body) = collectBinders expr
1770
1771 occAnal env (Case scrut bndr ty alts)
1772 = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') ->
1773 case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts') ->
1774 let
1775 alts_usage = foldr combineAltsUsageDetails emptyDetails alts_usage_s
1776 (alts_usage1, tagged_bndr) = tag_case_bndr alts_usage bndr
1777 total_usage = markAllNonTailCalled scrut_usage +++ alts_usage1
1778 -- Alts can have tail calls, but the scrutinee can't
1779 in
1780 total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
1781 where
1782 -- Note [Case binder usage]
1783 -- ~~~~~~~~~~~~~~~~~~~~~~~~
1784 -- The case binder gets a usage of either "many" or "dead", never "one".
1785 -- Reason: we like to inline single occurrences, to eliminate a binding,
1786 -- but inlining a case binder *doesn't* eliminate a binding.
1787 -- We *don't* want to transform
1788 -- case x of w { (p,q) -> f w }
1789 -- into
1790 -- case x of w { (p,q) -> f (p,q) }
1791 tag_case_bndr usage bndr
1792 = (usage', setIdOccInfo bndr final_occ_info)
1793 where
1794 occ_info = lookupDetails usage bndr
1795 usage' = usage `delDetails` bndr
1796 final_occ_info = case occ_info of IAmDead -> IAmDead
1797 _ -> noOccInfo
1798
1799 alt_env = mkAltEnv env scrut bndr
1800 occ_anal_alt = occAnalAlt alt_env
1801
1802 occ_anal_scrut (Var v) (alt1 : other_alts)
1803 | not (null other_alts) || not (isDefaultAlt alt1)
1804 = (mkOneOcc env v True 0, Var v)
1805 -- The 'True' says that the variable occurs in an interesting
1806 -- context; the case has at least one non-default alternative
1807 occ_anal_scrut (Tick t e) alts
1808 | t `tickishScopesLike` SoftScope
1809 -- No reason to not look through all ticks here, but only
1810 -- for soft-scoped ticks we can do so without having to
1811 -- update returned occurance info (see occAnal)
1812 = second (Tick t) $ occ_anal_scrut e alts
1813
1814 occ_anal_scrut scrut _alts
1815 = occAnal (vanillaCtxt env) scrut -- No need for rhsCtxt
1816
1817 occAnal env (Let bind body)
1818 = case occAnal env body of { (body_usage, body') ->
1819 case occAnalBind env NotTopLevel
1820 noImpRuleEdges bind
1821 body_usage of { (final_usage, new_binds) ->
1822 (final_usage, mkLets new_binds body') }}
1823
1824 occAnalArgs :: OccEnv -> [CoreExpr] -> [OneShots] -> (UsageDetails, [CoreExpr])
1825 occAnalArgs _ [] _
1826 = (emptyDetails, [])
1827
1828 occAnalArgs env (arg:args) one_shots
1829 | isTypeArg arg
1830 = case occAnalArgs env args one_shots of { (uds, args') ->
1831 (uds, arg:args') }
1832
1833 | otherwise
1834 = case argCtxt env one_shots of { (arg_env, one_shots') ->
1835 case occAnal arg_env arg of { (uds1, arg') ->
1836 case occAnalArgs env args one_shots' of { (uds2, args') ->
1837 (uds1 +++ uds2, arg':args') }}}
1838
1839 {-
1840 Applications are dealt with specially because we want
1841 the "build hack" to work.
1842
1843 Note [Arguments of let-bound constructors]
1844 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1845 Consider
1846 f x = let y = expensive x in
1847 let z = (True,y) in
1848 (case z of {(p,q)->q}, case z of {(p,q)->q})
1849 We feel free to duplicate the WHNF (True,y), but that means
1850 that y may be duplicated thereby.
1851
1852 If we aren't careful we duplicate the (expensive x) call!
1853 Constructors are rather like lambdas in this way.
1854 -}
1855
1856 occAnalApp :: OccEnv
1857 -> (Expr CoreBndr, [Arg CoreBndr], [Tickish Id])
1858 -> (UsageDetails, Expr CoreBndr)
1859 occAnalApp env (Var fun, args, ticks)
1860 | null ticks = (uds, mkApps (Var fun) args')
1861 | otherwise = (uds, mkTicks ticks $ mkApps (Var fun) args')
1862 where
1863 uds = fun_uds +++ final_args_uds
1864
1865 !(args_uds, args') = occAnalArgs env args one_shots
1866 !final_args_uds
1867 | isRhsEnv env && is_exp = markAllNonTailCalled $
1868 markAllInsideLam args_uds
1869 | otherwise = markAllNonTailCalled args_uds
1870 -- We mark the free vars of the argument of a constructor or PAP
1871 -- as "inside-lambda", if it is the RHS of a let(rec).
1872 -- This means that nothing gets inlined into a constructor or PAP
1873 -- argument position, which is what we want. Typically those
1874 -- constructor arguments are just variables, or trivial expressions.
1875 -- We use inside-lam because it's like eta-expanding the PAP.
1876 --
1877 -- This is the *whole point* of the isRhsEnv predicate
1878 -- See Note [Arguments of let-bound constructors]
1879
1880 n_val_args = valArgCount args
1881 n_args = length args
1882 fun_uds = mkOneOcc env fun (n_val_args > 0) n_args
1883 is_exp = isExpandableApp fun n_val_args
1884 -- See Note [CONLIKE pragma] in BasicTypes
1885 -- The definition of is_exp should match that in Simplify.prepareRhs
1886
1887 one_shots = argsOneShots (idStrictness fun) guaranteed_val_args
1888 guaranteed_val_args = n_val_args + length (takeWhile isOneShotInfo
1889 (occ_one_shots env))
1890 -- See Note [Sources of one-shot information], bullet point A']
1891
1892 occAnalApp env (fun, args, ticks)
1893 = (markAllNonTailCalled (fun_uds +++ args_uds),
1894 mkTicks ticks $ mkApps fun' args')
1895 where
1896 !(fun_uds, fun') = occAnal (addAppCtxt env args) fun
1897 -- The addAppCtxt is a bit cunning. One iteration of the simplifier
1898 -- often leaves behind beta redexs like
1899 -- (\x y -> e) a1 a2
1900 -- Here we would like to mark x,y as one-shot, and treat the whole
1901 -- thing much like a let. We do this by pushing some True items
1902 -- onto the context stack.
1903 !(args_uds, args') = occAnalArgs env args []
1904
1905 zapDetailsIf :: Bool -- If this is true
1906 -> UsageDetails -- Then do zapDetails on this
1907 -> UsageDetails
1908 zapDetailsIf True uds = zapDetails uds
1909 zapDetailsIf False uds = uds
1910
1911 {-
1912 Note [Sources of one-shot information]
1913 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1914 The occurrence analyser obtains one-shot-lambda information from two sources:
1915
1916 A: Saturated applications: eg f e1 .. en
1917
1918 In general, given a call (f e1 .. en) we can propagate one-shot info from
1919 f's strictness signature into e1 .. en, but /only/ if n is enough to
1920 saturate the strictness signature. A strictness signature like
1921
1922 f :: C1(C1(L))LS
1923
1924 means that *if f is applied to three arguments* then it will guarantee to
1925 call its first argument at most once, and to call the result of that at
1926 most once. But if f has fewer than three arguments, all bets are off; e.g.
1927
1928 map (f (\x y. expensive) e2) xs
1929
1930 Here the \x y abstraction may be called many times (once for each element of
1931 xs) so we should not mark x and y as one-shot. But if it was
1932
1933 map (f (\x y. expensive) 3 2) xs
1934
1935 then the first argument of f will be called at most once.
1936
1937 The one-shot info, derived from f's strictness signature, is
1938 computed by 'argsOneShots', called in occAnalApp.
1939
1940 A': Non-obviously saturated applications: eg build (f (\x y -> expensive))
1941 where f is as above.
1942
1943 In this case, f is only manifestly applied to one argument, so it does not
1944 look saturated. So by the previous point, we should not use its strictness
1945 signature to learn about the one-shotness of \x y. But in this case we can:
1946 build is fully applied, so we may use its strictness signature; and from
1947 that we learn that build calls its argument with two arguments *at most once*.
1948
1949 So there is really only one call to f, and it will have three arguments. In
1950 that sense, f is saturated, and we may proceed as described above.
1951
1952 Hence the computation of 'guaranteed_val_args' in occAnalApp, using
1953 '(occ_one_shots env)'. See also Trac #13227, comment:9
1954
1955 B: Let-bindings: eg let f = \c. let ... in \n -> blah
1956 in (build f, build f)
1957
1958 Propagate one-shot info from the demanand-info on 'f' to the
1959 lambdas in its RHS (which may not be syntactically at the top)
1960
1961 This information must have come from a previous run of the demanand
1962 analyser.
1963
1964 Previously, the demand analyser would *also* set the one-shot information, but
1965 that code was buggy (see #11770), so doing it only in on place, namely here, is
1966 saner.
1967
1968 Note [OneShots]
1969 ~~~~~~~~~~~~~~~
1970 When analysing an expression, the occ_one_shots argument contains information
1971 about how the function is being used. The length of the list indicates
1972 how many arguments will eventually be passed to the analysed expression,
1973 and the OneShotInfo indicates whether this application is once or multiple times.
1974
1975 Example:
1976
1977 Context of f occ_one_shots when analysing f
1978
1979 f 1 2 [OneShot, OneShot]
1980 map (f 1) [OneShot, NoOneShotInfo]
1981 build f [OneShot, OneShot]
1982 f 1 2 `seq` f 2 1 [NoOneShotInfo, OneShot]
1983
1984 Note [Binders in case alternatives]
1985 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1986 Consider
1987 case x of y { (a,b) -> f y }
1988 We treat 'a', 'b' as dead, because they don't physically occur in the
1989 case alternative. (Indeed, a variable is dead iff it doesn't occur in
1990 its scope in the output of OccAnal.) It really helps to know when
1991 binders are unused. See esp the call to isDeadBinder in
1992 Simplify.mkDupableAlt
1993
1994 In this example, though, the Simplifier will bring 'a' and 'b' back to
1995 life, beause it binds 'y' to (a,b) (imagine got inlined and
1996 scrutinised y).
1997 -}
1998
1999 occAnalLamOrRhs :: OccEnv -> [CoreBndr] -> CoreExpr
2000 -> (UsageDetails, [CoreBndr], CoreExpr)
2001 occAnalLamOrRhs env [] body
2002 = case occAnal env body of (body_usage, body') -> (body_usage, [], body')
2003 -- RHS of thunk or nullary join point
2004 occAnalLamOrRhs env (bndr:bndrs) body
2005 | isTyVar bndr
2006 = -- Important: Keep the environment so that we don't inline into an RHS like
2007 -- \(@ x) -> C @x (f @x)
2008 -- (see the beginning of Note [Cascading inlines]).
2009 case occAnalLamOrRhs env bndrs body of
2010 (body_usage, bndrs', body') -> (body_usage, bndr:bndrs', body')
2011 occAnalLamOrRhs env binders body
2012 = case occAnal env_body body of { (body_usage, body') ->
2013 let
2014 (final_usage, tagged_binders) = tagLamBinders body_usage binders'
2015 -- Use binders' to put one-shot info on the lambdas
2016 in
2017 (final_usage, tagged_binders, body') }
2018 where
2019 (env_body, binders') = oneShotGroup env binders
2020
2021 occAnalAlt :: (OccEnv, Maybe (Id, CoreExpr))
2022 -> CoreAlt
2023 -> (UsageDetails, Alt IdWithOccInfo)
2024 occAnalAlt (env, scrut_bind) (con, bndrs, rhs)
2025 = case occAnal env rhs of { (rhs_usage1, rhs1) ->
2026 let
2027 (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs
2028 -- See Note [Binders in case alternatives]
2029 (alt_usg', rhs2) =
2030 wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1
2031 in
2032 (alt_usg', (con, tagged_bndrs, rhs2)) }
2033
2034 wrapAltRHS :: OccEnv
2035 -> Maybe (Id, CoreExpr) -- proxy mapping generated by mkAltEnv
2036 -> UsageDetails -- usage for entire alt (p -> rhs)
2037 -> [Var] -- alt binders
2038 -> CoreExpr -- alt RHS
2039 -> (UsageDetails, CoreExpr)
2040 wrapAltRHS env (Just (scrut_var, let_rhs)) alt_usg bndrs alt_rhs
2041 | occ_binder_swap env
2042 , scrut_var `usedIn` alt_usg -- bndrs are not be present in alt_usg so this
2043 -- handles condition (a) in Note [Binder swap]
2044 , not captured -- See condition (b) in Note [Binder swap]
2045 = ( alt_usg' +++ let_rhs_usg
2046 , Let (NonRec tagged_scrut_var let_rhs') alt_rhs )
2047 where
2048 captured = any (`usedIn` let_rhs_usg) bndrs
2049 -- The rhs of the let may include coercion variables
2050 -- if the scrutinee was a cast, so we must gather their
2051 -- usage. See Note [Gather occurrences of coercion variables]
2052 (let_rhs_usg, let_rhs') = occAnal env let_rhs
2053 (alt_usg', [tagged_scrut_var]) = tagLamBinders alt_usg [scrut_var]
2054
2055 wrapAltRHS _ _ alt_usg _ alt_rhs
2056 = (alt_usg, alt_rhs)
2057
2058 {-
2059 ************************************************************************
2060 * *
2061 OccEnv
2062 * *
2063 ************************************************************************
2064 -}
2065
2066 data OccEnv
2067 = OccEnv { occ_encl :: !OccEncl -- Enclosing context information
2068 , occ_one_shots :: !OneShots -- See Note [OneShots]
2069 , occ_gbl_scrut :: GlobalScruts
2070 , occ_rule_act :: Activation -> Bool -- Which rules are active
2071 -- See Note [Finding rule RHS free vars]
2072 , occ_binder_swap :: !Bool -- enable the binder_swap
2073 -- See CorePrep Note [Dead code in CorePrep]
2074 }
2075
2076 type GlobalScruts = IdSet -- See Note [Binder swap on GlobalId scrutinees]
2077
2078 -----------------------------
2079 -- OccEncl is used to control whether to inline into constructor arguments
2080 -- For example:
2081 -- x = (p,q) -- Don't inline p or q
2082 -- y = /\a -> (p a, q a) -- Still don't inline p or q
2083 -- z = f (p,q) -- Do inline p,q; it may make a rule fire
2084 -- So OccEncl tells enought about the context to know what to do when
2085 -- we encounter a constructor application or PAP.
2086
2087 data OccEncl
2088 = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda
2089 -- Don't inline into constructor args here
2090 | OccVanilla -- Argument of function, body of lambda, scruintee of case etc.
2091 -- Do inline into constructor args here
2092
2093 instance Outputable OccEncl where
2094 ppr OccRhs = text "occRhs"
2095 ppr OccVanilla = text "occVanilla"
2096
2097 -- See note [OneShots]
2098 type OneShots = [OneShotInfo]
2099
2100 initOccEnv :: (Activation -> Bool) -> OccEnv
2101 initOccEnv active_rule
2102 = OccEnv { occ_encl = OccVanilla
2103 , occ_one_shots = []
2104 , occ_gbl_scrut = emptyVarSet
2105 , occ_rule_act = active_rule
2106 , occ_binder_swap = True }
2107
2108 vanillaCtxt :: OccEnv -> OccEnv
2109 vanillaCtxt env = env { occ_encl = OccVanilla, occ_one_shots = [] }
2110
2111 rhsCtxt :: OccEnv -> OccEnv
2112 rhsCtxt env = env { occ_encl = OccRhs, occ_one_shots = [] }
2113
2114 argCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots])
2115 argCtxt env []
2116 = (env { occ_encl = OccVanilla, occ_one_shots = [] }, [])
2117 argCtxt env (one_shots:one_shots_s)
2118 = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s)
2119
2120 isRhsEnv :: OccEnv -> Bool
2121 isRhsEnv (OccEnv { occ_encl = OccRhs }) = True
2122 isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False
2123
2124 oneShotGroup :: OccEnv -> [CoreBndr]
2125 -> ( OccEnv
2126 , [CoreBndr] )
2127 -- The result binders have one-shot-ness set that they might not have had originally.
2128 -- This happens in (build (\c n -> e)). Here the occurrence analyser
2129 -- linearity context knows that c,n are one-shot, and it records that fact in
2130 -- the binder. This is useful to guide subsequent float-in/float-out tranformations
2131
2132 oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs
2133 = go ctxt bndrs []
2134 where
2135 go ctxt [] rev_bndrs
2136 = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla }
2137 , reverse rev_bndrs )
2138
2139 go [] bndrs rev_bndrs
2140 = ( env { occ_one_shots = [], occ_encl = OccVanilla }
2141 , reverse rev_bndrs ++ bndrs )
2142
2143 go ctxt@(one_shot : ctxt') (bndr : bndrs) rev_bndrs
2144 | isId bndr = go ctxt' bndrs (bndr': rev_bndrs)
2145 | otherwise = go ctxt bndrs (bndr : rev_bndrs)
2146 where
2147 bndr' = updOneShotInfo bndr one_shot
2148 -- Use updOneShotInfo, not setOneShotInfo, as pre-existing
2149 -- one-shot info might be better than what we can infer, e.g.
2150 -- due to explicit use of the magic 'oneShot' function.
2151 -- See Note [The oneShot function]
2152
2153
2154 markJoinOneShots :: Maybe JoinArity -> [Var] -> [Var]
2155 -- Mark the lambdas of a non-recursive join point as one-shot.
2156 -- This is good to prevent gratuitous float-out etc
2157 markJoinOneShots mb_join_arity bndrs
2158 = case mb_join_arity of
2159 Nothing -> bndrs
2160 Just n -> go n bndrs
2161 where
2162 go 0 bndrs = bndrs
2163 go _ [] = WARN( True, ppr mb_join_arity <+> ppr bndrs ) []
2164 go n (b:bs) = b' : go (n-1) bs
2165 where
2166 b' | isId b = setOneShotLambda b
2167 | otherwise = b
2168
2169 addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
2170 addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args
2171 = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt }
2172
2173 transClosureFV :: UniqFM VarSet -> UniqFM VarSet
2174 -- If (f,g), (g,h) are in the input, then (f,h) is in the output
2175 -- as well as (f,g), (g,h)
2176 transClosureFV env
2177 | no_change = env
2178 | otherwise = transClosureFV (listToUFM new_fv_list)
2179 where
2180 (no_change, new_fv_list) = mapAccumL bump True (nonDetUFMToList env)
2181 -- It's OK to use nonDetUFMToList here because we'll forget the
2182 -- ordering by creating a new set with listToUFM
2183 bump no_change (b,fvs)
2184 | no_change_here = (no_change, (b,fvs))
2185 | otherwise = (False, (b,new_fvs))
2186 where
2187 (new_fvs, no_change_here) = extendFvs env fvs
2188
2189 -------------
2190 extendFvs_ :: UniqFM VarSet -> VarSet -> VarSet
2191 extendFvs_ env s = fst (extendFvs env s) -- Discard the Bool flag
2192
2193 extendFvs :: UniqFM VarSet -> VarSet -> (VarSet, Bool)
2194 -- (extendFVs env s) returns
2195 -- (s `union` env(s), env(s) `subset` s)
2196 extendFvs env s
2197 | isNullUFM env
2198 = (s, True)
2199 | otherwise
2200 = (s `unionVarSet` extras, extras `subVarSet` s)
2201 where
2202 extras :: VarSet -- env(s)
2203 extras = nonDetFoldUFM unionVarSet emptyVarSet $
2204 -- It's OK to use nonDetFoldUFM here because unionVarSet commutes
2205 intersectUFM_C (\x _ -> x) env (getUniqSet s)
2206
2207 {-
2208 ************************************************************************
2209 * *
2210 Binder swap
2211 * *
2212 ************************************************************************
2213
2214 Note [Binder swap]
2215 ~~~~~~~~~~~~~~~~~~
2216 We do these two transformations right here:
2217
2218 (1) case x of b { pi -> ri }
2219 ==>
2220 case x of b { pi -> let x=b in ri }
2221
2222 (2) case (x |> co) of b { pi -> ri }
2223 ==>
2224 case (x |> co) of b { pi -> let x = b |> sym co in ri }
2225
2226 Why (2)? See Note [Case of cast]
2227
2228 In both cases, in a particular alternative (pi -> ri), we only
2229 add the binding if
2230 (a) x occurs free in (pi -> ri)
2231 (ie it occurs in ri, but is not bound in pi)
2232 (b) the pi does not bind b (or the free vars of co)
2233 We need (a) and (b) for the inserted binding to be correct.
2234
2235 For the alternatives where we inject the binding, we can transfer
2236 all x's OccInfo to b. And that is the point.
2237
2238 Notice that
2239 * The deliberate shadowing of 'x'.
2240 * That (a) rapidly becomes false, so no bindings are injected.
2241
2242 The reason for doing these transformations here is because it allows
2243 us to adjust the OccInfo for 'x' and 'b' as we go.
2244
2245 * Suppose the only occurrences of 'x' are the scrutinee and in the
2246 ri; then this transformation makes it occur just once, and hence
2247 get inlined right away.
2248
2249 * If we do this in the Simplifier, we don't know whether 'x' is used
2250 in ri, so we are forced to pessimistically zap b's OccInfo even
2251 though it is typically dead (ie neither it nor x appear in the
2252 ri). There's nothing actually wrong with zapping it, except that
2253 it's kind of nice to know which variables are dead. My nose
2254 tells me to keep this information as robustly as possible.
2255
2256 The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
2257 {x=b}; it's Nothing if the binder-swap doesn't happen.
2258
2259 There is a danger though. Consider
2260 let v = x +# y
2261 in case (f v) of w -> ...v...v...
2262 And suppose that (f v) expands to just v. Then we'd like to
2263 use 'w' instead of 'v' in the alternative. But it may be too
2264 late; we may have substituted the (cheap) x+#y for v in the
2265 same simplifier pass that reduced (f v) to v.
2266
2267 I think this is just too bad. CSE will recover some of it.
2268
2269 Note [Case of cast]
2270 ~~~~~~~~~~~~~~~~~~~
2271 Consider case (x `cast` co) of b { I# ->
2272 ... (case (x `cast` co) of {...}) ...
2273 We'd like to eliminate the inner case. That is the motivation for
2274 equation (2) in Note [Binder swap]. When we get to the inner case, we
2275 inline x, cancel the casts, and away we go.
2276
2277 Note [Binder swap on GlobalId scrutinees]
2278 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2279 When the scrutinee is a GlobalId we must take care in two ways
2280
2281 i) In order to *know* whether 'x' occurs free in the RHS, we need its
2282 occurrence info. BUT, we don't gather occurrence info for
2283 GlobalIds. That's the reason for the (small) occ_gbl_scrut env in
2284 OccEnv is for: it says "gather occurrence info for these".
2285
2286 ii) We must call localiseId on 'x' first, in case it's a GlobalId, or
2287 has an External Name. See, for example, SimplEnv Note [Global Ids in
2288 the substitution].
2289
2290 Note [Zap case binders in proxy bindings]
2291 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2292 From the original
2293 case x of cb(dead) { p -> ...x... }
2294 we will get
2295 case x of cb(live) { p -> let x = cb in ...x... }
2296
2297 Core Lint never expects to find an *occurrence* of an Id marked
2298 as Dead, so we must zap the OccInfo on cb before making the
2299 binding x = cb. See Trac #5028.
2300
2301 Historical note [no-case-of-case]
2302 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2303 We *used* to suppress the binder-swap in case expressions when
2304 -fno-case-of-case is on. Old remarks:
2305 "This happens in the first simplifier pass,
2306 and enhances full laziness. Here's the bad case:
2307 f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
2308 If we eliminate the inner case, we trap it inside the I# v -> arm,
2309 which might prevent some full laziness happening. I've seen this
2310 in action in spectral/cichelli/Prog.hs:
2311 [(m,n) | m <- [1..max], n <- [1..max]]
2312 Hence the check for NoCaseOfCase."
2313 However, now the full-laziness pass itself reverses the binder-swap, so this
2314 check is no longer necessary.
2315
2316 Historical note [Suppressing the case binder-swap]
2317 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2318 This old note describes a problem that is also fixed by doing the
2319 binder-swap in OccAnal:
2320
2321 There is another situation when it might make sense to suppress the
2322 case-expression binde-swap. If we have
2323
2324 case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
2325 ...other cases .... }
2326
2327 We'll perform the binder-swap for the outer case, giving
2328
2329 case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
2330 ...other cases .... }
2331
2332 But there is no point in doing it for the inner case, because w1 can't
2333 be inlined anyway. Furthermore, doing the case-swapping involves
2334 zapping w2's occurrence info (see paragraphs that follow), and that
2335 forces us to bind w2 when doing case merging. So we get
2336
2337 case x of w1 { A -> let w2 = w1 in e1
2338 B -> let w2 = w1 in e2
2339 ...other cases .... }
2340
2341 This is plain silly in the common case where w2 is dead.
2342
2343 Even so, I can't see a good way to implement this idea. I tried
2344 not doing the binder-swap if the scrutinee was already evaluated
2345 but that failed big-time:
2346
2347 data T = MkT !Int
2348
2349 case v of w { MkT x ->
2350 case x of x1 { I# y1 ->
2351 case x of x2 { I# y2 -> ...
2352
2353 Notice that because MkT is strict, x is marked "evaluated". But to
2354 eliminate the last case, we must either make sure that x (as well as
2355 x1) has unfolding MkT y1. The straightforward thing to do is to do
2356 the binder-swap. So this whole note is a no-op.
2357
2358 It's fixed by doing the binder-swap in OccAnal because we can do the
2359 binder-swap unconditionally and still get occurrence analysis
2360 information right.
2361 -}
2362
2363 mkAltEnv :: OccEnv -> CoreExpr -> Id -> (OccEnv, Maybe (Id, CoreExpr))
2364 -- Does two things: a) makes the occ_one_shots = OccVanilla
2365 -- b) extends the GlobalScruts if possible
2366 -- c) returns a proxy mapping, binding the scrutinee
2367 -- to the case binder, if possible
2368 mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr
2369 = case stripTicksTopE (const True) scrut of
2370 Var v -> add_scrut v case_bndr'
2371 Cast (Var v) co -> add_scrut v (Cast case_bndr' (mkSymCo co))
2372 -- See Note [Case of cast]
2373 _ -> (env { occ_encl = OccVanilla }, Nothing)
2374
2375 where
2376 add_scrut v rhs = ( env { occ_encl = OccVanilla, occ_gbl_scrut = pe `extendVarSet` v }
2377 , Just (localise v, rhs) )
2378
2379 case_bndr' = Var (zapIdOccInfo case_bndr) -- See Note [Zap case binders in proxy bindings]
2380 localise scrut_var = mkLocalIdOrCoVar (localiseName (idName scrut_var)) (idType scrut_var)
2381 -- Localise the scrut_var before shadowing it; we're making a
2382 -- new binding for it, and it might have an External Name, or
2383 -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]
2384 -- Also we don't want any INLINE or NOINLINE pragmas!
2385
2386 {-
2387 ************************************************************************
2388 * *
2389 \subsection[OccurAnal-types]{OccEnv}
2390 * *
2391 ************************************************************************
2392
2393 Note [UsageDetails and zapping]
2394 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2395
2396 On many occasions, we must modify all gathered occurrence data at once. For
2397 instance, all occurrences underneath a (non-one-shot) lambda set the
2398 'occ_in_lam' flag to become 'True'. We could use 'mapVarEnv' to do this, but
2399 that takes O(n) time and we will do this often---in particular, there are many
2400 places where tail calls are not allowed, and each of these causes all variables
2401 to get marked with 'NoTailCallInfo'.
2402
2403 Instead of relying on `mapVarEnv`, then, we carry three 'IdEnv's around along
2404 with the 'OccInfoEnv'. Each of these extra environments is a "zapped set"
2405 recording which variables have been zapped in some way. Zapping all occurrence
2406 info then simply means setting the corresponding zapped set to the whole
2407 'OccInfoEnv', a fast O(1) operation.
2408 -}
2409
2410 type OccInfoEnv = IdEnv OccInfo -- A finite map from ids to their usage
2411 -- INVARIANT: never IAmDead
2412 -- (Deadness is signalled by not being in the map at all)
2413
2414 type ZappedSet = OccInfoEnv -- Values are ignored
2415
2416 data UsageDetails
2417 = UD { ud_env :: !OccInfoEnv
2418 , ud_z_many :: ZappedSet -- apply 'markMany' to these
2419 , ud_z_in_lam :: ZappedSet -- apply 'markInsideLam' to these
2420 , ud_z_no_tail :: ZappedSet } -- apply 'markNonTailCalled' to these
2421 -- INVARIANT: All three zapped sets are subsets of the OccInfoEnv
2422
2423 instance Outputable UsageDetails where
2424 ppr ud = ppr (ud_env (flattenUsageDetails ud))
2425
2426 -------------------
2427 -- UsageDetails API
2428
2429 (+++), combineAltsUsageDetails
2430 :: UsageDetails -> UsageDetails -> UsageDetails
2431 (+++) = combineUsageDetailsWith addOccInfo
2432 combineAltsUsageDetails = combineUsageDetailsWith orOccInfo
2433
2434 combineUsageDetailsList :: [UsageDetails] -> UsageDetails
2435 combineUsageDetailsList = foldl (+++) emptyDetails
2436
2437 mkOneOcc :: OccEnv -> Id -> InterestingCxt -> JoinArity -> UsageDetails
2438 mkOneOcc env id int_cxt arity
2439 | isLocalId id
2440 = singleton $ OneOcc { occ_in_lam = False
2441 , occ_one_br = True
2442 , occ_int_cxt = int_cxt
2443 , occ_tail = AlwaysTailCalled arity }
2444 | id `elemVarSet` occ_gbl_scrut env
2445 = singleton noOccInfo
2446
2447 | otherwise
2448 = emptyDetails
2449 where
2450 singleton info = emptyDetails { ud_env = unitVarEnv id info }
2451
2452 addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
2453 addOneOcc ud id info
2454 = ud { ud_env = extendVarEnv_C plus_zapped (ud_env ud) id info }
2455 `alterZappedSets` (`delVarEnv` id)
2456 where
2457 plus_zapped old new = doZapping ud id old `addOccInfo` new
2458
2459 addManyOccsSet :: UsageDetails -> VarSet -> UsageDetails
2460 addManyOccsSet usage id_set = nonDetFoldUniqSet addManyOccs usage id_set
2461 -- It's OK to use nonDetFoldUFM here because addManyOccs commutes
2462
2463 -- Add several occurrences, assumed not to be tail calls
2464 addManyOccs :: Var -> UsageDetails -> UsageDetails
2465 addManyOccs v u | isId v = addOneOcc u v noOccInfo
2466 | otherwise = u
2467 -- Give a non-committal binder info (i.e noOccInfo) because
2468 -- a) Many copies of the specialised thing can appear
2469 -- b) We don't want to substitute a BIG expression inside a RULE
2470 -- even if that's the only occurrence of the thing
2471 -- (Same goes for INLINE.)
2472
2473 delDetails :: UsageDetails -> Id -> UsageDetails
2474 delDetails ud bndr
2475 = ud `alterUsageDetails` (`delVarEnv` bndr)
2476
2477 delDetailsList :: UsageDetails -> [Id] -> UsageDetails
2478 delDetailsList ud bndrs
2479 = ud `alterUsageDetails` (`delVarEnvList` bndrs)
2480
2481 emptyDetails :: UsageDetails
2482 emptyDetails = UD { ud_env = emptyVarEnv
2483 , ud_z_many = emptyVarEnv
2484 , ud_z_in_lam = emptyVarEnv
2485 , ud_z_no_tail = emptyVarEnv }
2486
2487 isEmptyDetails :: UsageDetails -> Bool
2488 isEmptyDetails = isEmptyVarEnv . ud_env
2489
2490 markAllMany, markAllInsideLam, markAllNonTailCalled, zapDetails
2491 :: UsageDetails -> UsageDetails
2492 markAllMany ud = ud { ud_z_many = ud_env ud }
2493 markAllInsideLam ud = ud { ud_z_in_lam = ud_env ud }
2494 markAllNonTailCalled ud = ud { ud_z_no_tail = ud_env ud }
2495
2496 zapDetails = markAllMany . markAllNonTailCalled -- effectively sets to noOccInfo
2497
2498 lookupDetails :: UsageDetails -> Id -> OccInfo
2499 lookupDetails ud id
2500 = case lookupVarEnv (ud_env ud) id of
2501 Just occ -> doZapping ud id occ
2502 Nothing -> IAmDead
2503
2504 usedIn :: Id -> UsageDetails -> Bool
2505 v `usedIn` ud = isExportedId v || v `elemVarEnv` ud_env ud
2506
2507 udFreeVars :: VarSet -> UsageDetails -> VarSet
2508 -- Find the subset of bndrs that are mentioned in uds
2509 udFreeVars bndrs ud = restrictUniqSetToUFM bndrs (ud_env ud)
2510
2511 -------------------
2512 -- Auxiliary functions for UsageDetails implementation
2513
2514 combineUsageDetailsWith :: (OccInfo -> OccInfo -> OccInfo)
2515 -> UsageDetails -> UsageDetails -> UsageDetails
2516 combineUsageDetailsWith plus_occ_info ud1 ud2
2517 | isEmptyDetails ud1 = ud2
2518 | isEmptyDetails ud2 = ud1
2519 | otherwise
2520 = UD { ud_env = plusVarEnv_C plus_occ_info (ud_env ud1) (ud_env ud2)
2521 , ud_z_many = plusVarEnv (ud_z_many ud1) (ud_z_many ud2)
2522 , ud_z_in_lam = plusVarEnv (ud_z_in_lam ud1) (ud_z_in_lam ud2)
2523 , ud_z_no_tail = plusVarEnv (ud_z_no_tail ud1) (ud_z_no_tail ud2) }
2524
2525 doZapping :: UsageDetails -> Var -> OccInfo -> OccInfo
2526 doZapping ud var occ
2527 = doZappingByUnique ud (varUnique var) occ
2528
2529 doZappingByUnique :: UsageDetails -> Unique -> OccInfo -> OccInfo
2530 doZappingByUnique ud uniq
2531 = (if | in_subset ud_z_many -> markMany
2532 | in_subset ud_z_in_lam -> markInsideLam
2533 | otherwise -> id) .
2534 (if | in_subset ud_z_no_tail -> markNonTailCalled
2535 | otherwise -> id)
2536 where
2537 in_subset field = uniq `elemVarEnvByKey` field ud
2538
2539 alterZappedSets :: UsageDetails -> (ZappedSet -> ZappedSet) -> UsageDetails
2540 alterZappedSets ud f
2541 = ud { ud_z_many = f (ud_z_many ud)
2542 , ud_z_in_lam = f (ud_z_in_lam ud)
2543 , ud_z_no_tail = f (ud_z_no_tail ud) }
2544
2545 alterUsageDetails :: UsageDetails -> (OccInfoEnv -> OccInfoEnv) -> UsageDetails
2546 alterUsageDetails ud f
2547 = ud { ud_env = f (ud_env ud) }
2548 `alterZappedSets` f
2549
2550 flattenUsageDetails :: UsageDetails -> UsageDetails
2551 flattenUsageDetails ud
2552 = ud { ud_env = mapUFM_Directly (doZappingByUnique ud) (ud_env ud) }
2553 `alterZappedSets` const emptyVarEnv
2554
2555 -------------------
2556 -- See Note [Adjusting right-hand sides]
2557 adjustRhsUsage :: Maybe JoinArity -> RecFlag
2558 -> [CoreBndr] -- Outer lambdas, AFTER occ anal
2559 -> UsageDetails -> UsageDetails
2560 adjustRhsUsage mb_join_arity rec_flag bndrs usage
2561 = maybe_mark_lam (maybe_drop_tails usage)
2562 where
2563 maybe_mark_lam ud | one_shot = ud
2564 | otherwise = markAllInsideLam ud
2565 maybe_drop_tails ud | exact_join = ud
2566 | otherwise = markAllNonTailCalled ud
2567
2568 one_shot = case mb_join_arity of
2569 Just join_arity
2570 | isRec rec_flag -> False
2571 | otherwise -> all isOneShotBndr (drop join_arity bndrs)
2572 Nothing -> all isOneShotBndr bndrs
2573
2574 exact_join = case mb_join_arity of
2575 Just join_arity -> join_arity == length bndrs
2576 _ -> False
2577
2578 type IdWithOccInfo = Id
2579
2580 tagLamBinders :: UsageDetails -- Of scope
2581 -> [Id] -- Binders
2582 -> (UsageDetails, -- Details with binders removed
2583 [IdWithOccInfo]) -- Tagged binders
2584 -- Used for lambda and case binders
2585 -- It copes with the fact that lambda bindings can have a
2586 -- stable unfolding, used for join points
2587 tagLamBinders usage binders = usage' `seq` (usage', bndrs')
2588 where
2589 (usage', bndrs') = mapAccumR tag_lam usage binders
2590 tag_lam usage bndr = (usage2, bndr')
2591 where
2592 occ = lookupDetails usage bndr
2593 bndr' = setBinderOcc (markNonTailCalled occ) bndr
2594 -- Don't try to make an argument into a join point
2595 usage1 = usage `delDetails` bndr
2596 usage2 | isId bndr = addManyOccsSet usage1 (idUnfoldingVars bndr)
2597 -- This is effectively the RHS of a
2598 -- non-join-point binding, so it's okay to use
2599 -- addManyOccsSet, which assumes no tail calls
2600 | otherwise = usage1
2601
2602 tagNonRecBinder :: TopLevelFlag -- At top level?
2603 -> UsageDetails -- Of scope
2604 -> CoreBndr -- Binder
2605 -> (UsageDetails, -- Details with binder removed
2606 IdWithOccInfo) -- Tagged binder
2607
2608 tagNonRecBinder lvl usage binder
2609 = let
2610 occ = lookupDetails usage binder
2611 will_be_join = decideJoinPointHood lvl usage [binder]
2612 occ' | will_be_join = -- must already be marked AlwaysTailCalled
2613 ASSERT(isAlwaysTailCalled occ) occ
2614 | otherwise = markNonTailCalled occ
2615 binder' = setBinderOcc occ' binder
2616 usage' = usage `delDetails` binder
2617 in
2618 usage' `seq` (usage', binder')
2619
2620 tagRecBinders :: TopLevelFlag -- At top level?
2621 -> UsageDetails -- Of body of let ONLY
2622 -> [(CoreBndr, -- Binder
2623 UsageDetails, -- RHS usage details
2624 [CoreBndr])] -- Lambdas in new RHS
2625 -> (UsageDetails, -- Adjusted details for whole scope,
2626 -- with binders removed
2627 [IdWithOccInfo]) -- Tagged binders
2628 -- Substantially more complicated than non-recursive case. Need to adjust RHS
2629 -- details *before* tagging binders (because the tags depend on the RHSes).
2630 tagRecBinders lvl body_uds triples
2631 = let
2632 (bndrs, rhs_udss, _) = unzip3 triples
2633
2634 -- 1. Determine join-point-hood of whole group, as determined by
2635 -- the *unadjusted* usage details
2636 unadj_uds = body_uds +++ combineUsageDetailsList rhs_udss
2637 will_be_joins = decideJoinPointHood lvl unadj_uds bndrs
2638
2639 -- 2. Adjust usage details of each RHS, taking into account the
2640 -- join-point-hood decision
2641 rhs_udss' = map adjust triples
2642 adjust (bndr, rhs_uds, rhs_bndrs)
2643 = adjustRhsUsage mb_join_arity Recursive rhs_bndrs rhs_uds
2644 where
2645 -- Can't use willBeJoinId_maybe here because we haven't tagged the
2646 -- binder yet (the tag depends on these adjustments!)
2647 mb_join_arity
2648 | will_be_joins
2649 , let occ = lookupDetails unadj_uds bndr
2650 , AlwaysTailCalled arity <- tailCallInfo occ
2651 = Just arity
2652 | otherwise
2653 = ASSERT(not will_be_joins) -- Should be AlwaysTailCalled if we're
2654 -- making join points!
2655 Nothing
2656
2657 -- 3. Compute final usage details from adjusted RHS details
2658 adj_uds = body_uds +++ combineUsageDetailsList rhs_udss'
2659
2660 -- 4. Tag each binder with its adjusted details modulo the
2661 -- join-point-hood decision
2662 occs = map (lookupDetails adj_uds) bndrs
2663 occs' | will_be_joins = occs
2664 | otherwise = map markNonTailCalled occs
2665 bndrs' = zipWith setBinderOcc occs' bndrs
2666
2667 -- 5. Drop the binders from the adjusted details and return
2668 usage' = adj_uds `delDetailsList` bndrs
2669 in
2670 (usage', bndrs')
2671
2672 setBinderOcc :: OccInfo -> CoreBndr -> CoreBndr
2673 setBinderOcc occ_info bndr
2674 | isTyVar bndr = bndr
2675 | isExportedId bndr = if isManyOccs (idOccInfo bndr)
2676 then bndr
2677 else setIdOccInfo bndr noOccInfo
2678 -- Don't use local usage info for visible-elsewhere things
2679 -- BUT *do* erase any IAmALoopBreaker annotation, because we're
2680 -- about to re-generate it and it shouldn't be "sticky"
2681
2682 | otherwise = setIdOccInfo bndr occ_info
2683
2684 -- | Decide whether some bindings should be made into join points or not.
2685 -- Returns `False` if they can't be join points. Note that it's an
2686 -- all-or-nothing decision, as if multiple binders are given, they're assumed to
2687 -- be mutually recursive.
2688 --
2689 -- See Note [Invariants for join points] in CoreSyn.
2690 decideJoinPointHood :: TopLevelFlag -> UsageDetails
2691 -> [CoreBndr]
2692 -> Bool
2693 decideJoinPointHood TopLevel _ _
2694 = False
2695 decideJoinPointHood NotTopLevel usage bndrs
2696 | isJoinId (head bndrs)
2697 = WARN(not all_ok, text "OccurAnal failed to rediscover join point(s):" <+>
2698 ppr bndrs)
2699 all_ok
2700 | otherwise
2701 = all_ok
2702 where
2703 -- See Note [Invariants on join points]; invariants cited by number below.
2704 -- Invariant 2 is always satisfiable by the simplifier by eta expansion.
2705 all_ok = -- Invariant 3: Either all are join points or none are
2706 all ok bndrs
2707
2708 ok bndr
2709 | -- Invariant 1: Only tail calls, all same join arity
2710 AlwaysTailCalled arity <- tailCallInfo (lookupDetails usage bndr)
2711 , -- Invariant 1 as applied to LHSes of rules
2712 all (ok_rule arity) (idCoreRules bndr)
2713 -- Invariant 4: Satisfies polymorphism rule
2714 , isValidJoinPointType arity (idType bndr)
2715 = True
2716 | otherwise
2717 = False
2718
2719 ok_rule _ BuiltinRule{} = False -- only possible with plugin shenanigans
2720 ok_rule join_arity (Rule { ru_args = args })
2721 = length args == join_arity
2722 -- Invariant 1 as applied to LHSes of rules
2723
2724 willBeJoinId_maybe :: CoreBndr -> Maybe JoinArity
2725 willBeJoinId_maybe bndr
2726 | AlwaysTailCalled arity <- tailCallInfo (idOccInfo bndr)
2727 = Just arity
2728 | otherwise
2729 = isJoinId_maybe bndr
2730
2731 {-
2732 ************************************************************************
2733 * *
2734 \subsection{Operations over OccInfo}
2735 * *
2736 ************************************************************************
2737 -}
2738
2739 markMany, markInsideLam, markNonTailCalled :: OccInfo -> OccInfo
2740
2741 markMany IAmDead = IAmDead
2742 markMany occ = ManyOccs { occ_tail = occ_tail occ }
2743
2744 markInsideLam occ@(OneOcc {}) = occ { occ_in_lam = True }
2745 markInsideLam occ = occ
2746
2747 markNonTailCalled IAmDead = IAmDead
2748 markNonTailCalled occ = occ { occ_tail = NoTailCallInfo }
2749
2750 addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
2751
2752 addOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2753 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2754 tailCallInfo a2 }
2755 -- Both branches are at least One
2756 -- (Argument is never IAmDead)
2757
2758 -- (orOccInfo orig new) is used
2759 -- when combining occurrence info from branches of a case
2760
2761 orOccInfo (OneOcc { occ_in_lam = in_lam1, occ_int_cxt = int_cxt1
2762 , occ_tail = tail1 })
2763 (OneOcc { occ_in_lam = in_lam2, occ_int_cxt = int_cxt2
2764 , occ_tail = tail2 })
2765 = OneOcc { occ_in_lam = in_lam1 || in_lam2
2766 , occ_one_br = False -- False, because it occurs in both branches
2767 , occ_int_cxt = int_cxt1 && int_cxt2
2768 , occ_tail = tail1 `andTailCallInfo` tail2 }
2769 orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2770 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2771 tailCallInfo a2 }
2772
2773 andTailCallInfo :: TailCallInfo -> TailCallInfo -> TailCallInfo
2774 andTailCallInfo info@(AlwaysTailCalled arity1) (AlwaysTailCalled arity2)
2775 | arity1 == arity2 = info
2776 andTailCallInfo _ _ = NoTailCallInfo