Fix join-point decision
[ghc.git] / compiler / simplCore / OccurAnal.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 ************************************************************************
5 * *
6 \section[OccurAnal]{Occurrence analysis pass}
7 * *
8 ************************************************************************
9
10 The occurrence analyser re-typechecks a core expression, returning a new
11 core expression with (hopefully) improved usage information.
12 -}
13
14 {-# LANGUAGE CPP, BangPatterns, MultiWayIf, ViewPatterns #-}
15
16 module OccurAnal (
17 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
18 ) where
19
20 #include "HsVersions.h"
21
22 import GhcPrelude
23
24 import CoreSyn
25 import CoreFVs
26 import CoreUtils ( exprIsTrivial, isDefaultAlt, isExpandableApp,
27 stripTicksTopE, mkTicks )
28 import CoreArity ( joinRhsArity )
29 import Id
30 import IdInfo
31 import Name( localiseName )
32 import BasicTypes
33 import Module( Module )
34 import Coercion
35 import Type
36
37 import VarSet
38 import VarEnv
39 import Var
40 import Demand ( argOneShots, argsOneShots )
41 import Digraph ( SCC(..), Node(..)
42 , stronglyConnCompFromEdgedVerticesUniq
43 , stronglyConnCompFromEdgedVerticesUniqR )
44 import Unique
45 import UniqFM
46 import UniqSet
47 import Util
48 import Outputable
49 import Data.List
50 import Control.Arrow ( second )
51
52 {-
53 ************************************************************************
54 * *
55 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
56 * *
57 ************************************************************************
58
59 Here's the externally-callable interface:
60 -}
61
62 occurAnalysePgm :: Module -- Used only in debug output
63 -> (Id -> Bool) -- Active unfoldings
64 -> (Activation -> Bool) -- Active rules
65 -> [CoreRule] -> [CoreVect] -> VarSet
66 -> CoreProgram -> CoreProgram
67 occurAnalysePgm this_mod active_unf active_rule imp_rules vects vectVars binds
68 | isEmptyDetails final_usage
69 = occ_anald_binds
70
71 | otherwise -- See Note [Glomming]
72 = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
73 2 (ppr final_usage ) )
74 occ_anald_glommed_binds
75 where
76 init_env = initOccEnv { occ_rule_act = active_rule
77 , occ_unf_act = active_unf }
78
79 (final_usage, occ_anald_binds) = go init_env binds
80 (_, occ_anald_glommed_binds) = occAnalRecBind init_env TopLevel
81 imp_rule_edges
82 (flattenBinds occ_anald_binds)
83 initial_uds
84 -- It's crucial to re-analyse the glommed-together bindings
85 -- so that we establish the right loop breakers. Otherwise
86 -- we can easily create an infinite loop (Trac #9583 is an example)
87
88 initial_uds = addManyOccsSet emptyDetails
89 (rulesFreeVars imp_rules `unionVarSet`
90 vectsFreeVars vects `unionVarSet`
91 vectVars)
92 -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations,
93 -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are
94 -- reflected in 'vectors' — see Note [Vectorisation declarations and occurrences].)
95
96 -- Note [Preventing loops due to imported functions rules]
97 imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
98 [ mapVarEnv (const maps_to) $
99 getUniqSet (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
100 | imp_rule <- imp_rules
101 , not (isBuiltinRule imp_rule) -- See Note [Plugin rules]
102 , let maps_to = exprFreeIds (ru_rhs imp_rule)
103 `delVarSetList` ru_bndrs imp_rule
104 , arg <- ru_args imp_rule ]
105
106 go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
107 go _ []
108 = (initial_uds, [])
109 go env (bind:binds)
110 = (final_usage, bind' ++ binds')
111 where
112 (bs_usage, binds') = go env binds
113 (final_usage, bind') = occAnalBind env TopLevel imp_rule_edges bind
114 bs_usage
115
116 occurAnalyseExpr :: CoreExpr -> CoreExpr
117 -- Do occurrence analysis, and discard occurrence info returned
118 occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap
119
120 occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
121 occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap
122
123 occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
124 occurAnalyseExpr' enable_binder_swap expr
125 = snd (occAnal env expr)
126 where
127 env = initOccEnv { occ_binder_swap = enable_binder_swap }
128
129 {- Note [Plugin rules]
130 ~~~~~~~~~~~~~~~~~~~~~~
131 Conal Elliott (Trac #11651) built a GHC plugin that added some
132 BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
133 do some domain-specific transformations that could not be expressed
134 with an ordinary pattern-matching CoreRule. But then we can't extract
135 the dependencies (in imp_rule_edges) from ru_rhs etc, because a
136 BuiltinRule doesn't have any of that stuff.
137
138 So we simply assume that BuiltinRules have no dependencies, and filter
139 them out from the imp_rule_edges comprehension.
140 -}
141
142 {-
143 ************************************************************************
144 * *
145 Bindings
146 * *
147 ************************************************************************
148
149 Note [Recursive bindings: the grand plan]
150 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
151 When we come across a binding group
152 Rec { x1 = r1; ...; xn = rn }
153 we treat it like this (occAnalRecBind):
154
155 1. Occurrence-analyse each right hand side, and build a
156 "Details" for each binding to capture the results.
157
158 Wrap the details in a Node (details, node-id, dep-node-ids),
159 where node-id is just the unique of the binder, and
160 dep-node-ids lists all binders on which this binding depends.
161 We'll call these the "scope edges".
162 See Note [Forming the Rec groups].
163
164 All this is done by makeNode.
165
166 2. Do SCC-analysis on these Nodes. Each SCC will become a new Rec or
167 NonRec. The key property is that every free variable of a binding
168 is accounted for by the scope edges, so that when we are done
169 everything is still in scope.
170
171 3. For each Cyclic SCC of the scope-edge SCC-analysis in (2), we
172 identify suitable loop-breakers to ensure that inlining terminates.
173 This is done by occAnalRec.
174
175 4. To do so we form a new set of Nodes, with the same details, but
176 different edges, the "loop-breaker nodes". The loop-breaker nodes
177 have both more and fewer dependencies than the scope edges
178 (see Note [Choosing loop breakers])
179
180 More edges: if f calls g, and g has an active rule that mentions h
181 then we add an edge from f -> h
182
183 Fewer edges: we only include dependencies on active rules, on rule
184 RHSs (not LHSs) and if there is an INLINE pragma only
185 on the stable unfolding (and vice versa). The scope
186 edges must be much more inclusive.
187
188 5. The "weak fvs" of a node are, by definition:
189 the scope fvs - the loop-breaker fvs
190 See Note [Weak loop breakers], and the nd_weak field of Details
191
192 6. Having formed the loop-breaker nodes
193
194 Note [Dead code]
195 ~~~~~~~~~~~~~~~~
196 Dropping dead code for a cyclic Strongly Connected Component is done
197 in a very simple way:
198
199 the entire SCC is dropped if none of its binders are mentioned
200 in the body; otherwise the whole thing is kept.
201
202 The key observation is that dead code elimination happens after
203 dependency analysis: so 'occAnalBind' processes SCCs instead of the
204 original term's binding groups.
205
206 Thus 'occAnalBind' does indeed drop 'f' in an example like
207
208 letrec f = ...g...
209 g = ...(...g...)...
210 in
211 ...g...
212
213 when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
214 'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
215 'AcyclicSCC f', where 'body_usage' won't contain 'f'.
216
217 ------------------------------------------------------------
218 Note [Forming Rec groups]
219 ~~~~~~~~~~~~~~~~~~~~~~~~~
220 We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
221 and "g uses f", no matter how indirectly. We do a SCC analysis
222 with an edge f -> g if "f uses g".
223
224 More precisely, "f uses g" iff g should be in scope wherever f is.
225 That is, g is free in:
226 a) the rhs 'ef'
227 b) or the RHS of a rule for f (Note [Rules are extra RHSs])
228 c) or the LHS or a rule for f (Note [Rule dependency info])
229
230 These conditions apply regardless of the activation of the RULE (eg it might be
231 inactive in this phase but become active later). Once a Rec is broken up
232 it can never be put back together, so we must be conservative.
233
234 The principle is that, regardless of rule firings, every variable is
235 always in scope.
236
237 * Note [Rules are extra RHSs]
238 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
239 A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
240 keeps the specialised "children" alive. If the parent dies
241 (because it isn't referenced any more), then the children will die
242 too (unless they are already referenced directly).
243
244 To that end, we build a Rec group for each cyclic strongly
245 connected component,
246 *treating f's rules as extra RHSs for 'f'*.
247 More concretely, the SCC analysis runs on a graph with an edge
248 from f -> g iff g is mentioned in
249 (a) f's rhs
250 (b) f's RULES
251 These are rec_edges.
252
253 Under (b) we include variables free in *either* LHS *or* RHS of
254 the rule. The former might seems silly, but see Note [Rule
255 dependency info]. So in Example [eftInt], eftInt and eftIntFB
256 will be put in the same Rec, even though their 'main' RHSs are
257 both non-recursive.
258
259 * Note [Rule dependency info]
260 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
261 The VarSet in a RuleInfo is used for dependency analysis in the
262 occurrence analyser. We must track free vars in *both* lhs and rhs.
263 Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
264 Why both? Consider
265 x = y
266 RULE f x = v+4
267 Then if we substitute y for x, we'd better do so in the
268 rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
269 as well as 'v'
270
271 * Note [Rules are visible in their own rec group]
272 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
273 We want the rules for 'f' to be visible in f's right-hand side.
274 And we'd like them to be visible in other functions in f's Rec
275 group. E.g. in Note [Specialisation rules] we want f' rule
276 to be visible in both f's RHS, and fs's RHS.
277
278 This means that we must simplify the RULEs first, before looking
279 at any of the definitions. This is done by Simplify.simplRecBind,
280 when it calls addLetIdInfo.
281
282 ------------------------------------------------------------
283 Note [Choosing loop breakers]
284 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
285 Loop breaking is surprisingly subtle. First read the section 4 of
286 "Secrets of the GHC inliner". This describes our basic plan.
287 We avoid infinite inlinings by choosing loop breakers, and
288 ensuring that a loop breaker cuts each loop.
289
290 See also Note [Inlining and hs-boot files] in ToIface, which deals
291 with a closely related source of infinite loops.
292
293 Fundamentally, we do SCC analysis on a graph. For each recursive
294 group we choose a loop breaker, delete all edges to that node,
295 re-analyse the SCC, and iterate.
296
297 But what is the graph? NOT the same graph as was used for Note
298 [Forming Rec groups]! In particular, a RULE is like an equation for
299 'f' that is *always* inlined if it is applicable. We do *not* disable
300 rules for loop-breakers. It's up to whoever makes the rules to make
301 sure that the rules themselves always terminate. See Note [Rules for
302 recursive functions] in Simplify.hs
303
304 Hence, if
305 f's RHS (or its INLINE template if it has one) mentions g, and
306 g has a RULE that mentions h, and
307 h has a RULE that mentions f
308
309 then we *must* choose f to be a loop breaker. Example: see Note
310 [Specialisation rules].
311
312 In general, take the free variables of f's RHS, and augment it with
313 all the variables reachable by RULES from those starting points. That
314 is the whole reason for computing rule_fv_env in occAnalBind. (Of
315 course we only consider free vars that are also binders in this Rec
316 group.) See also Note [Finding rule RHS free vars]
317
318 Note that when we compute this rule_fv_env, we only consider variables
319 free in the *RHS* of the rule, in contrast to the way we build the
320 Rec group in the first place (Note [Rule dependency info])
321
322 Note that if 'g' has RHS that mentions 'w', we should add w to
323 g's loop-breaker edges. More concretely there is an edge from f -> g
324 iff
325 (a) g is mentioned in f's RHS `xor` f's INLINE rhs
326 (see Note [Inline rules])
327 (b) or h is mentioned in f's RHS, and
328 g appears in the RHS of an active RULE of h
329 or a transitive sequence of active rules starting with h
330
331 Why "active rules"? See Note [Finding rule RHS free vars]
332
333 Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
334 chosen as a loop breaker, because their RHSs don't mention each other.
335 And indeed both can be inlined safely.
336
337 Note again that the edges of the graph we use for computing loop breakers
338 are not the same as the edges we use for computing the Rec blocks.
339 That's why we compute
340
341 - rec_edges for the Rec block analysis
342 - loop_breaker_nodes for the loop breaker analysis
343
344 * Note [Finding rule RHS free vars]
345 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
346 Consider this real example from Data Parallel Haskell
347 tagZero :: Array Int -> Array Tag
348 {-# INLINE [1] tagZeroes #-}
349 tagZero xs = pmap (\x -> fromBool (x==0)) xs
350
351 {-# RULES "tagZero" [~1] forall xs n.
352 pmap fromBool <blah blah> = tagZero xs #-}
353 So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
354 However, tagZero can only be inlined in phase 1 and later, while
355 the RULE is only active *before* phase 1. So there's no problem.
356
357 To make this work, we look for the RHS free vars only for
358 *active* rules. That's the reason for the occ_rule_act field
359 of the OccEnv.
360
361 * Note [Weak loop breakers]
362 ~~~~~~~~~~~~~~~~~~~~~~~~~
363 There is a last nasty wrinkle. Suppose we have
364
365 Rec { f = f_rhs
366 RULE f [] = g
367
368 h = h_rhs
369 g = h
370 ...more...
371 }
372
373 Remember that we simplify the RULES before any RHS (see Note
374 [Rules are visible in their own rec group] above).
375
376 So we must *not* postInlineUnconditionally 'g', even though
377 its RHS turns out to be trivial. (I'm assuming that 'g' is
378 not choosen as a loop breaker.) Why not? Because then we
379 drop the binding for 'g', which leaves it out of scope in the
380 RULE!
381
382 Here's a somewhat different example of the same thing
383 Rec { g = h
384 ; h = ...f...
385 ; f = f_rhs
386 RULE f [] = g }
387 Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
388 g, because the RULE for f is active throughout. So the RHS of h
389 might rewrite to h = ...g...
390 So g must remain in scope in the output program!
391
392 We "solve" this by:
393
394 Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
395 iff g is a "missing free variable" of the Rec group
396
397 A "missing free variable" x is one that is mentioned in an RHS or
398 INLINE or RULE of a binding in the Rec group, but where the
399 dependency on x may not show up in the loop_breaker_nodes (see
400 note [Choosing loop breakers} above).
401
402 A normal "strong" loop breaker has IAmLoopBreaker False. So
403
404 Inline postInlineUnconditionally
405 strong IAmLoopBreaker False no no
406 weak IAmLoopBreaker True yes no
407 other yes yes
408
409 The **sole** reason for this kind of loop breaker is so that
410 postInlineUnconditionally does not fire. Ugh. (Typically it'll
411 inline via the usual callSiteInline stuff, so it'll be dead in the
412 next pass, so the main Ugh is the tiresome complication.)
413
414 Note [Rules for imported functions]
415 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
416 Consider this
417 f = /\a. B.g a
418 RULE B.g Int = 1 + f Int
419 Note that
420 * The RULE is for an imported function.
421 * f is non-recursive
422 Now we
423 can get
424 f Int --> B.g Int Inlining f
425 --> 1 + f Int Firing RULE
426 and so the simplifier goes into an infinite loop. This
427 would not happen if the RULE was for a local function,
428 because we keep track of dependencies through rules. But
429 that is pretty much impossible to do for imported Ids. Suppose
430 f's definition had been
431 f = /\a. C.h a
432 where (by some long and devious process), C.h eventually inlines to
433 B.g. We could only spot such loops by exhaustively following
434 unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
435 f.
436
437 Note that RULES for imported functions are important in practice; they
438 occur a lot in the libraries.
439
440 We regard this potential infinite loop as a *programmer* error.
441 It's up the programmer not to write silly rules like
442 RULE f x = f x
443 and the example above is just a more complicated version.
444
445 Note [Preventing loops due to imported functions rules]
446 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
447 Consider:
448 import GHC.Base (foldr)
449
450 {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
451 filter p xs = build (\c n -> foldr (filterFB c p) n xs)
452 filterFB c p = ...
453
454 f = filter p xs
455
456 Note that filter is not a loop-breaker, so what happens is:
457 f = filter p xs
458 = {inline} build (\c n -> foldr (filterFB c p) n xs)
459 = {inline} foldr (filterFB (:) p) [] xs
460 = {RULE} filter p xs
461
462 We are in an infinite loop.
463
464 A more elaborate example (that I actually saw in practice when I went to
465 mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
466 {-# LANGUAGE RankNTypes #-}
467 module GHCList where
468
469 import Prelude hiding (filter)
470 import GHC.Base (build)
471
472 {-# INLINABLE filter #-}
473 filter :: (a -> Bool) -> [a] -> [a]
474 filter p [] = []
475 filter p (x:xs) = if p x then x : filter p xs else filter p xs
476
477 {-# NOINLINE [0] filterFB #-}
478 filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
479 filterFB c p x r | p x = x `c` r
480 | otherwise = r
481
482 {-# RULES
483 "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr
484 (filterFB c p) n xs)
485 "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p
486 #-}
487
488 Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
489 are not), the unfolding given to "filter" in the interface file will be:
490 filter p [] = []
491 filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
492 else build (\c n -> foldr (filterFB c p) n xs
493
494 Note that because this unfolding does not mention "filter", filter is not
495 marked as a strong loop breaker. Therefore at a use site in another module:
496 filter p xs
497 = {inline}
498 case xs of [] -> []
499 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
500 else build (\c n -> foldr (filterFB c p) n xs)
501
502 build (\c n -> foldr (filterFB c p) n xs)
503 = {inline} foldr (filterFB (:) p) [] xs
504 = {RULE} filter p xs
505
506 And we are in an infinite loop again, except that this time the loop is producing an
507 infinitely large *term* (an unrolling of filter) and so the simplifier finally
508 dies with "ticks exhausted"
509
510 Because of this problem, we make a small change in the occurrence analyser
511 designed to mark functions like "filter" as strong loop breakers on the basis that:
512 1. The RHS of filter mentions the local function "filterFB"
513 2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS
514
515 So for each RULE for an *imported* function we are going to add
516 dependency edges between the *local* FVS of the rule LHS and the
517 *local* FVS of the rule RHS. We don't do anything special for RULES on
518 local functions because the standard occurrence analysis stuff is
519 pretty good at getting loop-breakerness correct there.
520
521 It is important to note that even with this extra hack we aren't always going to get
522 things right. For example, it might be that the rule LHS mentions an imported Id,
523 and another module has a RULE that can rewrite that imported Id to one of our local
524 Ids.
525
526 Note [Specialising imported functions] (referred to from Specialise)
527 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
528 BUT for *automatically-generated* rules, the programmer can't be
529 responsible for the "programmer error" in Note [Rules for imported
530 functions]. In paricular, consider specialising a recursive function
531 defined in another module. If we specialise a recursive function B.g,
532 we get
533 g_spec = .....(B.g Int).....
534 RULE B.g Int = g_spec
535 Here, g_spec doesn't look recursive, but when the rule fires, it
536 becomes so. And if B.g was mutually recursive, the loop might
537 not be as obvious as it is here.
538
539 To avoid this,
540 * When specialising a function that is a loop breaker,
541 give a NOINLINE pragma to the specialised function
542
543 Note [Glomming]
544 ~~~~~~~~~~~~~~~
545 RULES for imported Ids can make something at the top refer to something at the bottom:
546 f = \x -> B.g (q x)
547 h = \y -> 3
548
549 RULE: B.g (q x) = h x
550
551 Applying this rule makes f refer to h, although f doesn't appear to
552 depend on h. (And, as in Note [Rules for imported functions], the
553 dependency might be more indirect. For example, f might mention C.t
554 rather than B.g, where C.t eventually inlines to B.g.)
555
556 NOTICE that this cannot happen for rules whose head is a
557 locally-defined function, because we accurately track dependencies
558 through RULES. It only happens for rules whose head is an imported
559 function (B.g in the example above).
560
561 Solution:
562 - When simplifying, bring all top level identifiers into
563 scope at the start, ignoring the Rec/NonRec structure, so
564 that when 'h' pops up in f's rhs, we find it in the in-scope set
565 (as the simplifier generally expects). This happens in simplTopBinds.
566
567 - In the occurrence analyser, if there are any out-of-scope
568 occurrences that pop out of the top, which will happen after
569 firing the rule: f = \x -> h x
570 h = \y -> 3
571 then just glom all the bindings into a single Rec, so that
572 the *next* iteration of the occurrence analyser will sort
573 them all out. This part happens in occurAnalysePgm.
574
575 ------------------------------------------------------------
576 Note [Inline rules]
577 ~~~~~~~~~~~~~~~~~~~
578 None of the above stuff about RULES applies to Inline Rules,
579 stored in a CoreUnfolding. The unfolding, if any, is simplified
580 at the same time as the regular RHS of the function (ie *not* like
581 Note [Rules are visible in their own rec group]), so it should be
582 treated *exactly* like an extra RHS.
583
584 Or, rather, when computing loop-breaker edges,
585 * If f has an INLINE pragma, and it is active, we treat the
586 INLINE rhs as f's rhs
587 * If it's inactive, we treat f as having no rhs
588 * If it has no INLINE pragma, we look at f's actual rhs
589
590
591 There is a danger that we'll be sub-optimal if we see this
592 f = ...f...
593 [INLINE f = ..no f...]
594 where f is recursive, but the INLINE is not. This can just about
595 happen with a sufficiently odd set of rules; eg
596
597 foo :: Int -> Int
598 {-# INLINE [1] foo #-}
599 foo x = x+1
600
601 bar :: Int -> Int
602 {-# INLINE [1] bar #-}
603 bar x = foo x + 1
604
605 {-# RULES "foo" [~1] forall x. foo x = bar x #-}
606
607 Here the RULE makes bar recursive; but it's INLINE pragma remains
608 non-recursive. It's tempting to then say that 'bar' should not be
609 a loop breaker, but an attempt to do so goes wrong in two ways:
610 a) We may get
611 $df = ...$cfoo...
612 $cfoo = ...$df....
613 [INLINE $cfoo = ...no-$df...]
614 But we want $cfoo to depend on $df explicitly so that we
615 put the bindings in the right order to inline $df in $cfoo
616 and perhaps break the loop altogether. (Maybe this
617 b)
618
619
620 Example [eftInt]
621 ~~~~~~~~~~~~~~~
622 Example (from GHC.Enum):
623
624 eftInt :: Int# -> Int# -> [Int]
625 eftInt x y = ...(non-recursive)...
626
627 {-# INLINE [0] eftIntFB #-}
628 eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
629 eftIntFB c n x y = ...(non-recursive)...
630
631 {-# RULES
632 "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
633 "eftIntList" [1] eftIntFB (:) [] = eftInt
634 #-}
635
636 Note [Specialisation rules]
637 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
638 Consider this group, which is typical of what SpecConstr builds:
639
640 fs a = ....f (C a)....
641 f x = ....f (C a)....
642 {-# RULE f (C a) = fs a #-}
643
644 So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
645
646 But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
647 - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
648 - fs is inlined (say it's small)
649 - now there's another opportunity to apply the RULE
650
651 This showed up when compiling Control.Concurrent.Chan.getChanContents.
652
653 ------------------------------------------------------------
654 Note [Finding join points]
655 ~~~~~~~~~~~~~~~~~~~~~~~~~~
656 It's the occurrence analyser's job to find bindings that we can turn into join
657 points, but it doesn't perform that transformation right away. Rather, it marks
658 the eligible bindings as part of their occurrence data, leaving it to the
659 simplifier (or to simpleOptPgm) to actually change the binder's 'IdDetails'.
660 The simplifier then eta-expands the RHS if needed and then updates the
661 occurrence sites. Dividing the work this way means that the occurrence analyser
662 still only takes one pass, yet one can always tell the difference between a
663 function call and a jump by looking at the occurrence (because the same pass
664 changes the 'IdDetails' and propagates the binders to their occurrence sites).
665
666 To track potential join points, we use the 'occ_tail' field of OccInfo. A value
667 of `AlwaysTailCalled n` indicates that every occurrence of the variable is a
668 tail call with `n` arguments (counting both value and type arguments). Otherwise
669 'occ_tail' will be 'NoTailCallInfo'. The tail call info flows bottom-up with the
670 rest of 'OccInfo' until it goes on the binder.
671
672 Note [Rules and join points]
673 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
674
675 Things get fiddly with rules. Suppose we have:
676
677 let j :: Int -> Int
678 j y = 2 * y
679 k :: Int -> Int -> Int
680 {-# RULES "SPEC k 0" k 0 = j #-}
681 k x y = x + 2 * y
682 in ...
683
684 Now suppose that both j and k appear only as saturated tail calls in the body.
685 Thus we would like to make them both join points. The rule complicates matters,
686 though, as its RHS has an unapplied occurrence of j. *However*, if we were to
687 eta-expand the rule, all would be well:
688
689 {-# RULES "SPEC k 0" forall a. k 0 a = j a #-}
690
691 So conceivably we could notice that a potential join point would have an
692 "undersaturated" rule and account for it. This would mean we could make
693 something that's been specialised a join point, for instance. But local bindings
694 are rarely specialised, and being overly cautious about rules only
695 costs us anything when, for some `j`:
696
697 * Before specialisation, `j` has non-tail calls, so it can't be a join point.
698 * During specialisation, `j` gets specialised and thus acquires rules.
699 * Sometime afterward, the non-tail calls to `j` disappear (as dead code, say),
700 and so now `j` *could* become a join point.
701
702 This appears to be very rare in practice. TODO Perhaps we should gather
703 statistics to be sure.
704
705 ------------------------------------------------------------
706 Note [Adjusting right-hand sides]
707 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
708 There's a bit of a dance we need to do after analysing a lambda expression or
709 a right-hand side. In particular, we need to
710
711 a) call 'markAllInsideLam' *unless* the binding is for a thunk, a one-shot
712 lambda, or a non-recursive join point; and
713 b) call 'markAllNonTailCalled' *unless* the binding is for a join point.
714
715 Some examples, with how the free occurrences in e (assumed not to be a value
716 lambda) get marked:
717
718 inside lam non-tail-called
719 ------------------------------------------------------------
720 let x = e No Yes
721 let f = \x -> e Yes Yes
722 let f = \x{OneShot} -> e No Yes
723 \x -> e Yes Yes
724 join j x = e No No
725 joinrec j x = e Yes No
726
727 There are a few other caveats; most importantly, if we're marking a binding as
728 'AlwaysTailCalled', it's *going* to be a join point, so we treat it as one so
729 that the effect cascades properly. Consequently, at the time the RHS is
730 analysed, we won't know what adjustments to make; thus 'occAnalLamOrRhs' must
731 return the unadjusted 'UsageDetails', to be adjusted by 'adjustRhsUsage' once
732 join-point-hood has been decided.
733
734 Thus the overall sequence taking place in 'occAnalNonRecBind' and
735 'occAnalRecBind' is as follows:
736
737 1. Call 'occAnalLamOrRhs' to find usage information for the RHS.
738 2. Call 'tagNonRecBinder' or 'tagRecBinders', which decides whether to make
739 the binding a join point.
740 3. Call 'adjustRhsUsage' accordingly. (Done as part of 'tagRecBinders' when
741 recursive.)
742
743 (In the recursive case, this logic is spread between 'makeNode' and
744 'occAnalRec'.)
745 -}
746
747 ------------------------------------------------------------------
748 -- occAnalBind
749 ------------------------------------------------------------------
750
751 occAnalBind :: OccEnv -- The incoming OccEnv
752 -> TopLevelFlag
753 -> ImpRuleEdges
754 -> CoreBind
755 -> UsageDetails -- Usage details of scope
756 -> (UsageDetails, -- Of the whole let(rec)
757 [CoreBind])
758
759 occAnalBind env lvl top_env (NonRec binder rhs) body_usage
760 = occAnalNonRecBind env lvl top_env binder rhs body_usage
761 occAnalBind env lvl top_env (Rec pairs) body_usage
762 = occAnalRecBind env lvl top_env pairs body_usage
763
764 -----------------
765 occAnalNonRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> Var -> CoreExpr
766 -> UsageDetails -> (UsageDetails, [CoreBind])
767 occAnalNonRecBind env lvl imp_rule_edges binder rhs body_usage
768 | isTyVar binder -- A type let; we don't gather usage info
769 = (body_usage, [NonRec binder rhs])
770
771 | not (binder `usedIn` body_usage) -- It's not mentioned
772 = (body_usage, [])
773
774 | otherwise -- It's mentioned in the body
775 = (body_usage' +++ rhs_usage', [NonRec tagged_binder rhs'])
776 where
777 (body_usage', tagged_binder) = tagNonRecBinder lvl body_usage binder
778 mb_join_arity = willBeJoinId_maybe tagged_binder
779
780 (bndrs, body) = collectBinders rhs
781
782 (rhs_usage1, bndrs', body') = occAnalNonRecRhs env tagged_binder bndrs body
783 rhs' = mkLams (markJoinOneShots mb_join_arity bndrs') body'
784 -- For a /non-recursive/ join point we can mark all
785 -- its join-lambda as one-shot; and it's a good idea to do so
786
787 -- Unfoldings
788 -- See Note [Unfoldings and join points]
789 rhs_usage2 = case occAnalUnfolding env NonRecursive binder of
790 Just unf_usage -> rhs_usage1 +++ unf_usage
791 Nothing -> rhs_usage1
792
793 -- Rules
794 -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
795 rules_w_uds = occAnalRules env mb_join_arity NonRecursive tagged_binder
796 rhs_usage3 = rhs_usage2 +++ combineUsageDetailsList
797 (map (\(_, l, r) -> l +++ r) rules_w_uds)
798 rhs_usage4 = maybe rhs_usage3 (addManyOccsSet rhs_usage3) $
799 lookupVarEnv imp_rule_edges binder
800 -- See Note [Preventing loops due to imported functions rules]
801
802 -- Final adjustment
803 rhs_usage' = adjustRhsUsage mb_join_arity NonRecursive bndrs' rhs_usage4
804
805 -----------------
806 occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)]
807 -> UsageDetails -> (UsageDetails, [CoreBind])
808 occAnalRecBind env lvl imp_rule_edges pairs body_usage
809 = foldr (occAnalRec env lvl) (body_usage, []) sccs
810 -- For a recursive group, we
811 -- * occ-analyse all the RHSs
812 -- * compute strongly-connected components
813 -- * feed those components to occAnalRec
814 -- See Note [Recursive bindings: the grand plan]
815 where
816 sccs :: [SCC Details]
817 sccs = {-# SCC "occAnalBind.scc" #-}
818 stronglyConnCompFromEdgedVerticesUniq nodes
819
820 nodes :: [LetrecNode]
821 nodes = {-# SCC "occAnalBind.assoc" #-}
822 map (makeNode env imp_rule_edges bndr_set) pairs
823
824 bndr_set = mkVarSet (map fst pairs)
825
826 {-
827 Note [Unfoldings and join points]
828 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
829
830 We assume that anything in an unfolding occurs multiple times, since unfoldings
831 are often copied (that's the whole point!). But we still need to track tail
832 calls for the purpose of finding join points.
833 -}
834
835 -----------------------------
836 occAnalRec :: OccEnv -> TopLevelFlag
837 -> SCC Details
838 -> (UsageDetails, [CoreBind])
839 -> (UsageDetails, [CoreBind])
840
841 -- The NonRec case is just like a Let (NonRec ...) above
842 occAnalRec _ lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs
843 , nd_uds = rhs_uds, nd_rhs_bndrs = rhs_bndrs }))
844 (body_uds, binds)
845 | not (bndr `usedIn` body_uds)
846 = (body_uds, binds) -- See Note [Dead code]
847
848 | otherwise -- It's mentioned in the body
849 = (body_uds' +++ rhs_uds',
850 NonRec tagged_bndr rhs : binds)
851 where
852 (body_uds', tagged_bndr) = tagNonRecBinder lvl body_uds bndr
853 rhs_uds' = adjustRhsUsage (willBeJoinId_maybe tagged_bndr) NonRecursive
854 rhs_bndrs rhs_uds
855
856 -- The Rec case is the interesting one
857 -- See Note [Recursive bindings: the grand plan]
858 -- See Note [Loop breaking]
859 occAnalRec env lvl (CyclicSCC details_s) (body_uds, binds)
860 | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
861 = (body_uds, binds) -- See Note [Dead code]
862
863 | otherwise -- At this point we always build a single Rec
864 = -- pprTrace "occAnalRec" (vcat
865 -- [ text "weak_fvs" <+> ppr weak_fvs
866 -- , text "lb nodes" <+> ppr loop_breaker_nodes])
867 (final_uds, Rec pairs : binds)
868
869 where
870 bndrs = map nd_bndr details_s
871 bndr_set = mkVarSet bndrs
872
873 ------------------------------
874 -- See Note [Choosing loop breakers] for loop_breaker_nodes
875 final_uds :: UsageDetails
876 loop_breaker_nodes :: [LetrecNode]
877 (final_uds, loop_breaker_nodes)
878 = mkLoopBreakerNodes env lvl bndr_set body_uds details_s
879
880 ------------------------------
881 weak_fvs :: VarSet
882 weak_fvs = mapUnionVarSet nd_weak details_s
883
884 ---------------------------
885 -- Now reconstruct the cycle
886 pairs :: [(Id,CoreExpr)]
887 pairs | isEmptyVarSet weak_fvs = reOrderNodes 0 bndr_set weak_fvs loop_breaker_nodes []
888 | otherwise = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_nodes []
889 -- If weak_fvs is empty, the loop_breaker_nodes will include
890 -- all the edges in the original scope edges [remember,
891 -- weak_fvs is the difference between scope edges and
892 -- lb-edges], so a fresh SCC computation would yield a
893 -- single CyclicSCC result; and reOrderNodes deals with
894 -- exactly that case
895
896
897 ------------------------------------------------------------------
898 -- Loop breaking
899 ------------------------------------------------------------------
900
901 type Binding = (Id,CoreExpr)
902
903 loopBreakNodes :: Int
904 -> VarSet -- All binders
905 -> VarSet -- Binders whose dependencies may be "missing"
906 -- See Note [Weak loop breakers]
907 -> [LetrecNode]
908 -> [Binding] -- Append these to the end
909 -> [Binding]
910 {-
911 loopBreakNodes is applied to the list of nodes for a cyclic strongly
912 connected component (there's guaranteed to be a cycle). It returns
913 the same nodes, but
914 a) in a better order,
915 b) with some of the Ids having a IAmALoopBreaker pragma
916
917 The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means
918 that the simplifier can guarantee not to loop provided it never records an inlining
919 for these no-inline guys.
920
921 Furthermore, the order of the binds is such that if we neglect dependencies
922 on the no-inline Ids then the binds are topologically sorted. This means
923 that the simplifier will generally do a good job if it works from top bottom,
924 recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
925 -}
926
927 -- Return the bindings sorted into a plausible order, and marked with loop breakers.
928 loopBreakNodes depth bndr_set weak_fvs nodes binds
929 = -- pprTrace "loopBreakNodes" (ppr nodes) $
930 go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds
931 where
932 go [] binds = binds
933 go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
934
935 loop_break_scc scc binds
936 = case scc of
937 AcyclicSCC node -> mk_non_loop_breaker weak_fvs node : binds
938 CyclicSCC nodes -> reOrderNodes depth bndr_set weak_fvs nodes binds
939
940 ----------------------------------
941 reOrderNodes :: Int -> VarSet -> VarSet -> [LetrecNode] -> [Binding] -> [Binding]
942 -- Choose a loop breaker, mark it no-inline,
943 -- and call loopBreakNodes on the rest
944 reOrderNodes _ _ _ [] _ = panic "reOrderNodes"
945 reOrderNodes _ _ _ [node] binds = mk_loop_breaker node : binds
946 reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
947 = -- pprTrace "reOrderNodes" (vcat [ text "unchosen" <+> ppr unchosen
948 -- , text "chosen" <+> ppr chosen_nodes ]) $
949 loopBreakNodes new_depth bndr_set weak_fvs unchosen $
950 (map mk_loop_breaker chosen_nodes ++ binds)
951 where
952 (chosen_nodes, unchosen) = chooseLoopBreaker approximate_lb
953 (nd_score (node_payload node))
954 [node] [] nodes
955
956 approximate_lb = depth >= 2
957 new_depth | approximate_lb = 0
958 | otherwise = depth+1
959 -- After two iterations (d=0, d=1) give up
960 -- and approximate, returning to d=0
961
962 mk_loop_breaker :: LetrecNode -> Binding
963 mk_loop_breaker (node_payload -> ND { nd_bndr = bndr, nd_rhs = rhs})
964 = (bndr `setIdOccInfo` strongLoopBreaker { occ_tail = tail_info }, rhs)
965 where
966 tail_info = tailCallInfo (idOccInfo bndr)
967
968 mk_non_loop_breaker :: VarSet -> LetrecNode -> Binding
969 -- See Note [Weak loop breakers]
970 mk_non_loop_breaker weak_fvs (node_payload -> ND { nd_bndr = bndr
971 , nd_rhs = rhs})
972 | bndr `elemVarSet` weak_fvs = (setIdOccInfo bndr occ', rhs)
973 | otherwise = (bndr, rhs)
974 where
975 occ' = weakLoopBreaker { occ_tail = tail_info }
976 tail_info = tailCallInfo (idOccInfo bndr)
977
978 ----------------------------------
979 chooseLoopBreaker :: Bool -- True <=> Too many iterations,
980 -- so approximate
981 -> NodeScore -- Best score so far
982 -> [LetrecNode] -- Nodes with this score
983 -> [LetrecNode] -- Nodes with higher scores
984 -> [LetrecNode] -- Unprocessed nodes
985 -> ([LetrecNode], [LetrecNode])
986 -- This loop looks for the bind with the lowest score
987 -- to pick as the loop breaker. The rest accumulate in
988 chooseLoopBreaker _ _ loop_nodes acc []
989 = (loop_nodes, acc) -- Done
990
991 -- If approximate_loop_breaker is True, we pick *all*
992 -- nodes with lowest score, else just one
993 -- See Note [Complexity of loop breaking]
994 chooseLoopBreaker approx_lb loop_sc loop_nodes acc (node : nodes)
995 | approx_lb
996 , rank sc == rank loop_sc
997 = chooseLoopBreaker approx_lb loop_sc (node : loop_nodes) acc nodes
998
999 | sc `betterLB` loop_sc -- Better score so pick this new one
1000 = chooseLoopBreaker approx_lb sc [node] (loop_nodes ++ acc) nodes
1001
1002 | otherwise -- Worse score so don't pick it
1003 = chooseLoopBreaker approx_lb loop_sc loop_nodes (node : acc) nodes
1004 where
1005 sc = nd_score (node_payload node)
1006
1007 {-
1008 Note [Complexity of loop breaking]
1009 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1010 The loop-breaking algorithm knocks out one binder at a time, and
1011 performs a new SCC analysis on the remaining binders. That can
1012 behave very badly in tightly-coupled groups of bindings; in the
1013 worst case it can be (N**2)*log N, because it does a full SCC
1014 on N, then N-1, then N-2 and so on.
1015
1016 To avoid this, we switch plans after 2 (or whatever) attempts:
1017 Plan A: pick one binder with the lowest score, make it
1018 a loop breaker, and try again
1019 Plan B: pick *all* binders with the lowest score, make them
1020 all loop breakers, and try again
1021 Since there are only a small finite number of scores, this will
1022 terminate in a constant number of iterations, rather than O(N)
1023 iterations.
1024
1025 You might thing that it's very unlikely, but RULES make it much
1026 more likely. Here's a real example from Trac #1969:
1027 Rec { $dm = \d.\x. op d
1028 {-# RULES forall d. $dm Int d = $s$dm1
1029 forall d. $dm Bool d = $s$dm2 #-}
1030
1031 dInt = MkD .... opInt ...
1032 dInt = MkD .... opBool ...
1033 opInt = $dm dInt
1034 opBool = $dm dBool
1035
1036 $s$dm1 = \x. op dInt
1037 $s$dm2 = \x. op dBool }
1038 The RULES stuff means that we can't choose $dm as a loop breaker
1039 (Note [Choosing loop breakers]), so we must choose at least (say)
1040 opInt *and* opBool, and so on. The number of loop breakders is
1041 linear in the number of instance declarations.
1042
1043 Note [Loop breakers and INLINE/INLINABLE pragmas]
1044 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1045 Avoid choosing a function with an INLINE pramga as the loop breaker!
1046 If such a function is mutually-recursive with a non-INLINE thing,
1047 then the latter should be the loop-breaker.
1048
1049 It's vital to distinguish between INLINE and INLINABLE (the
1050 Bool returned by hasStableCoreUnfolding_maybe). If we start with
1051 Rec { {-# INLINABLE f #-}
1052 f x = ...f... }
1053 and then worker/wrapper it through strictness analysis, we'll get
1054 Rec { {-# INLINABLE $wf #-}
1055 $wf p q = let x = (p,q) in ...f...
1056
1057 {-# INLINE f #-}
1058 f x = case x of (p,q) -> $wf p q }
1059
1060 Now it is vital that we choose $wf as the loop breaker, so we can
1061 inline 'f' in '$wf'.
1062
1063 Note [DFuns should not be loop breakers]
1064 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1065 It's particularly bad to make a DFun into a loop breaker. See
1066 Note [How instance declarations are translated] in TcInstDcls
1067
1068 We give DFuns a higher score than ordinary CONLIKE things because
1069 if there's a choice we want the DFun to be the non-loop breaker. Eg
1070
1071 rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
1072
1073 $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
1074 {-# DFUN #-}
1075 $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
1076 }
1077
1078 Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
1079 if we can't unravel the DFun first.
1080
1081 Note [Constructor applications]
1082 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1083 It's really really important to inline dictionaries. Real
1084 example (the Enum Ordering instance from GHC.Base):
1085
1086 rec f = \ x -> case d of (p,q,r) -> p x
1087 g = \ x -> case d of (p,q,r) -> q x
1088 d = (v, f, g)
1089
1090 Here, f and g occur just once; but we can't inline them into d.
1091 On the other hand we *could* simplify those case expressions if
1092 we didn't stupidly choose d as the loop breaker.
1093 But we won't because constructor args are marked "Many".
1094 Inlining dictionaries is really essential to unravelling
1095 the loops in static numeric dictionaries, see GHC.Float.
1096
1097 Note [Closure conversion]
1098 ~~~~~~~~~~~~~~~~~~~~~~~~~
1099 We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
1100 The immediate motivation came from the result of a closure-conversion transformation
1101 which generated code like this:
1102
1103 data Clo a b = forall c. Clo (c -> a -> b) c
1104
1105 ($:) :: Clo a b -> a -> b
1106 Clo f env $: x = f env x
1107
1108 rec { plus = Clo plus1 ()
1109
1110 ; plus1 _ n = Clo plus2 n
1111
1112 ; plus2 Zero n = n
1113 ; plus2 (Succ m) n = Succ (plus $: m $: n) }
1114
1115 If we inline 'plus' and 'plus1', everything unravels nicely. But if
1116 we choose 'plus1' as the loop breaker (which is entirely possible
1117 otherwise), the loop does not unravel nicely.
1118
1119
1120 @occAnalUnfolding@ deals with the question of bindings where the Id is marked
1121 by an INLINE pragma. For these we record that anything which occurs
1122 in its RHS occurs many times. This pessimistically assumes that this
1123 inlined binder also occurs many times in its scope, but if it doesn't
1124 we'll catch it next time round. At worst this costs an extra simplifier pass.
1125 ToDo: try using the occurrence info for the inline'd binder.
1126
1127 [March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC.
1128 [June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC.
1129
1130
1131 ************************************************************************
1132 * *
1133 Making nodes
1134 * *
1135 ************************************************************************
1136 -}
1137
1138 type ImpRuleEdges = IdEnv IdSet -- Mapping from FVs of imported RULE LHSs to RHS FVs
1139
1140 noImpRuleEdges :: ImpRuleEdges
1141 noImpRuleEdges = emptyVarEnv
1142
1143 type LetrecNode = Node Unique Details -- Node comes from Digraph
1144 -- The Unique key is gotten from the Id
1145 data Details
1146 = ND { nd_bndr :: Id -- Binder
1147 , nd_rhs :: CoreExpr -- RHS, already occ-analysed
1148 , nd_rhs_bndrs :: [CoreBndr] -- Outer lambdas of RHS
1149 -- INVARIANT: (nd_rhs_bndrs nd, _) ==
1150 -- collectBinders (nd_rhs nd)
1151
1152 , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings
1153 -- ignoring phase (ie assuming all are active)
1154 -- See Note [Forming Rec groups]
1155
1156 , nd_inl :: IdSet -- Free variables of
1157 -- the stable unfolding (if present and active)
1158 -- or the RHS (if not)
1159 -- but excluding any RULES
1160 -- This is the IdSet that may be used if the Id is inlined
1161
1162 , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds
1163 -- but are *not* in nd_inl. These are the ones whose
1164 -- dependencies might not be respected by loop_breaker_nodes
1165 -- See Note [Weak loop breakers]
1166
1167 , nd_active_rule_fvs :: IdSet -- Free variables of the RHS of active RULES
1168
1169 , nd_score :: NodeScore
1170 }
1171
1172 instance Outputable Details where
1173 ppr nd = text "ND" <> braces
1174 (sep [ text "bndr =" <+> ppr (nd_bndr nd)
1175 , text "uds =" <+> ppr (nd_uds nd)
1176 , text "inl =" <+> ppr (nd_inl nd)
1177 , text "weak =" <+> ppr (nd_weak nd)
1178 , text "rule =" <+> ppr (nd_active_rule_fvs nd)
1179 , text "score =" <+> ppr (nd_score nd)
1180 ])
1181
1182 -- The NodeScore is compared lexicographically;
1183 -- e.g. lower rank wins regardless of size
1184 type NodeScore = ( Int -- Rank: lower => more likely to be picked as loop breaker
1185 , Int -- Size of rhs: higher => more likely to be picked as LB
1186 -- Maxes out at maxExprSize; we just use it to prioritise
1187 -- small functions
1188 , Bool ) -- Was it a loop breaker before?
1189 -- True => more likely to be picked
1190 -- Note [Loop breakers, node scoring, and stability]
1191
1192 rank :: NodeScore -> Int
1193 rank (r, _, _) = r
1194
1195 makeNode :: OccEnv -> ImpRuleEdges -> VarSet
1196 -> (Var, CoreExpr) -> LetrecNode
1197 -- See Note [Recursive bindings: the grand plan]
1198 makeNode env imp_rule_edges bndr_set (bndr, rhs)
1199 = DigraphNode details (varUnique bndr) (nonDetKeysUniqSet node_fvs)
1200 -- It's OK to use nonDetKeysUniqSet here as stronglyConnCompFromEdgedVerticesR
1201 -- is still deterministic with edges in nondeterministic order as
1202 -- explained in Note [Deterministic SCC] in Digraph.
1203 where
1204 details = ND { nd_bndr = bndr
1205 , nd_rhs = rhs'
1206 , nd_rhs_bndrs = bndrs'
1207 , nd_uds = rhs_usage3
1208 , nd_inl = inl_fvs
1209 , nd_weak = node_fvs `minusVarSet` inl_fvs
1210 , nd_active_rule_fvs = active_rule_fvs
1211 , nd_score = pprPanic "makeNodeDetails" (ppr bndr) }
1212
1213 -- Constructing the edges for the main Rec computation
1214 -- See Note [Forming Rec groups]
1215 (bndrs, body) = collectBinders rhs
1216 (rhs_usage1, bndrs', body') = occAnalRecRhs env bndrs body
1217 rhs' = mkLams bndrs' body'
1218 rhs_usage2 = rhs_usage1 +++ all_rule_uds
1219 -- Note [Rules are extra RHSs]
1220 -- Note [Rule dependency info]
1221 rhs_usage3 = case mb_unf_uds of
1222 Just unf_uds -> rhs_usage2 +++ unf_uds
1223 Nothing -> rhs_usage2
1224 node_fvs = udFreeVars bndr_set rhs_usage3
1225
1226 -- Finding the free variables of the rules
1227 is_active = occ_rule_act env :: Activation -> Bool
1228
1229 rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)]
1230 rules_w_uds = occAnalRules env (Just (length bndrs)) Recursive bndr
1231
1232 rules_w_rhs_fvs :: [(Activation, VarSet)] -- Find the RHS fvs
1233 rules_w_rhs_fvs = maybe id (\ids -> ((AlwaysActive, ids):))
1234 (lookupVarEnv imp_rule_edges bndr)
1235 -- See Note [Preventing loops due to imported functions rules]
1236 [ (ru_act rule, udFreeVars bndr_set rhs_uds)
1237 | (rule, _, rhs_uds) <- rules_w_uds ]
1238 all_rule_uds = combineUsageDetailsList $
1239 concatMap (\(_, l, r) -> [l, r]) rules_w_uds
1240 active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_rhs_fvs
1241 , is_active a]
1242
1243 -- Finding the usage details of the INLINE pragma (if any)
1244 mb_unf_uds = occAnalUnfolding env Recursive bndr
1245
1246 -- Find the "nd_inl" free vars; for the loop-breaker phase
1247 inl_fvs = case mb_unf_uds of
1248 Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
1249 Just unf_uds -> udFreeVars bndr_set unf_uds
1250 -- We could check for an *active* INLINE (returning
1251 -- emptyVarSet for an inactive one), but is_active
1252 -- isn't the right thing (it tells about
1253 -- RULE activation), so we'd need more plumbing
1254
1255 mkLoopBreakerNodes :: OccEnv -> TopLevelFlag
1256 -> VarSet
1257 -> UsageDetails -- for BODY of let
1258 -> [Details]
1259 -> (UsageDetails, -- adjusted
1260 [LetrecNode])
1261 -- Does four things
1262 -- a) tag each binder with its occurrence info
1263 -- b) add a NodeScore to each node
1264 -- c) make a Node with the right dependency edges for
1265 -- the loop-breaker SCC analysis
1266 -- d) adjust each RHS's usage details according to
1267 -- the binder's (new) shotness and join-point-hood
1268 mkLoopBreakerNodes env lvl bndr_set body_uds details_s
1269 = (final_uds, zipWith mk_lb_node details_s bndrs')
1270 where
1271 (final_uds, bndrs') = tagRecBinders lvl body_uds
1272 [ ((nd_bndr nd)
1273 ,(nd_uds nd)
1274 ,(nd_rhs_bndrs nd))
1275 | nd <- details_s ]
1276 mk_lb_node nd@(ND { nd_bndr = bndr, nd_rhs = rhs, nd_inl = inl_fvs }) bndr'
1277 = DigraphNode nd' (varUnique bndr) (nonDetKeysUniqSet lb_deps)
1278 -- It's OK to use nonDetKeysUniqSet here as
1279 -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges
1280 -- in nondeterministic order as explained in
1281 -- Note [Deterministic SCC] in Digraph.
1282 where
1283 nd' = nd { nd_bndr = bndr', nd_score = score }
1284 score = nodeScore env bndr bndr' rhs lb_deps
1285 lb_deps = extendFvs_ rule_fv_env inl_fvs
1286
1287 rule_fv_env :: IdEnv IdSet
1288 -- Maps a variable f to the variables from this group
1289 -- mentioned in RHS of active rules for f
1290 -- Domain is *subset* of bound vars (others have no rule fvs)
1291 rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
1292 init_rule_fvs -- See Note [Finding rule RHS free vars]
1293 = [ (b, trimmed_rule_fvs)
1294 | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s
1295 , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
1296 , not (isEmptyVarSet trimmed_rule_fvs) ]
1297
1298
1299 ------------------------------------------
1300 nodeScore :: OccEnv
1301 -> Id -- Binder has old occ-info (just for loop-breaker-ness)
1302 -> Id -- Binder with new occ-info
1303 -> CoreExpr -- RHS
1304 -> VarSet -- Loop-breaker dependencies
1305 -> NodeScore
1306 nodeScore env old_bndr new_bndr bind_rhs lb_deps
1307 | not (isId old_bndr) -- A type or cercion variable is never a loop breaker
1308 = (100, 0, False)
1309
1310 | old_bndr `elemVarSet` lb_deps -- Self-recursive things are great loop breakers
1311 = (0, 0, True) -- See Note [Self-recursion and loop breakers]
1312
1313 | not (occ_unf_act env old_bndr) -- A binder whose inlining is inactive (e.g. has
1314 = (0, 0, True) -- a NOINLINE pragam) makes a great loop breaker
1315
1316 | exprIsTrivial rhs
1317 = mk_score 10 -- Practically certain to be inlined
1318 -- Used to have also: && not (isExportedId bndr)
1319 -- But I found this sometimes cost an extra iteration when we have
1320 -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }
1321 -- where df is the exported dictionary. Then df makes a really
1322 -- bad choice for loop breaker
1323
1324 | DFunUnfolding { df_args = args } <- id_unfolding
1325 -- Never choose a DFun as a loop breaker
1326 -- Note [DFuns should not be loop breakers]
1327 = (9, length args, is_lb)
1328
1329 -- Data structures are more important than INLINE pragmas
1330 -- so that dictionary/method recursion unravels
1331
1332 | CoreUnfolding { uf_guidance = UnfWhen {} } <- id_unfolding
1333 = mk_score 6
1334
1335 | is_con_app rhs -- Data types help with cases:
1336 = mk_score 5 -- Note [Constructor applications]
1337
1338 | isStableUnfolding id_unfolding
1339 , can_unfold
1340 = mk_score 3
1341
1342 | isOneOcc (idOccInfo new_bndr)
1343 = mk_score 2 -- Likely to be inlined
1344
1345 | can_unfold -- The Id has some kind of unfolding
1346 = mk_score 1
1347
1348 | otherwise
1349 = (0, 0, is_lb)
1350
1351 where
1352 mk_score :: Int -> NodeScore
1353 mk_score rank = (rank, rhs_size, is_lb)
1354
1355 is_lb = isStrongLoopBreaker (idOccInfo old_bndr)
1356 rhs = case id_unfolding of
1357 CoreUnfolding { uf_src = src, uf_tmpl = unf_rhs }
1358 | isStableSource src
1359 -> unf_rhs
1360 _ -> bind_rhs
1361 -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding
1362 rhs_size = case id_unfolding of
1363 CoreUnfolding { uf_guidance = guidance }
1364 | UnfIfGoodArgs { ug_size = size } <- guidance
1365 -> size
1366 _ -> cheapExprSize rhs
1367
1368 can_unfold = canUnfold id_unfolding
1369 id_unfolding = realIdUnfolding old_bndr
1370 -- realIdUnfolding: Ignore loop-breaker-ness here because
1371 -- that is what we are setting!
1372
1373 -- Checking for a constructor application
1374 -- Cheap and cheerful; the simplifier moves casts out of the way
1375 -- The lambda case is important to spot x = /\a. C (f a)
1376 -- which comes up when C is a dictionary constructor and
1377 -- f is a default method.
1378 -- Example: the instance for Show (ST s a) in GHC.ST
1379 --
1380 -- However we *also* treat (\x. C p q) as a con-app-like thing,
1381 -- Note [Closure conversion]
1382 is_con_app (Var v) = isConLikeId v
1383 is_con_app (App f _) = is_con_app f
1384 is_con_app (Lam _ e) = is_con_app e
1385 is_con_app (Tick _ e) = is_con_app e
1386 is_con_app _ = False
1387
1388 maxExprSize :: Int
1389 maxExprSize = 20 -- Rather arbitrary
1390
1391 cheapExprSize :: CoreExpr -> Int
1392 -- Maxes out at maxExprSize
1393 cheapExprSize e
1394 = go 0 e
1395 where
1396 go n e | n >= maxExprSize = n
1397 | otherwise = go1 n e
1398
1399 go1 n (Var {}) = n+1
1400 go1 n (Lit {}) = n+1
1401 go1 n (Type {}) = n
1402 go1 n (Coercion {}) = n
1403 go1 n (Tick _ e) = go1 n e
1404 go1 n (Cast e _) = go1 n e
1405 go1 n (App f a) = go (go1 n f) a
1406 go1 n (Lam b e)
1407 | isTyVar b = go1 n e
1408 | otherwise = go (n+1) e
1409 go1 n (Let b e) = gos (go1 n e) (rhssOfBind b)
1410 go1 n (Case e _ _ as) = gos (go1 n e) (rhssOfAlts as)
1411
1412 gos n [] = n
1413 gos n (e:es) | n >= maxExprSize = n
1414 | otherwise = gos (go1 n e) es
1415
1416 betterLB :: NodeScore -> NodeScore -> Bool
1417 -- If n1 `betterLB` n2 then choose n1 as the loop breaker
1418 betterLB (rank1, size1, lb1) (rank2, size2, _)
1419 | rank1 < rank2 = True
1420 | rank1 > rank2 = False
1421 | size1 < size2 = False -- Make the bigger n2 into the loop breaker
1422 | size1 > size2 = True
1423 | lb1 = True -- Tie-break: if n1 was a loop breaker before, choose it
1424 | otherwise = False -- See Note [Loop breakers, node scoring, and stability]
1425
1426 {- Note [Self-recursion and loop breakers]
1427 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1428 If we have
1429 rec { f = ...f...g...
1430 ; g = .....f... }
1431 then 'f' has to be a loop breaker anyway, so we may as well choose it
1432 right away, so that g can inline freely.
1433
1434 This is really just a cheap hack. Consider
1435 rec { f = ...g...
1436 ; g = ..f..h...
1437 ; h = ...f....}
1438 Here f or g are better loop breakers than h; but we might accidentally
1439 choose h. Finding the minimal set of loop breakers is hard.
1440
1441 Note [Loop breakers, node scoring, and stability]
1442 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1443 To choose a loop breaker, we give a NodeScore to each node in the SCC,
1444 and pick the one with the best score (according to 'betterLB').
1445
1446 We need to be jolly careful (Trac #12425, #12234) about the stability
1447 of this choice. Suppose we have
1448
1449 let rec { f = ...g...g...
1450 ; g = ...f...f... }
1451 in
1452 case x of
1453 True -> ...f..
1454 False -> ..f...
1455
1456 In each iteration of the simplifier the occurrence analyser OccAnal
1457 chooses a loop breaker. Suppose in iteration 1 it choose g as the loop
1458 breaker. That means it is free to inline f.
1459
1460 Suppose that GHC decides to inline f in the branches of the case, but
1461 (for some reason; eg it is not saturated) in the rhs of g. So we get
1462
1463 let rec { f = ...g...g...
1464 ; g = ...f...f... }
1465 in
1466 case x of
1467 True -> ...g...g.....
1468 False -> ..g..g....
1469
1470 Now suppose that, for some reason, in the next iteration the occurrence
1471 analyser chooses f as the loop breaker, so it can freely inline g. And
1472 again for some reason the simplifier inlines g at its calls in the case
1473 branches, but not in the RHS of f. Then we get
1474
1475 let rec { f = ...g...g...
1476 ; g = ...f...f... }
1477 in
1478 case x of
1479 True -> ...(...f...f...)...(...f..f..).....
1480 False -> ..(...f...f...)...(..f..f...)....
1481
1482 You can see where this is going! Each iteration of the simplifier
1483 doubles the number of calls to f or g. No wonder GHC is slow!
1484
1485 (In the particular example in comment:3 of #12425, f and g are the two
1486 mutually recursive fmap instances for CondT and Result. They are both
1487 marked INLINE which, oddly, is why they don't inline in each other's
1488 RHS, because the call there is not saturated.)
1489
1490 The root cause is that we flip-flop on our choice of loop breaker. I
1491 always thought it didn't matter, and indeed for any single iteration
1492 to terminate, it doesn't matter. But when we iterate, it matters a
1493 lot!!
1494
1495 So The Plan is this:
1496 If there is a tie, choose the node that
1497 was a loop breaker last time round
1498
1499 Hence the is_lb field of NodeScore
1500
1501 ************************************************************************
1502 * *
1503 Right hand sides
1504 * *
1505 ************************************************************************
1506 -}
1507
1508 occAnalRhs :: OccEnv -> RecFlag -> Id -> [CoreBndr] -> CoreExpr
1509 -> (UsageDetails, [CoreBndr], CoreExpr)
1510 -- Returned usage details covers only the RHS,
1511 -- and *not* the RULE or INLINE template for the Id
1512 occAnalRhs env Recursive _ bndrs body
1513 = occAnalRecRhs env bndrs body
1514 occAnalRhs env NonRecursive id bndrs body
1515 = occAnalNonRecRhs env id bndrs body
1516
1517 occAnalRecRhs :: OccEnv -> [CoreBndr] -> CoreExpr -- Rhs lambdas, body
1518 -> (UsageDetails, [CoreBndr], CoreExpr)
1519 -- Returned usage details covers only the RHS,
1520 -- and *not* the RULE or INLINE template for the Id
1521 occAnalRecRhs env bndrs body = occAnalLamOrRhs (rhsCtxt env) bndrs body
1522
1523 occAnalNonRecRhs :: OccEnv
1524 -> Id -> [CoreBndr] -> CoreExpr -- Binder; rhs lams, body
1525 -- Binder is already tagged with occurrence info
1526 -> (UsageDetails, [CoreBndr], CoreExpr)
1527 -- Returned usage details covers only the RHS,
1528 -- and *not* the RULE or INLINE template for the Id
1529 occAnalNonRecRhs env bndr bndrs body
1530 = occAnalLamOrRhs rhs_env bndrs body
1531 where
1532 env1 | is_join_point = env -- See Note [Join point RHSs]
1533 | certainly_inline = env -- See Note [Cascading inlines]
1534 | otherwise = rhsCtxt env
1535
1536 -- See Note [Sources of one-shot information]
1537 rhs_env = env1 { occ_one_shots = argOneShots dmd }
1538
1539 certainly_inline -- See Note [Cascading inlines]
1540 = case occ of
1541 OneOcc { occ_in_lam = in_lam, occ_one_br = one_br }
1542 -> not in_lam && one_br && active && not_stable
1543 _ -> False
1544
1545 is_join_point = isAlwaysTailCalled occ
1546 -- Like (isJoinId bndr) but happens one step earlier
1547 -- c.f. willBeJoinId_maybe
1548
1549 occ = idOccInfo bndr
1550 dmd = idDemandInfo bndr
1551 active = isAlwaysActive (idInlineActivation bndr)
1552 not_stable = not (isStableUnfolding (idUnfolding bndr))
1553
1554 occAnalUnfolding :: OccEnv
1555 -> RecFlag
1556 -> Id
1557 -> Maybe UsageDetails
1558 -- Just the analysis, not a new unfolding. The unfolding
1559 -- got analysed when it was created and we don't need to
1560 -- update it.
1561 occAnalUnfolding env rec_flag id
1562 = case realIdUnfolding id of -- ignore previous loop-breaker flag
1563 CoreUnfolding { uf_tmpl = rhs, uf_src = src }
1564 | not (isStableSource src)
1565 -> Nothing
1566 | otherwise
1567 -> Just $ markAllMany usage
1568 where
1569 (bndrs, body) = collectBinders rhs
1570 (usage, _, _) = occAnalRhs env rec_flag id bndrs body
1571
1572 DFunUnfolding { df_bndrs = bndrs, df_args = args }
1573 -> Just $ zapDetails (delDetailsList usage bndrs)
1574 where
1575 usage = foldr (+++) emptyDetails (map (fst . occAnal env) args)
1576
1577 _ -> Nothing
1578
1579 occAnalRules :: OccEnv
1580 -> Maybe JoinArity -- If the binder is (or MAY become) a join
1581 -- point, what its join arity is (or WOULD
1582 -- become). See Note [Rules and join points].
1583 -> RecFlag
1584 -> Id
1585 -> [(CoreRule, -- Each (non-built-in) rule
1586 UsageDetails, -- Usage details for LHS
1587 UsageDetails)] -- Usage details for RHS
1588 occAnalRules env mb_expected_join_arity rec_flag id
1589 = [ (rule, lhs_uds, rhs_uds) | rule@Rule {} <- idCoreRules id
1590 , let (lhs_uds, rhs_uds) = occ_anal_rule rule ]
1591 where
1592 occ_anal_rule (Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs })
1593 = (lhs_uds, final_rhs_uds)
1594 where
1595 lhs_uds = addManyOccsSet emptyDetails $
1596 (exprsFreeVars args `delVarSetList` bndrs)
1597 (rhs_bndrs, rhs_body) = collectBinders rhs
1598 (rhs_uds, _, _) = occAnalRhs env rec_flag id rhs_bndrs rhs_body
1599 -- Note [Rules are extra RHSs]
1600 -- Note [Rule dependency info]
1601 final_rhs_uds = adjust_tail_info args $ markAllMany $
1602 (rhs_uds `delDetailsList` bndrs)
1603 occ_anal_rule _
1604 = (emptyDetails, emptyDetails)
1605
1606 adjust_tail_info args uds -- see Note [Rules and join points]
1607 = case mb_expected_join_arity of
1608 Just ar | args `lengthIs` ar -> uds
1609 _ -> markAllNonTailCalled uds
1610 {- Note [Join point RHSs]
1611 ~~~~~~~~~~~~~~~~~~~~~~~~~
1612 Consider
1613 x = e
1614 join j = Just x
1615
1616 We want to inline x into j right away, so we don't want to give
1617 the join point a RhsCtxt (Trac #14137). It's not a huge deal, because
1618 the FloatIn pass knows to float into join point RHSs; and the simplifier
1619 does not float things out of join point RHSs. But it's a simple, cheap
1620 thing to do. See Trac #14137.
1621
1622 Note [Cascading inlines]
1623 ~~~~~~~~~~~~~~~~~~~~~~~~
1624 By default we use an rhsCtxt for the RHS of a binding. This tells the
1625 occ anal n that it's looking at an RHS, which has an effect in
1626 occAnalApp. In particular, for constructor applications, it makes
1627 the arguments appear to have NoOccInfo, so that we don't inline into
1628 them. Thus x = f y
1629 k = Just x
1630 we do not want to inline x.
1631
1632 But there's a problem. Consider
1633 x1 = a0 : []
1634 x2 = a1 : x1
1635 x3 = a2 : x2
1636 g = f x3
1637 First time round, it looks as if x1 and x2 occur as an arg of a
1638 let-bound constructor ==> give them a many-occurrence.
1639 But then x3 is inlined (unconditionally as it happens) and
1640 next time round, x2 will be, and the next time round x1 will be
1641 Result: multiple simplifier iterations. Sigh.
1642
1643 So, when analysing the RHS of x3 we notice that x3 will itself
1644 definitely inline the next time round, and so we analyse x3's rhs in
1645 an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff.
1646
1647 Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally.
1648 If (a) the RHS is expandable (see isExpandableApp in occAnalApp), and
1649 (b) certainly_inline says "yes" when preInlineUnconditionally says "no"
1650 then the simplifier iterates indefinitely:
1651 x = f y
1652 k = Just x -- We decide that k is 'certainly_inline'
1653 v = ...k... -- but preInlineUnconditionally doesn't inline it
1654 inline ==>
1655 k = Just (f y)
1656 v = ...k...
1657 float ==>
1658 x1 = f y
1659 k = Just x1
1660 v = ...k...
1661
1662 This is worse than the slow cascade, so we only want to say "certainly_inline"
1663 if it really is certain. Look at the note with preInlineUnconditionally
1664 for the various clauses.
1665
1666
1667 ************************************************************************
1668 * *
1669 Expressions
1670 * *
1671 ************************************************************************
1672 -}
1673
1674 occAnal :: OccEnv
1675 -> CoreExpr
1676 -> (UsageDetails, -- Gives info only about the "interesting" Ids
1677 CoreExpr)
1678
1679 occAnal _ expr@(Type _) = (emptyDetails, expr)
1680 occAnal _ expr@(Lit _) = (emptyDetails, expr)
1681 occAnal env expr@(Var _) = occAnalApp env (expr, [], [])
1682 -- At one stage, I gathered the idRuleVars for the variable here too,
1683 -- which in a way is the right thing to do.
1684 -- But that went wrong right after specialisation, when
1685 -- the *occurrences* of the overloaded function didn't have any
1686 -- rules in them, so the *specialised* versions looked as if they
1687 -- weren't used at all.
1688
1689 occAnal _ (Coercion co)
1690 = (addManyOccsSet emptyDetails (coVarsOfCo co), Coercion co)
1691 -- See Note [Gather occurrences of coercion variables]
1692
1693 {-
1694 Note [Gather occurrences of coercion variables]
1695 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1696 We need to gather info about what coercion variables appear, so that
1697 we can sort them into the right place when doing dependency analysis.
1698 -}
1699
1700 occAnal env (Tick tickish body)
1701 | SourceNote{} <- tickish
1702 = (usage, Tick tickish body')
1703 -- SourceNotes are best-effort; so we just proceed as usual.
1704 -- If we drop a tick due to the issues described below it's
1705 -- not the end of the world.
1706
1707 | tickish `tickishScopesLike` SoftScope
1708 = (markAllNonTailCalled usage, Tick tickish body')
1709
1710 | Breakpoint _ ids <- tickish
1711 = (usage_lam +++ foldr addManyOccs emptyDetails ids, Tick tickish body')
1712 -- never substitute for any of the Ids in a Breakpoint
1713
1714 | otherwise
1715 = (usage_lam, Tick tickish body')
1716 where
1717 !(usage,body') = occAnal env body
1718 -- for a non-soft tick scope, we can inline lambdas only
1719 usage_lam = markAllNonTailCalled (markAllInsideLam usage)
1720 -- TODO There may be ways to make ticks and join points play
1721 -- nicer together, but right now there are problems:
1722 -- let j x = ... in tick<t> (j 1)
1723 -- Making j a join point may cause the simplifier to drop t
1724 -- (if the tick is put into the continuation). So we don't
1725 -- count j 1 as a tail call.
1726 -- See #14242.
1727
1728 occAnal env (Cast expr co)
1729 = case occAnal env expr of { (usage, expr') ->
1730 let usage1 = zapDetailsIf (isRhsEnv env) usage
1731 -- usage1: if we see let x = y `cast` co
1732 -- then mark y as 'Many' so that we don't
1733 -- immediately inline y again.
1734 usage2 = addManyOccsSet usage1 (coVarsOfCo co)
1735 -- usage2: see Note [Gather occurrences of coercion variables]
1736 in (markAllNonTailCalled usage2, Cast expr' co)
1737 }
1738
1739 occAnal env app@(App _ _)
1740 = occAnalApp env (collectArgsTicks tickishFloatable app)
1741
1742 -- Ignore type variables altogether
1743 -- (a) occurrences inside type lambdas only not marked as InsideLam
1744 -- (b) type variables not in environment
1745
1746 occAnal env (Lam x body)
1747 | isTyVar x
1748 = case occAnal env body of { (body_usage, body') ->
1749 (markAllNonTailCalled body_usage, Lam x body')
1750 }
1751
1752 -- For value lambdas we do a special hack. Consider
1753 -- (\x. \y. ...x...)
1754 -- If we did nothing, x is used inside the \y, so would be marked
1755 -- as dangerous to dup. But in the common case where the abstraction
1756 -- is applied to two arguments this is over-pessimistic.
1757 -- So instead, we just mark each binder with its occurrence
1758 -- info in the *body* of the multiple lambda.
1759 -- Then, the simplifier is careful when partially applying lambdas.
1760
1761 occAnal env expr@(Lam _ _)
1762 = case occAnalLamOrRhs env binders body of { (usage, tagged_binders, body') ->
1763 let
1764 expr' = mkLams tagged_binders body'
1765 usage1 = markAllNonTailCalled usage
1766 one_shot_gp = all isOneShotBndr tagged_binders
1767 final_usage | one_shot_gp = usage1
1768 | otherwise = markAllInsideLam usage1
1769 in
1770 (final_usage, expr') }
1771 where
1772 (binders, body) = collectBinders expr
1773
1774 occAnal env (Case scrut bndr ty alts)
1775 = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') ->
1776 case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts') ->
1777 let
1778 alts_usage = foldr combineAltsUsageDetails emptyDetails alts_usage_s
1779 (alts_usage1, tagged_bndr) = tag_case_bndr alts_usage bndr
1780 total_usage = markAllNonTailCalled scrut_usage +++ alts_usage1
1781 -- Alts can have tail calls, but the scrutinee can't
1782 in
1783 total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
1784 where
1785 -- Note [Case binder usage]
1786 -- ~~~~~~~~~~~~~~~~~~~~~~~~
1787 -- The case binder gets a usage of either "many" or "dead", never "one".
1788 -- Reason: we like to inline single occurrences, to eliminate a binding,
1789 -- but inlining a case binder *doesn't* eliminate a binding.
1790 -- We *don't* want to transform
1791 -- case x of w { (p,q) -> f w }
1792 -- into
1793 -- case x of w { (p,q) -> f (p,q) }
1794 tag_case_bndr usage bndr
1795 = (usage', setIdOccInfo bndr final_occ_info)
1796 where
1797 occ_info = lookupDetails usage bndr
1798 usage' = usage `delDetails` bndr
1799 final_occ_info = case occ_info of IAmDead -> IAmDead
1800 _ -> noOccInfo
1801
1802 alt_env = mkAltEnv env scrut bndr
1803 occ_anal_alt = occAnalAlt alt_env
1804
1805 occ_anal_scrut (Var v) (alt1 : other_alts)
1806 | not (null other_alts) || not (isDefaultAlt alt1)
1807 = (mkOneOcc env v True 0, Var v)
1808 -- The 'True' says that the variable occurs in an interesting
1809 -- context; the case has at least one non-default alternative
1810 occ_anal_scrut (Tick t e) alts
1811 | t `tickishScopesLike` SoftScope
1812 -- No reason to not look through all ticks here, but only
1813 -- for soft-scoped ticks we can do so without having to
1814 -- update returned occurance info (see occAnal)
1815 = second (Tick t) $ occ_anal_scrut e alts
1816
1817 occ_anal_scrut scrut _alts
1818 = occAnal (vanillaCtxt env) scrut -- No need for rhsCtxt
1819
1820 occAnal env (Let bind body)
1821 = case occAnal env body of { (body_usage, body') ->
1822 case occAnalBind env NotTopLevel
1823 noImpRuleEdges bind
1824 body_usage of { (final_usage, new_binds) ->
1825 (final_usage, mkLets new_binds body') }}
1826
1827 occAnalArgs :: OccEnv -> [CoreExpr] -> [OneShots] -> (UsageDetails, [CoreExpr])
1828 occAnalArgs _ [] _
1829 = (emptyDetails, [])
1830
1831 occAnalArgs env (arg:args) one_shots
1832 | isTypeArg arg
1833 = case occAnalArgs env args one_shots of { (uds, args') ->
1834 (uds, arg:args') }
1835
1836 | otherwise
1837 = case argCtxt env one_shots of { (arg_env, one_shots') ->
1838 case occAnal arg_env arg of { (uds1, arg') ->
1839 case occAnalArgs env args one_shots' of { (uds2, args') ->
1840 (uds1 +++ uds2, arg':args') }}}
1841
1842 {-
1843 Applications are dealt with specially because we want
1844 the "build hack" to work.
1845
1846 Note [Arguments of let-bound constructors]
1847 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1848 Consider
1849 f x = let y = expensive x in
1850 let z = (True,y) in
1851 (case z of {(p,q)->q}, case z of {(p,q)->q})
1852 We feel free to duplicate the WHNF (True,y), but that means
1853 that y may be duplicated thereby.
1854
1855 If we aren't careful we duplicate the (expensive x) call!
1856 Constructors are rather like lambdas in this way.
1857 -}
1858
1859 occAnalApp :: OccEnv
1860 -> (Expr CoreBndr, [Arg CoreBndr], [Tickish Id])
1861 -> (UsageDetails, Expr CoreBndr)
1862 occAnalApp env (Var fun, args, ticks)
1863 | null ticks = (uds, mkApps (Var fun) args')
1864 | otherwise = (uds, mkTicks ticks $ mkApps (Var fun) args')
1865 where
1866 uds = fun_uds +++ final_args_uds
1867
1868 !(args_uds, args') = occAnalArgs env args one_shots
1869 !final_args_uds
1870 | isRhsEnv env && is_exp = markAllNonTailCalled $
1871 markAllInsideLam args_uds
1872 | otherwise = markAllNonTailCalled args_uds
1873 -- We mark the free vars of the argument of a constructor or PAP
1874 -- as "inside-lambda", if it is the RHS of a let(rec).
1875 -- This means that nothing gets inlined into a constructor or PAP
1876 -- argument position, which is what we want. Typically those
1877 -- constructor arguments are just variables, or trivial expressions.
1878 -- We use inside-lam because it's like eta-expanding the PAP.
1879 --
1880 -- This is the *whole point* of the isRhsEnv predicate
1881 -- See Note [Arguments of let-bound constructors]
1882
1883 n_val_args = valArgCount args
1884 n_args = length args
1885 fun_uds = mkOneOcc env fun (n_val_args > 0) n_args
1886 is_exp = isExpandableApp fun n_val_args
1887 -- See Note [CONLIKE pragma] in BasicTypes
1888 -- The definition of is_exp should match that in Simplify.prepareRhs
1889
1890 one_shots = argsOneShots (idStrictness fun) guaranteed_val_args
1891 guaranteed_val_args = n_val_args + length (takeWhile isOneShotInfo
1892 (occ_one_shots env))
1893 -- See Note [Sources of one-shot information], bullet point A']
1894
1895 occAnalApp env (fun, args, ticks)
1896 = (markAllNonTailCalled (fun_uds +++ args_uds),
1897 mkTicks ticks $ mkApps fun' args')
1898 where
1899 !(fun_uds, fun') = occAnal (addAppCtxt env args) fun
1900 -- The addAppCtxt is a bit cunning. One iteration of the simplifier
1901 -- often leaves behind beta redexs like
1902 -- (\x y -> e) a1 a2
1903 -- Here we would like to mark x,y as one-shot, and treat the whole
1904 -- thing much like a let. We do this by pushing some True items
1905 -- onto the context stack.
1906 !(args_uds, args') = occAnalArgs env args []
1907
1908 zapDetailsIf :: Bool -- If this is true
1909 -> UsageDetails -- Then do zapDetails on this
1910 -> UsageDetails
1911 zapDetailsIf True uds = zapDetails uds
1912 zapDetailsIf False uds = uds
1913
1914 {-
1915 Note [Sources of one-shot information]
1916 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1917 The occurrence analyser obtains one-shot-lambda information from two sources:
1918
1919 A: Saturated applications: eg f e1 .. en
1920
1921 In general, given a call (f e1 .. en) we can propagate one-shot info from
1922 f's strictness signature into e1 .. en, but /only/ if n is enough to
1923 saturate the strictness signature. A strictness signature like
1924
1925 f :: C1(C1(L))LS
1926
1927 means that *if f is applied to three arguments* then it will guarantee to
1928 call its first argument at most once, and to call the result of that at
1929 most once. But if f has fewer than three arguments, all bets are off; e.g.
1930
1931 map (f (\x y. expensive) e2) xs
1932
1933 Here the \x y abstraction may be called many times (once for each element of
1934 xs) so we should not mark x and y as one-shot. But if it was
1935
1936 map (f (\x y. expensive) 3 2) xs
1937
1938 then the first argument of f will be called at most once.
1939
1940 The one-shot info, derived from f's strictness signature, is
1941 computed by 'argsOneShots', called in occAnalApp.
1942
1943 A': Non-obviously saturated applications: eg build (f (\x y -> expensive))
1944 where f is as above.
1945
1946 In this case, f is only manifestly applied to one argument, so it does not
1947 look saturated. So by the previous point, we should not use its strictness
1948 signature to learn about the one-shotness of \x y. But in this case we can:
1949 build is fully applied, so we may use its strictness signature; and from
1950 that we learn that build calls its argument with two arguments *at most once*.
1951
1952 So there is really only one call to f, and it will have three arguments. In
1953 that sense, f is saturated, and we may proceed as described above.
1954
1955 Hence the computation of 'guaranteed_val_args' in occAnalApp, using
1956 '(occ_one_shots env)'. See also Trac #13227, comment:9
1957
1958 B: Let-bindings: eg let f = \c. let ... in \n -> blah
1959 in (build f, build f)
1960
1961 Propagate one-shot info from the demanand-info on 'f' to the
1962 lambdas in its RHS (which may not be syntactically at the top)
1963
1964 This information must have come from a previous run of the demanand
1965 analyser.
1966
1967 Previously, the demand analyser would *also* set the one-shot information, but
1968 that code was buggy (see #11770), so doing it only in on place, namely here, is
1969 saner.
1970
1971 Note [OneShots]
1972 ~~~~~~~~~~~~~~~
1973 When analysing an expression, the occ_one_shots argument contains information
1974 about how the function is being used. The length of the list indicates
1975 how many arguments will eventually be passed to the analysed expression,
1976 and the OneShotInfo indicates whether this application is once or multiple times.
1977
1978 Example:
1979
1980 Context of f occ_one_shots when analysing f
1981
1982 f 1 2 [OneShot, OneShot]
1983 map (f 1) [OneShot, NoOneShotInfo]
1984 build f [OneShot, OneShot]
1985 f 1 2 `seq` f 2 1 [NoOneShotInfo, OneShot]
1986
1987 Note [Binders in case alternatives]
1988 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1989 Consider
1990 case x of y { (a,b) -> f y }
1991 We treat 'a', 'b' as dead, because they don't physically occur in the
1992 case alternative. (Indeed, a variable is dead iff it doesn't occur in
1993 its scope in the output of OccAnal.) It really helps to know when
1994 binders are unused. See esp the call to isDeadBinder in
1995 Simplify.mkDupableAlt
1996
1997 In this example, though, the Simplifier will bring 'a' and 'b' back to
1998 life, beause it binds 'y' to (a,b) (imagine got inlined and
1999 scrutinised y).
2000 -}
2001
2002 occAnalLamOrRhs :: OccEnv -> [CoreBndr] -> CoreExpr
2003 -> (UsageDetails, [CoreBndr], CoreExpr)
2004 occAnalLamOrRhs env [] body
2005 = case occAnal env body of (body_usage, body') -> (body_usage, [], body')
2006 -- RHS of thunk or nullary join point
2007 occAnalLamOrRhs env (bndr:bndrs) body
2008 | isTyVar bndr
2009 = -- Important: Keep the environment so that we don't inline into an RHS like
2010 -- \(@ x) -> C @x (f @x)
2011 -- (see the beginning of Note [Cascading inlines]).
2012 case occAnalLamOrRhs env bndrs body of
2013 (body_usage, bndrs', body') -> (body_usage, bndr:bndrs', body')
2014 occAnalLamOrRhs env binders body
2015 = case occAnal env_body body of { (body_usage, body') ->
2016 let
2017 (final_usage, tagged_binders) = tagLamBinders body_usage binders'
2018 -- Use binders' to put one-shot info on the lambdas
2019 in
2020 (final_usage, tagged_binders, body') }
2021 where
2022 (env_body, binders') = oneShotGroup env binders
2023
2024 occAnalAlt :: (OccEnv, Maybe (Id, CoreExpr))
2025 -> CoreAlt
2026 -> (UsageDetails, Alt IdWithOccInfo)
2027 occAnalAlt (env, scrut_bind) (con, bndrs, rhs)
2028 = case occAnal env rhs of { (rhs_usage1, rhs1) ->
2029 let
2030 (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs
2031 -- See Note [Binders in case alternatives]
2032 (alt_usg', rhs2) =
2033 wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1
2034 in
2035 (alt_usg', (con, tagged_bndrs, rhs2)) }
2036
2037 wrapAltRHS :: OccEnv
2038 -> Maybe (Id, CoreExpr) -- proxy mapping generated by mkAltEnv
2039 -> UsageDetails -- usage for entire alt (p -> rhs)
2040 -> [Var] -- alt binders
2041 -> CoreExpr -- alt RHS
2042 -> (UsageDetails, CoreExpr)
2043 wrapAltRHS env (Just (scrut_var, let_rhs)) alt_usg bndrs alt_rhs
2044 | occ_binder_swap env
2045 , scrut_var `usedIn` alt_usg -- bndrs are not be present in alt_usg so this
2046 -- handles condition (a) in Note [Binder swap]
2047 , not captured -- See condition (b) in Note [Binder swap]
2048 = ( alt_usg' +++ let_rhs_usg
2049 , Let (NonRec tagged_scrut_var let_rhs') alt_rhs )
2050 where
2051 captured = any (`usedIn` let_rhs_usg) bndrs
2052 -- The rhs of the let may include coercion variables
2053 -- if the scrutinee was a cast, so we must gather their
2054 -- usage. See Note [Gather occurrences of coercion variables]
2055 (let_rhs_usg, let_rhs') = occAnal env let_rhs
2056 (alt_usg', [tagged_scrut_var]) = tagLamBinders alt_usg [scrut_var]
2057
2058 wrapAltRHS _ _ alt_usg _ alt_rhs
2059 = (alt_usg, alt_rhs)
2060
2061 {-
2062 ************************************************************************
2063 * *
2064 OccEnv
2065 * *
2066 ************************************************************************
2067 -}
2068
2069 data OccEnv
2070 = OccEnv { occ_encl :: !OccEncl -- Enclosing context information
2071 , occ_one_shots :: !OneShots -- See Note [OneShots]
2072 , occ_gbl_scrut :: GlobalScruts
2073
2074 , occ_unf_act :: Id -> Bool -- Which Id unfoldings are active
2075
2076 , occ_rule_act :: Activation -> Bool -- Which rules are active
2077 -- See Note [Finding rule RHS free vars]
2078
2079 , occ_binder_swap :: !Bool -- enable the binder_swap
2080 -- See CorePrep Note [Dead code in CorePrep]
2081 }
2082
2083 type GlobalScruts = IdSet -- See Note [Binder swap on GlobalId scrutinees]
2084
2085 -----------------------------
2086 -- OccEncl is used to control whether to inline into constructor arguments
2087 -- For example:
2088 -- x = (p,q) -- Don't inline p or q
2089 -- y = /\a -> (p a, q a) -- Still don't inline p or q
2090 -- z = f (p,q) -- Do inline p,q; it may make a rule fire
2091 -- So OccEncl tells enought about the context to know what to do when
2092 -- we encounter a constructor application or PAP.
2093
2094 data OccEncl
2095 = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda
2096 -- Don't inline into constructor args here
2097 | OccVanilla -- Argument of function, body of lambda, scruintee of case etc.
2098 -- Do inline into constructor args here
2099
2100 instance Outputable OccEncl where
2101 ppr OccRhs = text "occRhs"
2102 ppr OccVanilla = text "occVanilla"
2103
2104 -- See note [OneShots]
2105 type OneShots = [OneShotInfo]
2106
2107 initOccEnv :: OccEnv
2108 initOccEnv
2109 = OccEnv { occ_encl = OccVanilla
2110 , occ_one_shots = []
2111 , occ_gbl_scrut = emptyVarSet
2112 -- To be conservative, we say that all
2113 -- inlines and rules are active
2114 , occ_unf_act = \_ -> True
2115 , occ_rule_act = \_ -> True
2116 , occ_binder_swap = True }
2117
2118 vanillaCtxt :: OccEnv -> OccEnv
2119 vanillaCtxt env = env { occ_encl = OccVanilla, occ_one_shots = [] }
2120
2121 rhsCtxt :: OccEnv -> OccEnv
2122 rhsCtxt env = env { occ_encl = OccRhs, occ_one_shots = [] }
2123
2124 argCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots])
2125 argCtxt env []
2126 = (env { occ_encl = OccVanilla, occ_one_shots = [] }, [])
2127 argCtxt env (one_shots:one_shots_s)
2128 = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s)
2129
2130 isRhsEnv :: OccEnv -> Bool
2131 isRhsEnv (OccEnv { occ_encl = OccRhs }) = True
2132 isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False
2133
2134 oneShotGroup :: OccEnv -> [CoreBndr]
2135 -> ( OccEnv
2136 , [CoreBndr] )
2137 -- The result binders have one-shot-ness set that they might not have had originally.
2138 -- This happens in (build (\c n -> e)). Here the occurrence analyser
2139 -- linearity context knows that c,n are one-shot, and it records that fact in
2140 -- the binder. This is useful to guide subsequent float-in/float-out tranformations
2141
2142 oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs
2143 = go ctxt bndrs []
2144 where
2145 go ctxt [] rev_bndrs
2146 = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla }
2147 , reverse rev_bndrs )
2148
2149 go [] bndrs rev_bndrs
2150 = ( env { occ_one_shots = [], occ_encl = OccVanilla }
2151 , reverse rev_bndrs ++ bndrs )
2152
2153 go ctxt@(one_shot : ctxt') (bndr : bndrs) rev_bndrs
2154 | isId bndr = go ctxt' bndrs (bndr': rev_bndrs)
2155 | otherwise = go ctxt bndrs (bndr : rev_bndrs)
2156 where
2157 bndr' = updOneShotInfo bndr one_shot
2158 -- Use updOneShotInfo, not setOneShotInfo, as pre-existing
2159 -- one-shot info might be better than what we can infer, e.g.
2160 -- due to explicit use of the magic 'oneShot' function.
2161 -- See Note [The oneShot function]
2162
2163
2164 markJoinOneShots :: Maybe JoinArity -> [Var] -> [Var]
2165 -- Mark the lambdas of a non-recursive join point as one-shot.
2166 -- This is good to prevent gratuitous float-out etc
2167 markJoinOneShots mb_join_arity bndrs
2168 = case mb_join_arity of
2169 Nothing -> bndrs
2170 Just n -> go n bndrs
2171 where
2172 go 0 bndrs = bndrs
2173 go _ [] = [] -- This can legitimately happen.
2174 -- e.g. let j = case ... in j True
2175 -- This will become an arity-1 join point after the
2176 -- simplifier has eta-expanded it; but it may not have
2177 -- enough lambdas /yet/. (Lint checks that JoinIds do
2178 -- have enough lambdas.)
2179 go n (b:bs) = b' : go (n-1) bs
2180 where
2181 b' | isId b = setOneShotLambda b
2182 | otherwise = b
2183
2184 addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
2185 addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args
2186 = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt }
2187
2188 transClosureFV :: UniqFM VarSet -> UniqFM VarSet
2189 -- If (f,g), (g,h) are in the input, then (f,h) is in the output
2190 -- as well as (f,g), (g,h)
2191 transClosureFV env
2192 | no_change = env
2193 | otherwise = transClosureFV (listToUFM new_fv_list)
2194 where
2195 (no_change, new_fv_list) = mapAccumL bump True (nonDetUFMToList env)
2196 -- It's OK to use nonDetUFMToList here because we'll forget the
2197 -- ordering by creating a new set with listToUFM
2198 bump no_change (b,fvs)
2199 | no_change_here = (no_change, (b,fvs))
2200 | otherwise = (False, (b,new_fvs))
2201 where
2202 (new_fvs, no_change_here) = extendFvs env fvs
2203
2204 -------------
2205 extendFvs_ :: UniqFM VarSet -> VarSet -> VarSet
2206 extendFvs_ env s = fst (extendFvs env s) -- Discard the Bool flag
2207
2208 extendFvs :: UniqFM VarSet -> VarSet -> (VarSet, Bool)
2209 -- (extendFVs env s) returns
2210 -- (s `union` env(s), env(s) `subset` s)
2211 extendFvs env s
2212 | isNullUFM env
2213 = (s, True)
2214 | otherwise
2215 = (s `unionVarSet` extras, extras `subVarSet` s)
2216 where
2217 extras :: VarSet -- env(s)
2218 extras = nonDetFoldUFM unionVarSet emptyVarSet $
2219 -- It's OK to use nonDetFoldUFM here because unionVarSet commutes
2220 intersectUFM_C (\x _ -> x) env (getUniqSet s)
2221
2222 {-
2223 ************************************************************************
2224 * *
2225 Binder swap
2226 * *
2227 ************************************************************************
2228
2229 Note [Binder swap]
2230 ~~~~~~~~~~~~~~~~~~
2231 We do these two transformations right here:
2232
2233 (1) case x of b { pi -> ri }
2234 ==>
2235 case x of b { pi -> let x=b in ri }
2236
2237 (2) case (x |> co) of b { pi -> ri }
2238 ==>
2239 case (x |> co) of b { pi -> let x = b |> sym co in ri }
2240
2241 Why (2)? See Note [Case of cast]
2242
2243 In both cases, in a particular alternative (pi -> ri), we only
2244 add the binding if
2245 (a) x occurs free in (pi -> ri)
2246 (ie it occurs in ri, but is not bound in pi)
2247 (b) the pi does not bind b (or the free vars of co)
2248 We need (a) and (b) for the inserted binding to be correct.
2249
2250 For the alternatives where we inject the binding, we can transfer
2251 all x's OccInfo to b. And that is the point.
2252
2253 Notice that
2254 * The deliberate shadowing of 'x'.
2255 * That (a) rapidly becomes false, so no bindings are injected.
2256
2257 The reason for doing these transformations here is because it allows
2258 us to adjust the OccInfo for 'x' and 'b' as we go.
2259
2260 * Suppose the only occurrences of 'x' are the scrutinee and in the
2261 ri; then this transformation makes it occur just once, and hence
2262 get inlined right away.
2263
2264 * If we do this in the Simplifier, we don't know whether 'x' is used
2265 in ri, so we are forced to pessimistically zap b's OccInfo even
2266 though it is typically dead (ie neither it nor x appear in the
2267 ri). There's nothing actually wrong with zapping it, except that
2268 it's kind of nice to know which variables are dead. My nose
2269 tells me to keep this information as robustly as possible.
2270
2271 The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
2272 {x=b}; it's Nothing if the binder-swap doesn't happen.
2273
2274 There is a danger though. Consider
2275 let v = x +# y
2276 in case (f v) of w -> ...v...v...
2277 And suppose that (f v) expands to just v. Then we'd like to
2278 use 'w' instead of 'v' in the alternative. But it may be too
2279 late; we may have substituted the (cheap) x+#y for v in the
2280 same simplifier pass that reduced (f v) to v.
2281
2282 I think this is just too bad. CSE will recover some of it.
2283
2284 Note [Case of cast]
2285 ~~~~~~~~~~~~~~~~~~~
2286 Consider case (x `cast` co) of b { I# ->
2287 ... (case (x `cast` co) of {...}) ...
2288 We'd like to eliminate the inner case. That is the motivation for
2289 equation (2) in Note [Binder swap]. When we get to the inner case, we
2290 inline x, cancel the casts, and away we go.
2291
2292 Note [Binder swap on GlobalId scrutinees]
2293 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2294 When the scrutinee is a GlobalId we must take care in two ways
2295
2296 i) In order to *know* whether 'x' occurs free in the RHS, we need its
2297 occurrence info. BUT, we don't gather occurrence info for
2298 GlobalIds. That's the reason for the (small) occ_gbl_scrut env in
2299 OccEnv is for: it says "gather occurrence info for these".
2300
2301 ii) We must call localiseId on 'x' first, in case it's a GlobalId, or
2302 has an External Name. See, for example, SimplEnv Note [Global Ids in
2303 the substitution].
2304
2305 Note [Zap case binders in proxy bindings]
2306 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2307 From the original
2308 case x of cb(dead) { p -> ...x... }
2309 we will get
2310 case x of cb(live) { p -> let x = cb in ...x... }
2311
2312 Core Lint never expects to find an *occurrence* of an Id marked
2313 as Dead, so we must zap the OccInfo on cb before making the
2314 binding x = cb. See Trac #5028.
2315
2316 Historical note [no-case-of-case]
2317 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2318 We *used* to suppress the binder-swap in case expressions when
2319 -fno-case-of-case is on. Old remarks:
2320 "This happens in the first simplifier pass,
2321 and enhances full laziness. Here's the bad case:
2322 f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
2323 If we eliminate the inner case, we trap it inside the I# v -> arm,
2324 which might prevent some full laziness happening. I've seen this
2325 in action in spectral/cichelli/Prog.hs:
2326 [(m,n) | m <- [1..max], n <- [1..max]]
2327 Hence the check for NoCaseOfCase."
2328 However, now the full-laziness pass itself reverses the binder-swap, so this
2329 check is no longer necessary.
2330
2331 Historical note [Suppressing the case binder-swap]
2332 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2333 This old note describes a problem that is also fixed by doing the
2334 binder-swap in OccAnal:
2335
2336 There is another situation when it might make sense to suppress the
2337 case-expression binde-swap. If we have
2338
2339 case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
2340 ...other cases .... }
2341
2342 We'll perform the binder-swap for the outer case, giving
2343
2344 case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
2345 ...other cases .... }
2346
2347 But there is no point in doing it for the inner case, because w1 can't
2348 be inlined anyway. Furthermore, doing the case-swapping involves
2349 zapping w2's occurrence info (see paragraphs that follow), and that
2350 forces us to bind w2 when doing case merging. So we get
2351
2352 case x of w1 { A -> let w2 = w1 in e1
2353 B -> let w2 = w1 in e2
2354 ...other cases .... }
2355
2356 This is plain silly in the common case where w2 is dead.
2357
2358 Even so, I can't see a good way to implement this idea. I tried
2359 not doing the binder-swap if the scrutinee was already evaluated
2360 but that failed big-time:
2361
2362 data T = MkT !Int
2363
2364 case v of w { MkT x ->
2365 case x of x1 { I# y1 ->
2366 case x of x2 { I# y2 -> ...
2367
2368 Notice that because MkT is strict, x is marked "evaluated". But to
2369 eliminate the last case, we must either make sure that x (as well as
2370 x1) has unfolding MkT y1. The straightforward thing to do is to do
2371 the binder-swap. So this whole note is a no-op.
2372
2373 It's fixed by doing the binder-swap in OccAnal because we can do the
2374 binder-swap unconditionally and still get occurrence analysis
2375 information right.
2376 -}
2377
2378 mkAltEnv :: OccEnv -> CoreExpr -> Id -> (OccEnv, Maybe (Id, CoreExpr))
2379 -- Does two things: a) makes the occ_one_shots = OccVanilla
2380 -- b) extends the GlobalScruts if possible
2381 -- c) returns a proxy mapping, binding the scrutinee
2382 -- to the case binder, if possible
2383 mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr
2384 = case stripTicksTopE (const True) scrut of
2385 Var v -> add_scrut v case_bndr'
2386 Cast (Var v) co -> add_scrut v (Cast case_bndr' (mkSymCo co))
2387 -- See Note [Case of cast]
2388 _ -> (env { occ_encl = OccVanilla }, Nothing)
2389
2390 where
2391 add_scrut v rhs = ( env { occ_encl = OccVanilla, occ_gbl_scrut = pe `extendVarSet` v }
2392 , Just (localise v, rhs) )
2393
2394 case_bndr' = Var (zapIdOccInfo case_bndr) -- See Note [Zap case binders in proxy bindings]
2395 localise scrut_var = mkLocalIdOrCoVar (localiseName (idName scrut_var)) (idType scrut_var)
2396 -- Localise the scrut_var before shadowing it; we're making a
2397 -- new binding for it, and it might have an External Name, or
2398 -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]
2399 -- Also we don't want any INLINE or NOINLINE pragmas!
2400
2401 {-
2402 ************************************************************************
2403 * *
2404 \subsection[OccurAnal-types]{OccEnv}
2405 * *
2406 ************************************************************************
2407
2408 Note [UsageDetails and zapping]
2409 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2410
2411 On many occasions, we must modify all gathered occurrence data at once. For
2412 instance, all occurrences underneath a (non-one-shot) lambda set the
2413 'occ_in_lam' flag to become 'True'. We could use 'mapVarEnv' to do this, but
2414 that takes O(n) time and we will do this often---in particular, there are many
2415 places where tail calls are not allowed, and each of these causes all variables
2416 to get marked with 'NoTailCallInfo'.
2417
2418 Instead of relying on `mapVarEnv`, then, we carry three 'IdEnv's around along
2419 with the 'OccInfoEnv'. Each of these extra environments is a "zapped set"
2420 recording which variables have been zapped in some way. Zapping all occurrence
2421 info then simply means setting the corresponding zapped set to the whole
2422 'OccInfoEnv', a fast O(1) operation.
2423 -}
2424
2425 type OccInfoEnv = IdEnv OccInfo -- A finite map from ids to their usage
2426 -- INVARIANT: never IAmDead
2427 -- (Deadness is signalled by not being in the map at all)
2428
2429 type ZappedSet = OccInfoEnv -- Values are ignored
2430
2431 data UsageDetails
2432 = UD { ud_env :: !OccInfoEnv
2433 , ud_z_many :: ZappedSet -- apply 'markMany' to these
2434 , ud_z_in_lam :: ZappedSet -- apply 'markInsideLam' to these
2435 , ud_z_no_tail :: ZappedSet } -- apply 'markNonTailCalled' to these
2436 -- INVARIANT: All three zapped sets are subsets of the OccInfoEnv
2437
2438 instance Outputable UsageDetails where
2439 ppr ud = ppr (ud_env (flattenUsageDetails ud))
2440
2441 -------------------
2442 -- UsageDetails API
2443
2444 (+++), combineAltsUsageDetails
2445 :: UsageDetails -> UsageDetails -> UsageDetails
2446 (+++) = combineUsageDetailsWith addOccInfo
2447 combineAltsUsageDetails = combineUsageDetailsWith orOccInfo
2448
2449 combineUsageDetailsList :: [UsageDetails] -> UsageDetails
2450 combineUsageDetailsList = foldl (+++) emptyDetails
2451
2452 mkOneOcc :: OccEnv -> Id -> InterestingCxt -> JoinArity -> UsageDetails
2453 mkOneOcc env id int_cxt arity
2454 | isLocalId id
2455 = singleton $ OneOcc { occ_in_lam = False
2456 , occ_one_br = True
2457 , occ_int_cxt = int_cxt
2458 , occ_tail = AlwaysTailCalled arity }
2459 | id `elemVarSet` occ_gbl_scrut env
2460 = singleton noOccInfo
2461
2462 | otherwise
2463 = emptyDetails
2464 where
2465 singleton info = emptyDetails { ud_env = unitVarEnv id info }
2466
2467 addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
2468 addOneOcc ud id info
2469 = ud { ud_env = extendVarEnv_C plus_zapped (ud_env ud) id info }
2470 `alterZappedSets` (`delVarEnv` id)
2471 where
2472 plus_zapped old new = doZapping ud id old `addOccInfo` new
2473
2474 addManyOccsSet :: UsageDetails -> VarSet -> UsageDetails
2475 addManyOccsSet usage id_set = nonDetFoldUniqSet addManyOccs usage id_set
2476 -- It's OK to use nonDetFoldUFM here because addManyOccs commutes
2477
2478 -- Add several occurrences, assumed not to be tail calls
2479 addManyOccs :: Var -> UsageDetails -> UsageDetails
2480 addManyOccs v u | isId v = addOneOcc u v noOccInfo
2481 | otherwise = u
2482 -- Give a non-committal binder info (i.e noOccInfo) because
2483 -- a) Many copies of the specialised thing can appear
2484 -- b) We don't want to substitute a BIG expression inside a RULE
2485 -- even if that's the only occurrence of the thing
2486 -- (Same goes for INLINE.)
2487
2488 delDetails :: UsageDetails -> Id -> UsageDetails
2489 delDetails ud bndr
2490 = ud `alterUsageDetails` (`delVarEnv` bndr)
2491
2492 delDetailsList :: UsageDetails -> [Id] -> UsageDetails
2493 delDetailsList ud bndrs
2494 = ud `alterUsageDetails` (`delVarEnvList` bndrs)
2495
2496 emptyDetails :: UsageDetails
2497 emptyDetails = UD { ud_env = emptyVarEnv
2498 , ud_z_many = emptyVarEnv
2499 , ud_z_in_lam = emptyVarEnv
2500 , ud_z_no_tail = emptyVarEnv }
2501
2502 isEmptyDetails :: UsageDetails -> Bool
2503 isEmptyDetails = isEmptyVarEnv . ud_env
2504
2505 markAllMany, markAllInsideLam, markAllNonTailCalled, zapDetails
2506 :: UsageDetails -> UsageDetails
2507 markAllMany ud = ud { ud_z_many = ud_env ud }
2508 markAllInsideLam ud = ud { ud_z_in_lam = ud_env ud }
2509 markAllNonTailCalled ud = ud { ud_z_no_tail = ud_env ud }
2510
2511 zapDetails = markAllMany . markAllNonTailCalled -- effectively sets to noOccInfo
2512
2513 lookupDetails :: UsageDetails -> Id -> OccInfo
2514 lookupDetails ud id
2515 = case lookupVarEnv (ud_env ud) id of
2516 Just occ -> doZapping ud id occ
2517 Nothing -> IAmDead
2518
2519 usedIn :: Id -> UsageDetails -> Bool
2520 v `usedIn` ud = isExportedId v || v `elemVarEnv` ud_env ud
2521
2522 udFreeVars :: VarSet -> UsageDetails -> VarSet
2523 -- Find the subset of bndrs that are mentioned in uds
2524 udFreeVars bndrs ud = restrictUniqSetToUFM bndrs (ud_env ud)
2525
2526 -------------------
2527 -- Auxiliary functions for UsageDetails implementation
2528
2529 combineUsageDetailsWith :: (OccInfo -> OccInfo -> OccInfo)
2530 -> UsageDetails -> UsageDetails -> UsageDetails
2531 combineUsageDetailsWith plus_occ_info ud1 ud2
2532 | isEmptyDetails ud1 = ud2
2533 | isEmptyDetails ud2 = ud1
2534 | otherwise
2535 = UD { ud_env = plusVarEnv_C plus_occ_info (ud_env ud1) (ud_env ud2)
2536 , ud_z_many = plusVarEnv (ud_z_many ud1) (ud_z_many ud2)
2537 , ud_z_in_lam = plusVarEnv (ud_z_in_lam ud1) (ud_z_in_lam ud2)
2538 , ud_z_no_tail = plusVarEnv (ud_z_no_tail ud1) (ud_z_no_tail ud2) }
2539
2540 doZapping :: UsageDetails -> Var -> OccInfo -> OccInfo
2541 doZapping ud var occ
2542 = doZappingByUnique ud (varUnique var) occ
2543
2544 doZappingByUnique :: UsageDetails -> Unique -> OccInfo -> OccInfo
2545 doZappingByUnique ud uniq
2546 = (if | in_subset ud_z_many -> markMany
2547 | in_subset ud_z_in_lam -> markInsideLam
2548 | otherwise -> id) .
2549 (if | in_subset ud_z_no_tail -> markNonTailCalled
2550 | otherwise -> id)
2551 where
2552 in_subset field = uniq `elemVarEnvByKey` field ud
2553
2554 alterZappedSets :: UsageDetails -> (ZappedSet -> ZappedSet) -> UsageDetails
2555 alterZappedSets ud f
2556 = ud { ud_z_many = f (ud_z_many ud)
2557 , ud_z_in_lam = f (ud_z_in_lam ud)
2558 , ud_z_no_tail = f (ud_z_no_tail ud) }
2559
2560 alterUsageDetails :: UsageDetails -> (OccInfoEnv -> OccInfoEnv) -> UsageDetails
2561 alterUsageDetails ud f
2562 = ud { ud_env = f (ud_env ud) }
2563 `alterZappedSets` f
2564
2565 flattenUsageDetails :: UsageDetails -> UsageDetails
2566 flattenUsageDetails ud
2567 = ud { ud_env = mapUFM_Directly (doZappingByUnique ud) (ud_env ud) }
2568 `alterZappedSets` const emptyVarEnv
2569
2570 -------------------
2571 -- See Note [Adjusting right-hand sides]
2572 adjustRhsUsage :: Maybe JoinArity -> RecFlag
2573 -> [CoreBndr] -- Outer lambdas, AFTER occ anal
2574 -> UsageDetails -> UsageDetails
2575 adjustRhsUsage mb_join_arity rec_flag bndrs usage
2576 = maybe_mark_lam (maybe_drop_tails usage)
2577 where
2578 maybe_mark_lam ud | one_shot = ud
2579 | otherwise = markAllInsideLam ud
2580 maybe_drop_tails ud | exact_join = ud
2581 | otherwise = markAllNonTailCalled ud
2582
2583 one_shot = case mb_join_arity of
2584 Just join_arity
2585 | isRec rec_flag -> False
2586 | otherwise -> all isOneShotBndr (drop join_arity bndrs)
2587 Nothing -> all isOneShotBndr bndrs
2588
2589 exact_join = case mb_join_arity of
2590 Just join_arity -> bndrs `lengthIs` join_arity
2591 _ -> False
2592
2593 type IdWithOccInfo = Id
2594
2595 tagLamBinders :: UsageDetails -- Of scope
2596 -> [Id] -- Binders
2597 -> (UsageDetails, -- Details with binders removed
2598 [IdWithOccInfo]) -- Tagged binders
2599 -- Used for lambda and case binders
2600 -- It copes with the fact that lambda bindings can have a
2601 -- stable unfolding, used for join points
2602 tagLamBinders usage binders = usage' `seq` (usage', bndrs')
2603 where
2604 (usage', bndrs') = mapAccumR tag_lam usage binders
2605 tag_lam usage bndr = (usage2, bndr')
2606 where
2607 occ = lookupDetails usage bndr
2608 bndr' = setBinderOcc (markNonTailCalled occ) bndr
2609 -- Don't try to make an argument into a join point
2610 usage1 = usage `delDetails` bndr
2611 usage2 | isId bndr = addManyOccsSet usage1 (idUnfoldingVars bndr)
2612 -- This is effectively the RHS of a
2613 -- non-join-point binding, so it's okay to use
2614 -- addManyOccsSet, which assumes no tail calls
2615 | otherwise = usage1
2616
2617 tagNonRecBinder :: TopLevelFlag -- At top level?
2618 -> UsageDetails -- Of scope
2619 -> CoreBndr -- Binder
2620 -> (UsageDetails, -- Details with binder removed
2621 IdWithOccInfo) -- Tagged binder
2622
2623 tagNonRecBinder lvl usage binder
2624 = let
2625 occ = lookupDetails usage binder
2626 will_be_join = decideJoinPointHood lvl usage [binder]
2627 occ' | will_be_join = -- must already be marked AlwaysTailCalled
2628 ASSERT(isAlwaysTailCalled occ) occ
2629 | otherwise = markNonTailCalled occ
2630 binder' = setBinderOcc occ' binder
2631 usage' = usage `delDetails` binder
2632 in
2633 usage' `seq` (usage', binder')
2634
2635 tagRecBinders :: TopLevelFlag -- At top level?
2636 -> UsageDetails -- Of body of let ONLY
2637 -> [(CoreBndr, -- Binder
2638 UsageDetails, -- RHS usage details
2639 [CoreBndr])] -- Lambdas in new RHS
2640 -> (UsageDetails, -- Adjusted details for whole scope,
2641 -- with binders removed
2642 [IdWithOccInfo]) -- Tagged binders
2643 -- Substantially more complicated than non-recursive case. Need to adjust RHS
2644 -- details *before* tagging binders (because the tags depend on the RHSes).
2645 tagRecBinders lvl body_uds triples
2646 = let
2647 (bndrs, rhs_udss, _) = unzip3 triples
2648
2649 -- 1. Determine join-point-hood of whole group, as determined by
2650 -- the *unadjusted* usage details
2651 unadj_uds = body_uds +++ combineUsageDetailsList rhs_udss
2652 will_be_joins = decideJoinPointHood lvl unadj_uds bndrs
2653
2654 -- 2. Adjust usage details of each RHS, taking into account the
2655 -- join-point-hood decision
2656 rhs_udss' = map adjust triples
2657 adjust (bndr, rhs_uds, rhs_bndrs)
2658 = adjustRhsUsage mb_join_arity Recursive rhs_bndrs rhs_uds
2659 where
2660 -- Can't use willBeJoinId_maybe here because we haven't tagged the
2661 -- binder yet (the tag depends on these adjustments!)
2662 mb_join_arity
2663 | will_be_joins
2664 , let occ = lookupDetails unadj_uds bndr
2665 , AlwaysTailCalled arity <- tailCallInfo occ
2666 = Just arity
2667 | otherwise
2668 = ASSERT(not will_be_joins) -- Should be AlwaysTailCalled if
2669 Nothing -- we are making join points!
2670
2671 -- 3. Compute final usage details from adjusted RHS details
2672 adj_uds = body_uds +++ combineUsageDetailsList rhs_udss'
2673
2674 -- 4. Tag each binder with its adjusted details
2675 bndrs' = [ setBinderOcc (lookupDetails adj_uds bndr) bndr
2676 | bndr <- bndrs ]
2677
2678 -- 5. Drop the binders from the adjusted details and return
2679 usage' = adj_uds `delDetailsList` bndrs
2680 in
2681 (usage', bndrs')
2682
2683 setBinderOcc :: OccInfo -> CoreBndr -> CoreBndr
2684 setBinderOcc occ_info bndr
2685 | isTyVar bndr = bndr
2686 | isExportedId bndr = if isManyOccs (idOccInfo bndr)
2687 then bndr
2688 else setIdOccInfo bndr noOccInfo
2689 -- Don't use local usage info for visible-elsewhere things
2690 -- BUT *do* erase any IAmALoopBreaker annotation, because we're
2691 -- about to re-generate it and it shouldn't be "sticky"
2692
2693 | otherwise = setIdOccInfo bndr occ_info
2694
2695 -- | Decide whether some bindings should be made into join points or not.
2696 -- Returns `False` if they can't be join points. Note that it's an
2697 -- all-or-nothing decision, as if multiple binders are given, they're
2698 -- assumed to be mutually recursive.
2699 --
2700 -- It must, however, be a final decision. If we say "True" for 'f',
2701 -- and then subsequently decide /not/ make 'f' into a join point, then
2702 -- the decision about another binding 'g' might be invalidated if (say)
2703 -- 'f' tail-calls 'g'.
2704 --
2705 -- See Note [Invariants on join points] in CoreSyn.
2706 decideJoinPointHood :: TopLevelFlag -> UsageDetails
2707 -> [CoreBndr]
2708 -> Bool
2709 decideJoinPointHood TopLevel _ _
2710 = False
2711 decideJoinPointHood NotTopLevel usage bndrs
2712 | isJoinId (head bndrs)
2713 = WARN(not all_ok, text "OccurAnal failed to rediscover join point(s):" <+>
2714 ppr bndrs)
2715 all_ok
2716 | otherwise
2717 = all_ok
2718 where
2719 -- See Note [Invariants on join points]; invariants cited by number below.
2720 -- Invariant 2 is always satisfiable by the simplifier by eta expansion.
2721 all_ok = -- Invariant 3: Either all are join points or none are
2722 all ok bndrs
2723
2724 ok bndr
2725 | -- Invariant 1: Only tail calls, all same join arity
2726 AlwaysTailCalled arity <- tailCallInfo (lookupDetails usage bndr)
2727 , -- Invariant 1 as applied to LHSes of rules
2728 all (ok_rule arity) (idCoreRules bndr)
2729 -- Invariant 2a: stable unfoldings
2730 -- See Note [Join points and INLINE pragmas]
2731 , ok_unfolding arity (realIdUnfolding bndr)
2732 -- Invariant 4: Satisfies polymorphism rule
2733 , isValidJoinPointType arity (idType bndr)
2734 = True
2735 | otherwise
2736 = False
2737
2738 ok_rule _ BuiltinRule{} = False -- only possible with plugin shenanigans
2739 ok_rule join_arity (Rule { ru_args = args })
2740 = args `lengthIs` join_arity
2741 -- Invariant 1 as applied to LHSes of rules
2742
2743 -- ok_unfolding returns False if we should /not/ convert a non-join-id
2744 -- into a join-id, even though it is AlwaysTailCalled
2745 ok_unfolding join_arity (CoreUnfolding { uf_src = src, uf_tmpl = rhs })
2746 = not (isStableSource src && join_arity > joinRhsArity rhs)
2747 ok_unfolding _ (DFunUnfolding {})
2748 = False
2749 ok_unfolding _ _
2750 = True
2751
2752 willBeJoinId_maybe :: CoreBndr -> Maybe JoinArity
2753 willBeJoinId_maybe bndr
2754 = case tailCallInfo (idOccInfo bndr) of
2755 AlwaysTailCalled arity -> Just arity
2756 _ -> isJoinId_maybe bndr
2757
2758
2759 {- Note [Join points and INLINE pragmas]
2760 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2761 Consider
2762 f x = let g = \x. not -- Arity 1
2763 {-# INLINE g #-}
2764 in case x of
2765 A -> g True True
2766 B -> g True False
2767 C -> blah2
2768
2769 Here 'g' is always tail-called applied to 2 args, but the stable
2770 unfolding captured by the INLINE pragma has arity 1. If we try to
2771 convert g to be a join point, its unfolding will still have arity 1
2772 (since it is stable, and we don't meddle with stable unfoldings), and
2773 Lint will complain (see Note [Invariants on join points], (2a), in
2774 CoreSyn. Trac #13413.
2775
2776 Moreover, since g is going to be inlined anyway, there is no benefit
2777 from making it a join point.
2778
2779 If it is recursive, and uselessly marked INLINE, this will stop us
2780 making it a join point, which is annoying. But occasionally
2781 (notably in class methods; see Note [Instances and loop breakers] in
2782 TcInstDcls) we mark recursive things as INLINE but the recursion
2783 unravels; so ignoring INLINE pragmas on recursive things isn't good
2784 either.
2785
2786 See Invariant 2a of Note [Invariants on join points] in CoreSyn
2787
2788
2789 ************************************************************************
2790 * *
2791 \subsection{Operations over OccInfo}
2792 * *
2793 ************************************************************************
2794 -}
2795
2796 markMany, markInsideLam, markNonTailCalled :: OccInfo -> OccInfo
2797
2798 markMany IAmDead = IAmDead
2799 markMany occ = ManyOccs { occ_tail = occ_tail occ }
2800
2801 markInsideLam occ@(OneOcc {}) = occ { occ_in_lam = True }
2802 markInsideLam occ = occ
2803
2804 markNonTailCalled IAmDead = IAmDead
2805 markNonTailCalled occ = occ { occ_tail = NoTailCallInfo }
2806
2807 addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
2808
2809 addOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2810 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2811 tailCallInfo a2 }
2812 -- Both branches are at least One
2813 -- (Argument is never IAmDead)
2814
2815 -- (orOccInfo orig new) is used
2816 -- when combining occurrence info from branches of a case
2817
2818 orOccInfo (OneOcc { occ_in_lam = in_lam1, occ_int_cxt = int_cxt1
2819 , occ_tail = tail1 })
2820 (OneOcc { occ_in_lam = in_lam2, occ_int_cxt = int_cxt2
2821 , occ_tail = tail2 })
2822 = OneOcc { occ_in_lam = in_lam1 || in_lam2
2823 , occ_one_br = False -- False, because it occurs in both branches
2824 , occ_int_cxt = int_cxt1 && int_cxt2
2825 , occ_tail = tail1 `andTailCallInfo` tail2 }
2826 orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2827 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2828 tailCallInfo a2 }
2829
2830 andTailCallInfo :: TailCallInfo -> TailCallInfo -> TailCallInfo
2831 andTailCallInfo info@(AlwaysTailCalled arity1) (AlwaysTailCalled arity2)
2832 | arity1 == arity2 = info
2833 andTailCallInfo _ _ = NoTailCallInfo