Update Trac ticket URLs to point to GitLab
[ghc.git] / compiler / simplCore / OccurAnal.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 ************************************************************************
5 * *
6 \section[OccurAnal]{Occurrence analysis pass}
7 * *
8 ************************************************************************
9
10 The occurrence analyser re-typechecks a core expression, returning a new
11 core expression with (hopefully) improved usage information.
12 -}
13
14 {-# LANGUAGE CPP, BangPatterns, MultiWayIf, ViewPatterns #-}
15
16 module OccurAnal (
17 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
18 ) where
19
20 #include "HsVersions.h"
21
22 import GhcPrelude
23
24 import CoreSyn
25 import CoreFVs
26 import CoreUtils ( exprIsTrivial, isDefaultAlt, isExpandableApp,
27 stripTicksTopE, mkTicks )
28 import CoreArity ( joinRhsArity )
29 import Id
30 import IdInfo
31 import Name( localiseName )
32 import BasicTypes
33 import Module( Module )
34 import Coercion
35 import Type
36
37 import VarSet
38 import VarEnv
39 import Var
40 import Demand ( argOneShots, argsOneShots )
41 import Digraph ( SCC(..), Node(..)
42 , stronglyConnCompFromEdgedVerticesUniq
43 , stronglyConnCompFromEdgedVerticesUniqR )
44 import Unique
45 import UniqFM
46 import UniqSet
47 import Util
48 import Outputable
49 import Data.List
50 import Control.Arrow ( second )
51
52 {-
53 ************************************************************************
54 * *
55 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
56 * *
57 ************************************************************************
58
59 Here's the externally-callable interface:
60 -}
61
62 occurAnalysePgm :: Module -- Used only in debug output
63 -> (Id -> Bool) -- Active unfoldings
64 -> (Activation -> Bool) -- Active rules
65 -> [CoreRule]
66 -> CoreProgram -> CoreProgram
67 occurAnalysePgm this_mod active_unf active_rule imp_rules binds
68 | isEmptyDetails final_usage
69 = occ_anald_binds
70
71 | otherwise -- See Note [Glomming]
72 = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
73 2 (ppr final_usage ) )
74 occ_anald_glommed_binds
75 where
76 init_env = initOccEnv { occ_rule_act = active_rule
77 , occ_unf_act = active_unf }
78
79 (final_usage, occ_anald_binds) = go init_env binds
80 (_, occ_anald_glommed_binds) = occAnalRecBind init_env TopLevel
81 imp_rule_edges
82 (flattenBinds occ_anald_binds)
83 initial_uds
84 -- It's crucial to re-analyse the glommed-together bindings
85 -- so that we establish the right loop breakers. Otherwise
86 -- we can easily create an infinite loop (#9583 is an example)
87
88 initial_uds = addManyOccsSet emptyDetails
89 (rulesFreeVars imp_rules)
90 -- The RULES declarations keep things alive!
91
92 -- Note [Preventing loops due to imported functions rules]
93 imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
94 [ mapVarEnv (const maps_to) $
95 getUniqSet (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
96 | imp_rule <- imp_rules
97 , not (isBuiltinRule imp_rule) -- See Note [Plugin rules]
98 , let maps_to = exprFreeIds (ru_rhs imp_rule)
99 `delVarSetList` ru_bndrs imp_rule
100 , arg <- ru_args imp_rule ]
101
102 go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
103 go _ []
104 = (initial_uds, [])
105 go env (bind:binds)
106 = (final_usage, bind' ++ binds')
107 where
108 (bs_usage, binds') = go env binds
109 (final_usage, bind') = occAnalBind env TopLevel imp_rule_edges bind
110 bs_usage
111
112 occurAnalyseExpr :: CoreExpr -> CoreExpr
113 -- Do occurrence analysis, and discard occurrence info returned
114 occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap
115
116 occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
117 occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap
118
119 occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
120 occurAnalyseExpr' enable_binder_swap expr
121 = snd (occAnal env expr)
122 where
123 env = initOccEnv { occ_binder_swap = enable_binder_swap }
124
125 {- Note [Plugin rules]
126 ~~~~~~~~~~~~~~~~~~~~~~
127 Conal Elliott (#11651) built a GHC plugin that added some
128 BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
129 do some domain-specific transformations that could not be expressed
130 with an ordinary pattern-matching CoreRule. But then we can't extract
131 the dependencies (in imp_rule_edges) from ru_rhs etc, because a
132 BuiltinRule doesn't have any of that stuff.
133
134 So we simply assume that BuiltinRules have no dependencies, and filter
135 them out from the imp_rule_edges comprehension.
136 -}
137
138 {-
139 ************************************************************************
140 * *
141 Bindings
142 * *
143 ************************************************************************
144
145 Note [Recursive bindings: the grand plan]
146 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
147 When we come across a binding group
148 Rec { x1 = r1; ...; xn = rn }
149 we treat it like this (occAnalRecBind):
150
151 1. Occurrence-analyse each right hand side, and build a
152 "Details" for each binding to capture the results.
153
154 Wrap the details in a Node (details, node-id, dep-node-ids),
155 where node-id is just the unique of the binder, and
156 dep-node-ids lists all binders on which this binding depends.
157 We'll call these the "scope edges".
158 See Note [Forming the Rec groups].
159
160 All this is done by makeNode.
161
162 2. Do SCC-analysis on these Nodes. Each SCC will become a new Rec or
163 NonRec. The key property is that every free variable of a binding
164 is accounted for by the scope edges, so that when we are done
165 everything is still in scope.
166
167 3. For each Cyclic SCC of the scope-edge SCC-analysis in (2), we
168 identify suitable loop-breakers to ensure that inlining terminates.
169 This is done by occAnalRec.
170
171 4. To do so we form a new set of Nodes, with the same details, but
172 different edges, the "loop-breaker nodes". The loop-breaker nodes
173 have both more and fewer dependencies than the scope edges
174 (see Note [Choosing loop breakers])
175
176 More edges: if f calls g, and g has an active rule that mentions h
177 then we add an edge from f -> h
178
179 Fewer edges: we only include dependencies on active rules, on rule
180 RHSs (not LHSs) and if there is an INLINE pragma only
181 on the stable unfolding (and vice versa). The scope
182 edges must be much more inclusive.
183
184 5. The "weak fvs" of a node are, by definition:
185 the scope fvs - the loop-breaker fvs
186 See Note [Weak loop breakers], and the nd_weak field of Details
187
188 6. Having formed the loop-breaker nodes
189
190 Note [Dead code]
191 ~~~~~~~~~~~~~~~~
192 Dropping dead code for a cyclic Strongly Connected Component is done
193 in a very simple way:
194
195 the entire SCC is dropped if none of its binders are mentioned
196 in the body; otherwise the whole thing is kept.
197
198 The key observation is that dead code elimination happens after
199 dependency analysis: so 'occAnalBind' processes SCCs instead of the
200 original term's binding groups.
201
202 Thus 'occAnalBind' does indeed drop 'f' in an example like
203
204 letrec f = ...g...
205 g = ...(...g...)...
206 in
207 ...g...
208
209 when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
210 'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
211 'AcyclicSCC f', where 'body_usage' won't contain 'f'.
212
213 ------------------------------------------------------------
214 Note [Forming Rec groups]
215 ~~~~~~~~~~~~~~~~~~~~~~~~~
216 We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
217 and "g uses f", no matter how indirectly. We do a SCC analysis
218 with an edge f -> g if "f uses g".
219
220 More precisely, "f uses g" iff g should be in scope wherever f is.
221 That is, g is free in:
222 a) the rhs 'ef'
223 b) or the RHS of a rule for f (Note [Rules are extra RHSs])
224 c) or the LHS or a rule for f (Note [Rule dependency info])
225
226 These conditions apply regardless of the activation of the RULE (eg it might be
227 inactive in this phase but become active later). Once a Rec is broken up
228 it can never be put back together, so we must be conservative.
229
230 The principle is that, regardless of rule firings, every variable is
231 always in scope.
232
233 * Note [Rules are extra RHSs]
234 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
235 A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
236 keeps the specialised "children" alive. If the parent dies
237 (because it isn't referenced any more), then the children will die
238 too (unless they are already referenced directly).
239
240 To that end, we build a Rec group for each cyclic strongly
241 connected component,
242 *treating f's rules as extra RHSs for 'f'*.
243 More concretely, the SCC analysis runs on a graph with an edge
244 from f -> g iff g is mentioned in
245 (a) f's rhs
246 (b) f's RULES
247 These are rec_edges.
248
249 Under (b) we include variables free in *either* LHS *or* RHS of
250 the rule. The former might seems silly, but see Note [Rule
251 dependency info]. So in Example [eftInt], eftInt and eftIntFB
252 will be put in the same Rec, even though their 'main' RHSs are
253 both non-recursive.
254
255 * Note [Rule dependency info]
256 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
257 The VarSet in a RuleInfo is used for dependency analysis in the
258 occurrence analyser. We must track free vars in *both* lhs and rhs.
259 Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
260 Why both? Consider
261 x = y
262 RULE f x = v+4
263 Then if we substitute y for x, we'd better do so in the
264 rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
265 as well as 'v'
266
267 * Note [Rules are visible in their own rec group]
268 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
269 We want the rules for 'f' to be visible in f's right-hand side.
270 And we'd like them to be visible in other functions in f's Rec
271 group. E.g. in Note [Specialisation rules] we want f' rule
272 to be visible in both f's RHS, and fs's RHS.
273
274 This means that we must simplify the RULEs first, before looking
275 at any of the definitions. This is done by Simplify.simplRecBind,
276 when it calls addLetIdInfo.
277
278 ------------------------------------------------------------
279 Note [Choosing loop breakers]
280 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
281 Loop breaking is surprisingly subtle. First read the section 4 of
282 "Secrets of the GHC inliner". This describes our basic plan.
283 We avoid infinite inlinings by choosing loop breakers, and
284 ensuring that a loop breaker cuts each loop.
285
286 See also Note [Inlining and hs-boot files] in ToIface, which deals
287 with a closely related source of infinite loops.
288
289 Fundamentally, we do SCC analysis on a graph. For each recursive
290 group we choose a loop breaker, delete all edges to that node,
291 re-analyse the SCC, and iterate.
292
293 But what is the graph? NOT the same graph as was used for Note
294 [Forming Rec groups]! In particular, a RULE is like an equation for
295 'f' that is *always* inlined if it is applicable. We do *not* disable
296 rules for loop-breakers. It's up to whoever makes the rules to make
297 sure that the rules themselves always terminate. See Note [Rules for
298 recursive functions] in Simplify.hs
299
300 Hence, if
301 f's RHS (or its INLINE template if it has one) mentions g, and
302 g has a RULE that mentions h, and
303 h has a RULE that mentions f
304
305 then we *must* choose f to be a loop breaker. Example: see Note
306 [Specialisation rules].
307
308 In general, take the free variables of f's RHS, and augment it with
309 all the variables reachable by RULES from those starting points. That
310 is the whole reason for computing rule_fv_env in occAnalBind. (Of
311 course we only consider free vars that are also binders in this Rec
312 group.) See also Note [Finding rule RHS free vars]
313
314 Note that when we compute this rule_fv_env, we only consider variables
315 free in the *RHS* of the rule, in contrast to the way we build the
316 Rec group in the first place (Note [Rule dependency info])
317
318 Note that if 'g' has RHS that mentions 'w', we should add w to
319 g's loop-breaker edges. More concretely there is an edge from f -> g
320 iff
321 (a) g is mentioned in f's RHS `xor` f's INLINE rhs
322 (see Note [Inline rules])
323 (b) or h is mentioned in f's RHS, and
324 g appears in the RHS of an active RULE of h
325 or a transitive sequence of active rules starting with h
326
327 Why "active rules"? See Note [Finding rule RHS free vars]
328
329 Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
330 chosen as a loop breaker, because their RHSs don't mention each other.
331 And indeed both can be inlined safely.
332
333 Note again that the edges of the graph we use for computing loop breakers
334 are not the same as the edges we use for computing the Rec blocks.
335 That's why we compute
336
337 - rec_edges for the Rec block analysis
338 - loop_breaker_nodes for the loop breaker analysis
339
340 * Note [Finding rule RHS free vars]
341 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
342 Consider this real example from Data Parallel Haskell
343 tagZero :: Array Int -> Array Tag
344 {-# INLINE [1] tagZeroes #-}
345 tagZero xs = pmap (\x -> fromBool (x==0)) xs
346
347 {-# RULES "tagZero" [~1] forall xs n.
348 pmap fromBool <blah blah> = tagZero xs #-}
349 So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
350 However, tagZero can only be inlined in phase 1 and later, while
351 the RULE is only active *before* phase 1. So there's no problem.
352
353 To make this work, we look for the RHS free vars only for
354 *active* rules. That's the reason for the occ_rule_act field
355 of the OccEnv.
356
357 * Note [Weak loop breakers]
358 ~~~~~~~~~~~~~~~~~~~~~~~~~
359 There is a last nasty wrinkle. Suppose we have
360
361 Rec { f = f_rhs
362 RULE f [] = g
363
364 h = h_rhs
365 g = h
366 ...more...
367 }
368
369 Remember that we simplify the RULES before any RHS (see Note
370 [Rules are visible in their own rec group] above).
371
372 So we must *not* postInlineUnconditionally 'g', even though
373 its RHS turns out to be trivial. (I'm assuming that 'g' is
374 not choosen as a loop breaker.) Why not? Because then we
375 drop the binding for 'g', which leaves it out of scope in the
376 RULE!
377
378 Here's a somewhat different example of the same thing
379 Rec { g = h
380 ; h = ...f...
381 ; f = f_rhs
382 RULE f [] = g }
383 Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
384 g, because the RULE for f is active throughout. So the RHS of h
385 might rewrite to h = ...g...
386 So g must remain in scope in the output program!
387
388 We "solve" this by:
389
390 Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
391 iff g is a "missing free variable" of the Rec group
392
393 A "missing free variable" x is one that is mentioned in an RHS or
394 INLINE or RULE of a binding in the Rec group, but where the
395 dependency on x may not show up in the loop_breaker_nodes (see
396 note [Choosing loop breakers} above).
397
398 A normal "strong" loop breaker has IAmLoopBreaker False. So
399
400 Inline postInlineUnconditionally
401 strong IAmLoopBreaker False no no
402 weak IAmLoopBreaker True yes no
403 other yes yes
404
405 The **sole** reason for this kind of loop breaker is so that
406 postInlineUnconditionally does not fire. Ugh. (Typically it'll
407 inline via the usual callSiteInline stuff, so it'll be dead in the
408 next pass, so the main Ugh is the tiresome complication.)
409
410 Note [Rules for imported functions]
411 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
412 Consider this
413 f = /\a. B.g a
414 RULE B.g Int = 1 + f Int
415 Note that
416 * The RULE is for an imported function.
417 * f is non-recursive
418 Now we
419 can get
420 f Int --> B.g Int Inlining f
421 --> 1 + f Int Firing RULE
422 and so the simplifier goes into an infinite loop. This
423 would not happen if the RULE was for a local function,
424 because we keep track of dependencies through rules. But
425 that is pretty much impossible to do for imported Ids. Suppose
426 f's definition had been
427 f = /\a. C.h a
428 where (by some long and devious process), C.h eventually inlines to
429 B.g. We could only spot such loops by exhaustively following
430 unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
431 f.
432
433 Note that RULES for imported functions are important in practice; they
434 occur a lot in the libraries.
435
436 We regard this potential infinite loop as a *programmer* error.
437 It's up the programmer not to write silly rules like
438 RULE f x = f x
439 and the example above is just a more complicated version.
440
441 Note [Preventing loops due to imported functions rules]
442 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
443 Consider:
444 import GHC.Base (foldr)
445
446 {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
447 filter p xs = build (\c n -> foldr (filterFB c p) n xs)
448 filterFB c p = ...
449
450 f = filter p xs
451
452 Note that filter is not a loop-breaker, so what happens is:
453 f = filter p xs
454 = {inline} build (\c n -> foldr (filterFB c p) n xs)
455 = {inline} foldr (filterFB (:) p) [] xs
456 = {RULE} filter p xs
457
458 We are in an infinite loop.
459
460 A more elaborate example (that I actually saw in practice when I went to
461 mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
462 {-# LANGUAGE RankNTypes #-}
463 module GHCList where
464
465 import Prelude hiding (filter)
466 import GHC.Base (build)
467
468 {-# INLINABLE filter #-}
469 filter :: (a -> Bool) -> [a] -> [a]
470 filter p [] = []
471 filter p (x:xs) = if p x then x : filter p xs else filter p xs
472
473 {-# NOINLINE [0] filterFB #-}
474 filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
475 filterFB c p x r | p x = x `c` r
476 | otherwise = r
477
478 {-# RULES
479 "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr
480 (filterFB c p) n xs)
481 "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p
482 #-}
483
484 Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
485 are not), the unfolding given to "filter" in the interface file will be:
486 filter p [] = []
487 filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
488 else build (\c n -> foldr (filterFB c p) n xs
489
490 Note that because this unfolding does not mention "filter", filter is not
491 marked as a strong loop breaker. Therefore at a use site in another module:
492 filter p xs
493 = {inline}
494 case xs of [] -> []
495 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
496 else build (\c n -> foldr (filterFB c p) n xs)
497
498 build (\c n -> foldr (filterFB c p) n xs)
499 = {inline} foldr (filterFB (:) p) [] xs
500 = {RULE} filter p xs
501
502 And we are in an infinite loop again, except that this time the loop is producing an
503 infinitely large *term* (an unrolling of filter) and so the simplifier finally
504 dies with "ticks exhausted"
505
506 Because of this problem, we make a small change in the occurrence analyser
507 designed to mark functions like "filter" as strong loop breakers on the basis that:
508 1. The RHS of filter mentions the local function "filterFB"
509 2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS
510
511 So for each RULE for an *imported* function we are going to add
512 dependency edges between the *local* FVS of the rule LHS and the
513 *local* FVS of the rule RHS. We don't do anything special for RULES on
514 local functions because the standard occurrence analysis stuff is
515 pretty good at getting loop-breakerness correct there.
516
517 It is important to note that even with this extra hack we aren't always going to get
518 things right. For example, it might be that the rule LHS mentions an imported Id,
519 and another module has a RULE that can rewrite that imported Id to one of our local
520 Ids.
521
522 Note [Specialising imported functions] (referred to from Specialise)
523 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
524 BUT for *automatically-generated* rules, the programmer can't be
525 responsible for the "programmer error" in Note [Rules for imported
526 functions]. In paricular, consider specialising a recursive function
527 defined in another module. If we specialise a recursive function B.g,
528 we get
529 g_spec = .....(B.g Int).....
530 RULE B.g Int = g_spec
531 Here, g_spec doesn't look recursive, but when the rule fires, it
532 becomes so. And if B.g was mutually recursive, the loop might
533 not be as obvious as it is here.
534
535 To avoid this,
536 * When specialising a function that is a loop breaker,
537 give a NOINLINE pragma to the specialised function
538
539 Note [Glomming]
540 ~~~~~~~~~~~~~~~
541 RULES for imported Ids can make something at the top refer to something at the bottom:
542 f = \x -> B.g (q x)
543 h = \y -> 3
544
545 RULE: B.g (q x) = h x
546
547 Applying this rule makes f refer to h, although f doesn't appear to
548 depend on h. (And, as in Note [Rules for imported functions], the
549 dependency might be more indirect. For example, f might mention C.t
550 rather than B.g, where C.t eventually inlines to B.g.)
551
552 NOTICE that this cannot happen for rules whose head is a
553 locally-defined function, because we accurately track dependencies
554 through RULES. It only happens for rules whose head is an imported
555 function (B.g in the example above).
556
557 Solution:
558 - When simplifying, bring all top level identifiers into
559 scope at the start, ignoring the Rec/NonRec structure, so
560 that when 'h' pops up in f's rhs, we find it in the in-scope set
561 (as the simplifier generally expects). This happens in simplTopBinds.
562
563 - In the occurrence analyser, if there are any out-of-scope
564 occurrences that pop out of the top, which will happen after
565 firing the rule: f = \x -> h x
566 h = \y -> 3
567 then just glom all the bindings into a single Rec, so that
568 the *next* iteration of the occurrence analyser will sort
569 them all out. This part happens in occurAnalysePgm.
570
571 ------------------------------------------------------------
572 Note [Inline rules]
573 ~~~~~~~~~~~~~~~~~~~
574 None of the above stuff about RULES applies to Inline Rules,
575 stored in a CoreUnfolding. The unfolding, if any, is simplified
576 at the same time as the regular RHS of the function (ie *not* like
577 Note [Rules are visible in their own rec group]), so it should be
578 treated *exactly* like an extra RHS.
579
580 Or, rather, when computing loop-breaker edges,
581 * If f has an INLINE pragma, and it is active, we treat the
582 INLINE rhs as f's rhs
583 * If it's inactive, we treat f as having no rhs
584 * If it has no INLINE pragma, we look at f's actual rhs
585
586
587 There is a danger that we'll be sub-optimal if we see this
588 f = ...f...
589 [INLINE f = ..no f...]
590 where f is recursive, but the INLINE is not. This can just about
591 happen with a sufficiently odd set of rules; eg
592
593 foo :: Int -> Int
594 {-# INLINE [1] foo #-}
595 foo x = x+1
596
597 bar :: Int -> Int
598 {-# INLINE [1] bar #-}
599 bar x = foo x + 1
600
601 {-# RULES "foo" [~1] forall x. foo x = bar x #-}
602
603 Here the RULE makes bar recursive; but it's INLINE pragma remains
604 non-recursive. It's tempting to then say that 'bar' should not be
605 a loop breaker, but an attempt to do so goes wrong in two ways:
606 a) We may get
607 $df = ...$cfoo...
608 $cfoo = ...$df....
609 [INLINE $cfoo = ...no-$df...]
610 But we want $cfoo to depend on $df explicitly so that we
611 put the bindings in the right order to inline $df in $cfoo
612 and perhaps break the loop altogether. (Maybe this
613 b)
614
615
616 Example [eftInt]
617 ~~~~~~~~~~~~~~~
618 Example (from GHC.Enum):
619
620 eftInt :: Int# -> Int# -> [Int]
621 eftInt x y = ...(non-recursive)...
622
623 {-# INLINE [0] eftIntFB #-}
624 eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
625 eftIntFB c n x y = ...(non-recursive)...
626
627 {-# RULES
628 "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
629 "eftIntList" [1] eftIntFB (:) [] = eftInt
630 #-}
631
632 Note [Specialisation rules]
633 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
634 Consider this group, which is typical of what SpecConstr builds:
635
636 fs a = ....f (C a)....
637 f x = ....f (C a)....
638 {-# RULE f (C a) = fs a #-}
639
640 So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
641
642 But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
643 - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
644 - fs is inlined (say it's small)
645 - now there's another opportunity to apply the RULE
646
647 This showed up when compiling Control.Concurrent.Chan.getChanContents.
648
649 ------------------------------------------------------------
650 Note [Finding join points]
651 ~~~~~~~~~~~~~~~~~~~~~~~~~~
652 It's the occurrence analyser's job to find bindings that we can turn into join
653 points, but it doesn't perform that transformation right away. Rather, it marks
654 the eligible bindings as part of their occurrence data, leaving it to the
655 simplifier (or to simpleOptPgm) to actually change the binder's 'IdDetails'.
656 The simplifier then eta-expands the RHS if needed and then updates the
657 occurrence sites. Dividing the work this way means that the occurrence analyser
658 still only takes one pass, yet one can always tell the difference between a
659 function call and a jump by looking at the occurrence (because the same pass
660 changes the 'IdDetails' and propagates the binders to their occurrence sites).
661
662 To track potential join points, we use the 'occ_tail' field of OccInfo. A value
663 of `AlwaysTailCalled n` indicates that every occurrence of the variable is a
664 tail call with `n` arguments (counting both value and type arguments). Otherwise
665 'occ_tail' will be 'NoTailCallInfo'. The tail call info flows bottom-up with the
666 rest of 'OccInfo' until it goes on the binder.
667
668 Note [Rules and join points]
669 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
670
671 Things get fiddly with rules. Suppose we have:
672
673 let j :: Int -> Int
674 j y = 2 * y
675 k :: Int -> Int -> Int
676 {-# RULES "SPEC k 0" k 0 = j #-}
677 k x y = x + 2 * y
678 in ...
679
680 Now suppose that both j and k appear only as saturated tail calls in the body.
681 Thus we would like to make them both join points. The rule complicates matters,
682 though, as its RHS has an unapplied occurrence of j. *However*, if we were to
683 eta-expand the rule, all would be well:
684
685 {-# RULES "SPEC k 0" forall a. k 0 a = j a #-}
686
687 So conceivably we could notice that a potential join point would have an
688 "undersaturated" rule and account for it. This would mean we could make
689 something that's been specialised a join point, for instance. But local bindings
690 are rarely specialised, and being overly cautious about rules only
691 costs us anything when, for some `j`:
692
693 * Before specialisation, `j` has non-tail calls, so it can't be a join point.
694 * During specialisation, `j` gets specialised and thus acquires rules.
695 * Sometime afterward, the non-tail calls to `j` disappear (as dead code, say),
696 and so now `j` *could* become a join point.
697
698 This appears to be very rare in practice. TODO Perhaps we should gather
699 statistics to be sure.
700
701 ------------------------------------------------------------
702 Note [Adjusting right-hand sides]
703 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
704 There's a bit of a dance we need to do after analysing a lambda expression or
705 a right-hand side. In particular, we need to
706
707 a) call 'markAllInsideLam' *unless* the binding is for a thunk, a one-shot
708 lambda, or a non-recursive join point; and
709 b) call 'markAllNonTailCalled' *unless* the binding is for a join point.
710
711 Some examples, with how the free occurrences in e (assumed not to be a value
712 lambda) get marked:
713
714 inside lam non-tail-called
715 ------------------------------------------------------------
716 let x = e No Yes
717 let f = \x -> e Yes Yes
718 let f = \x{OneShot} -> e No Yes
719 \x -> e Yes Yes
720 join j x = e No No
721 joinrec j x = e Yes No
722
723 There are a few other caveats; most importantly, if we're marking a binding as
724 'AlwaysTailCalled', it's *going* to be a join point, so we treat it as one so
725 that the effect cascades properly. Consequently, at the time the RHS is
726 analysed, we won't know what adjustments to make; thus 'occAnalLamOrRhs' must
727 return the unadjusted 'UsageDetails', to be adjusted by 'adjustRhsUsage' once
728 join-point-hood has been decided.
729
730 Thus the overall sequence taking place in 'occAnalNonRecBind' and
731 'occAnalRecBind' is as follows:
732
733 1. Call 'occAnalLamOrRhs' to find usage information for the RHS.
734 2. Call 'tagNonRecBinder' or 'tagRecBinders', which decides whether to make
735 the binding a join point.
736 3. Call 'adjustRhsUsage' accordingly. (Done as part of 'tagRecBinders' when
737 recursive.)
738
739 (In the recursive case, this logic is spread between 'makeNode' and
740 'occAnalRec'.)
741 -}
742
743 ------------------------------------------------------------------
744 -- occAnalBind
745 ------------------------------------------------------------------
746
747 occAnalBind :: OccEnv -- The incoming OccEnv
748 -> TopLevelFlag
749 -> ImpRuleEdges
750 -> CoreBind
751 -> UsageDetails -- Usage details of scope
752 -> (UsageDetails, -- Of the whole let(rec)
753 [CoreBind])
754
755 occAnalBind env lvl top_env (NonRec binder rhs) body_usage
756 = occAnalNonRecBind env lvl top_env binder rhs body_usage
757 occAnalBind env lvl top_env (Rec pairs) body_usage
758 = occAnalRecBind env lvl top_env pairs body_usage
759
760 -----------------
761 occAnalNonRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> Var -> CoreExpr
762 -> UsageDetails -> (UsageDetails, [CoreBind])
763 occAnalNonRecBind env lvl imp_rule_edges binder rhs body_usage
764 | isTyVar binder -- A type let; we don't gather usage info
765 = (body_usage, [NonRec binder rhs])
766
767 | not (binder `usedIn` body_usage) -- It's not mentioned
768 = (body_usage, [])
769
770 | otherwise -- It's mentioned in the body
771 = (body_usage' `andUDs` rhs_usage', [NonRec tagged_binder rhs'])
772 where
773 (body_usage', tagged_binder) = tagNonRecBinder lvl body_usage binder
774 mb_join_arity = willBeJoinId_maybe tagged_binder
775
776 (bndrs, body) = collectBinders rhs
777
778 (rhs_usage1, bndrs', body') = occAnalNonRecRhs env tagged_binder bndrs body
779 rhs' = mkLams (markJoinOneShots mb_join_arity bndrs') body'
780 -- For a /non-recursive/ join point we can mark all
781 -- its join-lambda as one-shot; and it's a good idea to do so
782
783 -- Unfoldings
784 -- See Note [Unfoldings and join points]
785 rhs_usage2 = case occAnalUnfolding env NonRecursive binder of
786 Just unf_usage -> rhs_usage1 `andUDs` unf_usage
787 Nothing -> rhs_usage1
788
789 -- Rules
790 -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
791 rules_w_uds = occAnalRules env mb_join_arity NonRecursive tagged_binder
792 rule_uds = map (\(_, l, r) -> l `andUDs` r) rules_w_uds
793 rhs_usage3 = foldr andUDs rhs_usage2 rule_uds
794 rhs_usage4 = case lookupVarEnv imp_rule_edges binder of
795 Nothing -> rhs_usage3
796 Just vs -> addManyOccsSet rhs_usage3 vs
797 -- See Note [Preventing loops due to imported functions rules]
798
799 -- Final adjustment
800 rhs_usage' = adjustRhsUsage mb_join_arity NonRecursive bndrs' rhs_usage4
801
802 -----------------
803 occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)]
804 -> UsageDetails -> (UsageDetails, [CoreBind])
805 occAnalRecBind env lvl imp_rule_edges pairs body_usage
806 = foldr (occAnalRec env lvl) (body_usage, []) sccs
807 -- For a recursive group, we
808 -- * occ-analyse all the RHSs
809 -- * compute strongly-connected components
810 -- * feed those components to occAnalRec
811 -- See Note [Recursive bindings: the grand plan]
812 where
813 sccs :: [SCC Details]
814 sccs = {-# SCC "occAnalBind.scc" #-}
815 stronglyConnCompFromEdgedVerticesUniq nodes
816
817 nodes :: [LetrecNode]
818 nodes = {-# SCC "occAnalBind.assoc" #-}
819 map (makeNode env imp_rule_edges bndr_set) pairs
820
821 bndr_set = mkVarSet (map fst pairs)
822
823 {-
824 Note [Unfoldings and join points]
825 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
826
827 We assume that anything in an unfolding occurs multiple times, since unfoldings
828 are often copied (that's the whole point!). But we still need to track tail
829 calls for the purpose of finding join points.
830 -}
831
832 -----------------------------
833 occAnalRec :: OccEnv -> TopLevelFlag
834 -> SCC Details
835 -> (UsageDetails, [CoreBind])
836 -> (UsageDetails, [CoreBind])
837
838 -- The NonRec case is just like a Let (NonRec ...) above
839 occAnalRec _ lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs
840 , nd_uds = rhs_uds, nd_rhs_bndrs = rhs_bndrs }))
841 (body_uds, binds)
842 | not (bndr `usedIn` body_uds)
843 = (body_uds, binds) -- See Note [Dead code]
844
845 | otherwise -- It's mentioned in the body
846 = (body_uds' `andUDs` rhs_uds',
847 NonRec tagged_bndr rhs : binds)
848 where
849 (body_uds', tagged_bndr) = tagNonRecBinder lvl body_uds bndr
850 rhs_uds' = adjustRhsUsage (willBeJoinId_maybe tagged_bndr) NonRecursive
851 rhs_bndrs rhs_uds
852
853 -- The Rec case is the interesting one
854 -- See Note [Recursive bindings: the grand plan]
855 -- See Note [Loop breaking]
856 occAnalRec env lvl (CyclicSCC details_s) (body_uds, binds)
857 | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
858 = (body_uds, binds) -- See Note [Dead code]
859
860 | otherwise -- At this point we always build a single Rec
861 = -- pprTrace "occAnalRec" (vcat
862 -- [ text "weak_fvs" <+> ppr weak_fvs
863 -- , text "lb nodes" <+> ppr loop_breaker_nodes])
864 (final_uds, Rec pairs : binds)
865
866 where
867 bndrs = map nd_bndr details_s
868 bndr_set = mkVarSet bndrs
869
870 ------------------------------
871 -- See Note [Choosing loop breakers] for loop_breaker_nodes
872 final_uds :: UsageDetails
873 loop_breaker_nodes :: [LetrecNode]
874 (final_uds, loop_breaker_nodes)
875 = mkLoopBreakerNodes env lvl bndr_set body_uds details_s
876
877 ------------------------------
878 weak_fvs :: VarSet
879 weak_fvs = mapUnionVarSet nd_weak details_s
880
881 ---------------------------
882 -- Now reconstruct the cycle
883 pairs :: [(Id,CoreExpr)]
884 pairs | isEmptyVarSet weak_fvs = reOrderNodes 0 bndr_set weak_fvs loop_breaker_nodes []
885 | otherwise = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_nodes []
886 -- If weak_fvs is empty, the loop_breaker_nodes will include
887 -- all the edges in the original scope edges [remember,
888 -- weak_fvs is the difference between scope edges and
889 -- lb-edges], so a fresh SCC computation would yield a
890 -- single CyclicSCC result; and reOrderNodes deals with
891 -- exactly that case
892
893
894 ------------------------------------------------------------------
895 -- Loop breaking
896 ------------------------------------------------------------------
897
898 type Binding = (Id,CoreExpr)
899
900 loopBreakNodes :: Int
901 -> VarSet -- All binders
902 -> VarSet -- Binders whose dependencies may be "missing"
903 -- See Note [Weak loop breakers]
904 -> [LetrecNode]
905 -> [Binding] -- Append these to the end
906 -> [Binding]
907 {-
908 loopBreakNodes is applied to the list of nodes for a cyclic strongly
909 connected component (there's guaranteed to be a cycle). It returns
910 the same nodes, but
911 a) in a better order,
912 b) with some of the Ids having a IAmALoopBreaker pragma
913
914 The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means
915 that the simplifier can guarantee not to loop provided it never records an inlining
916 for these no-inline guys.
917
918 Furthermore, the order of the binds is such that if we neglect dependencies
919 on the no-inline Ids then the binds are topologically sorted. This means
920 that the simplifier will generally do a good job if it works from top bottom,
921 recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
922 -}
923
924 -- Return the bindings sorted into a plausible order, and marked with loop breakers.
925 loopBreakNodes depth bndr_set weak_fvs nodes binds
926 = -- pprTrace "loopBreakNodes" (ppr nodes) $
927 go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds
928 where
929 go [] binds = binds
930 go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
931
932 loop_break_scc scc binds
933 = case scc of
934 AcyclicSCC node -> mk_non_loop_breaker weak_fvs node : binds
935 CyclicSCC nodes -> reOrderNodes depth bndr_set weak_fvs nodes binds
936
937 ----------------------------------
938 reOrderNodes :: Int -> VarSet -> VarSet -> [LetrecNode] -> [Binding] -> [Binding]
939 -- Choose a loop breaker, mark it no-inline,
940 -- and call loopBreakNodes on the rest
941 reOrderNodes _ _ _ [] _ = panic "reOrderNodes"
942 reOrderNodes _ _ _ [node] binds = mk_loop_breaker node : binds
943 reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
944 = -- pprTrace "reOrderNodes" (vcat [ text "unchosen" <+> ppr unchosen
945 -- , text "chosen" <+> ppr chosen_nodes ]) $
946 loopBreakNodes new_depth bndr_set weak_fvs unchosen $
947 (map mk_loop_breaker chosen_nodes ++ binds)
948 where
949 (chosen_nodes, unchosen) = chooseLoopBreaker approximate_lb
950 (nd_score (node_payload node))
951 [node] [] nodes
952
953 approximate_lb = depth >= 2
954 new_depth | approximate_lb = 0
955 | otherwise = depth+1
956 -- After two iterations (d=0, d=1) give up
957 -- and approximate, returning to d=0
958
959 mk_loop_breaker :: LetrecNode -> Binding
960 mk_loop_breaker (node_payload -> ND { nd_bndr = bndr, nd_rhs = rhs})
961 = (bndr `setIdOccInfo` strongLoopBreaker { occ_tail = tail_info }, rhs)
962 where
963 tail_info = tailCallInfo (idOccInfo bndr)
964
965 mk_non_loop_breaker :: VarSet -> LetrecNode -> Binding
966 -- See Note [Weak loop breakers]
967 mk_non_loop_breaker weak_fvs (node_payload -> ND { nd_bndr = bndr
968 , nd_rhs = rhs})
969 | bndr `elemVarSet` weak_fvs = (setIdOccInfo bndr occ', rhs)
970 | otherwise = (bndr, rhs)
971 where
972 occ' = weakLoopBreaker { occ_tail = tail_info }
973 tail_info = tailCallInfo (idOccInfo bndr)
974
975 ----------------------------------
976 chooseLoopBreaker :: Bool -- True <=> Too many iterations,
977 -- so approximate
978 -> NodeScore -- Best score so far
979 -> [LetrecNode] -- Nodes with this score
980 -> [LetrecNode] -- Nodes with higher scores
981 -> [LetrecNode] -- Unprocessed nodes
982 -> ([LetrecNode], [LetrecNode])
983 -- This loop looks for the bind with the lowest score
984 -- to pick as the loop breaker. The rest accumulate in
985 chooseLoopBreaker _ _ loop_nodes acc []
986 = (loop_nodes, acc) -- Done
987
988 -- If approximate_loop_breaker is True, we pick *all*
989 -- nodes with lowest score, else just one
990 -- See Note [Complexity of loop breaking]
991 chooseLoopBreaker approx_lb loop_sc loop_nodes acc (node : nodes)
992 | approx_lb
993 , rank sc == rank loop_sc
994 = chooseLoopBreaker approx_lb loop_sc (node : loop_nodes) acc nodes
995
996 | sc `betterLB` loop_sc -- Better score so pick this new one
997 = chooseLoopBreaker approx_lb sc [node] (loop_nodes ++ acc) nodes
998
999 | otherwise -- Worse score so don't pick it
1000 = chooseLoopBreaker approx_lb loop_sc loop_nodes (node : acc) nodes
1001 where
1002 sc = nd_score (node_payload node)
1003
1004 {-
1005 Note [Complexity of loop breaking]
1006 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1007 The loop-breaking algorithm knocks out one binder at a time, and
1008 performs a new SCC analysis on the remaining binders. That can
1009 behave very badly in tightly-coupled groups of bindings; in the
1010 worst case it can be (N**2)*log N, because it does a full SCC
1011 on N, then N-1, then N-2 and so on.
1012
1013 To avoid this, we switch plans after 2 (or whatever) attempts:
1014 Plan A: pick one binder with the lowest score, make it
1015 a loop breaker, and try again
1016 Plan B: pick *all* binders with the lowest score, make them
1017 all loop breakers, and try again
1018 Since there are only a small finite number of scores, this will
1019 terminate in a constant number of iterations, rather than O(N)
1020 iterations.
1021
1022 You might thing that it's very unlikely, but RULES make it much
1023 more likely. Here's a real example from #1969:
1024 Rec { $dm = \d.\x. op d
1025 {-# RULES forall d. $dm Int d = $s$dm1
1026 forall d. $dm Bool d = $s$dm2 #-}
1027
1028 dInt = MkD .... opInt ...
1029 dInt = MkD .... opBool ...
1030 opInt = $dm dInt
1031 opBool = $dm dBool
1032
1033 $s$dm1 = \x. op dInt
1034 $s$dm2 = \x. op dBool }
1035 The RULES stuff means that we can't choose $dm as a loop breaker
1036 (Note [Choosing loop breakers]), so we must choose at least (say)
1037 opInt *and* opBool, and so on. The number of loop breakders is
1038 linear in the number of instance declarations.
1039
1040 Note [Loop breakers and INLINE/INLINABLE pragmas]
1041 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1042 Avoid choosing a function with an INLINE pramga as the loop breaker!
1043 If such a function is mutually-recursive with a non-INLINE thing,
1044 then the latter should be the loop-breaker.
1045
1046 It's vital to distinguish between INLINE and INLINABLE (the
1047 Bool returned by hasStableCoreUnfolding_maybe). If we start with
1048 Rec { {-# INLINABLE f #-}
1049 f x = ...f... }
1050 and then worker/wrapper it through strictness analysis, we'll get
1051 Rec { {-# INLINABLE $wf #-}
1052 $wf p q = let x = (p,q) in ...f...
1053
1054 {-# INLINE f #-}
1055 f x = case x of (p,q) -> $wf p q }
1056
1057 Now it is vital that we choose $wf as the loop breaker, so we can
1058 inline 'f' in '$wf'.
1059
1060 Note [DFuns should not be loop breakers]
1061 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1062 It's particularly bad to make a DFun into a loop breaker. See
1063 Note [How instance declarations are translated] in TcInstDcls
1064
1065 We give DFuns a higher score than ordinary CONLIKE things because
1066 if there's a choice we want the DFun to be the non-loop breaker. Eg
1067
1068 rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
1069
1070 $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
1071 {-# DFUN #-}
1072 $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
1073 }
1074
1075 Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
1076 if we can't unravel the DFun first.
1077
1078 Note [Constructor applications]
1079 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1080 It's really really important to inline dictionaries. Real
1081 example (the Enum Ordering instance from GHC.Base):
1082
1083 rec f = \ x -> case d of (p,q,r) -> p x
1084 g = \ x -> case d of (p,q,r) -> q x
1085 d = (v, f, g)
1086
1087 Here, f and g occur just once; but we can't inline them into d.
1088 On the other hand we *could* simplify those case expressions if
1089 we didn't stupidly choose d as the loop breaker.
1090 But we won't because constructor args are marked "Many".
1091 Inlining dictionaries is really essential to unravelling
1092 the loops in static numeric dictionaries, see GHC.Float.
1093
1094 Note [Closure conversion]
1095 ~~~~~~~~~~~~~~~~~~~~~~~~~
1096 We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
1097 The immediate motivation came from the result of a closure-conversion transformation
1098 which generated code like this:
1099
1100 data Clo a b = forall c. Clo (c -> a -> b) c
1101
1102 ($:) :: Clo a b -> a -> b
1103 Clo f env $: x = f env x
1104
1105 rec { plus = Clo plus1 ()
1106
1107 ; plus1 _ n = Clo plus2 n
1108
1109 ; plus2 Zero n = n
1110 ; plus2 (Succ m) n = Succ (plus $: m $: n) }
1111
1112 If we inline 'plus' and 'plus1', everything unravels nicely. But if
1113 we choose 'plus1' as the loop breaker (which is entirely possible
1114 otherwise), the loop does not unravel nicely.
1115
1116
1117 @occAnalUnfolding@ deals with the question of bindings where the Id is marked
1118 by an INLINE pragma. For these we record that anything which occurs
1119 in its RHS occurs many times. This pessimistically assumes that this
1120 inlined binder also occurs many times in its scope, but if it doesn't
1121 we'll catch it next time round. At worst this costs an extra simplifier pass.
1122 ToDo: try using the occurrence info for the inline'd binder.
1123
1124 [March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC.
1125 [June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC.
1126
1127
1128 ************************************************************************
1129 * *
1130 Making nodes
1131 * *
1132 ************************************************************************
1133 -}
1134
1135 type ImpRuleEdges = IdEnv IdSet -- Mapping from FVs of imported RULE LHSs to RHS FVs
1136
1137 noImpRuleEdges :: ImpRuleEdges
1138 noImpRuleEdges = emptyVarEnv
1139
1140 type LetrecNode = Node Unique Details -- Node comes from Digraph
1141 -- The Unique key is gotten from the Id
1142 data Details
1143 = ND { nd_bndr :: Id -- Binder
1144 , nd_rhs :: CoreExpr -- RHS, already occ-analysed
1145 , nd_rhs_bndrs :: [CoreBndr] -- Outer lambdas of RHS
1146 -- INVARIANT: (nd_rhs_bndrs nd, _) ==
1147 -- collectBinders (nd_rhs nd)
1148
1149 , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings
1150 -- ignoring phase (ie assuming all are active)
1151 -- See Note [Forming Rec groups]
1152
1153 , nd_inl :: IdSet -- Free variables of
1154 -- the stable unfolding (if present and active)
1155 -- or the RHS (if not)
1156 -- but excluding any RULES
1157 -- This is the IdSet that may be used if the Id is inlined
1158
1159 , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds
1160 -- but are *not* in nd_inl. These are the ones whose
1161 -- dependencies might not be respected by loop_breaker_nodes
1162 -- See Note [Weak loop breakers]
1163
1164 , nd_active_rule_fvs :: IdSet -- Free variables of the RHS of active RULES
1165
1166 , nd_score :: NodeScore
1167 }
1168
1169 instance Outputable Details where
1170 ppr nd = text "ND" <> braces
1171 (sep [ text "bndr =" <+> ppr (nd_bndr nd)
1172 , text "uds =" <+> ppr (nd_uds nd)
1173 , text "inl =" <+> ppr (nd_inl nd)
1174 , text "weak =" <+> ppr (nd_weak nd)
1175 , text "rule =" <+> ppr (nd_active_rule_fvs nd)
1176 , text "score =" <+> ppr (nd_score nd)
1177 ])
1178
1179 -- The NodeScore is compared lexicographically;
1180 -- e.g. lower rank wins regardless of size
1181 type NodeScore = ( Int -- Rank: lower => more likely to be picked as loop breaker
1182 , Int -- Size of rhs: higher => more likely to be picked as LB
1183 -- Maxes out at maxExprSize; we just use it to prioritise
1184 -- small functions
1185 , Bool ) -- Was it a loop breaker before?
1186 -- True => more likely to be picked
1187 -- Note [Loop breakers, node scoring, and stability]
1188
1189 rank :: NodeScore -> Int
1190 rank (r, _, _) = r
1191
1192 makeNode :: OccEnv -> ImpRuleEdges -> VarSet
1193 -> (Var, CoreExpr) -> LetrecNode
1194 -- See Note [Recursive bindings: the grand plan]
1195 makeNode env imp_rule_edges bndr_set (bndr, rhs)
1196 = DigraphNode details (varUnique bndr) (nonDetKeysUniqSet node_fvs)
1197 -- It's OK to use nonDetKeysUniqSet here as stronglyConnCompFromEdgedVerticesR
1198 -- is still deterministic with edges in nondeterministic order as
1199 -- explained in Note [Deterministic SCC] in Digraph.
1200 where
1201 details = ND { nd_bndr = bndr
1202 , nd_rhs = rhs'
1203 , nd_rhs_bndrs = bndrs'
1204 , nd_uds = rhs_usage3
1205 , nd_inl = inl_fvs
1206 , nd_weak = node_fvs `minusVarSet` inl_fvs
1207 , nd_active_rule_fvs = active_rule_fvs
1208 , nd_score = pprPanic "makeNodeDetails" (ppr bndr) }
1209
1210 -- Constructing the edges for the main Rec computation
1211 -- See Note [Forming Rec groups]
1212 (bndrs, body) = collectBinders rhs
1213 (rhs_usage1, bndrs', body') = occAnalRecRhs env bndrs body
1214 rhs' = mkLams bndrs' body'
1215 rhs_usage2 = foldr andUDs rhs_usage1 rule_uds
1216 -- Note [Rules are extra RHSs]
1217 -- Note [Rule dependency info]
1218 rhs_usage3 = case mb_unf_uds of
1219 Just unf_uds -> rhs_usage2 `andUDs` unf_uds
1220 Nothing -> rhs_usage2
1221 node_fvs = udFreeVars bndr_set rhs_usage3
1222
1223 -- Finding the free variables of the rules
1224 is_active = occ_rule_act env :: Activation -> Bool
1225
1226 rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)]
1227 rules_w_uds = occAnalRules env (Just (length bndrs)) Recursive bndr
1228
1229 rules_w_rhs_fvs :: [(Activation, VarSet)] -- Find the RHS fvs
1230 rules_w_rhs_fvs = maybe id (\ids -> ((AlwaysActive, ids):))
1231 (lookupVarEnv imp_rule_edges bndr)
1232 -- See Note [Preventing loops due to imported functions rules]
1233 [ (ru_act rule, udFreeVars bndr_set rhs_uds)
1234 | (rule, _, rhs_uds) <- rules_w_uds ]
1235 rule_uds = map (\(_, l, r) -> l `andUDs` r) rules_w_uds
1236 active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_rhs_fvs
1237 , is_active a]
1238
1239 -- Finding the usage details of the INLINE pragma (if any)
1240 mb_unf_uds = occAnalUnfolding env Recursive bndr
1241
1242 -- Find the "nd_inl" free vars; for the loop-breaker phase
1243 inl_fvs = case mb_unf_uds of
1244 Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
1245 Just unf_uds -> udFreeVars bndr_set unf_uds
1246 -- We could check for an *active* INLINE (returning
1247 -- emptyVarSet for an inactive one), but is_active
1248 -- isn't the right thing (it tells about
1249 -- RULE activation), so we'd need more plumbing
1250
1251 mkLoopBreakerNodes :: OccEnv -> TopLevelFlag
1252 -> VarSet
1253 -> UsageDetails -- for BODY of let
1254 -> [Details]
1255 -> (UsageDetails, -- adjusted
1256 [LetrecNode])
1257 -- Does four things
1258 -- a) tag each binder with its occurrence info
1259 -- b) add a NodeScore to each node
1260 -- c) make a Node with the right dependency edges for
1261 -- the loop-breaker SCC analysis
1262 -- d) adjust each RHS's usage details according to
1263 -- the binder's (new) shotness and join-point-hood
1264 mkLoopBreakerNodes env lvl bndr_set body_uds details_s
1265 = (final_uds, zipWith mk_lb_node details_s bndrs')
1266 where
1267 (final_uds, bndrs') = tagRecBinders lvl body_uds
1268 [ ((nd_bndr nd)
1269 ,(nd_uds nd)
1270 ,(nd_rhs_bndrs nd))
1271 | nd <- details_s ]
1272 mk_lb_node nd@(ND { nd_bndr = bndr, nd_rhs = rhs, nd_inl = inl_fvs }) bndr'
1273 = DigraphNode nd' (varUnique bndr) (nonDetKeysUniqSet lb_deps)
1274 -- It's OK to use nonDetKeysUniqSet here as
1275 -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges
1276 -- in nondeterministic order as explained in
1277 -- Note [Deterministic SCC] in Digraph.
1278 where
1279 nd' = nd { nd_bndr = bndr', nd_score = score }
1280 score = nodeScore env bndr bndr' rhs lb_deps
1281 lb_deps = extendFvs_ rule_fv_env inl_fvs
1282
1283 rule_fv_env :: IdEnv IdSet
1284 -- Maps a variable f to the variables from this group
1285 -- mentioned in RHS of active rules for f
1286 -- Domain is *subset* of bound vars (others have no rule fvs)
1287 rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
1288 init_rule_fvs -- See Note [Finding rule RHS free vars]
1289 = [ (b, trimmed_rule_fvs)
1290 | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s
1291 , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
1292 , not (isEmptyVarSet trimmed_rule_fvs) ]
1293
1294
1295 ------------------------------------------
1296 nodeScore :: OccEnv
1297 -> Id -- Binder has old occ-info (just for loop-breaker-ness)
1298 -> Id -- Binder with new occ-info
1299 -> CoreExpr -- RHS
1300 -> VarSet -- Loop-breaker dependencies
1301 -> NodeScore
1302 nodeScore env old_bndr new_bndr bind_rhs lb_deps
1303 | not (isId old_bndr) -- A type or cercion variable is never a loop breaker
1304 = (100, 0, False)
1305
1306 | old_bndr `elemVarSet` lb_deps -- Self-recursive things are great loop breakers
1307 = (0, 0, True) -- See Note [Self-recursion and loop breakers]
1308
1309 | not (occ_unf_act env old_bndr) -- A binder whose inlining is inactive (e.g. has
1310 = (0, 0, True) -- a NOINLINE pragma) makes a great loop breaker
1311
1312 | exprIsTrivial rhs
1313 = mk_score 10 -- Practically certain to be inlined
1314 -- Used to have also: && not (isExportedId bndr)
1315 -- But I found this sometimes cost an extra iteration when we have
1316 -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }
1317 -- where df is the exported dictionary. Then df makes a really
1318 -- bad choice for loop breaker
1319
1320 | DFunUnfolding { df_args = args } <- id_unfolding
1321 -- Never choose a DFun as a loop breaker
1322 -- Note [DFuns should not be loop breakers]
1323 = (9, length args, is_lb)
1324
1325 -- Data structures are more important than INLINE pragmas
1326 -- so that dictionary/method recursion unravels
1327
1328 | CoreUnfolding { uf_guidance = UnfWhen {} } <- id_unfolding
1329 = mk_score 6
1330
1331 | is_con_app rhs -- Data types help with cases:
1332 = mk_score 5 -- Note [Constructor applications]
1333
1334 | isStableUnfolding id_unfolding
1335 , can_unfold
1336 = mk_score 3
1337
1338 | isOneOcc (idOccInfo new_bndr)
1339 = mk_score 2 -- Likely to be inlined
1340
1341 | can_unfold -- The Id has some kind of unfolding
1342 = mk_score 1
1343
1344 | otherwise
1345 = (0, 0, is_lb)
1346
1347 where
1348 mk_score :: Int -> NodeScore
1349 mk_score rank = (rank, rhs_size, is_lb)
1350
1351 is_lb = isStrongLoopBreaker (idOccInfo old_bndr)
1352 rhs = case id_unfolding of
1353 CoreUnfolding { uf_src = src, uf_tmpl = unf_rhs }
1354 | isStableSource src
1355 -> unf_rhs
1356 _ -> bind_rhs
1357 -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding
1358 rhs_size = case id_unfolding of
1359 CoreUnfolding { uf_guidance = guidance }
1360 | UnfIfGoodArgs { ug_size = size } <- guidance
1361 -> size
1362 _ -> cheapExprSize rhs
1363
1364 can_unfold = canUnfold id_unfolding
1365 id_unfolding = realIdUnfolding old_bndr
1366 -- realIdUnfolding: Ignore loop-breaker-ness here because
1367 -- that is what we are setting!
1368
1369 -- Checking for a constructor application
1370 -- Cheap and cheerful; the simplifier moves casts out of the way
1371 -- The lambda case is important to spot x = /\a. C (f a)
1372 -- which comes up when C is a dictionary constructor and
1373 -- f is a default method.
1374 -- Example: the instance for Show (ST s a) in GHC.ST
1375 --
1376 -- However we *also* treat (\x. C p q) as a con-app-like thing,
1377 -- Note [Closure conversion]
1378 is_con_app (Var v) = isConLikeId v
1379 is_con_app (App f _) = is_con_app f
1380 is_con_app (Lam _ e) = is_con_app e
1381 is_con_app (Tick _ e) = is_con_app e
1382 is_con_app _ = False
1383
1384 maxExprSize :: Int
1385 maxExprSize = 20 -- Rather arbitrary
1386
1387 cheapExprSize :: CoreExpr -> Int
1388 -- Maxes out at maxExprSize
1389 cheapExprSize e
1390 = go 0 e
1391 where
1392 go n e | n >= maxExprSize = n
1393 | otherwise = go1 n e
1394
1395 go1 n (Var {}) = n+1
1396 go1 n (Lit {}) = n+1
1397 go1 n (Type {}) = n
1398 go1 n (Coercion {}) = n
1399 go1 n (Tick _ e) = go1 n e
1400 go1 n (Cast e _) = go1 n e
1401 go1 n (App f a) = go (go1 n f) a
1402 go1 n (Lam b e)
1403 | isTyVar b = go1 n e
1404 | otherwise = go (n+1) e
1405 go1 n (Let b e) = gos (go1 n e) (rhssOfBind b)
1406 go1 n (Case e _ _ as) = gos (go1 n e) (rhssOfAlts as)
1407
1408 gos n [] = n
1409 gos n (e:es) | n >= maxExprSize = n
1410 | otherwise = gos (go1 n e) es
1411
1412 betterLB :: NodeScore -> NodeScore -> Bool
1413 -- If n1 `betterLB` n2 then choose n1 as the loop breaker
1414 betterLB (rank1, size1, lb1) (rank2, size2, _)
1415 | rank1 < rank2 = True
1416 | rank1 > rank2 = False
1417 | size1 < size2 = False -- Make the bigger n2 into the loop breaker
1418 | size1 > size2 = True
1419 | lb1 = True -- Tie-break: if n1 was a loop breaker before, choose it
1420 | otherwise = False -- See Note [Loop breakers, node scoring, and stability]
1421
1422 {- Note [Self-recursion and loop breakers]
1423 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1424 If we have
1425 rec { f = ...f...g...
1426 ; g = .....f... }
1427 then 'f' has to be a loop breaker anyway, so we may as well choose it
1428 right away, so that g can inline freely.
1429
1430 This is really just a cheap hack. Consider
1431 rec { f = ...g...
1432 ; g = ..f..h...
1433 ; h = ...f....}
1434 Here f or g are better loop breakers than h; but we might accidentally
1435 choose h. Finding the minimal set of loop breakers is hard.
1436
1437 Note [Loop breakers, node scoring, and stability]
1438 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1439 To choose a loop breaker, we give a NodeScore to each node in the SCC,
1440 and pick the one with the best score (according to 'betterLB').
1441
1442 We need to be jolly careful (#12425, #12234) about the stability
1443 of this choice. Suppose we have
1444
1445 let rec { f = ...g...g...
1446 ; g = ...f...f... }
1447 in
1448 case x of
1449 True -> ...f..
1450 False -> ..f...
1451
1452 In each iteration of the simplifier the occurrence analyser OccAnal
1453 chooses a loop breaker. Suppose in iteration 1 it choose g as the loop
1454 breaker. That means it is free to inline f.
1455
1456 Suppose that GHC decides to inline f in the branches of the case, but
1457 (for some reason; eg it is not saturated) in the rhs of g. So we get
1458
1459 let rec { f = ...g...g...
1460 ; g = ...f...f... }
1461 in
1462 case x of
1463 True -> ...g...g.....
1464 False -> ..g..g....
1465
1466 Now suppose that, for some reason, in the next iteration the occurrence
1467 analyser chooses f as the loop breaker, so it can freely inline g. And
1468 again for some reason the simplifier inlines g at its calls in the case
1469 branches, but not in the RHS of f. Then we get
1470
1471 let rec { f = ...g...g...
1472 ; g = ...f...f... }
1473 in
1474 case x of
1475 True -> ...(...f...f...)...(...f..f..).....
1476 False -> ..(...f...f...)...(..f..f...)....
1477
1478 You can see where this is going! Each iteration of the simplifier
1479 doubles the number of calls to f or g. No wonder GHC is slow!
1480
1481 (In the particular example in comment:3 of #12425, f and g are the two
1482 mutually recursive fmap instances for CondT and Result. They are both
1483 marked INLINE which, oddly, is why they don't inline in each other's
1484 RHS, because the call there is not saturated.)
1485
1486 The root cause is that we flip-flop on our choice of loop breaker. I
1487 always thought it didn't matter, and indeed for any single iteration
1488 to terminate, it doesn't matter. But when we iterate, it matters a
1489 lot!!
1490
1491 So The Plan is this:
1492 If there is a tie, choose the node that
1493 was a loop breaker last time round
1494
1495 Hence the is_lb field of NodeScore
1496
1497 ************************************************************************
1498 * *
1499 Right hand sides
1500 * *
1501 ************************************************************************
1502 -}
1503
1504 occAnalRhs :: OccEnv -> RecFlag -> Id -> [CoreBndr] -> CoreExpr
1505 -> (UsageDetails, [CoreBndr], CoreExpr)
1506 -- Returned usage details covers only the RHS,
1507 -- and *not* the RULE or INLINE template for the Id
1508 occAnalRhs env Recursive _ bndrs body
1509 = occAnalRecRhs env bndrs body
1510 occAnalRhs env NonRecursive id bndrs body
1511 = occAnalNonRecRhs env id bndrs body
1512
1513 occAnalRecRhs :: OccEnv -> [CoreBndr] -> CoreExpr -- Rhs lambdas, body
1514 -> (UsageDetails, [CoreBndr], CoreExpr)
1515 -- Returned usage details covers only the RHS,
1516 -- and *not* the RULE or INLINE template for the Id
1517 occAnalRecRhs env bndrs body = occAnalLamOrRhs (rhsCtxt env) bndrs body
1518
1519 occAnalNonRecRhs :: OccEnv
1520 -> Id -> [CoreBndr] -> CoreExpr -- Binder; rhs lams, body
1521 -- Binder is already tagged with occurrence info
1522 -> (UsageDetails, [CoreBndr], CoreExpr)
1523 -- Returned usage details covers only the RHS,
1524 -- and *not* the RULE or INLINE template for the Id
1525 occAnalNonRecRhs env bndr bndrs body
1526 = occAnalLamOrRhs rhs_env bndrs body
1527 where
1528 env1 | is_join_point = env -- See Note [Join point RHSs]
1529 | certainly_inline = env -- See Note [Cascading inlines]
1530 | otherwise = rhsCtxt env
1531
1532 -- See Note [Sources of one-shot information]
1533 rhs_env = env1 { occ_one_shots = argOneShots dmd }
1534
1535 certainly_inline -- See Note [Cascading inlines]
1536 = case occ of
1537 OneOcc { occ_in_lam = in_lam, occ_one_br = one_br }
1538 -> not in_lam && one_br && active && not_stable
1539 _ -> False
1540
1541 is_join_point = isAlwaysTailCalled occ
1542 -- Like (isJoinId bndr) but happens one step earlier
1543 -- c.f. willBeJoinId_maybe
1544
1545 occ = idOccInfo bndr
1546 dmd = idDemandInfo bndr
1547 active = isAlwaysActive (idInlineActivation bndr)
1548 not_stable = not (isStableUnfolding (idUnfolding bndr))
1549
1550 occAnalUnfolding :: OccEnv
1551 -> RecFlag
1552 -> Id
1553 -> Maybe UsageDetails
1554 -- Just the analysis, not a new unfolding. The unfolding
1555 -- got analysed when it was created and we don't need to
1556 -- update it.
1557 occAnalUnfolding env rec_flag id
1558 = case realIdUnfolding id of -- ignore previous loop-breaker flag
1559 CoreUnfolding { uf_tmpl = rhs, uf_src = src }
1560 | not (isStableSource src)
1561 -> Nothing
1562 | otherwise
1563 -> Just $ markAllMany usage
1564 where
1565 (bndrs, body) = collectBinders rhs
1566 (usage, _, _) = occAnalRhs env rec_flag id bndrs body
1567
1568 DFunUnfolding { df_bndrs = bndrs, df_args = args }
1569 -> Just $ zapDetails (delDetailsList usage bndrs)
1570 where
1571 usage = andUDsList (map (fst . occAnal env) args)
1572
1573 _ -> Nothing
1574
1575 occAnalRules :: OccEnv
1576 -> Maybe JoinArity -- If the binder is (or MAY become) a join
1577 -- point, what its join arity is (or WOULD
1578 -- become). See Note [Rules and join points].
1579 -> RecFlag
1580 -> Id
1581 -> [(CoreRule, -- Each (non-built-in) rule
1582 UsageDetails, -- Usage details for LHS
1583 UsageDetails)] -- Usage details for RHS
1584 occAnalRules env mb_expected_join_arity rec_flag id
1585 = [ (rule, lhs_uds, rhs_uds) | rule@Rule {} <- idCoreRules id
1586 , let (lhs_uds, rhs_uds) = occ_anal_rule rule ]
1587 where
1588 occ_anal_rule (Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs })
1589 = (lhs_uds, final_rhs_uds)
1590 where
1591 lhs_uds = addManyOccsSet emptyDetails $
1592 (exprsFreeVars args `delVarSetList` bndrs)
1593 (rhs_bndrs, rhs_body) = collectBinders rhs
1594 (rhs_uds, _, _) = occAnalRhs env rec_flag id rhs_bndrs rhs_body
1595 -- Note [Rules are extra RHSs]
1596 -- Note [Rule dependency info]
1597 final_rhs_uds = adjust_tail_info args $ markAllMany $
1598 (rhs_uds `delDetailsList` bndrs)
1599 occ_anal_rule _
1600 = (emptyDetails, emptyDetails)
1601
1602 adjust_tail_info args uds -- see Note [Rules and join points]
1603 = case mb_expected_join_arity of
1604 Just ar | args `lengthIs` ar -> uds
1605 _ -> markAllNonTailCalled uds
1606 {- Note [Join point RHSs]
1607 ~~~~~~~~~~~~~~~~~~~~~~~~~
1608 Consider
1609 x = e
1610 join j = Just x
1611
1612 We want to inline x into j right away, so we don't want to give
1613 the join point a RhsCtxt (#14137). It's not a huge deal, because
1614 the FloatIn pass knows to float into join point RHSs; and the simplifier
1615 does not float things out of join point RHSs. But it's a simple, cheap
1616 thing to do. See #14137.
1617
1618 Note [Cascading inlines]
1619 ~~~~~~~~~~~~~~~~~~~~~~~~
1620 By default we use an rhsCtxt for the RHS of a binding. This tells the
1621 occ anal n that it's looking at an RHS, which has an effect in
1622 occAnalApp. In particular, for constructor applications, it makes
1623 the arguments appear to have NoOccInfo, so that we don't inline into
1624 them. Thus x = f y
1625 k = Just x
1626 we do not want to inline x.
1627
1628 But there's a problem. Consider
1629 x1 = a0 : []
1630 x2 = a1 : x1
1631 x3 = a2 : x2
1632 g = f x3
1633 First time round, it looks as if x1 and x2 occur as an arg of a
1634 let-bound constructor ==> give them a many-occurrence.
1635 But then x3 is inlined (unconditionally as it happens) and
1636 next time round, x2 will be, and the next time round x1 will be
1637 Result: multiple simplifier iterations. Sigh.
1638
1639 So, when analysing the RHS of x3 we notice that x3 will itself
1640 definitely inline the next time round, and so we analyse x3's rhs in
1641 an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff.
1642
1643 Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally.
1644 If (a) the RHS is expandable (see isExpandableApp in occAnalApp), and
1645 (b) certainly_inline says "yes" when preInlineUnconditionally says "no"
1646 then the simplifier iterates indefinitely:
1647 x = f y
1648 k = Just x -- We decide that k is 'certainly_inline'
1649 v = ...k... -- but preInlineUnconditionally doesn't inline it
1650 inline ==>
1651 k = Just (f y)
1652 v = ...k...
1653 float ==>
1654 x1 = f y
1655 k = Just x1
1656 v = ...k...
1657
1658 This is worse than the slow cascade, so we only want to say "certainly_inline"
1659 if it really is certain. Look at the note with preInlineUnconditionally
1660 for the various clauses.
1661
1662
1663 ************************************************************************
1664 * *
1665 Expressions
1666 * *
1667 ************************************************************************
1668 -}
1669
1670 occAnal :: OccEnv
1671 -> CoreExpr
1672 -> (UsageDetails, -- Gives info only about the "interesting" Ids
1673 CoreExpr)
1674
1675 occAnal _ expr@(Type _) = (emptyDetails, expr)
1676 occAnal _ expr@(Lit _) = (emptyDetails, expr)
1677 occAnal env expr@(Var _) = occAnalApp env (expr, [], [])
1678 -- At one stage, I gathered the idRuleVars for the variable here too,
1679 -- which in a way is the right thing to do.
1680 -- But that went wrong right after specialisation, when
1681 -- the *occurrences* of the overloaded function didn't have any
1682 -- rules in them, so the *specialised* versions looked as if they
1683 -- weren't used at all.
1684
1685 occAnal _ (Coercion co)
1686 = (addManyOccsSet emptyDetails (coVarsOfCo co), Coercion co)
1687 -- See Note [Gather occurrences of coercion variables]
1688
1689 {-
1690 Note [Gather occurrences of coercion variables]
1691 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1692 We need to gather info about what coercion variables appear, so that
1693 we can sort them into the right place when doing dependency analysis.
1694 -}
1695
1696 occAnal env (Tick tickish body)
1697 | SourceNote{} <- tickish
1698 = (usage, Tick tickish body')
1699 -- SourceNotes are best-effort; so we just proceed as usual.
1700 -- If we drop a tick due to the issues described below it's
1701 -- not the end of the world.
1702
1703 | tickish `tickishScopesLike` SoftScope
1704 = (markAllNonTailCalled usage, Tick tickish body')
1705
1706 | Breakpoint _ ids <- tickish
1707 = (usage_lam `andUDs` foldr addManyOccs emptyDetails ids, Tick tickish body')
1708 -- never substitute for any of the Ids in a Breakpoint
1709
1710 | otherwise
1711 = (usage_lam, Tick tickish body')
1712 where
1713 !(usage,body') = occAnal env body
1714 -- for a non-soft tick scope, we can inline lambdas only
1715 usage_lam = markAllNonTailCalled (markAllInsideLam usage)
1716 -- TODO There may be ways to make ticks and join points play
1717 -- nicer together, but right now there are problems:
1718 -- let j x = ... in tick<t> (j 1)
1719 -- Making j a join point may cause the simplifier to drop t
1720 -- (if the tick is put into the continuation). So we don't
1721 -- count j 1 as a tail call.
1722 -- See #14242.
1723
1724 occAnal env (Cast expr co)
1725 = case occAnal env expr of { (usage, expr') ->
1726 let usage1 = zapDetailsIf (isRhsEnv env) usage
1727 -- usage1: if we see let x = y `cast` co
1728 -- then mark y as 'Many' so that we don't
1729 -- immediately inline y again.
1730 usage2 = addManyOccsSet usage1 (coVarsOfCo co)
1731 -- usage2: see Note [Gather occurrences of coercion variables]
1732 in (markAllNonTailCalled usage2, Cast expr' co)
1733 }
1734
1735 occAnal env app@(App _ _)
1736 = occAnalApp env (collectArgsTicks tickishFloatable app)
1737
1738 -- Ignore type variables altogether
1739 -- (a) occurrences inside type lambdas only not marked as InsideLam
1740 -- (b) type variables not in environment
1741
1742 occAnal env (Lam x body)
1743 | isTyVar x
1744 = case occAnal env body of { (body_usage, body') ->
1745 (markAllNonTailCalled body_usage, Lam x body')
1746 }
1747
1748 -- For value lambdas we do a special hack. Consider
1749 -- (\x. \y. ...x...)
1750 -- If we did nothing, x is used inside the \y, so would be marked
1751 -- as dangerous to dup. But in the common case where the abstraction
1752 -- is applied to two arguments this is over-pessimistic.
1753 -- So instead, we just mark each binder with its occurrence
1754 -- info in the *body* of the multiple lambda.
1755 -- Then, the simplifier is careful when partially applying lambdas.
1756
1757 occAnal env expr@(Lam _ _)
1758 = case occAnalLamOrRhs env binders body of { (usage, tagged_binders, body') ->
1759 let
1760 expr' = mkLams tagged_binders body'
1761 usage1 = markAllNonTailCalled usage
1762 one_shot_gp = all isOneShotBndr tagged_binders
1763 final_usage | one_shot_gp = usage1
1764 | otherwise = markAllInsideLam usage1
1765 in
1766 (final_usage, expr') }
1767 where
1768 (binders, body) = collectBinders expr
1769
1770 occAnal env (Case scrut bndr ty alts)
1771 = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') ->
1772 case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts') ->
1773 let
1774 alts_usage = foldr orUDs emptyDetails alts_usage_s
1775 (alts_usage1, tagged_bndr) = tagLamBinder alts_usage bndr
1776 total_usage = markAllNonTailCalled scrut_usage `andUDs` alts_usage1
1777 -- Alts can have tail calls, but the scrutinee can't
1778 in
1779 total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
1780 where
1781 alt_env = mkAltEnv env scrut bndr
1782 occ_anal_alt = occAnalAlt alt_env
1783
1784 occ_anal_scrut (Var v) (alt1 : other_alts)
1785 | not (null other_alts) || not (isDefaultAlt alt1)
1786 = (mkOneOcc env v True 0, Var v)
1787 -- The 'True' says that the variable occurs in an interesting
1788 -- context; the case has at least one non-default alternative
1789 occ_anal_scrut (Tick t e) alts
1790 | t `tickishScopesLike` SoftScope
1791 -- No reason to not look through all ticks here, but only
1792 -- for soft-scoped ticks we can do so without having to
1793 -- update returned occurance info (see occAnal)
1794 = second (Tick t) $ occ_anal_scrut e alts
1795
1796 occ_anal_scrut scrut _alts
1797 = occAnal (vanillaCtxt env) scrut -- No need for rhsCtxt
1798
1799 occAnal env (Let bind body)
1800 = case occAnal env body of { (body_usage, body') ->
1801 case occAnalBind env NotTopLevel
1802 noImpRuleEdges bind
1803 body_usage of { (final_usage, new_binds) ->
1804 (final_usage, mkLets new_binds body') }}
1805
1806 occAnalArgs :: OccEnv -> [CoreExpr] -> [OneShots] -> (UsageDetails, [CoreExpr])
1807 occAnalArgs _ [] _
1808 = (emptyDetails, [])
1809
1810 occAnalArgs env (arg:args) one_shots
1811 | isTypeArg arg
1812 = case occAnalArgs env args one_shots of { (uds, args') ->
1813 (uds, arg:args') }
1814
1815 | otherwise
1816 = case argCtxt env one_shots of { (arg_env, one_shots') ->
1817 case occAnal arg_env arg of { (uds1, arg') ->
1818 case occAnalArgs env args one_shots' of { (uds2, args') ->
1819 (uds1 `andUDs` uds2, arg':args') }}}
1820
1821 {-
1822 Applications are dealt with specially because we want
1823 the "build hack" to work.
1824
1825 Note [Arguments of let-bound constructors]
1826 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1827 Consider
1828 f x = let y = expensive x in
1829 let z = (True,y) in
1830 (case z of {(p,q)->q}, case z of {(p,q)->q})
1831 We feel free to duplicate the WHNF (True,y), but that means
1832 that y may be duplicated thereby.
1833
1834 If we aren't careful we duplicate the (expensive x) call!
1835 Constructors are rather like lambdas in this way.
1836 -}
1837
1838 occAnalApp :: OccEnv
1839 -> (Expr CoreBndr, [Arg CoreBndr], [Tickish Id])
1840 -> (UsageDetails, Expr CoreBndr)
1841 occAnalApp env (Var fun, args, ticks)
1842 | null ticks = (uds, mkApps (Var fun) args')
1843 | otherwise = (uds, mkTicks ticks $ mkApps (Var fun) args')
1844 where
1845 uds = fun_uds `andUDs` final_args_uds
1846
1847 !(args_uds, args') = occAnalArgs env args one_shots
1848 !final_args_uds
1849 | isRhsEnv env && is_exp = markAllNonTailCalled $
1850 markAllInsideLam args_uds
1851 | otherwise = markAllNonTailCalled args_uds
1852 -- We mark the free vars of the argument of a constructor or PAP
1853 -- as "inside-lambda", if it is the RHS of a let(rec).
1854 -- This means that nothing gets inlined into a constructor or PAP
1855 -- argument position, which is what we want. Typically those
1856 -- constructor arguments are just variables, or trivial expressions.
1857 -- We use inside-lam because it's like eta-expanding the PAP.
1858 --
1859 -- This is the *whole point* of the isRhsEnv predicate
1860 -- See Note [Arguments of let-bound constructors]
1861
1862 n_val_args = valArgCount args
1863 n_args = length args
1864 fun_uds = mkOneOcc env fun (n_val_args > 0) n_args
1865 is_exp = isExpandableApp fun n_val_args
1866 -- See Note [CONLIKE pragma] in BasicTypes
1867 -- The definition of is_exp should match that in Simplify.prepareRhs
1868
1869 one_shots = argsOneShots (idStrictness fun) guaranteed_val_args
1870 guaranteed_val_args = n_val_args + length (takeWhile isOneShotInfo
1871 (occ_one_shots env))
1872 -- See Note [Sources of one-shot information], bullet point A']
1873
1874 occAnalApp env (fun, args, ticks)
1875 = (markAllNonTailCalled (fun_uds `andUDs` args_uds),
1876 mkTicks ticks $ mkApps fun' args')
1877 where
1878 !(fun_uds, fun') = occAnal (addAppCtxt env args) fun
1879 -- The addAppCtxt is a bit cunning. One iteration of the simplifier
1880 -- often leaves behind beta redexs like
1881 -- (\x y -> e) a1 a2
1882 -- Here we would like to mark x,y as one-shot, and treat the whole
1883 -- thing much like a let. We do this by pushing some True items
1884 -- onto the context stack.
1885 !(args_uds, args') = occAnalArgs env args []
1886
1887 zapDetailsIf :: Bool -- If this is true
1888 -> UsageDetails -- Then do zapDetails on this
1889 -> UsageDetails
1890 zapDetailsIf True uds = zapDetails uds
1891 zapDetailsIf False uds = uds
1892
1893 {-
1894 Note [Sources of one-shot information]
1895 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1896 The occurrence analyser obtains one-shot-lambda information from two sources:
1897
1898 A: Saturated applications: eg f e1 .. en
1899
1900 In general, given a call (f e1 .. en) we can propagate one-shot info from
1901 f's strictness signature into e1 .. en, but /only/ if n is enough to
1902 saturate the strictness signature. A strictness signature like
1903
1904 f :: C1(C1(L))LS
1905
1906 means that *if f is applied to three arguments* then it will guarantee to
1907 call its first argument at most once, and to call the result of that at
1908 most once. But if f has fewer than three arguments, all bets are off; e.g.
1909
1910 map (f (\x y. expensive) e2) xs
1911
1912 Here the \x y abstraction may be called many times (once for each element of
1913 xs) so we should not mark x and y as one-shot. But if it was
1914
1915 map (f (\x y. expensive) 3 2) xs
1916
1917 then the first argument of f will be called at most once.
1918
1919 The one-shot info, derived from f's strictness signature, is
1920 computed by 'argsOneShots', called in occAnalApp.
1921
1922 A': Non-obviously saturated applications: eg build (f (\x y -> expensive))
1923 where f is as above.
1924
1925 In this case, f is only manifestly applied to one argument, so it does not
1926 look saturated. So by the previous point, we should not use its strictness
1927 signature to learn about the one-shotness of \x y. But in this case we can:
1928 build is fully applied, so we may use its strictness signature; and from
1929 that we learn that build calls its argument with two arguments *at most once*.
1930
1931 So there is really only one call to f, and it will have three arguments. In
1932 that sense, f is saturated, and we may proceed as described above.
1933
1934 Hence the computation of 'guaranteed_val_args' in occAnalApp, using
1935 '(occ_one_shots env)'. See also #13227, comment:9
1936
1937 B: Let-bindings: eg let f = \c. let ... in \n -> blah
1938 in (build f, build f)
1939
1940 Propagate one-shot info from the demanand-info on 'f' to the
1941 lambdas in its RHS (which may not be syntactically at the top)
1942
1943 This information must have come from a previous run of the demanand
1944 analyser.
1945
1946 Previously, the demand analyser would *also* set the one-shot information, but
1947 that code was buggy (see #11770), so doing it only in on place, namely here, is
1948 saner.
1949
1950 Note [OneShots]
1951 ~~~~~~~~~~~~~~~
1952 When analysing an expression, the occ_one_shots argument contains information
1953 about how the function is being used. The length of the list indicates
1954 how many arguments will eventually be passed to the analysed expression,
1955 and the OneShotInfo indicates whether this application is once or multiple times.
1956
1957 Example:
1958
1959 Context of f occ_one_shots when analysing f
1960
1961 f 1 2 [OneShot, OneShot]
1962 map (f 1) [OneShot, NoOneShotInfo]
1963 build f [OneShot, OneShot]
1964 f 1 2 `seq` f 2 1 [NoOneShotInfo, OneShot]
1965
1966 Note [Binders in case alternatives]
1967 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1968 Consider
1969 case x of y { (a,b) -> f y }
1970 We treat 'a', 'b' as dead, because they don't physically occur in the
1971 case alternative. (Indeed, a variable is dead iff it doesn't occur in
1972 its scope in the output of OccAnal.) It really helps to know when
1973 binders are unused. See esp the call to isDeadBinder in
1974 Simplify.mkDupableAlt
1975
1976 In this example, though, the Simplifier will bring 'a' and 'b' back to
1977 life, beause it binds 'y' to (a,b) (imagine got inlined and
1978 scrutinised y).
1979 -}
1980
1981 occAnalLamOrRhs :: OccEnv -> [CoreBndr] -> CoreExpr
1982 -> (UsageDetails, [CoreBndr], CoreExpr)
1983 occAnalLamOrRhs env [] body
1984 = case occAnal env body of (body_usage, body') -> (body_usage, [], body')
1985 -- RHS of thunk or nullary join point
1986 occAnalLamOrRhs env (bndr:bndrs) body
1987 | isTyVar bndr
1988 = -- Important: Keep the environment so that we don't inline into an RHS like
1989 -- \(@ x) -> C @x (f @x)
1990 -- (see the beginning of Note [Cascading inlines]).
1991 case occAnalLamOrRhs env bndrs body of
1992 (body_usage, bndrs', body') -> (body_usage, bndr:bndrs', body')
1993 occAnalLamOrRhs env binders body
1994 = case occAnal env_body body of { (body_usage, body') ->
1995 let
1996 (final_usage, tagged_binders) = tagLamBinders body_usage binders'
1997 -- Use binders' to put one-shot info on the lambdas
1998 in
1999 (final_usage, tagged_binders, body') }
2000 where
2001 (env_body, binders') = oneShotGroup env binders
2002
2003 occAnalAlt :: (OccEnv, Maybe (Id, CoreExpr))
2004 -> CoreAlt
2005 -> (UsageDetails, Alt IdWithOccInfo)
2006 occAnalAlt (env, scrut_bind) (con, bndrs, rhs)
2007 = case occAnal env rhs of { (rhs_usage1, rhs1) ->
2008 let
2009 (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs
2010 -- See Note [Binders in case alternatives]
2011 (alt_usg', rhs2) = wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1
2012 in
2013 (alt_usg', (con, tagged_bndrs, rhs2)) }
2014
2015 wrapAltRHS :: OccEnv
2016 -> Maybe (Id, CoreExpr) -- proxy mapping generated by mkAltEnv
2017 -> UsageDetails -- usage for entire alt (p -> rhs)
2018 -> [Var] -- alt binders
2019 -> CoreExpr -- alt RHS
2020 -> (UsageDetails, CoreExpr)
2021 wrapAltRHS env (Just (scrut_var, let_rhs)) alt_usg bndrs alt_rhs
2022 | occ_binder_swap env
2023 , scrut_var `usedIn` alt_usg -- bndrs are not be present in alt_usg so this
2024 -- handles condition (a) in Note [Binder swap]
2025 , not captured -- See condition (b) in Note [Binder swap]
2026 = ( alt_usg' `andUDs` let_rhs_usg
2027 , Let (NonRec tagged_scrut_var let_rhs') alt_rhs )
2028 where
2029 captured = any (`usedIn` let_rhs_usg) bndrs -- Check condition (b)
2030
2031 -- The rhs of the let may include coercion variables
2032 -- if the scrutinee was a cast, so we must gather their
2033 -- usage. See Note [Gather occurrences of coercion variables]
2034 -- Moreover, the rhs of the let may mention the case-binder, and
2035 -- we want to gather its occ-info as well
2036 (let_rhs_usg, let_rhs') = occAnal env let_rhs
2037
2038 (alt_usg', tagged_scrut_var) = tagLamBinder alt_usg scrut_var
2039
2040 wrapAltRHS _ _ alt_usg _ alt_rhs
2041 = (alt_usg, alt_rhs)
2042
2043 {-
2044 ************************************************************************
2045 * *
2046 OccEnv
2047 * *
2048 ************************************************************************
2049 -}
2050
2051 data OccEnv
2052 = OccEnv { occ_encl :: !OccEncl -- Enclosing context information
2053 , occ_one_shots :: !OneShots -- See Note [OneShots]
2054 , occ_gbl_scrut :: GlobalScruts
2055
2056 , occ_unf_act :: Id -> Bool -- Which Id unfoldings are active
2057
2058 , occ_rule_act :: Activation -> Bool -- Which rules are active
2059 -- See Note [Finding rule RHS free vars]
2060
2061 , occ_binder_swap :: !Bool -- enable the binder_swap
2062 -- See CorePrep Note [Dead code in CorePrep]
2063 }
2064
2065 type GlobalScruts = IdSet -- See Note [Binder swap on GlobalId scrutinees]
2066
2067 -----------------------------
2068 -- OccEncl is used to control whether to inline into constructor arguments
2069 -- For example:
2070 -- x = (p,q) -- Don't inline p or q
2071 -- y = /\a -> (p a, q a) -- Still don't inline p or q
2072 -- z = f (p,q) -- Do inline p,q; it may make a rule fire
2073 -- So OccEncl tells enough about the context to know what to do when
2074 -- we encounter a constructor application or PAP.
2075
2076 data OccEncl
2077 = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda
2078 -- Don't inline into constructor args here
2079 | OccVanilla -- Argument of function, body of lambda, scruintee of case etc.
2080 -- Do inline into constructor args here
2081
2082 instance Outputable OccEncl where
2083 ppr OccRhs = text "occRhs"
2084 ppr OccVanilla = text "occVanilla"
2085
2086 -- See note [OneShots]
2087 type OneShots = [OneShotInfo]
2088
2089 initOccEnv :: OccEnv
2090 initOccEnv
2091 = OccEnv { occ_encl = OccVanilla
2092 , occ_one_shots = []
2093 , occ_gbl_scrut = emptyVarSet
2094 -- To be conservative, we say that all
2095 -- inlines and rules are active
2096 , occ_unf_act = \_ -> True
2097 , occ_rule_act = \_ -> True
2098 , occ_binder_swap = True }
2099
2100 vanillaCtxt :: OccEnv -> OccEnv
2101 vanillaCtxt env = env { occ_encl = OccVanilla, occ_one_shots = [] }
2102
2103 rhsCtxt :: OccEnv -> OccEnv
2104 rhsCtxt env = env { occ_encl = OccRhs, occ_one_shots = [] }
2105
2106 argCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots])
2107 argCtxt env []
2108 = (env { occ_encl = OccVanilla, occ_one_shots = [] }, [])
2109 argCtxt env (one_shots:one_shots_s)
2110 = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s)
2111
2112 isRhsEnv :: OccEnv -> Bool
2113 isRhsEnv (OccEnv { occ_encl = OccRhs }) = True
2114 isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False
2115
2116 oneShotGroup :: OccEnv -> [CoreBndr]
2117 -> ( OccEnv
2118 , [CoreBndr] )
2119 -- The result binders have one-shot-ness set that they might not have had originally.
2120 -- This happens in (build (\c n -> e)). Here the occurrence analyser
2121 -- linearity context knows that c,n are one-shot, and it records that fact in
2122 -- the binder. This is useful to guide subsequent float-in/float-out tranformations
2123
2124 oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs
2125 = go ctxt bndrs []
2126 where
2127 go ctxt [] rev_bndrs
2128 = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla }
2129 , reverse rev_bndrs )
2130
2131 go [] bndrs rev_bndrs
2132 = ( env { occ_one_shots = [], occ_encl = OccVanilla }
2133 , reverse rev_bndrs ++ bndrs )
2134
2135 go ctxt@(one_shot : ctxt') (bndr : bndrs) rev_bndrs
2136 | isId bndr = go ctxt' bndrs (bndr': rev_bndrs)
2137 | otherwise = go ctxt bndrs (bndr : rev_bndrs)
2138 where
2139 bndr' = updOneShotInfo bndr one_shot
2140 -- Use updOneShotInfo, not setOneShotInfo, as pre-existing
2141 -- one-shot info might be better than what we can infer, e.g.
2142 -- due to explicit use of the magic 'oneShot' function.
2143 -- See Note [The oneShot function]
2144
2145
2146 markJoinOneShots :: Maybe JoinArity -> [Var] -> [Var]
2147 -- Mark the lambdas of a non-recursive join point as one-shot.
2148 -- This is good to prevent gratuitous float-out etc
2149 markJoinOneShots mb_join_arity bndrs
2150 = case mb_join_arity of
2151 Nothing -> bndrs
2152 Just n -> go n bndrs
2153 where
2154 go 0 bndrs = bndrs
2155 go _ [] = [] -- This can legitimately happen.
2156 -- e.g. let j = case ... in j True
2157 -- This will become an arity-1 join point after the
2158 -- simplifier has eta-expanded it; but it may not have
2159 -- enough lambdas /yet/. (Lint checks that JoinIds do
2160 -- have enough lambdas.)
2161 go n (b:bs) = b' : go (n-1) bs
2162 where
2163 b' | isId b = setOneShotLambda b
2164 | otherwise = b
2165
2166 addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
2167 addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args
2168 = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt }
2169
2170 transClosureFV :: UniqFM VarSet -> UniqFM VarSet
2171 -- If (f,g), (g,h) are in the input, then (f,h) is in the output
2172 -- as well as (f,g), (g,h)
2173 transClosureFV env
2174 | no_change = env
2175 | otherwise = transClosureFV (listToUFM new_fv_list)
2176 where
2177 (no_change, new_fv_list) = mapAccumL bump True (nonDetUFMToList env)
2178 -- It's OK to use nonDetUFMToList here because we'll forget the
2179 -- ordering by creating a new set with listToUFM
2180 bump no_change (b,fvs)
2181 | no_change_here = (no_change, (b,fvs))
2182 | otherwise = (False, (b,new_fvs))
2183 where
2184 (new_fvs, no_change_here) = extendFvs env fvs
2185
2186 -------------
2187 extendFvs_ :: UniqFM VarSet -> VarSet -> VarSet
2188 extendFvs_ env s = fst (extendFvs env s) -- Discard the Bool flag
2189
2190 extendFvs :: UniqFM VarSet -> VarSet -> (VarSet, Bool)
2191 -- (extendFVs env s) returns
2192 -- (s `union` env(s), env(s) `subset` s)
2193 extendFvs env s
2194 | isNullUFM env
2195 = (s, True)
2196 | otherwise
2197 = (s `unionVarSet` extras, extras `subVarSet` s)
2198 where
2199 extras :: VarSet -- env(s)
2200 extras = nonDetFoldUFM unionVarSet emptyVarSet $
2201 -- It's OK to use nonDetFoldUFM here because unionVarSet commutes
2202 intersectUFM_C (\x _ -> x) env (getUniqSet s)
2203
2204 {-
2205 ************************************************************************
2206 * *
2207 Binder swap
2208 * *
2209 ************************************************************************
2210
2211 Note [Binder swap]
2212 ~~~~~~~~~~~~~~~~~~
2213 The "binder swap" tranformation swaps occurence of the
2214 scrutinee of a case for occurrences of the case-binder:
2215
2216 (1) case x of b { pi -> ri }
2217 ==>
2218 case x of b { pi -> let x=b in ri }
2219
2220 (2) case (x |> co) of b { pi -> ri }
2221 ==>
2222 case (x |> co) of b { pi -> let x = b |> sym co in ri }
2223
2224 In both cases, the trivial 'let' can be eliminated by the
2225 immediately following simplifier pass.
2226
2227 There are two reasons for making this swap:
2228
2229 (A) It reduces the number of occurrences of the scrutinee, x.
2230 That in turn might reduce its occurrences to one, so we
2231 can inline it and save an allocation. E.g.
2232 let x = factorial y in case x of b { I# v -> ...x... }
2233 If we replace 'x' by 'b' in the alternative we get
2234 let x = factorial y in case x of b { I# v -> ...b... }
2235 and now we can inline 'x', thus
2236 case (factorial y) of b { I# v -> ...b... }
2237
2238 (B) The case-binder b has unfolding information; in the
2239 example above we know that b = I# v. That in turn allows
2240 nested cases to simplify. Consider
2241 case x of b { I# v ->
2242 ...(case x of b2 { I# v2 -> rhs })...
2243 If we replace 'x' by 'b' in the alternative we get
2244 case x of b { I# v ->
2245 ...(case b of b2 { I# v2 -> rhs })...
2246 and now it is trivial to simplify the inner case:
2247 case x of b { I# v ->
2248 ...(let b2 = b in rhs)...
2249
2250 The same can happen even if the scrutinee is a variable
2251 with a cast: see Note [Case of cast]
2252
2253 In both cases, in a particular alternative (pi -> ri), we only
2254 add the binding if
2255 (a) x occurs free in (pi -> ri)
2256 (ie it occurs in ri, but is not bound in pi)
2257 (b) the pi does not bind b (or the free vars of co)
2258 We need (a) and (b) for the inserted binding to be correct.
2259
2260 For the alternatives where we inject the binding, we can transfer
2261 all x's OccInfo to b. And that is the point.
2262
2263 Notice that
2264 * The deliberate shadowing of 'x'.
2265 * That (a) rapidly becomes false, so no bindings are injected.
2266
2267 The reason for doing these transformations /here in the occurrence
2268 analyser/ is because it allows us to adjust the OccInfo for 'x' and
2269 'b' as we go.
2270
2271 * Suppose the only occurrences of 'x' are the scrutinee and in the
2272 ri; then this transformation makes it occur just once, and hence
2273 get inlined right away.
2274
2275 * If instead we do this in the Simplifier, we don't know whether 'x'
2276 is used in ri, so we are forced to pessimistically zap b's OccInfo
2277 even though it is typically dead (ie neither it nor x appear in
2278 the ri). There's nothing actually wrong with zapping it, except
2279 that it's kind of nice to know which variables are dead. My nose
2280 tells me to keep this information as robustly as possible.
2281
2282 The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
2283 {x=b}; it's Nothing if the binder-swap doesn't happen.
2284
2285 There is a danger though. Consider
2286 let v = x +# y
2287 in case (f v) of w -> ...v...v...
2288 And suppose that (f v) expands to just v. Then we'd like to
2289 use 'w' instead of 'v' in the alternative. But it may be too
2290 late; we may have substituted the (cheap) x+#y for v in the
2291 same simplifier pass that reduced (f v) to v.
2292
2293 I think this is just too bad. CSE will recover some of it.
2294
2295 Note [Case of cast]
2296 ~~~~~~~~~~~~~~~~~~~
2297 Consider case (x `cast` co) of b { I# ->
2298 ... (case (x `cast` co) of {...}) ...
2299 We'd like to eliminate the inner case. That is the motivation for
2300 equation (2) in Note [Binder swap]. When we get to the inner case, we
2301 inline x, cancel the casts, and away we go.
2302
2303 Note [Binder swap on GlobalId scrutinees]
2304 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2305 When the scrutinee is a GlobalId we must take care in two ways
2306
2307 i) In order to *know* whether 'x' occurs free in the RHS, we need its
2308 occurrence info. BUT, we don't gather occurrence info for
2309 GlobalIds. That's the reason for the (small) occ_gbl_scrut env in
2310 OccEnv is for: it says "gather occurrence info for these".
2311
2312 ii) We must call localiseId on 'x' first, in case it's a GlobalId, or
2313 has an External Name. See, for example, SimplEnv Note [Global Ids in
2314 the substitution].
2315
2316 Note [Zap case binders in proxy bindings]
2317 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2318 From the original
2319 case x of cb(dead) { p -> ...x... }
2320 we will get
2321 case x of cb(live) { p -> let x = cb in ...x... }
2322
2323 Core Lint never expects to find an *occurrence* of an Id marked
2324 as Dead, so we must zap the OccInfo on cb before making the
2325 binding x = cb. See #5028.
2326
2327 NB: the OccInfo on /occurrences/ really doesn't matter much; the simplifier
2328 doesn't use it. So this is only to satisfy the perhpas-over-picky Lint.
2329
2330 Historical note [no-case-of-case]
2331 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2332 We *used* to suppress the binder-swap in case expressions when
2333 -fno-case-of-case is on. Old remarks:
2334 "This happens in the first simplifier pass,
2335 and enhances full laziness. Here's the bad case:
2336 f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
2337 If we eliminate the inner case, we trap it inside the I# v -> arm,
2338 which might prevent some full laziness happening. I've seen this
2339 in action in spectral/cichelli/Prog.hs:
2340 [(m,n) | m <- [1..max], n <- [1..max]]
2341 Hence the check for NoCaseOfCase."
2342 However, now the full-laziness pass itself reverses the binder-swap, so this
2343 check is no longer necessary.
2344
2345 Historical note [Suppressing the case binder-swap]
2346 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2347 This old note describes a problem that is also fixed by doing the
2348 binder-swap in OccAnal:
2349
2350 There is another situation when it might make sense to suppress the
2351 case-expression binde-swap. If we have
2352
2353 case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
2354 ...other cases .... }
2355
2356 We'll perform the binder-swap for the outer case, giving
2357
2358 case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
2359 ...other cases .... }
2360
2361 But there is no point in doing it for the inner case, because w1 can't
2362 be inlined anyway. Furthermore, doing the case-swapping involves
2363 zapping w2's occurrence info (see paragraphs that follow), and that
2364 forces us to bind w2 when doing case merging. So we get
2365
2366 case x of w1 { A -> let w2 = w1 in e1
2367 B -> let w2 = w1 in e2
2368 ...other cases .... }
2369
2370 This is plain silly in the common case where w2 is dead.
2371
2372 Even so, I can't see a good way to implement this idea. I tried
2373 not doing the binder-swap if the scrutinee was already evaluated
2374 but that failed big-time:
2375
2376 data T = MkT !Int
2377
2378 case v of w { MkT x ->
2379 case x of x1 { I# y1 ->
2380 case x of x2 { I# y2 -> ...
2381
2382 Notice that because MkT is strict, x is marked "evaluated". But to
2383 eliminate the last case, we must either make sure that x (as well as
2384 x1) has unfolding MkT y1. The straightforward thing to do is to do
2385 the binder-swap. So this whole note is a no-op.
2386
2387 It's fixed by doing the binder-swap in OccAnal because we can do the
2388 binder-swap unconditionally and still get occurrence analysis
2389 information right.
2390 -}
2391
2392 mkAltEnv :: OccEnv -> CoreExpr -> Id -> (OccEnv, Maybe (Id, CoreExpr))
2393 -- Does three things: a) makes the occ_one_shots = OccVanilla
2394 -- b) extends the GlobalScruts if possible
2395 -- c) returns a proxy mapping, binding the scrutinee
2396 -- to the case binder, if possible
2397 mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr
2398 = case stripTicksTopE (const True) scrut of
2399 Var v -> add_scrut v case_bndr'
2400 Cast (Var v) co -> add_scrut v (Cast case_bndr' (mkSymCo co))
2401 -- See Note [Case of cast]
2402 _ -> (env { occ_encl = OccVanilla }, Nothing)
2403
2404 where
2405 add_scrut v rhs
2406 | isGlobalId v = (env { occ_encl = OccVanilla }, Nothing)
2407 | otherwise = ( env { occ_encl = OccVanilla
2408 , occ_gbl_scrut = pe `extendVarSet` v }
2409 , Just (localise v, rhs) )
2410 -- ToDO: this isGlobalId stuff is a TEMPORARY FIX
2411 -- to avoid the binder-swap for GlobalIds
2412 -- See #16346
2413
2414 case_bndr' = Var (zapIdOccInfo case_bndr)
2415 -- See Note [Zap case binders in proxy bindings]
2416
2417 -- Localise the scrut_var before shadowing it; we're making a
2418 -- new binding for it, and it might have an External Name, or
2419 -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]
2420 -- Also we don't want any INLINE or NOINLINE pragmas!
2421 localise scrut_var = mkLocalIdOrCoVar (localiseName (idName scrut_var))
2422 (idType scrut_var)
2423
2424 {-
2425 ************************************************************************
2426 * *
2427 \subsection[OccurAnal-types]{OccEnv}
2428 * *
2429 ************************************************************************
2430
2431 Note [UsageDetails and zapping]
2432 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2433
2434 On many occasions, we must modify all gathered occurrence data at once. For
2435 instance, all occurrences underneath a (non-one-shot) lambda set the
2436 'occ_in_lam' flag to become 'True'. We could use 'mapVarEnv' to do this, but
2437 that takes O(n) time and we will do this often---in particular, there are many
2438 places where tail calls are not allowed, and each of these causes all variables
2439 to get marked with 'NoTailCallInfo'.
2440
2441 Instead of relying on `mapVarEnv`, then, we carry three 'IdEnv's around along
2442 with the 'OccInfoEnv'. Each of these extra environments is a "zapped set"
2443 recording which variables have been zapped in some way. Zapping all occurrence
2444 info then simply means setting the corresponding zapped set to the whole
2445 'OccInfoEnv', a fast O(1) operation.
2446 -}
2447
2448 type OccInfoEnv = IdEnv OccInfo -- A finite map from ids to their usage
2449 -- INVARIANT: never IAmDead
2450 -- (Deadness is signalled by not being in the map at all)
2451
2452 type ZappedSet = OccInfoEnv -- Values are ignored
2453
2454 data UsageDetails
2455 = UD { ud_env :: !OccInfoEnv
2456 , ud_z_many :: ZappedSet -- apply 'markMany' to these
2457 , ud_z_in_lam :: ZappedSet -- apply 'markInsideLam' to these
2458 , ud_z_no_tail :: ZappedSet } -- apply 'markNonTailCalled' to these
2459 -- INVARIANT: All three zapped sets are subsets of the OccInfoEnv
2460
2461 instance Outputable UsageDetails where
2462 ppr ud = ppr (ud_env (flattenUsageDetails ud))
2463
2464 -------------------
2465 -- UsageDetails API
2466
2467 andUDs, orUDs
2468 :: UsageDetails -> UsageDetails -> UsageDetails
2469 andUDs = combineUsageDetailsWith addOccInfo
2470 orUDs = combineUsageDetailsWith orOccInfo
2471
2472 andUDsList :: [UsageDetails] -> UsageDetails
2473 andUDsList = foldl' andUDs emptyDetails
2474
2475 mkOneOcc :: OccEnv -> Id -> InterestingCxt -> JoinArity -> UsageDetails
2476 mkOneOcc env id int_cxt arity
2477 | isLocalId id
2478 = singleton $ OneOcc { occ_in_lam = False
2479 , occ_one_br = True
2480 , occ_int_cxt = int_cxt
2481 , occ_tail = AlwaysTailCalled arity }
2482 | id `elemVarSet` occ_gbl_scrut env
2483 = singleton noOccInfo
2484
2485 | otherwise
2486 = emptyDetails
2487 where
2488 singleton info = emptyDetails { ud_env = unitVarEnv id info }
2489
2490 addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
2491 addOneOcc ud id info
2492 = ud { ud_env = extendVarEnv_C plus_zapped (ud_env ud) id info }
2493 `alterZappedSets` (`delVarEnv` id)
2494 where
2495 plus_zapped old new = doZapping ud id old `addOccInfo` new
2496
2497 addManyOccsSet :: UsageDetails -> VarSet -> UsageDetails
2498 addManyOccsSet usage id_set = nonDetFoldUniqSet addManyOccs usage id_set
2499 -- It's OK to use nonDetFoldUFM here because addManyOccs commutes
2500
2501 -- Add several occurrences, assumed not to be tail calls
2502 addManyOccs :: Var -> UsageDetails -> UsageDetails
2503 addManyOccs v u | isId v = addOneOcc u v noOccInfo
2504 | otherwise = u
2505 -- Give a non-committal binder info (i.e noOccInfo) because
2506 -- a) Many copies of the specialised thing can appear
2507 -- b) We don't want to substitute a BIG expression inside a RULE
2508 -- even if that's the only occurrence of the thing
2509 -- (Same goes for INLINE.)
2510
2511 delDetails :: UsageDetails -> Id -> UsageDetails
2512 delDetails ud bndr
2513 = ud `alterUsageDetails` (`delVarEnv` bndr)
2514
2515 delDetailsList :: UsageDetails -> [Id] -> UsageDetails
2516 delDetailsList ud bndrs
2517 = ud `alterUsageDetails` (`delVarEnvList` bndrs)
2518
2519 emptyDetails :: UsageDetails
2520 emptyDetails = UD { ud_env = emptyVarEnv
2521 , ud_z_many = emptyVarEnv
2522 , ud_z_in_lam = emptyVarEnv
2523 , ud_z_no_tail = emptyVarEnv }
2524
2525 isEmptyDetails :: UsageDetails -> Bool
2526 isEmptyDetails = isEmptyVarEnv . ud_env
2527
2528 markAllMany, markAllInsideLam, markAllNonTailCalled, zapDetails
2529 :: UsageDetails -> UsageDetails
2530 markAllMany ud = ud { ud_z_many = ud_env ud }
2531 markAllInsideLam ud = ud { ud_z_in_lam = ud_env ud }
2532 markAllNonTailCalled ud = ud { ud_z_no_tail = ud_env ud }
2533
2534 zapDetails = markAllMany . markAllNonTailCalled -- effectively sets to noOccInfo
2535
2536 lookupDetails :: UsageDetails -> Id -> OccInfo
2537 lookupDetails ud id
2538 | isCoVar id -- We do not currenly gather occurrence info (from types)
2539 = noOccInfo -- for CoVars, so we must conservatively mark them as used
2540 -- See Note [DoO not mark CoVars as dead]
2541 | otherwise
2542 = case lookupVarEnv (ud_env ud) id of
2543 Just occ -> doZapping ud id occ
2544 Nothing -> IAmDead
2545
2546 usedIn :: Id -> UsageDetails -> Bool
2547 v `usedIn` ud = isExportedId v || v `elemVarEnv` ud_env ud
2548
2549 udFreeVars :: VarSet -> UsageDetails -> VarSet
2550 -- Find the subset of bndrs that are mentioned in uds
2551 udFreeVars bndrs ud = restrictUniqSetToUFM bndrs (ud_env ud)
2552
2553 {- Note [Do not mark CoVars as dead]
2554 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2555 It's obviously wrong to mark CoVars as dead if they are used.
2556 Currently we don't traverse types to gather usase info for CoVars,
2557 so we had better treat them as having noOccInfo.
2558
2559 This showed up in #15696 we had something like
2560 case eq_sel d of co -> ...(typeError @(...co...) "urk")...
2561
2562 Then 'd' was substitued by a dictionary, so the expression
2563 simpified to
2564 case (Coercion <blah>) of co -> ...(typeError @(...co...) "urk")...
2565
2566 But then the "drop the case altogether" equation of rebuildCase
2567 thought that 'co' was dead, and discarded the entire case. Urk!
2568
2569 I have no idea how we managed to avoid this pitfall for so long!
2570 -}
2571
2572 -------------------
2573 -- Auxiliary functions for UsageDetails implementation
2574
2575 combineUsageDetailsWith :: (OccInfo -> OccInfo -> OccInfo)
2576 -> UsageDetails -> UsageDetails -> UsageDetails
2577 combineUsageDetailsWith plus_occ_info ud1 ud2
2578 | isEmptyDetails ud1 = ud2
2579 | isEmptyDetails ud2 = ud1
2580 | otherwise
2581 = UD { ud_env = plusVarEnv_C plus_occ_info (ud_env ud1) (ud_env ud2)
2582 , ud_z_many = plusVarEnv (ud_z_many ud1) (ud_z_many ud2)
2583 , ud_z_in_lam = plusVarEnv (ud_z_in_lam ud1) (ud_z_in_lam ud2)
2584 , ud_z_no_tail = plusVarEnv (ud_z_no_tail ud1) (ud_z_no_tail ud2) }
2585
2586 doZapping :: UsageDetails -> Var -> OccInfo -> OccInfo
2587 doZapping ud var occ
2588 = doZappingByUnique ud (varUnique var) occ
2589
2590 doZappingByUnique :: UsageDetails -> Unique -> OccInfo -> OccInfo
2591 doZappingByUnique ud uniq
2592 = (if | in_subset ud_z_many -> markMany
2593 | in_subset ud_z_in_lam -> markInsideLam
2594 | otherwise -> id) .
2595 (if | in_subset ud_z_no_tail -> markNonTailCalled
2596 | otherwise -> id)
2597 where
2598 in_subset field = uniq `elemVarEnvByKey` field ud
2599
2600 alterZappedSets :: UsageDetails -> (ZappedSet -> ZappedSet) -> UsageDetails
2601 alterZappedSets ud f
2602 = ud { ud_z_many = f (ud_z_many ud)
2603 , ud_z_in_lam = f (ud_z_in_lam ud)
2604 , ud_z_no_tail = f (ud_z_no_tail ud) }
2605
2606 alterUsageDetails :: UsageDetails -> (OccInfoEnv -> OccInfoEnv) -> UsageDetails
2607 alterUsageDetails ud f
2608 = ud { ud_env = f (ud_env ud) }
2609 `alterZappedSets` f
2610
2611 flattenUsageDetails :: UsageDetails -> UsageDetails
2612 flattenUsageDetails ud
2613 = ud { ud_env = mapUFM_Directly (doZappingByUnique ud) (ud_env ud) }
2614 `alterZappedSets` const emptyVarEnv
2615
2616 -------------------
2617 -- See Note [Adjusting right-hand sides]
2618 adjustRhsUsage :: Maybe JoinArity -> RecFlag
2619 -> [CoreBndr] -- Outer lambdas, AFTER occ anal
2620 -> UsageDetails -> UsageDetails
2621 adjustRhsUsage mb_join_arity rec_flag bndrs usage
2622 = maybe_mark_lam (maybe_drop_tails usage)
2623 where
2624 maybe_mark_lam ud | one_shot = ud
2625 | otherwise = markAllInsideLam ud
2626 maybe_drop_tails ud | exact_join = ud
2627 | otherwise = markAllNonTailCalled ud
2628
2629 one_shot = case mb_join_arity of
2630 Just join_arity
2631 | isRec rec_flag -> False
2632 | otherwise -> all isOneShotBndr (drop join_arity bndrs)
2633 Nothing -> all isOneShotBndr bndrs
2634
2635 exact_join = case mb_join_arity of
2636 Just join_arity -> bndrs `lengthIs` join_arity
2637 _ -> False
2638
2639 type IdWithOccInfo = Id
2640
2641 tagLamBinders :: UsageDetails -- Of scope
2642 -> [Id] -- Binders
2643 -> (UsageDetails, -- Details with binders removed
2644 [IdWithOccInfo]) -- Tagged binders
2645 tagLamBinders usage binders
2646 = usage' `seq` (usage', bndrs')
2647 where
2648 (usage', bndrs') = mapAccumR tagLamBinder usage binders
2649
2650 tagLamBinder :: UsageDetails -- Of scope
2651 -> Id -- Binder
2652 -> (UsageDetails, -- Details with binder removed
2653 IdWithOccInfo) -- Tagged binders
2654 -- Used for lambda and case binders
2655 -- It copes with the fact that lambda bindings can have a
2656 -- stable unfolding, used for join points
2657 tagLamBinder usage bndr
2658 = (usage2, bndr')
2659 where
2660 occ = lookupDetails usage bndr
2661 bndr' = setBinderOcc (markNonTailCalled occ) bndr
2662 -- Don't try to make an argument into a join point
2663 usage1 = usage `delDetails` bndr
2664 usage2 | isId bndr = addManyOccsSet usage1 (idUnfoldingVars bndr)
2665 -- This is effectively the RHS of a
2666 -- non-join-point binding, so it's okay to use
2667 -- addManyOccsSet, which assumes no tail calls
2668 | otherwise = usage1
2669
2670 tagNonRecBinder :: TopLevelFlag -- At top level?
2671 -> UsageDetails -- Of scope
2672 -> CoreBndr -- Binder
2673 -> (UsageDetails, -- Details with binder removed
2674 IdWithOccInfo) -- Tagged binder
2675
2676 tagNonRecBinder lvl usage binder
2677 = let
2678 occ = lookupDetails usage binder
2679 will_be_join = decideJoinPointHood lvl usage [binder]
2680 occ' | will_be_join = -- must already be marked AlwaysTailCalled
2681 ASSERT(isAlwaysTailCalled occ) occ
2682 | otherwise = markNonTailCalled occ
2683 binder' = setBinderOcc occ' binder
2684 usage' = usage `delDetails` binder
2685 in
2686 usage' `seq` (usage', binder')
2687
2688 tagRecBinders :: TopLevelFlag -- At top level?
2689 -> UsageDetails -- Of body of let ONLY
2690 -> [(CoreBndr, -- Binder
2691 UsageDetails, -- RHS usage details
2692 [CoreBndr])] -- Lambdas in new RHS
2693 -> (UsageDetails, -- Adjusted details for whole scope,
2694 -- with binders removed
2695 [IdWithOccInfo]) -- Tagged binders
2696 -- Substantially more complicated than non-recursive case. Need to adjust RHS
2697 -- details *before* tagging binders (because the tags depend on the RHSes).
2698 tagRecBinders lvl body_uds triples
2699 = let
2700 (bndrs, rhs_udss, _) = unzip3 triples
2701
2702 -- 1. Determine join-point-hood of whole group, as determined by
2703 -- the *unadjusted* usage details
2704 unadj_uds = foldr andUDs body_uds rhs_udss
2705 will_be_joins = decideJoinPointHood lvl unadj_uds bndrs
2706
2707 -- 2. Adjust usage details of each RHS, taking into account the
2708 -- join-point-hood decision
2709 rhs_udss' = map adjust triples
2710 adjust (bndr, rhs_uds, rhs_bndrs)
2711 = adjustRhsUsage mb_join_arity Recursive rhs_bndrs rhs_uds
2712 where
2713 -- Can't use willBeJoinId_maybe here because we haven't tagged the
2714 -- binder yet (the tag depends on these adjustments!)
2715 mb_join_arity
2716 | will_be_joins
2717 , let occ = lookupDetails unadj_uds bndr
2718 , AlwaysTailCalled arity <- tailCallInfo occ
2719 = Just arity
2720 | otherwise
2721 = ASSERT(not will_be_joins) -- Should be AlwaysTailCalled if
2722 Nothing -- we are making join points!
2723
2724 -- 3. Compute final usage details from adjusted RHS details
2725 adj_uds = foldr andUDs body_uds rhs_udss'
2726
2727 -- 4. Tag each binder with its adjusted details
2728 bndrs' = [ setBinderOcc (lookupDetails adj_uds bndr) bndr
2729 | bndr <- bndrs ]
2730
2731 -- 5. Drop the binders from the adjusted details and return
2732 usage' = adj_uds `delDetailsList` bndrs
2733 in
2734 (usage', bndrs')
2735
2736 setBinderOcc :: OccInfo -> CoreBndr -> CoreBndr
2737 setBinderOcc occ_info bndr
2738 | isTyVar bndr = bndr
2739 | isExportedId bndr = if isManyOccs (idOccInfo bndr)
2740 then bndr
2741 else setIdOccInfo bndr noOccInfo
2742 -- Don't use local usage info for visible-elsewhere things
2743 -- BUT *do* erase any IAmALoopBreaker annotation, because we're
2744 -- about to re-generate it and it shouldn't be "sticky"
2745
2746 | otherwise = setIdOccInfo bndr occ_info
2747
2748 -- | Decide whether some bindings should be made into join points or not.
2749 -- Returns `False` if they can't be join points. Note that it's an
2750 -- all-or-nothing decision, as if multiple binders are given, they're
2751 -- assumed to be mutually recursive.
2752 --
2753 -- It must, however, be a final decision. If we say "True" for 'f',
2754 -- and then subsequently decide /not/ make 'f' into a join point, then
2755 -- the decision about another binding 'g' might be invalidated if (say)
2756 -- 'f' tail-calls 'g'.
2757 --
2758 -- See Note [Invariants on join points] in CoreSyn.
2759 decideJoinPointHood :: TopLevelFlag -> UsageDetails
2760 -> [CoreBndr]
2761 -> Bool
2762 decideJoinPointHood TopLevel _ _
2763 = False
2764 decideJoinPointHood NotTopLevel usage bndrs
2765 | isJoinId (head bndrs)
2766 = WARN(not all_ok, text "OccurAnal failed to rediscover join point(s):" <+>
2767 ppr bndrs)
2768 all_ok
2769 | otherwise
2770 = all_ok
2771 where
2772 -- See Note [Invariants on join points]; invariants cited by number below.
2773 -- Invariant 2 is always satisfiable by the simplifier by eta expansion.
2774 all_ok = -- Invariant 3: Either all are join points or none are
2775 all ok bndrs
2776
2777 ok bndr
2778 | -- Invariant 1: Only tail calls, all same join arity
2779 AlwaysTailCalled arity <- tailCallInfo (lookupDetails usage bndr)
2780
2781 , -- Invariant 1 as applied to LHSes of rules
2782 all (ok_rule arity) (idCoreRules bndr)
2783
2784 -- Invariant 2a: stable unfoldings
2785 -- See Note [Join points and INLINE pragmas]
2786 , ok_unfolding arity (realIdUnfolding bndr)
2787
2788 -- Invariant 4: Satisfies polymorphism rule
2789 , isValidJoinPointType arity (idType bndr)
2790 = True
2791
2792 | otherwise
2793 = False
2794
2795 ok_rule _ BuiltinRule{} = False -- only possible with plugin shenanigans
2796 ok_rule join_arity (Rule { ru_args = args })
2797 = args `lengthIs` join_arity
2798 -- Invariant 1 as applied to LHSes of rules
2799
2800 -- ok_unfolding returns False if we should /not/ convert a non-join-id
2801 -- into a join-id, even though it is AlwaysTailCalled
2802 ok_unfolding join_arity (CoreUnfolding { uf_src = src, uf_tmpl = rhs })
2803 = not (isStableSource src && join_arity > joinRhsArity rhs)
2804 ok_unfolding _ (DFunUnfolding {})
2805 = False
2806 ok_unfolding _ _
2807 = True
2808
2809 willBeJoinId_maybe :: CoreBndr -> Maybe JoinArity
2810 willBeJoinId_maybe bndr
2811 = case tailCallInfo (idOccInfo bndr) of
2812 AlwaysTailCalled arity -> Just arity
2813 _ -> isJoinId_maybe bndr
2814
2815
2816 {- Note [Join points and INLINE pragmas]
2817 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2818 Consider
2819 f x = let g = \x. not -- Arity 1
2820 {-# INLINE g #-}
2821 in case x of
2822 A -> g True True
2823 B -> g True False
2824 C -> blah2
2825
2826 Here 'g' is always tail-called applied to 2 args, but the stable
2827 unfolding captured by the INLINE pragma has arity 1. If we try to
2828 convert g to be a join point, its unfolding will still have arity 1
2829 (since it is stable, and we don't meddle with stable unfoldings), and
2830 Lint will complain (see Note [Invariants on join points], (2a), in
2831 CoreSyn. #13413.
2832
2833 Moreover, since g is going to be inlined anyway, there is no benefit
2834 from making it a join point.
2835
2836 If it is recursive, and uselessly marked INLINE, this will stop us
2837 making it a join point, which is annoying. But occasionally
2838 (notably in class methods; see Note [Instances and loop breakers] in
2839 TcInstDcls) we mark recursive things as INLINE but the recursion
2840 unravels; so ignoring INLINE pragmas on recursive things isn't good
2841 either.
2842
2843 See Invariant 2a of Note [Invariants on join points] in CoreSyn
2844
2845
2846 ************************************************************************
2847 * *
2848 \subsection{Operations over OccInfo}
2849 * *
2850 ************************************************************************
2851 -}
2852
2853 markMany, markInsideLam, markNonTailCalled :: OccInfo -> OccInfo
2854
2855 markMany IAmDead = IAmDead
2856 markMany occ = ManyOccs { occ_tail = occ_tail occ }
2857
2858 markInsideLam occ@(OneOcc {}) = occ { occ_in_lam = True }
2859 markInsideLam occ = occ
2860
2861 markNonTailCalled IAmDead = IAmDead
2862 markNonTailCalled occ = occ { occ_tail = NoTailCallInfo }
2863
2864 addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
2865
2866 addOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2867 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2868 tailCallInfo a2 }
2869 -- Both branches are at least One
2870 -- (Argument is never IAmDead)
2871
2872 -- (orOccInfo orig new) is used
2873 -- when combining occurrence info from branches of a case
2874
2875 orOccInfo (OneOcc { occ_in_lam = in_lam1, occ_int_cxt = int_cxt1
2876 , occ_tail = tail1 })
2877 (OneOcc { occ_in_lam = in_lam2, occ_int_cxt = int_cxt2
2878 , occ_tail = tail2 })
2879 = OneOcc { occ_one_br = False -- False, because it occurs in both branches
2880 , occ_in_lam = in_lam1 || in_lam2
2881 , occ_int_cxt = int_cxt1 && int_cxt2
2882 , occ_tail = tail1 `andTailCallInfo` tail2 }
2883
2884 orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2885 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2886 tailCallInfo a2 }
2887
2888 andTailCallInfo :: TailCallInfo -> TailCallInfo -> TailCallInfo
2889 andTailCallInfo info@(AlwaysTailCalled arity1) (AlwaysTailCalled arity2)
2890 | arity1 == arity2 = info
2891 andTailCallInfo _ _ = NoTailCallInfo