41a6f7fa71a687573770403695a3261ea5dbd77a

1 {-

2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998

4 ************************************************************************

5 * *

6 \section[OccurAnal]{Occurrence analysis pass}

7 * *

8 ************************************************************************

10 The occurrence analyser re-typechecks a core expression, returning a new

11 core expression with (hopefully) improved usage information.

12 -}

14 {-# LANGUAGE CPP, BangPatterns #-}

22 import CoreSyn

23 import CoreFVs

26 import Id

28 import BasicTypes

30 import Coercion

32 import VarSet

33 import VarEnv

34 import Var

38 import Unique

39 import UniqFM

40 import Util

41 import Outputable

45 {-

46 ************************************************************************

47 * *

48 \subsection[OccurAnal-main]{Counting occurrences: main function}

49 * *

50 ************************************************************************

52 Here's the externally-callable interface:

53 -}

59 occurAnalysePgm this_mod active_rule imp_rules vects vectVars binds

60 | isEmptyVarEnv final_usage

61 = occ_anald_binds

66 occ_anald_glommed_binds

67 where

68 init_env = initOccEnv active_rule

72 initial_uds

73 -- It's crucial to re-analyse the glommed-together bindings

74 -- so that we establish the right loop breakers. Otherwise

75 -- we can easily create an infinite loop (Trac #9583 is an example)

77 initial_uds = addIdOccs emptyDetails

78 (rulesFreeVars imp_rules `unionVarSet`

79 vectsFreeVars vects `unionVarSet`

80 vectVars)

81 -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations,

82 -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are

83 -- reflected in 'vectors' — see Note [Vectorisation declarations and occurrences].)

85 -- Note [Preventing loops due to imported functions rules]

88 | imp_rule <- imp_rules

91 `delVarSetList` ru_bndrs imp_rule

95 go _ []

99 where

104 -- Do occurrence analysis, and discard occurrence info returned

111 occurAnalyseExpr' enable_binder_swap expr

113 where

115 -- To be conservative, we say that all inlines and rules are active

118 {- Note [Plugin rules]

119 ~~~~~~~~~~~~~~~~~~~~~~

120 Conal Elliott (Trac #11651) built a GHC plugin that added some

121 BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to

122 do some domain-specific transformations that could not be expressed

123 with an ordinary pattern-matching CoreRule. But then we can't extract

124 the dependencies (in imp_rule_edges) from ru_rhs etc, because a

125 BuiltinRule doesn't have any of that stuff.

127 So we simply assume that BuiltinRules have no dependencies, and filter

128 them out from the imp_rule_edges comprehension.

129 -}

131 {-

132 ************************************************************************

133 * *

134 \subsection[OccurAnal-main]{Counting occurrences: main function}

135 * *

136 ************************************************************************

138 Bindings

139 ~~~~~~~~

140 -}

144 noImpRuleEdges :: ImpRuleEdges

145 noImpRuleEdges = emptyVarEnv

148 -> ImpRuleEdges

149 -> CoreBind

155 = occAnalNonRecBind env top_env binder rhs body_usage

157 = occAnalRecBind env top_env pairs body_usage

159 -----------------

162 occAnalNonRecBind env imp_rule_edges binder rhs body_usage

163 | isTyVar binder -- A type let; we don't gather usage info

171 where

177 -- See Note [Rules are extra RHSs] and Note [Rule dependency info]

180 lookupVarEnv imp_rule_edges binder

181 -- See Note [Preventing loops due to imported functions rules]

183 -----------------

186 occAnalRecBind env imp_rule_edges pairs body_usage

188 -- For a recursive group, we

189 -- * occ-analyse all the RHSs

190 -- * compute strongly-connected components

191 -- * feed those components to occAnalRec

192 where

201 {-

202 Note [Dead code]

203 ~~~~~~~~~~~~~~~~

204 Dropping dead code for a cyclic Strongly Connected Component is done

205 in a very simple way:

207 the entire SCC is dropped if none of its binders are mentioned

208 in the body; otherwise the whole thing is kept.

210 The key observation is that dead code elimination happens after

211 dependency analysis: so 'occAnalBind' processes SCCs instead of the

212 original term's binding groups.

214 Thus 'occAnalBind' does indeed drop 'f' in an example like

216 letrec f = ...g...

217 g = ...(...g...)...

218 in

219 ...g...

221 when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in

222 'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes

223 'AcyclicSCC f', where 'body_usage' won't contain 'f'.

225 ------------------------------------------------------------

226 Note [Forming Rec groups]

227 ~~~~~~~~~~~~~~~~~~~~~~~~~

228 We put bindings {f = ef; g = eg } in a Rec group if "f uses g"

229 and "g uses f", no matter how indirectly. We do a SCC analysis

230 with an edge f -> g if "f uses g".

232 More precisely, "f uses g" iff g should be in scope wherever f is.

233 That is, g is free in:

234 a) the rhs 'ef'

235 b) or the RHS of a rule for f (Note [Rules are extra RHSs])

236 c) or the LHS or a rule for f (Note [Rule dependency info])

238 These conditions apply regardless of the activation of the RULE (eg it might be

239 inactive in this phase but become active later). Once a Rec is broken up

240 it can never be put back together, so we must be conservative.

242 The principle is that, regardless of rule firings, every variable is

243 always in scope.

245 * Note [Rules are extra RHSs]

246 ~~~~~~~~~~~~~~~~~~~~~~~~~~~

247 A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"

248 keeps the specialised "children" alive. If the parent dies

249 (because it isn't referenced any more), then the children will die

250 too (unless they are already referenced directly).

252 To that end, we build a Rec group for each cyclic strongly

253 connected component,

254 *treating f's rules as extra RHSs for 'f'*.

255 More concretely, the SCC analysis runs on a graph with an edge

256 from f -> g iff g is mentioned in

257 (a) f's rhs

258 (b) f's RULES

259 These are rec_edges.

261 Under (b) we include variables free in *either* LHS *or* RHS of

262 the rule. The former might seems silly, but see Note [Rule

263 dependency info]. So in Example [eftInt], eftInt and eftIntFB

264 will be put in the same Rec, even though their 'main' RHSs are

265 both non-recursive.

267 * Note [Rule dependency info]

268 ~~~~~~~~~~~~~~~~~~~~~~~~~~~

269 The VarSet in a RuleInfo is used for dependency analysis in the

270 occurrence analyser. We must track free vars in *both* lhs and rhs.

271 Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.

272 Why both? Consider

273 x = y

274 RULE f x = v+4

275 Then if we substitute y for x, we'd better do so in the

276 rule's LHS too, so we'd better ensure the RULE appears to mention 'x'

277 as well as 'v'

279 * Note [Rules are visible in their own rec group]

280 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

281 We want the rules for 'f' to be visible in f's right-hand side.

282 And we'd like them to be visible in other functions in f's Rec

283 group. E.g. in Note [Specialisation rules] we want f' rule

284 to be visible in both f's RHS, and fs's RHS.

286 This means that we must simplify the RULEs first, before looking

287 at any of the definitions. This is done by Simplify.simplRecBind,

288 when it calls addLetIdInfo.

290 ------------------------------------------------------------

291 Note [Choosing loop breakers]

292 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

293 Loop breaking is surprisingly subtle. First read the section 4 of

294 "Secrets of the GHC inliner". This describes our basic plan.

295 We avoid infinite inlinings by choosing loop breakers, and

296 ensuring that a loop breaker cuts each loop.

298 Fundamentally, we do SCC analysis on a graph. For each recursive

299 group we choose a loop breaker, delete all edges to that node,

300 re-analyse the SCC, and iterate.

302 But what is the graph? NOT the same graph as was used for Note

303 [Forming Rec groups]! In particular, a RULE is like an equation for

304 'f' that is *always* inlined if it is applicable. We do *not* disable

305 rules for loop-breakers. It's up to whoever makes the rules to make

306 sure that the rules themselves always terminate. See Note [Rules for

307 recursive functions] in Simplify.hs

309 Hence, if

310 f's RHS (or its INLINE template if it has one) mentions g, and

311 g has a RULE that mentions h, and

312 h has a RULE that mentions f

314 then we *must* choose f to be a loop breaker. Example: see Note

315 [Specialisation rules].

317 In general, take the free variables of f's RHS, and augment it with

318 all the variables reachable by RULES from those starting points. That

319 is the whole reason for computing rule_fv_env in occAnalBind. (Of

320 course we only consider free vars that are also binders in this Rec

321 group.) See also Note [Finding rule RHS free vars]

323 Note that when we compute this rule_fv_env, we only consider variables

324 free in the *RHS* of the rule, in contrast to the way we build the

325 Rec group in the first place (Note [Rule dependency info])

327 Note that if 'g' has RHS that mentions 'w', we should add w to

328 g's loop-breaker edges. More concretely there is an edge from f -> g

329 iff

330 (a) g is mentioned in f's RHS `xor` f's INLINE rhs

331 (see Note [Inline rules])

332 (b) or h is mentioned in f's RHS, and

333 g appears in the RHS of an active RULE of h

334 or a transitive sequence of active rules starting with h

336 Why "active rules"? See Note [Finding rule RHS free vars]

338 Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is

339 chosen as a loop breaker, because their RHSs don't mention each other.

340 And indeed both can be inlined safely.

342 Note again that the edges of the graph we use for computing loop breakers

343 are not the same as the edges we use for computing the Rec blocks.

344 That's why we compute

346 - rec_edges for the Rec block analysis

347 - loop_breaker_edges for the loop breaker analysis

349 * Note [Finding rule RHS free vars]

350 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

351 Consider this real example from Data Parallel Haskell

352 tagZero :: Array Int -> Array Tag

353 {-# INLINE [1] tagZeroes #-}

356 {-# RULES "tagZero" [~1] forall xs n.

357 pmap fromBool <blah blah> = tagZero xs #-}

362 To make this work, we look for the RHS free vars only for

367 ~~~~~~~~~~~~~~~~~~~~~~~~~

371 RULE f [] = g

373 h = h_rhs

374 g = h

376 }

385 RULE!

409 Inline postInlineUnconditionally

410 strong IAmLoopBreaker False no no

411 weak IAmLoopBreaker True yes no

412 other yes yes

420 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

421 Consider this

424 Note that

427 Now we

428 can get

430 --> 1 + f Int Firing RULE

435 f's definition had been

440 f.

447 RULE f x = f x

451 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

452 Consider:

455 {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}

457 filterFB c p = ...

471 {-# LANGUAGE RankNTypes #-}

477 {-# INLINABLE filter #-}

482 {-# NOINLINE [0] filterFB #-}

484 filterFB c p x r | p x = x `c` r

487 {-# RULES

488 "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr

489 (filterFB c p) n xs)

490 "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p

491 #-}

501 filter p xs

513 dies with "ticks exhausted"

523 local functions because the standard occurrence analysis stuff is

529 Ids.

532 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

537 we get

544 To avoid this,

546 give a NOINLINE pragma to the specialised function

549 ~~~~~~~~~~~~~~~

550 RULES for imported Ids can make something at the top refer to something at the bottom:

561 NOTICE that this cannot happen for rules whose head is a

566 Solution:

580 ------------------------------------------------------------

582 ~~~~~~~~~~~~~~~~~~~

603 {-# INLINE [1] foo #-}

607 {-# INLINE [1] bar #-}

610 {-# RULES "foo" [~1] forall x. foo x = bar x #-}

615 a) We may get

622 b)

626 ~~~~~~~~~~~~~~~

632 {-# INLINE [0] eftIntFB #-}

636 {-# RULES

637 "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)

638 "eftIntList" [1] eftIntFB (:) [] = eftInt

639 #-}

642 ~~~~~~~~~~~~~~~~~~~~~~~~~~~

647 {-# RULE f (C a) = fs a #-}

657 -}

660 -- which is gotten from the Id.

661 data Details

666 -- ignoring phase (ie assuming all are active)

667 -- See Note [Forming Rec groups]

670 -- the stable unfolding (if present and active)

671 -- or the RHS (if not)

672 -- but excluding any RULES

673 -- This is the IdSet that may be used if the Id is inlined

676 -- but are *not* in nd_inl. These are the ones whose

677 -- dependencies might not be respected by loop_breaker_edges

678 -- See Note [Weak loop breakers]

681 }

690 ])

695 where

703 -- Constructing the edges for the main Rec computation

704 -- See Note [Forming Rec groups]

707 -- Note [Rule dependency info]

709 Just unf_fvs -> addIdOccs rhs_usage2 unf_fvs

710 Nothing -> rhs_usage2

711 node_fvs = udFreeVars bndr_set rhs_usage3

713 -- Finding the free variables of the rules

718 -- See Note [Preventing loops due to imported functions rules]

720 | rule <- rules

722 `delVarSetList` ru_bndrs rule

724 all_rule_fvs = rule_lhs_fvs `unionVarSet` rule_rhs_fvs

727 `delVarSetList` ru_bndrs ru) rules

730 -- Finding the free variables of the INLINE pragma (if any)

732 mb_unf_fvs = stableUnfoldingVars unf

734 -- Find the "nd_inl" free vars; for the loop-breaker phase

737 Just unf_fvs -> unf_fvs

738 -- We could check for an *active* INLINE (returning

739 -- emptyVarSet for an inactive one), but is_active

740 -- isn't the right thing (it tells about

741 -- RULE activation), so we'd need more plumbing

743 -----------------------------

748 -- The NonRec case is just like a Let (NonRec ...) above

757 where

760 -- The Rec case is the interesting one

761 -- See Note [Loop breaking]

768 -- [ text "tagged nodes" <+> ppr tagged_nodes

769 -- , text "lb edges" <+> ppr loop_breaker_edges])

772 where

774 bndr_set = mkVarSet bndrs

776 ----------------------------

777 -- Tag the binders with their occurrence info

780 final_uds = total_uds `minusVarEnv` bndr_set

788 ---------------------------

789 -- Now reconstruct the cycle

793 -- If weak_fvs is empty, the loop_breaker_edges will include all

794 -- the edges in tagged_nodes, so there isn't any point in doing

795 -- a fresh SCC computation that will yield a single CyclicSCC result.

797 weak_fvs :: VarSet

800 -- See Note [Choosing loop breakers] for loop_breaker_edges

805 ------------------------------------

806 rule_fv_env :: IdEnv IdSet

807 -- Maps a variable f to the variables from this group

808 -- mentioned in RHS of active rules for f

809 -- Domain is *subset* of bound vars (others have no rule fvs)

811 init_rule_fvs -- See Note [Finding rule RHS free vars]

817 {-

818 @loopBreakSCC@ is applied to the list of (binder,rhs) pairs for a cyclic

819 strongly connected component (there's guaranteed to be a cycle). It returns the

820 same pairs, but

821 a) in a better order,

822 b) with some of the Ids having a IAmALoopBreaker pragma

824 The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means

825 that the simplifier can guarantee not to loop provided it never records an inlining

826 for these no-inline guys.

828 Furthermore, the order of the binds is such that if we neglect dependencies

829 on the no-inline Ids then the binds are topologically sorted. This means

830 that the simplifier will generally do a good job if it works from top bottom,

831 recording inlinings for any Ids which aren't marked as "no-inline" as it goes.

832 -}

841 -- See Note [Weak loop breakers]

847 -- Find the subset of bndrs that are mentioned in uds

853 -- See Note [Weak loop breakers]

857 -- Return the bindings sorted into a plausible order, and marked with loop breakers.

858 loopBreakNodes depth bndr_set weak_fvs nodes binds

860 where

864 loop_break_scc scc binds

868 CyclicSCC nodes -> reOrderNodes depth bndr_set weak_fvs nodes binds

871 -- Choose a loop breaker, mark it no-inline,

872 -- do SCC analysis on the rest, and recursively sort them out

876 -- text "chosen" <+> ppr chosen_nodes) $

877 loopBreakNodes new_depth bndr_set weak_fvs unchosen $

879 where

885 -- After two iterations (d=0, d=1) give up

886 -- and approximate, returning to d=0

893 -- This loop looks for the bind with the lowest score

894 -- to pick as the loop breaker. The rest accumulate in

895 choose_loop_breaker _ loop_nodes acc []

898 -- If approximate_loop_breaker is True, we pick *all*

899 -- nodes with lowest score, else just one

900 -- See Note [Complexity of loop breaking]

910 where

911 sc = score node

918 -- Note [DFuns should not be loop breakers]

923 -- Data structures are more important than INLINE pragmas

924 -- so that dictionary/method recursion unravels

925 -- Note that this case hits all stable unfoldings, so we

926 -- never look at 'rhs' for stable unfoldings. That's right, because

927 -- 'rhs' is irrelevant for inlining things with a stable unfolding

932 -- Used to have also: && not (isExportedId bndr)

933 -- But I found this sometimes cost an extra iteration when we have

934 -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }

935 -- where df is the exported dictionary. Then df makes a really

936 -- bad choice for loop breaker

939 -- If an Id is marked "never inline" then it makes a great loop breaker

940 -- The only reason for not checking that here is that it is rare

941 -- and I've never seen a situation where it makes a difference,

942 -- so it probably isn't worth the time to test on every binder

943 -- | isNeverActive (idInlinePragma bndr) = -10

948 -- The Id has some kind of unfolding

949 -- Ignore loop-breaker-ness here because that is what we are setting!

953 -- Checking for a constructor application

954 -- Cheap and cheerful; the simplifer moves casts out of the way

955 -- The lambda case is important to spot x = /\a. C (f a)

956 -- which comes up when C is a dictionary constructor and

957 -- f is a default method.

958 -- Example: the instance for Show (ST s a) in GHC.ST

959 --

960 -- However we *also* treat (\x. C p q) as a con-app-like thing,

961 -- Note [Closure conversion]

968 {-

969 Note [Complexity of loop breaking]

970 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

971 The loop-breaking algorithm knocks out one binder at a time, and

972 performs a new SCC analysis on the remaining binders. That can

973 behave very badly in tightly-coupled groups of bindings; in the

974 worst case it can be (N**2)*log N, because it does a full SCC

975 on N, then N-1, then N-2 and so on.

977 To avoid this, we switch plans after 2 (or whatever) attempts:

978 Plan A: pick one binder with the lowest score, make it

979 a loop breaker, and try again

980 Plan B: pick *all* binders with the lowest score, make them

981 all loop breakers, and try again

982 Since there are only a small finite number of scores, this will

983 terminate in a constant number of iterations, rather than O(N)

984 iterations.

986 You might thing that it's very unlikely, but RULES make it much

987 more likely. Here's a real example from Trac #1969:

988 Rec { $dm = \d.\x. op d

989 {-# RULES forall d. $dm Int d = $s$dm1

990 forall d. $dm Bool d = $s$dm2 #-}

994 opInt = $dm dInt

995 opBool = $dm dBool

1005 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

1018 {-# INLINE f #-}

1025 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

1029 We give DFuns a higher score than ordinary CONLIKE things because

1035 {-# DFUN #-}

1037 }

1043 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

1055 Inlining dictionaries is really essential to unravelling

1059 ~~~~~~~~~~~~~~~~~~~~~~~~~

1062 which generated code like this:

1082 by an INLINE pragma. For these we record that anything which occurs

1090 -}

1094 -- Returned usage details covers only the RHS,

1095 -- and *not* the RULE or INLINE template for the Id

1098 occAnalNonRecRhs :: OccEnv

1100 -- Binder is already tagged with occurrence info

1102 -- Returned usage details covers only the RHS,

1103 -- and *not* the RULE or INLINE template for the Id

1104 occAnalNonRecRhs env bndr rhs

1105 = occAnal rhs_env rhs

1106 where

1107 -- See Note [Cascading inlines]

1108 env1 | certainly_inline = env

1111 -- See Note [Use one-shot info]

1115 certainly_inline -- See Note [Cascading inlines]

1120 dmd = idDemandInfo bndr

1125 addIdOccs usage id_set = foldVarSet addIdOcc usage id_set

1128 addIdOcc v u | isId v = addOneOcc u v NoOccInfo

1130 -- Give a non-committal binder info (i.e NoOccInfo) because

1131 -- a) Many copies of the specialised thing can appear

1132 -- b) We don't want to substitute a BIG expression inside a RULE

1133 -- even if that's the only occurrence of the thing

1134 -- (Same goes for INLINE.)

1136 {-

1137 Note [Cascading inlines]

1138 ~~~~~~~~~~~~~~~~~~~~~~~~

1139 By default we use an rhsCtxt for the RHS of a binding. This tells the

1140 occ anal n that it's looking at an RHS, which has an effect in

1141 occAnalApp. In particular, for constructor applications, it makes

1142 the arguments appear to have NoOccInfo, so that we don't inline into

1143 them. Thus x = f y

1144 k = Just x

1145 we do not want to inline x.

1147 But there's a problem. Consider

1148 x1 = a0 : []

1149 x2 = a1 : x1

1150 x3 = a2 : x2

1151 g = f x3

1152 First time round, it looks as if x1 and x2 occur as an arg of a

1153 let-bound constructor ==> give them a many-occurrence.

1154 But then x3 is inlined (unconditionally as it happens) and

1155 next time round, x2 will be, and the next time round x1 will be

1156 Result: multiple simplifier iterations. Sigh.

1158 So, when analysing the RHS of x3 we notice that x3 will itself

1159 definitely inline the next time round, and so we analyse x3's rhs in

1160 an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff.

1162 Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally.

1163 If we say "yes" when preInlineUnconditionally says "no" the simplifier iterates

1164 indefinitely:

1165 x = f y

1166 k = Just x

1167 inline ==>

1168 k = Just (f y)

1169 float ==>

1170 x1 = f y

1171 k = Just x1

1173 This is worse than the slow cascade, so we only want to say "certainly_inline"

1174 if it really is certain. Look at the note with preInlineUnconditionally

1175 for the various clauses.

1177 Expressions

1178 ~~~~~~~~~~~

1179 -}

1181 occAnal :: OccEnv

1182 -> CoreExpr

1184 CoreExpr)

1189 -- At one stage, I gathered the idRuleVars for v here too,

1190 -- which in a way is the right thing to do.

1191 -- But that went wrong right after specialisation, when

1192 -- the *occurrences* of the overloaded function didn't have any

1193 -- rules in them, so the *specialised* versions looked as if they

1194 -- weren't used at all.

1198 -- See Note [Gather occurrences of coercion variables]

1200 {-

1201 Note [Gather occurrences of coercion variables]

1202 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

1203 We need to gather info about what coercion variables appear, so that

1204 we can sort them into the right place when doing dependency analysis.

1205 -}

1208 | tickish `tickishScopesLike` SoftScope

1211 | Breakpoint _ ids <- tickish

1213 -- never substitute for any of the Ids in a Breakpoint

1215 | otherwise

1217 where

1219 -- for a non-soft tick scope, we can inline lambdas only

1220 usage_lam = mapVarEnv markInsideLam usage

1226 -- See Note [Gather occurrences of coercion variables]

1228 -- If we see let x = y `cast` co

1229 -- then mark y as 'Many' so that we don't

1230 -- immediately inline y again.

1231 }

1236 -- Ignore type variables altogether

1237 -- (a) occurrences inside type lambdas only not marked as InsideLam

1238 -- (b) type variables not in environment

1243 }

1245 -- For value lambdas we do a special hack. Consider

1246 -- (\x. \y. ...x...)

1247 -- If we did nothing, x is used inside the \y, so would be marked

1248 -- as dangerous to dup. But in the common case where the abstraction

1249 -- is applied to two arguments this is over-pessimistic.

1250 -- So instead, we just mark each binder with its occurrence

1251 -- info in the *body* of the multiple lambda.

1252 -- Then, the simplifier is careful when partially applying lambdas.

1256 let

1258 -- Use binders' to put one-shot info on the lambdas

1260 really_final_usage

1263 in

1265 where

1272 let

1276 in

1278 where

1279 -- Note [Case binder usage]

1280 -- ~~~~~~~~~~~~~~~~~~~~~~~~

1281 -- The case binder gets a usage of either "many" or "dead", never "one".

1282 -- Reason: we like to inline single occurrences, to eliminate a binding,

1283 -- but inlining a case binder *doesn't* eliminate a binding.

1284 -- We *don't* want to transform

1285 -- case x of w { (p,q) -> f w }

1286 -- into

1287 -- case x of w { (p,q) -> f (p,q) }

1288 tag_case_bndr usage bndr

1293 alt_env = mkAltEnv env scrut bndr

1294 occ_anal_alt = occAnalAlt alt_env

1299 -- in an interesting context; the case has

1300 -- at least one non-default alternative

1302 | t `tickishScopesLike` SoftScope

1303 -- No reason to not look through all ticks here, but only

1304 -- for soft-scoped ticks we can do so without having to

1305 -- update returned occurance info (see occAnal)

1308 occ_anal_scrut scrut _alts

1317 occAnalArgs _ [] _

1321 | isTypeArg arg

1325 | otherwise

1331 {-

1332 Applications are dealt with specially because we want

1333 the "build hack" to work.

1335 Note [Arguments of let-bound constructors]

1336 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

1337 Consider

1338 f x = let y = expensive x in

1339 let z = (True,y) in

1340 (case z of {(p,q)->q}, case z of {(p,q)->q})

1341 We feel free to duplicate the WHNF (True,y), but that means

1342 that y may be duplicated thereby.

1344 If we aren't careful we duplicate the (expensive x) call!

1345 Constructors are rather like lambdas in this way.

1346 -}

1348 occAnalApp :: OccEnv

1354 where

1359 -- We mark the free vars of the argument of a constructor or PAP

1360 -- as "many", if it is the RHS of a let(rec).

1361 -- This means that nothing gets inlined into a constructor argument

1362 -- position, which is what we want. Typically those constructor

1363 -- arguments are just variables, or trivial expressions.

1364 --

1365 -- This is the *whole point* of the isRhsEnv predicate

1366 -- See Note [Arguments of let-bound constructors]

1368 n_val_args = valArgCount args

1370 is_exp = isExpandableApp fun n_val_args

1371 -- See Note [CONLIKE pragma] in BasicTypes

1372 -- The definition of is_exp should match that in

1373 -- Simplify.prepareRhs

1376 -- See Note [Use one-shot info]

1380 where

1382 -- The addAppCtxt is a bit cunning. One iteration of the simplifier

1383 -- often leaves behind beta redexs like

1384 -- (\x y -> e) a1 a2

1385 -- Here we would like to mark x,y as one-shot, and treat the whole

1386 -- thing much like a let. We do this by pushing some True items

1387 -- onto the context stack.

1392 -> UsageDetails

1396 {-

1397 Note [Use one-shot information]

1398 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

1399 The occurrrence analyser propagates one-shot-lambda information in two

1400 situations:

1402 * Applications: eg build (\c n -> blah)

1404 Propagate one-shot info from the strictness signature of 'build' to

1405 the \c n.

1407 This strictness signature can come from a module interface, in the case of

1408 an imported function, or from a previous run of the demand analyser.

1410 * Let-bindings: eg let f = \c. let ... in \n -> blah

1411 in (build f, build f)

1413 Propagate one-shot info from the demanand-info on 'f' to the

1414 lambdas in its RHS (which may not be syntactically at the top)

1416 This information must have come from a previous run of the demanand

1417 analyser.

1419 Previously, the demand analyser would *also* set the one-shot information, but

1420 that code was buggy (see #11770), so doing it only in on place, namely here, is

1421 saner.

1423 Note [Binders in case alternatives]

1424 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

1425 Consider

1426 case x of y { (a,b) -> f y }

1427 We treat 'a', 'b' as dead, because they don't physically occur in the

1428 case alternative. (Indeed, a variable is dead iff it doesn't occur in

1429 its scope in the output of OccAnal.) It really helps to know when

1430 binders are unused. See esp the call to isDeadBinder in

1431 Simplify.mkDupableAlt

1433 In this example, though, the Simplifier will bring 'a' and 'b' back to

1434 life, beause it binds 'y' to (a,b) (imagine got inlined and

1435 scrutinised y).

1436 -}

1439 -> CoreAlt

1443 let

1445 -- See Note [Binders in case alternatives]

1447 wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1

1448 in

1451 wrapAltRHS :: OccEnv

1458 | occ_binder_swap env

1460 -- handles condition (a) in Note [Binder swap]

1464 where

1466 -- The rhs of the let may include coercion variables

1467 -- if the scrutinee was a cast, so we must gather their

1468 -- usage. See Note [Gather occurrences of coercion variables]

1472 wrapAltRHS _ _ alt_usg _ alt_rhs

1475 {-

1476 ************************************************************************

1477 * *

1478 OccEnv

1479 * *

1480 ************************************************************************

1481 -}

1483 data OccEnv

1488 -- See Note [Finding rule RHS free vars]

1490 -- See CorePrep Note [Dead code in CorePrep]

1491 }

1495 -----------------------------

1496 -- OccEncl is used to control whether to inline into constructor arguments

1497 -- For example:

1498 -- x = (p,q) -- Don't inline p or q

1499 -- y = /\a -> (p a, q a) -- Still don't inline p or q

1500 -- z = f (p,q) -- Do inline p,q; it may make a rule fire

1501 -- So OccEncl tells enought about the context to know what to do when

1502 -- we encounter a contructor application or PAP.

1504 data OccEncl

1506 -- Don't inline into constructor args here

1507 | OccVanilla -- Argument of function, body of lambda, scruintee of case etc.

1508 -- Do inline into constructor args here

1515 -- [] No info

1516 --

1517 -- one_shot_info:ctxt Analysing a function-valued expression that

1518 -- will be applied as described by one_shot_info

1521 initOccEnv active_rule

1535 argCtxt env []

1545 -> ( OccEnv

1547 -- The result binders have one-shot-ness set that they might not have had originally.

1548 -- This happens in (build (\c n -> e)). Here the occurrence analyser

1549 -- linearity context knows that c,n are one-shot, and it records that fact in

1550 -- the binder. This is useful to guide subsequent float-in/float-out tranformations

1554 where

1555 go ctxt [] rev_bndrs

1559 go [] bndrs rev_bndrs

1564 | isId bndr

1569 where

1570 bndr' = updOneShotInfo bndr one_shot

1571 -- Use updOneShotInfo, not setOneShotInfo, as pre-existing

1572 -- one-shot info might be better than what we can infer, e.g.

1573 -- due to explicit use of the magic 'oneShot' function.

1574 -- See Note [The oneShot function]

1576 | otherwise

1584 -- If (f,g), (g,h) are in the input, then (f,h) is in the output

1585 -- as well as (f,g), (g,h)

1586 transClosureFV env

1587 | no_change = env

1589 where

1594 where

1597 -------------

1602 -- (extendFVs env s) returns

1603 -- (s `union` env(s), env(s) `subset` s)

1604 extendFvs env s

1605 | isNullUFM env

1607 | otherwise

1609 where

1614 {-

1615 ************************************************************************

1616 * *

1617 Binder swap

1618 * *

1619 ************************************************************************

1621 Note [Binder swap]

1622 ~~~~~~~~~~~~~~~~~~

1623 We do these two transformations right here:

1625 (1) case x of b { pi -> ri }

1626 ==>

1627 case x of b { pi -> let x=b in ri }

1629 (2) case (x |> co) of b { pi -> ri }

1630 ==>

1631 case (x |> co) of b { pi -> let x = b |> sym co in ri }

1633 Why (2)? See Note [Case of cast]

1635 In both cases, in a particular alternative (pi -> ri), we only

1636 add the binding if

1637 (a) x occurs free in (pi -> ri)

1638 (ie it occurs in ri, but is not bound in pi)

1639 (b) the pi does not bind b (or the free vars of co)

1640 We need (a) and (b) for the inserted binding to be correct.

1642 For the alternatives where we inject the binding, we can transfer

1643 all x's OccInfo to b. And that is the point.

1645 Notice that

1646 * The deliberate shadowing of 'x'.

1647 * That (a) rapidly becomes false, so no bindings are injected.

1649 The reason for doing these transformations here is because it allows

1650 us to adjust the OccInfo for 'x' and 'b' as we go.

1652 * Suppose the only occurrences of 'x' are the scrutinee and in the

1653 ri; then this transformation makes it occur just once, and hence

1654 get inlined right away.

1656 * If we do this in the Simplifier, we don't know whether 'x' is used

1657 in ri, so we are forced to pessimistically zap b's OccInfo even

1658 though it is typically dead (ie neither it nor x appear in the

1659 ri). There's nothing actually wrong with zapping it, except that

1660 it's kind of nice to know which variables are dead. My nose

1661 tells me to keep this information as robustly as possible.

1663 The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding

1664 {x=b}; it's Nothing if the binder-swap doesn't happen.

1666 There is a danger though. Consider

1667 let v = x +# y

1668 in case (f v) of w -> ...v...v...

1669 And suppose that (f v) expands to just v. Then we'd like to

1670 use 'w' instead of 'v' in the alternative. But it may be too

1671 late; we may have substituted the (cheap) x+#y for v in the

1672 same simplifier pass that reduced (f v) to v.

1674 I think this is just too bad. CSE will recover some of it.

1676 Note [Case of cast]

1677 ~~~~~~~~~~~~~~~~~~~

1678 Consider case (x `cast` co) of b { I# ->

1679 ... (case (x `cast` co) of {...}) ...

1680 We'd like to eliminate the inner case. That is the motivation for

1681 equation (2) in Note [Binder swap]. When we get to the inner case, we

1682 inline x, cancel the casts, and away we go.

1684 Note [Binder swap on GlobalId scrutinees]

1685 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

1686 When the scrutinee is a GlobalId we must take care in two ways

1688 i) In order to *know* whether 'x' occurs free in the RHS, we need its

1689 occurrence info. BUT, we don't gather occurrence info for

1690 GlobalIds. That's the reason for the (small) occ_gbl_scrut env in

1691 OccEnv is for: it says "gather occurrence info for these".

1693 ii) We must call localiseId on 'x' first, in case it's a GlobalId, or

1694 has an External Name. See, for example, SimplEnv Note [Global Ids in

1695 the substitution].

1697 Note [Zap case binders in proxy bindings]

1698 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

1699 From the original

1700 case x of cb(dead) { p -> ...x... }

1701 we will get

1702 case x of cb(live) { p -> let x = cb in ...x... }

1704 Core Lint never expects to find an *occurrence* of an Id marked

1705 as Dead, so we must zap the OccInfo on cb before making the

1706 binding x = cb. See Trac #5028.

1708 Historical note [no-case-of-case]

1709 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

1710 We *used* to suppress the binder-swap in case expressions when

1711 -fno-case-of-case is on. Old remarks:

1712 "This happens in the first simplifier pass,

1713 and enhances full laziness. Here's the bad case:

1714 f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )

1715 If we eliminate the inner case, we trap it inside the I# v -> arm,

1716 which might prevent some full laziness happening. I've seen this

1717 in action in spectral/cichelli/Prog.hs:

1718 [(m,n) | m <- [1..max], n <- [1..max]]

1719 Hence the check for NoCaseOfCase."

1720 However, now the full-laziness pass itself reverses the binder-swap, so this

1721 check is no longer necessary.

1723 Historical note [Suppressing the case binder-swap]

1724 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

1725 This old note describes a problem that is also fixed by doing the

1726 binder-swap in OccAnal:

1728 There is another situation when it might make sense to suppress the

1729 case-expression binde-swap. If we have

1731 case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }

1732 ...other cases .... }

1734 We'll perform the binder-swap for the outer case, giving

1736 case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }

1737 ...other cases .... }

1739 But there is no point in doing it for the inner case, because w1 can't

1740 be inlined anyway. Furthermore, doing the case-swapping involves

1741 zapping w2's occurrence info (see paragraphs that follow), and that

1742 forces us to bind w2 when doing case merging. So we get

1744 case x of w1 { A -> let w2 = w1 in e1

1745 B -> let w2 = w1 in e2

1746 ...other cases .... }

1748 This is plain silly in the common case where w2 is dead.

1750 Even so, I can't see a good way to implement this idea. I tried

1751 not doing the binder-swap if the scrutinee was already evaluated

1752 but that failed big-time:

1754 data T = MkT !Int

1756 case v of w { MkT x ->

1757 case x of x1 { I# y1 ->

1758 case x of x2 { I# y2 -> ...

1760 Notice that because MkT is strict, x is marked "evaluated". But to

1761 eliminate the last case, we must either make sure that x (as well as

1762 x1) has unfolding MkT y1. The straightforward thing to do is to do

1763 the binder-swap. So this whole note is a no-op.

1765 It's fixed by doing the binder-swap in OccAnal because we can do the

1766 binder-swap unconditionally and still get occurrence analysis

1767 information right.

1768 -}

1771 -- Does two things: a) makes the occ_one_shots = OccVanilla

1772 -- b) extends the GlobalScruts if possible

1773 -- c) returns a proxy mapping, binding the scrutinee

1774 -- to the case binder, if possible

1779 -- See Note [Case of cast]

1782 where

1788 -- Localise the scrut_var before shadowing it; we're making a

1789 -- new binding for it, and it might have an External Name, or

1790 -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]

1791 -- Also we don't want any INLINE or NOINLINE pragmas!

1793 {-

1794 ************************************************************************

1795 * *

1796 \subsection[OccurAnal-types]{OccEnv}

1797 * *

1798 ************************************************************************

1799 -}

1802 -- INVARIANT: never IAmDead

1803 -- (Deadness is signalled by not being in the map at all)

1805 (+++), combineAltsUsageDetails

1808 (+++) usage1 usage2

1809 = plusVarEnv_C addOccInfo usage1 usage2

1811 combineAltsUsageDetails usage1 usage2

1812 = plusVarEnv_C orOccInfo usage1 usage2

1815 addOneOcc usage id info

1817 -- ToDo: make this more efficient

1819 emptyDetails :: UsageDetails

1823 v `usedIn` details = isExportedId v || v `elemVarEnv` details

1831 -- Used for lambda and case binders

1832 -- It copes with the fact that lambda bindings can have a

1833 -- stable unfolding, used for join points

1835 where

1838 where

1839 usage1 = usage `delVarEnv` bndr

1848 tagBinder usage binder

1850 usage' = usage `delVarEnv` binder

1851 binder' = setBinderOcc usage binder

1852 in

1856 setBinderOcc usage bndr

1857 | isTyVar bndr = bndr

1859 NoOccInfo -> bndr

1860 _ -> setIdOccInfo bndr NoOccInfo

1861 -- Don't use local usage info for visible-elsewhere things

1862 -- BUT *do* erase any IAmALoopBreaker annotation, because we're

1863 -- about to re-generate it and it shouldn't be "sticky"

1866 where

1867 occ_info = lookupVarEnv usage bndr `orElse` IAmDead

1869 {-

1870 ************************************************************************

1871 * *

1872 \subsection{Operations over OccInfo}

1873 * *

1874 ************************************************************************

1875 -}

1878 mkOneOcc env id int_cxt

1879 | isLocalId id

1882 | id `elemVarEnv` occ_gbl_scrut env

1885 | otherwise

1886 = emptyDetails

1890 markMany _ = NoOccInfo

1893 markInsideLam occ = occ

1898 NoOccInfo -- Both branches are at least One

1899 -- (Argument is never IAmDead)

1901 -- (orOccInfo orig new) is used

1902 -- when combining occurrence info from branches of a case

1910 NoOccInfo