Add a final demand analyzer run right before TidyCore
[ghc.git] / compiler / stranal / WorkWrap.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
3
4 \section[WorkWrap]{Worker/wrapper-generating back-end of strictness analyser}
5 -}
6
7 {-# LANGUAGE CPP #-}
8 module WorkWrap ( wwTopBinds ) where
9
10 import CoreSyn
11 import CoreUnfold ( certainlyWillInline, mkWwInlineRule, mkWorkerUnfolding )
12 import CoreUtils ( exprType, exprIsHNF )
13 import Var
14 import Id
15 import IdInfo
16 import UniqSupply
17 import BasicTypes
18 import DynFlags
19 import Demand
20 import WwLib
21 import Util
22 import Outputable
23 import FamInstEnv
24 import MonadUtils
25
26 #include "HsVersions.h"
27
28 {-
29 We take Core bindings whose binders have:
30
31 \begin{enumerate}
32
33 \item Strictness attached (by the front-end of the strictness
34 analyser), and / or
35
36 \item Constructed Product Result information attached by the CPR
37 analysis pass.
38
39 \end{enumerate}
40
41 and we return some ``plain'' bindings which have been
42 worker/wrapper-ified, meaning:
43
44 \begin{enumerate}
45
46 \item Functions have been split into workers and wrappers where
47 appropriate. If a function has both strictness and CPR properties
48 then only one worker/wrapper doing both transformations is produced;
49
50 \item Binders' @IdInfos@ have been updated to reflect the existence of
51 these workers/wrappers (this is where we get STRICTNESS and CPR pragma
52 info for exported values).
53 \end{enumerate}
54 -}
55
56 wwTopBinds :: DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram
57
58 wwTopBinds dflags fam_envs us top_binds
59 = initUs_ us $ do
60 top_binds' <- mapM (wwBind dflags fam_envs) top_binds
61 return (concat top_binds')
62
63 {-
64 ************************************************************************
65 * *
66 \subsection[wwBind-wwExpr]{@wwBind@ and @wwExpr@}
67 * *
68 ************************************************************************
69
70 @wwBind@ works on a binding, trying each \tr{(binder, expr)} pair in
71 turn. Non-recursive case first, then recursive...
72 -}
73
74 wwBind :: DynFlags
75 -> FamInstEnvs
76 -> CoreBind
77 -> UniqSM [CoreBind] -- returns a WwBinding intermediate form;
78 -- the caller will convert to Expr/Binding,
79 -- as appropriate.
80
81 wwBind dflags fam_envs (NonRec binder rhs) = do
82 new_rhs <- wwExpr dflags fam_envs rhs
83 new_pairs <- tryWW dflags fam_envs NonRecursive binder new_rhs
84 return [NonRec b e | (b,e) <- new_pairs]
85 -- Generated bindings must be non-recursive
86 -- because the original binding was.
87
88 wwBind dflags fam_envs (Rec pairs)
89 = return . Rec <$> concatMapM do_one pairs
90 where
91 do_one (binder, rhs) = do new_rhs <- wwExpr dflags fam_envs rhs
92 tryWW dflags fam_envs Recursive binder new_rhs
93
94 {-
95 @wwExpr@ basically just walks the tree, looking for appropriate
96 annotations that can be used. Remember it is @wwBind@ that does the
97 matching by looking for strict arguments of the correct type.
98 @wwExpr@ is a version that just returns the ``Plain'' Tree.
99 -}
100
101 wwExpr :: DynFlags -> FamInstEnvs -> CoreExpr -> UniqSM CoreExpr
102
103 wwExpr _ _ e@(Type {}) = return e
104 wwExpr _ _ e@(Coercion {}) = return e
105 wwExpr _ _ e@(Lit {}) = return e
106 wwExpr _ _ e@(Var {}) = return e
107
108 wwExpr dflags fam_envs (Lam binder expr)
109 = Lam new_binder <$> wwExpr dflags fam_envs expr
110 where new_binder | isId binder = zapIdUsedOnceInfo binder
111 | otherwise = binder
112 -- See Note [Zapping Used Once info in WorkWrap]
113
114 wwExpr dflags fam_envs (App f a)
115 = App <$> wwExpr dflags fam_envs f <*> wwExpr dflags fam_envs a
116
117 wwExpr dflags fam_envs (Tick note expr)
118 = Tick note <$> wwExpr dflags fam_envs expr
119
120 wwExpr dflags fam_envs (Cast expr co) = do
121 new_expr <- wwExpr dflags fam_envs expr
122 return (Cast new_expr co)
123
124 wwExpr dflags fam_envs (Let bind expr)
125 = mkLets <$> wwBind dflags fam_envs bind <*> wwExpr dflags fam_envs expr
126
127 wwExpr dflags fam_envs (Case expr binder ty alts) = do
128 new_expr <- wwExpr dflags fam_envs expr
129 new_alts <- mapM ww_alt alts
130 let new_binder = zapIdUsedOnceInfo binder
131 -- See Note [Zapping Used Once info in WorkWrap]
132 return (Case new_expr new_binder ty new_alts)
133 where
134 ww_alt (con, binders, rhs) = do
135 new_rhs <- wwExpr dflags fam_envs rhs
136 let new_binders = [ if isId b then zapIdUsedOnceInfo b else b
137 | b <- binders ]
138 -- See Note [Zapping Used Once info in WorkWrap]
139 return (con, new_binders, new_rhs)
140
141 {-
142 ************************************************************************
143 * *
144 \subsection[tryWW]{@tryWW@: attempt a worker/wrapper pair}
145 * *
146 ************************************************************************
147
148 @tryWW@ just accumulates arguments, converts strictness info from the
149 front-end into the proper form, then calls @mkWwBodies@ to do
150 the business.
151
152 The only reason this is monadised is for the unique supply.
153
154 Note [Don't w/w INLINE things]
155 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
156 It's very important to refrain from w/w-ing an INLINE function (ie one
157 with a stable unfolding) because the wrapper will then overwrite the
158 old stable unfolding with the wrapper code.
159
160 Furthermore, if the programmer has marked something as INLINE,
161 we may lose by w/w'ing it.
162
163 If the strictness analyser is run twice, this test also prevents
164 wrappers (which are INLINEd) from being re-done. (You can end up with
165 several liked-named Ids bouncing around at the same time---absolute
166 mischief.)
167
168 Notice that we refrain from w/w'ing an INLINE function even if it is
169 in a recursive group. It might not be the loop breaker. (We could
170 test for loop-breaker-hood, but I'm not sure that ever matters.)
171
172 Note [Worker-wrapper for INLINABLE functions]
173 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
174 If we have
175 {-# INLINABLE f #-}
176 f :: Ord a => [a] -> Int -> a
177 f x y = ....f....
178
179 where f is strict in y, we might get a more efficient loop by w/w'ing
180 f. But that would make a new unfolding which would overwrite the old
181 one! So the function would no longer be ININABLE, and in particular
182 will not be specialised at call sites in other modules.
183
184 This comes in practice (Trac #6056).
185
186 Solution: do the w/w for strictness analysis, but transfer the Stable
187 unfolding to the *worker*. So we will get something like this:
188
189 {-# INLINE[0] f #-}
190 f :: Ord a => [a] -> Int -> a
191 f d x y = case y of I# y' -> fw d x y'
192
193 {-# INLINABLE[0] fw #-}
194 fw :: Ord a => [a] -> Int# -> a
195 fw d x y' = let y = I# y' in ...f...
196
197 How do we "transfer the unfolding"? Easy: by using the old one, wrapped
198 in work_fn! See CoreUnfold.mkWorkerUnfolding.
199
200 Note [Activation for INLINABLE worker]
201 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
202 Follows on from Note [Worker-wrapper for INLINABLE functions]
203 It is *vital* that if the worker gets an INLINABLE pragma (from the
204 original function), then the worker has the same phase activation as
205 the wrapper (or later). That is necessary to allow the wrapper to
206 inline into the worker's unfolding: see SimplUtils
207 Note [Simplifying inside stable unfoldings].
208
209 Notihng is lost by giving the worker the same activation as the
210 worker, because the worker won't have any chance of inlining until the
211 wrapper does; there's no point in giving it an earlier activation.
212
213 Note [Don't w/w inline small non-loop-breaker things]
214 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
215 In general, we refrain from w/w-ing *small* functions, which are not
216 loop breakers, because they'll inline anyway. But we must take care:
217 it may look small now, but get to be big later after other inlining
218 has happened. So we take the precaution of adding an INLINE pragma to
219 any such functions.
220
221 I made this change when I observed a big function at the end of
222 compilation with a useful strictness signature but no w-w. (It was
223 small during demand analysis, we refrained from w/w, and then got big
224 when something was inlined in its rhs.) When I measured it on nofib,
225 it didn't make much difference; just a few percent improved allocation
226 on one benchmark (bspt/Euclid.space). But nothing got worse.
227
228 There is an infelicity though. We may get something like
229 f = g val
230 ==>
231 g x = case gw x of r -> I# r
232
233 f {- InlineStable, Template = g val -}
234 f = case gw x of r -> I# r
235
236 The code for f duplicates that for g, without any real benefit. It
237 won't really be executed, because calls to f will go via the inlining.
238
239 Note [Wrapper activation]
240 ~~~~~~~~~~~~~~~~~~~~~~~~~
241 When should the wrapper inlining be active? It must not be active
242 earlier than the current Activation of the Id (eg it might have a
243 NOINLINE pragma). But in fact strictness analysis happens fairly
244 late in the pipeline, and we want to prioritise specialisations over
245 strictness. Eg if we have
246 module Foo where
247 f :: Num a => a -> Int -> a
248 f n 0 = n -- Strict in the Int, hence wrapper
249 f n x = f (n+n) (x-1)
250
251 g :: Int -> Int
252 g x = f x x -- Provokes a specialisation for f
253
254 module Bar where
255 import Foo
256
257 h :: Int -> Int
258 h x = f 3 x
259
260 Then we want the specialisation for 'f' to kick in before the wrapper does.
261
262 Now in fact the 'gentle' simplification pass encourages this, by
263 having rules on, but inlinings off. But that's kind of lucky. It seems
264 more robust to give the wrapper an Activation of (ActiveAfter 0),
265 so that it becomes active in an importing module at the same time that
266 it appears in the first place in the defining module.
267
268 At one stage I tried making the wrapper inlining always-active, and
269 that had a very bad effect on nofib/imaginary/x2n1; a wrapper was
270 inlined before the specialisation fired.
271 -}
272
273 tryWW :: DynFlags
274 -> FamInstEnvs
275 -> RecFlag
276 -> Id -- The fn binder
277 -> CoreExpr -- The bound rhs; its innards
278 -- are already ww'd
279 -> UniqSM [(Id, CoreExpr)] -- either *one* or *two* pairs;
280 -- if one, then no worker (only
281 -- the orig "wrapper" lives on);
282 -- if two, then a worker and a
283 -- wrapper.
284 tryWW dflags fam_envs is_rec fn_id rhs
285 | isNeverActive inline_act
286 -- No point in worker/wrappering if the thing is never inlined!
287 -- Because the no-inline prag will prevent the wrapper ever
288 -- being inlined at a call site.
289 = return [ (new_fn_id, rhs) ]
290
291 | not loop_breaker
292 , Just stable_unf <- certainlyWillInline dflags fn_unf
293 = return [ (fn_id `setIdUnfolding` stable_unf, rhs) ]
294 -- Note [Don't w/w inline small non-loop-breaker, or INLINE, things]
295 -- NB: use idUnfolding because we don't want to apply
296 -- this criterion to a loop breaker!
297
298 | is_fun
299 = splitFun dflags fam_envs new_fn_id fn_info wrap_dmds res_info rhs
300
301 | is_thunk -- See Note [Thunk splitting]
302 = splitThunk dflags fam_envs is_rec new_fn_id rhs
303
304 | otherwise
305 = return [ (new_fn_id, rhs) ]
306
307 where
308 loop_breaker = isStrongLoopBreaker (occInfo fn_info)
309 fn_info = idInfo fn_id
310 inline_act = inlinePragmaActivation (inlinePragInfo fn_info)
311 fn_unf = unfoldingInfo fn_info
312 (wrap_dmds, res_info) = splitStrictSig (strictnessInfo fn_info)
313
314 new_fn_id = zapIdUsedOnceInfo (zapIdUsageEnvInfo fn_id)
315 -- See Note [Zapping DmdEnv after Demand Analyzer] and
316 -- See Note [Zapping Used Once info in WorkWrap]
317
318 is_fun = notNull wrap_dmds
319 is_thunk = not is_fun && not (exprIsHNF rhs)
320
321 {-
322 Note [Zapping DmdEnv after Demand Analyzer]
323 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
324
325 In the worker-wrapper pass we zap the DmdEnv.
326
327 Why?
328 (a) it is never used again
329 (b) it wastes space
330 (c) it becomes incorrect as things are cloned, because
331 we don't push the substitution into it
332
333 Why here?
334 * Because we don’t want to do it in the Demand Analyzer, as we never know
335 there when we are doing the last pass.
336 * We want them to be still there at the end of DmdAnal, so that
337 -ddump-str-anal contains them.
338 * We don’t want a second pass just for that.
339 * WorkWrap looks at all bindings anyways.
340
341 We also need to do it in TidyCore to clean up after the final,
342 worker/wrapper-less run of the demand analyser.
343
344 Note [Zapping Used Once info in WorkWrap]
345 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
346
347 In the worker-wrapper pass we zap the used once info in demands and in
348 strictness signatures.
349
350 Why?
351 * The simplifier may happen to transform code in a way that invalidates the
352 data (see #11731 for an example).
353 * It is not used in later passes, up to code generation.
354
355 So as the data is useless and possibly wrong, we want to remove it. The most
356 convenient place to do that is the worker wrapper phase, as it runs after every
357 run of the demand analyser besides the very last one (which is the one where we
358 want to _keep_ the info for the code generator).
359
360 We do not do it in the demand analyser for the same reasons outlined in
361 Note [Zapping DmdEnv after Demand Analyzer] above.
362 -}
363
364
365 ---------------------
366 splitFun :: DynFlags -> FamInstEnvs -> Id -> IdInfo -> [Demand] -> DmdResult -> CoreExpr
367 -> UniqSM [(Id, CoreExpr)]
368 splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs
369 = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) ) do
370 -- The arity should match the signature
371 stuff <- mkWwBodies dflags fam_envs fun_ty wrap_dmds res_info
372 case stuff of
373 Just (work_demands, wrap_fn, work_fn) -> do
374 work_uniq <- getUniqueM
375 let work_rhs = work_fn rhs
376 work_prag = InlinePragma { inl_src = "{-# INLINE"
377 , inl_inline = inl_inline inl_prag
378 , inl_sat = Nothing
379 , inl_act = wrap_act
380 , inl_rule = FunLike }
381 -- idl_inline: copy from fn_id; see Note [Worker-wrapper for INLINABLE functions]
382 -- idl_act: see Note [Activation for INLINABLE workers]
383 -- inl_rule: it does not make sense for workers to be constructorlike.
384
385 work_id = mkWorkerId work_uniq fn_id (exprType work_rhs)
386 `setIdOccInfo` occInfo fn_info
387 -- Copy over occurrence info from parent
388 -- Notably whether it's a loop breaker
389 -- Doesn't matter much, since we will simplify next, but
390 -- seems right-er to do so
391
392 `setInlinePragma` work_prag
393
394 `setIdUnfolding` mkWorkerUnfolding dflags work_fn (unfoldingInfo fn_info)
395 -- See Note [Worker-wrapper for INLINABLE functions]
396
397 `setIdStrictness` mkClosedStrictSig work_demands work_res_info
398 -- Even though we may not be at top level,
399 -- it's ok to give it an empty DmdEnv
400
401 `setIdDemandInfo` worker_demand
402
403 `setIdArity` work_arity
404 -- Set the arity so that the Core Lint check that the
405
406 work_arity = length work_demands
407
408 -- See Note [Demand on the Worker]
409 single_call = saturatedByOneShots arity (demandInfo fn_info)
410 worker_demand | single_call = mkWorkerDemand work_arity
411 | otherwise = topDmd
412
413 -- arity is consistent with the demand type goes through
414
415 wrap_act = ActiveAfter "0" 0
416 wrap_rhs = wrap_fn work_id
417 wrap_prag = InlinePragma { inl_src = "{-# INLINE"
418 , inl_inline = Inline
419 , inl_sat = Nothing
420 , inl_act = wrap_act
421 , inl_rule = rule_match_info }
422 -- See Note [Wrapper activation]
423 -- The RuleMatchInfo is (and must be) unaffected
424
425 wrap_id = fn_id `setIdUnfolding` mkWwInlineRule wrap_rhs arity
426 `setInlinePragma` wrap_prag
427 `setIdOccInfo` NoOccInfo
428 -- Zap any loop-breaker-ness, to avoid bleating from Lint
429 -- about a loop breaker with an INLINE rule
430
431
432
433 return $ [(work_id, work_rhs), (wrap_id, wrap_rhs)]
434 -- Worker first, because wrapper mentions it
435
436 Nothing -> return [(fn_id, rhs)]
437 where
438 fun_ty = idType fn_id
439 inl_prag = inlinePragInfo fn_info
440 rule_match_info = inlinePragmaRuleMatchInfo inl_prag
441 arity = arityInfo fn_info
442 -- The arity is set by the simplifier using exprEtaExpandArity
443 -- So it may be more than the number of top-level-visible lambdas
444
445 work_res_info = case returnsCPR_maybe res_info of
446 Just _ -> topRes -- Cpr stuff done by wrapper; kill it here
447 Nothing -> res_info -- Preserve exception/divergence
448
449
450 {-
451 Note [Demand on the worker]
452 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
453
454 If the original function is called once, according to its demand info, then
455 so is the worker. This is important so that the occurrence analyser can
456 attach OneShot annotations to the worker’s lambda binders.
457
458
459 Example:
460
461 -- Original function
462 f [Demand=<L,1*C1(U)>] :: (a,a) -> a
463 f = \p -> ...
464
465 -- Wrapper
466 f [Demand=<L,1*C1(U)>] :: a -> a -> a
467 f = \p -> case p of (a,b) -> $wf a b
468
469 -- Worker
470 $wf [Demand=<L,1*C1(C1(U))>] :: Int -> Int
471 $wf = \a b -> ...
472
473 We need to check whether the original function is called once, with
474 sufficiently many arguments. This is done using saturatedByOneShots, which
475 takes the arity of the original function (resp. the wrapper) and the demand on
476 the original function.
477
478 The demand on the worker is then calculated using mkWorkerDemand, and always of
479 the form [Demand=<L,1*(C1(...(C1(U))))>]
480
481
482 Note [Do not split void functions]
483 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
484 Consider this rather common form of binding:
485 $j = \x:Void# -> ...no use of x...
486
487 Since x is not used it'll be marked as absent. But there is no point
488 in w/w-ing because we'll simply add (\y:Void#), see WwLib.mkWorerArgs.
489
490 If x has a more interesting type (eg Int, or Int#), there *is* a point
491 in w/w so that we don't pass the argument at all.
492
493 Note [Thunk splitting]
494 ~~~~~~~~~~~~~~~~~~~~~~
495 Suppose x is used strictly (never mind whether it has the CPR
496 property).
497
498 let
499 x* = x-rhs
500 in body
501
502 splitThunk transforms like this:
503
504 let
505 x* = case x-rhs of { I# a -> I# a }
506 in body
507
508 Now simplifier will transform to
509
510 case x-rhs of
511 I# a -> let x* = I# a
512 in body
513
514 which is what we want. Now suppose x-rhs is itself a case:
515
516 x-rhs = case e of { T -> I# a; F -> I# b }
517
518 The join point will abstract over a, rather than over (which is
519 what would have happened before) which is fine.
520
521 Notice that x certainly has the CPR property now!
522
523 In fact, splitThunk uses the function argument w/w splitting
524 function, so that if x's demand is deeper (say U(U(L,L),L))
525 then the splitting will go deeper too.
526 -}
527
528 -- See Note [Thunk splitting]
529 -- splitThunk converts the *non-recursive* binding
530 -- x = e
531 -- into
532 -- x = let x = e
533 -- in case x of
534 -- I# y -> let x = I# y in x }
535 -- See comments above. Is it not beautifully short?
536 -- Moreover, it works just as well when there are
537 -- several binders, and if the binders are lifted
538 -- E.g. x = e
539 -- --> x = let x = e in
540 -- case x of (a,b) -> let x = (a,b) in x
541
542 splitThunk :: DynFlags -> FamInstEnvs -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)]
543 splitThunk dflags fam_envs is_rec fn_id rhs
544 = do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs [fn_id]
545 ; let res = [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ]
546 ; if useful then ASSERT2( isNonRec is_rec, ppr fn_id ) -- The thunk must be non-recursive
547 return res
548 else return [(fn_id, rhs)] }