FunDep printer: Fix unicode arrow
[ghc.git] / compiler / stranal / WorkWrap.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
3
4 \section[WorkWrap]{Worker/wrapper-generating back-end of strictness analyser}
5 -}
6
7 {-# LANGUAGE CPP #-}
8 module WorkWrap ( wwTopBinds ) where
9
10 import CoreSyn
11 import CoreUnfold ( certainlyWillInline, mkWwInlineRule, mkWorkerUnfolding )
12 import CoreUtils ( exprType, exprIsHNF )
13 import Var
14 import Id
15 import IdInfo
16 import UniqSupply
17 import BasicTypes
18 import DynFlags
19 import Demand
20 import WwLib
21 import Util
22 import Outputable
23 import FamInstEnv
24 import MonadUtils
25
26 #include "HsVersions.h"
27
28 {-
29 We take Core bindings whose binders have:
30
31 \begin{enumerate}
32
33 \item Strictness attached (by the front-end of the strictness
34 analyser), and / or
35
36 \item Constructed Product Result information attached by the CPR
37 analysis pass.
38
39 \end{enumerate}
40
41 and we return some ``plain'' bindings which have been
42 worker/wrapper-ified, meaning:
43
44 \begin{enumerate}
45
46 \item Functions have been split into workers and wrappers where
47 appropriate. If a function has both strictness and CPR properties
48 then only one worker/wrapper doing both transformations is produced;
49
50 \item Binders' @IdInfos@ have been updated to reflect the existence of
51 these workers/wrappers (this is where we get STRICTNESS and CPR pragma
52 info for exported values).
53 \end{enumerate}
54 -}
55
56 wwTopBinds :: DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram
57
58 wwTopBinds dflags fam_envs us top_binds
59 = initUs_ us $ do
60 top_binds' <- mapM (wwBind dflags fam_envs) top_binds
61 return (concat top_binds')
62
63 {-
64 ************************************************************************
65 * *
66 \subsection[wwBind-wwExpr]{@wwBind@ and @wwExpr@}
67 * *
68 ************************************************************************
69
70 @wwBind@ works on a binding, trying each \tr{(binder, expr)} pair in
71 turn. Non-recursive case first, then recursive...
72 -}
73
74 wwBind :: DynFlags
75 -> FamInstEnvs
76 -> CoreBind
77 -> UniqSM [CoreBind] -- returns a WwBinding intermediate form;
78 -- the caller will convert to Expr/Binding,
79 -- as appropriate.
80
81 wwBind dflags fam_envs (NonRec binder rhs) = do
82 new_rhs <- wwExpr dflags fam_envs rhs
83 new_pairs <- tryWW dflags fam_envs NonRecursive binder new_rhs
84 return [NonRec b e | (b,e) <- new_pairs]
85 -- Generated bindings must be non-recursive
86 -- because the original binding was.
87
88 wwBind dflags fam_envs (Rec pairs)
89 = return . Rec <$> concatMapM do_one pairs
90 where
91 do_one (binder, rhs) = do new_rhs <- wwExpr dflags fam_envs rhs
92 tryWW dflags fam_envs Recursive binder new_rhs
93
94 {-
95 @wwExpr@ basically just walks the tree, looking for appropriate
96 annotations that can be used. Remember it is @wwBind@ that does the
97 matching by looking for strict arguments of the correct type.
98 @wwExpr@ is a version that just returns the ``Plain'' Tree.
99 -}
100
101 wwExpr :: DynFlags -> FamInstEnvs -> CoreExpr -> UniqSM CoreExpr
102
103 wwExpr _ _ e@(Type {}) = return e
104 wwExpr _ _ e@(Coercion {}) = return e
105 wwExpr _ _ e@(Lit {}) = return e
106 wwExpr _ _ e@(Var {}) = return e
107
108 wwExpr dflags fam_envs (Lam binder expr)
109 = Lam new_binder <$> wwExpr dflags fam_envs expr
110 where new_binder | isId binder = zapIdUsedOnceInfo binder
111 | otherwise = binder
112 -- See Note [Zapping Used Once info in WorkWrap]
113
114 wwExpr dflags fam_envs (App f a)
115 = App <$> wwExpr dflags fam_envs f <*> wwExpr dflags fam_envs a
116
117 wwExpr dflags fam_envs (Tick note expr)
118 = Tick note <$> wwExpr dflags fam_envs expr
119
120 wwExpr dflags fam_envs (Cast expr co) = do
121 new_expr <- wwExpr dflags fam_envs expr
122 return (Cast new_expr co)
123
124 wwExpr dflags fam_envs (Let bind expr)
125 = mkLets <$> wwBind dflags fam_envs bind <*> wwExpr dflags fam_envs expr
126
127 wwExpr dflags fam_envs (Case expr binder ty alts) = do
128 new_expr <- wwExpr dflags fam_envs expr
129 new_alts <- mapM ww_alt alts
130 let new_binder = zapIdUsedOnceInfo binder
131 -- See Note [Zapping Used Once info in WorkWrap]
132 return (Case new_expr new_binder ty new_alts)
133 where
134 ww_alt (con, binders, rhs) = do
135 new_rhs <- wwExpr dflags fam_envs rhs
136 let new_binders = [ if isId b then zapIdUsedOnceInfo b else b
137 | b <- binders ]
138 -- See Note [Zapping Used Once info in WorkWrap]
139 return (con, new_binders, new_rhs)
140
141 {-
142 ************************************************************************
143 * *
144 \subsection[tryWW]{@tryWW@: attempt a worker/wrapper pair}
145 * *
146 ************************************************************************
147
148 @tryWW@ just accumulates arguments, converts strictness info from the
149 front-end into the proper form, then calls @mkWwBodies@ to do
150 the business.
151
152 The only reason this is monadised is for the unique supply.
153
154 Note [Don't w/w INLINE things]
155 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
156 It's very important to refrain from w/w-ing an INLINE function (ie one
157 with a stable unfolding) because the wrapper will then overwrite the
158 old stable unfolding with the wrapper code.
159
160 Furthermore, if the programmer has marked something as INLINE,
161 we may lose by w/w'ing it.
162
163 If the strictness analyser is run twice, this test also prevents
164 wrappers (which are INLINEd) from being re-done. (You can end up with
165 several liked-named Ids bouncing around at the same time---absolute
166 mischief.)
167
168 Notice that we refrain from w/w'ing an INLINE function even if it is
169 in a recursive group. It might not be the loop breaker. (We could
170 test for loop-breaker-hood, but I'm not sure that ever matters.)
171
172 Note [Worker-wrapper for INLINABLE functions]
173 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
174 If we have
175 {-# INLINABLE f #-}
176 f :: Ord a => [a] -> Int -> a
177 f x y = ....f....
178
179 where f is strict in y, we might get a more efficient loop by w/w'ing
180 f. But that would make a new unfolding which would overwrite the old
181 one! So the function would no longer be ININABLE, and in particular
182 will not be specialised at call sites in other modules.
183
184 This comes in practice (Trac #6056).
185
186 Solution: do the w/w for strictness analysis, but transfer the Stable
187 unfolding to the *worker*. So we will get something like this:
188
189 {-# INLINE[0] f #-}
190 f :: Ord a => [a] -> Int -> a
191 f d x y = case y of I# y' -> fw d x y'
192
193 {-# INLINABLE[0] fw #-}
194 fw :: Ord a => [a] -> Int# -> a
195 fw d x y' = let y = I# y' in ...f...
196
197 How do we "transfer the unfolding"? Easy: by using the old one, wrapped
198 in work_fn! See CoreUnfold.mkWorkerUnfolding.
199
200 Note [Activation for INLINABLE worker]
201 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
202 Follows on from Note [Worker-wrapper for INLINABLE functions]
203 It is *vital* that if the worker gets an INLINABLE pragma (from the
204 original function), then the worker has the same phase activation as
205 the wrapper (or later). That is necessary to allow the wrapper to
206 inline into the worker's unfolding: see SimplUtils
207 Note [Simplifying inside stable unfoldings].
208
209 Notihng is lost by giving the worker the same activation as the
210 worker, because the worker won't have any chance of inlining until the
211 wrapper does; there's no point in giving it an earlier activation.
212
213 Note [Don't w/w inline small non-loop-breaker things]
214 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
215 In general, we refrain from w/w-ing *small* functions, which are not
216 loop breakers, because they'll inline anyway. But we must take care:
217 it may look small now, but get to be big later after other inlining
218 has happened. So we take the precaution of adding an INLINE pragma to
219 any such functions.
220
221 I made this change when I observed a big function at the end of
222 compilation with a useful strictness signature but no w-w. (It was
223 small during demand analysis, we refrained from w/w, and then got big
224 when something was inlined in its rhs.) When I measured it on nofib,
225 it didn't make much difference; just a few percent improved allocation
226 on one benchmark (bspt/Euclid.space). But nothing got worse.
227
228 There is an infelicity though. We may get something like
229 f = g val
230 ==>
231 g x = case gw x of r -> I# r
232
233 f {- InlineStable, Template = g val -}
234 f = case gw x of r -> I# r
235
236 The code for f duplicates that for g, without any real benefit. It
237 won't really be executed, because calls to f will go via the inlining.
238
239 Note [Wrapper activation]
240 ~~~~~~~~~~~~~~~~~~~~~~~~~
241 When should the wrapper inlining be active? It must not be active
242 earlier than the current Activation of the Id (eg it might have a
243 NOINLINE pragma). But in fact strictness analysis happens fairly
244 late in the pipeline, and we want to prioritise specialisations over
245 strictness. Eg if we have
246 module Foo where
247 f :: Num a => a -> Int -> a
248 f n 0 = n -- Strict in the Int, hence wrapper
249 f n x = f (n+n) (x-1)
250
251 g :: Int -> Int
252 g x = f x x -- Provokes a specialisation for f
253
254 module Bar where
255 import Foo
256
257 h :: Int -> Int
258 h x = f 3 x
259
260 Then we want the specialisation for 'f' to kick in before the wrapper does.
261
262 Now in fact the 'gentle' simplification pass encourages this, by
263 having rules on, but inlinings off. But that's kind of lucky. It seems
264 more robust to give the wrapper an Activation of (ActiveAfter 0),
265 so that it becomes active in an importing module at the same time that
266 it appears in the first place in the defining module.
267
268 At one stage I tried making the wrapper inlining always-active, and
269 that had a very bad effect on nofib/imaginary/x2n1; a wrapper was
270 inlined before the specialisation fired.
271 -}
272
273 tryWW :: DynFlags
274 -> FamInstEnvs
275 -> RecFlag
276 -> Id -- The fn binder
277 -> CoreExpr -- The bound rhs; its innards
278 -- are already ww'd
279 -> UniqSM [(Id, CoreExpr)] -- either *one* or *two* pairs;
280 -- if one, then no worker (only
281 -- the orig "wrapper" lives on);
282 -- if two, then a worker and a
283 -- wrapper.
284 tryWW dflags fam_envs is_rec fn_id rhs
285 | isNeverActive inline_act
286 -- No point in worker/wrappering if the thing is never inlined!
287 -- Because the no-inline prag will prevent the wrapper ever
288 -- being inlined at a call site.
289 = return [ (new_fn_id, rhs) ]
290
291 | not loop_breaker
292 , Just stable_unf <- certainlyWillInline dflags fn_unf
293 = return [ (fn_id `setIdUnfolding` stable_unf, rhs) ]
294 -- Note [Don't w/w inline small non-loop-breaker, or INLINE, things]
295 -- NB: use idUnfolding because we don't want to apply
296 -- this criterion to a loop breaker!
297
298 | is_fun
299 = splitFun dflags fam_envs new_fn_id fn_info wrap_dmds res_info rhs
300
301 | is_thunk -- See Note [Thunk splitting]
302 = splitThunk dflags fam_envs is_rec new_fn_id rhs
303
304 | otherwise
305 = return [ (new_fn_id, rhs) ]
306
307 where
308 loop_breaker = isStrongLoopBreaker (occInfo fn_info)
309 fn_info = idInfo fn_id
310 inline_act = inlinePragmaActivation (inlinePragInfo fn_info)
311 fn_unf = unfoldingInfo fn_info
312 (wrap_dmds, res_info) = splitStrictSig (strictnessInfo fn_info)
313
314 new_fn_id = zapIdUsedOnceInfo (zapIdUsageEnvInfo fn_id)
315 -- See Note [Zapping DmdEnv after Demand Analyzer] and
316 -- See Note [Zapping Used Once info in WorkWrap]
317
318 is_fun = notNull wrap_dmds
319 is_thunk = not is_fun && not (exprIsHNF rhs)
320
321 {-
322 Note [Zapping DmdEnv after Demand Analyzer]
323 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
324 In the worker-wrapper pass we zap the DmdEnv. Why?
325 (a) it is never used again
326 (b) it wastes space
327 (c) it becomes incorrect as things are cloned, because
328 we don't push the substitution into it
329
330 Why here?
331 * Because we don’t want to do it in the Demand Analyzer, as we never know
332 there when we are doing the last pass.
333 * We want them to be still there at the end of DmdAnal, so that
334 -ddump-str-anal contains them.
335 * We don’t want a second pass just for that.
336 * WorkWrap looks at all bindings anyway.
337
338 We also need to do it in TidyCore.tidyLetBndr to clean up after the
339 final, worker/wrapper-less run of the demand analyser (see
340 Note [Final Demand Analyser run] in DmdAnal).
341
342 Note [Zapping Used Once info in WorkWrap]
343 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
344 In the worker-wrapper pass we zap the used once info in demands and in
345 strictness signatures.
346
347 Why?
348 * The simplifier may happen to transform code in a way that invalidates the
349 data (see #11731 for an example).
350 * It is not used in later passes, up to code generation.
351
352 So as the data is useless and possibly wrong, we want to remove it. The most
353 convenient place to do that is the worker wrapper phase, as it runs after every
354 run of the demand analyser besides the very last one (which is the one where we
355 want to _keep_ the info for the code generator).
356
357 We do not do it in the demand analyser for the same reasons outlined in
358 Note [Zapping DmdEnv after Demand Analyzer] above.
359 -}
360
361
362 ---------------------
363 splitFun :: DynFlags -> FamInstEnvs -> Id -> IdInfo -> [Demand] -> DmdResult -> CoreExpr
364 -> UniqSM [(Id, CoreExpr)]
365 splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs
366 = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) ) do
367 -- The arity should match the signature
368 stuff <- mkWwBodies dflags fam_envs fun_ty wrap_dmds res_info
369 case stuff of
370 Just (work_demands, wrap_fn, work_fn) -> do
371 work_uniq <- getUniqueM
372 let work_rhs = work_fn rhs
373 work_prag = InlinePragma { inl_src = "{-# INLINE"
374 , inl_inline = inl_inline inl_prag
375 , inl_sat = Nothing
376 , inl_act = wrap_act
377 , inl_rule = FunLike }
378 -- idl_inline: copy from fn_id; see Note [Worker-wrapper for INLINABLE functions]
379 -- idl_act: see Note [Activation for INLINABLE workers]
380 -- inl_rule: it does not make sense for workers to be constructorlike.
381
382 work_id = mkWorkerId work_uniq fn_id (exprType work_rhs)
383 `setIdOccInfo` occInfo fn_info
384 -- Copy over occurrence info from parent
385 -- Notably whether it's a loop breaker
386 -- Doesn't matter much, since we will simplify next, but
387 -- seems right-er to do so
388
389 `setInlinePragma` work_prag
390
391 `setIdUnfolding` mkWorkerUnfolding dflags work_fn (unfoldingInfo fn_info)
392 -- See Note [Worker-wrapper for INLINABLE functions]
393
394 `setIdStrictness` mkClosedStrictSig work_demands work_res_info
395 -- Even though we may not be at top level,
396 -- it's ok to give it an empty DmdEnv
397
398 `setIdDemandInfo` worker_demand
399
400 `setIdArity` work_arity
401 -- Set the arity so that the Core Lint check that the
402
403 work_arity = length work_demands
404
405 -- See Note [Demand on the Worker]
406 single_call = saturatedByOneShots arity (demandInfo fn_info)
407 worker_demand | single_call = mkWorkerDemand work_arity
408 | otherwise = topDmd
409
410 -- arity is consistent with the demand type goes through
411
412 wrap_act = ActiveAfter "0" 0
413 wrap_rhs = wrap_fn work_id
414 wrap_prag = InlinePragma { inl_src = "{-# INLINE"
415 , inl_inline = Inline
416 , inl_sat = Nothing
417 , inl_act = wrap_act
418 , inl_rule = rule_match_info }
419 -- See Note [Wrapper activation]
420 -- The RuleMatchInfo is (and must be) unaffected
421
422 wrap_id = fn_id `setIdUnfolding` mkWwInlineRule wrap_rhs arity
423 `setInlinePragma` wrap_prag
424 `setIdOccInfo` NoOccInfo
425 -- Zap any loop-breaker-ness, to avoid bleating from Lint
426 -- about a loop breaker with an INLINE rule
427
428
429
430 return $ [(work_id, work_rhs), (wrap_id, wrap_rhs)]
431 -- Worker first, because wrapper mentions it
432
433 Nothing -> return [(fn_id, rhs)]
434 where
435 fun_ty = idType fn_id
436 inl_prag = inlinePragInfo fn_info
437 rule_match_info = inlinePragmaRuleMatchInfo inl_prag
438 arity = arityInfo fn_info
439 -- The arity is set by the simplifier using exprEtaExpandArity
440 -- So it may be more than the number of top-level-visible lambdas
441
442 work_res_info = case returnsCPR_maybe res_info of
443 Just _ -> topRes -- Cpr stuff done by wrapper; kill it here
444 Nothing -> res_info -- Preserve exception/divergence
445
446
447 {-
448 Note [Demand on the worker]
449 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
450
451 If the original function is called once, according to its demand info, then
452 so is the worker. This is important so that the occurrence analyser can
453 attach OneShot annotations to the worker’s lambda binders.
454
455
456 Example:
457
458 -- Original function
459 f [Demand=<L,1*C1(U)>] :: (a,a) -> a
460 f = \p -> ...
461
462 -- Wrapper
463 f [Demand=<L,1*C1(U)>] :: a -> a -> a
464 f = \p -> case p of (a,b) -> $wf a b
465
466 -- Worker
467 $wf [Demand=<L,1*C1(C1(U))>] :: Int -> Int
468 $wf = \a b -> ...
469
470 We need to check whether the original function is called once, with
471 sufficiently many arguments. This is done using saturatedByOneShots, which
472 takes the arity of the original function (resp. the wrapper) and the demand on
473 the original function.
474
475 The demand on the worker is then calculated using mkWorkerDemand, and always of
476 the form [Demand=<L,1*(C1(...(C1(U))))>]
477
478
479 Note [Do not split void functions]
480 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
481 Consider this rather common form of binding:
482 $j = \x:Void# -> ...no use of x...
483
484 Since x is not used it'll be marked as absent. But there is no point
485 in w/w-ing because we'll simply add (\y:Void#), see WwLib.mkWorerArgs.
486
487 If x has a more interesting type (eg Int, or Int#), there *is* a point
488 in w/w so that we don't pass the argument at all.
489
490 Note [Thunk splitting]
491 ~~~~~~~~~~~~~~~~~~~~~~
492 Suppose x is used strictly (never mind whether it has the CPR
493 property).
494
495 let
496 x* = x-rhs
497 in body
498
499 splitThunk transforms like this:
500
501 let
502 x* = case x-rhs of { I# a -> I# a }
503 in body
504
505 Now simplifier will transform to
506
507 case x-rhs of
508 I# a -> let x* = I# a
509 in body
510
511 which is what we want. Now suppose x-rhs is itself a case:
512
513 x-rhs = case e of { T -> I# a; F -> I# b }
514
515 The join point will abstract over a, rather than over (which is
516 what would have happened before) which is fine.
517
518 Notice that x certainly has the CPR property now!
519
520 In fact, splitThunk uses the function argument w/w splitting
521 function, so that if x's demand is deeper (say U(U(L,L),L))
522 then the splitting will go deeper too.
523 -}
524
525 -- See Note [Thunk splitting]
526 -- splitThunk converts the *non-recursive* binding
527 -- x = e
528 -- into
529 -- x = let x = e
530 -- in case x of
531 -- I# y -> let x = I# y in x }
532 -- See comments above. Is it not beautifully short?
533 -- Moreover, it works just as well when there are
534 -- several binders, and if the binders are lifted
535 -- E.g. x = e
536 -- --> x = let x = e in
537 -- case x of (a,b) -> let x = (a,b) in x
538
539 splitThunk :: DynFlags -> FamInstEnvs -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)]
540 splitThunk dflags fam_envs is_rec fn_id rhs
541 = do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs [fn_id]
542 ; let res = [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ]
543 ; if useful then ASSERT2( isNonRec is_rec, ppr fn_id ) -- The thunk must be non-recursive
544 return res
545 else return [(fn_id, rhs)] }