unlit compiler/stranal/ modules
[ghc.git] / compiler / stranal / WorkWrap.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
3
4 \section[WorkWrap]{Worker/wrapper-generating back-end of strictness analyser}
5 -}
6
7 {-# LANGUAGE CPP #-}
8 module WorkWrap ( wwTopBinds ) where
9
10 import CoreSyn
11 import CoreUnfold ( certainlyWillInline, mkWwInlineRule, mkWorkerUnfolding )
12 import CoreUtils ( exprType, exprIsHNF )
13 import CoreArity ( exprArity )
14 import Var
15 import Id
16 import IdInfo
17 import UniqSupply
18 import BasicTypes
19 import DynFlags
20 import VarEnv ( isEmptyVarEnv )
21 import Demand
22 import WwLib
23 import Util
24 import Outputable
25 import FamInstEnv
26 import MonadUtils
27
28 #include "HsVersions.h"
29
30 {-
31 We take Core bindings whose binders have:
32
33 \begin{enumerate}
34
35 \item Strictness attached (by the front-end of the strictness
36 analyser), and / or
37
38 \item Constructed Product Result information attached by the CPR
39 analysis pass.
40
41 \end{enumerate}
42
43 and we return some ``plain'' bindings which have been
44 worker/wrapper-ified, meaning:
45
46 \begin{enumerate}
47
48 \item Functions have been split into workers and wrappers where
49 appropriate. If a function has both strictness and CPR properties
50 then only one worker/wrapper doing both transformations is produced;
51
52 \item Binders' @IdInfos@ have been updated to reflect the existence of
53 these workers/wrappers (this is where we get STRICTNESS and CPR pragma
54 info for exported values).
55 \end{enumerate}
56 -}
57
58 wwTopBinds :: DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram
59
60 wwTopBinds dflags fam_envs us top_binds
61 = initUs_ us $ do
62 top_binds' <- mapM (wwBind dflags fam_envs) top_binds
63 return (concat top_binds')
64
65 {-
66 ************************************************************************
67 * *
68 \subsection[wwBind-wwExpr]{@wwBind@ and @wwExpr@}
69 * *
70 ************************************************************************
71
72 @wwBind@ works on a binding, trying each \tr{(binder, expr)} pair in
73 turn. Non-recursive case first, then recursive...
74 -}
75
76 wwBind :: DynFlags
77 -> FamInstEnvs
78 -> CoreBind
79 -> UniqSM [CoreBind] -- returns a WwBinding intermediate form;
80 -- the caller will convert to Expr/Binding,
81 -- as appropriate.
82
83 wwBind dflags fam_envs (NonRec binder rhs) = do
84 new_rhs <- wwExpr dflags fam_envs rhs
85 new_pairs <- tryWW dflags fam_envs NonRecursive binder new_rhs
86 return [NonRec b e | (b,e) <- new_pairs]
87 -- Generated bindings must be non-recursive
88 -- because the original binding was.
89
90 wwBind dflags fam_envs (Rec pairs)
91 = return . Rec <$> concatMapM do_one pairs
92 where
93 do_one (binder, rhs) = do new_rhs <- wwExpr dflags fam_envs rhs
94 tryWW dflags fam_envs Recursive binder new_rhs
95
96 {-
97 @wwExpr@ basically just walks the tree, looking for appropriate
98 annotations that can be used. Remember it is @wwBind@ that does the
99 matching by looking for strict arguments of the correct type.
100 @wwExpr@ is a version that just returns the ``Plain'' Tree.
101 -}
102
103 wwExpr :: DynFlags -> FamInstEnvs -> CoreExpr -> UniqSM CoreExpr
104
105 wwExpr _ _ e@(Type {}) = return e
106 wwExpr _ _ e@(Coercion {}) = return e
107 wwExpr _ _ e@(Lit {}) = return e
108 wwExpr _ _ e@(Var {}) = return e
109
110 wwExpr dflags fam_envs (Lam binder expr)
111 = Lam binder <$> wwExpr dflags fam_envs expr
112
113 wwExpr dflags fam_envs (App f a)
114 = App <$> wwExpr dflags fam_envs f <*> wwExpr dflags fam_envs a
115
116 wwExpr dflags fam_envs (Tick note expr)
117 = Tick note <$> wwExpr dflags fam_envs expr
118
119 wwExpr dflags fam_envs (Cast expr co) = do
120 new_expr <- wwExpr dflags fam_envs expr
121 return (Cast new_expr co)
122
123 wwExpr dflags fam_envs (Let bind expr)
124 = mkLets <$> wwBind dflags fam_envs bind <*> wwExpr dflags fam_envs expr
125
126 wwExpr dflags fam_envs (Case expr binder ty alts) = do
127 new_expr <- wwExpr dflags fam_envs expr
128 new_alts <- mapM ww_alt alts
129 return (Case new_expr binder ty new_alts)
130 where
131 ww_alt (con, binders, rhs) = do
132 new_rhs <- wwExpr dflags fam_envs rhs
133 return (con, binders, new_rhs)
134
135 {-
136 ************************************************************************
137 * *
138 \subsection[tryWW]{@tryWW@: attempt a worker/wrapper pair}
139 * *
140 ************************************************************************
141
142 @tryWW@ just accumulates arguments, converts strictness info from the
143 front-end into the proper form, then calls @mkWwBodies@ to do
144 the business.
145
146 The only reason this is monadised is for the unique supply.
147
148 Note [Don't w/w INLINE things]
149 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
150 It's very important to refrain from w/w-ing an INLINE function (ie one
151 with a stable unfolding) because the wrapper will then overwrite the
152 old stable unfolding with the wrapper code.
153
154 Furthermore, if the programmer has marked something as INLINE,
155 we may lose by w/w'ing it.
156
157 If the strictness analyser is run twice, this test also prevents
158 wrappers (which are INLINEd) from being re-done. (You can end up with
159 several liked-named Ids bouncing around at the same time---absolute
160 mischief.)
161
162 Notice that we refrain from w/w'ing an INLINE function even if it is
163 in a recursive group. It might not be the loop breaker. (We could
164 test for loop-breaker-hood, but I'm not sure that ever matters.)
165
166 Note [Worker-wrapper for INLINABLE functions]
167 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
168 If we have
169 {-# INLINABLE f #-}
170 f :: Ord a => [a] -> Int -> a
171 f x y = ....f....
172
173 where f is strict in y, we might get a more efficient loop by w/w'ing
174 f. But that would make a new unfolding which would overwrite the old
175 one! So the function would no longer be ININABLE, and in particular
176 will not be specialised at call sites in other modules.
177
178 This comes in practice (Trac #6056).
179
180 Solution: do the w/w for strictness analysis, but transfer the Stable
181 unfolding to the *worker*. So we will get something like this:
182
183 {-# INLINE[0] f #-}
184 f :: Ord a => [a] -> Int -> a
185 f d x y = case y of I# y' -> fw d x y'
186
187 {-# INLINABLE[0] fw #-}
188 fw :: Ord a => [a] -> Int# -> a
189 fw d x y' = let y = I# y' in ...f...
190
191 How do we "transfer the unfolding"? Easy: by using the old one, wrapped
192 in work_fn! See CoreUnfold.mkWorkerUnfolding.
193
194 Note [Activation for INLINABLE worker]
195 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
196 Follows on from Note [Worker-wrapper for INLINABLE functions]
197 It is *vital* that if the worker gets an INLINABLE pragma (from the
198 original function), then the worker has the same phase activation as
199 the wrapper (or later). That is necessary to allow the wrapper to
200 inline into the worker's unfolding: see SimplUtils
201 Note [Simplifying inside stable unfoldings].
202
203 Notihng is lost by giving the worker the same activation as the
204 worker, because the worker won't have any chance of inlining until the
205 wrapper does; there's no point in giving it an earlier activation.
206
207 Note [Don't w/w inline small non-loop-breaker things]
208 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
209 In general, we refrain from w/w-ing *small* functions, which are not
210 loop breakers, because they'll inline anyway. But we must take care:
211 it may look small now, but get to be big later after other inlining
212 has happened. So we take the precaution of adding an INLINE pragma to
213 any such functions.
214
215 I made this change when I observed a big function at the end of
216 compilation with a useful strictness signature but no w-w. (It was
217 small during demand analysis, we refrained from w/w, and then got big
218 when something was inlined in its rhs.) When I measured it on nofib,
219 it didn't make much difference; just a few percent improved allocation
220 on one benchmark (bspt/Euclid.space). But nothing got worse.
221
222 There is an infelicity though. We may get something like
223 f = g val
224 ==>
225 g x = case gw x of r -> I# r
226
227 f {- InlineStable, Template = g val -}
228 f = case gw x of r -> I# r
229
230 The code for f duplicates that for g, without any real benefit. It
231 won't really be executed, because calls to f will go via the inlining.
232
233 Note [Wrapper activation]
234 ~~~~~~~~~~~~~~~~~~~~~~~~~
235 When should the wrapper inlining be active? It must not be active
236 earlier than the current Activation of the Id (eg it might have a
237 NOINLINE pragma). But in fact strictness analysis happens fairly
238 late in the pipeline, and we want to prioritise specialisations over
239 strictness. Eg if we have
240 module Foo where
241 f :: Num a => a -> Int -> a
242 f n 0 = n -- Strict in the Int, hence wrapper
243 f n x = f (n+n) (x-1)
244
245 g :: Int -> Int
246 g x = f x x -- Provokes a specialisation for f
247
248 module Bar where
249 import Foo
250
251 h :: Int -> Int
252 h x = f 3 x
253
254 Then we want the specialisation for 'f' to kick in before the wrapper does.
255
256 Now in fact the 'gentle' simplification pass encourages this, by
257 having rules on, but inlinings off. But that's kind of lucky. It seems
258 more robust to give the wrapper an Activation of (ActiveAfter 0),
259 so that it becomes active in an importing module at the same time that
260 it appears in the first place in the defining module.
261
262 At one stage I tried making the wrapper inlining always-active, and
263 that had a very bad effect on nofib/imaginary/x2n1; a wrapper was
264 inlined before the specialisation fired.
265 -}
266
267 tryWW :: DynFlags
268 -> FamInstEnvs
269 -> RecFlag
270 -> Id -- The fn binder
271 -> CoreExpr -- The bound rhs; its innards
272 -- are already ww'd
273 -> UniqSM [(Id, CoreExpr)] -- either *one* or *two* pairs;
274 -- if one, then no worker (only
275 -- the orig "wrapper" lives on);
276 -- if two, then a worker and a
277 -- wrapper.
278 tryWW dflags fam_envs is_rec fn_id rhs
279 | isNeverActive inline_act
280 -- No point in worker/wrappering if the thing is never inlined!
281 -- Because the no-inline prag will prevent the wrapper ever
282 -- being inlined at a call site.
283 --
284 -- Furthermore, don't even expose strictness info
285 = return [ (fn_id, rhs) ]
286
287 | not loop_breaker
288 , Just stable_unf <- certainlyWillInline dflags fn_unf
289 = return [ (fn_id `setIdUnfolding` stable_unf, rhs) ]
290 -- Note [Don't w/w inline small non-loop-breaker, or INLINE, things]
291 -- NB: use idUnfolding because we don't want to apply
292 -- this criterion to a loop breaker!
293
294 | is_fun
295 = splitFun dflags fam_envs new_fn_id fn_info wrap_dmds res_info rhs
296
297 | is_thunk -- See Note [Thunk splitting]
298 = splitThunk dflags fam_envs is_rec new_fn_id rhs
299
300 | otherwise
301 = return [ (new_fn_id, rhs) ]
302
303 where
304 loop_breaker = isStrongLoopBreaker (occInfo fn_info)
305 fn_info = idInfo fn_id
306 inline_act = inlinePragmaActivation (inlinePragInfo fn_info)
307 fn_unf = unfoldingInfo fn_info
308
309 -- In practice it always will have a strictness
310 -- signature, even if it's a uninformative one
311 strict_sig = strictnessInfo fn_info
312 StrictSig (DmdType env wrap_dmds res_info) = strict_sig
313
314 -- new_fn_id has the DmdEnv zapped.
315 -- (a) it is never used again
316 -- (b) it wastes space
317 -- (c) it becomes incorrect as things are cloned, because
318 -- we don't push the substitution into it
319 new_fn_id | isEmptyVarEnv env = fn_id
320 | otherwise = fn_id `setIdStrictness`
321 mkClosedStrictSig wrap_dmds res_info
322
323 is_fun = notNull wrap_dmds
324 is_thunk = not is_fun && not (exprIsHNF rhs)
325
326
327 ---------------------
328 splitFun :: DynFlags -> FamInstEnvs -> Id -> IdInfo -> [Demand] -> DmdResult -> CoreExpr
329 -> UniqSM [(Id, CoreExpr)]
330 splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs
331 = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) ) do
332 -- The arity should match the signature
333 stuff <- mkWwBodies dflags fam_envs fun_ty wrap_dmds res_info one_shots
334 case stuff of
335 Just (work_demands, wrap_fn, work_fn) -> do
336 work_uniq <- getUniqueM
337 let work_rhs = work_fn rhs
338 work_prag = InlinePragma { inl_inline = inl_inline inl_prag
339 , inl_sat = Nothing
340 , inl_act = wrap_act
341 , inl_rule = FunLike }
342 -- idl_inline: copy from fn_id; see Note [Worker-wrapper for INLINABLE functions]
343 -- idl_act: see Note [Activation for INLINABLE workers]
344 -- inl_rule: it does not make sense for workers to be constructorlike.
345
346 work_id = mkWorkerId work_uniq fn_id (exprType work_rhs)
347 `setIdOccInfo` occInfo fn_info
348 -- Copy over occurrence info from parent
349 -- Notably whether it's a loop breaker
350 -- Doesn't matter much, since we will simplify next, but
351 -- seems right-er to do so
352
353 `setInlinePragma` work_prag
354
355 `setIdUnfolding` mkWorkerUnfolding dflags work_fn (unfoldingInfo fn_info)
356 -- See Note [Worker-wrapper for INLINABLE functions]
357
358 `setIdStrictness` mkClosedStrictSig work_demands work_res_info
359 -- Even though we may not be at top level,
360 -- it's ok to give it an empty DmdEnv
361
362 `setIdArity` exprArity work_rhs
363 -- Set the arity so that the Core Lint check that the
364 -- arity is consistent with the demand type goes through
365
366 wrap_act = ActiveAfter 0
367 wrap_rhs = wrap_fn work_id
368 wrap_prag = InlinePragma { inl_inline = Inline
369 , inl_sat = Nothing
370 , inl_act = wrap_act
371 , inl_rule = rule_match_info }
372 -- See Note [Wrapper activation]
373 -- The RuleMatchInfo is (and must be) unaffected
374
375 wrap_id = fn_id `setIdUnfolding` mkWwInlineRule wrap_rhs arity
376 `setInlinePragma` wrap_prag
377 `setIdOccInfo` NoOccInfo
378 -- Zap any loop-breaker-ness, to avoid bleating from Lint
379 -- about a loop breaker with an INLINE rule
380
381 return $ [(work_id, work_rhs), (wrap_id, wrap_rhs)]
382 -- Worker first, because wrapper mentions it
383
384 Nothing -> return [(fn_id, rhs)]
385 where
386 fun_ty = idType fn_id
387 inl_prag = inlinePragInfo fn_info
388 rule_match_info = inlinePragmaRuleMatchInfo inl_prag
389 arity = arityInfo fn_info
390 -- The arity is set by the simplifier using exprEtaExpandArity
391 -- So it may be more than the number of top-level-visible lambdas
392
393 work_res_info | isBotRes res_info = botRes -- Cpr stuff done by wrapper
394 | otherwise = topRes
395
396 one_shots = get_one_shots rhs
397
398 -- If the original function has one-shot arguments, it is important to
399 -- make the wrapper and worker have corresponding one-shot arguments too.
400 -- Otherwise we spuriously float stuff out of case-expression join points,
401 -- which is very annoying.
402 get_one_shots :: Expr Var -> [OneShotInfo]
403 get_one_shots (Lam b e)
404 | isId b = idOneShotInfo b : get_one_shots e
405 | otherwise = get_one_shots e
406 get_one_shots (Tick _ e) = get_one_shots e
407 get_one_shots _ = []
408
409 {-
410 Note [Do not split void functions]
411 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
412 Consider this rather common form of binding:
413 $j = \x:Void# -> ...no use of x...
414
415 Since x is not used it'll be marked as absent. But there is no point
416 in w/w-ing because we'll simply add (\y:Void#), see WwLib.mkWorerArgs.
417
418 If x has a more interesting type (eg Int, or Int#), there *is* a point
419 in w/w so that we don't pass the argument at all.
420
421 Note [Thunk splitting]
422 ~~~~~~~~~~~~~~~~~~~~~~
423 Suppose x is used strictly (never mind whether it has the CPR
424 property).
425
426 let
427 x* = x-rhs
428 in body
429
430 splitThunk transforms like this:
431
432 let
433 x* = case x-rhs of { I# a -> I# a }
434 in body
435
436 Now simplifier will transform to
437
438 case x-rhs of
439 I# a -> let x* = I# a
440 in body
441
442 which is what we want. Now suppose x-rhs is itself a case:
443
444 x-rhs = case e of { T -> I# a; F -> I# b }
445
446 The join point will abstract over a, rather than over (which is
447 what would have happened before) which is fine.
448
449 Notice that x certainly has the CPR property now!
450
451 In fact, splitThunk uses the function argument w/w splitting
452 function, so that if x's demand is deeper (say U(U(L,L),L))
453 then the splitting will go deeper too.
454 -}
455
456 -- See Note [Thunk splitting]
457 -- splitThunk converts the *non-recursive* binding
458 -- x = e
459 -- into
460 -- x = let x = e
461 -- in case x of
462 -- I# y -> let x = I# y in x }
463 -- See comments above. Is it not beautifully short?
464 -- Moreover, it works just as well when there are
465 -- several binders, and if the binders are lifted
466 -- E.g. x = e
467 -- --> x = let x = e in
468 -- case x of (a,b) -> let x = (a,b) in x
469
470 splitThunk :: DynFlags -> FamInstEnvs -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)]
471 splitThunk dflags fam_envs is_rec fn_id rhs
472 = do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs [fn_id]
473 ; let res = [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ]
474 ; if useful then ASSERT2( isNonRec is_rec, ppr fn_id ) -- The thunk must be non-recursive
475 return res
476 else return [(fn_id, rhs)] }