A few typos in comments
[ghc.git] / compiler / coreSyn / CoreUnfold.hs
1 {-
2 (c) The University of Glasgow 2006
3 (c) The AQUA Project, Glasgow University, 1994-1998
4
5
6 Core-syntax unfoldings
7
8 Unfoldings (which can travel across module boundaries) are in Core
9 syntax (namely @CoreExpr@s).
10
11 The type @Unfolding@ sits ``above'' simply-Core-expressions
12 unfoldings, capturing ``higher-level'' things we know about a binding,
13 usually things that the simplifier found out (e.g., ``it's a
14 literal''). In the corner of a @CoreUnfolding@ unfolding, you will
15 find, unsurprisingly, a Core expression.
16 -}
17
18 {-# LANGUAGE CPP #-}
19
20 module CoreUnfold (
21 Unfolding, UnfoldingGuidance, -- Abstract types
22
23 noUnfolding, mkImplicitUnfolding,
24 mkUnfolding, mkCoreUnfolding,
25 mkTopUnfolding, mkSimpleUnfolding, mkWorkerUnfolding,
26 mkInlineUnfolding, mkInlinableUnfolding, mkWwInlineRule,
27 mkCompulsoryUnfolding, mkDFunUnfolding,
28 specUnfolding,
29
30 ArgSummary(..),
31
32 couldBeSmallEnoughToInline, inlineBoringOk,
33 certainlyWillInline, smallEnoughToInline,
34
35 callSiteInline, CallCtxt(..),
36
37 -- Reexport from CoreSubst (it only live there so it can be used
38 -- by the Very Simple Optimiser)
39 exprIsConApp_maybe, exprIsLiteral_maybe
40 ) where
41
42 #include "HsVersions.h"
43
44 import DynFlags
45 import CoreSyn
46 import PprCore () -- Instances
47 import OccurAnal ( occurAnalyseExpr )
48 import CoreSubst hiding( substTy )
49 import CoreArity ( manifestArity, exprBotStrictness_maybe )
50 import CoreUtils
51 import Id
52 import DataCon
53 import Literal
54 import PrimOp
55 import IdInfo
56 import BasicTypes ( Arity )
57 import Type
58 import PrelNames
59 import TysPrim ( realWorldStatePrimTy )
60 import Bag
61 import Util
62 import FastString
63 import Outputable
64 import ForeignCall
65
66 import qualified Data.ByteString as BS
67 import Data.Maybe
68
69 {-
70 ************************************************************************
71 * *
72 \subsection{Making unfoldings}
73 * *
74 ************************************************************************
75 -}
76
77 mkTopUnfolding :: DynFlags -> Bool -> CoreExpr -> Unfolding
78 mkTopUnfolding dflags = mkUnfolding dflags InlineRhs True {- Top level -}
79
80 mkImplicitUnfolding :: DynFlags -> CoreExpr -> Unfolding
81 -- For implicit Ids, do a tiny bit of optimising first
82 mkImplicitUnfolding dflags expr
83 = mkTopUnfolding dflags False (simpleOptExpr expr)
84
85 -- Note [Top-level flag on inline rules]
86 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
87 -- Slight hack: note that mk_inline_rules conservatively sets the
88 -- top-level flag to True. It gets set more accurately by the simplifier
89 -- Simplify.simplUnfolding.
90
91 mkSimpleUnfolding :: DynFlags -> CoreExpr -> Unfolding
92 mkSimpleUnfolding dflags = mkUnfolding dflags InlineRhs False False
93
94 mkDFunUnfolding :: [Var] -> DataCon -> [CoreExpr] -> Unfolding
95 mkDFunUnfolding bndrs con ops
96 = DFunUnfolding { df_bndrs = bndrs
97 , df_con = con
98 , df_args = map occurAnalyseExpr ops }
99 -- See Note [Occurrrence analysis of unfoldings]
100
101 mkWwInlineRule :: CoreExpr -> Arity -> Unfolding
102 mkWwInlineRule expr arity
103 = mkCoreUnfolding InlineStable True
104 (simpleOptExpr expr)
105 (UnfWhen { ug_arity = arity, ug_unsat_ok = unSaturatedOk
106 , ug_boring_ok = boringCxtNotOk })
107
108 mkCompulsoryUnfolding :: CoreExpr -> Unfolding
109 mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded
110 = mkCoreUnfolding InlineCompulsory True
111 (simpleOptExpr expr)
112 (UnfWhen { ug_arity = 0 -- Arity of unfolding doesn't matter
113 , ug_unsat_ok = unSaturatedOk, ug_boring_ok = boringCxtOk })
114
115 mkWorkerUnfolding :: DynFlags -> (CoreExpr -> CoreExpr) -> Unfolding -> Unfolding
116 -- See Note [Worker-wrapper for INLINABLE functions] in WorkWrap
117 mkWorkerUnfolding dflags work_fn
118 (CoreUnfolding { uf_src = src, uf_tmpl = tmpl
119 , uf_is_top = top_lvl })
120 | isStableSource src
121 = mkCoreUnfolding src top_lvl new_tmpl guidance
122 where
123 new_tmpl = simpleOptExpr (work_fn tmpl)
124 guidance = calcUnfoldingGuidance dflags new_tmpl
125
126 mkWorkerUnfolding _ _ _ = noUnfolding
127
128 mkInlineUnfolding :: Maybe Arity -> CoreExpr -> Unfolding
129 mkInlineUnfolding mb_arity expr
130 = mkCoreUnfolding InlineStable
131 True -- Note [Top-level flag on inline rules]
132 expr' guide
133 where
134 expr' = simpleOptExpr expr
135 guide = case mb_arity of
136 Nothing -> UnfWhen { ug_arity = manifestArity expr'
137 , ug_unsat_ok = unSaturatedOk
138 , ug_boring_ok = boring_ok }
139 Just arity -> UnfWhen { ug_arity = arity
140 , ug_unsat_ok = needSaturated
141 , ug_boring_ok = boring_ok }
142 boring_ok = inlineBoringOk expr'
143
144 mkInlinableUnfolding :: DynFlags -> CoreExpr -> Unfolding
145 mkInlinableUnfolding dflags expr
146 = mkUnfolding dflags InlineStable True is_bot expr'
147 where
148 expr' = simpleOptExpr expr
149 is_bot = isJust (exprBotStrictness_maybe expr')
150
151 specUnfolding :: DynFlags -> Subst -> [Var] -> [CoreExpr] -> Unfolding -> Unfolding
152 -- See Note [Specialising unfoldings]
153 specUnfolding _ subst new_bndrs spec_args
154 df@(DFunUnfolding { df_bndrs = bndrs, df_con = con , df_args = args })
155 = ASSERT2( length bndrs >= length spec_args, ppr df $$ ppr spec_args $$ ppr new_bndrs )
156 mkDFunUnfolding (new_bndrs ++ extra_bndrs) con
157 (map (substExpr spec_doc subst2) args)
158 where
159 subst1 = extendSubstList subst (bndrs `zip` spec_args)
160 (subst2, extra_bndrs) = substBndrs subst1 (dropList spec_args bndrs)
161
162 specUnfolding _dflags subst new_bndrs spec_args
163 (CoreUnfolding { uf_src = src, uf_tmpl = tmpl
164 , uf_is_top = top_lvl
165 , uf_guidance = old_guidance })
166 | isStableSource src -- See Note [Specialising unfoldings]
167 , UnfWhen { ug_arity = old_arity
168 , ug_unsat_ok = unsat_ok
169 , ug_boring_ok = boring_ok } <- old_guidance
170 = let guidance = UnfWhen { ug_arity = old_arity - count isValArg spec_args
171 + count isId new_bndrs
172 , ug_unsat_ok = unsat_ok
173 , ug_boring_ok = boring_ok }
174 new_tmpl = simpleOptExpr $ mkLams new_bndrs $
175 mkApps (substExpr spec_doc subst tmpl) spec_args
176 -- The beta-redexes created here will be simplified
177 -- away by simplOptExpr in mkUnfolding
178
179 in mkCoreUnfolding src top_lvl new_tmpl guidance
180
181 specUnfolding _ _ _ _ _ = noUnfolding
182
183 spec_doc :: SDoc
184 spec_doc = ptext (sLit "specUnfolding")
185
186 {-
187 Note [Specialising unfoldings]
188 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
189 When we specialise a function for some given type-class arguments, we use
190 specUnfolding to specialise its unfolding. Some important points:
191
192 * If the original function has a DFunUnfolding, the specialised one
193 must do so too! Otherwise we lose the magic rules that make it
194 interact with ClassOps
195
196 * There is a bit of hack for INLINABLE functions:
197 f :: Ord a => ....
198 f = <big-rhs>
199 {- INLINEABLE f #-}
200 Now if we specialise f, should the specialised version still have
201 an INLINEABLE pragma? If it does, we'll capture a specialised copy
202 of <big-rhs> as its unfolding, and that probaby won't inline. But
203 if we don't, the specialised version of <big-rhs> might be small
204 enough to inline at a call site. This happens with Control.Monad.liftM3,
205 and can cause a lot more allocation as a result (nofib n-body shows this).
206
207 Moreover, keeping the INLINEABLE thing isn't much help, because
208 the specialised function (probaby) isn't overloaded any more.
209
210 Conclusion: drop the INLINEALE pragma. In practice what this means is:
211 if a stable unfolding has UnfoldingGuidance of UnfWhen,
212 we keep it (so the specialised thing too will always inline)
213 if a stable unfolding has UnfoldingGuidance of UnfIfGoodArgs
214 (which arises from INLINEABLE), we discard it
215 -}
216
217 mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr
218 -> UnfoldingGuidance -> Unfolding
219 -- Occurrence-analyses the expression before capturing it
220 mkCoreUnfolding src top_lvl expr guidance
221 = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
222 -- See Note [Occurrrence analysis of unfoldings]
223 uf_src = src,
224 uf_is_top = top_lvl,
225 uf_is_value = exprIsHNF expr,
226 uf_is_conlike = exprIsConLike expr,
227 uf_is_work_free = exprIsWorkFree expr,
228 uf_expandable = exprIsExpandable expr,
229 uf_guidance = guidance }
230
231 mkUnfolding :: DynFlags -> UnfoldingSource -> Bool -> Bool -> CoreExpr
232 -> Unfolding
233 -- Calculates unfolding guidance
234 -- Occurrence-analyses the expression before capturing it
235 mkUnfolding dflags src top_lvl is_bottoming expr
236 | top_lvl && is_bottoming
237 , not (exprIsTrivial expr)
238 = NoUnfolding -- See Note [Do not inline top-level bottoming functions]
239 | otherwise
240 = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
241 -- See Note [Occurrrence analysis of unfoldings]
242 uf_src = src,
243 uf_is_top = top_lvl,
244 uf_is_value = exprIsHNF expr,
245 uf_is_conlike = exprIsConLike expr,
246 uf_expandable = exprIsExpandable expr,
247 uf_is_work_free = exprIsWorkFree expr,
248 uf_guidance = guidance }
249 where
250 guidance = calcUnfoldingGuidance dflags expr
251 -- NB: *not* (calcUnfoldingGuidance (occurAnalyseExpr expr))!
252 -- See Note [Calculate unfolding guidance on the non-occ-anal'd expression]
253
254 {-
255 Note [Occurrence analysis of unfoldings]
256 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
257 We do occurrence-analysis of unfoldings once and for all, when the
258 unfolding is built, rather than each time we inline them.
259
260 But given this decision it's vital that we do
261 *always* do it. Consider this unfolding
262 \x -> letrec { f = ...g...; g* = f } in body
263 where g* is (for some strange reason) the loop breaker. If we don't
264 occ-anal it when reading it in, we won't mark g as a loop breaker, and
265 we may inline g entirely in body, dropping its binding, and leaving
266 the occurrence in f out of scope. This happened in Trac #8892, where
267 the unfolding in question was a DFun unfolding.
268
269 But more generally, the simplifier is designed on the
270 basis that it is looking at occurrence-analysed expressions, so better
271 ensure that they acutally are.
272
273 Note [Calculate unfolding guidance on the non-occ-anal'd expression]
274 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
275 Notice that we give the non-occur-analysed expression to
276 calcUnfoldingGuidance. In some ways it'd be better to occur-analyse
277 first; for example, sometimes during simplification, there's a large
278 let-bound thing which has been substituted, and so is now dead; so
279 'expr' contains two copies of the thing while the occurrence-analysed
280 expression doesn't.
281
282 Nevertheless, we *don't* and *must not* occ-analyse before computing
283 the size because
284
285 a) The size computation bales out after a while, whereas occurrence
286 analysis does not.
287
288 b) Residency increases sharply if you occ-anal first. I'm not
289 100% sure why, but it's a large effect. Compiling Cabal went
290 from residency of 534M to over 800M with this one change.
291
292 This can occasionally mean that the guidance is very pessimistic;
293 it gets fixed up next round. And it should be rare, because large
294 let-bound things that are dead are usually caught by preInlineUnconditionally
295
296
297 ************************************************************************
298 * *
299 \subsection{The UnfoldingGuidance type}
300 * *
301 ************************************************************************
302 -}
303
304 inlineBoringOk :: CoreExpr -> Bool
305 -- See Note [INLINE for small functions]
306 -- True => the result of inlining the expression is
307 -- no bigger than the expression itself
308 -- eg (\x y -> f y x)
309 -- This is a quick and dirty version. It doesn't attempt
310 -- to deal with (\x y z -> x (y z))
311 -- The really important one is (x `cast` c)
312 inlineBoringOk e
313 = go 0 e
314 where
315 go :: Int -> CoreExpr -> Bool
316 go credit (Lam x e) | isId x = go (credit+1) e
317 | otherwise = go credit e
318 go credit (App f (Type {})) = go credit f
319 go credit (App f a) | credit > 0
320 , exprIsTrivial a = go (credit-1) f
321 go credit (Tick _ e) = go credit e -- dubious
322 go credit (Cast e _) = go credit e
323 go _ (Var {}) = boringCxtOk
324 go _ _ = boringCxtNotOk
325
326 calcUnfoldingGuidance
327 :: DynFlags
328 -> CoreExpr -- Expression to look at
329 -> UnfoldingGuidance
330 calcUnfoldingGuidance dflags (Tick t expr)
331 | not (tickishIsCode t) -- non-code ticks don't matter for unfolding
332 = calcUnfoldingGuidance dflags expr
333 calcUnfoldingGuidance dflags expr
334 = case sizeExpr dflags bOMB_OUT_SIZE val_bndrs body of
335 TooBig -> UnfNever
336 SizeIs size cased_bndrs scrut_discount
337 | uncondInline expr n_val_bndrs size
338 -> UnfWhen { ug_unsat_ok = unSaturatedOk
339 , ug_boring_ok = boringCxtOk
340 , ug_arity = n_val_bndrs } -- Note [INLINE for small functions]
341 | otherwise
342 -> UnfIfGoodArgs { ug_args = map (mk_discount cased_bndrs) val_bndrs
343 , ug_size = size
344 , ug_res = scrut_discount }
345
346 where
347 (bndrs, body) = collectBinders expr
348 bOMB_OUT_SIZE = ufCreationThreshold dflags
349 -- Bomb out if size gets bigger than this
350 val_bndrs = filter isId bndrs
351 n_val_bndrs = length val_bndrs
352
353 mk_discount :: Bag (Id,Int) -> Id -> Int
354 mk_discount cbs bndr = foldlBag combine 0 cbs
355 where
356 combine acc (bndr', disc)
357 | bndr == bndr' = acc `plus_disc` disc
358 | otherwise = acc
359
360 plus_disc :: Int -> Int -> Int
361 plus_disc | isFunTy (idType bndr) = max
362 | otherwise = (+)
363 -- See Note [Function and non-function discounts]
364
365 {-
366 Note [Computing the size of an expression]
367 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
368 The basic idea of sizeExpr is obvious enough: count nodes. But getting the
369 heuristics right has taken a long time. Here's the basic strategy:
370
371 * Variables, literals: 0
372 (Exception for string literals, see litSize.)
373
374 * Function applications (f e1 .. en): 1 + #value args
375
376 * Constructor applications: 1, regardless of #args
377
378 * Let(rec): 1 + size of components
379
380 * Note, cast: 0
381
382 Examples
383
384 Size Term
385 --------------
386 0 42#
387 0 x
388 0 True
389 2 f x
390 1 Just x
391 4 f (g x)
392
393 Notice that 'x' counts 0, while (f x) counts 2. That's deliberate: there's
394 a function call to account for. Notice also that constructor applications
395 are very cheap, because exposing them to a caller is so valuable.
396
397 [25/5/11] All sizes are now multiplied by 10, except for primops
398 (which have sizes like 1 or 4. This makes primops look fantastically
399 cheap, and seems to be almost unversally beneficial. Done partly as a
400 result of #4978.
401
402 Note [Do not inline top-level bottoming functions]
403 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
404 The FloatOut pass has gone to some trouble to float out calls to 'error'
405 and similar friends. See Note [Bottoming floats] in SetLevels.
406 Do not re-inline them! But we *do* still inline if they are very small
407 (the uncondInline stuff).
408
409 Note [INLINE for small functions]
410 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
411 Consider {-# INLINE f #-}
412 f x = Just x
413 g y = f y
414 Then f's RHS is no larger than its LHS, so we should inline it into
415 even the most boring context. In general, f the function is
416 sufficiently small that its body is as small as the call itself, the
417 inline unconditionally, regardless of how boring the context is.
418
419 Things to note:
420
421 (1) We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
422 than the thing it's replacing. Notice that
423 (f x) --> (g 3) -- YES, unconditionally
424 (f x) --> x : [] -- YES, *even though* there are two
425 -- arguments to the cons
426 x --> g 3 -- NO
427 x --> Just v -- NO
428
429 It's very important not to unconditionally replace a variable by
430 a non-atomic term.
431
432 (2) We do this even if the thing isn't saturated, else we end up with the
433 silly situation that
434 f x y = x
435 ...map (f 3)...
436 doesn't inline. Even in a boring context, inlining without being
437 saturated will give a lambda instead of a PAP, and will be more
438 efficient at runtime.
439
440 (3) However, when the function's arity > 0, we do insist that it
441 has at least one value argument at the call site. (This check is
442 made in the UnfWhen case of callSiteInline.) Otherwise we find this:
443 f = /\a \x:a. x
444 d = /\b. MkD (f b)
445 If we inline f here we get
446 d = /\b. MkD (\x:b. x)
447 and then prepareRhs floats out the argument, abstracting the type
448 variables, so we end up with the original again!
449
450 (4) We must be much more cautious about arity-zero things. Consider
451 let x = y +# z in ...
452 In *size* terms primops look very small, because the generate a
453 single instruction, but we do not want to unconditionally replace
454 every occurrence of x with (y +# z). So we only do the
455 unconditional-inline thing for *trivial* expressions.
456
457 NB: you might think that PostInlineUnconditionally would do this
458 but it doesn't fire for top-level things; see SimplUtils
459 Note [Top level and postInlineUnconditionally]
460 -}
461
462 uncondInline :: CoreExpr -> Arity -> Int -> Bool
463 -- Inline unconditionally if there no size increase
464 -- Size of call is arity (+1 for the function)
465 -- See Note [INLINE for small functions]
466 uncondInline rhs arity size
467 | arity > 0 = size <= 10 * (arity + 1) -- See Note [INLINE for small functions] (1)
468 | otherwise = exprIsTrivial rhs -- See Note [INLINE for small functions] (4)
469
470 sizeExpr :: DynFlags
471 -> Int -- Bomb out if it gets bigger than this
472 -> [Id] -- Arguments; we're interested in which of these
473 -- get case'd
474 -> CoreExpr
475 -> ExprSize
476
477 -- Note [Computing the size of an expression]
478
479 sizeExpr dflags bOMB_OUT_SIZE top_args expr
480 = size_up expr
481 where
482 size_up (Cast e _) = size_up e
483 size_up (Tick _ e) = size_up e
484 size_up (Type _) = sizeZero -- Types cost nothing
485 size_up (Coercion _) = sizeZero
486 size_up (Lit lit) = sizeN (litSize lit)
487 size_up (Var f) | isRealWorldId f = sizeZero
488 -- Make sure we get constructor discounts even
489 -- on nullary constructors
490 | otherwise = size_up_call f [] 0
491
492 size_up (App fun arg)
493 | isTyCoArg arg = size_up fun
494 | otherwise = size_up arg `addSizeNSD`
495 size_up_app fun [arg] (if isRealWorldExpr arg then 1 else 0)
496
497 size_up (Lam b e)
498 | isId b && not (isRealWorldId b) = lamScrutDiscount dflags (size_up e `addSizeN` 10)
499 | otherwise = size_up e
500
501 size_up (Let (NonRec binder rhs) body)
502 = size_up rhs `addSizeNSD`
503 size_up body `addSizeN`
504 (if isUnLiftedType (idType binder) then 0 else 10)
505 -- For the allocation
506 -- If the binder has an unlifted type there is no allocation
507
508 size_up (Let (Rec pairs) body)
509 = foldr (addSizeNSD . size_up . snd)
510 (size_up body `addSizeN` (10 * length pairs)) -- (length pairs) for the allocation
511 pairs
512
513 size_up (Case (Var v) _ _ alts)
514 | v `elem` top_args -- We are scrutinising an argument variable
515 = alts_size (foldr addAltSize sizeZero alt_sizes)
516 (foldr maxSize sizeZero alt_sizes)
517 -- Good to inline if an arg is scrutinised, because
518 -- that may eliminate allocation in the caller
519 -- And it eliminates the case itself
520 where
521 alt_sizes = map size_up_alt alts
522
523 -- alts_size tries to compute a good discount for
524 -- the case when we are scrutinising an argument variable
525 alts_size (SizeIs tot tot_disc tot_scrut) -- Size of all alternatives
526 (SizeIs max _ _) -- Size of biggest alternative
527 = SizeIs tot (unitBag (v, 20 + tot - max) `unionBags` tot_disc) tot_scrut
528 -- If the variable is known, we produce a discount that
529 -- will take us back to 'max', the size of the largest alternative
530 -- The 1+ is a little discount for reduced allocation in the caller
531 --
532 -- Notice though, that we return tot_disc, the total discount from
533 -- all branches. I think that's right.
534
535 alts_size tot_size _ = tot_size
536
537 size_up (Case e _ _ alts) = size_up e `addSizeNSD`
538 foldr (addAltSize . size_up_alt) case_size alts
539 where
540 case_size
541 | is_inline_scrut e, not (lengthExceeds alts 1) = sizeN (-10)
542 | otherwise = sizeZero
543 -- Normally we don't charge for the case itself, but
544 -- we charge one per alternative (see size_up_alt,
545 -- below) to account for the cost of the info table
546 -- and comparisons.
547 --
548 -- However, in certain cases (see is_inline_scrut
549 -- below), no code is generated for the case unless
550 -- there are multiple alts. In these cases we
551 -- subtract one, making the first alt free.
552 -- e.g. case x# +# y# of _ -> ... should cost 1
553 -- case touch# x# of _ -> ... should cost 0
554 -- (see #4978)
555 --
556 -- I would like to not have the "not (lengthExceeds alts 1)"
557 -- condition above, but without that some programs got worse
558 -- (spectral/hartel/event and spectral/para). I don't fully
559 -- understand why. (SDM 24/5/11)
560
561 -- unboxed variables, inline primops and unsafe foreign calls
562 -- are all "inline" things:
563 is_inline_scrut (Var v) = isUnLiftedType (idType v)
564 is_inline_scrut scrut
565 | (Var f, _) <- collectArgs scrut
566 = case idDetails f of
567 FCallId fc -> not (isSafeForeignCall fc)
568 PrimOpId op -> not (primOpOutOfLine op)
569 _other -> False
570 | otherwise
571 = False
572
573 ------------
574 -- size_up_app is used when there's ONE OR MORE value args
575 size_up_app (App fun arg) args voids
576 | isTyCoArg arg = size_up_app fun args voids
577 | isRealWorldExpr arg = size_up_app fun (arg:args) (voids + 1)
578 | otherwise = size_up arg `addSizeNSD`
579 size_up_app fun (arg:args) voids
580 size_up_app (Var fun) args voids = size_up_call fun args voids
581 size_up_app (Tick _ expr) args voids = size_up_app expr args voids
582 size_up_app other args voids = size_up other `addSizeN` (length args - voids)
583
584 ------------
585 size_up_call :: Id -> [CoreExpr] -> Int -> ExprSize
586 size_up_call fun val_args voids
587 = case idDetails fun of
588 FCallId _ -> sizeN (10 * (1 + length val_args))
589 DataConWorkId dc -> conSize dc (length val_args)
590 PrimOpId op -> primOpSize op (length val_args)
591 ClassOpId _ -> classOpSize dflags top_args val_args
592 _ -> funSize dflags top_args fun (length val_args) voids
593
594 ------------
595 size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 10
596 -- Don't charge for args, so that wrappers look cheap
597 -- (See comments about wrappers with Case)
598 --
599 -- IMPORATANT: *do* charge 1 for the alternative, else we
600 -- find that giant case nests are treated as practically free
601 -- A good example is Foreign.C.Error.errrnoToIOError
602
603 ------------
604 -- These addSize things have to be here because
605 -- I don't want to give them bOMB_OUT_SIZE as an argument
606 addSizeN TooBig _ = TooBig
607 addSizeN (SizeIs n xs d) m = mkSizeIs bOMB_OUT_SIZE (n + m) xs d
608
609 -- addAltSize is used to add the sizes of case alternatives
610 addAltSize TooBig _ = TooBig
611 addAltSize _ TooBig = TooBig
612 addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
613 = mkSizeIs bOMB_OUT_SIZE (n1 + n2)
614 (xs `unionBags` ys)
615 (d1 + d2) -- Note [addAltSize result discounts]
616
617 -- This variant ignores the result discount from its LEFT argument
618 -- It's used when the second argument isn't part of the result
619 addSizeNSD TooBig _ = TooBig
620 addSizeNSD _ TooBig = TooBig
621 addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2)
622 = mkSizeIs bOMB_OUT_SIZE (n1 + n2)
623 (xs `unionBags` ys)
624 d2 -- Ignore d1
625
626 isRealWorldId id = idType id `eqType` realWorldStatePrimTy
627
628 -- an expression of type State# RealWorld must be a variable
629 isRealWorldExpr (Var id) = isRealWorldId id
630 isRealWorldExpr (Tick _ e) = isRealWorldExpr e
631 isRealWorldExpr _ = False
632
633 -- | Finds a nominal size of a string literal.
634 litSize :: Literal -> Int
635 -- Used by CoreUnfold.sizeExpr
636 litSize (LitInteger {}) = 100 -- Note [Size of literal integers]
637 litSize (MachStr str) = 10 + 10 * ((BS.length str + 3) `div` 4)
638 -- If size could be 0 then @f "x"@ might be too small
639 -- [Sept03: make literal strings a bit bigger to avoid fruitless
640 -- duplication of little strings]
641 litSize _other = 0 -- Must match size of nullary constructors
642 -- Key point: if x |-> 4, then x must inline unconditionally
643 -- (eg via case binding)
644
645 classOpSize :: DynFlags -> [Id] -> [CoreExpr] -> ExprSize
646 -- See Note [Conlike is interesting]
647 classOpSize _ _ []
648 = sizeZero
649 classOpSize dflags top_args (arg1 : other_args)
650 = SizeIs size arg_discount 0
651 where
652 size = 20 + (10 * length other_args)
653 -- If the class op is scrutinising a lambda bound dictionary then
654 -- give it a discount, to encourage the inlining of this function
655 -- The actual discount is rather arbitrarily chosen
656 arg_discount = case arg1 of
657 Var dict | dict `elem` top_args
658 -> unitBag (dict, ufDictDiscount dflags)
659 _other -> emptyBag
660
661 funSize :: DynFlags -> [Id] -> Id -> Int -> Int -> ExprSize
662 -- Size for functions that are not constructors or primops
663 -- Note [Function applications]
664 funSize dflags top_args fun n_val_args voids
665 | fun `hasKey` buildIdKey = buildSize
666 | fun `hasKey` augmentIdKey = augmentSize
667 | otherwise = SizeIs size arg_discount res_discount
668 where
669 some_val_args = n_val_args > 0
670
671 size | some_val_args = 10 * (1 + n_val_args - voids)
672 | otherwise = 0
673 -- The 1+ is for the function itself
674 -- Add 1 for each non-trivial arg;
675 -- the allocation cost, as in let(rec)
676
677 -- DISCOUNTS
678 -- See Note [Function and non-function discounts]
679 arg_discount | some_val_args && fun `elem` top_args
680 = unitBag (fun, ufFunAppDiscount dflags)
681 | otherwise = emptyBag
682 -- If the function is an argument and is applied
683 -- to some values, give it an arg-discount
684
685 res_discount | idArity fun > n_val_args = ufFunAppDiscount dflags
686 | otherwise = 0
687 -- If the function is partially applied, show a result discount
688
689 conSize :: DataCon -> Int -> ExprSize
690 conSize dc n_val_args
691 | n_val_args == 0 = SizeIs 0 emptyBag 10 -- Like variables
692
693 -- See Note [Unboxed tuple size and result discount]
694 | isUnboxedTupleCon dc = SizeIs 0 emptyBag (10 * (1 + n_val_args))
695
696 -- See Note [Constructor size and result discount]
697 | otherwise = SizeIs 10 emptyBag (10 * (1 + n_val_args))
698
699 {-
700 Note [Constructor size and result discount]
701 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
702 Treat a constructors application as size 10, regardless of how many
703 arguments it has; we are keen to expose them (and we charge separately
704 for their args). We can't treat them as size zero, else we find that
705 (Just x) has size 0, which is the same as a lone variable; and hence
706 'v' will always be replaced by (Just x), where v is bound to Just x.
707
708 The "result discount" is applied if the result of the call is
709 scrutinised (say by a case). For a constructor application that will
710 mean the constructor application will disappear, so we don't need to
711 charge it to the function. So the discount should at least match the
712 cost of the constructor application, namely 10. But to give a bit
713 of extra incentive we give a discount of 10*(1 + n_val_args).
714
715 Simon M tried a MUCH bigger discount: (10 * (10 + n_val_args)),
716 and said it was an "unambiguous win", but its terribly dangerous
717 because a function with many many case branches, each finishing with
718 a constructor, can have an arbitrarily large discount. This led to
719 terrible code bloat: see Trac #6099.
720
721 Note [Unboxed tuple size and result discount]
722 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
723 However, unboxed tuples count as size zero. I found occasions where we had
724 f x y z = case op# x y z of { s -> (# s, () #) }
725 and f wasn't getting inlined.
726
727 I tried giving unboxed tuples a *result discount* of zero (see the
728 commented-out line). Why? When returned as a result they do not
729 allocate, so maybe we don't want to charge so much for them If you
730 have a non-zero discount here, we find that workers often get inlined
731 back into wrappers, because it look like
732 f x = case $wf x of (# a,b #) -> (a,b)
733 and we are keener because of the case. However while this change
734 shrank binary sizes by 0.5% it also made spectral/boyer allocate 5%
735 more. All other changes were very small. So it's not a big deal but I
736 didn't adopt the idea.
737
738 Note [Function and non-function discounts]
739 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
740 We want a discount if the function is applied. A good example is
741 monadic combinators with continuation arguments, where inlining is
742 quite important.
743
744 But we don't want a big discount when a function is called many times
745 (see the detailed comments with Trac #6048) because if the function is
746 big it won't be inlined at its many call sites and no benefit results.
747 Indeed, we can get exponentially big inlinings this way; that is what
748 Trac #6048 is about.
749
750 On the other hand, for data-valued arguments, if there are lots of
751 case expressions in the body, each one will get smaller if we apply
752 the function to a constructor application, so we *want* a big discount
753 if the argument is scrutinised by many case expressions.
754
755 Conclusion:
756 - For functions, take the max of the discounts
757 - For data values, take the sum of the discounts
758
759
760 Note [Literal integer size]
761 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
762 Literal integers *can* be big (mkInteger [...coefficients...]), but
763 need not be (S# n). We just use an aribitrary big-ish constant here
764 so that, in particular, we don't inline top-level defns like
765 n = S# 5
766 There's no point in doing so -- any optimisations will see the S#
767 through n's unfolding. Nor will a big size inhibit unfoldings functions
768 that mention a literal Integer, because the float-out pass will float
769 all those constants to top level.
770 -}
771
772 primOpSize :: PrimOp -> Int -> ExprSize
773 primOpSize op n_val_args
774 = if primOpOutOfLine op
775 then sizeN (op_size + n_val_args)
776 else sizeN op_size
777 where
778 op_size = primOpCodeSize op
779
780
781 buildSize :: ExprSize
782 buildSize = SizeIs 0 emptyBag 40
783 -- We really want to inline applications of build
784 -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
785 -- Indeed, we should add a result_discount becuause build is
786 -- very like a constructor. We don't bother to check that the
787 -- build is saturated (it usually is). The "-2" discounts for the \c n,
788 -- The "4" is rather arbitrary.
789
790 augmentSize :: ExprSize
791 augmentSize = SizeIs 0 emptyBag 40
792 -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
793 -- e plus ys. The -2 accounts for the \cn
794
795 -- When we return a lambda, give a discount if it's used (applied)
796 lamScrutDiscount :: DynFlags -> ExprSize -> ExprSize
797 lamScrutDiscount dflags (SizeIs n vs _) = SizeIs n vs (ufFunAppDiscount dflags)
798 lamScrutDiscount _ TooBig = TooBig
799
800 {-
801 Note [addAltSize result discounts]
802 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
803 When adding the size of alternatives, we *add* the result discounts
804 too, rather than take the *maximum*. For a multi-branch case, this
805 gives a discount for each branch that returns a constructor, making us
806 keener to inline. I did try using 'max' instead, but it makes nofib
807 'rewrite' and 'puzzle' allocate significantly more, and didn't make
808 binary sizes shrink significantly either.
809
810 Note [Discounts and thresholds]
811 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
812 Constants for discounts and thesholds are defined in main/DynFlags,
813 all of form ufXxxx. They are:
814
815 ufCreationThreshold
816 At a definition site, if the unfolding is bigger than this, we
817 may discard it altogether
818
819 ufUseThreshold
820 At a call site, if the unfolding, less discounts, is smaller than
821 this, then it's small enough inline
822
823 ufKeenessFactor
824 Factor by which the discounts are multiplied before
825 subtracting from size
826
827 ufDictDiscount
828 The discount for each occurrence of a dictionary argument
829 as an argument of a class method. Should be pretty small
830 else big functions may get inlined
831
832 ufFunAppDiscount
833 Discount for a function argument that is applied. Quite
834 large, because if we inline we avoid the higher-order call.
835
836 ufDearOp
837 The size of a foreign call or not-dupable PrimOp
838
839
840 Note [Function applications]
841 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
842 In a function application (f a b)
843
844 - If 'f' is an argument to the function being analysed,
845 and there's at least one value arg, record a FunAppDiscount for f
846
847 - If the application if a PAP (arity > 2 in this example)
848 record a *result* discount (because inlining
849 with "extra" args in the call may mean that we now
850 get a saturated application)
851
852 Code for manipulating sizes
853 -}
854
855 -- | The size of an candidate expression for unfolding
856 data ExprSize
857 = TooBig
858 | SizeIs { _es_size_is :: {-# UNPACK #-} !Int -- ^ Size found
859 , _es_args :: !(Bag (Id,Int))
860 -- ^ Arguments cased herein, and discount for each such
861 , _es_discount :: {-# UNPACK #-} !Int
862 -- ^ Size to subtract if result is scrutinised by a case
863 -- expression
864 }
865
866 instance Outputable ExprSize where
867 ppr TooBig = ptext (sLit "TooBig")
868 ppr (SizeIs a _ c) = brackets (int a <+> int c)
869
870 -- subtract the discount before deciding whether to bale out. eg. we
871 -- want to inline a large constructor application into a selector:
872 -- tup = (a_1, ..., a_99)
873 -- x = case tup of ...
874 --
875 mkSizeIs :: Int -> Int -> Bag (Id, Int) -> Int -> ExprSize
876 mkSizeIs max n xs d | (n - d) > max = TooBig
877 | otherwise = SizeIs n xs d
878
879 maxSize :: ExprSize -> ExprSize -> ExprSize
880 maxSize TooBig _ = TooBig
881 maxSize _ TooBig = TooBig
882 maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 > n2 = s1
883 | otherwise = s2
884
885 sizeZero :: ExprSize
886 sizeN :: Int -> ExprSize
887
888 sizeZero = SizeIs 0 emptyBag 0
889 sizeN n = SizeIs n emptyBag 0
890
891 {-
892 ************************************************************************
893 * *
894 \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
895 * *
896 ************************************************************************
897
898 We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
899 we ``couldn't possibly use'' on the other side. Can be overridden w/
900 flaggery. Just the same as smallEnoughToInline, except that it has no
901 actual arguments.
902 -}
903
904 couldBeSmallEnoughToInline :: DynFlags -> Int -> CoreExpr -> Bool
905 couldBeSmallEnoughToInline dflags threshold rhs
906 = case sizeExpr dflags threshold [] body of
907 TooBig -> False
908 _ -> True
909 where
910 (_, body) = collectBinders rhs
911
912 ----------------
913 smallEnoughToInline :: DynFlags -> Unfolding -> Bool
914 smallEnoughToInline dflags (CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_size = size}})
915 = size <= ufUseThreshold dflags
916 smallEnoughToInline _ _
917 = False
918
919 ----------------
920 certainlyWillInline :: DynFlags -> Unfolding -> Maybe Unfolding
921 -- Sees if the unfolding is pretty certain to inline
922 -- If so, return a *stable* unfolding for it, that will always inline
923 certainlyWillInline dflags unf@(CoreUnfolding { uf_guidance = guidance, uf_tmpl = expr })
924 = case guidance of
925 UnfNever -> Nothing
926 UnfWhen {} -> Just (unf { uf_src = InlineStable })
927
928 -- The UnfIfGoodArgs case seems important. If we w/w small functions
929 -- binary sizes go up by 10%! (This is with SplitObjs.) I'm not totally
930 -- sure whyy.
931 UnfIfGoodArgs { ug_size = size, ug_args = args }
932 | not (null args) -- See Note [certainlyWillInline: be careful of thunks]
933 , let arity = length args
934 , size - (10 * (arity + 1)) <= ufUseThreshold dflags
935 -> Just (unf { uf_src = InlineStable
936 , uf_guidance = UnfWhen { ug_arity = arity
937 , ug_unsat_ok = unSaturatedOk
938 , ug_boring_ok = inlineBoringOk expr } })
939 -- Note the "unsaturatedOk". A function like f = \ab. a
940 -- will certainly inline, even if partially applied (f e), so we'd
941 -- better make sure that the transformed inlining has the same property
942
943 _ -> Nothing
944
945 certainlyWillInline _ unf@(DFunUnfolding {})
946 = Just unf
947
948 certainlyWillInline _ _
949 = Nothing
950
951 {-
952 Note [certainlyWillInline: be careful of thunks]
953 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
954 Don't claim that thunks will certainly inline, because that risks work
955 duplication. Even if the work duplication is not great (eg is_cheap
956 holds), it can make a big difference in an inner loop In Trac #5623 we
957 found that the WorkWrap phase thought that
958 y = case x of F# v -> F# (v +# v)
959 was certainlyWillInline, so the addition got duplicated.
960
961
962 ************************************************************************
963 * *
964 \subsection{callSiteInline}
965 * *
966 ************************************************************************
967
968 This is the key function. It decides whether to inline a variable at a call site
969
970 callSiteInline is used at call sites, so it is a bit more generous.
971 It's a very important function that embodies lots of heuristics.
972 A non-WHNF can be inlined if it doesn't occur inside a lambda,
973 and occurs exactly once or
974 occurs once in each branch of a case and is small
975
976 If the thing is in WHNF, there's no danger of duplicating work,
977 so we can inline if it occurs once, or is small
978
979 NOTE: we don't want to inline top-level functions that always diverge.
980 It just makes the code bigger. Tt turns out that the convenient way to prevent
981 them inlining is to give them a NOINLINE pragma, which we do in
982 StrictAnal.addStrictnessInfoToTopId
983 -}
984
985 callSiteInline :: DynFlags
986 -> Id -- The Id
987 -> Bool -- True <=> unfolding is active
988 -> Bool -- True if there are no arguments at all (incl type args)
989 -> [ArgSummary] -- One for each value arg; True if it is interesting
990 -> CallCtxt -- True <=> continuation is interesting
991 -> Maybe CoreExpr -- Unfolding, if any
992
993 data ArgSummary = TrivArg -- Nothing interesting
994 | NonTrivArg -- Arg has structure
995 | ValueArg -- Arg is a con-app or PAP
996 -- ..or con-like. Note [Conlike is interesting]
997
998 instance Outputable ArgSummary where
999 ppr TrivArg = ptext (sLit "TrivArg")
1000 ppr NonTrivArg = ptext (sLit "NonTrivArg")
1001 ppr ValueArg = ptext (sLit "ValueArg")
1002
1003 nonTriv :: ArgSummary -> Bool
1004 nonTriv TrivArg = False
1005 nonTriv _ = True
1006
1007 data CallCtxt
1008 = BoringCtxt
1009 | RhsCtxt -- Rhs of a let-binding; see Note [RHS of lets]
1010 | DiscArgCtxt -- Argument of a function with non-zero arg discount
1011 | RuleArgCtxt -- We are somewhere in the argument of a function with rules
1012
1013 | ValAppCtxt -- We're applied to at least one value arg
1014 -- This arises when we have ((f x |> co) y)
1015 -- Then the (f x) has argument 'x' but in a ValAppCtxt
1016
1017 | CaseCtxt -- We're the scrutinee of a case
1018 -- that decomposes its scrutinee
1019
1020 instance Outputable CallCtxt where
1021 ppr CaseCtxt = ptext (sLit "CaseCtxt")
1022 ppr ValAppCtxt = ptext (sLit "ValAppCtxt")
1023 ppr BoringCtxt = ptext (sLit "BoringCtxt")
1024 ppr RhsCtxt = ptext (sLit "RhsCtxt")
1025 ppr DiscArgCtxt = ptext (sLit "DiscArgCtxt")
1026 ppr RuleArgCtxt = ptext (sLit "RuleArgCtxt")
1027
1028 callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info
1029 = case idUnfolding id of
1030 -- idUnfolding checks for loop-breakers, returning NoUnfolding
1031 -- Things with an INLINE pragma may have an unfolding *and*
1032 -- be a loop breaker (maybe the knot is not yet untied)
1033 CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top
1034 , uf_is_work_free = is_wf
1035 , uf_guidance = guidance, uf_expandable = is_exp }
1036 | active_unfolding -> tryUnfolding dflags id lone_variable
1037 arg_infos cont_info unf_template is_top
1038 is_wf is_exp guidance
1039 | otherwise -> traceInline dflags "Inactive unfolding:" (ppr id) Nothing
1040 NoUnfolding -> Nothing
1041 OtherCon {} -> Nothing
1042 DFunUnfolding {} -> Nothing -- Never unfold a DFun
1043
1044 traceInline :: DynFlags -> String -> SDoc -> a -> a
1045 traceInline dflags str doc result
1046 | dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags
1047 = pprTrace str doc result
1048 | otherwise
1049 = result
1050
1051 tryUnfolding :: DynFlags -> Id -> Bool -> [ArgSummary] -> CallCtxt
1052 -> CoreExpr -> Bool -> Bool -> Bool -> UnfoldingGuidance
1053 -> Maybe CoreExpr
1054 tryUnfolding dflags id lone_variable
1055 arg_infos cont_info unf_template is_top
1056 is_wf is_exp guidance
1057 = case guidance of
1058 UnfNever -> traceInline dflags str (ptext (sLit "UnfNever")) Nothing
1059
1060 UnfWhen { ug_arity = uf_arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok }
1061 | enough_args && (boring_ok || some_benefit)
1062 -- See Note [INLINE for small functions (3)]
1063 -> traceInline dflags str (mk_doc some_benefit empty True) (Just unf_template)
1064 | otherwise
1065 -> traceInline dflags str (mk_doc some_benefit empty False) Nothing
1066 where
1067 some_benefit = calc_some_benefit uf_arity
1068 enough_args = (n_val_args >= uf_arity) || (unsat_ok && n_val_args > 0)
1069
1070 UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
1071 | is_wf && some_benefit && small_enough
1072 -> traceInline dflags str (mk_doc some_benefit extra_doc True) (Just unf_template)
1073 | otherwise
1074 -> traceInline dflags str (mk_doc some_benefit extra_doc False) Nothing
1075 where
1076 some_benefit = calc_some_benefit (length arg_discounts)
1077 extra_doc = text "discounted size =" <+> int discounted_size
1078 discounted_size = size - discount
1079 small_enough = discounted_size <= ufUseThreshold dflags
1080 discount = computeDiscount dflags arg_discounts
1081 res_discount arg_infos cont_info
1082
1083 where
1084 mk_doc some_benefit extra_doc yes_or_no
1085 = vcat [ text "arg infos" <+> ppr arg_infos
1086 , text "interesting continuation" <+> ppr cont_info
1087 , text "some_benefit" <+> ppr some_benefit
1088 , text "is exp:" <+> ppr is_exp
1089 , text "is work-free:" <+> ppr is_wf
1090 , text "guidance" <+> ppr guidance
1091 , extra_doc
1092 , text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]
1093
1094 str = "Considering inlining: " ++ showSDocDump dflags (ppr id)
1095 n_val_args = length arg_infos
1096
1097 -- some_benefit is used when the RHS is small enough
1098 -- and the call has enough (or too many) value
1099 -- arguments (ie n_val_args >= arity). But there must
1100 -- be *something* interesting about some argument, or the
1101 -- result context, to make it worth inlining
1102 calc_some_benefit :: Arity -> Bool -- The Arity is the number of args
1103 -- expected by the unfolding
1104 calc_some_benefit uf_arity
1105 | not saturated = interesting_args -- Under-saturated
1106 -- Note [Unsaturated applications]
1107 | otherwise = interesting_args -- Saturated or over-saturated
1108 || interesting_call
1109 where
1110 saturated = n_val_args >= uf_arity
1111 over_saturated = n_val_args > uf_arity
1112 interesting_args = any nonTriv arg_infos
1113 -- NB: (any nonTriv arg_infos) looks at the
1114 -- over-saturated args too which is "wrong";
1115 -- but if over-saturated we inline anyway.
1116
1117 interesting_call
1118 | over_saturated
1119 = True
1120 | otherwise
1121 = case cont_info of
1122 CaseCtxt -> not (lone_variable && is_wf) -- Note [Lone variables]
1123 ValAppCtxt -> True -- Note [Cast then apply]
1124 RuleArgCtxt -> uf_arity > 0 -- See Note [Unfold info lazy contexts]
1125 DiscArgCtxt -> uf_arity > 0 --
1126 RhsCtxt -> uf_arity > 0 --
1127 _ -> not is_top && uf_arity > 0 -- Note [Nested functions]
1128 -- Note [Inlining in ArgCtxt]
1129
1130 {-
1131 Note [Unfold into lazy contexts], Note [RHS of lets]
1132 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1133 When the call is the argument of a function with a RULE, or the RHS of a let,
1134 we are a little bit keener to inline. For example
1135 f y = (y,y,y)
1136 g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
1137 We'd inline 'f' if the call was in a case context, and it kind-of-is,
1138 only we can't see it. Also
1139 x = f v
1140 could be expensive whereas
1141 x = case v of (a,b) -> a
1142 is patently cheap and may allow more eta expansion.
1143 So we treat the RHS of a let as not-totally-boring.
1144
1145 Note [Unsaturated applications]
1146 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1147 When a call is not saturated, we *still* inline if one of the
1148 arguments has interesting structure. That's sometimes very important.
1149 A good example is the Ord instance for Bool in Base:
1150
1151 Rec {
1152 $fOrdBool =GHC.Classes.D:Ord
1153 @ Bool
1154 ...
1155 $cmin_ajX
1156
1157 $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool
1158 $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool
1159 }
1160
1161 But the defn of GHC.Classes.$dmmin is:
1162
1163 $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a
1164 {- Arity: 3, HasNoCafRefs, Strictness: SLL,
1165 Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
1166 case @ a GHC.Classes.<= @ a $dOrd x y of wild {
1167 GHC.Types.False -> y GHC.Types.True -> x }) -}
1168
1169 We *really* want to inline $dmmin, even though it has arity 3, in
1170 order to unravel the recursion.
1171
1172
1173 Note [Things to watch]
1174 ~~~~~~~~~~~~~~~~~~~~~~
1175 * { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
1176 Assume x is exported, so not inlined unconditionally.
1177 Then we want x to inline unconditionally; no reason for it
1178 not to, and doing so avoids an indirection.
1179
1180 * { x = I# 3; ....f x.... }
1181 Make sure that x does not inline unconditionally!
1182 Lest we get extra allocation.
1183
1184 Note [Inlining an InlineRule]
1185 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1186 An InlineRules is used for
1187 (a) programmer INLINE pragmas
1188 (b) inlinings from worker/wrapper
1189
1190 For (a) the RHS may be large, and our contract is that we *only* inline
1191 when the function is applied to all the arguments on the LHS of the
1192 source-code defn. (The uf_arity in the rule.)
1193
1194 However for worker/wrapper it may be worth inlining even if the
1195 arity is not satisfied (as we do in the CoreUnfolding case) so we don't
1196 require saturation.
1197
1198
1199 Note [Nested functions]
1200 ~~~~~~~~~~~~~~~~~~~~~~~
1201 If a function has a nested defn we also record some-benefit, on the
1202 grounds that we are often able to eliminate the binding, and hence the
1203 allocation, for the function altogether; this is good for join points.
1204 But this only makes sense for *functions*; inlining a constructor
1205 doesn't help allocation unless the result is scrutinised. UNLESS the
1206 constructor occurs just once, albeit possibly in multiple case
1207 branches. Then inlining it doesn't increase allocation, but it does
1208 increase the chance that the constructor won't be allocated at all in
1209 the branches that don't use it.
1210
1211 Note [Cast then apply]
1212 ~~~~~~~~~~~~~~~~~~~~~~
1213 Consider
1214 myIndex = __inline_me ( (/\a. <blah>) |> co )
1215 co :: (forall a. a -> a) ~ (forall a. T a)
1216 ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
1217
1218 We need to inline myIndex to unravel this; but the actual call (myIndex a) has
1219 no value arguments. The ValAppCtxt gives it enough incentive to inline.
1220
1221 Note [Inlining in ArgCtxt]
1222 ~~~~~~~~~~~~~~~~~~~~~~~~~~
1223 The condition (arity > 0) here is very important, because otherwise
1224 we end up inlining top-level stuff into useless places; eg
1225 x = I# 3#
1226 f = \y. g x
1227 This can make a very big difference: it adds 16% to nofib 'integer' allocs,
1228 and 20% to 'power'.
1229
1230 At one stage I replaced this condition by 'True' (leading to the above
1231 slow-down). The motivation was test eyeball/inline1.hs; but that seems
1232 to work ok now.
1233
1234 NOTE: arguably, we should inline in ArgCtxt only if the result of the
1235 call is at least CONLIKE. At least for the cases where we use ArgCtxt
1236 for the RHS of a 'let', we only profit from the inlining if we get a
1237 CONLIKE thing (modulo lets).
1238
1239 Note [Lone variables] See also Note [Interaction of exprIsWorkFree and lone variables]
1240 ~~~~~~~~~~~~~~~~~~~~~ which appears below
1241 The "lone-variable" case is important. I spent ages messing about
1242 with unsatisfactory variants, but this is nice. The idea is that if a
1243 variable appears all alone
1244
1245 as an arg of lazy fn, or rhs BoringCtxt
1246 as scrutinee of a case CaseCtxt
1247 as arg of a fn ArgCtxt
1248 AND
1249 it is bound to a cheap expression
1250
1251 then we should not inline it (unless there is some other reason,
1252 e.g. is is the sole occurrence). That is what is happening at
1253 the use of 'lone_variable' in 'interesting_call'.
1254
1255 Why? At least in the case-scrutinee situation, turning
1256 let x = (a,b) in case x of y -> ...
1257 into
1258 let x = (a,b) in case (a,b) of y -> ...
1259 and thence to
1260 let x = (a,b) in let y = (a,b) in ...
1261 is bad if the binding for x will remain.
1262
1263 Another example: I discovered that strings
1264 were getting inlined straight back into applications of 'error'
1265 because the latter is strict.
1266 s = "foo"
1267 f = \x -> ...(error s)...
1268
1269 Fundamentally such contexts should not encourage inlining because the
1270 context can ``see'' the unfolding of the variable (e.g. case or a
1271 RULE) so there's no gain. If the thing is bound to a value.
1272
1273 However, watch out:
1274
1275 * Consider this:
1276 foo = _inline_ (\n. [n])
1277 bar = _inline_ (foo 20)
1278 baz = \n. case bar of { (m:_) -> m + n }
1279 Here we really want to inline 'bar' so that we can inline 'foo'
1280 and the whole thing unravels as it should obviously do. This is
1281 important: in the NDP project, 'bar' generates a closure data
1282 structure rather than a list.
1283
1284 So the non-inlining of lone_variables should only apply if the
1285 unfolding is regarded as cheap; because that is when exprIsConApp_maybe
1286 looks through the unfolding. Hence the "&& is_wf" in the
1287 InlineRule branch.
1288
1289 * Even a type application or coercion isn't a lone variable.
1290 Consider
1291 case $fMonadST @ RealWorld of { :DMonad a b c -> c }
1292 We had better inline that sucker! The case won't see through it.
1293
1294 For now, I'm treating treating a variable applied to types
1295 in a *lazy* context "lone". The motivating example was
1296 f = /\a. \x. BIG
1297 g = /\a. \y. h (f a)
1298 There's no advantage in inlining f here, and perhaps
1299 a significant disadvantage. Hence some_val_args in the Stop case
1300
1301 Note [Interaction of exprIsWorkFree and lone variables]
1302 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1303 The lone-variable test says "don't inline if a case expression
1304 scrutines a lone variable whose unfolding is cheap". It's very
1305 important that, under these circumstances, exprIsConApp_maybe
1306 can spot a constructor application. So, for example, we don't
1307 consider
1308 let x = e in (x,x)
1309 to be cheap, and that's good because exprIsConApp_maybe doesn't
1310 think that expression is a constructor application.
1311
1312 In the 'not (lone_variable && is_wf)' test, I used to test is_value
1313 rather than is_wf, which was utterly wrong, because the above
1314 expression responds True to exprIsHNF, which is what sets is_value.
1315
1316 This kind of thing can occur if you have
1317
1318 {-# INLINE foo #-}
1319 foo = let x = e in (x,x)
1320
1321 which Roman did.
1322 -}
1323
1324 computeDiscount :: DynFlags -> [Int] -> Int -> [ArgSummary] -> CallCtxt
1325 -> Int
1326 computeDiscount dflags arg_discounts res_discount arg_infos cont_info
1327 -- We multiple the raw discounts (args_discount and result_discount)
1328 -- ty opt_UnfoldingKeenessFactor because the former have to do with
1329 -- *size* whereas the discounts imply that there's some extra
1330 -- *efficiency* to be gained (e.g. beta reductions, case reductions)
1331 -- by inlining.
1332
1333 = 10 -- Discount of 10 because the result replaces the call
1334 -- so we count 10 for the function itself
1335
1336 + 10 * length actual_arg_discounts
1337 -- Discount of 10 for each arg supplied,
1338 -- because the result replaces the call
1339
1340 + round (ufKeenessFactor dflags *
1341 fromIntegral (total_arg_discount + res_discount'))
1342 where
1343 actual_arg_discounts = zipWith mk_arg_discount arg_discounts arg_infos
1344 total_arg_discount = sum actual_arg_discounts
1345
1346 mk_arg_discount _ TrivArg = 0
1347 mk_arg_discount _ NonTrivArg = 10
1348 mk_arg_discount discount ValueArg = discount
1349
1350 res_discount'
1351 | LT <- arg_discounts `compareLength` arg_infos
1352 = res_discount -- Over-saturated
1353 | otherwise
1354 = case cont_info of
1355 BoringCtxt -> 0
1356 CaseCtxt -> res_discount -- Presumably a constructor
1357 ValAppCtxt -> res_discount -- Presumably a function
1358 _ -> 40 `min` res_discount
1359 -- ToDo: this 40 `min` res_discount doesn't seem right
1360 -- for DiscArgCtxt it shouldn't matter because the function will
1361 -- get the arg discount for any non-triv arg
1362 -- for RuleArgCtxt we do want to be keener to inline; but not only
1363 -- constructor results
1364 -- for RhsCtxt I suppose that exposing a data con is good in general
1365 -- And 40 seems very arbitrary
1366 --
1367 -- res_discount can be very large when a function returns
1368 -- constructors; but we only want to invoke that large discount
1369 -- when there's a case continuation.
1370 -- Otherwise we, rather arbitrarily, threshold it. Yuk.
1371 -- But we want to aovid inlining large functions that return
1372 -- constructors into contexts that are simply "interesting"
1373