Comments only: Refer to actually existing Notes
[ghc.git] / compiler / stranal / DmdAnal.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
3
4
5 -----------------
6 A demand analysis
7 -----------------
8 -}
9
10 {-# LANGUAGE CPP #-}
11
12 module DmdAnal ( dmdAnalProgram ) where
13
14 #include "HsVersions.h"
15
16 import DynFlags
17 import WwLib ( findTypeShape, deepSplitProductType_maybe )
18 import Demand -- All of it
19 import CoreSyn
20 import Outputable
21 import VarEnv
22 import BasicTypes
23 import Data.List
24 import DataCon
25 import Id
26 import CoreUtils ( exprIsHNF, exprType, exprIsTrivial )
27 import TyCon
28 import Type
29 import Coercion ( Coercion, coVarsOfCo )
30 import FamInstEnv
31 import Util
32 import Maybes ( isJust )
33 import TysWiredIn
34 import TysPrim ( realWorldStatePrimTy )
35 import ErrUtils ( dumpIfSet_dyn )
36 import Name ( getName, stableNameCmp )
37 import Data.Function ( on )
38
39 {-
40 ************************************************************************
41 * *
42 \subsection{Top level stuff}
43 * *
44 ************************************************************************
45 -}
46
47 dmdAnalProgram :: DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram
48 dmdAnalProgram dflags fam_envs binds
49 = do {
50 let { binds_plus_dmds = do_prog binds } ;
51 dumpIfSet_dyn dflags Opt_D_dump_str_signatures
52 "Strictness signatures" $
53 dumpStrSig binds_plus_dmds ;
54 return binds_plus_dmds
55 }
56 where
57 do_prog :: CoreProgram -> CoreProgram
58 do_prog binds = snd $ mapAccumL dmdAnalTopBind (emptyAnalEnv dflags fam_envs) binds
59
60 -- Analyse a (group of) top-level binding(s)
61 dmdAnalTopBind :: AnalEnv
62 -> CoreBind
63 -> (AnalEnv, CoreBind)
64 dmdAnalTopBind sigs (NonRec id rhs)
65 = (extendAnalEnv TopLevel sigs id sig, NonRec id2 rhs2)
66 where
67 ( _, _, _, rhs1) = dmdAnalRhsLetDown TopLevel Nothing sigs id rhs
68 (sig, _, id2, rhs2) = dmdAnalRhsLetDown TopLevel Nothing (nonVirgin sigs) id rhs1
69 -- Do two passes to improve CPR information
70 -- See Note [CPR for thunks]
71 -- See Note [Optimistic CPR in the "virgin" case]
72 -- See Note [Initial CPR for strict binders]
73
74 dmdAnalTopBind sigs (Rec pairs)
75 = (sigs', Rec pairs')
76 where
77 (sigs', _, pairs') = dmdFix TopLevel sigs pairs
78 -- We get two iterations automatically
79 -- c.f. the NonRec case above
80
81 {-
82 ************************************************************************
83 * *
84 \subsection{The analyser itself}
85 * *
86 ************************************************************************
87
88 Note [Ensure demand is strict]
89 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
90 It's important not to analyse e with a lazy demand because
91 a) When we encounter case s of (a,b) ->
92 we demand s with U(d1d2)... but if the overall demand is lazy
93 that is wrong, and we'd need to reduce the demand on s,
94 which is inconvenient
95 b) More important, consider
96 f (let x = R in x+x), where f is lazy
97 We still want to mark x as demanded, because it will be when we
98 enter the let. If we analyse f's arg with a Lazy demand, we'll
99 just mark x as Lazy
100 c) The application rule wouldn't be right either
101 Evaluating (f x) in a L demand does *not* cause
102 evaluation of f in a C(L) demand!
103 -}
104
105 -- If e is complicated enough to become a thunk, its contents will be evaluated
106 -- at most once, so oneify it.
107 dmdTransformThunkDmd :: CoreExpr -> Demand -> Demand
108 dmdTransformThunkDmd e
109 | exprIsTrivial e = id
110 | otherwise = oneifyDmd
111
112 -- Do not process absent demands
113 -- Otherwise act like in a normal demand analysis
114 -- See |-* relation in the companion paper
115 dmdAnalStar :: AnalEnv
116 -> Demand -- This one takes a *Demand*
117 -> CoreExpr -> (BothDmdArg, CoreExpr)
118 dmdAnalStar env dmd e
119 | (defer_and_use, cd) <- toCleanDmd dmd (exprType e)
120 , (dmd_ty, e') <- dmdAnal env cd e
121 = (postProcessDmdType defer_and_use dmd_ty, e')
122
123 -- Main Demand Analsysis machinery
124 dmdAnal, dmdAnal' :: AnalEnv
125 -> CleanDemand -- The main one takes a *CleanDemand*
126 -> CoreExpr -> (DmdType, CoreExpr)
127
128 -- The CleanDemand is always strict and not absent
129 -- See Note [Ensure demand is strict]
130
131 dmdAnal env d e = -- pprTrace "dmdAnal" (ppr d <+> ppr e) $
132 dmdAnal' env d e
133
134 dmdAnal' _ _ (Lit lit) = (nopDmdType, Lit lit)
135 dmdAnal' _ _ (Type ty) = (nopDmdType, Type ty) -- Doesn't happen, in fact
136 dmdAnal' _ _ (Coercion co)
137 = (unitDmdType (coercionDmdEnv co), Coercion co)
138
139 dmdAnal' env dmd (Var var)
140 = (dmdTransform env var dmd, Var var)
141
142 dmdAnal' env dmd (Cast e co)
143 = (dmd_ty `bothDmdType` mkBothDmdArg (coercionDmdEnv co), Cast e' co)
144 where
145 (dmd_ty, e') = dmdAnal env dmd e
146
147 {- ----- I don't get this, so commenting out -------
148 to_co = pSnd (coercionKind co)
149 dmd'
150 | Just tc <- tyConAppTyCon_maybe to_co
151 , isRecursiveTyCon tc = cleanEvalDmd
152 | otherwise = dmd
153 -- This coerce usually arises from a recursive
154 -- newtype, and we don't want to look inside them
155 -- for exactly the same reason that we don't look
156 -- inside recursive products -- we might not reach
157 -- a fixpoint. So revert to a vanilla Eval demand
158 -}
159
160 dmdAnal' env dmd (Tick t e)
161 = (dmd_ty, Tick t e')
162 where
163 (dmd_ty, e') = dmdAnal env dmd e
164
165 dmdAnal' env dmd (App fun (Type ty))
166 = (fun_ty, App fun' (Type ty))
167 where
168 (fun_ty, fun') = dmdAnal env dmd fun
169
170 -- Lots of the other code is there to make this
171 -- beautiful, compositional, application rule :-)
172 dmdAnal' env dmd (App fun arg)
173 = -- This case handles value arguments (type args handled above)
174 -- Crucially, coercions /are/ handled here, because they are
175 -- value arguments (Trac #10288)
176 let
177 call_dmd = mkCallDmd dmd
178 (fun_ty, fun') = dmdAnal env call_dmd fun
179 (arg_dmd, res_ty) = splitDmdTy fun_ty
180 (arg_ty, arg') = dmdAnalStar env (dmdTransformThunkDmd arg arg_dmd) arg
181 in
182 -- pprTrace "dmdAnal:app" (vcat
183 -- [ text "dmd =" <+> ppr dmd
184 -- , text "expr =" <+> ppr (App fun arg)
185 -- , text "fun dmd_ty =" <+> ppr fun_ty
186 -- , text "arg dmd =" <+> ppr arg_dmd
187 -- , text "arg dmd_ty =" <+> ppr arg_ty
188 -- , text "res dmd_ty =" <+> ppr res_ty
189 -- , text "overall res dmd_ty =" <+> ppr (res_ty `bothDmdType` arg_ty) ])
190 (res_ty `bothDmdType` arg_ty, App fun' arg')
191
192 -- this is an anonymous lambda, since @dmdAnalRhsLetDown@ uses @collectBinders@
193 dmdAnal' env dmd (Lam var body)
194 | isTyVar var
195 = let
196 (body_ty, body') = dmdAnal env dmd body
197 in
198 (body_ty, Lam var body')
199
200 | otherwise
201 = let (body_dmd, defer_and_use) = peelCallDmd dmd
202 -- body_dmd: a demand to analyze the body
203
204 env' = extendSigsWithLam env var
205 (body_ty, body') = dmdAnal env' body_dmd body
206 (lam_ty, var') = annotateLamIdBndr env notArgOfDfun body_ty var
207 in
208 (postProcessUnsat defer_and_use lam_ty, Lam var' body')
209
210 dmdAnal' env dmd (Case scrut case_bndr ty [(DataAlt dc, bndrs, rhs)])
211 -- Only one alternative with a product constructor
212 | let tycon = dataConTyCon dc
213 , isJust (isDataProductTyCon_maybe tycon)
214 , Just rec_tc' <- checkRecTc (ae_rec_tc env) tycon
215 = let
216 env_w_tc = env { ae_rec_tc = rec_tc' }
217 env_alt = extendEnvForProdAlt env_w_tc scrut case_bndr dc bndrs
218 (rhs_ty, rhs') = dmdAnal env_alt dmd rhs
219 (alt_ty1, dmds) = findBndrsDmds env rhs_ty bndrs
220 (alt_ty2, case_bndr_dmd) = findBndrDmd env False alt_ty1 case_bndr
221 id_dmds = addCaseBndrDmd case_bndr_dmd dmds
222 alt_ty3 | io_hack_reqd scrut dc bndrs = deferAfterIO alt_ty2
223 | otherwise = alt_ty2
224
225 -- Compute demand on the scrutinee
226 -- See Note [Demand on scrutinee of a product case]
227 scrut_dmd = mkProdDmd (addDataConStrictness dc id_dmds)
228 (scrut_ty, scrut') = dmdAnal env scrut_dmd scrut
229 res_ty = alt_ty3 `bothDmdType` toBothDmdArg scrut_ty
230 case_bndr' = setIdDemandInfo case_bndr case_bndr_dmd
231 bndrs' = setBndrsDemandInfo bndrs id_dmds
232 in
233 -- pprTrace "dmdAnal:Case1" (vcat [ text "scrut" <+> ppr scrut
234 -- , text "dmd" <+> ppr dmd
235 -- , text "case_bndr_dmd" <+> ppr (idDemandInfo case_bndr')
236 -- , text "scrut_dmd" <+> ppr scrut_dmd
237 -- , text "scrut_ty" <+> ppr scrut_ty
238 -- , text "alt_ty" <+> ppr alt_ty2
239 -- , text "res_ty" <+> ppr res_ty ]) $
240 (res_ty, Case scrut' case_bndr' ty [(DataAlt dc, bndrs', rhs')])
241
242 dmdAnal' env dmd (Case scrut case_bndr ty alts)
243 = let -- Case expression with multiple alternatives
244 (alt_tys, alts') = mapAndUnzip (dmdAnalAlt env dmd case_bndr) alts
245 (scrut_ty, scrut') = dmdAnal env cleanEvalDmd scrut
246 (alt_ty, case_bndr') = annotateBndr env (foldr lubDmdType botDmdType alt_tys) case_bndr
247 -- NB: Base case is botDmdType, for empty case alternatives
248 -- This is a unit for lubDmdType, and the right result
249 -- when there really are no alternatives
250 res_ty = alt_ty `bothDmdType` toBothDmdArg scrut_ty
251 in
252 -- pprTrace "dmdAnal:Case2" (vcat [ text "scrut" <+> ppr scrut
253 -- , text "scrut_ty" <+> ppr scrut_ty
254 -- , text "alt_tys" <+> ppr alt_tys
255 -- , text "alt_ty" <+> ppr alt_ty
256 -- , text "res_ty" <+> ppr res_ty ]) $
257 (res_ty, Case scrut' case_bndr' ty alts')
258
259 -- Let bindings can be processed in two ways:
260 -- Down (RHS before body) or Up (body before RHS).
261 -- The following case handle the up variant.
262 --
263 -- It is very simple. For let x = rhs in body
264 -- * Demand-analyse 'body' in the current environment
265 -- * Find the demand, 'rhs_dmd' placed on 'x' by 'body'
266 -- * Demand-analyse 'rhs' in 'rhs_dmd'
267 --
268 -- This is used for a non-recursive local let without manifest lambdas.
269 -- This is the LetUp rule in the paper “Higher-Order Cardinality Analysis”.
270 dmdAnal' env dmd (Let (NonRec id rhs) body)
271 | useLetUp rhs
272 , Nothing <- unpackTrivial rhs
273 -- dmdAnalRhsLetDown treats trivial right hand sides specially
274 -- so if we have a trival right hand side, fall through to that.
275 = (final_ty, Let (NonRec id' rhs') body')
276 where
277 (body_ty, body') = dmdAnal env dmd body
278 (body_ty', id_dmd) = findBndrDmd env notArgOfDfun body_ty id
279 id' = setIdDemandInfo id id_dmd
280
281 (rhs_ty, rhs') = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd) rhs
282 final_ty = body_ty' `bothDmdType` rhs_ty
283
284 dmdAnal' env dmd (Let (NonRec id rhs) body)
285 = (body_ty2, Let (NonRec id2 rhs') body')
286 where
287 (sig, lazy_fv, id1, rhs') = dmdAnalRhsLetDown NotTopLevel Nothing env id rhs
288 (body_ty, body') = dmdAnal (extendAnalEnv NotTopLevel env id sig) dmd body
289 (body_ty1, id2) = annotateBndr env body_ty id1
290 body_ty2 = addLazyFVs body_ty1 lazy_fv
291
292 -- If the actual demand is better than the vanilla call
293 -- demand, you might think that we might do better to re-analyse
294 -- the RHS with the stronger demand.
295 -- But (a) That seldom happens, because it means that *every* path in
296 -- the body of the let has to use that stronger demand
297 -- (b) It often happens temporarily in when fixpointing, because
298 -- the recursive function at first seems to place a massive demand.
299 -- But we don't want to go to extra work when the function will
300 -- probably iterate to something less demanding.
301 -- In practice, all the times the actual demand on id2 is more than
302 -- the vanilla call demand seem to be due to (b). So we don't
303 -- bother to re-analyse the RHS.
304
305 dmdAnal' env dmd (Let (Rec pairs) body)
306 = let
307 (env', lazy_fv, pairs') = dmdFix NotTopLevel env pairs
308 (body_ty, body') = dmdAnal env' dmd body
309 body_ty1 = deleteFVs body_ty (map fst pairs)
310 body_ty2 = addLazyFVs body_ty1 lazy_fv
311 in
312 body_ty2 `seq`
313 (body_ty2, Let (Rec pairs') body')
314
315 io_hack_reqd :: CoreExpr -> DataCon -> [Var] -> Bool
316 -- See Note [IO hack in the demand analyser]
317 io_hack_reqd scrut con bndrs
318 | (bndr:_) <- bndrs
319 , con == tupleDataCon Unboxed 2
320 , idType bndr `eqType` realWorldStatePrimTy
321 , (fun, _) <- collectArgs scrut
322 = case fun of
323 Var f -> not (isPrimOpId f)
324 _ -> True
325 | otherwise
326 = False
327
328 dmdAnalAlt :: AnalEnv -> CleanDemand -> Id -> Alt Var -> (DmdType, Alt Var)
329 dmdAnalAlt env dmd case_bndr (con,bndrs,rhs)
330 | null bndrs -- Literals, DEFAULT, and nullary constructors
331 , (rhs_ty, rhs') <- dmdAnal env dmd rhs
332 = (rhs_ty, (con, [], rhs'))
333
334 | otherwise -- Non-nullary data constructors
335 , (rhs_ty, rhs') <- dmdAnal env dmd rhs
336 , (alt_ty, dmds) <- findBndrsDmds env rhs_ty bndrs
337 , let case_bndr_dmd = findIdDemand alt_ty case_bndr
338 id_dmds = addCaseBndrDmd case_bndr_dmd dmds
339 = (alt_ty, (con, setBndrsDemandInfo bndrs id_dmds, rhs'))
340
341
342 {- Note [IO hack in the demand analyser]
343 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
344 There's a hack here for I/O operations. Consider
345 case foo x s of { (# s, r #) -> y }
346 Is this strict in 'y'? Normally yes, but what if 'foo' is an I/O
347 operation that simply terminates the program (not in an erroneous way)?
348 In that case we should not evaluate 'y' before the call to 'foo'.
349 Hackish solution: spot the IO-like situation and add a virtual branch,
350 as if we had
351 case foo x s of
352 (# s, r #) -> y
353 other -> return ()
354 So the 'y' isn't necessarily going to be evaluated
355
356 A more complete example (Trac #148, #1592) where this shows up is:
357 do { let len = <expensive> ;
358 ; when (...) (exitWith ExitSuccess)
359 ; print len }
360
361 However, consider
362 f x s = case getMaskingState# s of
363 (# s, r #) ->
364 case x of I# x2 -> ...
365
366 Here it is terribly sad to make 'f' lazy in 's'. After all,
367 getMaskingState# is not going to diverge or throw an exception! This
368 situation actually arises in GHC.IO.Handle.Internals.wantReadableHandle
369 (on an MVar not an Int), and made a material difference.
370
371 So if the scrutinee is a primop call, we *don't* apply the
372 state hack:
373 - If is a simple, terminating one like getMaskingState,
374 applying the hack is over-conservative.
375 - If the primop is raise# then it returns bottom, so
376 the case alternatives are already discarded.
377 - If the primop can raise a non-IO exception, like
378 divide by zero or seg-fault (eg writing an array
379 out of bounds) then we don't mind evaluating 'x' first.
380
381 Note [Demand on the scrutinee of a product case]
382 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
383 When figuring out the demand on the scrutinee of a product case,
384 we use the demands of the case alternative, i.e. id_dmds.
385 But note that these include the demand on the case binder;
386 see Note [Demand on case-alternative binders] in Demand.hs.
387 This is crucial. Example:
388 f x = case x of y { (a,b) -> k y a }
389 If we just take scrut_demand = U(L,A), then we won't pass x to the
390 worker, so the worker will rebuild
391 x = (a, absent-error)
392 and that'll crash.
393
394 Note [Aggregated demand for cardinality]
395 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
396 We use different strategies for strictness and usage/cardinality to
397 "unleash" demands captured on free variables by bindings. Let us
398 consider the example:
399
400 f1 y = let {-# NOINLINE h #-}
401 h = y
402 in (h, h)
403
404 We are interested in obtaining cardinality demand U1 on |y|, as it is
405 used only in a thunk, and, therefore, is not going to be updated any
406 more. Therefore, the demand on |y|, captured and unleashed by usage of
407 |h| is U1. However, if we unleash this demand every time |h| is used,
408 and then sum up the effects, the ultimate demand on |y| will be U1 +
409 U1 = U. In order to avoid it, we *first* collect the aggregate demand
410 on |h| in the body of let-expression, and only then apply the demand
411 transformer:
412
413 transf[x](U) = {y |-> U1}
414
415 so the resulting demand on |y| is U1.
416
417 The situation is, however, different for strictness, where this
418 aggregating approach exhibits worse results because of the nature of
419 |both| operation for strictness. Consider the example:
420
421 f y c =
422 let h x = y |seq| x
423 in case of
424 True -> h True
425 False -> y
426
427 It is clear that |f| is strict in |y|, however, the suggested analysis
428 will infer from the body of |let| that |h| is used lazily (as it is
429 used in one branch only), therefore lazy demand will be put on its
430 free variable |y|. Conversely, if the demand on |h| is unleashed right
431 on the spot, we will get the desired result, namely, that |f| is
432 strict in |y|.
433
434
435 ************************************************************************
436 * *
437 Demand transformer
438 * *
439 ************************************************************************
440 -}
441
442 dmdTransform :: AnalEnv -- The strictness environment
443 -> Id -- The function
444 -> CleanDemand -- The demand on the function
445 -> DmdType -- The demand type of the function in this context
446 -- Returned DmdEnv includes the demand on
447 -- this function plus demand on its free variables
448
449 dmdTransform env var dmd
450 | isDataConWorkId var -- Data constructor
451 = dmdTransformDataConSig (idArity var) (idStrictness var) dmd
452
453 | gopt Opt_DmdTxDictSel (ae_dflags env),
454 Just _ <- isClassOpId_maybe var -- Dictionary component selector
455 = dmdTransformDictSelSig (idStrictness var) dmd
456
457 | isGlobalId var -- Imported function
458 = let res = dmdTransformSig (idStrictness var) dmd in
459 -- pprTrace "dmdTransform" (vcat [ppr var, ppr (idStrictness var), ppr dmd, ppr res])
460 res
461
462 | Just (sig, top_lvl) <- lookupSigEnv env var -- Local letrec bound thing
463 , let fn_ty = dmdTransformSig sig dmd
464 = -- pprTrace "dmdTransform" (vcat [ppr var, ppr sig, ppr dmd, ppr fn_ty]) $
465 if isTopLevel top_lvl
466 then fn_ty -- Don't record top level things
467 else addVarDmd fn_ty var (mkOnceUsedDmd dmd)
468
469 | otherwise -- Local non-letrec-bound thing
470 = unitDmdType (unitVarEnv var (mkOnceUsedDmd dmd))
471
472 {-
473 ************************************************************************
474 * *
475 \subsection{Bindings}
476 * *
477 ************************************************************************
478 -}
479
480 -- Recursive bindings
481 dmdFix :: TopLevelFlag
482 -> AnalEnv -- Does not include bindings for this binding
483 -> [(Id,CoreExpr)]
484 -> (AnalEnv, DmdEnv,
485 [(Id,CoreExpr)]) -- Binders annotated with stricness info
486
487 dmdFix top_lvl env orig_pairs
488 = (updSigEnv env (sigEnv final_env), lazy_fv, pairs')
489 -- Return to original virgin state, keeping new signatures
490 where
491 bndrs = map fst orig_pairs
492 initial_env = addInitialSigs top_lvl env bndrs
493 (final_env, lazy_fv, pairs') = loop 1 initial_env orig_pairs
494
495 loop :: Int
496 -> AnalEnv -- Already contains the current sigs
497 -> [(Id,CoreExpr)]
498 -> (AnalEnv, DmdEnv, [(Id,CoreExpr)])
499 loop n env pairs
500 = -- pprTrace "dmd loop" (ppr n <+> ppr bndrs $$ ppr env) $
501 loop' n env pairs
502
503 loop' n env pairs
504 | found_fixpoint
505 = (env', lazy_fv, pairs')
506 -- Note: return pairs', not pairs. pairs' is the result of
507 -- processing the RHSs with sigs (= sigs'), whereas pairs
508 -- is the result of processing the RHSs with the *previous*
509 -- iteration of sigs.
510
511 | n >= 10
512 = -- pprTrace "dmdFix loop" (ppr n <+> (vcat
513 -- [ text "Sigs:" <+> ppr [ (id,lookupVarEnv (sigEnv env) id,
514 -- lookupVarEnv (sigEnv env') id)
515 -- | (id,_) <- pairs],
516 -- text "env:" <+> ppr env,
517 -- text "binds:" <+> pprCoreBinding (Rec pairs)]))
518 (env, lazy_fv, orig_pairs) -- Safe output
519 -- The lazy_fv part is really important! orig_pairs has no strictness
520 -- info, including nothing about free vars. But if we have
521 -- letrec f = ....y..... in ...f...
522 -- where 'y' is free in f, we must record that y is mentioned,
523 -- otherwise y will get recorded as absent altogether
524
525 | otherwise
526 = loop (n+1) (nonVirgin env') pairs'
527 where
528 found_fixpoint = all (same_sig (sigEnv env) (sigEnv env')) bndrs
529
530 ((env',lazy_fv), pairs') = mapAccumL my_downRhs (env, emptyDmdEnv) pairs
531 -- mapAccumL: Use the new signature to do the next pair
532 -- The occurrence analyser has arranged them in a good order
533 -- so this can significantly reduce the number of iterations needed
534
535 my_downRhs (env, lazy_fv) (id,rhs)
536 = ((env', lazy_fv'), (id', rhs'))
537 where
538 (sig, lazy_fv1, id', rhs') = dmdAnalRhsLetDown top_lvl (Just bndrs) env id rhs
539 lazy_fv' = plusVarEnv_C bothDmd lazy_fv lazy_fv1
540 env' = extendAnalEnv top_lvl env id sig
541
542 same_sig sigs sigs' var = lookup sigs var == lookup sigs' var
543 lookup sigs var = case lookupVarEnv sigs var of
544 Just (sig,_) -> sig
545 Nothing -> pprPanic "dmdFix" (ppr var)
546
547
548 -- Trivial RHS
549 -- See Note [Demand analysis for trivial right-hand sides]
550 dmdAnalTrivialRhs ::
551 AnalEnv -> Id -> CoreExpr -> Var ->
552 (StrictSig, VarEnv Demand, Id, CoreExpr)
553 dmdAnalTrivialRhs env id rhs fn
554 = (fn_str, fn_fv, set_idStrictness env id fn_str, rhs)
555 where
556 fn_str = getStrictness env fn
557 fn_fv | isLocalId fn = unitVarEnv fn topDmd
558 | otherwise = emptyDmdEnv
559 -- Note [Remember to demand the function itself]
560 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
561 -- fn_fv: don't forget to produce a demand for fn itself
562 -- Lacking this caused Trac #9128
563 -- The demand is very conservative (topDmd), but that doesn't
564 -- matter; trivial bindings are usually inlined, so it only
565 -- kicks in for top-level bindings and NOINLINE bindings
566
567 -- Let bindings can be processed in two ways:
568 -- Down (RHS before body) or Up (body before RHS).
569 -- dmdAnalRhsLetDown implements the Down variant:
570 -- * assuming a demand of <L,U>
571 -- * looking at the definition
572 -- * determining a strictness signature
573 --
574 -- It is used for toplevel definition, recursive definitions and local
575 -- non-recursive definitions that have manifest lambdas.
576 -- Local non-recursive definitions without a lambda are handled with LetUp.
577 --
578 -- This is the LetDown rule in the paper “Higher-Order Cardinality Analysis”.
579 dmdAnalRhsLetDown :: TopLevelFlag
580 -> Maybe [Id] -- Just bs <=> recursive, Nothing <=> non-recursive
581 -> AnalEnv -> Id -> CoreExpr
582 -> (StrictSig, DmdEnv, Id, CoreExpr)
583 -- Process the RHS of the binding, add the strictness signature
584 -- to the Id, and augment the environment with the signature as well.
585 dmdAnalRhsLetDown top_lvl rec_flag env id rhs
586 | Just fn <- unpackTrivial rhs -- See Note [Demand analysis for trivial right-hand sides]
587 = dmdAnalTrivialRhs env id rhs fn
588
589 | otherwise
590 = (sig_ty, lazy_fv, id', mkLams bndrs' body')
591 where
592 (bndrs, body) = collectBinders rhs
593 env_body = foldl extendSigsWithLam env bndrs
594 (body_ty, body') = dmdAnal env_body body_dmd body
595 body_ty' = removeDmdTyArgs body_ty -- zap possible deep CPR info
596 (DmdType rhs_fv rhs_dmds rhs_res, bndrs')
597 = annotateLamBndrs env (isDFunId id) body_ty' bndrs
598 sig_ty = mkStrictSig (mkDmdType sig_fv rhs_dmds rhs_res')
599 id' = set_idStrictness env id sig_ty
600 -- See Note [NOINLINE and strictness]
601
602 -- See Note [Product demands for function body]
603 body_dmd = case deepSplitProductType_maybe (ae_fam_envs env) (exprType body) of
604 Nothing -> cleanEvalDmd
605 Just (dc, _, _, _) -> cleanEvalProdDmd (dataConRepArity dc)
606
607 -- See Note [Lazy and unleashable free variables]
608 -- See Note [Aggregated demand for cardinality]
609 rhs_fv1 = case rec_flag of
610 Just bs -> reuseEnv (delVarEnvList rhs_fv bs)
611 Nothing -> rhs_fv
612
613 (lazy_fv, sig_fv) = splitFVs is_thunk rhs_fv1
614
615 rhs_res' = trimCPRInfo trim_all trim_sums rhs_res
616 trim_all = is_thunk && not_strict
617 trim_sums = not (isTopLevel top_lvl) -- See Note [CPR for sum types]
618
619 -- See Note [CPR for thunks]
620 is_thunk = not (exprIsHNF rhs)
621 not_strict
622 = isTopLevel top_lvl -- Top level and recursive things don't
623 || isJust rec_flag -- get their demandInfo set at all
624 || not (isStrictDmd (idDemandInfo id) || ae_virgin env)
625 -- See Note [Optimistic CPR in the "virgin" case]
626
627 unpackTrivial :: CoreExpr -> Maybe Id
628 -- Returns (Just v) if the arg is really equal to v, modulo
629 -- casts, type applications etc
630 -- See Note [Demand analysis for trivial right-hand sides]
631 unpackTrivial (Var v) = Just v
632 unpackTrivial (Cast e _) = unpackTrivial e
633 unpackTrivial (Lam v e) | isTyVar v = unpackTrivial e
634 unpackTrivial (App e a) | isTypeArg a = unpackTrivial e
635 unpackTrivial _ = Nothing
636
637 -- | If given the RHS of a let-binding, this 'useLetUp' determines
638 -- whether we should process the binding up (body before rhs) or
639 -- down (rhs before body).
640 --
641 -- We use LetDown if there is a chance to get a useful strictness signature.
642 -- This is the case when there are manifest value lambdas.
643 useLetUp :: CoreExpr -> Bool
644 useLetUp (Lam v e) | isTyVar v = useLetUp e
645 useLetUp (Lam _ _) = False
646 useLetUp _ = True
647
648
649 {-
650 Note [Demand analysis for trivial right-hand sides]
651 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
652 Consider
653 foo = plusInt |> co
654 where plusInt is an arity-2 function with known strictness. Clearly
655 we want plusInt's strictness to propagate to foo! But because it has
656 no manifest lambdas, it won't do so automatically, and indeed 'co' might
657 have type (Int->Int->Int) ~ T, so we *can't* eta-expand. So we have a
658 special case for right-hand sides that are "trivial", namely variables,
659 casts, type applications, and the like.
660
661 Note that this can mean that 'foo' has an arity that is smaller than that
662 indicated by its demand info. e.g. if co :: (Int->Int->Int) ~ T, then
663 foo's arity will be zero (see Note [exprArity invariant] in CoreArity),
664 but its demand signature will be that of plusInt. A small example is the
665 test case of Trac #8963.
666
667
668 Note [Product demands for function body]
669 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
670 This example comes from shootout/binary_trees:
671
672 Main.check' = \ b z ds. case z of z' { I# ip ->
673 case ds_d13s of
674 Main.Nil -> z'
675 Main.Node s14k s14l s14m ->
676 Main.check' (not b)
677 (Main.check' b
678 (case b {
679 False -> I# (-# s14h s14k);
680 True -> I# (+# s14h s14k)
681 })
682 s14l)
683 s14m } } }
684
685 Here we *really* want to unbox z, even though it appears to be used boxed in
686 the Nil case. Partly the Nil case is not a hot path. But more specifically,
687 the whole function gets the CPR property if we do.
688
689 So for the demand on the body of a RHS we use a product demand if it's
690 a product type.
691
692 ************************************************************************
693 * *
694 \subsection{Strictness signatures and types}
695 * *
696 ************************************************************************
697 -}
698
699 unitDmdType :: DmdEnv -> DmdType
700 unitDmdType dmd_env = DmdType dmd_env [] topRes
701
702 coercionDmdEnv :: Coercion -> DmdEnv
703 coercionDmdEnv co = mapVarEnv (const topDmd) (coVarsOfCo co)
704 -- The VarSet from coVarsOfCo is really a VarEnv Var
705
706 addVarDmd :: DmdType -> Var -> Demand -> DmdType
707 addVarDmd (DmdType fv ds res) var dmd
708 = DmdType (extendVarEnv_C bothDmd fv var dmd) ds res
709
710 addLazyFVs :: DmdType -> DmdEnv -> DmdType
711 addLazyFVs dmd_ty lazy_fvs
712 = dmd_ty `bothDmdType` mkBothDmdArg lazy_fvs
713 -- Using bothDmdType (rather than just both'ing the envs)
714 -- is vital. Consider
715 -- let f = \x -> (x,y)
716 -- in error (f 3)
717 -- Here, y is treated as a lazy-fv of f, but we must `bothDmd` that L
718 -- demand with the bottom coming up from 'error'
719 --
720 -- I got a loop in the fixpointer without this, due to an interaction
721 -- with the lazy_fv filtering in dmdAnalRhsLetDown. Roughly, it was
722 -- letrec f n x
723 -- = letrec g y = x `fatbar`
724 -- letrec h z = z + ...g...
725 -- in h (f (n-1) x)
726 -- in ...
727 -- In the initial iteration for f, f=Bot
728 -- Suppose h is found to be strict in z, but the occurrence of g in its RHS
729 -- is lazy. Now consider the fixpoint iteration for g, esp the demands it
730 -- places on its free variables. Suppose it places none. Then the
731 -- x `fatbar` ...call to h...
732 -- will give a x->V demand for x. That turns into a L demand for x,
733 -- which floats out of the defn for h. Without the modifyEnv, that
734 -- L demand doesn't get both'd with the Bot coming up from the inner
735 -- call to f. So we just get an L demand for x for g.
736
737 {-
738 Note [Do not strictify the argument dictionaries of a dfun]
739 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
740 The typechecker can tie recursive knots involving dfuns, so we do the
741 conservative thing and refrain from strictifying a dfun's argument
742 dictionaries.
743 -}
744
745 setBndrsDemandInfo :: [Var] -> [Demand] -> [Var]
746 setBndrsDemandInfo (b:bs) (d:ds)
747 | isTyVar b = b : setBndrsDemandInfo bs (d:ds)
748 | otherwise = setIdDemandInfo b d : setBndrsDemandInfo bs ds
749 setBndrsDemandInfo [] ds = ASSERT( null ds ) []
750 setBndrsDemandInfo bs _ = pprPanic "setBndrsDemandInfo" (ppr bs)
751
752 annotateBndr :: AnalEnv -> DmdType -> Var -> (DmdType, Var)
753 -- The returned env has the var deleted
754 -- The returned var is annotated with demand info
755 -- according to the result demand of the provided demand type
756 -- No effect on the argument demands
757 annotateBndr env dmd_ty var
758 | isId var = (dmd_ty', setIdDemandInfo var dmd)
759 | otherwise = (dmd_ty, var)
760 where
761 (dmd_ty', dmd) = findBndrDmd env False dmd_ty var
762
763 annotateLamBndrs :: AnalEnv -> DFunFlag -> DmdType -> [Var] -> (DmdType, [Var])
764 annotateLamBndrs env args_of_dfun ty bndrs = mapAccumR annotate ty bndrs
765 where
766 annotate dmd_ty bndr
767 | isId bndr = annotateLamIdBndr env args_of_dfun dmd_ty bndr
768 | otherwise = (dmd_ty, bndr)
769
770 annotateLamIdBndr :: AnalEnv
771 -> DFunFlag -- is this lambda at the top of the RHS of a dfun?
772 -> DmdType -- Demand type of body
773 -> Id -- Lambda binder
774 -> (DmdType, -- Demand type of lambda
775 Id) -- and binder annotated with demand
776
777 annotateLamIdBndr env arg_of_dfun dmd_ty id
778 -- For lambdas we add the demand to the argument demands
779 -- Only called for Ids
780 = ASSERT( isId id )
781 -- pprTrace "annLamBndr" (vcat [ppr id, ppr _dmd_ty]) $
782 (final_ty, setIdDemandInfo id dmd)
783 where
784 -- Watch out! See note [Lambda-bound unfoldings]
785 final_ty = case maybeUnfoldingTemplate (idUnfolding id) of
786 Nothing -> main_ty
787 Just unf -> main_ty `bothDmdType` unf_ty
788 where
789 (unf_ty, _) = dmdAnalStar env dmd unf
790
791 main_ty = addDemand dmd dmd_ty'
792 (dmd_ty', dmd) = findBndrDmd env arg_of_dfun dmd_ty id
793
794 deleteFVs :: DmdType -> [Var] -> DmdType
795 deleteFVs (DmdType fvs dmds res) bndrs
796 = DmdType (delVarEnvList fvs bndrs) dmds res
797
798 {-
799 Note [CPR for sum types]
800 ~~~~~~~~~~~~~~~~~~~~~~~~
801 At the moment we do not do CPR for let-bindings that
802 * non-top level
803 * bind a sum type
804 Reason: I found that in some benchmarks we were losing let-no-escapes,
805 which messed it all up. Example
806 let j = \x. ....
807 in case y of
808 True -> j False
809 False -> j True
810 If we w/w this we get
811 let j' = \x. ....
812 in case y of
813 True -> case j' False of { (# a #) -> Just a }
814 False -> case j' True of { (# a #) -> Just a }
815 Notice that j' is not a let-no-escape any more.
816
817 However this means in turn that the *enclosing* function
818 may be CPR'd (via the returned Justs). But in the case of
819 sums, there may be Nothing alternatives; and that messes
820 up the sum-type CPR.
821
822 Conclusion: only do this for products. It's still not
823 guaranteed OK for products, but sums definitely lose sometimes.
824
825 Note [CPR for thunks]
826 ~~~~~~~~~~~~~~~~~~~~~
827 If the rhs is a thunk, we usually forget the CPR info, because
828 it is presumably shared (else it would have been inlined, and
829 so we'd lose sharing if w/w'd it into a function). E.g.
830
831 let r = case expensive of
832 (a,b) -> (b,a)
833 in ...
834
835 If we marked r as having the CPR property, then we'd w/w into
836
837 let $wr = \() -> case expensive of
838 (a,b) -> (# b, a #)
839 r = case $wr () of
840 (# b,a #) -> (b,a)
841 in ...
842
843 But now r is a thunk, which won't be inlined, so we are no further ahead.
844 But consider
845
846 f x = let r = case expensive of (a,b) -> (b,a)
847 in if foo r then r else (x,x)
848
849 Does f have the CPR property? Well, no.
850
851 However, if the strictness analyser has figured out (in a previous
852 iteration) that it's strict, then we DON'T need to forget the CPR info.
853 Instead we can retain the CPR info and do the thunk-splitting transform
854 (see WorkWrap.splitThunk).
855
856 This made a big difference to PrelBase.modInt, which had something like
857 modInt = \ x -> let r = ... -> I# v in
858 ...body strict in r...
859 r's RHS isn't a value yet; but modInt returns r in various branches, so
860 if r doesn't have the CPR property then neither does modInt
861 Another case I found in practice (in Complex.magnitude), looks like this:
862 let k = if ... then I# a else I# b
863 in ... body strict in k ....
864 (For this example, it doesn't matter whether k is returned as part of
865 the overall result; but it does matter that k's RHS has the CPR property.)
866 Left to itself, the simplifier will make a join point thus:
867 let $j k = ...body strict in k...
868 if ... then $j (I# a) else $j (I# b)
869 With thunk-splitting, we get instead
870 let $j x = let k = I#x in ...body strict in k...
871 in if ... then $j a else $j b
872 This is much better; there's a good chance the I# won't get allocated.
873
874 The difficulty with this is that we need the strictness type to
875 look at the body... but we now need the body to calculate the demand
876 on the variable, so we can decide whether its strictness type should
877 have a CPR in it or not. Simple solution:
878 a) use strictness info from the previous iteration
879 b) make sure we do at least 2 iterations, by doing a second
880 round for top-level non-recs. Top level recs will get at
881 least 2 iterations except for totally-bottom functions
882 which aren't very interesting anyway.
883
884 NB: strictly_demanded is never true of a top-level Id, or of a recursive Id.
885
886 Note [Optimistic CPR in the "virgin" case]
887 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
888 Demand and strictness info are initialized by top elements. However,
889 this prevents from inferring a CPR property in the first pass of the
890 analyser, so we keep an explicit flag ae_virgin in the AnalEnv
891 datatype.
892
893 We can't start with 'not-demanded' (i.e., top) because then consider
894 f x = let
895 t = ... I# x
896 in
897 if ... then t else I# y else f x'
898
899 In the first iteration we'd have no demand info for x, so assume
900 not-demanded; then we'd get TopRes for f's CPR info. Next iteration
901 we'd see that t was demanded, and so give it the CPR property, but by
902 now f has TopRes, so it will stay TopRes. Instead, by checking the
903 ae_virgin flag at the first time round, we say 'yes t is demanded' the
904 first time.
905
906 However, this does mean that for non-recursive bindings we must
907 iterate twice to be sure of not getting over-optimistic CPR info,
908 in the case where t turns out to be not-demanded. This is handled
909 by dmdAnalTopBind.
910
911
912 Note [NOINLINE and strictness]
913 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
914 The strictness analyser used to have a HACK which ensured that NOINLNE
915 things were not strictness-analysed. The reason was unsafePerformIO.
916 Left to itself, the strictness analyser would discover this strictness
917 for unsafePerformIO:
918 unsafePerformIO: C(U(AV))
919 But then consider this sub-expression
920 unsafePerformIO (\s -> let r = f x in
921 case writeIORef v r s of (# s1, _ #) ->
922 (# s1, r #)
923 The strictness analyser will now find that r is sure to be eval'd,
924 and may then hoist it out. This makes tests/lib/should_run/memo002
925 deadlock.
926
927 Solving this by making all NOINLINE things have no strictness info is overkill.
928 In particular, it's overkill for runST, which is perfectly respectable.
929 Consider
930 f x = runST (return x)
931 This should be strict in x.
932
933 So the new plan is to define unsafePerformIO using the 'lazy' combinator:
934
935 unsafePerformIO (IO m) = lazy (case m realWorld# of (# _, r #) -> r)
936
937 Remember, 'lazy' is a wired-in identity-function Id, of type a->a, which is
938 magically NON-STRICT, and is inlined after strictness analysis. So
939 unsafePerformIO will look non-strict, and that's what we want.
940
941 Now we don't need the hack in the strictness analyser. HOWEVER, this
942 decision does mean that even a NOINLINE function is not entirely
943 opaque: some aspect of its implementation leaks out, notably its
944 strictness. For example, if you have a function implemented by an
945 error stub, but which has RULES, you may want it not to be eliminated
946 in favour of error!
947
948 Note [Lazy and unleasheable free variables]
949 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
950 We put the strict and once-used FVs in the DmdType of the Id, so
951 that at its call sites we unleash demands on its strict fvs.
952 An example is 'roll' in imaginary/wheel-sieve2
953 Something like this:
954 roll x = letrec
955 go y = if ... then roll (x-1) else x+1
956 in
957 go ms
958 We want to see that roll is strict in x, which is because
959 go is called. So we put the DmdEnv for x in go's DmdType.
960
961 Another example:
962
963 f :: Int -> Int -> Int
964 f x y = let t = x+1
965 h z = if z==0 then t else
966 if z==1 then x+1 else
967 x + h (z-1)
968 in h y
969
970 Calling h does indeed evaluate x, but we can only see
971 that if we unleash a demand on x at the call site for t.
972
973 Incidentally, here's a place where lambda-lifting h would
974 lose the cigar --- we couldn't see the joint strictness in t/x
975
976 ON THE OTHER HAND
977 We don't want to put *all* the fv's from the RHS into the
978 DmdType, because that makes fixpointing very slow --- the
979 DmdType gets full of lazy demands that are slow to converge.
980
981
982 Note [Lamba-bound unfoldings]
983 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
984 We allow a lambda-bound variable to carry an unfolding, a facility that is used
985 exclusively for join points; see Note [Case binders and join points]. If so,
986 we must be careful to demand-analyse the RHS of the unfolding! Example
987 \x. \y{=Just x}. <body>
988 Then if <body> uses 'y', then transitively it uses 'x', and we must not
989 forget that fact, otherwise we might make 'x' absent when it isn't.
990
991
992 ************************************************************************
993 * *
994 \subsection{Strictness signatures}
995 * *
996 ************************************************************************
997 -}
998
999 type DFunFlag = Bool -- indicates if the lambda being considered is in the
1000 -- sequence of lambdas at the top of the RHS of a dfun
1001 notArgOfDfun :: DFunFlag
1002 notArgOfDfun = False
1003
1004 data AnalEnv
1005 = AE { ae_dflags :: DynFlags
1006 , ae_sigs :: SigEnv
1007 , ae_virgin :: Bool -- True on first iteration only
1008 -- See Note [Initialising strictness]
1009 , ae_rec_tc :: RecTcChecker
1010 , ae_fam_envs :: FamInstEnvs
1011 }
1012
1013 -- We use the se_env to tell us whether to
1014 -- record info about a variable in the DmdEnv
1015 -- We do so if it's a LocalId, but not top-level
1016 --
1017 -- The DmdEnv gives the demand on the free vars of the function
1018 -- when it is given enough args to satisfy the strictness signature
1019
1020 type SigEnv = VarEnv (StrictSig, TopLevelFlag)
1021
1022 instance Outputable AnalEnv where
1023 ppr (AE { ae_sigs = env, ae_virgin = virgin })
1024 = text "AE" <+> braces (vcat
1025 [ text "ae_virgin =" <+> ppr virgin
1026 , text "ae_sigs =" <+> ppr env ])
1027
1028 emptyAnalEnv :: DynFlags -> FamInstEnvs -> AnalEnv
1029 emptyAnalEnv dflags fam_envs
1030 = AE { ae_dflags = dflags
1031 , ae_sigs = emptySigEnv
1032 , ae_virgin = True
1033 , ae_rec_tc = initRecTc
1034 , ae_fam_envs = fam_envs
1035 }
1036
1037 emptySigEnv :: SigEnv
1038 emptySigEnv = emptyVarEnv
1039
1040 sigEnv :: AnalEnv -> SigEnv
1041 sigEnv = ae_sigs
1042
1043 updSigEnv :: AnalEnv -> SigEnv -> AnalEnv
1044 updSigEnv env sigs = env { ae_sigs = sigs }
1045
1046 extendAnalEnv :: TopLevelFlag -> AnalEnv -> Id -> StrictSig -> AnalEnv
1047 extendAnalEnv top_lvl env var sig
1048 = env { ae_sigs = extendSigEnv top_lvl (ae_sigs env) var sig }
1049
1050 extendSigEnv :: TopLevelFlag -> SigEnv -> Id -> StrictSig -> SigEnv
1051 extendSigEnv top_lvl sigs var sig = extendVarEnv sigs var (sig, top_lvl)
1052
1053 lookupSigEnv :: AnalEnv -> Id -> Maybe (StrictSig, TopLevelFlag)
1054 lookupSigEnv env id = lookupVarEnv (ae_sigs env) id
1055
1056 getStrictness :: AnalEnv -> Id -> StrictSig
1057 getStrictness env fn
1058 | isGlobalId fn = idStrictness fn
1059 | Just (sig, _) <- lookupSigEnv env fn = sig
1060 | otherwise = nopSig
1061
1062 addInitialSigs :: TopLevelFlag -> AnalEnv -> [Id] -> AnalEnv
1063 -- See Note [Initialising strictness]
1064 addInitialSigs top_lvl env@(AE { ae_sigs = sigs, ae_virgin = virgin }) ids
1065 = env { ae_sigs = extendVarEnvList sigs [ (id, (init_sig id, top_lvl))
1066 | id <- ids ] }
1067 where
1068 init_sig | virgin = \_ -> botSig
1069 | otherwise = idStrictness
1070
1071 nonVirgin :: AnalEnv -> AnalEnv
1072 nonVirgin env = env { ae_virgin = False }
1073
1074 extendSigsWithLam :: AnalEnv -> Id -> AnalEnv
1075 -- Extend the AnalEnv when we meet a lambda binder
1076 extendSigsWithLam env id
1077 | isId id
1078 , isStrictDmd (idDemandInfo id) || ae_virgin env
1079 -- See Note [Optimistic CPR in the "virgin" case]
1080 -- See Note [Initial CPR for strict binders]
1081 , Just (dc,_,_,_) <- deepSplitProductType_maybe (ae_fam_envs env) $ idType id
1082 = extendAnalEnv NotTopLevel env id (cprProdSig (dataConRepArity dc))
1083
1084 | otherwise
1085 = env
1086
1087 extendEnvForProdAlt :: AnalEnv -> CoreExpr -> Id -> DataCon -> [Var] -> AnalEnv
1088 -- See Note [CPR in a product case alternative]
1089 extendEnvForProdAlt env scrut case_bndr dc bndrs
1090 = foldl do_con_arg env1 ids_w_strs
1091 where
1092 env1 = extendAnalEnv NotTopLevel env case_bndr case_bndr_sig
1093
1094 ids_w_strs = filter isId bndrs `zip` dataConRepStrictness dc
1095 case_bndr_sig = cprProdSig (dataConRepArity dc)
1096 fam_envs = ae_fam_envs env
1097
1098 do_con_arg env (id, str)
1099 | let is_strict = isStrictDmd (idDemandInfo id) || isMarkedStrict str
1100 , ae_virgin env || (is_var_scrut && is_strict) -- See Note [CPR in a product case alternative]
1101 , Just (dc,_,_,_) <- deepSplitProductType_maybe fam_envs $ idType id
1102 = extendAnalEnv NotTopLevel env id (cprProdSig (dataConRepArity dc))
1103 | otherwise
1104 = env
1105
1106 is_var_scrut = is_var scrut
1107 is_var (Cast e _) = is_var e
1108 is_var (Var v) = isLocalId v
1109 is_var _ = False
1110
1111 addDataConStrictness :: DataCon -> [Demand] -> [Demand]
1112 -- See Note [Add demands for strict constructors]
1113 addDataConStrictness con ds
1114 = ASSERT2( equalLength strs ds, ppr con $$ ppr strs $$ ppr ds )
1115 zipWith add ds strs
1116 where
1117 strs = dataConRepStrictness con
1118 add dmd str | isMarkedStrict str
1119 , not (isAbsDmd dmd) = dmd `bothDmd` seqDmd
1120 | otherwise = dmd
1121
1122 findBndrsDmds :: AnalEnv -> DmdType -> [Var] -> (DmdType, [Demand])
1123 -- Return the demands on the Ids in the [Var]
1124 findBndrsDmds env dmd_ty bndrs
1125 = go dmd_ty bndrs
1126 where
1127 go dmd_ty [] = (dmd_ty, [])
1128 go dmd_ty (b:bs)
1129 | isId b = let (dmd_ty1, dmds) = go dmd_ty bs
1130 (dmd_ty2, dmd) = findBndrDmd env False dmd_ty1 b
1131 in (dmd_ty2, dmd : dmds)
1132 | otherwise = go dmd_ty bs
1133
1134 findBndrDmd :: AnalEnv -> Bool -> DmdType -> Id -> (DmdType, Demand)
1135 -- See Note [Trimming a demand to a type] in Demand.hs
1136 findBndrDmd env arg_of_dfun dmd_ty id
1137 = (dmd_ty', dmd')
1138 where
1139 dmd' = killUsageDemand (ae_dflags env) $
1140 strictify $
1141 trimToType starting_dmd (findTypeShape fam_envs id_ty)
1142
1143 (dmd_ty', starting_dmd) = peelFV dmd_ty id
1144
1145 id_ty = idType id
1146
1147 strictify dmd
1148 | gopt Opt_DictsStrict (ae_dflags env)
1149 -- We never want to strictify a recursive let. At the moment
1150 -- annotateBndr is only call for non-recursive lets; if that
1151 -- changes, we need a RecFlag parameter and another guard here.
1152 , not arg_of_dfun -- See Note [Do not strictify the argument dictionaries of a dfun]
1153 = strictifyDictDmd id_ty dmd
1154 | otherwise
1155 = dmd
1156
1157 fam_envs = ae_fam_envs env
1158
1159 set_idStrictness :: AnalEnv -> Id -> StrictSig -> Id
1160 set_idStrictness env id sig
1161 = setIdStrictness id (killUsageSig (ae_dflags env) sig)
1162
1163 dumpStrSig :: CoreProgram -> SDoc
1164 dumpStrSig binds = vcat (map printId ids)
1165 where
1166 ids = sortBy (stableNameCmp `on` getName) (concatMap getIds binds)
1167 getIds (NonRec i _) = [ i ]
1168 getIds (Rec bs) = map fst bs
1169 printId id | isExportedId id = ppr id <> colon <+> pprIfaceStrictSig (idStrictness id)
1170 | otherwise = empty
1171
1172 {- Note [CPR in a product case alternative]
1173 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1174 In a case alternative for a product type, we want to give some of the
1175 binders the CPR property. Specifically
1176
1177 * The case binder; inside the alternative, the case binder always has
1178 the CPR property, meaning that a case on it will successfully cancel.
1179 Example:
1180 f True x = case x of y { I# x' -> if x' ==# 3
1181 then y
1182 else I# 8 }
1183 f False x = I# 3
1184
1185 By giving 'y' the CPR property, we ensure that 'f' does too, so we get
1186 f b x = case fw b x of { r -> I# r }
1187 fw True x = case x of y { I# x' -> if x' ==# 3 then x' else 8 }
1188 fw False x = 3
1189
1190 Of course there is the usual risk of re-boxing: we have 'x' available
1191 boxed and unboxed, but we return the unboxed version for the wrapper to
1192 box. If the wrapper doesn't cancel with its caller, we'll end up
1193 re-boxing something that we did have available in boxed form.
1194
1195 * Any strict binders with product type, can use
1196 Note [Initial CPR for strict binders]. But we can go a little
1197 further. Consider
1198
1199 data T = MkT !Int Int
1200
1201 f2 (MkT x y) | y>0 = f2 (MkT x (y-1))
1202 | otherwise = x
1203
1204 For $wf2 we are going to unbox the MkT *and*, since it is strict, the
1205 first agument of the MkT; see Note [Add demands for strict constructors].
1206 But then we don't want box it up again when returning it! We want
1207 'f2' to have the CPR property, so we give 'x' the CPR property.
1208
1209 * It's a bit delicate because if this case is scrutinising something other
1210 than an argument the original function, we really don't have the unboxed
1211 version available. E.g
1212 g v = case foo v of
1213 MkT x y | y>0 -> ...
1214 | otherwise -> x
1215 Here we don't have the unboxed 'x' available. Hence the
1216 is_var_scrut test when making use of the strictness annoatation.
1217 Slightly ad-hoc, because even if the scrutinee *is* a variable it
1218 might not be a onre of the arguments to the original function, or a
1219 sub-component thereof. But it's simple, and nothing terrible
1220 happens if we get it wrong. e.g. Trac #10694.
1221
1222 Note [Add demands for strict constructors]
1223 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1224 Consider this program (due to Roman):
1225
1226 data X a = X !a
1227
1228 foo :: X Int -> Int -> Int
1229 foo (X a) n = go 0
1230 where
1231 go i | i < n = a + go (i+1)
1232 | otherwise = 0
1233
1234 We want the worker for 'foo' too look like this:
1235
1236 $wfoo :: Int# -> Int# -> Int#
1237
1238 with the first argument unboxed, so that it is not eval'd each time
1239 around the 'go' loop (which would otherwise happen, since 'foo' is not
1240 strict in 'a'). It is sound for the wrapper to pass an unboxed arg
1241 because X is strict, so its argument must be evaluated. And if we
1242 *don't* pass an unboxed argument, we can't even repair it by adding a
1243 `seq` thus:
1244
1245 foo (X a) n = a `seq` go 0
1246
1247 because the seq is discarded (very early) since X is strict!
1248
1249 We achieve the effect using addDataConStrictness. It is called at a
1250 case expression, such as the pattern match on (X a) in the example
1251 above. After computing how 'a' is used in the alternatives, we add an
1252 extra 'seqDmd' to it. The case alternative isn't itself strict in the
1253 sub-components, but simply evaluating the scrutinee to HNF does force
1254 those sub-components.
1255
1256 If the argument is not used at all in the alternative (i.e. it is
1257 Absent), then *don't* add a 'seqDmd'. If we do, it makes it look used
1258 and hence it'll be passed to the worker when it doesn't need to be.
1259 Hence the isAbsDmd test in addDataConStrictness.
1260
1261 There is the usual danger of reboxing, which as usual we ignore. But
1262 if X is monomorphic, and has an UNPACK pragma, then this optimisation
1263 is even more important. We don't want the wrapper to rebox an unboxed
1264 argument, and pass an Int to $wfoo!
1265
1266
1267 Note [Initial CPR for strict binders]
1268 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1269 CPR is initialized for a lambda binder in an optimistic manner, i.e,
1270 if the binder is used strictly and at least some of its components as
1271 a product are used, which is checked by the value of the absence
1272 demand.
1273
1274 If the binder is marked demanded with a strict demand, then give it a
1275 CPR signature. Here's a concrete example ('f1' in test T10482a),
1276 assuming h is strict:
1277
1278 f1 :: Int -> Int
1279 f1 x = case h x of
1280 A -> x
1281 B -> f1 (x-1)
1282 C -> x+1
1283
1284 If we notice that 'x' is used strictly, we can give it the CPR
1285 property; and hence f1 gets the CPR property too. It's sound (doesn't
1286 change strictness) to give it the CPR property because by the time 'x'
1287 is returned (case A above), it'll have been evaluated (by the wrapper
1288 of 'h' in the example).
1289
1290 Moreover, if f itself is strict in x, then we'll pass x unboxed to
1291 f1, and so the boxed version *won't* be available; in that case it's
1292 very helpful to give 'x' the CPR property.
1293
1294 Note that
1295
1296 * We only want to do this for something that definitely
1297 has product type, else we may get over-optimistic CPR results
1298 (e.g. from \x -> x!).
1299
1300 * See Note [CPR examples]
1301
1302 Note [CPR examples]
1303 ~~~~~~~~~~~~~~~~~~~~
1304 Here are some examples (stranal/should_compile/T10482a) of the
1305 usefulness of Note [CPR in a product case alternative]. The main
1306 point: all of these functions can have the CPR property.
1307
1308 ------- f1 -----------
1309 -- x is used strictly by h, so it'll be available
1310 -- unboxed before it is returned in the True branch
1311
1312 f1 :: Int -> Int
1313 f1 x = case h x x of
1314 True -> x
1315 False -> f1 (x-1)
1316
1317
1318 ------- f2 -----------
1319 -- x is a strict field of MkT2, so we'll pass it unboxed
1320 -- to $wf2, so it's available unboxed. This depends on
1321 -- the case expression analysing (a subcomponent of) one
1322 -- of the original arguments to the function, so it's
1323 -- a bit more delicate.
1324
1325 data T2 = MkT2 !Int Int
1326
1327 f2 :: T2 -> Int
1328 f2 (MkT2 x y) | y>0 = f2 (MkT2 x (y-1))
1329 | otherwise = x
1330
1331
1332 ------- f3 -----------
1333 -- h is strict in x, so x will be unboxed before it
1334 -- is rerturned in the otherwise case.
1335
1336 data T3 = MkT3 Int Int
1337
1338 f1 :: T3 -> Int
1339 f1 (MkT3 x y) | h x y = f3 (MkT3 x (y-1))
1340 | otherwise = x
1341
1342
1343 ------- f4 -----------
1344 -- Just like f2, but MkT4 can't unbox its strict
1345 -- argument automatically, as f2 can
1346
1347 data family Foo a
1348 newtype instance Foo Int = Foo Int
1349
1350 data T4 a = MkT4 !(Foo a) Int
1351
1352 f4 :: T4 Int -> Int
1353 f4 (MkT4 x@(Foo v) y) | y>0 = f4 (MkT4 x (y-1))
1354 | otherwise = v
1355
1356
1357 Note [Initialising strictness]
1358 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1359 See section 9.2 (Finding fixpoints) of the paper.
1360
1361 Our basic plan is to initialise the strictness of each Id in a
1362 recursive group to "bottom", and find a fixpoint from there. However,
1363 this group B might be inside an *enclosing* recursive group A, in
1364 which case we'll do the entire fixpoint shebang on for each iteration
1365 of A. This can be illustrated by the following example:
1366
1367 Example:
1368
1369 f [] = []
1370 f (x:xs) = let g [] = f xs
1371 g (y:ys) = y+1 : g ys
1372 in g (h x)
1373
1374 At each iteration of the fixpoint for f, the analyser has to find a
1375 fixpoint for the enclosed function g. In the meantime, the demand
1376 values for g at each iteration for f are *greater* than those we
1377 encountered in the previous iteration for f. Therefore, we can begin
1378 the fixpoint for g not with the bottom value but rather with the
1379 result of the previous analysis. I.e., when beginning the fixpoint
1380 process for g, we can start from the demand signature computed for g
1381 previously and attached to the binding occurrence of g.
1382
1383 To speed things up, we initialise each iteration of A (the enclosing
1384 one) from the result of the last one, which is neatly recorded in each
1385 binder. That way we make use of earlier iterations of the fixpoint
1386 algorithm. (Cunning plan.)
1387
1388 But on the *first* iteration we want to *ignore* the current strictness
1389 of the Id, and start from "bottom". Nowadays the Id can have a current
1390 strictness, because interface files record strictness for nested bindings.
1391 To know when we are in the first iteration, we look at the ae_virgin
1392 field of the AnalEnv.
1393
1394
1395 Note [Final Demand Analyser run]
1396 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1397 Some of the information that the demand analyser determines is not always
1398 preserved by the simplifier. For example, the simplifier will happily rewrite
1399 \y [Demand=1*U] let x = y in x + x
1400 to
1401 \y [Demand=1*U] y + y
1402 which is quite a lie.
1403
1404 The once-used information is (currently) only used by the code
1405 generator, though. So:
1406
1407 * We zap the used-once info in the worker-wrapper;
1408 see Note [Zapping Used Once info in WorkWrap] in WorkWrap. If it's
1409 not reliable, it's better not to have it at all.
1410
1411 * Just before TidyCore, we add a pass of the demand analyser,
1412 but WITHOUT subsequent worker/wrapper and simplifier,
1413 right before TidyCore. See SimplCore.getCoreToDo.
1414
1415 This way, correct information finds its way into the module interface
1416 (strictness signatures!) and the code generator (single-entry thunks!)
1417
1418 Note that, in contrast, the single-call information (C1(..)) /can/ be
1419 relied upon, as the simplifier tends to be very careful about not
1420 duplicating actual function calls.
1421
1422 Also see #11731.
1423 -}