168ece971c39962377af06d25f24c667b9757016
[ghc.git] / compiler / simplCore / SimplCore.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 \section[SimplCore]{Driver for simplifying @Core@ programs}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplCore ( core2core, simplifyExpr ) where
10
11 #include "HsVersions.h"
12
13 import GhcPrelude
14
15 import DynFlags
16 import CoreSyn
17 import HscTypes
18 import CSE ( cseProgram )
19 import Rules ( mkRuleBase, unionRuleBase,
20 extendRuleBaseList, ruleCheckProgram, addRuleInfo,
21 getRules )
22 import PprCore ( pprCoreBindings, pprCoreExpr )
23 import OccurAnal ( occurAnalysePgm, occurAnalyseExpr )
24 import IdInfo
25 import CoreStats ( coreBindsSize, coreBindsStats, exprSize )
26 import CoreUtils ( mkTicks, stripTicksTop )
27 import CoreLint ( endPass, lintPassResult, dumpPassResult,
28 lintAnnots )
29 import Simplify ( simplTopBinds, simplExpr, simplRules )
30 import SimplUtils ( simplEnvForGHCi, activeRule, activeUnfolding )
31 import SimplEnv
32 import SimplMonad
33 import CoreMonad
34 import qualified ErrUtils as Err
35 import FloatIn ( floatInwards )
36 import FloatOut ( floatOutwards )
37 import FamInstEnv
38 import Id
39 import ErrUtils ( withTiming )
40 import BasicTypes ( CompilerPhase(..), isDefaultInlinePragma, defaultInlinePragma )
41 import VarSet
42 import VarEnv
43 import LiberateCase ( liberateCase )
44 import SAT ( doStaticArgs )
45 import Specialise ( specProgram)
46 import SpecConstr ( specConstrProgram)
47 import DmdAnal ( dmdAnalProgram )
48 import CallArity ( callArityAnalProgram )
49 import Exitify ( exitifyProgram )
50 import WorkWrap ( wwTopBinds )
51 import SrcLoc
52 import Util
53 import Module
54 import Plugins ( withPlugins, installCoreToDos )
55 import DynamicLoading -- ( initializePlugins )
56
57 import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply )
58 import UniqFM
59 import Outputable
60 import Control.Monad
61 import qualified GHC.LanguageExtensions as LangExt
62 {-
63 ************************************************************************
64 * *
65 \subsection{The driver for the simplifier}
66 * *
67 ************************************************************************
68 -}
69
70 core2core :: HscEnv -> ModGuts -> IO ModGuts
71 core2core hsc_env guts@(ModGuts { mg_module = mod
72 , mg_loc = loc
73 , mg_deps = deps
74 , mg_rdr_env = rdr_env })
75 = do { us <- mkSplitUniqSupply 's'
76 -- make sure all plugins are loaded
77
78 ; let builtin_passes = getCoreToDo dflags
79 orph_mods = mkModuleSet (mod : dep_orphs deps)
80 ;
81 ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base us mod
82 orph_mods print_unqual loc $
83 do { hsc_env' <- getHscEnv
84 ; dflags' <- liftIO $ initializePlugins hsc_env'
85 (hsc_dflags hsc_env')
86 ; all_passes <- withPlugins dflags'
87 installCoreToDos
88 builtin_passes
89 ; runCorePasses all_passes guts }
90
91 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_stats
92 "Grand total simplifier statistics"
93 (pprSimplCount stats)
94
95 ; return guts2 }
96 where
97 dflags = hsc_dflags hsc_env
98 home_pkg_rules = hptRules hsc_env (dep_mods deps)
99 hpt_rule_base = mkRuleBase home_pkg_rules
100 print_unqual = mkPrintUnqualified dflags rdr_env
101 -- mod: get the module out of the current HscEnv so we can retrieve it from the monad.
102 -- This is very convienent for the users of the monad (e.g. plugins do not have to
103 -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
104 -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
105 -- would mean our cached value would go out of date.
106
107 {-
108 ************************************************************************
109 * *
110 Generating the main optimisation pipeline
111 * *
112 ************************************************************************
113 -}
114
115 getCoreToDo :: DynFlags -> [CoreToDo]
116 getCoreToDo dflags
117 = flatten_todos core_todo
118 where
119 opt_level = optLevel dflags
120 phases = simplPhases dflags
121 max_iter = maxSimplIterations dflags
122 rule_check = ruleCheck dflags
123 call_arity = gopt Opt_CallArity dflags
124 exitification = gopt Opt_Exitification dflags
125 strictness = gopt Opt_Strictness dflags
126 full_laziness = gopt Opt_FullLaziness dflags
127 do_specialise = gopt Opt_Specialise dflags
128 do_float_in = gopt Opt_FloatIn dflags
129 cse = gopt Opt_CSE dflags
130 spec_constr = gopt Opt_SpecConstr dflags
131 liberate_case = gopt Opt_LiberateCase dflags
132 late_dmd_anal = gopt Opt_LateDmdAnal dflags
133 late_specialise = gopt Opt_LateSpecialise dflags
134 static_args = gopt Opt_StaticArgumentTransformation dflags
135 rules_on = gopt Opt_EnableRewriteRules dflags
136 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
137 ww_on = gopt Opt_WorkerWrapper dflags
138 static_ptrs = xopt LangExt.StaticPointers dflags
139
140 maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
141
142 maybe_strictness_before phase
143 = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness
144
145 base_mode = SimplMode { sm_phase = panic "base_mode"
146 , sm_names = []
147 , sm_dflags = dflags
148 , sm_rules = rules_on
149 , sm_eta_expand = eta_expand_on
150 , sm_inline = True
151 , sm_case_case = True }
152
153 simpl_phase phase names iter
154 = CoreDoPasses
155 $ [ maybe_strictness_before phase
156 , CoreDoSimplify iter
157 (base_mode { sm_phase = Phase phase
158 , sm_names = names })
159
160 , maybe_rule_check (Phase phase) ]
161
162 simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
163 | phase <- [phases, phases-1 .. 1] ]
164
165
166 -- initial simplify: mk specialiser happy: minimum effort please
167 simpl_gently = CoreDoSimplify max_iter
168 (base_mode { sm_phase = InitialPhase
169 , sm_names = ["Gentle"]
170 , sm_rules = rules_on -- Note [RULEs enabled in SimplGently]
171 , sm_inline = True
172 -- See Note [Inline in InitialPhase]
173 , sm_case_case = False })
174 -- Don't do case-of-case transformations.
175 -- This makes full laziness work better
176
177 strictness_pass = if ww_on
178 then [CoreDoStrictness,CoreDoWorkerWrapper]
179 else [CoreDoStrictness]
180
181
182 -- New demand analyser
183 demand_analyser = (CoreDoPasses (
184 strictness_pass ++
185 [simpl_phase 0 ["post-worker-wrapper"] max_iter]
186 ))
187
188 -- Static forms are moved to the top level with the FloatOut pass.
189 -- See Note [Grand plan for static forms] in StaticPtrTable.
190 static_ptrs_float_outwards =
191 runWhen static_ptrs $ CoreDoPasses
192 [ simpl_gently -- Float Out can't handle type lets (sometimes created
193 -- by simpleOptPgm via mkParallelBindings)
194 , CoreDoFloatOutwards FloatOutSwitches
195 { floatOutLambdas = Just 0
196 , floatOutConstants = True
197 , floatOutOverSatApps = False
198 , floatToTopLevelOnly = True
199 }
200 ]
201
202 core_todo =
203 if opt_level == 0 then
204 [ static_ptrs_float_outwards,
205 CoreDoSimplify max_iter
206 (base_mode { sm_phase = Phase 0
207 , sm_names = ["Non-opt simplification"] })
208 ]
209
210 else {- opt_level >= 1 -} [
211
212 -- We want to do the static argument transform before full laziness as it
213 -- may expose extra opportunities to float things outwards. However, to fix
214 -- up the output of the transformation we need at do at least one simplify
215 -- after this before anything else
216 runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
217
218 -- initial simplify: mk specialiser happy: minimum effort please
219 simpl_gently,
220
221 -- Specialisation is best done before full laziness
222 -- so that overloaded functions have all their dictionary lambdas manifest
223 runWhen do_specialise CoreDoSpecialising,
224
225 if full_laziness then
226 CoreDoFloatOutwards FloatOutSwitches {
227 floatOutLambdas = Just 0,
228 floatOutConstants = True,
229 floatOutOverSatApps = False,
230 floatToTopLevelOnly = False }
231 -- Was: gentleFloatOutSwitches
232 --
233 -- I have no idea why, but not floating constants to
234 -- top level is very bad in some cases.
235 --
236 -- Notably: p_ident in spectral/rewrite
237 -- Changing from "gentle" to "constantsOnly"
238 -- improved rewrite's allocation by 19%, and
239 -- made 0.0% difference to any other nofib
240 -- benchmark
241 --
242 -- Not doing floatOutOverSatApps yet, we'll do
243 -- that later on when we've had a chance to get more
244 -- accurate arity information. In fact it makes no
245 -- difference at all to performance if we do it here,
246 -- but maybe we save some unnecessary to-and-fro in
247 -- the simplifier.
248 else
249 -- Even with full laziness turned off, we still need to float static
250 -- forms to the top level. See Note [Grand plan for static forms] in
251 -- StaticPtrTable.
252 static_ptrs_float_outwards,
253
254 simpl_phases,
255
256 -- Phase 0: allow all Ids to be inlined now
257 -- This gets foldr inlined before strictness analysis
258
259 -- At least 3 iterations because otherwise we land up with
260 -- huge dead expressions because of an infelicity in the
261 -- simplifier.
262 -- let k = BIG in foldr k z xs
263 -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
264 -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
265 -- Don't stop now!
266 simpl_phase 0 ["main"] (max max_iter 3),
267
268 runWhen do_float_in CoreDoFloatInwards,
269 -- Run float-inwards immediately before the strictness analyser
270 -- Doing so pushes bindings nearer their use site and hence makes
271 -- them more likely to be strict. These bindings might only show
272 -- up after the inlining from simplification. Example in fulsom,
273 -- Csg.calc, where an arg of timesDouble thereby becomes strict.
274
275 runWhen call_arity $ CoreDoPasses
276 [ CoreDoCallArity
277 , simpl_phase 0 ["post-call-arity"] max_iter
278 ],
279
280 runWhen strictness demand_analyser,
281
282 runWhen exitification CoreDoExitify,
283 -- See note [Placement of the exitification pass]
284
285 runWhen full_laziness $
286 CoreDoFloatOutwards FloatOutSwitches {
287 floatOutLambdas = floatLamArgs dflags,
288 floatOutConstants = True,
289 floatOutOverSatApps = True,
290 floatToTopLevelOnly = False },
291 -- nofib/spectral/hartel/wang doubles in speed if you
292 -- do full laziness late in the day. It only happens
293 -- after fusion and other stuff, so the early pass doesn't
294 -- catch it. For the record, the redex is
295 -- f_el22 (f_el21 r_midblock)
296
297
298 runWhen cse CoreCSE,
299 -- We want CSE to follow the final full-laziness pass, because it may
300 -- succeed in commoning up things floated out by full laziness.
301 -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
302
303 runWhen do_float_in CoreDoFloatInwards,
304
305 maybe_rule_check (Phase 0),
306
307 -- Case-liberation for -O2. This should be after
308 -- strictness analysis and the simplification which follows it.
309 runWhen liberate_case (CoreDoPasses [
310 CoreLiberateCase,
311 simpl_phase 0 ["post-liberate-case"] max_iter
312 ]), -- Run the simplifier after LiberateCase to vastly
313 -- reduce the possibility of shadowing
314 -- Reason: see Note [Shadowing] in SpecConstr.hs
315
316 runWhen spec_constr CoreDoSpecConstr,
317
318 maybe_rule_check (Phase 0),
319
320 runWhen late_specialise
321 (CoreDoPasses [ CoreDoSpecialising
322 , simpl_phase 0 ["post-late-spec"] max_iter]),
323
324 -- LiberateCase can yield new CSE opportunities because it peels
325 -- off one layer of a recursive function (concretely, I saw this
326 -- in wheel-sieve1), and I'm guessing that SpecConstr can too
327 -- And CSE is a very cheap pass. So it seems worth doing here.
328 runWhen ((liberate_case || spec_constr) && cse) CoreCSE,
329
330 -- Final clean-up simplification:
331 simpl_phase 0 ["final"] max_iter,
332
333 runWhen late_dmd_anal $ CoreDoPasses (
334 strictness_pass ++
335 [simpl_phase 0 ["post-late-ww"] max_iter]
336 ),
337
338 -- Final run of the demand_analyser, ensures that one-shot thunks are
339 -- really really one-shot thunks. Only needed if the demand analyser
340 -- has run at all. See Note [Final Demand Analyser run] in DmdAnal
341 -- It is EXTREMELY IMPORTANT to run this pass, otherwise execution
342 -- can become /exponentially/ more expensive. See Trac #11731, #12996.
343 runWhen (strictness || late_dmd_anal) CoreDoStrictness,
344
345 maybe_rule_check (Phase 0)
346 ]
347
348 -- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity.
349 flatten_todos [] = []
350 flatten_todos (CoreDoNothing : rest) = flatten_todos rest
351 flatten_todos (CoreDoPasses passes : rest) =
352 flatten_todos passes ++ flatten_todos rest
353 flatten_todos (todo : rest) = todo : flatten_todos rest
354
355 {- Note [Inline in InitialPhase]
356 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
357 In GHC 8 and earlier we did not inline anything in the InitialPhase. But that is
358 confusing for users because when they say INLINE they expect the function to inline
359 right away.
360
361 So now we do inlining immediately, even in the InitialPhase, assuming that the
362 Id's Activation allows it.
363
364 This is a surprisingly big deal. Compiler performance improved a lot
365 when I made this change:
366
367 perf/compiler/T5837.run T5837 [stat too good] (normal)
368 perf/compiler/parsing001.run parsing001 [stat too good] (normal)
369 perf/compiler/T12234.run T12234 [stat too good] (optasm)
370 perf/compiler/T9020.run T9020 [stat too good] (optasm)
371 perf/compiler/T3064.run T3064 [stat too good] (normal)
372 perf/compiler/T9961.run T9961 [stat too good] (normal)
373 perf/compiler/T13056.run T13056 [stat too good] (optasm)
374 perf/compiler/T9872d.run T9872d [stat too good] (normal)
375 perf/compiler/T783.run T783 [stat too good] (normal)
376 perf/compiler/T12227.run T12227 [stat too good] (normal)
377 perf/should_run/lazy-bs-alloc.run lazy-bs-alloc [stat too good] (normal)
378 perf/compiler/T1969.run T1969 [stat too good] (normal)
379 perf/compiler/T9872a.run T9872a [stat too good] (normal)
380 perf/compiler/T9872c.run T9872c [stat too good] (normal)
381 perf/compiler/T9872b.run T9872b [stat too good] (normal)
382 perf/compiler/T9872d.run T9872d [stat too good] (normal)
383
384 Note [RULEs enabled in SimplGently]
385 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
386 RULES are enabled when doing "gentle" simplification. Two reasons:
387
388 * We really want the class-op cancellation to happen:
389 op (df d1 d2) --> $cop3 d1 d2
390 because this breaks the mutual recursion between 'op' and 'df'
391
392 * I wanted the RULE
393 lift String ===> ...
394 to work in Template Haskell when simplifying
395 splices, so we get simpler code for literal strings
396
397 But watch out: list fusion can prevent floating. So use phase control
398 to switch off those rules until after floating.
399
400 ************************************************************************
401 * *
402 The CoreToDo interpreter
403 * *
404 ************************************************************************
405 -}
406
407 runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts
408 runCorePasses passes guts
409 = foldM do_pass guts passes
410 where
411 do_pass guts CoreDoNothing = return guts
412 do_pass guts (CoreDoPasses ps) = runCorePasses ps guts
413 do_pass guts pass
414 = withTiming getDynFlags
415 (ppr pass <+> brackets (ppr mod))
416 (const ()) $ do
417 { guts' <- lintAnnots (ppr pass) (doCorePass pass) guts
418 ; endPass pass (mg_binds guts') (mg_rules guts')
419 ; return guts' }
420
421 mod = mg_module guts
422
423 doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts
424 doCorePass pass@(CoreDoSimplify {}) = {-# SCC "Simplify" #-}
425 simplifyPgm pass
426
427 doCorePass CoreCSE = {-# SCC "CommonSubExpr" #-}
428 doPass cseProgram
429
430 doCorePass CoreLiberateCase = {-# SCC "LiberateCase" #-}
431 doPassD liberateCase
432
433 doCorePass CoreDoFloatInwards = {-# SCC "FloatInwards" #-}
434 floatInwards
435
436 doCorePass (CoreDoFloatOutwards f) = {-# SCC "FloatOutwards" #-}
437 doPassDUM (floatOutwards f)
438
439 doCorePass CoreDoStaticArgs = {-# SCC "StaticArgs" #-}
440 doPassU doStaticArgs
441
442 doCorePass CoreDoCallArity = {-# SCC "CallArity" #-}
443 doPassD callArityAnalProgram
444
445 doCorePass CoreDoExitify = {-# SCC "Exitify" #-}
446 doPass exitifyProgram
447
448 doCorePass CoreDoStrictness = {-# SCC "NewStranal" #-}
449 doPassDFM dmdAnalProgram
450
451 doCorePass CoreDoWorkerWrapper = {-# SCC "WorkWrap" #-}
452 doPassDFU wwTopBinds
453
454 doCorePass CoreDoSpecialising = {-# SCC "Specialise" #-}
455 specProgram
456
457 doCorePass CoreDoSpecConstr = {-# SCC "SpecConstr" #-}
458 specConstrProgram
459
460 doCorePass CoreDoPrintCore = observe printCore
461 doCorePass (CoreDoRuleCheck phase pat) = ruleCheckPass phase pat
462 doCorePass CoreDoNothing = return
463 doCorePass (CoreDoPasses passes) = runCorePasses passes
464
465 #if defined(GHCI)
466 doCorePass (CoreDoPluginPass _ pass) = {-# SCC "Plugin" #-} pass
467 #else
468 doCorePass pass@CoreDoPluginPass {} = pprPanic "doCorePass" (ppr pass)
469 #endif
470
471 doCorePass pass@CoreDesugar = pprPanic "doCorePass" (ppr pass)
472 doCorePass pass@CoreDesugarOpt = pprPanic "doCorePass" (ppr pass)
473 doCorePass pass@CoreTidy = pprPanic "doCorePass" (ppr pass)
474 doCorePass pass@CorePrep = pprPanic "doCorePass" (ppr pass)
475 doCorePass pass@CoreOccurAnal = pprPanic "doCorePass" (ppr pass)
476
477 {-
478 ************************************************************************
479 * *
480 \subsection{Core pass combinators}
481 * *
482 ************************************************************************
483 -}
484
485 printCore :: DynFlags -> CoreProgram -> IO ()
486 printCore dflags binds
487 = Err.dumpIfSet dflags True "Print Core" (pprCoreBindings binds)
488
489 ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts
490 ruleCheckPass current_phase pat guts =
491 withTiming getDynFlags
492 (text "RuleCheck"<+>brackets (ppr $ mg_module guts))
493 (const ()) $ do
494 { rb <- getRuleBase
495 ; dflags <- getDynFlags
496 ; vis_orphs <- getVisibleOrphanMods
497 ; let rule_fn fn = getRules (RuleEnv rb vis_orphs) fn
498 ++ (mg_rules guts)
499 ; liftIO $ putLogMsg dflags NoReason Err.SevDump noSrcSpan
500 (defaultDumpStyle dflags)
501 (ruleCheckProgram current_phase pat
502 rule_fn (mg_binds guts))
503 ; return guts }
504
505 doPassDUM :: (DynFlags -> UniqSupply -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
506 doPassDUM do_pass = doPassM $ \binds -> do
507 dflags <- getDynFlags
508 us <- getUniqueSupplyM
509 liftIO $ do_pass dflags us binds
510
511 doPassDM :: (DynFlags -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
512 doPassDM do_pass = doPassDUM (\dflags -> const (do_pass dflags))
513
514 doPassD :: (DynFlags -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
515 doPassD do_pass = doPassDM (\dflags -> return . do_pass dflags)
516
517 doPassDU :: (DynFlags -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
518 doPassDU do_pass = doPassDUM (\dflags us -> return . do_pass dflags us)
519
520 doPassU :: (UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
521 doPassU do_pass = doPassDU (const do_pass)
522
523 doPassDFM :: (DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
524 doPassDFM do_pass guts = do
525 dflags <- getDynFlags
526 p_fam_env <- getPackageFamInstEnv
527 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
528 doPassM (liftIO . do_pass dflags fam_envs) guts
529
530 doPassDFU :: (DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
531 doPassDFU do_pass guts = do
532 dflags <- getDynFlags
533 us <- getUniqueSupplyM
534 p_fam_env <- getPackageFamInstEnv
535 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
536 doPass (do_pass dflags fam_envs us) guts
537
538 -- Most passes return no stats and don't change rules: these combinators
539 -- let us lift them to the full blown ModGuts+CoreM world
540 doPassM :: Monad m => (CoreProgram -> m CoreProgram) -> ModGuts -> m ModGuts
541 doPassM bind_f guts = do
542 binds' <- bind_f (mg_binds guts)
543 return (guts { mg_binds = binds' })
544
545 doPass :: (CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
546 doPass bind_f guts = return $ guts { mg_binds = bind_f (mg_binds guts) }
547
548 -- Observer passes just peek; don't modify the bindings at all
549 observe :: (DynFlags -> CoreProgram -> IO a) -> ModGuts -> CoreM ModGuts
550 observe do_pass = doPassM $ \binds -> do
551 dflags <- getDynFlags
552 _ <- liftIO $ do_pass dflags binds
553 return binds
554
555 {-
556 ************************************************************************
557 * *
558 Gentle simplification
559 * *
560 ************************************************************************
561 -}
562
563 simplifyExpr :: DynFlags -- includes spec of what core-to-core passes to do
564 -> CoreExpr
565 -> IO CoreExpr
566 -- simplifyExpr is called by the driver to simplify an
567 -- expression typed in at the interactive prompt
568 --
569 -- Also used by Template Haskell
570 simplifyExpr dflags expr
571 = withTiming (pure dflags) (text "Simplify [expr]") (const ()) $
572 do {
573 ; us <- mkSplitUniqSupply 's'
574
575 ; let sz = exprSize expr
576
577 ; (expr', counts) <- initSmpl dflags emptyRuleEnv
578 emptyFamInstEnvs us sz
579 (simplExprGently (simplEnvForGHCi dflags) expr)
580
581 ; Err.dumpIfSet dflags (dopt Opt_D_dump_simpl_stats dflags)
582 "Simplifier statistics" (pprSimplCount counts)
583
584 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl "Simplified expression"
585 (pprCoreExpr expr')
586
587 ; return expr'
588 }
589
590 simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr
591 -- Simplifies an expression
592 -- does occurrence analysis, then simplification
593 -- and repeats (twice currently) because one pass
594 -- alone leaves tons of crud.
595 -- Used (a) for user expressions typed in at the interactive prompt
596 -- (b) the LHS and RHS of a RULE
597 -- (c) Template Haskell splices
598 --
599 -- The name 'Gently' suggests that the SimplMode is SimplGently,
600 -- and in fact that is so.... but the 'Gently' in simplExprGently doesn't
601 -- enforce that; it just simplifies the expression twice
602
603 -- It's important that simplExprGently does eta reduction; see
604 -- Note [Simplifying the left-hand side of a RULE] above. The
605 -- simplifier does indeed do eta reduction (it's in Simplify.completeLam)
606 -- but only if -O is on.
607
608 simplExprGently env expr = do
609 expr1 <- simplExpr env (occurAnalyseExpr expr)
610 simplExpr env (occurAnalyseExpr expr1)
611
612 {-
613 ************************************************************************
614 * *
615 \subsection{The driver for the simplifier}
616 * *
617 ************************************************************************
618 -}
619
620 simplifyPgm :: CoreToDo -> ModGuts -> CoreM ModGuts
621 simplifyPgm pass guts
622 = do { hsc_env <- getHscEnv
623 ; us <- getUniqueSupplyM
624 ; rb <- getRuleBase
625 ; liftIOWithCount $
626 simplifyPgmIO pass hsc_env us rb guts }
627
628 simplifyPgmIO :: CoreToDo
629 -> HscEnv
630 -> UniqSupply
631 -> RuleBase
632 -> ModGuts
633 -> IO (SimplCount, ModGuts) -- New bindings
634
635 simplifyPgmIO pass@(CoreDoSimplify max_iterations mode)
636 hsc_env us hpt_rule_base
637 guts@(ModGuts { mg_module = this_mod
638 , mg_rdr_env = rdr_env
639 , mg_deps = deps
640 , mg_binds = binds, mg_rules = rules
641 , mg_fam_inst_env = fam_inst_env })
642 = do { (termination_msg, it_count, counts_out, guts')
643 <- do_iteration us 1 [] binds rules
644
645 ; Err.dumpIfSet dflags (dopt Opt_D_verbose_core2core dflags &&
646 dopt Opt_D_dump_simpl_stats dflags)
647 "Simplifier statistics for following pass"
648 (vcat [text termination_msg <+> text "after" <+> ppr it_count
649 <+> text "iterations",
650 blankLine,
651 pprSimplCount counts_out])
652
653 ; return (counts_out, guts')
654 }
655 where
656 dflags = hsc_dflags hsc_env
657 print_unqual = mkPrintUnqualified dflags rdr_env
658 simpl_env = mkSimplEnv mode
659 active_rule = activeRule mode
660 active_unf = activeUnfolding mode
661
662 do_iteration :: UniqSupply
663 -> Int -- Counts iterations
664 -> [SimplCount] -- Counts from earlier iterations, reversed
665 -> CoreProgram -- Bindings in
666 -> [CoreRule] -- and orphan rules
667 -> IO (String, Int, SimplCount, ModGuts)
668
669 do_iteration us iteration_no counts_so_far binds rules
670 -- iteration_no is the number of the iteration we are
671 -- about to begin, with '1' for the first
672 | iteration_no > max_iterations -- Stop if we've run out of iterations
673 = WARN( debugIsOn && (max_iterations > 2)
674 , hang (text "Simplifier bailing out after" <+> int max_iterations
675 <+> text "iterations"
676 <+> (brackets $ hsep $ punctuate comma $
677 map (int . simplCountN) (reverse counts_so_far)))
678 2 (text "Size =" <+> ppr (coreBindsStats binds)))
679
680 -- Subtract 1 from iteration_no to get the
681 -- number of iterations we actually completed
682 return ( "Simplifier baled out", iteration_no - 1
683 , totalise counts_so_far
684 , guts { mg_binds = binds, mg_rules = rules } )
685
686 -- Try and force thunks off the binds; significantly reduces
687 -- space usage, especially with -O. JRS, 000620.
688 | let sz = coreBindsSize binds
689 , () <- sz `seq` () -- Force it
690 = do {
691 -- Occurrence analysis
692 let { tagged_binds = {-# SCC "OccAnal" #-}
693 occurAnalysePgm this_mod active_unf active_rule rules
694 binds
695 } ;
696 Err.dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis"
697 (pprCoreBindings tagged_binds);
698
699 -- Get any new rules, and extend the rule base
700 -- See Note [Overall plumbing for rules] in Rules.hs
701 -- We need to do this regularly, because simplification can
702 -- poke on IdInfo thunks, which in turn brings in new rules
703 -- behind the scenes. Otherwise there's a danger we'll simply
704 -- miss the rules for Ids hidden inside imported inlinings
705 eps <- hscEPS hsc_env ;
706 let { rule_base1 = unionRuleBase hpt_rule_base (eps_rule_base eps)
707 ; rule_base2 = extendRuleBaseList rule_base1 rules
708 ; fam_envs = (eps_fam_inst_env eps, fam_inst_env)
709 ; vis_orphs = this_mod : dep_orphs deps } ;
710
711 -- Simplify the program
712 ((binds1, rules1), counts1) <-
713 initSmpl dflags (mkRuleEnv rule_base2 vis_orphs) fam_envs us1 sz $
714 do { (floats, env1) <- {-# SCC "SimplTopBinds" #-}
715 simplTopBinds simpl_env tagged_binds
716
717 -- Apply the substitution to rules defined in this module
718 -- for imported Ids. Eg RULE map my_f = blah
719 -- If we have a substitution my_f :-> other_f, we'd better
720 -- apply it to the rule to, or it'll never match
721 ; rules1 <- simplRules env1 Nothing rules Nothing
722
723 ; return (getTopFloatBinds floats, rules1) } ;
724
725 -- Stop if nothing happened; don't dump output
726 -- See Note [Which transformations are innocuous] in CoreMonad
727 if isZeroSimplCount counts1 then
728 return ( "Simplifier reached fixed point", iteration_no
729 , totalise (counts1 : counts_so_far) -- Include "free" ticks
730 , guts { mg_binds = binds1, mg_rules = rules1 } )
731 else do {
732 -- Short out indirections
733 -- We do this *after* at least one run of the simplifier
734 -- because indirection-shorting uses the export flag on *occurrences*
735 -- and that isn't guaranteed to be ok until after the first run propagates
736 -- stuff from the binding site to its occurrences
737 --
738 -- ToDo: alas, this means that indirection-shorting does not happen at all
739 -- if the simplifier does nothing (not common, I know, but unsavoury)
740 let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ;
741
742 -- Dump the result of this iteration
743 dump_end_iteration dflags print_unqual iteration_no counts1 binds2 rules1 ;
744 lintPassResult hsc_env pass binds2 ;
745
746 -- Loop
747 do_iteration us2 (iteration_no + 1) (counts1:counts_so_far) binds2 rules1
748 } }
749 | otherwise = panic "do_iteration"
750 where
751 (us1, us2) = splitUniqSupply us
752
753 -- Remember the counts_so_far are reversed
754 totalise :: [SimplCount] -> SimplCount
755 totalise = foldr (\c acc -> acc `plusSimplCount` c)
756 (zeroSimplCount dflags)
757
758 simplifyPgmIO _ _ _ _ _ = panic "simplifyPgmIO"
759
760 -------------------
761 dump_end_iteration :: DynFlags -> PrintUnqualified -> Int
762 -> SimplCount -> CoreProgram -> [CoreRule] -> IO ()
763 dump_end_iteration dflags print_unqual iteration_no counts binds rules
764 = dumpPassResult dflags print_unqual mb_flag hdr pp_counts binds rules
765 where
766 mb_flag | dopt Opt_D_dump_simpl_iterations dflags = Just Opt_D_dump_simpl_iterations
767 | otherwise = Nothing
768 -- Show details if Opt_D_dump_simpl_iterations is on
769
770 hdr = text "Simplifier iteration=" <> int iteration_no
771 pp_counts = vcat [ text "---- Simplifier counts for" <+> hdr
772 , pprSimplCount counts
773 , text "---- End of simplifier counts for" <+> hdr ]
774
775 {-
776 ************************************************************************
777 * *
778 Shorting out indirections
779 * *
780 ************************************************************************
781
782 If we have this:
783
784 x_local = <expression>
785 ...bindings...
786 x_exported = x_local
787
788 where x_exported is exported, and x_local is not, then we replace it with this:
789
790 x_exported = <expression>
791 x_local = x_exported
792 ...bindings...
793
794 Without this we never get rid of the x_exported = x_local thing. This
795 save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
796 makes strictness information propagate better. This used to happen in
797 the final phase, but it's tidier to do it here.
798
799 Note [Messing up the exported Id's RULES]
800 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
801 We must be careful about discarding (obviously) or even merging the
802 RULES on the exported Id. The example that went bad on me at one stage
803 was this one:
804
805 iterate :: (a -> a) -> a -> [a]
806 [Exported]
807 iterate = iterateList
808
809 iterateFB c f x = x `c` iterateFB c f (f x)
810 iterateList f x = x : iterateList f (f x)
811 [Not exported]
812
813 {-# RULES
814 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
815 "iterateFB" iterateFB (:) = iterateList
816 #-}
817
818 This got shorted out to:
819
820 iterateList :: (a -> a) -> a -> [a]
821 iterateList = iterate
822
823 iterateFB c f x = x `c` iterateFB c f (f x)
824 iterate f x = x : iterate f (f x)
825
826 {-# RULES
827 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
828 "iterateFB" iterateFB (:) = iterate
829 #-}
830
831 And now we get an infinite loop in the rule system
832 iterate f x -> build (\cn -> iterateFB c f x)
833 -> iterateFB (:) f x
834 -> iterate f x
835
836 Old "solution":
837 use rule switching-off pragmas to get rid
838 of iterateList in the first place
839
840 But in principle the user *might* want rules that only apply to the Id
841 he says. And inline pragmas are similar
842 {-# NOINLINE f #-}
843 f = local
844 local = <stuff>
845 Then we do not want to get rid of the NOINLINE.
846
847 Hence hasShortableIdinfo.
848
849
850 Note [Rules and indirection-zapping]
851 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
852 Problem: what if x_exported has a RULE that mentions something in ...bindings...?
853 Then the things mentioned can be out of scope! Solution
854 a) Make sure that in this pass the usage-info from x_exported is
855 available for ...bindings...
856 b) If there are any such RULES, rec-ify the entire top-level.
857 It'll get sorted out next time round
858
859 Other remarks
860 ~~~~~~~~~~~~~
861 If more than one exported thing is equal to a local thing (i.e., the
862 local thing really is shared), then we do one only:
863 \begin{verbatim}
864 x_local = ....
865 x_exported1 = x_local
866 x_exported2 = x_local
867 ==>
868 x_exported1 = ....
869
870 x_exported2 = x_exported1
871 \end{verbatim}
872
873 We rely on prior eta reduction to simplify things like
874 \begin{verbatim}
875 x_exported = /\ tyvars -> x_local tyvars
876 ==>
877 x_exported = x_local
878 \end{verbatim}
879 Hence,there's a possibility of leaving unchanged something like this:
880 \begin{verbatim}
881 x_local = ....
882 x_exported1 = x_local Int
883 \end{verbatim}
884 By the time we've thrown away the types in STG land this
885 could be eliminated. But I don't think it's very common
886 and it's dangerous to do this fiddling in STG land
887 because we might elminate a binding that's mentioned in the
888 unfolding for something.
889
890 Note [Indirection zapping and ticks]
891 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
892 Unfortunately this is another place where we need a special case for
893 ticks. The following happens quite regularly:
894
895 x_local = <expression>
896 x_exported = tick<x> x_local
897
898 Which we want to become:
899
900 x_exported = tick<x> <expression>
901
902 As it makes no sense to keep the tick and the expression on separate
903 bindings. Note however that that this might increase the ticks scoping
904 over the execution of x_local, so we can only do this for floatable
905 ticks. More often than not, other references will be unfoldings of
906 x_exported, and therefore carry the tick anyway.
907 -}
908
909 type IndEnv = IdEnv (Id, [Tickish Var]) -- Maps local_id -> exported_id, ticks
910
911 shortOutIndirections :: CoreProgram -> CoreProgram
912 shortOutIndirections binds
913 | isEmptyVarEnv ind_env = binds
914 | no_need_to_flatten = binds' -- See Note [Rules and indirect-zapping]
915 | otherwise = [Rec (flattenBinds binds')] -- for this no_need_to_flatten stuff
916 where
917 ind_env = makeIndEnv binds
918 -- These exported Ids are the subjects of the indirection-elimination
919 exp_ids = map fst $ nonDetEltsUFM ind_env
920 -- It's OK to use nonDetEltsUFM here because we forget the ordering
921 -- by immediately converting to a set or check if all the elements
922 -- satisfy a predicate.
923 exp_id_set = mkVarSet exp_ids
924 no_need_to_flatten = all (null . ruleInfoRules . idSpecialisation) exp_ids
925 binds' = concatMap zap binds
926
927 zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)]
928 zap (Rec pairs) = [Rec (concatMap zapPair pairs)]
929
930 zapPair (bndr, rhs)
931 | bndr `elemVarSet` exp_id_set
932 = [] -- Kill the exported-id binding
933
934 | Just (exp_id, ticks) <- lookupVarEnv ind_env bndr
935 , (exp_id', lcl_id') <- transferIdInfo exp_id bndr
936 = -- Turn a local-id binding into two bindings
937 -- exp_id = rhs; lcl_id = exp_id
938 [ (exp_id', mkTicks ticks rhs),
939 (lcl_id', Var exp_id') ]
940
941 | otherwise
942 = [(bndr,rhs)]
943
944 makeIndEnv :: [CoreBind] -> IndEnv
945 makeIndEnv binds
946 = foldr add_bind emptyVarEnv binds
947 where
948 add_bind :: CoreBind -> IndEnv -> IndEnv
949 add_bind (NonRec exported_id rhs) env = add_pair (exported_id, rhs) env
950 add_bind (Rec pairs) env = foldr add_pair env pairs
951
952 add_pair :: (Id,CoreExpr) -> IndEnv -> IndEnv
953 add_pair (exported_id, exported) env
954 | (ticks, Var local_id) <- stripTicksTop tickishFloatable exported
955 , shortMeOut env exported_id local_id
956 = extendVarEnv env local_id (exported_id, ticks)
957 add_pair _ env = env
958
959 -----------------
960 shortMeOut :: IndEnv -> Id -> Id -> Bool
961 shortMeOut ind_env exported_id local_id
962 -- The if-then-else stuff is just so I can get a pprTrace to see
963 -- how often I don't get shorting out because of IdInfo stuff
964 = if isExportedId exported_id && -- Only if this is exported
965
966 isLocalId local_id && -- Only if this one is defined in this
967 -- module, so that we *can* change its
968 -- binding to be the exported thing!
969
970 not (isExportedId local_id) && -- Only if this one is not itself exported,
971 -- since the transformation will nuke it
972
973 not (local_id `elemVarEnv` ind_env) -- Only if not already substituted for
974 then
975 if hasShortableIdInfo exported_id
976 then True -- See Note [Messing up the exported Id's IdInfo]
977 else WARN( True, text "Not shorting out:" <+> ppr exported_id )
978 False
979 else
980 False
981
982 -----------------
983 hasShortableIdInfo :: Id -> Bool
984 -- True if there is no user-attached IdInfo on exported_id,
985 -- so we can safely discard it
986 -- See Note [Messing up the exported Id's IdInfo]
987 hasShortableIdInfo id
988 = isEmptyRuleInfo (ruleInfo info)
989 && isDefaultInlinePragma (inlinePragInfo info)
990 && not (isStableUnfolding (unfoldingInfo info))
991 where
992 info = idInfo id
993
994 -----------------
995 {- Note [Transferring IdInfo]
996 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
997 If we have
998 lcl_id = e; exp_id = lcl_id
999
1000 and lcl_id has useful IdInfo, we don't want to discard it by going
1001 gbl_id = e; lcl_id = gbl_id
1002
1003 Instead, transfer IdInfo from lcl_id to exp_id, specifically
1004 * (Stable) unfolding
1005 * Strictness
1006 * Rules
1007 * Inline pragma
1008
1009 Overwriting, rather than merging, seems to work ok.
1010
1011 We also zap the InlinePragma on the lcl_id. It might originally
1012 have had a NOINLINE, which we have now transferred; and we really
1013 want the lcl_id to inline now that its RHS is trivial!
1014 -}
1015
1016 transferIdInfo :: Id -> Id -> (Id, Id)
1017 -- See Note [Transferring IdInfo]
1018 transferIdInfo exported_id local_id
1019 = ( modifyIdInfo transfer exported_id
1020 , local_id `setInlinePragma` defaultInlinePragma )
1021 where
1022 local_info = idInfo local_id
1023 transfer exp_info = exp_info `setStrictnessInfo` strictnessInfo local_info
1024 `setUnfoldingInfo` unfoldingInfo local_info
1025 `setInlinePragInfo` inlinePragInfo local_info
1026 `setRuleInfo` addRuleInfo (ruleInfo exp_info) new_info
1027 new_info = setRuleInfoHead (idName exported_id)
1028 (ruleInfo local_info)
1029 -- Remember to set the function-name field of the
1030 -- rules as we transfer them from one function to another