Refactor the Mighty Simplifier
[ghc.git] / compiler / simplCore / SimplCore.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 \section[SimplCore]{Driver for simplifying @Core@ programs}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplCore ( core2core, simplifyExpr ) where
10
11 #include "HsVersions.h"
12
13 import DynFlags
14 import CoreSyn
15 import HscTypes
16 import CSE ( cseProgram )
17 import Rules ( mkRuleBase, unionRuleBase,
18 extendRuleBaseList, ruleCheckProgram, addRuleInfo, )
19 import PprCore ( pprCoreBindings, pprCoreExpr )
20 import OccurAnal ( occurAnalysePgm, occurAnalyseExpr )
21 import IdInfo
22 import CoreStats ( coreBindsSize, coreBindsStats, exprSize )
23 import CoreUtils ( mkTicks, stripTicksTop )
24 import CoreLint ( endPass, lintPassResult, dumpPassResult,
25 lintAnnots )
26 import Simplify ( simplTopBinds, simplExpr, simplRules )
27 import SimplUtils ( simplEnvForGHCi, activeRule )
28 import SimplEnv
29 import SimplMonad
30 import CoreMonad
31 import qualified ErrUtils as Err
32 import FloatIn ( floatInwards )
33 import FloatOut ( floatOutwards )
34 import FamInstEnv
35 import Id
36 import ErrUtils ( withTiming )
37 import BasicTypes ( CompilerPhase(..), isDefaultInlinePragma )
38 import VarSet
39 import VarEnv
40 import LiberateCase ( liberateCase )
41 import SAT ( doStaticArgs )
42 import Specialise ( specProgram)
43 import SpecConstr ( specConstrProgram)
44 import DmdAnal ( dmdAnalProgram )
45 import CallArity ( callArityAnalProgram )
46 import WorkWrap ( wwTopBinds )
47 import Vectorise ( vectorise )
48 import SrcLoc
49 import Util
50 import Module
51
52 import Maybes
53 import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply )
54 import UniqFM
55 import Outputable
56 import Control.Monad
57 import qualified GHC.LanguageExtensions as LangExt
58
59 #if defined(GHCI)
60 import DynamicLoading ( loadPlugins )
61 import Plugins ( installCoreToDos )
62 #else
63 import DynamicLoading ( pluginError )
64 #endif
65
66 {-
67 ************************************************************************
68 * *
69 \subsection{The driver for the simplifier}
70 * *
71 ************************************************************************
72 -}
73
74 core2core :: HscEnv -> ModGuts -> IO ModGuts
75 core2core hsc_env guts@(ModGuts { mg_module = mod
76 , mg_loc = loc
77 , mg_deps = deps
78 , mg_rdr_env = rdr_env })
79 = do { us <- mkSplitUniqSupply 's'
80 -- make sure all plugins are loaded
81
82 ; let builtin_passes = getCoreToDo dflags
83 orph_mods = mkModuleSet (mod : dep_orphs deps)
84 ;
85 ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base us mod
86 orph_mods print_unqual loc $
87 do { all_passes <- addPluginPasses builtin_passes
88 ; runCorePasses all_passes guts }
89
90 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_stats
91 "Grand total simplifier statistics"
92 (pprSimplCount stats)
93
94 ; return guts2 }
95 where
96 dflags = hsc_dflags hsc_env
97 home_pkg_rules = hptRules hsc_env (dep_mods deps)
98 hpt_rule_base = mkRuleBase home_pkg_rules
99 print_unqual = mkPrintUnqualified dflags rdr_env
100 -- mod: get the module out of the current HscEnv so we can retrieve it from the monad.
101 -- This is very convienent for the users of the monad (e.g. plugins do not have to
102 -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
103 -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
104 -- would mean our cached value would go out of date.
105
106 {-
107 ************************************************************************
108 * *
109 Generating the main optimisation pipeline
110 * *
111 ************************************************************************
112 -}
113
114 getCoreToDo :: DynFlags -> [CoreToDo]
115 getCoreToDo dflags
116 = flatten_todos core_todo
117 where
118 opt_level = optLevel dflags
119 phases = simplPhases dflags
120 max_iter = maxSimplIterations dflags
121 rule_check = ruleCheck dflags
122 call_arity = gopt Opt_CallArity dflags
123 strictness = gopt Opt_Strictness dflags
124 full_laziness = gopt Opt_FullLaziness dflags
125 do_specialise = gopt Opt_Specialise dflags
126 do_float_in = gopt Opt_FloatIn dflags
127 cse = gopt Opt_CSE dflags
128 spec_constr = gopt Opt_SpecConstr dflags
129 liberate_case = gopt Opt_LiberateCase dflags
130 late_dmd_anal = gopt Opt_LateDmdAnal dflags
131 static_args = gopt Opt_StaticArgumentTransformation dflags
132 rules_on = gopt Opt_EnableRewriteRules dflags
133 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
134 ww_on = gopt Opt_WorkerWrapper dflags
135 vectorise_on = gopt Opt_Vectorise dflags
136 static_ptrs = xopt LangExt.StaticPointers dflags
137
138 maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
139
140 maybe_strictness_before phase
141 = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness
142
143 base_mode = SimplMode { sm_phase = panic "base_mode"
144 , sm_names = []
145 , sm_dflags = dflags
146 , sm_rules = rules_on
147 , sm_eta_expand = eta_expand_on
148 , sm_inline = True
149 , sm_case_case = True }
150
151 simpl_phase phase names iter
152 = CoreDoPasses
153 $ [ maybe_strictness_before phase
154 , CoreDoSimplify iter
155 (base_mode { sm_phase = Phase phase
156 , sm_names = names })
157
158 , maybe_rule_check (Phase phase) ]
159
160 -- Vectorisation can introduce a fair few common sub expressions involving
161 -- DPH primitives. For example, see the Reverse test from dph-examples.
162 -- We need to eliminate these common sub expressions before their definitions
163 -- are inlined in phase 2. The CSE introduces lots of v1 = v2 bindings,
164 -- so we also run simpl_gently to inline them.
165 ++ (if vectorise_on && phase == 3
166 then [CoreCSE, simpl_gently]
167 else [])
168
169 vectorisation
170 = runWhen vectorise_on $
171 CoreDoPasses [ simpl_gently, CoreDoVectorisation ]
172
173 -- By default, we have 2 phases before phase 0.
174
175 -- Want to run with inline phase 2 after the specialiser to give
176 -- maximum chance for fusion to work before we inline build/augment
177 -- in phase 1. This made a difference in 'ansi' where an
178 -- overloaded function wasn't inlined till too late.
179
180 -- Need phase 1 so that build/augment get
181 -- inlined. I found that spectral/hartel/genfft lost some useful
182 -- strictness in the function sumcode' if augment is not inlined
183 -- before strictness analysis runs
184 simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
185 | phase <- [phases, phases-1 .. 1] ]
186
187
188 -- initial simplify: mk specialiser happy: minimum effort please
189 simpl_gently = CoreDoSimplify max_iter
190 (base_mode { sm_phase = InitialPhase
191 , sm_names = ["Gentle"]
192 , sm_rules = rules_on -- Note [RULEs enabled in SimplGently]
193 , sm_inline = not vectorise_on
194 -- See Note [Inline in InitialPhase]
195 , sm_case_case = False })
196 -- Don't do case-of-case transformations.
197 -- This makes full laziness work better
198
199 strictness_pass = if ww_on
200 then [CoreDoStrictness,CoreDoWorkerWrapper]
201 else [CoreDoStrictness]
202
203
204 -- New demand analyser
205 demand_analyser = (CoreDoPasses (
206 strictness_pass ++
207 [simpl_phase 0 ["post-worker-wrapper"] max_iter]
208 ))
209
210 -- Static forms are moved to the top level with the FloatOut pass.
211 -- See Note [Grand plan for static forms] in StaticPtrTable.
212 static_ptrs_float_outwards =
213 runWhen static_ptrs $ CoreDoPasses
214 [ simpl_gently -- Float Out can't handle type lets (sometimes created
215 -- by simpleOptPgm via mkParallelBindings)
216 , CoreDoFloatOutwards FloatOutSwitches
217 { floatOutLambdas = Just 0
218 , floatOutConstants = True
219 , floatOutOverSatApps = False
220 , floatToTopLevelOnly = True
221 }
222 ]
223
224 core_todo =
225 if opt_level == 0 then
226 [ vectorisation,
227 static_ptrs_float_outwards,
228 CoreDoSimplify max_iter
229 (base_mode { sm_phase = Phase 0
230 , sm_names = ["Non-opt simplification"] })
231 ]
232
233 else {- opt_level >= 1 -} [
234
235 -- We want to do the static argument transform before full laziness as it
236 -- may expose extra opportunities to float things outwards. However, to fix
237 -- up the output of the transformation we need at do at least one simplify
238 -- after this before anything else
239 runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
240
241 -- We run vectorisation here for now, but we might also try to run
242 -- it later
243 vectorisation,
244
245 -- initial simplify: mk specialiser happy: minimum effort please
246 simpl_gently,
247
248 -- Specialisation is best done before full laziness
249 -- so that overloaded functions have all their dictionary lambdas manifest
250 runWhen do_specialise CoreDoSpecialising,
251
252 if full_laziness then
253 CoreDoFloatOutwards FloatOutSwitches {
254 floatOutLambdas = Just 0,
255 floatOutConstants = True,
256 floatOutOverSatApps = False,
257 floatToTopLevelOnly = False }
258 -- Was: gentleFloatOutSwitches
259 --
260 -- I have no idea why, but not floating constants to
261 -- top level is very bad in some cases.
262 --
263 -- Notably: p_ident in spectral/rewrite
264 -- Changing from "gentle" to "constantsOnly"
265 -- improved rewrite's allocation by 19%, and
266 -- made 0.0% difference to any other nofib
267 -- benchmark
268 --
269 -- Not doing floatOutOverSatApps yet, we'll do
270 -- that later on when we've had a chance to get more
271 -- accurate arity information. In fact it makes no
272 -- difference at all to performance if we do it here,
273 -- but maybe we save some unnecessary to-and-fro in
274 -- the simplifier.
275 else
276 -- Even with full laziness turned off, we still need to float static
277 -- forms to the top level. See Note [Grand plan for static forms] in
278 -- StaticPtrTable.
279 static_ptrs_float_outwards,
280
281 simpl_phases,
282
283 -- Phase 0: allow all Ids to be inlined now
284 -- This gets foldr inlined before strictness analysis
285
286 -- At least 3 iterations because otherwise we land up with
287 -- huge dead expressions because of an infelicity in the
288 -- simplifier.
289 -- let k = BIG in foldr k z xs
290 -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
291 -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
292 -- Don't stop now!
293 simpl_phase 0 ["main"] (max max_iter 3),
294
295 runWhen do_float_in CoreDoFloatInwards,
296 -- Run float-inwards immediately before the strictness analyser
297 -- Doing so pushes bindings nearer their use site and hence makes
298 -- them more likely to be strict. These bindings might only show
299 -- up after the inlining from simplification. Example in fulsom,
300 -- Csg.calc, where an arg of timesDouble thereby becomes strict.
301
302 runWhen call_arity $ CoreDoPasses
303 [ CoreDoCallArity
304 , simpl_phase 0 ["post-call-arity"] max_iter
305 ],
306
307 runWhen strictness demand_analyser,
308
309 runWhen full_laziness $
310 CoreDoFloatOutwards FloatOutSwitches {
311 floatOutLambdas = floatLamArgs dflags,
312 floatOutConstants = True,
313 floatOutOverSatApps = True,
314 floatToTopLevelOnly = False },
315 -- nofib/spectral/hartel/wang doubles in speed if you
316 -- do full laziness late in the day. It only happens
317 -- after fusion and other stuff, so the early pass doesn't
318 -- catch it. For the record, the redex is
319 -- f_el22 (f_el21 r_midblock)
320
321
322 runWhen cse CoreCSE,
323 -- We want CSE to follow the final full-laziness pass, because it may
324 -- succeed in commoning up things floated out by full laziness.
325 -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
326
327 runWhen do_float_in CoreDoFloatInwards,
328
329 maybe_rule_check (Phase 0),
330
331 -- Case-liberation for -O2. This should be after
332 -- strictness analysis and the simplification which follows it.
333 runWhen liberate_case (CoreDoPasses [
334 CoreLiberateCase,
335 simpl_phase 0 ["post-liberate-case"] max_iter
336 ]), -- Run the simplifier after LiberateCase to vastly
337 -- reduce the possibility of shadowing
338 -- Reason: see Note [Shadowing] in SpecConstr.hs
339
340 runWhen spec_constr CoreDoSpecConstr,
341
342 maybe_rule_check (Phase 0),
343
344 -- Final clean-up simplification:
345 simpl_phase 0 ["final"] max_iter,
346
347 runWhen late_dmd_anal $ CoreDoPasses (
348 strictness_pass ++
349 [simpl_phase 0 ["post-late-ww"] max_iter]
350 ),
351
352 -- Final run of the demand_analyser, ensures that one-shot thunks are
353 -- really really one-shot thunks. Only needed if the demand analyser
354 -- has run at all. See Note [Final Demand Analyser run] in DmdAnal
355 -- It is EXTREMELY IMPORTANT to run this pass, otherwise execution
356 -- can become /exponentially/ more expensive. See Trac #11731, #12996.
357 runWhen (strictness || late_dmd_anal) CoreDoStrictness,
358
359 maybe_rule_check (Phase 0)
360 ]
361
362 -- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity.
363 flatten_todos [] = []
364 flatten_todos (CoreDoNothing : rest) = flatten_todos rest
365 flatten_todos (CoreDoPasses passes : rest) =
366 flatten_todos passes ++ flatten_todos rest
367 flatten_todos (todo : rest) = todo : flatten_todos rest
368
369 -- Loading plugins
370
371 addPluginPasses :: [CoreToDo] -> CoreM [CoreToDo]
372 #if !defined(GHCI)
373 addPluginPasses builtin_passes
374 = do { dflags <- getDynFlags
375 ; let pluginMods = pluginModNames dflags
376 ; unless (null pluginMods) (pluginError pluginMods)
377 ; return builtin_passes }
378 #else
379 addPluginPasses builtin_passes
380 = do { hsc_env <- getHscEnv
381 ; named_plugins <- liftIO (loadPlugins hsc_env)
382 ; foldM query_plug builtin_passes named_plugins }
383 where
384 query_plug todos (_, plug, options) = installCoreToDos plug options todos
385 #endif
386
387 {- Note [Inline in InitialPhase]
388 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
389 In GHC 8 and earlier we did not inline anything in the InitialPhase. But that is
390 confusing for users because when they say INLINE they expect the function to inline
391 right away.
392
393 So now we do inlining immediately, even in the InitialPhase, assuming that the
394 Id's Activation allows it.
395
396 This is a surprisingly big deal. Compiler performance improved a lot
397 when I made this change:
398
399 perf/compiler/T5837.run T5837 [stat too good] (normal)
400 perf/compiler/parsing001.run parsing001 [stat too good] (normal)
401 perf/compiler/T12234.run T12234 [stat too good] (optasm)
402 perf/compiler/T9020.run T9020 [stat too good] (optasm)
403 perf/compiler/T3064.run T3064 [stat too good] (normal)
404 perf/compiler/T9961.run T9961 [stat too good] (normal)
405 perf/compiler/T13056.run T13056 [stat too good] (optasm)
406 perf/compiler/T9872d.run T9872d [stat too good] (normal)
407 perf/compiler/T783.run T783 [stat too good] (normal)
408 perf/compiler/T12227.run T12227 [stat too good] (normal)
409 perf/should_run/lazy-bs-alloc.run lazy-bs-alloc [stat too good] (normal)
410 perf/compiler/T1969.run T1969 [stat too good] (normal)
411 perf/compiler/T9872a.run T9872a [stat too good] (normal)
412 perf/compiler/T9872c.run T9872c [stat too good] (normal)
413 perf/compiler/T9872b.run T9872b [stat too good] (normal)
414 perf/compiler/T9872d.run T9872d [stat too good] (normal)
415
416 Note [RULEs enabled in SimplGently]
417 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
418 RULES are enabled when doing "gentle" simplification. Two reasons:
419
420 * We really want the class-op cancellation to happen:
421 op (df d1 d2) --> $cop3 d1 d2
422 because this breaks the mutual recursion between 'op' and 'df'
423
424 * I wanted the RULE
425 lift String ===> ...
426 to work in Template Haskell when simplifying
427 splices, so we get simpler code for literal strings
428
429 But watch out: list fusion can prevent floating. So use phase control
430 to switch off those rules until after floating.
431
432 ************************************************************************
433 * *
434 The CoreToDo interpreter
435 * *
436 ************************************************************************
437 -}
438
439 runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts
440 runCorePasses passes guts
441 = foldM do_pass guts passes
442 where
443 do_pass guts CoreDoNothing = return guts
444 do_pass guts (CoreDoPasses ps) = runCorePasses ps guts
445 do_pass guts pass
446 = withTiming getDynFlags
447 (ppr pass <+> brackets (ppr mod))
448 (const ()) $ do
449 { guts' <- lintAnnots (ppr pass) (doCorePass pass) guts
450 ; endPass pass (mg_binds guts') (mg_rules guts')
451 ; return guts' }
452
453 mod = mg_module guts
454
455 doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts
456 doCorePass pass@(CoreDoSimplify {}) = {-# SCC "Simplify" #-}
457 simplifyPgm pass
458
459 doCorePass CoreCSE = {-# SCC "CommonSubExpr" #-}
460 doPass cseProgram
461
462 doCorePass CoreLiberateCase = {-# SCC "LiberateCase" #-}
463 doPassD liberateCase
464
465 doCorePass CoreDoFloatInwards = {-# SCC "FloatInwards" #-}
466 floatInwards
467
468 doCorePass (CoreDoFloatOutwards f) = {-# SCC "FloatOutwards" #-}
469 doPassDUM (floatOutwards f)
470
471 doCorePass CoreDoStaticArgs = {-# SCC "StaticArgs" #-}
472 doPassU doStaticArgs
473
474 doCorePass CoreDoCallArity = {-# SCC "CallArity" #-}
475 doPassD callArityAnalProgram
476
477 doCorePass CoreDoStrictness = {-# SCC "NewStranal" #-}
478 doPassDFM dmdAnalProgram
479
480 doCorePass CoreDoWorkerWrapper = {-# SCC "WorkWrap" #-}
481 doPassDFU wwTopBinds
482
483 doCorePass CoreDoSpecialising = {-# SCC "Specialise" #-}
484 specProgram
485
486 doCorePass CoreDoSpecConstr = {-# SCC "SpecConstr" #-}
487 specConstrProgram
488
489 doCorePass CoreDoVectorisation = {-# SCC "Vectorise" #-}
490 vectorise
491
492 doCorePass CoreDoPrintCore = observe printCore
493 doCorePass (CoreDoRuleCheck phase pat) = ruleCheckPass phase pat
494 doCorePass CoreDoNothing = return
495 doCorePass (CoreDoPasses passes) = runCorePasses passes
496
497 #if defined(GHCI)
498 doCorePass (CoreDoPluginPass _ pass) = {-# SCC "Plugin" #-} pass
499 #endif
500
501 doCorePass pass = pprPanic "doCorePass" (ppr pass)
502
503 {-
504 ************************************************************************
505 * *
506 \subsection{Core pass combinators}
507 * *
508 ************************************************************************
509 -}
510
511 printCore :: DynFlags -> CoreProgram -> IO ()
512 printCore dflags binds
513 = Err.dumpIfSet dflags True "Print Core" (pprCoreBindings binds)
514
515 ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts
516 ruleCheckPass current_phase pat guts =
517 withTiming getDynFlags
518 (text "RuleCheck"<+>brackets (ppr $ mg_module guts))
519 (const ()) $ do
520 { rb <- getRuleBase
521 ; dflags <- getDynFlags
522 ; vis_orphs <- getVisibleOrphanMods
523 ; liftIO $ putLogMsg dflags NoReason Err.SevDump noSrcSpan
524 (defaultDumpStyle dflags)
525 (ruleCheckProgram current_phase pat
526 (RuleEnv rb vis_orphs) (mg_binds guts))
527 ; return guts }
528
529 doPassDUM :: (DynFlags -> UniqSupply -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
530 doPassDUM do_pass = doPassM $ \binds -> do
531 dflags <- getDynFlags
532 us <- getUniqueSupplyM
533 liftIO $ do_pass dflags us binds
534
535 doPassDM :: (DynFlags -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
536 doPassDM do_pass = doPassDUM (\dflags -> const (do_pass dflags))
537
538 doPassD :: (DynFlags -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
539 doPassD do_pass = doPassDM (\dflags -> return . do_pass dflags)
540
541 doPassDU :: (DynFlags -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
542 doPassDU do_pass = doPassDUM (\dflags us -> return . do_pass dflags us)
543
544 doPassU :: (UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
545 doPassU do_pass = doPassDU (const do_pass)
546
547 doPassDFM :: (DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
548 doPassDFM do_pass guts = do
549 dflags <- getDynFlags
550 p_fam_env <- getPackageFamInstEnv
551 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
552 doPassM (liftIO . do_pass dflags fam_envs) guts
553
554 doPassDFU :: (DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
555 doPassDFU do_pass guts = do
556 dflags <- getDynFlags
557 us <- getUniqueSupplyM
558 p_fam_env <- getPackageFamInstEnv
559 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
560 doPass (do_pass dflags fam_envs us) guts
561
562 -- Most passes return no stats and don't change rules: these combinators
563 -- let us lift them to the full blown ModGuts+CoreM world
564 doPassM :: Monad m => (CoreProgram -> m CoreProgram) -> ModGuts -> m ModGuts
565 doPassM bind_f guts = do
566 binds' <- bind_f (mg_binds guts)
567 return (guts { mg_binds = binds' })
568
569 doPass :: (CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
570 doPass bind_f guts = return $ guts { mg_binds = bind_f (mg_binds guts) }
571
572 -- Observer passes just peek; don't modify the bindings at all
573 observe :: (DynFlags -> CoreProgram -> IO a) -> ModGuts -> CoreM ModGuts
574 observe do_pass = doPassM $ \binds -> do
575 dflags <- getDynFlags
576 _ <- liftIO $ do_pass dflags binds
577 return binds
578
579 {-
580 ************************************************************************
581 * *
582 Gentle simplification
583 * *
584 ************************************************************************
585 -}
586
587 simplifyExpr :: DynFlags -- includes spec of what core-to-core passes to do
588 -> CoreExpr
589 -> IO CoreExpr
590 -- simplifyExpr is called by the driver to simplify an
591 -- expression typed in at the interactive prompt
592 --
593 -- Also used by Template Haskell
594 simplifyExpr dflags expr
595 = withTiming (pure dflags) (text "Simplify [expr]") (const ()) $
596 do {
597 ; us <- mkSplitUniqSupply 's'
598
599 ; let sz = exprSize expr
600
601 ; (expr', counts) <- initSmpl dflags emptyRuleEnv
602 emptyFamInstEnvs us sz
603 (simplExprGently (simplEnvForGHCi dflags) expr)
604
605 ; Err.dumpIfSet dflags (dopt Opt_D_dump_simpl_stats dflags)
606 "Simplifier statistics" (pprSimplCount counts)
607
608 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl "Simplified expression"
609 (pprCoreExpr expr')
610
611 ; return expr'
612 }
613
614 simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr
615 -- Simplifies an expression
616 -- does occurrence analysis, then simplification
617 -- and repeats (twice currently) because one pass
618 -- alone leaves tons of crud.
619 -- Used (a) for user expressions typed in at the interactive prompt
620 -- (b) the LHS and RHS of a RULE
621 -- (c) Template Haskell splices
622 --
623 -- The name 'Gently' suggests that the SimplMode is SimplGently,
624 -- and in fact that is so.... but the 'Gently' in simplExprGently doesn't
625 -- enforce that; it just simplifies the expression twice
626
627 -- It's important that simplExprGently does eta reduction; see
628 -- Note [Simplifying the left-hand side of a RULE] above. The
629 -- simplifier does indeed do eta reduction (it's in Simplify.completeLam)
630 -- but only if -O is on.
631
632 simplExprGently env expr = do
633 expr1 <- simplExpr env (occurAnalyseExpr expr)
634 simplExpr env (occurAnalyseExpr expr1)
635
636 {-
637 ************************************************************************
638 * *
639 \subsection{The driver for the simplifier}
640 * *
641 ************************************************************************
642 -}
643
644 simplifyPgm :: CoreToDo -> ModGuts -> CoreM ModGuts
645 simplifyPgm pass guts
646 = do { hsc_env <- getHscEnv
647 ; us <- getUniqueSupplyM
648 ; rb <- getRuleBase
649 ; liftIOWithCount $
650 simplifyPgmIO pass hsc_env us rb guts }
651
652 simplifyPgmIO :: CoreToDo
653 -> HscEnv
654 -> UniqSupply
655 -> RuleBase
656 -> ModGuts
657 -> IO (SimplCount, ModGuts) -- New bindings
658
659 simplifyPgmIO pass@(CoreDoSimplify max_iterations mode)
660 hsc_env us hpt_rule_base
661 guts@(ModGuts { mg_module = this_mod
662 , mg_rdr_env = rdr_env
663 , mg_deps = deps
664 , mg_binds = binds, mg_rules = rules
665 , mg_fam_inst_env = fam_inst_env })
666 = do { (termination_msg, it_count, counts_out, guts')
667 <- do_iteration us 1 [] binds rules
668
669 ; Err.dumpIfSet dflags (dopt Opt_D_verbose_core2core dflags &&
670 dopt Opt_D_dump_simpl_stats dflags)
671 "Simplifier statistics for following pass"
672 (vcat [text termination_msg <+> text "after" <+> ppr it_count
673 <+> text "iterations",
674 blankLine,
675 pprSimplCount counts_out])
676
677 ; return (counts_out, guts')
678 }
679 where
680 dflags = hsc_dflags hsc_env
681 print_unqual = mkPrintUnqualified dflags rdr_env
682 simpl_env = mkSimplEnv mode
683 active_rule = activeRule simpl_env
684
685 do_iteration :: UniqSupply
686 -> Int -- Counts iterations
687 -> [SimplCount] -- Counts from earlier iterations, reversed
688 -> CoreProgram -- Bindings in
689 -> [CoreRule] -- and orphan rules
690 -> IO (String, Int, SimplCount, ModGuts)
691
692 do_iteration us iteration_no counts_so_far binds rules
693 -- iteration_no is the number of the iteration we are
694 -- about to begin, with '1' for the first
695 | iteration_no > max_iterations -- Stop if we've run out of iterations
696 = WARN( debugIsOn && (max_iterations > 2)
697 , hang (text "Simplifier bailing out after" <+> int max_iterations
698 <+> text "iterations"
699 <+> (brackets $ hsep $ punctuate comma $
700 map (int . simplCountN) (reverse counts_so_far)))
701 2 (text "Size =" <+> ppr (coreBindsStats binds)))
702
703 -- Subtract 1 from iteration_no to get the
704 -- number of iterations we actually completed
705 return ( "Simplifier baled out", iteration_no - 1
706 , totalise counts_so_far
707 , guts { mg_binds = binds, mg_rules = rules } )
708
709 -- Try and force thunks off the binds; significantly reduces
710 -- space usage, especially with -O. JRS, 000620.
711 | let sz = coreBindsSize binds
712 , () <- sz `seq` () -- Force it
713 = do {
714 -- Occurrence analysis
715 let { -- Note [Vectorisation declarations and occurrences]
716 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
717 -- During the 'InitialPhase' (i.e., before vectorisation), we need to make sure
718 -- that the right-hand sides of vectorisation declarations are taken into
719 -- account during occurrence analysis. After the 'InitialPhase', we need to ensure
720 -- that the binders representing variable vectorisation declarations are kept alive.
721 -- (In contrast to automatically vectorised variables, their unvectorised versions
722 -- don't depend on them.)
723 vectVars = mkVarSet $
724 catMaybes [ fmap snd $ lookupDVarEnv (vectInfoVar (mg_vect_info guts)) bndr
725 | Vect bndr _ <- mg_vect_decls guts]
726 ++
727 catMaybes [ fmap snd $ lookupDVarEnv (vectInfoVar (mg_vect_info guts)) bndr
728 | bndr <- bindersOfBinds binds]
729 -- FIXME: This second comprehensions is only needed as long as we
730 -- have vectorised bindings where we get "Could NOT call
731 -- vectorised from original version".
732 ; (maybeVects, maybeVectVars)
733 = case sm_phase mode of
734 InitialPhase -> (mg_vect_decls guts, vectVars)
735 _ -> ([], vectVars)
736 ; tagged_binds = {-# SCC "OccAnal" #-}
737 occurAnalysePgm this_mod active_rule rules
738 maybeVects maybeVectVars binds
739 } ;
740 Err.dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis"
741 (pprCoreBindings tagged_binds);
742
743 -- Get any new rules, and extend the rule base
744 -- See Note [Overall plumbing for rules] in Rules.hs
745 -- We need to do this regularly, because simplification can
746 -- poke on IdInfo thunks, which in turn brings in new rules
747 -- behind the scenes. Otherwise there's a danger we'll simply
748 -- miss the rules for Ids hidden inside imported inlinings
749 eps <- hscEPS hsc_env ;
750 let { rule_base1 = unionRuleBase hpt_rule_base (eps_rule_base eps)
751 ; rule_base2 = extendRuleBaseList rule_base1 rules
752 ; fam_envs = (eps_fam_inst_env eps, fam_inst_env)
753 ; vis_orphs = this_mod : dep_orphs deps } ;
754
755 -- Simplify the program
756 ((binds1, rules1), counts1) <-
757 initSmpl dflags (mkRuleEnv rule_base2 vis_orphs) fam_envs us1 sz $
758 do { (floats, env1) <- {-# SCC "SimplTopBinds" #-}
759 simplTopBinds simpl_env tagged_binds
760
761 -- Apply the substitution to rules defined in this module
762 -- for imported Ids. Eg RULE map my_f = blah
763 -- If we have a substitution my_f :-> other_f, we'd better
764 -- apply it to the rule to, or it'll never match
765 ; rules1 <- simplRules env1 Nothing rules
766
767 ; return (getTopFloatBinds floats, rules1) } ;
768
769 -- Stop if nothing happened; don't dump output
770 if isZeroSimplCount counts1 then
771 return ( "Simplifier reached fixed point", iteration_no
772 , totalise (counts1 : counts_so_far) -- Include "free" ticks
773 , guts { mg_binds = binds1, mg_rules = rules1 } )
774 else do {
775 -- Short out indirections
776 -- We do this *after* at least one run of the simplifier
777 -- because indirection-shorting uses the export flag on *occurrences*
778 -- and that isn't guaranteed to be ok until after the first run propagates
779 -- stuff from the binding site to its occurrences
780 --
781 -- ToDo: alas, this means that indirection-shorting does not happen at all
782 -- if the simplifier does nothing (not common, I know, but unsavoury)
783 let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ;
784
785 -- Dump the result of this iteration
786 dump_end_iteration dflags print_unqual iteration_no counts1 binds2 rules1 ;
787 lintPassResult hsc_env pass binds2 ;
788
789 -- Loop
790 do_iteration us2 (iteration_no + 1) (counts1:counts_so_far) binds2 rules1
791 } }
792 | otherwise = panic "do_iteration"
793 where
794 (us1, us2) = splitUniqSupply us
795
796 -- Remember the counts_so_far are reversed
797 totalise :: [SimplCount] -> SimplCount
798 totalise = foldr (\c acc -> acc `plusSimplCount` c)
799 (zeroSimplCount dflags)
800
801 simplifyPgmIO _ _ _ _ _ = panic "simplifyPgmIO"
802
803 -------------------
804 dump_end_iteration :: DynFlags -> PrintUnqualified -> Int
805 -> SimplCount -> CoreProgram -> [CoreRule] -> IO ()
806 dump_end_iteration dflags print_unqual iteration_no counts binds rules
807 = dumpPassResult dflags print_unqual mb_flag hdr pp_counts binds rules
808 where
809 mb_flag | dopt Opt_D_dump_simpl_iterations dflags = Just Opt_D_dump_simpl_iterations
810 | otherwise = Nothing
811 -- Show details if Opt_D_dump_simpl_iterations is on
812
813 hdr = text "Simplifier iteration=" <> int iteration_no
814 pp_counts = vcat [ text "---- Simplifier counts for" <+> hdr
815 , pprSimplCount counts
816 , text "---- End of simplifier counts for" <+> hdr ]
817
818 {-
819 ************************************************************************
820 * *
821 Shorting out indirections
822 * *
823 ************************************************************************
824
825 If we have this:
826
827 x_local = <expression>
828 ...bindings...
829 x_exported = x_local
830
831 where x_exported is exported, and x_local is not, then we replace it with this:
832
833 x_exported = <expression>
834 x_local = x_exported
835 ...bindings...
836
837 Without this we never get rid of the x_exported = x_local thing. This
838 save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
839 makes strictness information propagate better. This used to happen in
840 the final phase, but it's tidier to do it here.
841
842 Note [Transferring IdInfo]
843 ~~~~~~~~~~~~~~~~~~~~~~~~~~
844 We want to propagage any useful IdInfo on x_local to x_exported.
845
846 STRICTNESS: if we have done strictness analysis, we want the strictness info on
847 x_local to transfer to x_exported. Hence the copyIdInfo call.
848
849 RULES: we want to *add* any RULES for x_local to x_exported.
850
851
852 Note [Messing up the exported Id's RULES]
853 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
854 We must be careful about discarding (obviously) or even merging the
855 RULES on the exported Id. The example that went bad on me at one stage
856 was this one:
857
858 iterate :: (a -> a) -> a -> [a]
859 [Exported]
860 iterate = iterateList
861
862 iterateFB c f x = x `c` iterateFB c f (f x)
863 iterateList f x = x : iterateList f (f x)
864 [Not exported]
865
866 {-# RULES
867 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
868 "iterateFB" iterateFB (:) = iterateList
869 #-}
870
871 This got shorted out to:
872
873 iterateList :: (a -> a) -> a -> [a]
874 iterateList = iterate
875
876 iterateFB c f x = x `c` iterateFB c f (f x)
877 iterate f x = x : iterate f (f x)
878
879 {-# RULES
880 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
881 "iterateFB" iterateFB (:) = iterate
882 #-}
883
884 And now we get an infinite loop in the rule system
885 iterate f x -> build (\cn -> iterateFB c f x)
886 -> iterateFB (:) f x
887 -> iterate f x
888
889 Old "solution":
890 use rule switching-off pragmas to get rid
891 of iterateList in the first place
892
893 But in principle the user *might* want rules that only apply to the Id
894 he says. And inline pragmas are similar
895 {-# NOINLINE f #-}
896 f = local
897 local = <stuff>
898 Then we do not want to get rid of the NOINLINE.
899
900 Hence hasShortableIdinfo.
901
902
903 Note [Rules and indirection-zapping]
904 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
905 Problem: what if x_exported has a RULE that mentions something in ...bindings...?
906 Then the things mentioned can be out of scope! Solution
907 a) Make sure that in this pass the usage-info from x_exported is
908 available for ...bindings...
909 b) If there are any such RULES, rec-ify the entire top-level.
910 It'll get sorted out next time round
911
912 Other remarks
913 ~~~~~~~~~~~~~
914 If more than one exported thing is equal to a local thing (i.e., the
915 local thing really is shared), then we do one only:
916 \begin{verbatim}
917 x_local = ....
918 x_exported1 = x_local
919 x_exported2 = x_local
920 ==>
921 x_exported1 = ....
922
923 x_exported2 = x_exported1
924 \end{verbatim}
925
926 We rely on prior eta reduction to simplify things like
927 \begin{verbatim}
928 x_exported = /\ tyvars -> x_local tyvars
929 ==>
930 x_exported = x_local
931 \end{verbatim}
932 Hence,there's a possibility of leaving unchanged something like this:
933 \begin{verbatim}
934 x_local = ....
935 x_exported1 = x_local Int
936 \end{verbatim}
937 By the time we've thrown away the types in STG land this
938 could be eliminated. But I don't think it's very common
939 and it's dangerous to do this fiddling in STG land
940 because we might elminate a binding that's mentioned in the
941 unfolding for something.
942
943 Note [Indirection zapping and ticks]
944 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
945
946 Unfortunately this is another place where we need a special case for
947 ticks. The following happens quite regularly:
948
949 x_local = <expression>
950 x_exported = tick<x> x_local
951
952 Which we want to become:
953
954 x_exported = tick<x> <expression>
955
956 As it makes no sense to keep the tick and the expression on separate
957 bindings. Note however that that this might increase the ticks scoping
958 over the execution of x_local, so we can only do this for floatable
959 ticks. More often than not, other references will be unfoldings of
960 x_exported, and therefore carry the tick anyway.
961 -}
962
963 type IndEnv = IdEnv (Id, [Tickish Var]) -- Maps local_id -> exported_id, ticks
964
965 shortOutIndirections :: CoreProgram -> CoreProgram
966 shortOutIndirections binds
967 | isEmptyVarEnv ind_env = binds
968 | no_need_to_flatten = binds' -- See Note [Rules and indirect-zapping]
969 | otherwise = [Rec (flattenBinds binds')] -- for this no_need_to_flatten stuff
970 where
971 ind_env = makeIndEnv binds
972 -- These exported Ids are the subjects of the indirection-elimination
973 exp_ids = map fst $ nonDetEltsUFM ind_env
974 -- It's OK to use nonDetEltsUFM here because we forget the ordering
975 -- by immediately converting to a set or check if all the elements
976 -- satisfy a predicate.
977 exp_id_set = mkVarSet exp_ids
978 no_need_to_flatten = all (null . ruleInfoRules . idSpecialisation) exp_ids
979 binds' = concatMap zap binds
980
981 zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)]
982 zap (Rec pairs) = [Rec (concatMap zapPair pairs)]
983
984 zapPair (bndr, rhs)
985 | bndr `elemVarSet` exp_id_set = []
986 | Just (exp_id, ticks) <- lookupVarEnv ind_env bndr
987 = [(transferIdInfo exp_id bndr,
988 mkTicks ticks rhs),
989 (bndr, Var exp_id)]
990 | otherwise = [(bndr,rhs)]
991
992 makeIndEnv :: [CoreBind] -> IndEnv
993 makeIndEnv binds
994 = foldr add_bind emptyVarEnv binds
995 where
996 add_bind :: CoreBind -> IndEnv -> IndEnv
997 add_bind (NonRec exported_id rhs) env = add_pair (exported_id, rhs) env
998 add_bind (Rec pairs) env = foldr add_pair env pairs
999
1000 add_pair :: (Id,CoreExpr) -> IndEnv -> IndEnv
1001 add_pair (exported_id, exported) env
1002 | (ticks, Var local_id) <- stripTicksTop tickishFloatable exported
1003 , shortMeOut env exported_id local_id
1004 = extendVarEnv env local_id (exported_id, ticks)
1005 add_pair _ env = env
1006
1007 -----------------
1008 shortMeOut :: IndEnv -> Id -> Id -> Bool
1009 shortMeOut ind_env exported_id local_id
1010 -- The if-then-else stuff is just so I can get a pprTrace to see
1011 -- how often I don't get shorting out because of IdInfo stuff
1012 = if isExportedId exported_id && -- Only if this is exported
1013
1014 isLocalId local_id && -- Only if this one is defined in this
1015 -- module, so that we *can* change its
1016 -- binding to be the exported thing!
1017
1018 not (isExportedId local_id) && -- Only if this one is not itself exported,
1019 -- since the transformation will nuke it
1020
1021 not (local_id `elemVarEnv` ind_env) -- Only if not already substituted for
1022 then
1023 if hasShortableIdInfo exported_id
1024 then True -- See Note [Messing up the exported Id's IdInfo]
1025 else WARN( True, text "Not shorting out:" <+> ppr exported_id )
1026 False
1027 else
1028 False
1029
1030 -----------------
1031 hasShortableIdInfo :: Id -> Bool
1032 -- True if there is no user-attached IdInfo on exported_id,
1033 -- so we can safely discard it
1034 -- See Note [Messing up the exported Id's IdInfo]
1035 hasShortableIdInfo id
1036 = isEmptyRuleInfo (ruleInfo info)
1037 && isDefaultInlinePragma (inlinePragInfo info)
1038 && not (isStableUnfolding (unfoldingInfo info))
1039 where
1040 info = idInfo id
1041
1042 -----------------
1043 transferIdInfo :: Id -> Id -> Id
1044 -- See Note [Transferring IdInfo]
1045 -- If we have
1046 -- lcl_id = e; exp_id = lcl_id
1047 -- and lcl_id has useful IdInfo, we don't want to discard it by going
1048 -- gbl_id = e; lcl_id = gbl_id
1049 -- Instead, transfer IdInfo from lcl_id to exp_id
1050 -- Overwriting, rather than merging, seems to work ok.
1051 transferIdInfo exported_id local_id
1052 = modifyIdInfo transfer exported_id
1053 where
1054 local_info = idInfo local_id
1055 transfer exp_info = exp_info `setStrictnessInfo` strictnessInfo local_info
1056 `setUnfoldingInfo` unfoldingInfo local_info
1057 `setInlinePragInfo` inlinePragInfo local_info
1058 `setRuleInfo` addRuleInfo (ruleInfo exp_info) new_info
1059 new_info = setRuleInfoHead (idName exported_id)
1060 (ruleInfo local_info)
1061 -- Remember to set the function-name field of the
1062 -- rules as we transfer them from one function to another