c1513b8af6b45de88ab50b99c4016d6c1606cb9b
[ghc.git] / compiler / simplCore / SimplCore.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 \section[SimplCore]{Driver for simplifying @Core@ programs}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplCore ( core2core, simplifyExpr ) where
10
11 #include "HsVersions.h"
12
13 import DynFlags
14 import CoreSyn
15 import HscTypes
16 import CSE ( cseProgram )
17 import Rules ( mkRuleBase, unionRuleBase,
18 extendRuleBaseList, ruleCheckProgram, addRuleInfo, )
19 import PprCore ( pprCoreBindings, pprCoreExpr )
20 import OccurAnal ( occurAnalysePgm, occurAnalyseExpr )
21 import IdInfo
22 import CoreStats ( coreBindsSize, coreBindsStats, exprSize )
23 import CoreUtils ( mkTicks, stripTicksTop )
24 import CoreLint ( endPass, lintPassResult, dumpPassResult,
25 lintAnnots )
26 import Simplify ( simplTopBinds, simplExpr, simplRules )
27 import SimplUtils ( simplEnvForGHCi, activeRule )
28 import SimplEnv
29 import SimplMonad
30 import CoreMonad
31 import qualified ErrUtils as Err
32 import FloatIn ( floatInwards )
33 import FloatOut ( floatOutwards )
34 import FamInstEnv
35 import Id
36 import ErrUtils ( withTiming )
37 import BasicTypes ( CompilerPhase(..), isDefaultInlinePragma )
38 import VarSet
39 import VarEnv
40 import LiberateCase ( liberateCase )
41 import SAT ( doStaticArgs )
42 import Specialise ( specProgram)
43 import SpecConstr ( specConstrProgram)
44 import DmdAnal ( dmdAnalProgram )
45 import CallArity ( callArityAnalProgram )
46 import WorkWrap ( wwTopBinds )
47 import Vectorise ( vectorise )
48 import SrcLoc
49 import Util
50 import Module
51
52 import Maybes
53 import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply )
54 import UniqFM
55 import Outputable
56 import Control.Monad
57 import qualified GHC.LanguageExtensions as LangExt
58
59 #if defined(GHCI)
60 import DynamicLoading ( loadPlugins )
61 import Plugins ( installCoreToDos )
62 #else
63 import DynamicLoading ( pluginError )
64 #endif
65
66 {-
67 ************************************************************************
68 * *
69 \subsection{The driver for the simplifier}
70 * *
71 ************************************************************************
72 -}
73
74 core2core :: HscEnv -> ModGuts -> IO ModGuts
75 core2core hsc_env guts@(ModGuts { mg_module = mod
76 , mg_loc = loc
77 , mg_deps = deps
78 , mg_rdr_env = rdr_env })
79 = do { us <- mkSplitUniqSupply 's'
80 -- make sure all plugins are loaded
81
82 ; let builtin_passes = getCoreToDo dflags
83 orph_mods = mkModuleSet (mod : dep_orphs deps)
84 ;
85 ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base us mod
86 orph_mods print_unqual loc $
87 do { all_passes <- addPluginPasses builtin_passes
88 ; runCorePasses all_passes guts }
89
90 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_stats
91 "Grand total simplifier statistics"
92 (pprSimplCount stats)
93
94 ; return guts2 }
95 where
96 dflags = hsc_dflags hsc_env
97 home_pkg_rules = hptRules hsc_env (dep_mods deps)
98 hpt_rule_base = mkRuleBase home_pkg_rules
99 print_unqual = mkPrintUnqualified dflags rdr_env
100 -- mod: get the module out of the current HscEnv so we can retrieve it from the monad.
101 -- This is very convienent for the users of the monad (e.g. plugins do not have to
102 -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
103 -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
104 -- would mean our cached value would go out of date.
105
106 {-
107 ************************************************************************
108 * *
109 Generating the main optimisation pipeline
110 * *
111 ************************************************************************
112 -}
113
114 getCoreToDo :: DynFlags -> [CoreToDo]
115 getCoreToDo dflags
116 = flatten_todos core_todo
117 where
118 opt_level = optLevel dflags
119 phases = simplPhases dflags
120 max_iter = maxSimplIterations dflags
121 rule_check = ruleCheck dflags
122 call_arity = gopt Opt_CallArity dflags
123 strictness = gopt Opt_Strictness dflags
124 full_laziness = gopt Opt_FullLaziness dflags
125 do_specialise = gopt Opt_Specialise dflags
126 do_float_in = gopt Opt_FloatIn dflags
127 cse = gopt Opt_CSE dflags
128 spec_constr = gopt Opt_SpecConstr dflags
129 liberate_case = gopt Opt_LiberateCase dflags
130 late_dmd_anal = gopt Opt_LateDmdAnal dflags
131 static_args = gopt Opt_StaticArgumentTransformation dflags
132 rules_on = gopt Opt_EnableRewriteRules dflags
133 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
134 ww_on = gopt Opt_WorkerWrapper dflags
135 vectorise_on = gopt Opt_Vectorise dflags
136 static_ptrs = xopt LangExt.StaticPointers dflags
137
138 maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
139
140 maybe_strictness_before phase
141 = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness
142
143 base_mode = SimplMode { sm_phase = panic "base_mode"
144 , sm_names = []
145 , sm_rules = rules_on
146 , sm_eta_expand = eta_expand_on
147 , sm_inline = True
148 , sm_case_case = True }
149
150 simpl_phase phase names iter
151 = CoreDoPasses
152 $ [ maybe_strictness_before phase
153 , CoreDoSimplify iter
154 (base_mode { sm_phase = Phase phase
155 , sm_names = names })
156
157 , maybe_rule_check (Phase phase) ]
158
159 -- Vectorisation can introduce a fair few common sub expressions involving
160 -- DPH primitives. For example, see the Reverse test from dph-examples.
161 -- We need to eliminate these common sub expressions before their definitions
162 -- are inlined in phase 2. The CSE introduces lots of v1 = v2 bindings,
163 -- so we also run simpl_gently to inline them.
164 ++ (if vectorise_on && phase == 3
165 then [CoreCSE, simpl_gently]
166 else [])
167
168 vectorisation
169 = runWhen vectorise_on $
170 CoreDoPasses [ simpl_gently, CoreDoVectorisation ]
171
172 -- By default, we have 2 phases before phase 0.
173
174 -- Want to run with inline phase 2 after the specialiser to give
175 -- maximum chance for fusion to work before we inline build/augment
176 -- in phase 1. This made a difference in 'ansi' where an
177 -- overloaded function wasn't inlined till too late.
178
179 -- Need phase 1 so that build/augment get
180 -- inlined. I found that spectral/hartel/genfft lost some useful
181 -- strictness in the function sumcode' if augment is not inlined
182 -- before strictness analysis runs
183 simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
184 | phase <- [phases, phases-1 .. 1] ]
185
186
187 -- initial simplify: mk specialiser happy: minimum effort please
188 simpl_gently = CoreDoSimplify max_iter
189 (base_mode { sm_phase = InitialPhase
190 , sm_names = ["Gentle"]
191 , sm_rules = rules_on -- Note [RULEs enabled in SimplGently]
192 , sm_inline = not vectorise_on
193 -- See Note [Inline in InitialPhase]
194 , sm_case_case = False })
195 -- Don't do case-of-case transformations.
196 -- This makes full laziness work better
197
198 strictness_pass = if ww_on
199 then [CoreDoStrictness,CoreDoWorkerWrapper]
200 else [CoreDoStrictness]
201
202
203 -- New demand analyser
204 demand_analyser = (CoreDoPasses (
205 strictness_pass ++
206 [simpl_phase 0 ["post-worker-wrapper"] max_iter]
207 ))
208
209 -- Static forms are moved to the top level with the FloatOut pass.
210 -- See Note [Grand plan for static forms] in StaticPtrTable.
211 static_ptrs_float_outwards =
212 runWhen static_ptrs $ CoreDoPasses
213 [ simpl_gently -- Float Out can't handle type lets (sometimes created
214 -- by simpleOptPgm via mkParallelBindings)
215 , CoreDoFloatOutwards FloatOutSwitches
216 { floatOutLambdas = Just 0
217 , floatOutConstants = True
218 , floatOutOverSatApps = False
219 , floatToTopLevelOnly = True
220 }
221 ]
222
223 core_todo =
224 if opt_level == 0 then
225 [ vectorisation,
226 static_ptrs_float_outwards,
227 CoreDoSimplify max_iter
228 (base_mode { sm_phase = Phase 0
229 , sm_names = ["Non-opt simplification"] })
230 ]
231
232 else {- opt_level >= 1 -} [
233
234 -- We want to do the static argument transform before full laziness as it
235 -- may expose extra opportunities to float things outwards. However, to fix
236 -- up the output of the transformation we need at do at least one simplify
237 -- after this before anything else
238 runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
239
240 -- We run vectorisation here for now, but we might also try to run
241 -- it later
242 vectorisation,
243
244 -- initial simplify: mk specialiser happy: minimum effort please
245 simpl_gently,
246
247 -- Specialisation is best done before full laziness
248 -- so that overloaded functions have all their dictionary lambdas manifest
249 runWhen do_specialise CoreDoSpecialising,
250
251 if full_laziness then
252 CoreDoFloatOutwards FloatOutSwitches {
253 floatOutLambdas = Just 0,
254 floatOutConstants = True,
255 floatOutOverSatApps = False,
256 floatToTopLevelOnly = False }
257 -- Was: gentleFloatOutSwitches
258 --
259 -- I have no idea why, but not floating constants to
260 -- top level is very bad in some cases.
261 --
262 -- Notably: p_ident in spectral/rewrite
263 -- Changing from "gentle" to "constantsOnly"
264 -- improved rewrite's allocation by 19%, and
265 -- made 0.0% difference to any other nofib
266 -- benchmark
267 --
268 -- Not doing floatOutOverSatApps yet, we'll do
269 -- that later on when we've had a chance to get more
270 -- accurate arity information. In fact it makes no
271 -- difference at all to performance if we do it here,
272 -- but maybe we save some unnecessary to-and-fro in
273 -- the simplifier.
274 else
275 -- Even with full laziness turned off, we still need to float static
276 -- forms to the top level. See Note [Grand plan for static forms] in
277 -- StaticPtrTable.
278 static_ptrs_float_outwards,
279
280 simpl_phases,
281
282 -- Phase 0: allow all Ids to be inlined now
283 -- This gets foldr inlined before strictness analysis
284
285 -- At least 3 iterations because otherwise we land up with
286 -- huge dead expressions because of an infelicity in the
287 -- simplifier.
288 -- let k = BIG in foldr k z xs
289 -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
290 -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
291 -- Don't stop now!
292 simpl_phase 0 ["main"] (max max_iter 3),
293
294 runWhen do_float_in CoreDoFloatInwards,
295 -- Run float-inwards immediately before the strictness analyser
296 -- Doing so pushes bindings nearer their use site and hence makes
297 -- them more likely to be strict. These bindings might only show
298 -- up after the inlining from simplification. Example in fulsom,
299 -- Csg.calc, where an arg of timesDouble thereby becomes strict.
300
301 runWhen call_arity $ CoreDoPasses
302 [ CoreDoCallArity
303 , simpl_phase 0 ["post-call-arity"] max_iter
304 ],
305
306 runWhen strictness demand_analyser,
307
308 runWhen full_laziness $
309 CoreDoFloatOutwards FloatOutSwitches {
310 floatOutLambdas = floatLamArgs dflags,
311 floatOutConstants = True,
312 floatOutOverSatApps = True,
313 floatToTopLevelOnly = False },
314 -- nofib/spectral/hartel/wang doubles in speed if you
315 -- do full laziness late in the day. It only happens
316 -- after fusion and other stuff, so the early pass doesn't
317 -- catch it. For the record, the redex is
318 -- f_el22 (f_el21 r_midblock)
319
320
321 runWhen cse CoreCSE,
322 -- We want CSE to follow the final full-laziness pass, because it may
323 -- succeed in commoning up things floated out by full laziness.
324 -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
325
326 runWhen do_float_in CoreDoFloatInwards,
327
328 maybe_rule_check (Phase 0),
329
330 -- Case-liberation for -O2. This should be after
331 -- strictness analysis and the simplification which follows it.
332 runWhen liberate_case (CoreDoPasses [
333 CoreLiberateCase,
334 simpl_phase 0 ["post-liberate-case"] max_iter
335 ]), -- Run the simplifier after LiberateCase to vastly
336 -- reduce the possibility of shadowing
337 -- Reason: see Note [Shadowing] in SpecConstr.hs
338
339 runWhen spec_constr CoreDoSpecConstr,
340
341 maybe_rule_check (Phase 0),
342
343 -- Final clean-up simplification:
344 simpl_phase 0 ["final"] max_iter,
345
346 runWhen late_dmd_anal $ CoreDoPasses (
347 strictness_pass ++
348 [simpl_phase 0 ["post-late-ww"] max_iter]
349 ),
350
351 -- Final run of the demand_analyser, ensures that one-shot thunks are
352 -- really really one-shot thunks. Only needed if the demand analyser
353 -- has run at all. See Note [Final Demand Analyser run] in DmdAnal
354 -- It is EXTREMELY IMPORTANT to run this pass, otherwise execution
355 -- can become /exponentially/ more expensive. See Trac #11731, #12996.
356 runWhen (strictness || late_dmd_anal) CoreDoStrictness,
357
358 maybe_rule_check (Phase 0)
359 ]
360
361 -- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity.
362 flatten_todos [] = []
363 flatten_todos (CoreDoNothing : rest) = flatten_todos rest
364 flatten_todos (CoreDoPasses passes : rest) =
365 flatten_todos passes ++ flatten_todos rest
366 flatten_todos (todo : rest) = todo : flatten_todos rest
367
368 -- Loading plugins
369
370 addPluginPasses :: [CoreToDo] -> CoreM [CoreToDo]
371 #if !defined(GHCI)
372 addPluginPasses builtin_passes
373 = do { dflags <- getDynFlags
374 ; let pluginMods = pluginModNames dflags
375 ; unless (null pluginMods) (pluginError pluginMods)
376 ; return builtin_passes }
377 #else
378 addPluginPasses builtin_passes
379 = do { hsc_env <- getHscEnv
380 ; named_plugins <- liftIO (loadPlugins hsc_env)
381 ; foldM query_plug builtin_passes named_plugins }
382 where
383 query_plug todos (_, plug, options) = installCoreToDos plug options todos
384 #endif
385
386 {- Note [Inline in InitialPhase]
387 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
388 In GHC 8 and earlier we did not inline anything in the InitialPhase. But that is
389 confusing for users because when they say INLINE they expect the function to inline
390 right away.
391
392 So now we do inlining immediately, even in the InitialPhase, assuming that the
393 Id's Activation allows it.
394
395 This is a surprisingly big deal. Compiler performance improved a lot
396 when I made this change:
397
398 perf/compiler/T5837.run T5837 [stat too good] (normal)
399 perf/compiler/parsing001.run parsing001 [stat too good] (normal)
400 perf/compiler/T12234.run T12234 [stat too good] (optasm)
401 perf/compiler/T9020.run T9020 [stat too good] (optasm)
402 perf/compiler/T3064.run T3064 [stat too good] (normal)
403 perf/compiler/T9961.run T9961 [stat too good] (normal)
404 perf/compiler/T13056.run T13056 [stat too good] (optasm)
405 perf/compiler/T9872d.run T9872d [stat too good] (normal)
406 perf/compiler/T783.run T783 [stat too good] (normal)
407 perf/compiler/T12227.run T12227 [stat too good] (normal)
408 perf/should_run/lazy-bs-alloc.run lazy-bs-alloc [stat too good] (normal)
409 perf/compiler/T1969.run T1969 [stat too good] (normal)
410 perf/compiler/T9872a.run T9872a [stat too good] (normal)
411 perf/compiler/T9872c.run T9872c [stat too good] (normal)
412 perf/compiler/T9872b.run T9872b [stat too good] (normal)
413 perf/compiler/T9872d.run T9872d [stat too good] (normal)
414
415 Note [RULEs enabled in SimplGently]
416 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
417 RULES are enabled when doing "gentle" simplification. Two reasons:
418
419 * We really want the class-op cancellation to happen:
420 op (df d1 d2) --> $cop3 d1 d2
421 because this breaks the mutual recursion between 'op' and 'df'
422
423 * I wanted the RULE
424 lift String ===> ...
425 to work in Template Haskell when simplifying
426 splices, so we get simpler code for literal strings
427
428 But watch out: list fusion can prevent floating. So use phase control
429 to switch off those rules until after floating.
430
431 ************************************************************************
432 * *
433 The CoreToDo interpreter
434 * *
435 ************************************************************************
436 -}
437
438 runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts
439 runCorePasses passes guts
440 = foldM do_pass guts passes
441 where
442 do_pass guts CoreDoNothing = return guts
443 do_pass guts (CoreDoPasses ps) = runCorePasses ps guts
444 do_pass guts pass
445 = withTiming getDynFlags
446 (ppr pass <+> brackets (ppr mod))
447 (const ()) $ do
448 { guts' <- lintAnnots (ppr pass) (doCorePass pass) guts
449 ; endPass pass (mg_binds guts') (mg_rules guts')
450 ; return guts' }
451
452 mod = mg_module guts
453
454 doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts
455 doCorePass pass@(CoreDoSimplify {}) = {-# SCC "Simplify" #-}
456 simplifyPgm pass
457
458 doCorePass CoreCSE = {-# SCC "CommonSubExpr" #-}
459 doPass cseProgram
460
461 doCorePass CoreLiberateCase = {-# SCC "LiberateCase" #-}
462 doPassD liberateCase
463
464 doCorePass CoreDoFloatInwards = {-# SCC "FloatInwards" #-}
465 floatInwards
466
467 doCorePass (CoreDoFloatOutwards f) = {-# SCC "FloatOutwards" #-}
468 doPassDUM (floatOutwards f)
469
470 doCorePass CoreDoStaticArgs = {-# SCC "StaticArgs" #-}
471 doPassU doStaticArgs
472
473 doCorePass CoreDoCallArity = {-# SCC "CallArity" #-}
474 doPassD callArityAnalProgram
475
476 doCorePass CoreDoStrictness = {-# SCC "NewStranal" #-}
477 doPassDFM dmdAnalProgram
478
479 doCorePass CoreDoWorkerWrapper = {-# SCC "WorkWrap" #-}
480 doPassDFU wwTopBinds
481
482 doCorePass CoreDoSpecialising = {-# SCC "Specialise" #-}
483 specProgram
484
485 doCorePass CoreDoSpecConstr = {-# SCC "SpecConstr" #-}
486 specConstrProgram
487
488 doCorePass CoreDoVectorisation = {-# SCC "Vectorise" #-}
489 vectorise
490
491 doCorePass CoreDoPrintCore = observe printCore
492 doCorePass (CoreDoRuleCheck phase pat) = ruleCheckPass phase pat
493 doCorePass CoreDoNothing = return
494 doCorePass (CoreDoPasses passes) = runCorePasses passes
495
496 #if defined(GHCI)
497 doCorePass (CoreDoPluginPass _ pass) = {-# SCC "Plugin" #-} pass
498 #endif
499
500 doCorePass pass = pprPanic "doCorePass" (ppr pass)
501
502 {-
503 ************************************************************************
504 * *
505 \subsection{Core pass combinators}
506 * *
507 ************************************************************************
508 -}
509
510 printCore :: DynFlags -> CoreProgram -> IO ()
511 printCore dflags binds
512 = Err.dumpIfSet dflags True "Print Core" (pprCoreBindings binds)
513
514 ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts
515 ruleCheckPass current_phase pat guts =
516 withTiming getDynFlags
517 (text "RuleCheck"<+>brackets (ppr $ mg_module guts))
518 (const ()) $ do
519 { rb <- getRuleBase
520 ; dflags <- getDynFlags
521 ; vis_orphs <- getVisibleOrphanMods
522 ; liftIO $ putLogMsg dflags NoReason Err.SevDump noSrcSpan
523 (defaultDumpStyle dflags)
524 (ruleCheckProgram current_phase pat
525 (RuleEnv rb vis_orphs) (mg_binds guts))
526 ; return guts }
527
528 doPassDUM :: (DynFlags -> UniqSupply -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
529 doPassDUM do_pass = doPassM $ \binds -> do
530 dflags <- getDynFlags
531 us <- getUniqueSupplyM
532 liftIO $ do_pass dflags us binds
533
534 doPassDM :: (DynFlags -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
535 doPassDM do_pass = doPassDUM (\dflags -> const (do_pass dflags))
536
537 doPassD :: (DynFlags -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
538 doPassD do_pass = doPassDM (\dflags -> return . do_pass dflags)
539
540 doPassDU :: (DynFlags -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
541 doPassDU do_pass = doPassDUM (\dflags us -> return . do_pass dflags us)
542
543 doPassU :: (UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
544 doPassU do_pass = doPassDU (const do_pass)
545
546 doPassDFM :: (DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
547 doPassDFM do_pass guts = do
548 dflags <- getDynFlags
549 p_fam_env <- getPackageFamInstEnv
550 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
551 doPassM (liftIO . do_pass dflags fam_envs) guts
552
553 doPassDFU :: (DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
554 doPassDFU do_pass guts = do
555 dflags <- getDynFlags
556 us <- getUniqueSupplyM
557 p_fam_env <- getPackageFamInstEnv
558 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
559 doPass (do_pass dflags fam_envs us) guts
560
561 -- Most passes return no stats and don't change rules: these combinators
562 -- let us lift them to the full blown ModGuts+CoreM world
563 doPassM :: Monad m => (CoreProgram -> m CoreProgram) -> ModGuts -> m ModGuts
564 doPassM bind_f guts = do
565 binds' <- bind_f (mg_binds guts)
566 return (guts { mg_binds = binds' })
567
568 doPass :: (CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
569 doPass bind_f guts = return $ guts { mg_binds = bind_f (mg_binds guts) }
570
571 -- Observer passes just peek; don't modify the bindings at all
572 observe :: (DynFlags -> CoreProgram -> IO a) -> ModGuts -> CoreM ModGuts
573 observe do_pass = doPassM $ \binds -> do
574 dflags <- getDynFlags
575 _ <- liftIO $ do_pass dflags binds
576 return binds
577
578 {-
579 ************************************************************************
580 * *
581 Gentle simplification
582 * *
583 ************************************************************************
584 -}
585
586 simplifyExpr :: DynFlags -- includes spec of what core-to-core passes to do
587 -> CoreExpr
588 -> IO CoreExpr
589 -- simplifyExpr is called by the driver to simplify an
590 -- expression typed in at the interactive prompt
591 --
592 -- Also used by Template Haskell
593 simplifyExpr dflags expr
594 = withTiming (pure dflags) (text "Simplify [expr]") (const ()) $
595 do {
596 ; us <- mkSplitUniqSupply 's'
597
598 ; let sz = exprSize expr
599
600 ; (expr', counts) <- initSmpl dflags emptyRuleEnv
601 emptyFamInstEnvs us sz
602 (simplExprGently (simplEnvForGHCi dflags) expr)
603
604 ; Err.dumpIfSet dflags (dopt Opt_D_dump_simpl_stats dflags)
605 "Simplifier statistics" (pprSimplCount counts)
606
607 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl "Simplified expression"
608 (pprCoreExpr expr')
609
610 ; return expr'
611 }
612
613 simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr
614 -- Simplifies an expression
615 -- does occurrence analysis, then simplification
616 -- and repeats (twice currently) because one pass
617 -- alone leaves tons of crud.
618 -- Used (a) for user expressions typed in at the interactive prompt
619 -- (b) the LHS and RHS of a RULE
620 -- (c) Template Haskell splices
621 --
622 -- The name 'Gently' suggests that the SimplifierMode is SimplGently,
623 -- and in fact that is so.... but the 'Gently' in simplExprGently doesn't
624 -- enforce that; it just simplifies the expression twice
625
626 -- It's important that simplExprGently does eta reduction; see
627 -- Note [Simplifying the left-hand side of a RULE] above. The
628 -- simplifier does indeed do eta reduction (it's in Simplify.completeLam)
629 -- but only if -O is on.
630
631 simplExprGently env expr = do
632 expr1 <- simplExpr env (occurAnalyseExpr expr)
633 simplExpr env (occurAnalyseExpr expr1)
634
635 {-
636 ************************************************************************
637 * *
638 \subsection{The driver for the simplifier}
639 * *
640 ************************************************************************
641 -}
642
643 simplifyPgm :: CoreToDo -> ModGuts -> CoreM ModGuts
644 simplifyPgm pass guts
645 = do { hsc_env <- getHscEnv
646 ; us <- getUniqueSupplyM
647 ; rb <- getRuleBase
648 ; liftIOWithCount $
649 simplifyPgmIO pass hsc_env us rb guts }
650
651 simplifyPgmIO :: CoreToDo
652 -> HscEnv
653 -> UniqSupply
654 -> RuleBase
655 -> ModGuts
656 -> IO (SimplCount, ModGuts) -- New bindings
657
658 simplifyPgmIO pass@(CoreDoSimplify max_iterations mode)
659 hsc_env us hpt_rule_base
660 guts@(ModGuts { mg_module = this_mod
661 , mg_rdr_env = rdr_env
662 , mg_deps = deps
663 , mg_binds = binds, mg_rules = rules
664 , mg_fam_inst_env = fam_inst_env })
665 = do { (termination_msg, it_count, counts_out, guts')
666 <- do_iteration us 1 [] binds rules
667
668 ; Err.dumpIfSet dflags (dopt Opt_D_verbose_core2core dflags &&
669 dopt Opt_D_dump_simpl_stats dflags)
670 "Simplifier statistics for following pass"
671 (vcat [text termination_msg <+> text "after" <+> ppr it_count
672 <+> text "iterations",
673 blankLine,
674 pprSimplCount counts_out])
675
676 ; return (counts_out, guts')
677 }
678 where
679 dflags = hsc_dflags hsc_env
680 print_unqual = mkPrintUnqualified dflags rdr_env
681 simpl_env = mkSimplEnv mode
682 active_rule = activeRule simpl_env
683
684 do_iteration :: UniqSupply
685 -> Int -- Counts iterations
686 -> [SimplCount] -- Counts from earlier iterations, reversed
687 -> CoreProgram -- Bindings in
688 -> [CoreRule] -- and orphan rules
689 -> IO (String, Int, SimplCount, ModGuts)
690
691 do_iteration us iteration_no counts_so_far binds rules
692 -- iteration_no is the number of the iteration we are
693 -- about to begin, with '1' for the first
694 | iteration_no > max_iterations -- Stop if we've run out of iterations
695 = WARN( debugIsOn && (max_iterations > 2)
696 , hang (text "Simplifier bailing out after" <+> int max_iterations
697 <+> text "iterations"
698 <+> (brackets $ hsep $ punctuate comma $
699 map (int . simplCountN) (reverse counts_so_far)))
700 2 (text "Size =" <+> ppr (coreBindsStats binds)))
701
702 -- Subtract 1 from iteration_no to get the
703 -- number of iterations we actually completed
704 return ( "Simplifier baled out", iteration_no - 1
705 , totalise counts_so_far
706 , guts { mg_binds = binds, mg_rules = rules } )
707
708 -- Try and force thunks off the binds; significantly reduces
709 -- space usage, especially with -O. JRS, 000620.
710 | let sz = coreBindsSize binds
711 , () <- sz `seq` () -- Force it
712 = do {
713 -- Occurrence analysis
714 let { -- Note [Vectorisation declarations and occurrences]
715 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
716 -- During the 'InitialPhase' (i.e., before vectorisation), we need to make sure
717 -- that the right-hand sides of vectorisation declarations are taken into
718 -- account during occurrence analysis. After the 'InitialPhase', we need to ensure
719 -- that the binders representing variable vectorisation declarations are kept alive.
720 -- (In contrast to automatically vectorised variables, their unvectorised versions
721 -- don't depend on them.)
722 vectVars = mkVarSet $
723 catMaybes [ fmap snd $ lookupDVarEnv (vectInfoVar (mg_vect_info guts)) bndr
724 | Vect bndr _ <- mg_vect_decls guts]
725 ++
726 catMaybes [ fmap snd $ lookupDVarEnv (vectInfoVar (mg_vect_info guts)) bndr
727 | bndr <- bindersOfBinds binds]
728 -- FIXME: This second comprehensions is only needed as long as we
729 -- have vectorised bindings where we get "Could NOT call
730 -- vectorised from original version".
731 ; (maybeVects, maybeVectVars)
732 = case sm_phase mode of
733 InitialPhase -> (mg_vect_decls guts, vectVars)
734 _ -> ([], vectVars)
735 ; tagged_binds = {-# SCC "OccAnal" #-}
736 occurAnalysePgm this_mod active_rule rules
737 maybeVects maybeVectVars binds
738 } ;
739 Err.dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis"
740 (pprCoreBindings tagged_binds);
741
742 -- Get any new rules, and extend the rule base
743 -- See Note [Overall plumbing for rules] in Rules.hs
744 -- We need to do this regularly, because simplification can
745 -- poke on IdInfo thunks, which in turn brings in new rules
746 -- behind the scenes. Otherwise there's a danger we'll simply
747 -- miss the rules for Ids hidden inside imported inlinings
748 eps <- hscEPS hsc_env ;
749 let { rule_base1 = unionRuleBase hpt_rule_base (eps_rule_base eps)
750 ; rule_base2 = extendRuleBaseList rule_base1 rules
751 ; fam_envs = (eps_fam_inst_env eps, fam_inst_env)
752 ; vis_orphs = this_mod : dep_orphs deps } ;
753
754 -- Simplify the program
755 ((binds1, rules1), counts1) <-
756 initSmpl dflags (mkRuleEnv rule_base2 vis_orphs) fam_envs us1 sz $
757 do { env1 <- {-# SCC "SimplTopBinds" #-}
758 simplTopBinds simpl_env tagged_binds
759
760 -- Apply the substitution to rules defined in this module
761 -- for imported Ids. Eg RULE map my_f = blah
762 -- If we have a substitution my_f :-> other_f, we'd better
763 -- apply it to the rule to, or it'll never match
764 ; rules1 <- simplRules env1 Nothing rules
765
766 ; return (getFloatBinds env1, rules1) } ;
767
768 -- Stop if nothing happened; don't dump output
769 if isZeroSimplCount counts1 then
770 return ( "Simplifier reached fixed point", iteration_no
771 , totalise (counts1 : counts_so_far) -- Include "free" ticks
772 , guts { mg_binds = binds1, mg_rules = rules1 } )
773 else do {
774 -- Short out indirections
775 -- We do this *after* at least one run of the simplifier
776 -- because indirection-shorting uses the export flag on *occurrences*
777 -- and that isn't guaranteed to be ok until after the first run propagates
778 -- stuff from the binding site to its occurrences
779 --
780 -- ToDo: alas, this means that indirection-shorting does not happen at all
781 -- if the simplifier does nothing (not common, I know, but unsavoury)
782 let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ;
783
784 -- Dump the result of this iteration
785 dump_end_iteration dflags print_unqual iteration_no counts1 binds2 rules1 ;
786 lintPassResult hsc_env pass binds2 ;
787
788 -- Loop
789 do_iteration us2 (iteration_no + 1) (counts1:counts_so_far) binds2 rules1
790 } }
791 | otherwise = panic "do_iteration"
792 where
793 (us1, us2) = splitUniqSupply us
794
795 -- Remember the counts_so_far are reversed
796 totalise :: [SimplCount] -> SimplCount
797 totalise = foldr (\c acc -> acc `plusSimplCount` c)
798 (zeroSimplCount dflags)
799
800 simplifyPgmIO _ _ _ _ _ = panic "simplifyPgmIO"
801
802 -------------------
803 dump_end_iteration :: DynFlags -> PrintUnqualified -> Int
804 -> SimplCount -> CoreProgram -> [CoreRule] -> IO ()
805 dump_end_iteration dflags print_unqual iteration_no counts binds rules
806 = dumpPassResult dflags print_unqual mb_flag hdr pp_counts binds rules
807 where
808 mb_flag | dopt Opt_D_dump_simpl_iterations dflags = Just Opt_D_dump_simpl_iterations
809 | otherwise = Nothing
810 -- Show details if Opt_D_dump_simpl_iterations is on
811
812 hdr = text "Simplifier iteration=" <> int iteration_no
813 pp_counts = vcat [ text "---- Simplifier counts for" <+> hdr
814 , pprSimplCount counts
815 , text "---- End of simplifier counts for" <+> hdr ]
816
817 {-
818 ************************************************************************
819 * *
820 Shorting out indirections
821 * *
822 ************************************************************************
823
824 If we have this:
825
826 x_local = <expression>
827 ...bindings...
828 x_exported = x_local
829
830 where x_exported is exported, and x_local is not, then we replace it with this:
831
832 x_exported = <expression>
833 x_local = x_exported
834 ...bindings...
835
836 Without this we never get rid of the x_exported = x_local thing. This
837 save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
838 makes strictness information propagate better. This used to happen in
839 the final phase, but it's tidier to do it here.
840
841 Note [Transferring IdInfo]
842 ~~~~~~~~~~~~~~~~~~~~~~~~~~
843 We want to propagage any useful IdInfo on x_local to x_exported.
844
845 STRICTNESS: if we have done strictness analysis, we want the strictness info on
846 x_local to transfer to x_exported. Hence the copyIdInfo call.
847
848 RULES: we want to *add* any RULES for x_local to x_exported.
849
850
851 Note [Messing up the exported Id's RULES]
852 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
853 We must be careful about discarding (obviously) or even merging the
854 RULES on the exported Id. The example that went bad on me at one stage
855 was this one:
856
857 iterate :: (a -> a) -> a -> [a]
858 [Exported]
859 iterate = iterateList
860
861 iterateFB c f x = x `c` iterateFB c f (f x)
862 iterateList f x = x : iterateList f (f x)
863 [Not exported]
864
865 {-# RULES
866 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
867 "iterateFB" iterateFB (:) = iterateList
868 #-}
869
870 This got shorted out to:
871
872 iterateList :: (a -> a) -> a -> [a]
873 iterateList = iterate
874
875 iterateFB c f x = x `c` iterateFB c f (f x)
876 iterate f x = x : iterate f (f x)
877
878 {-# RULES
879 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
880 "iterateFB" iterateFB (:) = iterate
881 #-}
882
883 And now we get an infinite loop in the rule system
884 iterate f x -> build (\cn -> iterateFB c f x)
885 -> iterateFB (:) f x
886 -> iterate f x
887
888 Old "solution":
889 use rule switching-off pragmas to get rid
890 of iterateList in the first place
891
892 But in principle the user *might* want rules that only apply to the Id
893 he says. And inline pragmas are similar
894 {-# NOINLINE f #-}
895 f = local
896 local = <stuff>
897 Then we do not want to get rid of the NOINLINE.
898
899 Hence hasShortableIdinfo.
900
901
902 Note [Rules and indirection-zapping]
903 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
904 Problem: what if x_exported has a RULE that mentions something in ...bindings...?
905 Then the things mentioned can be out of scope! Solution
906 a) Make sure that in this pass the usage-info from x_exported is
907 available for ...bindings...
908 b) If there are any such RULES, rec-ify the entire top-level.
909 It'll get sorted out next time round
910
911 Other remarks
912 ~~~~~~~~~~~~~
913 If more than one exported thing is equal to a local thing (i.e., the
914 local thing really is shared), then we do one only:
915 \begin{verbatim}
916 x_local = ....
917 x_exported1 = x_local
918 x_exported2 = x_local
919 ==>
920 x_exported1 = ....
921
922 x_exported2 = x_exported1
923 \end{verbatim}
924
925 We rely on prior eta reduction to simplify things like
926 \begin{verbatim}
927 x_exported = /\ tyvars -> x_local tyvars
928 ==>
929 x_exported = x_local
930 \end{verbatim}
931 Hence,there's a possibility of leaving unchanged something like this:
932 \begin{verbatim}
933 x_local = ....
934 x_exported1 = x_local Int
935 \end{verbatim}
936 By the time we've thrown away the types in STG land this
937 could be eliminated. But I don't think it's very common
938 and it's dangerous to do this fiddling in STG land
939 because we might elminate a binding that's mentioned in the
940 unfolding for something.
941
942 Note [Indirection zapping and ticks]
943 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
944
945 Unfortunately this is another place where we need a special case for
946 ticks. The following happens quite regularly:
947
948 x_local = <expression>
949 x_exported = tick<x> x_local
950
951 Which we want to become:
952
953 x_exported = tick<x> <expression>
954
955 As it makes no sense to keep the tick and the expression on separate
956 bindings. Note however that that this might increase the ticks scoping
957 over the execution of x_local, so we can only do this for floatable
958 ticks. More often than not, other references will be unfoldings of
959 x_exported, and therefore carry the tick anyway.
960 -}
961
962 type IndEnv = IdEnv (Id, [Tickish Var]) -- Maps local_id -> exported_id, ticks
963
964 shortOutIndirections :: CoreProgram -> CoreProgram
965 shortOutIndirections binds
966 | isEmptyVarEnv ind_env = binds
967 | no_need_to_flatten = binds' -- See Note [Rules and indirect-zapping]
968 | otherwise = [Rec (flattenBinds binds')] -- for this no_need_to_flatten stuff
969 where
970 ind_env = makeIndEnv binds
971 -- These exported Ids are the subjects of the indirection-elimination
972 exp_ids = map fst $ nonDetEltsUFM ind_env
973 -- It's OK to use nonDetEltsUFM here because we forget the ordering
974 -- by immediately converting to a set or check if all the elements
975 -- satisfy a predicate.
976 exp_id_set = mkVarSet exp_ids
977 no_need_to_flatten = all (null . ruleInfoRules . idSpecialisation) exp_ids
978 binds' = concatMap zap binds
979
980 zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)]
981 zap (Rec pairs) = [Rec (concatMap zapPair pairs)]
982
983 zapPair (bndr, rhs)
984 | bndr `elemVarSet` exp_id_set = []
985 | Just (exp_id, ticks) <- lookupVarEnv ind_env bndr
986 = [(transferIdInfo exp_id bndr,
987 mkTicks ticks rhs),
988 (bndr, Var exp_id)]
989 | otherwise = [(bndr,rhs)]
990
991 makeIndEnv :: [CoreBind] -> IndEnv
992 makeIndEnv binds
993 = foldr add_bind emptyVarEnv binds
994 where
995 add_bind :: CoreBind -> IndEnv -> IndEnv
996 add_bind (NonRec exported_id rhs) env = add_pair (exported_id, rhs) env
997 add_bind (Rec pairs) env = foldr add_pair env pairs
998
999 add_pair :: (Id,CoreExpr) -> IndEnv -> IndEnv
1000 add_pair (exported_id, exported) env
1001 | (ticks, Var local_id) <- stripTicksTop tickishFloatable exported
1002 , shortMeOut env exported_id local_id
1003 = extendVarEnv env local_id (exported_id, ticks)
1004 add_pair _ env = env
1005
1006 -----------------
1007 shortMeOut :: IndEnv -> Id -> Id -> Bool
1008 shortMeOut ind_env exported_id local_id
1009 -- The if-then-else stuff is just so I can get a pprTrace to see
1010 -- how often I don't get shorting out because of IdInfo stuff
1011 = if isExportedId exported_id && -- Only if this is exported
1012
1013 isLocalId local_id && -- Only if this one is defined in this
1014 -- module, so that we *can* change its
1015 -- binding to be the exported thing!
1016
1017 not (isExportedId local_id) && -- Only if this one is not itself exported,
1018 -- since the transformation will nuke it
1019
1020 not (local_id `elemVarEnv` ind_env) -- Only if not already substituted for
1021 then
1022 if hasShortableIdInfo exported_id
1023 then True -- See Note [Messing up the exported Id's IdInfo]
1024 else WARN( True, text "Not shorting out:" <+> ppr exported_id )
1025 False
1026 else
1027 False
1028
1029 -----------------
1030 hasShortableIdInfo :: Id -> Bool
1031 -- True if there is no user-attached IdInfo on exported_id,
1032 -- so we can safely discard it
1033 -- See Note [Messing up the exported Id's IdInfo]
1034 hasShortableIdInfo id
1035 = isEmptyRuleInfo (ruleInfo info)
1036 && isDefaultInlinePragma (inlinePragInfo info)
1037 && not (isStableUnfolding (unfoldingInfo info))
1038 where
1039 info = idInfo id
1040
1041 -----------------
1042 transferIdInfo :: Id -> Id -> Id
1043 -- See Note [Transferring IdInfo]
1044 -- If we have
1045 -- lcl_id = e; exp_id = lcl_id
1046 -- and lcl_id has useful IdInfo, we don't want to discard it by going
1047 -- gbl_id = e; lcl_id = gbl_id
1048 -- Instead, transfer IdInfo from lcl_id to exp_id
1049 -- Overwriting, rather than merging, seems to work ok.
1050 transferIdInfo exported_id local_id
1051 = modifyIdInfo transfer exported_id
1052 where
1053 local_info = idInfo local_id
1054 transfer exp_info = exp_info `setStrictnessInfo` strictnessInfo local_info
1055 `setUnfoldingInfo` unfoldingInfo local_info
1056 `setInlinePragInfo` inlinePragInfo local_info
1057 `setRuleInfo` addRuleInfo (ruleInfo exp_info) new_info
1058 new_info = setRuleInfoHead (idName exported_id)
1059 (ruleInfo local_info)
1060 -- Remember to set the function-name field of the
1061 -- rules as we transfer them from one function to another