Add GHCi help message for :def! and :: commands
[ghc.git] / compiler / simplCore / SimplCore.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 \section[SimplCore]{Driver for simplifying @Core@ programs}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplCore ( core2core, simplifyExpr ) where
10
11 #include "HsVersions.h"
12
13 import GhcPrelude
14
15 import DynFlags
16 import CoreSyn
17 import HscTypes
18 import CSE ( cseProgram )
19 import Rules ( mkRuleBase, unionRuleBase,
20 extendRuleBaseList, ruleCheckProgram, addRuleInfo,
21 getRules )
22 import PprCore ( pprCoreBindings, pprCoreExpr )
23 import OccurAnal ( occurAnalysePgm, occurAnalyseExpr )
24 import IdInfo
25 import CoreStats ( coreBindsSize, coreBindsStats, exprSize )
26 import CoreUtils ( mkTicks, stripTicksTop )
27 import CoreLint ( endPass, lintPassResult, dumpPassResult,
28 lintAnnots )
29 import Simplify ( simplTopBinds, simplExpr, simplRules )
30 import SimplUtils ( simplEnvForGHCi, activeRule, activeUnfolding )
31 import SimplEnv
32 import SimplMonad
33 import CoreMonad
34 import qualified ErrUtils as Err
35 import FloatIn ( floatInwards )
36 import FloatOut ( floatOutwards )
37 import FamInstEnv
38 import Id
39 import ErrUtils ( withTiming )
40 import BasicTypes ( CompilerPhase(..), isDefaultInlinePragma, defaultInlinePragma )
41 import VarSet
42 import VarEnv
43 import LiberateCase ( liberateCase )
44 import SAT ( doStaticArgs )
45 import Specialise ( specProgram)
46 import SpecConstr ( specConstrProgram)
47 import DmdAnal ( dmdAnalProgram )
48 import CallArity ( callArityAnalProgram )
49 import Exitify ( exitifyProgram )
50 import WorkWrap ( wwTopBinds )
51 import SrcLoc
52 import Util
53 import Module
54 import Plugins ( withPlugins, installCoreToDos )
55 import DynamicLoading -- ( initializePlugins )
56
57 import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply )
58 import UniqFM
59 import Outputable
60 import Control.Monad
61 import qualified GHC.LanguageExtensions as LangExt
62 {-
63 ************************************************************************
64 * *
65 \subsection{The driver for the simplifier}
66 * *
67 ************************************************************************
68 -}
69
70 core2core :: HscEnv -> ModGuts -> IO ModGuts
71 core2core hsc_env guts@(ModGuts { mg_module = mod
72 , mg_loc = loc
73 , mg_deps = deps
74 , mg_rdr_env = rdr_env })
75 = do { us <- mkSplitUniqSupply 's'
76 -- make sure all plugins are loaded
77
78 ; let builtin_passes = getCoreToDo dflags
79 orph_mods = mkModuleSet (mod : dep_orphs deps)
80 ;
81 ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base us mod
82 orph_mods print_unqual loc $
83 do { hsc_env' <- getHscEnv
84 ; dflags' <- liftIO $ initializePlugins hsc_env'
85 (hsc_dflags hsc_env')
86 ; all_passes <- withPlugins dflags'
87 installCoreToDos
88 builtin_passes
89 ; runCorePasses all_passes guts }
90
91 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_stats
92 "Grand total simplifier statistics"
93 (pprSimplCount stats)
94
95 ; return guts2 }
96 where
97 dflags = hsc_dflags hsc_env
98 home_pkg_rules = hptRules hsc_env (dep_mods deps)
99 hpt_rule_base = mkRuleBase home_pkg_rules
100 print_unqual = mkPrintUnqualified dflags rdr_env
101 -- mod: get the module out of the current HscEnv so we can retrieve it from the monad.
102 -- This is very convienent for the users of the monad (e.g. plugins do not have to
103 -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
104 -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
105 -- would mean our cached value would go out of date.
106
107 {-
108 ************************************************************************
109 * *
110 Generating the main optimisation pipeline
111 * *
112 ************************************************************************
113 -}
114
115 getCoreToDo :: DynFlags -> [CoreToDo]
116 getCoreToDo dflags
117 = flatten_todos core_todo
118 where
119 opt_level = optLevel dflags
120 phases = simplPhases dflags
121 max_iter = maxSimplIterations dflags
122 rule_check = ruleCheck dflags
123 call_arity = gopt Opt_CallArity dflags
124 exitification = gopt Opt_Exitification dflags
125 strictness = gopt Opt_Strictness dflags
126 full_laziness = gopt Opt_FullLaziness dflags
127 do_specialise = gopt Opt_Specialise dflags
128 do_float_in = gopt Opt_FloatIn dflags
129 cse = gopt Opt_CSE dflags
130 spec_constr = gopt Opt_SpecConstr dflags
131 liberate_case = gopt Opt_LiberateCase dflags
132 late_dmd_anal = gopt Opt_LateDmdAnal dflags
133 late_specialise = gopt Opt_LateSpecialise dflags
134 static_args = gopt Opt_StaticArgumentTransformation dflags
135 rules_on = gopt Opt_EnableRewriteRules dflags
136 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
137 ww_on = gopt Opt_WorkerWrapper dflags
138 static_ptrs = xopt LangExt.StaticPointers dflags
139
140 maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
141
142 maybe_strictness_before phase
143 = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness
144
145 base_mode = SimplMode { sm_phase = panic "base_mode"
146 , sm_names = []
147 , sm_dflags = dflags
148 , sm_rules = rules_on
149 , sm_eta_expand = eta_expand_on
150 , sm_inline = True
151 , sm_case_case = True }
152
153 simpl_phase phase names iter
154 = CoreDoPasses
155 $ [ maybe_strictness_before phase
156 , CoreDoSimplify iter
157 (base_mode { sm_phase = Phase phase
158 , sm_names = names })
159
160 , maybe_rule_check (Phase phase) ]
161
162 simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
163 | phase <- [phases, phases-1 .. 1] ]
164
165
166 -- initial simplify: mk specialiser happy: minimum effort please
167 simpl_gently = CoreDoSimplify max_iter
168 (base_mode { sm_phase = InitialPhase
169 , sm_names = ["Gentle"]
170 , sm_rules = rules_on -- Note [RULEs enabled in SimplGently]
171 , sm_inline = True
172 -- See Note [Inline in InitialPhase]
173 , sm_case_case = False })
174 -- Don't do case-of-case transformations.
175 -- This makes full laziness work better
176
177 strictness_pass = if ww_on
178 then [CoreDoStrictness,CoreDoWorkerWrapper]
179 else [CoreDoStrictness]
180
181
182 -- New demand analyser
183 demand_analyser = (CoreDoPasses (
184 strictness_pass ++
185 [simpl_phase 0 ["post-worker-wrapper"] max_iter]
186 ))
187
188 -- Static forms are moved to the top level with the FloatOut pass.
189 -- See Note [Grand plan for static forms] in StaticPtrTable.
190 static_ptrs_float_outwards =
191 runWhen static_ptrs $ CoreDoPasses
192 [ simpl_gently -- Float Out can't handle type lets (sometimes created
193 -- by simpleOptPgm via mkParallelBindings)
194 , CoreDoFloatOutwards FloatOutSwitches
195 { floatOutLambdas = Just 0
196 , floatOutConstants = True
197 , floatOutOverSatApps = False
198 , floatToTopLevelOnly = True
199 }
200 ]
201
202 core_todo =
203 if opt_level == 0 then
204 [ static_ptrs_float_outwards,
205 CoreDoSimplify max_iter
206 (base_mode { sm_phase = Phase 0
207 , sm_names = ["Non-opt simplification"] })
208 ]
209
210 else {- opt_level >= 1 -} [
211
212 -- We want to do the static argument transform before full laziness as it
213 -- may expose extra opportunities to float things outwards. However, to fix
214 -- up the output of the transformation we need at do at least one simplify
215 -- after this before anything else
216 runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
217
218 -- initial simplify: mk specialiser happy: minimum effort please
219 simpl_gently,
220
221 -- Specialisation is best done before full laziness
222 -- so that overloaded functions have all their dictionary lambdas manifest
223 runWhen do_specialise CoreDoSpecialising,
224
225 if full_laziness then
226 CoreDoFloatOutwards FloatOutSwitches {
227 floatOutLambdas = Just 0,
228 floatOutConstants = True,
229 floatOutOverSatApps = False,
230 floatToTopLevelOnly = False }
231 -- Was: gentleFloatOutSwitches
232 --
233 -- I have no idea why, but not floating constants to
234 -- top level is very bad in some cases.
235 --
236 -- Notably: p_ident in spectral/rewrite
237 -- Changing from "gentle" to "constantsOnly"
238 -- improved rewrite's allocation by 19%, and
239 -- made 0.0% difference to any other nofib
240 -- benchmark
241 --
242 -- Not doing floatOutOverSatApps yet, we'll do
243 -- that later on when we've had a chance to get more
244 -- accurate arity information. In fact it makes no
245 -- difference at all to performance if we do it here,
246 -- but maybe we save some unnecessary to-and-fro in
247 -- the simplifier.
248 else
249 -- Even with full laziness turned off, we still need to float static
250 -- forms to the top level. See Note [Grand plan for static forms] in
251 -- StaticPtrTable.
252 static_ptrs_float_outwards,
253
254 simpl_phases,
255
256 -- Phase 0: allow all Ids to be inlined now
257 -- This gets foldr inlined before strictness analysis
258
259 -- At least 3 iterations because otherwise we land up with
260 -- huge dead expressions because of an infelicity in the
261 -- simplifier.
262 -- let k = BIG in foldr k z xs
263 -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
264 -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
265 -- Don't stop now!
266 simpl_phase 0 ["main"] (max max_iter 3),
267
268 runWhen do_float_in CoreDoFloatInwards,
269 -- Run float-inwards immediately before the strictness analyser
270 -- Doing so pushes bindings nearer their use site and hence makes
271 -- them more likely to be strict. These bindings might only show
272 -- up after the inlining from simplification. Example in fulsom,
273 -- Csg.calc, where an arg of timesDouble thereby becomes strict.
274
275 runWhen call_arity $ CoreDoPasses
276 [ CoreDoCallArity
277 , simpl_phase 0 ["post-call-arity"] max_iter
278 ],
279
280 runWhen strictness demand_analyser,
281
282 runWhen exitification CoreDoExitify,
283 -- See note [Placement of the exitification pass]
284
285 runWhen full_laziness $
286 CoreDoFloatOutwards FloatOutSwitches {
287 floatOutLambdas = floatLamArgs dflags,
288 floatOutConstants = True,
289 floatOutOverSatApps = True,
290 floatToTopLevelOnly = False },
291 -- nofib/spectral/hartel/wang doubles in speed if you
292 -- do full laziness late in the day. It only happens
293 -- after fusion and other stuff, so the early pass doesn't
294 -- catch it. For the record, the redex is
295 -- f_el22 (f_el21 r_midblock)
296
297
298 runWhen cse CoreCSE,
299 -- We want CSE to follow the final full-laziness pass, because it may
300 -- succeed in commoning up things floated out by full laziness.
301 -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
302
303 runWhen do_float_in CoreDoFloatInwards,
304
305 maybe_rule_check (Phase 0),
306
307 -- Case-liberation for -O2. This should be after
308 -- strictness analysis and the simplification which follows it.
309 runWhen liberate_case (CoreDoPasses [
310 CoreLiberateCase,
311 simpl_phase 0 ["post-liberate-case"] max_iter
312 ]), -- Run the simplifier after LiberateCase to vastly
313 -- reduce the possibility of shadowing
314 -- Reason: see Note [Shadowing] in SpecConstr.hs
315
316 runWhen spec_constr CoreDoSpecConstr,
317
318 maybe_rule_check (Phase 0),
319
320 runWhen late_specialise
321 (CoreDoPasses [ CoreDoSpecialising
322 , simpl_phase 0 ["post-late-spec"] max_iter]),
323
324 -- LiberateCase can yield new CSE opportunities because it peels
325 -- off one layer of a recursive function (concretely, I saw this
326 -- in wheel-sieve1), and I'm guessing that SpecConstr can too
327 -- And CSE is a very cheap pass. So it seems worth doing here.
328 runWhen ((liberate_case || spec_constr) && cse) CoreCSE,
329
330 -- Final clean-up simplification:
331 simpl_phase 0 ["final"] max_iter,
332
333 runWhen late_dmd_anal $ CoreDoPasses (
334 strictness_pass ++
335 [simpl_phase 0 ["post-late-ww"] max_iter]
336 ),
337
338 -- Final run of the demand_analyser, ensures that one-shot thunks are
339 -- really really one-shot thunks. Only needed if the demand analyser
340 -- has run at all. See Note [Final Demand Analyser run] in DmdAnal
341 -- It is EXTREMELY IMPORTANT to run this pass, otherwise execution
342 -- can become /exponentially/ more expensive. See #11731, #12996.
343 runWhen (strictness || late_dmd_anal) CoreDoStrictness,
344
345 maybe_rule_check (Phase 0)
346 ]
347
348 -- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity.
349 flatten_todos [] = []
350 flatten_todos (CoreDoNothing : rest) = flatten_todos rest
351 flatten_todos (CoreDoPasses passes : rest) =
352 flatten_todos passes ++ flatten_todos rest
353 flatten_todos (todo : rest) = todo : flatten_todos rest
354
355 {- Note [Inline in InitialPhase]
356 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
357 In GHC 8 and earlier we did not inline anything in the InitialPhase. But that is
358 confusing for users because when they say INLINE they expect the function to inline
359 right away.
360
361 So now we do inlining immediately, even in the InitialPhase, assuming that the
362 Id's Activation allows it.
363
364 This is a surprisingly big deal. Compiler performance improved a lot
365 when I made this change:
366
367 perf/compiler/T5837.run T5837 [stat too good] (normal)
368 perf/compiler/parsing001.run parsing001 [stat too good] (normal)
369 perf/compiler/T12234.run T12234 [stat too good] (optasm)
370 perf/compiler/T9020.run T9020 [stat too good] (optasm)
371 perf/compiler/T3064.run T3064 [stat too good] (normal)
372 perf/compiler/T9961.run T9961 [stat too good] (normal)
373 perf/compiler/T13056.run T13056 [stat too good] (optasm)
374 perf/compiler/T9872d.run T9872d [stat too good] (normal)
375 perf/compiler/T783.run T783 [stat too good] (normal)
376 perf/compiler/T12227.run T12227 [stat too good] (normal)
377 perf/should_run/lazy-bs-alloc.run lazy-bs-alloc [stat too good] (normal)
378 perf/compiler/T1969.run T1969 [stat too good] (normal)
379 perf/compiler/T9872a.run T9872a [stat too good] (normal)
380 perf/compiler/T9872c.run T9872c [stat too good] (normal)
381 perf/compiler/T9872b.run T9872b [stat too good] (normal)
382 perf/compiler/T9872d.run T9872d [stat too good] (normal)
383
384 Note [RULEs enabled in SimplGently]
385 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
386 RULES are enabled when doing "gentle" simplification. Two reasons:
387
388 * We really want the class-op cancellation to happen:
389 op (df d1 d2) --> $cop3 d1 d2
390 because this breaks the mutual recursion between 'op' and 'df'
391
392 * I wanted the RULE
393 lift String ===> ...
394 to work in Template Haskell when simplifying
395 splices, so we get simpler code for literal strings
396
397 But watch out: list fusion can prevent floating. So use phase control
398 to switch off those rules until after floating.
399
400 ************************************************************************
401 * *
402 The CoreToDo interpreter
403 * *
404 ************************************************************************
405 -}
406
407 runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts
408 runCorePasses passes guts
409 = foldM do_pass guts passes
410 where
411 do_pass guts CoreDoNothing = return guts
412 do_pass guts (CoreDoPasses ps) = runCorePasses ps guts
413 do_pass guts pass
414 = withTiming getDynFlags
415 (ppr pass <+> brackets (ppr mod))
416 (const ()) $ do
417 { guts' <- lintAnnots (ppr pass) (doCorePass pass) guts
418 ; endPass pass (mg_binds guts') (mg_rules guts')
419 ; return guts' }
420
421 mod = mg_module guts
422
423 doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts
424 doCorePass pass@(CoreDoSimplify {}) = {-# SCC "Simplify" #-}
425 simplifyPgm pass
426
427 doCorePass CoreCSE = {-# SCC "CommonSubExpr" #-}
428 doPass cseProgram
429
430 doCorePass CoreLiberateCase = {-# SCC "LiberateCase" #-}
431 doPassD liberateCase
432
433 doCorePass CoreDoFloatInwards = {-# SCC "FloatInwards" #-}
434 floatInwards
435
436 doCorePass (CoreDoFloatOutwards f) = {-# SCC "FloatOutwards" #-}
437 doPassDUM (floatOutwards f)
438
439 doCorePass CoreDoStaticArgs = {-# SCC "StaticArgs" #-}
440 doPassU doStaticArgs
441
442 doCorePass CoreDoCallArity = {-# SCC "CallArity" #-}
443 doPassD callArityAnalProgram
444
445 doCorePass CoreDoExitify = {-# SCC "Exitify" #-}
446 doPass exitifyProgram
447
448 doCorePass CoreDoStrictness = {-# SCC "NewStranal" #-}
449 doPassDFM dmdAnalProgram
450
451 doCorePass CoreDoWorkerWrapper = {-# SCC "WorkWrap" #-}
452 doPassDFU wwTopBinds
453
454 doCorePass CoreDoSpecialising = {-# SCC "Specialise" #-}
455 specProgram
456
457 doCorePass CoreDoSpecConstr = {-# SCC "SpecConstr" #-}
458 specConstrProgram
459
460 doCorePass CoreDoPrintCore = observe printCore
461 doCorePass (CoreDoRuleCheck phase pat) = ruleCheckPass phase pat
462 doCorePass CoreDoNothing = return
463 doCorePass (CoreDoPasses passes) = runCorePasses passes
464
465 doCorePass (CoreDoPluginPass _ pass) = {-# SCC "Plugin" #-} pass
466
467 doCorePass pass@CoreDesugar = pprPanic "doCorePass" (ppr pass)
468 doCorePass pass@CoreDesugarOpt = pprPanic "doCorePass" (ppr pass)
469 doCorePass pass@CoreTidy = pprPanic "doCorePass" (ppr pass)
470 doCorePass pass@CorePrep = pprPanic "doCorePass" (ppr pass)
471 doCorePass pass@CoreOccurAnal = pprPanic "doCorePass" (ppr pass)
472
473 {-
474 ************************************************************************
475 * *
476 \subsection{Core pass combinators}
477 * *
478 ************************************************************************
479 -}
480
481 printCore :: DynFlags -> CoreProgram -> IO ()
482 printCore dflags binds
483 = Err.dumpIfSet dflags True "Print Core" (pprCoreBindings binds)
484
485 ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts
486 ruleCheckPass current_phase pat guts =
487 withTiming getDynFlags
488 (text "RuleCheck"<+>brackets (ppr $ mg_module guts))
489 (const ()) $ do
490 { rb <- getRuleBase
491 ; dflags <- getDynFlags
492 ; vis_orphs <- getVisibleOrphanMods
493 ; let rule_fn fn = getRules (RuleEnv rb vis_orphs) fn
494 ++ (mg_rules guts)
495 ; liftIO $ putLogMsg dflags NoReason Err.SevDump noSrcSpan
496 (defaultDumpStyle dflags)
497 (ruleCheckProgram current_phase pat
498 rule_fn (mg_binds guts))
499 ; return guts }
500
501 doPassDUM :: (DynFlags -> UniqSupply -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
502 doPassDUM do_pass = doPassM $ \binds -> do
503 dflags <- getDynFlags
504 us <- getUniqueSupplyM
505 liftIO $ do_pass dflags us binds
506
507 doPassDM :: (DynFlags -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
508 doPassDM do_pass = doPassDUM (\dflags -> const (do_pass dflags))
509
510 doPassD :: (DynFlags -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
511 doPassD do_pass = doPassDM (\dflags -> return . do_pass dflags)
512
513 doPassDU :: (DynFlags -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
514 doPassDU do_pass = doPassDUM (\dflags us -> return . do_pass dflags us)
515
516 doPassU :: (UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
517 doPassU do_pass = doPassDU (const do_pass)
518
519 doPassDFM :: (DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
520 doPassDFM do_pass guts = do
521 dflags <- getDynFlags
522 p_fam_env <- getPackageFamInstEnv
523 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
524 doPassM (liftIO . do_pass dflags fam_envs) guts
525
526 doPassDFU :: (DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
527 doPassDFU do_pass guts = do
528 dflags <- getDynFlags
529 us <- getUniqueSupplyM
530 p_fam_env <- getPackageFamInstEnv
531 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
532 doPass (do_pass dflags fam_envs us) guts
533
534 -- Most passes return no stats and don't change rules: these combinators
535 -- let us lift them to the full blown ModGuts+CoreM world
536 doPassM :: Monad m => (CoreProgram -> m CoreProgram) -> ModGuts -> m ModGuts
537 doPassM bind_f guts = do
538 binds' <- bind_f (mg_binds guts)
539 return (guts { mg_binds = binds' })
540
541 doPass :: (CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
542 doPass bind_f guts = return $ guts { mg_binds = bind_f (mg_binds guts) }
543
544 -- Observer passes just peek; don't modify the bindings at all
545 observe :: (DynFlags -> CoreProgram -> IO a) -> ModGuts -> CoreM ModGuts
546 observe do_pass = doPassM $ \binds -> do
547 dflags <- getDynFlags
548 _ <- liftIO $ do_pass dflags binds
549 return binds
550
551 {-
552 ************************************************************************
553 * *
554 Gentle simplification
555 * *
556 ************************************************************************
557 -}
558
559 simplifyExpr :: DynFlags -- includes spec of what core-to-core passes to do
560 -> CoreExpr
561 -> IO CoreExpr
562 -- simplifyExpr is called by the driver to simplify an
563 -- expression typed in at the interactive prompt
564 --
565 -- Also used by Template Haskell
566 simplifyExpr dflags expr
567 = withTiming (pure dflags) (text "Simplify [expr]") (const ()) $
568 do {
569 ; us <- mkSplitUniqSupply 's'
570
571 ; let sz = exprSize expr
572
573 ; (expr', counts) <- initSmpl dflags emptyRuleEnv
574 emptyFamInstEnvs us sz
575 (simplExprGently (simplEnvForGHCi dflags) expr)
576
577 ; Err.dumpIfSet dflags (dopt Opt_D_dump_simpl_stats dflags)
578 "Simplifier statistics" (pprSimplCount counts)
579
580 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl "Simplified expression"
581 (pprCoreExpr expr')
582
583 ; return expr'
584 }
585
586 simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr
587 -- Simplifies an expression
588 -- does occurrence analysis, then simplification
589 -- and repeats (twice currently) because one pass
590 -- alone leaves tons of crud.
591 -- Used (a) for user expressions typed in at the interactive prompt
592 -- (b) the LHS and RHS of a RULE
593 -- (c) Template Haskell splices
594 --
595 -- The name 'Gently' suggests that the SimplMode is SimplGently,
596 -- and in fact that is so.... but the 'Gently' in simplExprGently doesn't
597 -- enforce that; it just simplifies the expression twice
598
599 -- It's important that simplExprGently does eta reduction; see
600 -- Note [Simplifying the left-hand side of a RULE] above. The
601 -- simplifier does indeed do eta reduction (it's in Simplify.completeLam)
602 -- but only if -O is on.
603
604 simplExprGently env expr = do
605 expr1 <- simplExpr env (occurAnalyseExpr expr)
606 simplExpr env (occurAnalyseExpr expr1)
607
608 {-
609 ************************************************************************
610 * *
611 \subsection{The driver for the simplifier}
612 * *
613 ************************************************************************
614 -}
615
616 simplifyPgm :: CoreToDo -> ModGuts -> CoreM ModGuts
617 simplifyPgm pass guts
618 = do { hsc_env <- getHscEnv
619 ; us <- getUniqueSupplyM
620 ; rb <- getRuleBase
621 ; liftIOWithCount $
622 simplifyPgmIO pass hsc_env us rb guts }
623
624 simplifyPgmIO :: CoreToDo
625 -> HscEnv
626 -> UniqSupply
627 -> RuleBase
628 -> ModGuts
629 -> IO (SimplCount, ModGuts) -- New bindings
630
631 simplifyPgmIO pass@(CoreDoSimplify max_iterations mode)
632 hsc_env us hpt_rule_base
633 guts@(ModGuts { mg_module = this_mod
634 , mg_rdr_env = rdr_env
635 , mg_deps = deps
636 , mg_binds = binds, mg_rules = rules
637 , mg_fam_inst_env = fam_inst_env })
638 = do { (termination_msg, it_count, counts_out, guts')
639 <- do_iteration us 1 [] binds rules
640
641 ; Err.dumpIfSet dflags (dopt Opt_D_verbose_core2core dflags &&
642 dopt Opt_D_dump_simpl_stats dflags)
643 "Simplifier statistics for following pass"
644 (vcat [text termination_msg <+> text "after" <+> ppr it_count
645 <+> text "iterations",
646 blankLine,
647 pprSimplCount counts_out])
648
649 ; return (counts_out, guts')
650 }
651 where
652 dflags = hsc_dflags hsc_env
653 print_unqual = mkPrintUnqualified dflags rdr_env
654 simpl_env = mkSimplEnv mode
655 active_rule = activeRule mode
656 active_unf = activeUnfolding mode
657
658 do_iteration :: UniqSupply
659 -> Int -- Counts iterations
660 -> [SimplCount] -- Counts from earlier iterations, reversed
661 -> CoreProgram -- Bindings in
662 -> [CoreRule] -- and orphan rules
663 -> IO (String, Int, SimplCount, ModGuts)
664
665 do_iteration us iteration_no counts_so_far binds rules
666 -- iteration_no is the number of the iteration we are
667 -- about to begin, with '1' for the first
668 | iteration_no > max_iterations -- Stop if we've run out of iterations
669 = WARN( debugIsOn && (max_iterations > 2)
670 , hang (text "Simplifier bailing out after" <+> int max_iterations
671 <+> text "iterations"
672 <+> (brackets $ hsep $ punctuate comma $
673 map (int . simplCountN) (reverse counts_so_far)))
674 2 (text "Size =" <+> ppr (coreBindsStats binds)))
675
676 -- Subtract 1 from iteration_no to get the
677 -- number of iterations we actually completed
678 return ( "Simplifier baled out", iteration_no - 1
679 , totalise counts_so_far
680 , guts { mg_binds = binds, mg_rules = rules } )
681
682 -- Try and force thunks off the binds; significantly reduces
683 -- space usage, especially with -O. JRS, 000620.
684 | let sz = coreBindsSize binds
685 , () <- sz `seq` () -- Force it
686 = do {
687 -- Occurrence analysis
688 let { tagged_binds = {-# SCC "OccAnal" #-}
689 occurAnalysePgm this_mod active_unf active_rule rules
690 binds
691 } ;
692 Err.dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis"
693 (pprCoreBindings tagged_binds);
694
695 -- Get any new rules, and extend the rule base
696 -- See Note [Overall plumbing for rules] in Rules.hs
697 -- We need to do this regularly, because simplification can
698 -- poke on IdInfo thunks, which in turn brings in new rules
699 -- behind the scenes. Otherwise there's a danger we'll simply
700 -- miss the rules for Ids hidden inside imported inlinings
701 eps <- hscEPS hsc_env ;
702 let { rule_base1 = unionRuleBase hpt_rule_base (eps_rule_base eps)
703 ; rule_base2 = extendRuleBaseList rule_base1 rules
704 ; fam_envs = (eps_fam_inst_env eps, fam_inst_env)
705 ; vis_orphs = this_mod : dep_orphs deps } ;
706
707 -- Simplify the program
708 ((binds1, rules1), counts1) <-
709 initSmpl dflags (mkRuleEnv rule_base2 vis_orphs) fam_envs us1 sz $
710 do { (floats, env1) <- {-# SCC "SimplTopBinds" #-}
711 simplTopBinds simpl_env tagged_binds
712
713 -- Apply the substitution to rules defined in this module
714 -- for imported Ids. Eg RULE map my_f = blah
715 -- If we have a substitution my_f :-> other_f, we'd better
716 -- apply it to the rule to, or it'll never match
717 ; rules1 <- simplRules env1 Nothing rules Nothing
718
719 ; return (getTopFloatBinds floats, rules1) } ;
720
721 -- Stop if nothing happened; don't dump output
722 -- See Note [Which transformations are innocuous] in CoreMonad
723 if isZeroSimplCount counts1 then
724 return ( "Simplifier reached fixed point", iteration_no
725 , totalise (counts1 : counts_so_far) -- Include "free" ticks
726 , guts { mg_binds = binds1, mg_rules = rules1 } )
727 else do {
728 -- Short out indirections
729 -- We do this *after* at least one run of the simplifier
730 -- because indirection-shorting uses the export flag on *occurrences*
731 -- and that isn't guaranteed to be ok until after the first run propagates
732 -- stuff from the binding site to its occurrences
733 --
734 -- ToDo: alas, this means that indirection-shorting does not happen at all
735 -- if the simplifier does nothing (not common, I know, but unsavoury)
736 let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ;
737
738 -- Dump the result of this iteration
739 dump_end_iteration dflags print_unqual iteration_no counts1 binds2 rules1 ;
740 lintPassResult hsc_env pass binds2 ;
741
742 -- Loop
743 do_iteration us2 (iteration_no + 1) (counts1:counts_so_far) binds2 rules1
744 } }
745 | otherwise = panic "do_iteration"
746 where
747 (us1, us2) = splitUniqSupply us
748
749 -- Remember the counts_so_far are reversed
750 totalise :: [SimplCount] -> SimplCount
751 totalise = foldr (\c acc -> acc `plusSimplCount` c)
752 (zeroSimplCount dflags)
753
754 simplifyPgmIO _ _ _ _ _ = panic "simplifyPgmIO"
755
756 -------------------
757 dump_end_iteration :: DynFlags -> PrintUnqualified -> Int
758 -> SimplCount -> CoreProgram -> [CoreRule] -> IO ()
759 dump_end_iteration dflags print_unqual iteration_no counts binds rules
760 = dumpPassResult dflags print_unqual mb_flag hdr pp_counts binds rules
761 where
762 mb_flag | dopt Opt_D_dump_simpl_iterations dflags = Just Opt_D_dump_simpl_iterations
763 | otherwise = Nothing
764 -- Show details if Opt_D_dump_simpl_iterations is on
765
766 hdr = text "Simplifier iteration=" <> int iteration_no
767 pp_counts = vcat [ text "---- Simplifier counts for" <+> hdr
768 , pprSimplCount counts
769 , text "---- End of simplifier counts for" <+> hdr ]
770
771 {-
772 ************************************************************************
773 * *
774 Shorting out indirections
775 * *
776 ************************************************************************
777
778 If we have this:
779
780 x_local = <expression>
781 ...bindings...
782 x_exported = x_local
783
784 where x_exported is exported, and x_local is not, then we replace it with this:
785
786 x_exported = <expression>
787 x_local = x_exported
788 ...bindings...
789
790 Without this we never get rid of the x_exported = x_local thing. This
791 save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
792 makes strictness information propagate better. This used to happen in
793 the final phase, but it's tidier to do it here.
794
795 Note [Messing up the exported Id's RULES]
796 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
797 We must be careful about discarding (obviously) or even merging the
798 RULES on the exported Id. The example that went bad on me at one stage
799 was this one:
800
801 iterate :: (a -> a) -> a -> [a]
802 [Exported]
803 iterate = iterateList
804
805 iterateFB c f x = x `c` iterateFB c f (f x)
806 iterateList f x = x : iterateList f (f x)
807 [Not exported]
808
809 {-# RULES
810 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
811 "iterateFB" iterateFB (:) = iterateList
812 #-}
813
814 This got shorted out to:
815
816 iterateList :: (a -> a) -> a -> [a]
817 iterateList = iterate
818
819 iterateFB c f x = x `c` iterateFB c f (f x)
820 iterate f x = x : iterate f (f x)
821
822 {-# RULES
823 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
824 "iterateFB" iterateFB (:) = iterate
825 #-}
826
827 And now we get an infinite loop in the rule system
828 iterate f x -> build (\cn -> iterateFB c f x)
829 -> iterateFB (:) f x
830 -> iterate f x
831
832 Old "solution":
833 use rule switching-off pragmas to get rid
834 of iterateList in the first place
835
836 But in principle the user *might* want rules that only apply to the Id
837 he says. And inline pragmas are similar
838 {-# NOINLINE f #-}
839 f = local
840 local = <stuff>
841 Then we do not want to get rid of the NOINLINE.
842
843 Hence hasShortableIdinfo.
844
845
846 Note [Rules and indirection-zapping]
847 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
848 Problem: what if x_exported has a RULE that mentions something in ...bindings...?
849 Then the things mentioned can be out of scope! Solution
850 a) Make sure that in this pass the usage-info from x_exported is
851 available for ...bindings...
852 b) If there are any such RULES, rec-ify the entire top-level.
853 It'll get sorted out next time round
854
855 Other remarks
856 ~~~~~~~~~~~~~
857 If more than one exported thing is equal to a local thing (i.e., the
858 local thing really is shared), then we do one only:
859 \begin{verbatim}
860 x_local = ....
861 x_exported1 = x_local
862 x_exported2 = x_local
863 ==>
864 x_exported1 = ....
865
866 x_exported2 = x_exported1
867 \end{verbatim}
868
869 We rely on prior eta reduction to simplify things like
870 \begin{verbatim}
871 x_exported = /\ tyvars -> x_local tyvars
872 ==>
873 x_exported = x_local
874 \end{verbatim}
875 Hence,there's a possibility of leaving unchanged something like this:
876 \begin{verbatim}
877 x_local = ....
878 x_exported1 = x_local Int
879 \end{verbatim}
880 By the time we've thrown away the types in STG land this
881 could be eliminated. But I don't think it's very common
882 and it's dangerous to do this fiddling in STG land
883 because we might elminate a binding that's mentioned in the
884 unfolding for something.
885
886 Note [Indirection zapping and ticks]
887 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
888 Unfortunately this is another place where we need a special case for
889 ticks. The following happens quite regularly:
890
891 x_local = <expression>
892 x_exported = tick<x> x_local
893
894 Which we want to become:
895
896 x_exported = tick<x> <expression>
897
898 As it makes no sense to keep the tick and the expression on separate
899 bindings. Note however that that this might increase the ticks scoping
900 over the execution of x_local, so we can only do this for floatable
901 ticks. More often than not, other references will be unfoldings of
902 x_exported, and therefore carry the tick anyway.
903 -}
904
905 type IndEnv = IdEnv (Id, [Tickish Var]) -- Maps local_id -> exported_id, ticks
906
907 shortOutIndirections :: CoreProgram -> CoreProgram
908 shortOutIndirections binds
909 | isEmptyVarEnv ind_env = binds
910 | no_need_to_flatten = binds' -- See Note [Rules and indirect-zapping]
911 | otherwise = [Rec (flattenBinds binds')] -- for this no_need_to_flatten stuff
912 where
913 ind_env = makeIndEnv binds
914 -- These exported Ids are the subjects of the indirection-elimination
915 exp_ids = map fst $ nonDetEltsUFM ind_env
916 -- It's OK to use nonDetEltsUFM here because we forget the ordering
917 -- by immediately converting to a set or check if all the elements
918 -- satisfy a predicate.
919 exp_id_set = mkVarSet exp_ids
920 no_need_to_flatten = all (null . ruleInfoRules . idSpecialisation) exp_ids
921 binds' = concatMap zap binds
922
923 zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)]
924 zap (Rec pairs) = [Rec (concatMap zapPair pairs)]
925
926 zapPair (bndr, rhs)
927 | bndr `elemVarSet` exp_id_set
928 = [] -- Kill the exported-id binding
929
930 | Just (exp_id, ticks) <- lookupVarEnv ind_env bndr
931 , (exp_id', lcl_id') <- transferIdInfo exp_id bndr
932 = -- Turn a local-id binding into two bindings
933 -- exp_id = rhs; lcl_id = exp_id
934 [ (exp_id', mkTicks ticks rhs),
935 (lcl_id', Var exp_id') ]
936
937 | otherwise
938 = [(bndr,rhs)]
939
940 makeIndEnv :: [CoreBind] -> IndEnv
941 makeIndEnv binds
942 = foldl' add_bind emptyVarEnv binds
943 where
944 add_bind :: IndEnv -> CoreBind -> IndEnv
945 add_bind env (NonRec exported_id rhs) = add_pair env (exported_id, rhs)
946 add_bind env (Rec pairs) = foldl' add_pair env pairs
947
948 add_pair :: IndEnv -> (Id,CoreExpr) -> IndEnv
949 add_pair env (exported_id, exported)
950 | (ticks, Var local_id) <- stripTicksTop tickishFloatable exported
951 , shortMeOut env exported_id local_id
952 = extendVarEnv env local_id (exported_id, ticks)
953 add_pair env _ = env
954
955 -----------------
956 shortMeOut :: IndEnv -> Id -> Id -> Bool
957 shortMeOut ind_env exported_id local_id
958 -- The if-then-else stuff is just so I can get a pprTrace to see
959 -- how often I don't get shorting out because of IdInfo stuff
960 = if isExportedId exported_id && -- Only if this is exported
961
962 isLocalId local_id && -- Only if this one is defined in this
963 -- module, so that we *can* change its
964 -- binding to be the exported thing!
965
966 not (isExportedId local_id) && -- Only if this one is not itself exported,
967 -- since the transformation will nuke it
968
969 not (local_id `elemVarEnv` ind_env) -- Only if not already substituted for
970 then
971 if hasShortableIdInfo exported_id
972 then True -- See Note [Messing up the exported Id's IdInfo]
973 else WARN( True, text "Not shorting out:" <+> ppr exported_id )
974 False
975 else
976 False
977
978 -----------------
979 hasShortableIdInfo :: Id -> Bool
980 -- True if there is no user-attached IdInfo on exported_id,
981 -- so we can safely discard it
982 -- See Note [Messing up the exported Id's IdInfo]
983 hasShortableIdInfo id
984 = isEmptyRuleInfo (ruleInfo info)
985 && isDefaultInlinePragma (inlinePragInfo info)
986 && not (isStableUnfolding (unfoldingInfo info))
987 where
988 info = idInfo id
989
990 -----------------
991 {- Note [Transferring IdInfo]
992 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
993 If we have
994 lcl_id = e; exp_id = lcl_id
995
996 and lcl_id has useful IdInfo, we don't want to discard it by going
997 gbl_id = e; lcl_id = gbl_id
998
999 Instead, transfer IdInfo from lcl_id to exp_id, specifically
1000 * (Stable) unfolding
1001 * Strictness
1002 * Rules
1003 * Inline pragma
1004
1005 Overwriting, rather than merging, seems to work ok.
1006
1007 We also zap the InlinePragma on the lcl_id. It might originally
1008 have had a NOINLINE, which we have now transferred; and we really
1009 want the lcl_id to inline now that its RHS is trivial!
1010 -}
1011
1012 transferIdInfo :: Id -> Id -> (Id, Id)
1013 -- See Note [Transferring IdInfo]
1014 transferIdInfo exported_id local_id
1015 = ( modifyIdInfo transfer exported_id
1016 , local_id `setInlinePragma` defaultInlinePragma )
1017 where
1018 local_info = idInfo local_id
1019 transfer exp_info = exp_info `setStrictnessInfo` strictnessInfo local_info
1020 `setUnfoldingInfo` unfoldingInfo local_info
1021 `setInlinePragInfo` inlinePragInfo local_info
1022 `setRuleInfo` addRuleInfo (ruleInfo exp_info) new_info
1023 new_info = setRuleInfoHead (idName exported_id)
1024 (ruleInfo local_info)
1025 -- Remember to set the function-name field of the
1026 -- rules as we transfer them from one function to another