7c89dc9099c5c2cc52d47f889a66acc0741d4a75
[ghc.git] / compiler / simplCore / SimplCore.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 \section[SimplCore]{Driver for simplifying @Core@ programs}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplCore ( core2core, simplifyExpr ) where
10
11 #include "HsVersions.h"
12
13 import DynFlags
14 import CoreSyn
15 import HscTypes
16 import CSE ( cseProgram )
17 import Rules ( mkRuleBase, unionRuleBase,
18 extendRuleBaseList, ruleCheckProgram, addRuleInfo, )
19 import PprCore ( pprCoreBindings, pprCoreExpr )
20 import OccurAnal ( occurAnalysePgm, occurAnalyseExpr )
21 import IdInfo
22 import CoreStats ( coreBindsSize, coreBindsStats, exprSize )
23 import CoreUtils ( mkTicks, stripTicksTop )
24 import CoreLint ( endPass, lintPassResult, dumpPassResult,
25 lintAnnots )
26 import Simplify ( simplTopBinds, simplExpr, simplRules )
27 import SimplUtils ( simplEnvForGHCi, activeRule )
28 import SimplEnv
29 import SimplMonad
30 import CoreMonad
31 import qualified ErrUtils as Err
32 import FloatIn ( floatInwards )
33 import FloatOut ( floatOutwards )
34 import FamInstEnv
35 import Id
36 import ErrUtils ( withTiming )
37 import BasicTypes ( CompilerPhase(..), isDefaultInlinePragma )
38 import VarSet
39 import VarEnv
40 import LiberateCase ( liberateCase )
41 import SAT ( doStaticArgs )
42 import Specialise ( specProgram)
43 import SpecConstr ( specConstrProgram)
44 import DmdAnal ( dmdAnalProgram )
45 import CallArity ( callArityAnalProgram )
46 import WorkWrap ( wwTopBinds )
47 import Vectorise ( vectorise )
48 import SrcLoc
49 import Util
50 import Module
51
52 import Maybes
53 import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply )
54 import UniqFM
55 import Outputable
56 import Control.Monad
57 import qualified GHC.LanguageExtensions as LangExt
58
59 #ifdef GHCI
60 import DynamicLoading ( loadPlugins )
61 import Plugins ( installCoreToDos )
62 #else
63 import DynamicLoading ( pluginError )
64 #endif
65
66 {-
67 ************************************************************************
68 * *
69 \subsection{The driver for the simplifier}
70 * *
71 ************************************************************************
72 -}
73
74 core2core :: HscEnv -> ModGuts -> IO ModGuts
75 core2core hsc_env guts@(ModGuts { mg_module = mod
76 , mg_loc = loc
77 , mg_deps = deps
78 , mg_rdr_env = rdr_env })
79 = do { us <- mkSplitUniqSupply 's'
80 -- make sure all plugins are loaded
81
82 ; let builtin_passes = getCoreToDo dflags
83 orph_mods = mkModuleSet (mod : dep_orphs deps)
84 ;
85 ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base us mod
86 orph_mods print_unqual loc $
87 do { all_passes <- addPluginPasses builtin_passes
88 ; runCorePasses all_passes guts }
89
90 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_stats
91 "Grand total simplifier statistics"
92 (pprSimplCount stats)
93
94 ; return guts2 }
95 where
96 dflags = hsc_dflags hsc_env
97 home_pkg_rules = hptRules hsc_env (dep_mods deps)
98 hpt_rule_base = mkRuleBase home_pkg_rules
99 print_unqual = mkPrintUnqualified dflags rdr_env
100 -- mod: get the module out of the current HscEnv so we can retrieve it from the monad.
101 -- This is very convienent for the users of the monad (e.g. plugins do not have to
102 -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
103 -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
104 -- would mean our cached value would go out of date.
105
106 {-
107 ************************************************************************
108 * *
109 Generating the main optimisation pipeline
110 * *
111 ************************************************************************
112 -}
113
114 getCoreToDo :: DynFlags -> [CoreToDo]
115 getCoreToDo dflags
116 = flatten_todos core_todo
117 where
118 opt_level = optLevel dflags
119 phases = simplPhases dflags
120 max_iter = maxSimplIterations dflags
121 rule_check = ruleCheck dflags
122 call_arity = gopt Opt_CallArity dflags
123 strictness = gopt Opt_Strictness dflags
124 full_laziness = gopt Opt_FullLaziness dflags
125 do_specialise = gopt Opt_Specialise dflags
126 do_float_in = gopt Opt_FloatIn dflags
127 cse = gopt Opt_CSE dflags
128 spec_constr = gopt Opt_SpecConstr dflags
129 liberate_case = gopt Opt_LiberateCase dflags
130 late_dmd_anal = gopt Opt_LateDmdAnal dflags
131 static_args = gopt Opt_StaticArgumentTransformation dflags
132 rules_on = gopt Opt_EnableRewriteRules dflags
133 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
134 ww_on = gopt Opt_WorkerWrapper dflags
135 static_ptrs = xopt LangExt.StaticPointers dflags
136
137 maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
138
139 maybe_strictness_before phase
140 = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness
141
142 base_mode = SimplMode { sm_phase = panic "base_mode"
143 , sm_names = []
144 , sm_rules = rules_on
145 , sm_eta_expand = eta_expand_on
146 , sm_inline = True
147 , sm_case_case = True }
148
149 simpl_phase phase names iter
150 = CoreDoPasses
151 $ [ maybe_strictness_before phase
152 , CoreDoSimplify iter
153 (base_mode { sm_phase = Phase phase
154 , sm_names = names })
155
156 , maybe_rule_check (Phase phase) ]
157
158 -- Vectorisation can introduce a fair few common sub expressions involving
159 -- DPH primitives. For example, see the Reverse test from dph-examples.
160 -- We need to eliminate these common sub expressions before their definitions
161 -- are inlined in phase 2. The CSE introduces lots of v1 = v2 bindings,
162 -- so we also run simpl_gently to inline them.
163 ++ (if gopt Opt_Vectorise dflags && phase == 3
164 then [CoreCSE, simpl_gently]
165 else [])
166
167 vectorisation
168 = runWhen (gopt Opt_Vectorise dflags) $
169 CoreDoPasses [ simpl_gently, CoreDoVectorisation ]
170
171 -- By default, we have 2 phases before phase 0.
172
173 -- Want to run with inline phase 2 after the specialiser to give
174 -- maximum chance for fusion to work before we inline build/augment
175 -- in phase 1. This made a difference in 'ansi' where an
176 -- overloaded function wasn't inlined till too late.
177
178 -- Need phase 1 so that build/augment get
179 -- inlined. I found that spectral/hartel/genfft lost some useful
180 -- strictness in the function sumcode' if augment is not inlined
181 -- before strictness analysis runs
182 simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
183 | phase <- [phases, phases-1 .. 1] ]
184
185
186 -- initial simplify: mk specialiser happy: minimum effort please
187 simpl_gently = CoreDoSimplify max_iter
188 (base_mode { sm_phase = InitialPhase
189 , sm_names = ["Gentle"]
190 , sm_rules = rules_on -- Note [RULEs enabled in SimplGently]
191 , sm_inline = False
192 , sm_case_case = False })
193 -- Don't do case-of-case transformations.
194 -- This makes full laziness work better
195
196 strictness_pass = if ww_on
197 then [CoreDoStrictness,CoreDoWorkerWrapper]
198 else [CoreDoStrictness]
199
200
201 -- New demand analyser
202 demand_analyser = (CoreDoPasses (
203 strictness_pass ++
204 [simpl_phase 0 ["post-worker-wrapper"] max_iter]
205 ))
206
207 -- Static forms are moved to the top level with the FloatOut pass.
208 -- See Note [Grand plan for static forms] in StaticPtrTable.
209 static_ptrs_float_outwards =
210 runWhen static_ptrs $ CoreDoPasses
211 [ simpl_gently -- Float Out can't handle type lets (sometimes created
212 -- by simpleOptPgm via mkParallelBindings)
213 , CoreDoFloatOutwards FloatOutSwitches
214 { floatOutLambdas = Just 0
215 , floatOutConstants = True
216 , floatOutOverSatApps = False
217 , floatToTopLevelOnly = True
218 }
219 ]
220
221 core_todo =
222 if opt_level == 0 then
223 [ vectorisation,
224 static_ptrs_float_outwards,
225 CoreDoSimplify max_iter
226 (base_mode { sm_phase = Phase 0
227 , sm_names = ["Non-opt simplification"] })
228 ]
229
230 else {- opt_level >= 1 -} [
231
232 -- We want to do the static argument transform before full laziness as it
233 -- may expose extra opportunities to float things outwards. However, to fix
234 -- up the output of the transformation we need at do at least one simplify
235 -- after this before anything else
236 runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
237
238 -- We run vectorisation here for now, but we might also try to run
239 -- it later
240 vectorisation,
241
242 -- initial simplify: mk specialiser happy: minimum effort please
243 simpl_gently,
244
245 -- Specialisation is best done before full laziness
246 -- so that overloaded functions have all their dictionary lambdas manifest
247 runWhen do_specialise CoreDoSpecialising,
248
249 if full_laziness then
250 CoreDoFloatOutwards FloatOutSwitches {
251 floatOutLambdas = Just 0,
252 floatOutConstants = True,
253 floatOutOverSatApps = False,
254 floatToTopLevelOnly = False }
255 -- Was: gentleFloatOutSwitches
256 --
257 -- I have no idea why, but not floating constants to
258 -- top level is very bad in some cases.
259 --
260 -- Notably: p_ident in spectral/rewrite
261 -- Changing from "gentle" to "constantsOnly"
262 -- improved rewrite's allocation by 19%, and
263 -- made 0.0% difference to any other nofib
264 -- benchmark
265 --
266 -- Not doing floatOutOverSatApps yet, we'll do
267 -- that later on when we've had a chance to get more
268 -- accurate arity information. In fact it makes no
269 -- difference at all to performance if we do it here,
270 -- but maybe we save some unnecessary to-and-fro in
271 -- the simplifier.
272 else
273 -- Even with full laziness turned off, we still need to float static
274 -- forms to the top level. See Note [Grand plan for static forms] in
275 -- StaticPtrTable.
276 static_ptrs_float_outwards,
277
278 simpl_phases,
279
280 -- Phase 0: allow all Ids to be inlined now
281 -- This gets foldr inlined before strictness analysis
282
283 -- At least 3 iterations because otherwise we land up with
284 -- huge dead expressions because of an infelicity in the
285 -- simpifier.
286 -- let k = BIG in foldr k z xs
287 -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
288 -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
289 -- Don't stop now!
290 simpl_phase 0 ["main"] (max max_iter 3),
291
292 runWhen do_float_in CoreDoFloatInwards,
293 -- Run float-inwards immediately before the strictness analyser
294 -- Doing so pushes bindings nearer their use site and hence makes
295 -- them more likely to be strict. These bindings might only show
296 -- up after the inlining from simplification. Example in fulsom,
297 -- Csg.calc, where an arg of timesDouble thereby becomes strict.
298
299 runWhen call_arity $ CoreDoPasses
300 [ CoreDoCallArity
301 , simpl_phase 0 ["post-call-arity"] max_iter
302 ],
303
304 runWhen strictness demand_analyser,
305
306 runWhen full_laziness $
307 CoreDoFloatOutwards FloatOutSwitches {
308 floatOutLambdas = floatLamArgs dflags,
309 floatOutConstants = True,
310 floatOutOverSatApps = True,
311 floatToTopLevelOnly = False },
312 -- nofib/spectral/hartel/wang doubles in speed if you
313 -- do full laziness late in the day. It only happens
314 -- after fusion and other stuff, so the early pass doesn't
315 -- catch it. For the record, the redex is
316 -- f_el22 (f_el21 r_midblock)
317
318
319 runWhen cse CoreCSE,
320 -- We want CSE to follow the final full-laziness pass, because it may
321 -- succeed in commoning up things floated out by full laziness.
322 -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
323
324 runWhen do_float_in CoreDoFloatInwards,
325
326 maybe_rule_check (Phase 0),
327
328 -- Case-liberation for -O2. This should be after
329 -- strictness analysis and the simplification which follows it.
330 runWhen liberate_case (CoreDoPasses [
331 CoreLiberateCase,
332 simpl_phase 0 ["post-liberate-case"] max_iter
333 ]), -- Run the simplifier after LiberateCase to vastly
334 -- reduce the possibility of shadowing
335 -- Reason: see Note [Shadowing] in SpecConstr.hs
336
337 runWhen spec_constr CoreDoSpecConstr,
338
339 maybe_rule_check (Phase 0),
340
341 -- Final clean-up simplification:
342 simpl_phase 0 ["final"] max_iter,
343
344 runWhen late_dmd_anal $ CoreDoPasses (
345 strictness_pass ++
346 [simpl_phase 0 ["post-late-ww"] max_iter]
347 ),
348
349 -- Final run of the demand_analyser, ensures that one-shot thunks are
350 -- really really one-shot thunks. Only needed if the demand analyser
351 -- has run at all. See Note [Final Demand Analyser run] in DmdAnal
352 -- It is EXTREMELY IMPORTANT to run this pass, otherwise execution
353 -- can become /exponentially/ more expensive. See Trac #11731, #12996.
354 runWhen (strictness || late_dmd_anal) CoreDoStrictness,
355
356 maybe_rule_check (Phase 0)
357 ]
358
359 -- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity.
360 flatten_todos [] = []
361 flatten_todos (CoreDoNothing : rest) = flatten_todos rest
362 flatten_todos (CoreDoPasses passes : rest) =
363 flatten_todos passes ++ flatten_todos rest
364 flatten_todos (todo : rest) = todo : flatten_todos rest
365
366 -- Loading plugins
367
368 addPluginPasses :: [CoreToDo] -> CoreM [CoreToDo]
369 #ifndef GHCI
370 addPluginPasses builtin_passes
371 = do { dflags <- getDynFlags
372 ; let pluginMods = pluginModNames dflags
373 ; unless (null pluginMods) (pluginError pluginMods)
374 ; return builtin_passes }
375 #else
376 addPluginPasses builtin_passes
377 = do { hsc_env <- getHscEnv
378 ; named_plugins <- liftIO (loadPlugins hsc_env)
379 ; foldM query_plug builtin_passes named_plugins }
380 where
381 query_plug todos (_, plug, options) = installCoreToDos plug options todos
382 #endif
383
384 {-
385 Note [RULEs enabled in SimplGently]
386 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
387 RULES are enabled when doing "gentle" simplification. Two reasons:
388
389 * We really want the class-op cancellation to happen:
390 op (df d1 d2) --> $cop3 d1 d2
391 because this breaks the mutual recursion between 'op' and 'df'
392
393 * I wanted the RULE
394 lift String ===> ...
395 to work in Template Haskell when simplifying
396 splices, so we get simpler code for literal strings
397
398 But watch out: list fusion can prevent floating. So use phase control
399 to switch off those rules until after floating.
400
401 ************************************************************************
402 * *
403 The CoreToDo interpreter
404 * *
405 ************************************************************************
406 -}
407
408 runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts
409 runCorePasses passes guts
410 = foldM do_pass guts passes
411 where
412 do_pass guts CoreDoNothing = return guts
413 do_pass guts (CoreDoPasses ps) = runCorePasses ps guts
414 do_pass guts pass
415 = withTiming getDynFlags
416 (ppr pass <+> brackets (ppr mod))
417 (const ()) $ do
418 { guts' <- lintAnnots (ppr pass) (doCorePass pass) guts
419 ; endPass pass (mg_binds guts') (mg_rules guts')
420 ; return guts' }
421
422 mod = mg_module guts
423
424 doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts
425 doCorePass pass@(CoreDoSimplify {}) = {-# SCC "Simplify" #-}
426 simplifyPgm pass
427
428 doCorePass CoreCSE = {-# SCC "CommonSubExpr" #-}
429 doPass cseProgram
430
431 doCorePass CoreLiberateCase = {-# SCC "LiberateCase" #-}
432 doPassD liberateCase
433
434 doCorePass CoreDoFloatInwards = {-# SCC "FloatInwards" #-}
435 floatInwards
436
437 doCorePass (CoreDoFloatOutwards f) = {-# SCC "FloatOutwards" #-}
438 doPassDUM (floatOutwards f)
439
440 doCorePass CoreDoStaticArgs = {-# SCC "StaticArgs" #-}
441 doPassU doStaticArgs
442
443 doCorePass CoreDoCallArity = {-# SCC "CallArity" #-}
444 doPassD callArityAnalProgram
445
446 doCorePass CoreDoStrictness = {-# SCC "NewStranal" #-}
447 doPassDFM dmdAnalProgram
448
449 doCorePass CoreDoWorkerWrapper = {-# SCC "WorkWrap" #-}
450 doPassDFU wwTopBinds
451
452 doCorePass CoreDoSpecialising = {-# SCC "Specialise" #-}
453 specProgram
454
455 doCorePass CoreDoSpecConstr = {-# SCC "SpecConstr" #-}
456 specConstrProgram
457
458 doCorePass CoreDoVectorisation = {-# SCC "Vectorise" #-}
459 vectorise
460
461 doCorePass CoreDoPrintCore = observe printCore
462 doCorePass (CoreDoRuleCheck phase pat) = ruleCheckPass phase pat
463 doCorePass CoreDoNothing = return
464 doCorePass (CoreDoPasses passes) = runCorePasses passes
465
466 #ifdef GHCI
467 doCorePass (CoreDoPluginPass _ pass) = {-# SCC "Plugin" #-} pass
468 #endif
469
470 doCorePass pass = pprPanic "doCorePass" (ppr pass)
471
472 {-
473 ************************************************************************
474 * *
475 \subsection{Core pass combinators}
476 * *
477 ************************************************************************
478 -}
479
480 printCore :: DynFlags -> CoreProgram -> IO ()
481 printCore dflags binds
482 = Err.dumpIfSet dflags True "Print Core" (pprCoreBindings binds)
483
484 ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts
485 ruleCheckPass current_phase pat guts =
486 withTiming getDynFlags
487 (text "RuleCheck"<+>brackets (ppr $ mg_module guts))
488 (const ()) $ do
489 { rb <- getRuleBase
490 ; dflags <- getDynFlags
491 ; vis_orphs <- getVisibleOrphanMods
492 ; liftIO $ log_action dflags dflags NoReason Err.SevDump noSrcSpan
493 (defaultDumpStyle dflags)
494 (ruleCheckProgram current_phase pat
495 (RuleEnv rb vis_orphs) (mg_binds guts))
496 ; return guts }
497
498 doPassDUM :: (DynFlags -> UniqSupply -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
499 doPassDUM do_pass = doPassM $ \binds -> do
500 dflags <- getDynFlags
501 us <- getUniqueSupplyM
502 liftIO $ do_pass dflags us binds
503
504 doPassDM :: (DynFlags -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
505 doPassDM do_pass = doPassDUM (\dflags -> const (do_pass dflags))
506
507 doPassD :: (DynFlags -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
508 doPassD do_pass = doPassDM (\dflags -> return . do_pass dflags)
509
510 doPassDU :: (DynFlags -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
511 doPassDU do_pass = doPassDUM (\dflags us -> return . do_pass dflags us)
512
513 doPassU :: (UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
514 doPassU do_pass = doPassDU (const do_pass)
515
516 doPassDFM :: (DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
517 doPassDFM do_pass guts = do
518 dflags <- getDynFlags
519 p_fam_env <- getPackageFamInstEnv
520 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
521 doPassM (liftIO . do_pass dflags fam_envs) guts
522
523 doPassDFU :: (DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
524 doPassDFU do_pass guts = do
525 dflags <- getDynFlags
526 us <- getUniqueSupplyM
527 p_fam_env <- getPackageFamInstEnv
528 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
529 doPass (do_pass dflags fam_envs us) guts
530
531 -- Most passes return no stats and don't change rules: these combinators
532 -- let us lift them to the full blown ModGuts+CoreM world
533 doPassM :: Monad m => (CoreProgram -> m CoreProgram) -> ModGuts -> m ModGuts
534 doPassM bind_f guts = do
535 binds' <- bind_f (mg_binds guts)
536 return (guts { mg_binds = binds' })
537
538 doPass :: (CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
539 doPass bind_f guts = return $ guts { mg_binds = bind_f (mg_binds guts) }
540
541 -- Observer passes just peek; don't modify the bindings at all
542 observe :: (DynFlags -> CoreProgram -> IO a) -> ModGuts -> CoreM ModGuts
543 observe do_pass = doPassM $ \binds -> do
544 dflags <- getDynFlags
545 _ <- liftIO $ do_pass dflags binds
546 return binds
547
548 {-
549 ************************************************************************
550 * *
551 Gentle simplification
552 * *
553 ************************************************************************
554 -}
555
556 simplifyExpr :: DynFlags -- includes spec of what core-to-core passes to do
557 -> CoreExpr
558 -> IO CoreExpr
559 -- simplifyExpr is called by the driver to simplify an
560 -- expression typed in at the interactive prompt
561 --
562 -- Also used by Template Haskell
563 simplifyExpr dflags expr
564 = withTiming (pure dflags) (text "Simplify [expr]") (const ()) $
565 do {
566 ; us <- mkSplitUniqSupply 's'
567
568 ; let sz = exprSize expr
569
570 ; (expr', counts) <- initSmpl dflags emptyRuleEnv
571 emptyFamInstEnvs us sz
572 (simplExprGently (simplEnvForGHCi dflags) expr)
573
574 ; Err.dumpIfSet dflags (dopt Opt_D_dump_simpl_stats dflags)
575 "Simplifier statistics" (pprSimplCount counts)
576
577 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl "Simplified expression"
578 (pprCoreExpr expr')
579
580 ; return expr'
581 }
582
583 simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr
584 -- Simplifies an expression
585 -- does occurrence analysis, then simplification
586 -- and repeats (twice currently) because one pass
587 -- alone leaves tons of crud.
588 -- Used (a) for user expressions typed in at the interactive prompt
589 -- (b) the LHS and RHS of a RULE
590 -- (c) Template Haskell splices
591 --
592 -- The name 'Gently' suggests that the SimplifierMode is SimplGently,
593 -- and in fact that is so.... but the 'Gently' in simplExprGently doesn't
594 -- enforce that; it just simplifies the expression twice
595
596 -- It's important that simplExprGently does eta reduction; see
597 -- Note [Simplifying the left-hand side of a RULE] above. The
598 -- simplifier does indeed do eta reduction (it's in Simplify.completeLam)
599 -- but only if -O is on.
600
601 simplExprGently env expr = do
602 expr1 <- simplExpr env (occurAnalyseExpr expr)
603 simplExpr env (occurAnalyseExpr expr1)
604
605 {-
606 ************************************************************************
607 * *
608 \subsection{The driver for the simplifier}
609 * *
610 ************************************************************************
611 -}
612
613 simplifyPgm :: CoreToDo -> ModGuts -> CoreM ModGuts
614 simplifyPgm pass guts
615 = do { hsc_env <- getHscEnv
616 ; us <- getUniqueSupplyM
617 ; rb <- getRuleBase
618 ; liftIOWithCount $
619 simplifyPgmIO pass hsc_env us rb guts }
620
621 simplifyPgmIO :: CoreToDo
622 -> HscEnv
623 -> UniqSupply
624 -> RuleBase
625 -> ModGuts
626 -> IO (SimplCount, ModGuts) -- New bindings
627
628 simplifyPgmIO pass@(CoreDoSimplify max_iterations mode)
629 hsc_env us hpt_rule_base
630 guts@(ModGuts { mg_module = this_mod
631 , mg_rdr_env = rdr_env
632 , mg_deps = deps
633 , mg_binds = binds, mg_rules = rules
634 , mg_fam_inst_env = fam_inst_env })
635 = do { (termination_msg, it_count, counts_out, guts')
636 <- do_iteration us 1 [] binds rules
637
638 ; Err.dumpIfSet dflags (dopt Opt_D_verbose_core2core dflags &&
639 dopt Opt_D_dump_simpl_stats dflags)
640 "Simplifier statistics for following pass"
641 (vcat [text termination_msg <+> text "after" <+> ppr it_count
642 <+> text "iterations",
643 blankLine,
644 pprSimplCount counts_out])
645
646 ; return (counts_out, guts')
647 }
648 where
649 dflags = hsc_dflags hsc_env
650 print_unqual = mkPrintUnqualified dflags rdr_env
651 simpl_env = mkSimplEnv mode
652 active_rule = activeRule simpl_env
653
654 do_iteration :: UniqSupply
655 -> Int -- Counts iterations
656 -> [SimplCount] -- Counts from earlier iterations, reversed
657 -> CoreProgram -- Bindings in
658 -> [CoreRule] -- and orphan rules
659 -> IO (String, Int, SimplCount, ModGuts)
660
661 do_iteration us iteration_no counts_so_far binds rules
662 -- iteration_no is the number of the iteration we are
663 -- about to begin, with '1' for the first
664 | iteration_no > max_iterations -- Stop if we've run out of iterations
665 = WARN( debugIsOn && (max_iterations > 2)
666 , hang (text "Simplifier bailing out after" <+> int max_iterations
667 <+> text "iterations"
668 <+> (brackets $ hsep $ punctuate comma $
669 map (int . simplCountN) (reverse counts_so_far)))
670 2 (text "Size =" <+> ppr (coreBindsStats binds)))
671
672 -- Subtract 1 from iteration_no to get the
673 -- number of iterations we actually completed
674 return ( "Simplifier baled out", iteration_no - 1
675 , totalise counts_so_far
676 , guts { mg_binds = binds, mg_rules = rules } )
677
678 -- Try and force thunks off the binds; significantly reduces
679 -- space usage, especially with -O. JRS, 000620.
680 | let sz = coreBindsSize binds
681 , () <- sz `seq` () -- Force it
682 = do {
683 -- Occurrence analysis
684 let { -- Note [Vectorisation declarations and occurrences]
685 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
686 -- During the 'InitialPhase' (i.e., before vectorisation), we need to make sure
687 -- that the right-hand sides of vectorisation declarations are taken into
688 -- account during occurrence analysis. After the 'InitialPhase', we need to ensure
689 -- that the binders representing variable vectorisation declarations are kept alive.
690 -- (In contrast to automatically vectorised variables, their unvectorised versions
691 -- don't depend on them.)
692 vectVars = mkVarSet $
693 catMaybes [ fmap snd $ lookupDVarEnv (vectInfoVar (mg_vect_info guts)) bndr
694 | Vect bndr _ <- mg_vect_decls guts]
695 ++
696 catMaybes [ fmap snd $ lookupDVarEnv (vectInfoVar (mg_vect_info guts)) bndr
697 | bndr <- bindersOfBinds binds]
698 -- FIXME: This second comprehensions is only needed as long as we
699 -- have vectorised bindings where we get "Could NOT call
700 -- vectorised from original version".
701 ; (maybeVects, maybeVectVars)
702 = case sm_phase mode of
703 InitialPhase -> (mg_vect_decls guts, vectVars)
704 _ -> ([], vectVars)
705 ; tagged_binds = {-# SCC "OccAnal" #-}
706 occurAnalysePgm this_mod active_rule rules
707 maybeVects maybeVectVars binds
708 } ;
709 Err.dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis"
710 (pprCoreBindings tagged_binds);
711 lintPassResult hsc_env CoreOccurAnal tagged_binds;
712
713 -- Get any new rules, and extend the rule base
714 -- See Note [Overall plumbing for rules] in Rules.hs
715 -- We need to do this regularly, because simplification can
716 -- poke on IdInfo thunks, which in turn brings in new rules
717 -- behind the scenes. Otherwise there's a danger we'll simply
718 -- miss the rules for Ids hidden inside imported inlinings
719 eps <- hscEPS hsc_env ;
720 let { rule_base1 = unionRuleBase hpt_rule_base (eps_rule_base eps)
721 ; rule_base2 = extendRuleBaseList rule_base1 rules
722 ; fam_envs = (eps_fam_inst_env eps, fam_inst_env)
723 ; vis_orphs = this_mod : dep_orphs deps } ;
724
725 -- Simplify the program
726 ((binds1, rules1), counts1) <-
727 initSmpl dflags (mkRuleEnv rule_base2 vis_orphs) fam_envs us1 sz $
728 do { env1 <- {-# SCC "SimplTopBinds" #-}
729 simplTopBinds simpl_env tagged_binds
730
731 -- Apply the substitution to rules defined in this module
732 -- for imported Ids. Eg RULE map my_f = blah
733 -- If we have a substitution my_f :-> other_f, we'd better
734 -- apply it to the rule to, or it'll never match
735 ; rules1 <- simplRules env1 Nothing rules
736
737 ; return (getFloatBinds env1, rules1) } ;
738
739 -- Stop if nothing happened; don't dump output
740 if isZeroSimplCount counts1 then
741 return ( "Simplifier reached fixed point", iteration_no
742 , totalise (counts1 : counts_so_far) -- Include "free" ticks
743 , guts { mg_binds = binds1, mg_rules = rules1 } )
744 else do {
745 -- Short out indirections
746 -- We do this *after* at least one run of the simplifier
747 -- because indirection-shorting uses the export flag on *occurrences*
748 -- and that isn't guaranteed to be ok until after the first run propagates
749 -- stuff from the binding site to its occurrences
750 --
751 -- ToDo: alas, this means that indirection-shorting does not happen at all
752 -- if the simplifier does nothing (not common, I know, but unsavoury)
753 let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ;
754
755 -- Dump the result of this iteration
756 dump_end_iteration dflags print_unqual iteration_no counts1 binds2 rules1 ;
757 lintPassResult hsc_env pass binds2 ;
758
759 -- Loop
760 do_iteration us2 (iteration_no + 1) (counts1:counts_so_far) binds2 rules1
761 } }
762 | otherwise = panic "do_iteration"
763 where
764 (us1, us2) = splitUniqSupply us
765
766 -- Remember the counts_so_far are reversed
767 totalise :: [SimplCount] -> SimplCount
768 totalise = foldr (\c acc -> acc `plusSimplCount` c)
769 (zeroSimplCount dflags)
770
771 simplifyPgmIO _ _ _ _ _ = panic "simplifyPgmIO"
772
773 -------------------
774 dump_end_iteration :: DynFlags -> PrintUnqualified -> Int
775 -> SimplCount -> CoreProgram -> [CoreRule] -> IO ()
776 dump_end_iteration dflags print_unqual iteration_no counts binds rules
777 = dumpPassResult dflags print_unqual mb_flag hdr pp_counts binds rules
778 where
779 mb_flag | dopt Opt_D_dump_simpl_iterations dflags = Just Opt_D_dump_simpl_iterations
780 | otherwise = Nothing
781 -- Show details if Opt_D_dump_simpl_iterations is on
782
783 hdr = text "Simplifier iteration=" <> int iteration_no
784 pp_counts = vcat [ text "---- Simplifier counts for" <+> hdr
785 , pprSimplCount counts
786 , text "---- End of simplifier counts for" <+> hdr ]
787
788 {-
789 ************************************************************************
790 * *
791 Shorting out indirections
792 * *
793 ************************************************************************
794
795 If we have this:
796
797 x_local = <expression>
798 ...bindings...
799 x_exported = x_local
800
801 where x_exported is exported, and x_local is not, then we replace it with this:
802
803 x_exported = <expression>
804 x_local = x_exported
805 ...bindings...
806
807 Without this we never get rid of the x_exported = x_local thing. This
808 save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
809 makes strictness information propagate better. This used to happen in
810 the final phase, but it's tidier to do it here.
811
812 Note [Transferring IdInfo]
813 ~~~~~~~~~~~~~~~~~~~~~~~~~~
814 We want to propagage any useful IdInfo on x_local to x_exported.
815
816 STRICTNESS: if we have done strictness analysis, we want the strictness info on
817 x_local to transfer to x_exported. Hence the copyIdInfo call.
818
819 RULES: we want to *add* any RULES for x_local to x_exported.
820
821
822 Note [Messing up the exported Id's RULES]
823 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
824 We must be careful about discarding (obviously) or even merging the
825 RULES on the exported Id. The example that went bad on me at one stage
826 was this one:
827
828 iterate :: (a -> a) -> a -> [a]
829 [Exported]
830 iterate = iterateList
831
832 iterateFB c f x = x `c` iterateFB c f (f x)
833 iterateList f x = x : iterateList f (f x)
834 [Not exported]
835
836 {-# RULES
837 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
838 "iterateFB" iterateFB (:) = iterateList
839 #-}
840
841 This got shorted out to:
842
843 iterateList :: (a -> a) -> a -> [a]
844 iterateList = iterate
845
846 iterateFB c f x = x `c` iterateFB c f (f x)
847 iterate f x = x : iterate f (f x)
848
849 {-# RULES
850 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
851 "iterateFB" iterateFB (:) = iterate
852 #-}
853
854 And now we get an infinite loop in the rule system
855 iterate f x -> build (\cn -> iterateFB c f x)
856 -> iterateFB (:) f x
857 -> iterate f x
858
859 Old "solution":
860 use rule switching-off pragmas to get rid
861 of iterateList in the first place
862
863 But in principle the user *might* want rules that only apply to the Id
864 he says. And inline pragmas are similar
865 {-# NOINLINE f #-}
866 f = local
867 local = <stuff>
868 Then we do not want to get rid of the NOINLINE.
869
870 Hence hasShortableIdinfo.
871
872
873 Note [Rules and indirection-zapping]
874 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
875 Problem: what if x_exported has a RULE that mentions something in ...bindings...?
876 Then the things mentioned can be out of scope! Solution
877 a) Make sure that in this pass the usage-info from x_exported is
878 available for ...bindings...
879 b) If there are any such RULES, rec-ify the entire top-level.
880 It'll get sorted out next time round
881
882 Other remarks
883 ~~~~~~~~~~~~~
884 If more than one exported thing is equal to a local thing (i.e., the
885 local thing really is shared), then we do one only:
886 \begin{verbatim}
887 x_local = ....
888 x_exported1 = x_local
889 x_exported2 = x_local
890 ==>
891 x_exported1 = ....
892
893 x_exported2 = x_exported1
894 \end{verbatim}
895
896 We rely on prior eta reduction to simplify things like
897 \begin{verbatim}
898 x_exported = /\ tyvars -> x_local tyvars
899 ==>
900 x_exported = x_local
901 \end{verbatim}
902 Hence,there's a possibility of leaving unchanged something like this:
903 \begin{verbatim}
904 x_local = ....
905 x_exported1 = x_local Int
906 \end{verbatim}
907 By the time we've thrown away the types in STG land this
908 could be eliminated. But I don't think it's very common
909 and it's dangerous to do this fiddling in STG land
910 because we might elminate a binding that's mentioned in the
911 unfolding for something.
912
913 Note [Indirection zapping and ticks]
914 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
915
916 Unfortunately this is another place where we need a special case for
917 ticks. The following happens quite regularly:
918
919 x_local = <expression>
920 x_exported = tick<x> x_local
921
922 Which we want to become:
923
924 x_exported = tick<x> <expression>
925
926 As it makes no sense to keep the tick and the expression on separate
927 bindings. Note however that that this might increase the ticks scoping
928 over the execution of x_local, so we can only do this for floatable
929 ticks. More often than not, other references will be unfoldings of
930 x_exported, and therefore carry the tick anyway.
931 -}
932
933 type IndEnv = IdEnv (Id, [Tickish Var]) -- Maps local_id -> exported_id, ticks
934
935 shortOutIndirections :: CoreProgram -> CoreProgram
936 shortOutIndirections binds
937 | isEmptyVarEnv ind_env = binds
938 | no_need_to_flatten = binds' -- See Note [Rules and indirect-zapping]
939 | otherwise = [Rec (flattenBinds binds')] -- for this no_need_to_flatten stuff
940 where
941 ind_env = makeIndEnv binds
942 -- These exported Ids are the subjects of the indirection-elimination
943 exp_ids = map fst $ nonDetEltsUFM ind_env
944 -- It's OK to use nonDetEltsUFM here because we forget the ordering
945 -- by immediately converting to a set or check if all the elements
946 -- satisfy a predicate.
947 exp_id_set = mkVarSet exp_ids
948 no_need_to_flatten = all (null . ruleInfoRules . idSpecialisation) exp_ids
949 binds' = concatMap zap binds
950
951 zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)]
952 zap (Rec pairs) = [Rec (concatMap zapPair pairs)]
953
954 zapPair (bndr, rhs)
955 | bndr `elemVarSet` exp_id_set = []
956 | Just (exp_id, ticks) <- lookupVarEnv ind_env bndr
957 = [(transferIdInfo exp_id bndr,
958 mkTicks ticks rhs),
959 (bndr, Var exp_id)]
960 | otherwise = [(bndr,rhs)]
961
962 makeIndEnv :: [CoreBind] -> IndEnv
963 makeIndEnv binds
964 = foldr add_bind emptyVarEnv binds
965 where
966 add_bind :: CoreBind -> IndEnv -> IndEnv
967 add_bind (NonRec exported_id rhs) env = add_pair (exported_id, rhs) env
968 add_bind (Rec pairs) env = foldr add_pair env pairs
969
970 add_pair :: (Id,CoreExpr) -> IndEnv -> IndEnv
971 add_pair (exported_id, exported) env
972 | (ticks, Var local_id) <- stripTicksTop tickishFloatable exported
973 , shortMeOut env exported_id local_id
974 = extendVarEnv env local_id (exported_id, ticks)
975 add_pair _ env = env
976
977 -----------------
978 shortMeOut :: IndEnv -> Id -> Id -> Bool
979 shortMeOut ind_env exported_id local_id
980 -- The if-then-else stuff is just so I can get a pprTrace to see
981 -- how often I don't get shorting out because of IdInfo stuff
982 = if isExportedId exported_id && -- Only if this is exported
983
984 isLocalId local_id && -- Only if this one is defined in this
985 -- module, so that we *can* change its
986 -- binding to be the exported thing!
987
988 not (isExportedId local_id) && -- Only if this one is not itself exported,
989 -- since the transformation will nuke it
990
991 not (local_id `elemVarEnv` ind_env) -- Only if not already substituted for
992 then
993 if hasShortableIdInfo exported_id
994 then True -- See Note [Messing up the exported Id's IdInfo]
995 else WARN( True, text "Not shorting out:" <+> ppr exported_id )
996 False
997 else
998 False
999
1000 -----------------
1001 hasShortableIdInfo :: Id -> Bool
1002 -- True if there is no user-attached IdInfo on exported_id,
1003 -- so we can safely discard it
1004 -- See Note [Messing up the exported Id's IdInfo]
1005 hasShortableIdInfo id
1006 = isEmptyRuleInfo (ruleInfo info)
1007 && isDefaultInlinePragma (inlinePragInfo info)
1008 && not (isStableUnfolding (unfoldingInfo info))
1009 where
1010 info = idInfo id
1011
1012 -----------------
1013 transferIdInfo :: Id -> Id -> Id
1014 -- See Note [Transferring IdInfo]
1015 -- If we have
1016 -- lcl_id = e; exp_id = lcl_id
1017 -- and lcl_id has useful IdInfo, we don't want to discard it by going
1018 -- gbl_id = e; lcl_id = gbl_id
1019 -- Instead, transfer IdInfo from lcl_id to exp_id
1020 -- Overwriting, rather than merging, seems to work ok.
1021 transferIdInfo exported_id local_id
1022 = modifyIdInfo transfer exported_id
1023 where
1024 local_info = idInfo local_id
1025 transfer exp_info = exp_info `setStrictnessInfo` strictnessInfo local_info
1026 `setUnfoldingInfo` unfoldingInfo local_info
1027 `setInlinePragInfo` inlinePragInfo local_info
1028 `setRuleInfo` addRuleInfo (ruleInfo exp_info) new_info
1029 new_info = setRuleInfoHead (idName exported_id)
1030 (ruleInfo local_info)
1031 -- Remember to set the function-name field of the
1032 -- rules as we transfer them from one function to another