Abort the build when a Core plugin pass is specified in stage1 compiler
[ghc.git] / compiler / simplCore / SimplCore.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 \section[SimplCore]{Driver for simplifying @Core@ programs}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplCore ( core2core, simplifyExpr ) where
10
11 #include "HsVersions.h"
12
13 import DynFlags
14 import CoreSyn
15 import HscTypes
16 import CSE ( cseProgram )
17 import Rules ( mkRuleBase, unionRuleBase,
18 extendRuleBaseList, ruleCheckProgram, addRuleInfo, )
19 import PprCore ( pprCoreBindings, pprCoreExpr )
20 import OccurAnal ( occurAnalysePgm, occurAnalyseExpr )
21 import IdInfo
22 import CoreStats ( coreBindsSize, coreBindsStats, exprSize )
23 import CoreUtils ( mkTicks, stripTicksTop )
24 import CoreLint ( endPass, lintPassResult, dumpPassResult,
25 lintAnnots )
26 import Simplify ( simplTopBinds, simplExpr, simplRules )
27 import SimplUtils ( simplEnvForGHCi, activeRule )
28 import SimplEnv
29 import SimplMonad
30 import CoreMonad
31 import qualified ErrUtils as Err
32 import FloatIn ( floatInwards )
33 import FloatOut ( floatOutwards )
34 import FamInstEnv
35 import Id
36 import ErrUtils ( withTiming )
37 import BasicTypes ( CompilerPhase(..), isDefaultInlinePragma )
38 import VarSet
39 import VarEnv
40 import LiberateCase ( liberateCase )
41 import SAT ( doStaticArgs )
42 import Specialise ( specProgram)
43 import SpecConstr ( specConstrProgram)
44 import DmdAnal ( dmdAnalProgram )
45 import CallArity ( callArityAnalProgram )
46 import WorkWrap ( wwTopBinds )
47 import Vectorise ( vectorise )
48 import SrcLoc
49 import Util
50 import Module
51
52 import Maybes
53 import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply )
54 import UniqFM
55 import Outputable
56 import Control.Monad
57 import qualified GHC.LanguageExtensions as LangExt
58
59 #ifdef GHCI
60 import DynamicLoading ( loadPlugins )
61 import Plugins ( installCoreToDos )
62 #else
63 import DynamicLoading ( pluginError )
64 #endif
65
66 {-
67 ************************************************************************
68 * *
69 \subsection{The driver for the simplifier}
70 * *
71 ************************************************************************
72 -}
73
74 core2core :: HscEnv -> ModGuts -> IO ModGuts
75 core2core hsc_env guts@(ModGuts { mg_module = mod
76 , mg_loc = loc
77 , mg_deps = deps
78 , mg_rdr_env = rdr_env })
79 = do { us <- mkSplitUniqSupply 's'
80 -- make sure all plugins are loaded
81
82 ; let builtin_passes = getCoreToDo dflags
83 orph_mods = mkModuleSet (mod : dep_orphs deps)
84 ;
85 ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base us mod
86 orph_mods print_unqual loc $
87 do { all_passes <- addPluginPasses builtin_passes
88 ; runCorePasses all_passes guts }
89
90 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_stats
91 "Grand total simplifier statistics"
92 (pprSimplCount stats)
93
94 ; return guts2 }
95 where
96 dflags = hsc_dflags hsc_env
97 home_pkg_rules = hptRules hsc_env (dep_mods deps)
98 hpt_rule_base = mkRuleBase home_pkg_rules
99 print_unqual = mkPrintUnqualified dflags rdr_env
100 -- mod: get the module out of the current HscEnv so we can retrieve it from the monad.
101 -- This is very convienent for the users of the monad (e.g. plugins do not have to
102 -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
103 -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
104 -- would mean our cached value would go out of date.
105
106 {-
107 ************************************************************************
108 * *
109 Generating the main optimisation pipeline
110 * *
111 ************************************************************************
112 -}
113
114 getCoreToDo :: DynFlags -> [CoreToDo]
115 getCoreToDo dflags
116 = flatten_todos core_todo
117 where
118 opt_level = optLevel dflags
119 phases = simplPhases dflags
120 max_iter = maxSimplIterations dflags
121 rule_check = ruleCheck dflags
122 call_arity = gopt Opt_CallArity dflags
123 strictness = gopt Opt_Strictness dflags
124 full_laziness = gopt Opt_FullLaziness dflags
125 do_specialise = gopt Opt_Specialise dflags
126 do_float_in = gopt Opt_FloatIn dflags
127 cse = gopt Opt_CSE dflags
128 spec_constr = gopt Opt_SpecConstr dflags
129 liberate_case = gopt Opt_LiberateCase dflags
130 late_dmd_anal = gopt Opt_LateDmdAnal dflags
131 static_args = gopt Opt_StaticArgumentTransformation dflags
132 rules_on = gopt Opt_EnableRewriteRules dflags
133 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
134 ww_on = gopt Opt_WorkerWrapper dflags
135 static_ptrs = xopt LangExt.StaticPointers dflags
136
137 maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
138
139 maybe_strictness_before phase
140 = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness
141
142 base_mode = SimplMode { sm_phase = panic "base_mode"
143 , sm_names = []
144 , sm_rules = rules_on
145 , sm_eta_expand = eta_expand_on
146 , sm_inline = True
147 , sm_case_case = True }
148
149 simpl_phase phase names iter
150 = CoreDoPasses
151 $ [ maybe_strictness_before phase
152 , CoreDoSimplify iter
153 (base_mode { sm_phase = Phase phase
154 , sm_names = names })
155
156 , maybe_rule_check (Phase phase) ]
157
158 -- Vectorisation can introduce a fair few common sub expressions involving
159 -- DPH primitives. For example, see the Reverse test from dph-examples.
160 -- We need to eliminate these common sub expressions before their definitions
161 -- are inlined in phase 2. The CSE introduces lots of v1 = v2 bindings,
162 -- so we also run simpl_gently to inline them.
163 ++ (if gopt Opt_Vectorise dflags && phase == 3
164 then [CoreCSE, simpl_gently]
165 else [])
166
167 vectorisation
168 = runWhen (gopt Opt_Vectorise dflags) $
169 CoreDoPasses [ simpl_gently, CoreDoVectorisation ]
170
171 -- By default, we have 2 phases before phase 0.
172
173 -- Want to run with inline phase 2 after the specialiser to give
174 -- maximum chance for fusion to work before we inline build/augment
175 -- in phase 1. This made a difference in 'ansi' where an
176 -- overloaded function wasn't inlined till too late.
177
178 -- Need phase 1 so that build/augment get
179 -- inlined. I found that spectral/hartel/genfft lost some useful
180 -- strictness in the function sumcode' if augment is not inlined
181 -- before strictness analysis runs
182 simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
183 | phase <- [phases, phases-1 .. 1] ]
184
185
186 -- initial simplify: mk specialiser happy: minimum effort please
187 simpl_gently = CoreDoSimplify max_iter
188 (base_mode { sm_phase = InitialPhase
189 , sm_names = ["Gentle"]
190 , sm_rules = rules_on -- Note [RULEs enabled in SimplGently]
191 , sm_inline = False
192 , sm_case_case = False })
193 -- Don't do case-of-case transformations.
194 -- This makes full laziness work better
195
196 strictness_pass = if ww_on
197 then [CoreDoStrictness,CoreDoWorkerWrapper]
198 else [CoreDoStrictness]
199
200
201 -- New demand analyser
202 demand_analyser = (CoreDoPasses (
203 strictness_pass ++
204 [simpl_phase 0 ["post-worker-wrapper"] max_iter]
205 ))
206
207 core_todo =
208 if opt_level == 0 then
209 [ vectorisation,
210 -- Static forms are moved to the top level with the FloatOut pass.
211 -- See Note [Grand plan for static forms].
212 runWhen static_ptrs $ CoreDoFloatOutwards FloatOutSwitches {
213 floatOutLambdas = Just 0,
214 floatOutConstants = True,
215 floatOutOverSatApps = False,
216 floatToTopLevelOnly = True },
217 CoreDoSimplify max_iter
218 (base_mode { sm_phase = Phase 0
219 , sm_names = ["Non-opt simplification"] })
220 ]
221
222 else {- opt_level >= 1 -} [
223
224 -- We want to do the static argument transform before full laziness as it
225 -- may expose extra opportunities to float things outwards. However, to fix
226 -- up the output of the transformation we need at do at least one simplify
227 -- after this before anything else
228 runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
229
230 -- We run vectorisation here for now, but we might also try to run
231 -- it later
232 vectorisation,
233
234 -- initial simplify: mk specialiser happy: minimum effort please
235 simpl_gently,
236
237 -- Specialisation is best done before full laziness
238 -- so that overloaded functions have all their dictionary lambdas manifest
239 runWhen do_specialise CoreDoSpecialising,
240
241 runWhen full_laziness $
242 CoreDoFloatOutwards FloatOutSwitches {
243 floatOutLambdas = Just 0,
244 floatOutConstants = True,
245 floatOutOverSatApps = False,
246 floatToTopLevelOnly = False },
247 -- Was: gentleFloatOutSwitches
248 --
249 -- I have no idea why, but not floating constants to
250 -- top level is very bad in some cases.
251 --
252 -- Notably: p_ident in spectral/rewrite
253 -- Changing from "gentle" to "constantsOnly"
254 -- improved rewrite's allocation by 19%, and
255 -- made 0.0% difference to any other nofib
256 -- benchmark
257 --
258 -- Not doing floatOutOverSatApps yet, we'll do
259 -- that later on when we've had a chance to get more
260 -- accurate arity information. In fact it makes no
261 -- difference at all to performance if we do it here,
262 -- but maybe we save some unnecessary to-and-fro in
263 -- the simplifier.
264
265 simpl_phases,
266
267 -- Phase 0: allow all Ids to be inlined now
268 -- This gets foldr inlined before strictness analysis
269
270 -- At least 3 iterations because otherwise we land up with
271 -- huge dead expressions because of an infelicity in the
272 -- simpifier.
273 -- let k = BIG in foldr k z xs
274 -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
275 -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
276 -- Don't stop now!
277 simpl_phase 0 ["main"] (max max_iter 3),
278
279 runWhen do_float_in CoreDoFloatInwards,
280 -- Run float-inwards immediately before the strictness analyser
281 -- Doing so pushes bindings nearer their use site and hence makes
282 -- them more likely to be strict. These bindings might only show
283 -- up after the inlining from simplification. Example in fulsom,
284 -- Csg.calc, where an arg of timesDouble thereby becomes strict.
285
286 runWhen call_arity $ CoreDoPasses
287 [ CoreDoCallArity
288 , simpl_phase 0 ["post-call-arity"] max_iter
289 ],
290
291 runWhen strictness demand_analyser,
292
293 runWhen full_laziness $
294 CoreDoFloatOutwards FloatOutSwitches {
295 floatOutLambdas = floatLamArgs dflags,
296 floatOutConstants = True,
297 floatOutOverSatApps = True,
298 floatToTopLevelOnly = False },
299 -- nofib/spectral/hartel/wang doubles in speed if you
300 -- do full laziness late in the day. It only happens
301 -- after fusion and other stuff, so the early pass doesn't
302 -- catch it. For the record, the redex is
303 -- f_el22 (f_el21 r_midblock)
304
305
306 runWhen cse CoreCSE,
307 -- We want CSE to follow the final full-laziness pass, because it may
308 -- succeed in commoning up things floated out by full laziness.
309 -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
310
311 runWhen do_float_in CoreDoFloatInwards,
312
313 maybe_rule_check (Phase 0),
314
315 -- Case-liberation for -O2. This should be after
316 -- strictness analysis and the simplification which follows it.
317 runWhen liberate_case (CoreDoPasses [
318 CoreLiberateCase,
319 simpl_phase 0 ["post-liberate-case"] max_iter
320 ]), -- Run the simplifier after LiberateCase to vastly
321 -- reduce the possiblility of shadowing
322 -- Reason: see Note [Shadowing] in SpecConstr.hs
323
324 runWhen spec_constr CoreDoSpecConstr,
325
326 maybe_rule_check (Phase 0),
327
328 -- Final clean-up simplification:
329 simpl_phase 0 ["final"] max_iter,
330
331 runWhen late_dmd_anal $ CoreDoPasses (
332 strictness_pass ++
333 [simpl_phase 0 ["post-late-ww"] max_iter]
334 ),
335
336 -- Final run of the demand_analyser, ensures that one-shot thunks are
337 -- really really one-shot thunks. Only needed if the demand analyser
338 -- has run at all. See Note [Final Demand Analyser run] in DmdAnal
339 runWhen (strictness || late_dmd_anal) CoreDoStrictness,
340
341 maybe_rule_check (Phase 0)
342 ]
343
344 -- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity.
345 flatten_todos [] = []
346 flatten_todos (CoreDoNothing : rest) = flatten_todos rest
347 flatten_todos (CoreDoPasses passes : rest) =
348 flatten_todos passes ++ flatten_todos rest
349 flatten_todos (todo : rest) = todo : flatten_todos rest
350
351 -- Loading plugins
352
353 addPluginPasses :: [CoreToDo] -> CoreM [CoreToDo]
354 #ifndef GHCI
355 addPluginPasses builtin_passes
356 = do { dflags <- getDynFlags
357 ; let pluginMods = pluginModNames dflags
358 ; unless (null pluginMods) (pluginError pluginMods)
359 ; return builtin_passes }
360 #else
361 addPluginPasses builtin_passes
362 = do { hsc_env <- getHscEnv
363 ; named_plugins <- liftIO (loadPlugins hsc_env)
364 ; foldM query_plug builtin_passes named_plugins }
365 where
366 query_plug todos (_, plug, options) = installCoreToDos plug options todos
367 #endif
368
369 {-
370 ************************************************************************
371 * *
372 The CoreToDo interpreter
373 * *
374 ************************************************************************
375 -}
376
377 runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts
378 runCorePasses passes guts
379 = foldM do_pass guts passes
380 where
381 do_pass guts CoreDoNothing = return guts
382 do_pass guts (CoreDoPasses ps) = runCorePasses ps guts
383 do_pass guts pass
384 = withTiming getDynFlags
385 (ppr pass <+> brackets (ppr mod))
386 (const ()) $ do
387 { guts' <- lintAnnots (ppr pass) (doCorePass pass) guts
388 ; endPass pass (mg_binds guts') (mg_rules guts')
389 ; return guts' }
390
391 mod = mg_module guts
392
393 doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts
394 doCorePass pass@(CoreDoSimplify {}) = {-# SCC "Simplify" #-}
395 simplifyPgm pass
396
397 doCorePass CoreCSE = {-# SCC "CommonSubExpr" #-}
398 doPass cseProgram
399
400 doCorePass CoreLiberateCase = {-# SCC "LiberateCase" #-}
401 doPassD liberateCase
402
403 doCorePass CoreDoFloatInwards = {-# SCC "FloatInwards" #-}
404 doPassD floatInwards
405
406 doCorePass (CoreDoFloatOutwards f) = {-# SCC "FloatOutwards" #-}
407 doPassDUM (floatOutwards f)
408
409 doCorePass CoreDoStaticArgs = {-# SCC "StaticArgs" #-}
410 doPassU doStaticArgs
411
412 doCorePass CoreDoCallArity = {-# SCC "CallArity" #-}
413 doPassD callArityAnalProgram
414
415 doCorePass CoreDoStrictness = {-# SCC "NewStranal" #-}
416 doPassDFM dmdAnalProgram
417
418 doCorePass CoreDoWorkerWrapper = {-# SCC "WorkWrap" #-}
419 doPassDFU wwTopBinds
420
421 doCorePass CoreDoSpecialising = {-# SCC "Specialise" #-}
422 specProgram
423
424 doCorePass CoreDoSpecConstr = {-# SCC "SpecConstr" #-}
425 specConstrProgram
426
427 doCorePass CoreDoVectorisation = {-# SCC "Vectorise" #-}
428 vectorise
429
430 doCorePass CoreDoPrintCore = observe printCore
431 doCorePass (CoreDoRuleCheck phase pat) = ruleCheckPass phase pat
432 doCorePass CoreDoNothing = return
433 doCorePass (CoreDoPasses passes) = runCorePasses passes
434
435 #ifdef GHCI
436 doCorePass (CoreDoPluginPass _ pass) = {-# SCC "Plugin" #-} pass
437 #endif
438
439 doCorePass pass = pprPanic "doCorePass" (ppr pass)
440
441 {-
442 ************************************************************************
443 * *
444 \subsection{Core pass combinators}
445 * *
446 ************************************************************************
447 -}
448
449 printCore :: DynFlags -> CoreProgram -> IO ()
450 printCore dflags binds
451 = Err.dumpIfSet dflags True "Print Core" (pprCoreBindings binds)
452
453 ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts
454 ruleCheckPass current_phase pat guts =
455 withTiming getDynFlags
456 (text "RuleCheck"<+>brackets (ppr $ mg_module guts))
457 (const ()) $ do
458 { rb <- getRuleBase
459 ; dflags <- getDynFlags
460 ; vis_orphs <- getVisibleOrphanMods
461 ; liftIO $ log_action dflags dflags NoReason Err.SevDump noSrcSpan
462 defaultDumpStyle
463 (ruleCheckProgram current_phase pat
464 (RuleEnv rb vis_orphs) (mg_binds guts))
465 ; return guts }
466
467 doPassDUM :: (DynFlags -> UniqSupply -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
468 doPassDUM do_pass = doPassM $ \binds -> do
469 dflags <- getDynFlags
470 us <- getUniqueSupplyM
471 liftIO $ do_pass dflags us binds
472
473 doPassDM :: (DynFlags -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
474 doPassDM do_pass = doPassDUM (\dflags -> const (do_pass dflags))
475
476 doPassD :: (DynFlags -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
477 doPassD do_pass = doPassDM (\dflags -> return . do_pass dflags)
478
479 doPassDU :: (DynFlags -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
480 doPassDU do_pass = doPassDUM (\dflags us -> return . do_pass dflags us)
481
482 doPassU :: (UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
483 doPassU do_pass = doPassDU (const do_pass)
484
485 doPassDFM :: (DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
486 doPassDFM do_pass guts = do
487 dflags <- getDynFlags
488 p_fam_env <- getPackageFamInstEnv
489 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
490 doPassM (liftIO . do_pass dflags fam_envs) guts
491
492 doPassDFU :: (DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
493 doPassDFU do_pass guts = do
494 dflags <- getDynFlags
495 us <- getUniqueSupplyM
496 p_fam_env <- getPackageFamInstEnv
497 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
498 doPass (do_pass dflags fam_envs us) guts
499
500 -- Most passes return no stats and don't change rules: these combinators
501 -- let us lift them to the full blown ModGuts+CoreM world
502 doPassM :: Monad m => (CoreProgram -> m CoreProgram) -> ModGuts -> m ModGuts
503 doPassM bind_f guts = do
504 binds' <- bind_f (mg_binds guts)
505 return (guts { mg_binds = binds' })
506
507 doPass :: (CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
508 doPass bind_f guts = return $ guts { mg_binds = bind_f (mg_binds guts) }
509
510 -- Observer passes just peek; don't modify the bindings at all
511 observe :: (DynFlags -> CoreProgram -> IO a) -> ModGuts -> CoreM ModGuts
512 observe do_pass = doPassM $ \binds -> do
513 dflags <- getDynFlags
514 _ <- liftIO $ do_pass dflags binds
515 return binds
516
517 {-
518 ************************************************************************
519 * *
520 Gentle simplification
521 * *
522 ************************************************************************
523 -}
524
525 simplifyExpr :: DynFlags -- includes spec of what core-to-core passes to do
526 -> CoreExpr
527 -> IO CoreExpr
528 -- simplifyExpr is called by the driver to simplify an
529 -- expression typed in at the interactive prompt
530 --
531 -- Also used by Template Haskell
532 simplifyExpr dflags expr
533 = withTiming (pure dflags) (text "Simplify [expr]") (const ()) $
534 do {
535 ; us <- mkSplitUniqSupply 's'
536
537 ; let sz = exprSize expr
538
539 ; (expr', counts) <- initSmpl dflags emptyRuleEnv
540 emptyFamInstEnvs us sz
541 (simplExprGently (simplEnvForGHCi dflags) expr)
542
543 ; Err.dumpIfSet dflags (dopt Opt_D_dump_simpl_stats dflags)
544 "Simplifier statistics" (pprSimplCount counts)
545
546 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl "Simplified expression"
547 (pprCoreExpr expr')
548
549 ; return expr'
550 }
551
552 simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr
553 -- Simplifies an expression
554 -- does occurrence analysis, then simplification
555 -- and repeats (twice currently) because one pass
556 -- alone leaves tons of crud.
557 -- Used (a) for user expressions typed in at the interactive prompt
558 -- (b) the LHS and RHS of a RULE
559 -- (c) Template Haskell splices
560 --
561 -- The name 'Gently' suggests that the SimplifierMode is SimplGently,
562 -- and in fact that is so.... but the 'Gently' in simplExprGently doesn't
563 -- enforce that; it just simplifies the expression twice
564
565 -- It's important that simplExprGently does eta reduction; see
566 -- Note [Simplifying the left-hand side of a RULE] above. The
567 -- simplifier does indeed do eta reduction (it's in Simplify.completeLam)
568 -- but only if -O is on.
569
570 simplExprGently env expr = do
571 expr1 <- simplExpr env (occurAnalyseExpr expr)
572 simplExpr env (occurAnalyseExpr expr1)
573
574 {-
575 ************************************************************************
576 * *
577 \subsection{The driver for the simplifier}
578 * *
579 ************************************************************************
580 -}
581
582 simplifyPgm :: CoreToDo -> ModGuts -> CoreM ModGuts
583 simplifyPgm pass guts
584 = do { hsc_env <- getHscEnv
585 ; us <- getUniqueSupplyM
586 ; rb <- getRuleBase
587 ; liftIOWithCount $
588 simplifyPgmIO pass hsc_env us rb guts }
589
590 simplifyPgmIO :: CoreToDo
591 -> HscEnv
592 -> UniqSupply
593 -> RuleBase
594 -> ModGuts
595 -> IO (SimplCount, ModGuts) -- New bindings
596
597 simplifyPgmIO pass@(CoreDoSimplify max_iterations mode)
598 hsc_env us hpt_rule_base
599 guts@(ModGuts { mg_module = this_mod
600 , mg_rdr_env = rdr_env
601 , mg_deps = deps
602 , mg_binds = binds, mg_rules = rules
603 , mg_fam_inst_env = fam_inst_env })
604 = do { (termination_msg, it_count, counts_out, guts')
605 <- do_iteration us 1 [] binds rules
606
607 ; Err.dumpIfSet dflags (dopt Opt_D_verbose_core2core dflags &&
608 dopt Opt_D_dump_simpl_stats dflags)
609 "Simplifier statistics for following pass"
610 (vcat [text termination_msg <+> text "after" <+> ppr it_count
611 <+> text "iterations",
612 blankLine,
613 pprSimplCount counts_out])
614
615 ; return (counts_out, guts')
616 }
617 where
618 dflags = hsc_dflags hsc_env
619 print_unqual = mkPrintUnqualified dflags rdr_env
620 simpl_env = mkSimplEnv mode
621 active_rule = activeRule simpl_env
622
623 do_iteration :: UniqSupply
624 -> Int -- Counts iterations
625 -> [SimplCount] -- Counts from earlier iterations, reversed
626 -> CoreProgram -- Bindings in
627 -> [CoreRule] -- and orphan rules
628 -> IO (String, Int, SimplCount, ModGuts)
629
630 do_iteration us iteration_no counts_so_far binds rules
631 -- iteration_no is the number of the iteration we are
632 -- about to begin, with '1' for the first
633 | iteration_no > max_iterations -- Stop if we've run out of iterations
634 = WARN( debugIsOn && (max_iterations > 2)
635 , hang (text "Simplifier bailing out after" <+> int max_iterations
636 <+> text "iterations"
637 <+> (brackets $ hsep $ punctuate comma $
638 map (int . simplCountN) (reverse counts_so_far)))
639 2 (text "Size =" <+> ppr (coreBindsStats binds)))
640
641 -- Subtract 1 from iteration_no to get the
642 -- number of iterations we actually completed
643 return ( "Simplifier baled out", iteration_no - 1
644 , totalise counts_so_far
645 , guts { mg_binds = binds, mg_rules = rules } )
646
647 -- Try and force thunks off the binds; significantly reduces
648 -- space usage, especially with -O. JRS, 000620.
649 | let sz = coreBindsSize binds
650 , () <- sz `seq` () -- Force it
651 = do {
652 -- Occurrence analysis
653 let { -- Note [Vectorisation declarations and occurrences]
654 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
655 -- During the 'InitialPhase' (i.e., before vectorisation), we need to make sure
656 -- that the right-hand sides of vectorisation declarations are taken into
657 -- account during occurrence analysis. After the 'InitialPhase', we need to ensure
658 -- that the binders representing variable vectorisation declarations are kept alive.
659 -- (In contrast to automatically vectorised variables, their unvectorised versions
660 -- don't depend on them.)
661 vectVars = mkVarSet $
662 catMaybes [ fmap snd $ lookupVarEnv (vectInfoVar (mg_vect_info guts)) bndr
663 | Vect bndr _ <- mg_vect_decls guts]
664 ++
665 catMaybes [ fmap snd $ lookupVarEnv (vectInfoVar (mg_vect_info guts)) bndr
666 | bndr <- bindersOfBinds binds]
667 -- FIXME: This second comprehensions is only needed as long as we
668 -- have vectorised bindings where we get "Could NOT call
669 -- vectorised from original version".
670 ; (maybeVects, maybeVectVars)
671 = case sm_phase mode of
672 InitialPhase -> (mg_vect_decls guts, vectVars)
673 _ -> ([], vectVars)
674 ; tagged_binds = {-# SCC "OccAnal" #-}
675 occurAnalysePgm this_mod active_rule rules
676 maybeVects maybeVectVars binds
677 } ;
678 Err.dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis"
679 (pprCoreBindings tagged_binds);
680
681 -- Get any new rules, and extend the rule base
682 -- See Note [Overall plumbing for rules] in Rules.hs
683 -- We need to do this regularly, because simplification can
684 -- poke on IdInfo thunks, which in turn brings in new rules
685 -- behind the scenes. Otherwise there's a danger we'll simply
686 -- miss the rules for Ids hidden inside imported inlinings
687 eps <- hscEPS hsc_env ;
688 let { rule_base1 = unionRuleBase hpt_rule_base (eps_rule_base eps)
689 ; rule_base2 = extendRuleBaseList rule_base1 rules
690 ; fam_envs = (eps_fam_inst_env eps, fam_inst_env)
691 ; vis_orphs = this_mod : dep_orphs deps } ;
692
693 -- Simplify the program
694 ((binds1, rules1), counts1) <-
695 initSmpl dflags (mkRuleEnv rule_base2 vis_orphs) fam_envs us1 sz $
696 do { env1 <- {-# SCC "SimplTopBinds" #-}
697 simplTopBinds simpl_env tagged_binds
698
699 -- Apply the substitution to rules defined in this module
700 -- for imported Ids. Eg RULE map my_f = blah
701 -- If we have a substitution my_f :-> other_f, we'd better
702 -- apply it to the rule to, or it'll never match
703 ; rules1 <- simplRules env1 Nothing rules
704
705 ; return (getFloatBinds env1, rules1) } ;
706
707 -- Stop if nothing happened; don't dump output
708 if isZeroSimplCount counts1 then
709 return ( "Simplifier reached fixed point", iteration_no
710 , totalise (counts1 : counts_so_far) -- Include "free" ticks
711 , guts { mg_binds = binds1, mg_rules = rules1 } )
712 else do {
713 -- Short out indirections
714 -- We do this *after* at least one run of the simplifier
715 -- because indirection-shorting uses the export flag on *occurrences*
716 -- and that isn't guaranteed to be ok until after the first run propagates
717 -- stuff from the binding site to its occurrences
718 --
719 -- ToDo: alas, this means that indirection-shorting does not happen at all
720 -- if the simplifier does nothing (not common, I know, but unsavoury)
721 let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ;
722
723 -- Dump the result of this iteration
724 dump_end_iteration dflags print_unqual iteration_no counts1 binds2 rules1 ;
725 lintPassResult hsc_env pass binds2 ;
726
727 -- Loop
728 do_iteration us2 (iteration_no + 1) (counts1:counts_so_far) binds2 rules1
729 } }
730 | otherwise = panic "do_iteration"
731 where
732 (us1, us2) = splitUniqSupply us
733
734 -- Remember the counts_so_far are reversed
735 totalise :: [SimplCount] -> SimplCount
736 totalise = foldr (\c acc -> acc `plusSimplCount` c)
737 (zeroSimplCount dflags)
738
739 simplifyPgmIO _ _ _ _ _ = panic "simplifyPgmIO"
740
741 -------------------
742 dump_end_iteration :: DynFlags -> PrintUnqualified -> Int
743 -> SimplCount -> CoreProgram -> [CoreRule] -> IO ()
744 dump_end_iteration dflags print_unqual iteration_no counts binds rules
745 = dumpPassResult dflags print_unqual mb_flag hdr pp_counts binds rules
746 where
747 mb_flag | dopt Opt_D_dump_simpl_iterations dflags = Just Opt_D_dump_simpl_iterations
748 | otherwise = Nothing
749 -- Show details if Opt_D_dump_simpl_iterations is on
750
751 hdr = text "Simplifier iteration=" <> int iteration_no
752 pp_counts = vcat [ text "---- Simplifier counts for" <+> hdr
753 , pprSimplCount counts
754 , text "---- End of simplifier counts for" <+> hdr ]
755
756 {-
757 ************************************************************************
758 * *
759 Shorting out indirections
760 * *
761 ************************************************************************
762
763 If we have this:
764
765 x_local = <expression>
766 ...bindings...
767 x_exported = x_local
768
769 where x_exported is exported, and x_local is not, then we replace it with this:
770
771 x_exported = <expression>
772 x_local = x_exported
773 ...bindings...
774
775 Without this we never get rid of the x_exported = x_local thing. This
776 save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
777 makes strictness information propagate better. This used to happen in
778 the final phase, but it's tidier to do it here.
779
780 Note [Transferring IdInfo]
781 ~~~~~~~~~~~~~~~~~~~~~~~~~~
782 We want to propagage any useful IdInfo on x_local to x_exported.
783
784 STRICTNESS: if we have done strictness analysis, we want the strictness info on
785 x_local to transfer to x_exported. Hence the copyIdInfo call.
786
787 RULES: we want to *add* any RULES for x_local to x_exported.
788
789
790 Note [Messing up the exported Id's RULES]
791 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
792 We must be careful about discarding (obviously) or even merging the
793 RULES on the exported Id. The example that went bad on me at one stage
794 was this one:
795
796 iterate :: (a -> a) -> a -> [a]
797 [Exported]
798 iterate = iterateList
799
800 iterateFB c f x = x `c` iterateFB c f (f x)
801 iterateList f x = x : iterateList f (f x)
802 [Not exported]
803
804 {-# RULES
805 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
806 "iterateFB" iterateFB (:) = iterateList
807 #-}
808
809 This got shorted out to:
810
811 iterateList :: (a -> a) -> a -> [a]
812 iterateList = iterate
813
814 iterateFB c f x = x `c` iterateFB c f (f x)
815 iterate f x = x : iterate f (f x)
816
817 {-# RULES
818 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
819 "iterateFB" iterateFB (:) = iterate
820 #-}
821
822 And now we get an infinite loop in the rule system
823 iterate f x -> build (\cn -> iterateFB c f x)
824 -> iterateFB (:) f x
825 -> iterate f x
826
827 Old "solution":
828 use rule switching-off pragmas to get rid
829 of iterateList in the first place
830
831 But in principle the user *might* want rules that only apply to the Id
832 he says. And inline pragmas are similar
833 {-# NOINLINE f #-}
834 f = local
835 local = <stuff>
836 Then we do not want to get rid of the NOINLINE.
837
838 Hence hasShortableIdinfo.
839
840
841 Note [Rules and indirection-zapping]
842 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
843 Problem: what if x_exported has a RULE that mentions something in ...bindings...?
844 Then the things mentioned can be out of scope! Solution
845 a) Make sure that in this pass the usage-info from x_exported is
846 available for ...bindings...
847 b) If there are any such RULES, rec-ify the entire top-level.
848 It'll get sorted out next time round
849
850 Other remarks
851 ~~~~~~~~~~~~~
852 If more than one exported thing is equal to a local thing (i.e., the
853 local thing really is shared), then we do one only:
854 \begin{verbatim}
855 x_local = ....
856 x_exported1 = x_local
857 x_exported2 = x_local
858 ==>
859 x_exported1 = ....
860
861 x_exported2 = x_exported1
862 \end{verbatim}
863
864 We rely on prior eta reduction to simplify things like
865 \begin{verbatim}
866 x_exported = /\ tyvars -> x_local tyvars
867 ==>
868 x_exported = x_local
869 \end{verbatim}
870 Hence,there's a possibility of leaving unchanged something like this:
871 \begin{verbatim}
872 x_local = ....
873 x_exported1 = x_local Int
874 \end{verbatim}
875 By the time we've thrown away the types in STG land this
876 could be eliminated. But I don't think it's very common
877 and it's dangerous to do this fiddling in STG land
878 because we might elminate a binding that's mentioned in the
879 unfolding for something.
880
881 Note [Indirection zapping and ticks]
882 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
883
884 Unfortunately this is another place where we need a special case for
885 ticks. The following happens quite regularly:
886
887 x_local = <expression>
888 x_exported = tick<x> x_local
889
890 Which we want to become:
891
892 x_exported = tick<x> <expression>
893
894 As it makes no sense to keep the tick and the expression on separate
895 bindings. Note however that that this might increase the ticks scoping
896 over the execution of x_local, so we can only do this for floatable
897 ticks. More often than not, other references will be unfoldings of
898 x_exported, and therefore carry the tick anyway.
899 -}
900
901 type IndEnv = IdEnv (Id, [Tickish Var]) -- Maps local_id -> exported_id, ticks
902
903 shortOutIndirections :: CoreProgram -> CoreProgram
904 shortOutIndirections binds
905 | isEmptyVarEnv ind_env = binds
906 | no_need_to_flatten = binds' -- See Note [Rules and indirect-zapping]
907 | otherwise = [Rec (flattenBinds binds')] -- for this no_need_to_flatten stuff
908 where
909 ind_env = makeIndEnv binds
910 -- These exported Ids are the subjects of the indirection-elimination
911 exp_ids = map fst $ nonDetEltsUFM ind_env
912 -- It's OK to use nonDetEltsUFM here because we forget the ordering
913 -- by immediately converting to a set or check if all the elements
914 -- satisfy a predicate.
915 exp_id_set = mkVarSet exp_ids
916 no_need_to_flatten = all (null . ruleInfoRules . idSpecialisation) exp_ids
917 binds' = concatMap zap binds
918
919 zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)]
920 zap (Rec pairs) = [Rec (concatMap zapPair pairs)]
921
922 zapPair (bndr, rhs)
923 | bndr `elemVarSet` exp_id_set = []
924 | Just (exp_id, ticks) <- lookupVarEnv ind_env bndr
925 = [(transferIdInfo exp_id bndr,
926 mkTicks ticks rhs),
927 (bndr, Var exp_id)]
928 | otherwise = [(bndr,rhs)]
929
930 makeIndEnv :: [CoreBind] -> IndEnv
931 makeIndEnv binds
932 = foldr add_bind emptyVarEnv binds
933 where
934 add_bind :: CoreBind -> IndEnv -> IndEnv
935 add_bind (NonRec exported_id rhs) env = add_pair (exported_id, rhs) env
936 add_bind (Rec pairs) env = foldr add_pair env pairs
937
938 add_pair :: (Id,CoreExpr) -> IndEnv -> IndEnv
939 add_pair (exported_id, exported) env
940 | (ticks, Var local_id) <- stripTicksTop tickishFloatable exported
941 , shortMeOut env exported_id local_id
942 = extendVarEnv env local_id (exported_id, ticks)
943 add_pair _ env = env
944
945 -----------------
946 shortMeOut :: IndEnv -> Id -> Id -> Bool
947 shortMeOut ind_env exported_id local_id
948 -- The if-then-else stuff is just so I can get a pprTrace to see
949 -- how often I don't get shorting out because of IdInfo stuff
950 = if isExportedId exported_id && -- Only if this is exported
951
952 isLocalId local_id && -- Only if this one is defined in this
953 -- module, so that we *can* change its
954 -- binding to be the exported thing!
955
956 not (isExportedId local_id) && -- Only if this one is not itself exported,
957 -- since the transformation will nuke it
958
959 not (local_id `elemVarEnv` ind_env) -- Only if not already substituted for
960 then
961 if hasShortableIdInfo exported_id
962 then True -- See Note [Messing up the exported Id's IdInfo]
963 else WARN( True, text "Not shorting out:" <+> ppr exported_id )
964 False
965 else
966 False
967
968 -----------------
969 hasShortableIdInfo :: Id -> Bool
970 -- True if there is no user-attached IdInfo on exported_id,
971 -- so we can safely discard it
972 -- See Note [Messing up the exported Id's IdInfo]
973 hasShortableIdInfo id
974 = isEmptyRuleInfo (ruleInfo info)
975 && isDefaultInlinePragma (inlinePragInfo info)
976 && not (isStableUnfolding (unfoldingInfo info))
977 where
978 info = idInfo id
979
980 -----------------
981 transferIdInfo :: Id -> Id -> Id
982 -- See Note [Transferring IdInfo]
983 -- If we have
984 -- lcl_id = e; exp_id = lcl_id
985 -- and lcl_id has useful IdInfo, we don't want to discard it by going
986 -- gbl_id = e; lcl_id = gbl_id
987 -- Instead, transfer IdInfo from lcl_id to exp_id
988 -- Overwriting, rather than merging, seems to work ok.
989 transferIdInfo exported_id local_id
990 = modifyIdInfo transfer exported_id
991 where
992 local_info = idInfo local_id
993 transfer exp_info = exp_info `setStrictnessInfo` strictnessInfo local_info
994 `setUnfoldingInfo` unfoldingInfo local_info
995 `setInlinePragInfo` inlinePragInfo local_info
996 `setRuleInfo` addRuleInfo (ruleInfo exp_info) new_info
997 new_info = setRuleInfoHead (idName exported_id)
998 (ruleInfo local_info)
999 -- Remember to set the function-name field of the
1000 -- rules as we transfer them from one function to another
1001
1002
1003 {- Note [Grand plan for static forms]
1004 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1005 Static forms go through the compilation phases as follows.
1006 Here is a running example:
1007
1008 f x = let k = map toUpper
1009 in ...(static k)...
1010
1011 * The renamer looks for out-of-scope names in the body of the static
1012 form, as always If all names are in scope, the free variables of the
1013 body are stored in AST at the location of the static form.
1014
1015 * The typechecker verifies that all free variables occurring in the
1016 static form are closed (see Note [Bindings with closed types] in
1017 TcRnTypes). In our example, 'k' is closed, even though it is bound
1018 in a nested let, we are fine.
1019
1020 * The desugarer replaces the static form with an application of the
1021 data constructor 'StaticPtr' (defined in module GHC.StaticPtr of
1022 base). So we get
1023
1024 f x = let k = map toUpper
1025 in ...(StaticPtr <fingerprint> k)...
1026
1027 * The simplifier runs the FloatOut pass which moves the applications
1028 of 'StaticPtr' to the top level. Thus the FloatOut pass is always
1029 executed, even when optimizations are disabled. So we get
1030
1031 k = map toUpper
1032 lvl = StaticPtr <fingerprint> k
1033 f x = ...lvl...
1034
1035 * The CoreTidy pass produces a C function which inserts all the
1036 floated 'StaticPtr' in the static pointer table (see the call to
1037 StaticPtrTable.sptModuleInitCode in TidyPgm). CoreTidy pass also
1038 exports the Ids of floated 'StaticPtr's so they can be linked with
1039 the C function.
1040 -}