Print which warning-flag controls an emitted warning
[ghc.git] / compiler / simplCore / SimplCore.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 \section[SimplCore]{Driver for simplifying @Core@ programs}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplCore ( core2core, simplifyExpr ) where
10
11 #include "HsVersions.h"
12
13 import DynFlags
14 import CoreSyn
15 import HscTypes
16 import CSE ( cseProgram )
17 import Rules ( mkRuleBase, unionRuleBase,
18 extendRuleBaseList, ruleCheckProgram, addRuleInfo, )
19 import PprCore ( pprCoreBindings, pprCoreExpr )
20 import OccurAnal ( occurAnalysePgm, occurAnalyseExpr )
21 import IdInfo
22 import CoreStats ( coreBindsSize, coreBindsStats, exprSize )
23 import CoreUtils ( mkTicks, stripTicksTop )
24 import CoreLint ( showPass, endPass, lintPassResult, dumpPassResult,
25 lintAnnots )
26 import Simplify ( simplTopBinds, simplExpr, simplRules )
27 import SimplUtils ( simplEnvForGHCi, activeRule )
28 import SimplEnv
29 import SimplMonad
30 import CoreMonad
31 import qualified ErrUtils as Err
32 import FloatIn ( floatInwards )
33 import FloatOut ( floatOutwards )
34 import FamInstEnv
35 import Id
36 import BasicTypes ( CompilerPhase(..), isDefaultInlinePragma )
37 import VarSet
38 import VarEnv
39 import LiberateCase ( liberateCase )
40 import SAT ( doStaticArgs )
41 import Specialise ( specProgram)
42 import SpecConstr ( specConstrProgram)
43 import DmdAnal ( dmdAnalProgram )
44 import CallArity ( callArityAnalProgram )
45 import WorkWrap ( wwTopBinds )
46 import Vectorise ( vectorise )
47 import SrcLoc
48 import Util
49 import Module
50
51 import Maybes
52 import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply )
53 import Outputable
54 import Control.Monad
55
56 #ifdef GHCI
57 import DynamicLoading ( loadPlugins )
58 import Plugins ( installCoreToDos )
59 #endif
60
61 {-
62 ************************************************************************
63 * *
64 \subsection{The driver for the simplifier}
65 * *
66 ************************************************************************
67 -}
68
69 core2core :: HscEnv -> ModGuts -> IO ModGuts
70 core2core hsc_env guts@(ModGuts { mg_module = mod
71 , mg_loc = loc
72 , mg_deps = deps
73 , mg_rdr_env = rdr_env })
74 = do { us <- mkSplitUniqSupply 's'
75 -- make sure all plugins are loaded
76
77 ; let builtin_passes = getCoreToDo dflags
78 orph_mods = mkModuleSet (mod : dep_orphs deps)
79 ;
80 ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base us mod
81 orph_mods print_unqual loc $
82 do { all_passes <- addPluginPasses builtin_passes
83 ; runCorePasses all_passes guts }
84
85 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_stats
86 "Grand total simplifier statistics"
87 (pprSimplCount stats)
88
89 ; return guts2 }
90 where
91 dflags = hsc_dflags hsc_env
92 home_pkg_rules = hptRules hsc_env (dep_mods deps)
93 hpt_rule_base = mkRuleBase home_pkg_rules
94 print_unqual = mkPrintUnqualified dflags rdr_env
95 -- mod: get the module out of the current HscEnv so we can retrieve it from the monad.
96 -- This is very convienent for the users of the monad (e.g. plugins do not have to
97 -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
98 -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
99 -- would mean our cached value would go out of date.
100
101 {-
102 ************************************************************************
103 * *
104 Generating the main optimisation pipeline
105 * *
106 ************************************************************************
107 -}
108
109 getCoreToDo :: DynFlags -> [CoreToDo]
110 getCoreToDo dflags
111 = flatten_todos core_todo
112 where
113 opt_level = optLevel dflags
114 phases = simplPhases dflags
115 max_iter = maxSimplIterations dflags
116 rule_check = ruleCheck dflags
117 call_arity = gopt Opt_CallArity dflags
118 strictness = gopt Opt_Strictness dflags
119 full_laziness = gopt Opt_FullLaziness dflags
120 do_specialise = gopt Opt_Specialise dflags
121 do_float_in = gopt Opt_FloatIn dflags
122 cse = gopt Opt_CSE dflags
123 spec_constr = gopt Opt_SpecConstr dflags
124 liberate_case = gopt Opt_LiberateCase dflags
125 late_dmd_anal = gopt Opt_LateDmdAnal dflags
126 static_args = gopt Opt_StaticArgumentTransformation dflags
127 rules_on = gopt Opt_EnableRewriteRules dflags
128 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
129 ww_on = gopt Opt_WorkerWrapper dflags
130
131 maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
132
133 maybe_strictness_before phase
134 = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness
135
136 base_mode = SimplMode { sm_phase = panic "base_mode"
137 , sm_names = []
138 , sm_rules = rules_on
139 , sm_eta_expand = eta_expand_on
140 , sm_inline = True
141 , sm_case_case = True }
142
143 simpl_phase phase names iter
144 = CoreDoPasses
145 $ [ maybe_strictness_before phase
146 , CoreDoSimplify iter
147 (base_mode { sm_phase = Phase phase
148 , sm_names = names })
149
150 , maybe_rule_check (Phase phase) ]
151
152 -- Vectorisation can introduce a fair few common sub expressions involving
153 -- DPH primitives. For example, see the Reverse test from dph-examples.
154 -- We need to eliminate these common sub expressions before their definitions
155 -- are inlined in phase 2. The CSE introduces lots of v1 = v2 bindings,
156 -- so we also run simpl_gently to inline them.
157 ++ (if gopt Opt_Vectorise dflags && phase == 3
158 then [CoreCSE, simpl_gently]
159 else [])
160
161 vectorisation
162 = runWhen (gopt Opt_Vectorise dflags) $
163 CoreDoPasses [ simpl_gently, CoreDoVectorisation ]
164
165 -- By default, we have 2 phases before phase 0.
166
167 -- Want to run with inline phase 2 after the specialiser to give
168 -- maximum chance for fusion to work before we inline build/augment
169 -- in phase 1. This made a difference in 'ansi' where an
170 -- overloaded function wasn't inlined till too late.
171
172 -- Need phase 1 so that build/augment get
173 -- inlined. I found that spectral/hartel/genfft lost some useful
174 -- strictness in the function sumcode' if augment is not inlined
175 -- before strictness analysis runs
176 simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
177 | phase <- [phases, phases-1 .. 1] ]
178
179
180 -- initial simplify: mk specialiser happy: minimum effort please
181 simpl_gently = CoreDoSimplify max_iter
182 (base_mode { sm_phase = InitialPhase
183 , sm_names = ["Gentle"]
184 , sm_rules = rules_on -- Note [RULEs enabled in SimplGently]
185 , sm_inline = False
186 , sm_case_case = False })
187 -- Don't do case-of-case transformations.
188 -- This makes full laziness work better
189
190 strictness_pass = if ww_on
191 then [CoreDoStrictness,CoreDoWorkerWrapper]
192 else [CoreDoStrictness]
193
194
195 -- New demand analyser
196 demand_analyser = (CoreDoPasses (
197 strictness_pass ++
198 [simpl_phase 0 ["post-worker-wrapper"] max_iter]
199 ))
200
201 core_todo =
202 if opt_level == 0 then
203 [ vectorisation
204 , CoreDoSimplify max_iter
205 (base_mode { sm_phase = Phase 0
206 , sm_names = ["Non-opt simplification"] })
207 ]
208
209 else {- opt_level >= 1 -} [
210
211 -- We want to do the static argument transform before full laziness as it
212 -- may expose extra opportunities to float things outwards. However, to fix
213 -- up the output of the transformation we need at do at least one simplify
214 -- after this before anything else
215 runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
216
217 -- We run vectorisation here for now, but we might also try to run
218 -- it later
219 vectorisation,
220
221 -- initial simplify: mk specialiser happy: minimum effort please
222 simpl_gently,
223
224 -- Specialisation is best done before full laziness
225 -- so that overloaded functions have all their dictionary lambdas manifest
226 runWhen do_specialise CoreDoSpecialising,
227
228 runWhen full_laziness $
229 CoreDoFloatOutwards FloatOutSwitches {
230 floatOutLambdas = Just 0,
231 floatOutConstants = True,
232 floatOutOverSatApps = False },
233 -- Was: gentleFloatOutSwitches
234 --
235 -- I have no idea why, but not floating constants to
236 -- top level is very bad in some cases.
237 --
238 -- Notably: p_ident in spectral/rewrite
239 -- Changing from "gentle" to "constantsOnly"
240 -- improved rewrite's allocation by 19%, and
241 -- made 0.0% difference to any other nofib
242 -- benchmark
243 --
244 -- Not doing floatOutOverSatApps yet, we'll do
245 -- that later on when we've had a chance to get more
246 -- accurate arity information. In fact it makes no
247 -- difference at all to performance if we do it here,
248 -- but maybe we save some unnecessary to-and-fro in
249 -- the simplifier.
250
251 simpl_phases,
252
253 -- Phase 0: allow all Ids to be inlined now
254 -- This gets foldr inlined before strictness analysis
255
256 -- At least 3 iterations because otherwise we land up with
257 -- huge dead expressions because of an infelicity in the
258 -- simpifier.
259 -- let k = BIG in foldr k z xs
260 -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
261 -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
262 -- Don't stop now!
263 simpl_phase 0 ["main"] (max max_iter 3),
264
265 runWhen do_float_in CoreDoFloatInwards,
266 -- Run float-inwards immediately before the strictness analyser
267 -- Doing so pushes bindings nearer their use site and hence makes
268 -- them more likely to be strict. These bindings might only show
269 -- up after the inlining from simplification. Example in fulsom,
270 -- Csg.calc, where an arg of timesDouble thereby becomes strict.
271
272 runWhen call_arity $ CoreDoPasses
273 [ CoreDoCallArity
274 , simpl_phase 0 ["post-call-arity"] max_iter
275 ],
276
277 runWhen strictness demand_analyser,
278
279 runWhen full_laziness $
280 CoreDoFloatOutwards FloatOutSwitches {
281 floatOutLambdas = floatLamArgs dflags,
282 floatOutConstants = True,
283 floatOutOverSatApps = True },
284 -- nofib/spectral/hartel/wang doubles in speed if you
285 -- do full laziness late in the day. It only happens
286 -- after fusion and other stuff, so the early pass doesn't
287 -- catch it. For the record, the redex is
288 -- f_el22 (f_el21 r_midblock)
289
290
291 runWhen cse CoreCSE,
292 -- We want CSE to follow the final full-laziness pass, because it may
293 -- succeed in commoning up things floated out by full laziness.
294 -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
295
296 runWhen do_float_in CoreDoFloatInwards,
297
298 maybe_rule_check (Phase 0),
299
300 -- Case-liberation for -O2. This should be after
301 -- strictness analysis and the simplification which follows it.
302 runWhen liberate_case (CoreDoPasses [
303 CoreLiberateCase,
304 simpl_phase 0 ["post-liberate-case"] max_iter
305 ]), -- Run the simplifier after LiberateCase to vastly
306 -- reduce the possiblility of shadowing
307 -- Reason: see Note [Shadowing] in SpecConstr.hs
308
309 runWhen spec_constr CoreDoSpecConstr,
310
311 maybe_rule_check (Phase 0),
312
313 -- Final clean-up simplification:
314 simpl_phase 0 ["final"] max_iter,
315
316 runWhen late_dmd_anal $ CoreDoPasses (
317 strictness_pass ++
318 [simpl_phase 0 ["post-late-ww"] max_iter]
319 ),
320
321 maybe_rule_check (Phase 0)
322 ]
323
324 -- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity.
325 flatten_todos [] = []
326 flatten_todos (CoreDoNothing : rest) = flatten_todos rest
327 flatten_todos (CoreDoPasses passes : rest) =
328 flatten_todos passes ++ flatten_todos rest
329 flatten_todos (todo : rest) = todo : flatten_todos rest
330
331 -- Loading plugins
332
333 addPluginPasses :: [CoreToDo] -> CoreM [CoreToDo]
334 #ifndef GHCI
335 addPluginPasses builtin_passes = return builtin_passes
336 #else
337 addPluginPasses builtin_passes
338 = do { hsc_env <- getHscEnv
339 ; named_plugins <- liftIO (loadPlugins hsc_env)
340 ; foldM query_plug builtin_passes named_plugins }
341 where
342 query_plug todos (_, plug, options) = installCoreToDos plug options todos
343 #endif
344
345 {-
346 ************************************************************************
347 * *
348 The CoreToDo interpreter
349 * *
350 ************************************************************************
351 -}
352
353 runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts
354 runCorePasses passes guts
355 = foldM do_pass guts passes
356 where
357 do_pass guts CoreDoNothing = return guts
358 do_pass guts (CoreDoPasses ps) = runCorePasses ps guts
359 do_pass guts pass
360 = do { showPass pass
361 ; guts' <- lintAnnots (ppr pass) (doCorePass pass) guts
362 ; endPass pass (mg_binds guts') (mg_rules guts')
363 ; return guts' }
364
365 doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts
366 doCorePass pass@(CoreDoSimplify {}) = {-# SCC "Simplify" #-}
367 simplifyPgm pass
368
369 doCorePass CoreCSE = {-# SCC "CommonSubExpr" #-}
370 doPass cseProgram
371
372 doCorePass CoreLiberateCase = {-# SCC "LiberateCase" #-}
373 doPassD liberateCase
374
375 doCorePass CoreDoFloatInwards = {-# SCC "FloatInwards" #-}
376 doPassD floatInwards
377
378 doCorePass (CoreDoFloatOutwards f) = {-# SCC "FloatOutwards" #-}
379 doPassDUM (floatOutwards f)
380
381 doCorePass CoreDoStaticArgs = {-# SCC "StaticArgs" #-}
382 doPassU doStaticArgs
383
384 doCorePass CoreDoCallArity = {-# SCC "CallArity" #-}
385 doPassD callArityAnalProgram
386
387 doCorePass CoreDoStrictness = {-# SCC "NewStranal" #-}
388 doPassDFM dmdAnalProgram
389
390 doCorePass CoreDoWorkerWrapper = {-# SCC "WorkWrap" #-}
391 doPassDFU wwTopBinds
392
393 doCorePass CoreDoSpecialising = {-# SCC "Specialise" #-}
394 specProgram
395
396 doCorePass CoreDoSpecConstr = {-# SCC "SpecConstr" #-}
397 specConstrProgram
398
399 doCorePass CoreDoVectorisation = {-# SCC "Vectorise" #-}
400 vectorise
401
402 doCorePass CoreDoPrintCore = observe printCore
403 doCorePass (CoreDoRuleCheck phase pat) = ruleCheckPass phase pat
404 doCorePass CoreDoNothing = return
405 doCorePass (CoreDoPasses passes) = runCorePasses passes
406
407 #ifdef GHCI
408 doCorePass (CoreDoPluginPass _ pass) = {-# SCC "Plugin" #-} pass
409 #endif
410
411 doCorePass pass = pprPanic "doCorePass" (ppr pass)
412
413 {-
414 ************************************************************************
415 * *
416 \subsection{Core pass combinators}
417 * *
418 ************************************************************************
419 -}
420
421 printCore :: DynFlags -> CoreProgram -> IO ()
422 printCore dflags binds
423 = Err.dumpIfSet dflags True "Print Core" (pprCoreBindings binds)
424
425 ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts
426 ruleCheckPass current_phase pat guts = do
427 rb <- getRuleBase
428 dflags <- getDynFlags
429 vis_orphs <- getVisibleOrphanMods
430 liftIO $ Err.showPass dflags "RuleCheck"
431 liftIO $ log_action dflags dflags NoReason Err.SevDump noSrcSpan
432 defaultDumpStyle
433 (ruleCheckProgram current_phase pat
434 (RuleEnv rb vis_orphs) (mg_binds guts))
435 return guts
436
437
438 doPassDUM :: (DynFlags -> UniqSupply -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
439 doPassDUM do_pass = doPassM $ \binds -> do
440 dflags <- getDynFlags
441 us <- getUniqueSupplyM
442 liftIO $ do_pass dflags us binds
443
444 doPassDM :: (DynFlags -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
445 doPassDM do_pass = doPassDUM (\dflags -> const (do_pass dflags))
446
447 doPassD :: (DynFlags -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
448 doPassD do_pass = doPassDM (\dflags -> return . do_pass dflags)
449
450 doPassDU :: (DynFlags -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
451 doPassDU do_pass = doPassDUM (\dflags us -> return . do_pass dflags us)
452
453 doPassU :: (UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
454 doPassU do_pass = doPassDU (const do_pass)
455
456 doPassDFM :: (DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
457 doPassDFM do_pass guts = do
458 dflags <- getDynFlags
459 p_fam_env <- getPackageFamInstEnv
460 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
461 doPassM (liftIO . do_pass dflags fam_envs) guts
462
463 doPassDFU :: (DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
464 doPassDFU do_pass guts = do
465 dflags <- getDynFlags
466 us <- getUniqueSupplyM
467 p_fam_env <- getPackageFamInstEnv
468 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
469 doPass (do_pass dflags fam_envs us) guts
470
471 -- Most passes return no stats and don't change rules: these combinators
472 -- let us lift them to the full blown ModGuts+CoreM world
473 doPassM :: Monad m => (CoreProgram -> m CoreProgram) -> ModGuts -> m ModGuts
474 doPassM bind_f guts = do
475 binds' <- bind_f (mg_binds guts)
476 return (guts { mg_binds = binds' })
477
478 doPass :: (CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
479 doPass bind_f guts = return $ guts { mg_binds = bind_f (mg_binds guts) }
480
481 -- Observer passes just peek; don't modify the bindings at all
482 observe :: (DynFlags -> CoreProgram -> IO a) -> ModGuts -> CoreM ModGuts
483 observe do_pass = doPassM $ \binds -> do
484 dflags <- getDynFlags
485 _ <- liftIO $ do_pass dflags binds
486 return binds
487
488 {-
489 ************************************************************************
490 * *
491 Gentle simplification
492 * *
493 ************************************************************************
494 -}
495
496 simplifyExpr :: DynFlags -- includes spec of what core-to-core passes to do
497 -> CoreExpr
498 -> IO CoreExpr
499 -- simplifyExpr is called by the driver to simplify an
500 -- expression typed in at the interactive prompt
501 --
502 -- Also used by Template Haskell
503 simplifyExpr dflags expr
504 = do {
505 ; Err.showPass dflags "Simplify"
506
507 ; us <- mkSplitUniqSupply 's'
508
509 ; let sz = exprSize expr
510
511 ; (expr', counts) <- initSmpl dflags emptyRuleEnv
512 emptyFamInstEnvs us sz
513 (simplExprGently (simplEnvForGHCi dflags) expr)
514
515 ; Err.dumpIfSet dflags (dopt Opt_D_dump_simpl_stats dflags)
516 "Simplifier statistics" (pprSimplCount counts)
517
518 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl "Simplified expression"
519 (pprCoreExpr expr')
520
521 ; return expr'
522 }
523
524 simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr
525 -- Simplifies an expression
526 -- does occurrence analysis, then simplification
527 -- and repeats (twice currently) because one pass
528 -- alone leaves tons of crud.
529 -- Used (a) for user expressions typed in at the interactive prompt
530 -- (b) the LHS and RHS of a RULE
531 -- (c) Template Haskell splices
532 --
533 -- The name 'Gently' suggests that the SimplifierMode is SimplGently,
534 -- and in fact that is so.... but the 'Gently' in simplExprGently doesn't
535 -- enforce that; it just simplifies the expression twice
536
537 -- It's important that simplExprGently does eta reduction; see
538 -- Note [Simplifying the left-hand side of a RULE] above. The
539 -- simplifier does indeed do eta reduction (it's in Simplify.completeLam)
540 -- but only if -O is on.
541
542 simplExprGently env expr = do
543 expr1 <- simplExpr env (occurAnalyseExpr expr)
544 simplExpr env (occurAnalyseExpr expr1)
545
546 {-
547 ************************************************************************
548 * *
549 \subsection{The driver for the simplifier}
550 * *
551 ************************************************************************
552 -}
553
554 simplifyPgm :: CoreToDo -> ModGuts -> CoreM ModGuts
555 simplifyPgm pass guts
556 = do { hsc_env <- getHscEnv
557 ; us <- getUniqueSupplyM
558 ; rb <- getRuleBase
559 ; liftIOWithCount $
560 simplifyPgmIO pass hsc_env us rb guts }
561
562 simplifyPgmIO :: CoreToDo
563 -> HscEnv
564 -> UniqSupply
565 -> RuleBase
566 -> ModGuts
567 -> IO (SimplCount, ModGuts) -- New bindings
568
569 simplifyPgmIO pass@(CoreDoSimplify max_iterations mode)
570 hsc_env us hpt_rule_base
571 guts@(ModGuts { mg_module = this_mod
572 , mg_rdr_env = rdr_env
573 , mg_deps = deps
574 , mg_binds = binds, mg_rules = rules
575 , mg_fam_inst_env = fam_inst_env })
576 = do { (termination_msg, it_count, counts_out, guts')
577 <- do_iteration us 1 [] binds rules
578
579 ; Err.dumpIfSet dflags (dopt Opt_D_verbose_core2core dflags &&
580 dopt Opt_D_dump_simpl_stats dflags)
581 "Simplifier statistics for following pass"
582 (vcat [text termination_msg <+> text "after" <+> ppr it_count
583 <+> text "iterations",
584 blankLine,
585 pprSimplCount counts_out])
586
587 ; return (counts_out, guts')
588 }
589 where
590 dflags = hsc_dflags hsc_env
591 print_unqual = mkPrintUnqualified dflags rdr_env
592 simpl_env = mkSimplEnv mode
593 active_rule = activeRule simpl_env
594
595 do_iteration :: UniqSupply
596 -> Int -- Counts iterations
597 -> [SimplCount] -- Counts from earlier iterations, reversed
598 -> CoreProgram -- Bindings in
599 -> [CoreRule] -- and orphan rules
600 -> IO (String, Int, SimplCount, ModGuts)
601
602 do_iteration us iteration_no counts_so_far binds rules
603 -- iteration_no is the number of the iteration we are
604 -- about to begin, with '1' for the first
605 | iteration_no > max_iterations -- Stop if we've run out of iterations
606 = WARN( debugIsOn && (max_iterations > 2)
607 , hang (text "Simplifier bailing out after" <+> int max_iterations
608 <+> text "iterations"
609 <+> (brackets $ hsep $ punctuate comma $
610 map (int . simplCountN) (reverse counts_so_far)))
611 2 (text "Size =" <+> ppr (coreBindsStats binds)))
612
613 -- Subtract 1 from iteration_no to get the
614 -- number of iterations we actually completed
615 return ( "Simplifier baled out", iteration_no - 1
616 , totalise counts_so_far
617 , guts { mg_binds = binds, mg_rules = rules } )
618
619 -- Try and force thunks off the binds; significantly reduces
620 -- space usage, especially with -O. JRS, 000620.
621 | let sz = coreBindsSize binds
622 , () <- sz `seq` () -- Force it
623 = do {
624 -- Occurrence analysis
625 let { -- Note [Vectorisation declarations and occurrences]
626 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
627 -- During the 'InitialPhase' (i.e., before vectorisation), we need to make sure
628 -- that the right-hand sides of vectorisation declarations are taken into
629 -- account during occurrence analysis. After the 'InitialPhase', we need to ensure
630 -- that the binders representing variable vectorisation declarations are kept alive.
631 -- (In contrast to automatically vectorised variables, their unvectorised versions
632 -- don't depend on them.)
633 vectVars = mkVarSet $
634 catMaybes [ fmap snd $ lookupVarEnv (vectInfoVar (mg_vect_info guts)) bndr
635 | Vect bndr _ <- mg_vect_decls guts]
636 ++
637 catMaybes [ fmap snd $ lookupVarEnv (vectInfoVar (mg_vect_info guts)) bndr
638 | bndr <- bindersOfBinds binds]
639 -- FIXME: This second comprehensions is only needed as long as we
640 -- have vectorised bindings where we get "Could NOT call
641 -- vectorised from original version".
642 ; (maybeVects, maybeVectVars)
643 = case sm_phase mode of
644 InitialPhase -> (mg_vect_decls guts, vectVars)
645 _ -> ([], vectVars)
646 ; tagged_binds = {-# SCC "OccAnal" #-}
647 occurAnalysePgm this_mod active_rule rules
648 maybeVects maybeVectVars binds
649 } ;
650 Err.dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis"
651 (pprCoreBindings tagged_binds);
652
653 -- Get any new rules, and extend the rule base
654 -- See Note [Overall plumbing for rules] in Rules.hs
655 -- We need to do this regularly, because simplification can
656 -- poke on IdInfo thunks, which in turn brings in new rules
657 -- behind the scenes. Otherwise there's a danger we'll simply
658 -- miss the rules for Ids hidden inside imported inlinings
659 eps <- hscEPS hsc_env ;
660 let { rule_base1 = unionRuleBase hpt_rule_base (eps_rule_base eps)
661 ; rule_base2 = extendRuleBaseList rule_base1 rules
662 ; fam_envs = (eps_fam_inst_env eps, fam_inst_env)
663 ; vis_orphs = this_mod : dep_orphs deps } ;
664
665 -- Simplify the program
666 ((binds1, rules1), counts1) <-
667 initSmpl dflags (mkRuleEnv rule_base2 vis_orphs) fam_envs us1 sz $
668 do { env1 <- {-# SCC "SimplTopBinds" #-}
669 simplTopBinds simpl_env tagged_binds
670
671 -- Apply the substitution to rules defined in this module
672 -- for imported Ids. Eg RULE map my_f = blah
673 -- If we have a substitution my_f :-> other_f, we'd better
674 -- apply it to the rule to, or it'll never match
675 ; rules1 <- simplRules env1 Nothing rules
676
677 ; return (getFloatBinds env1, rules1) } ;
678
679 -- Stop if nothing happened; don't dump output
680 if isZeroSimplCount counts1 then
681 return ( "Simplifier reached fixed point", iteration_no
682 , totalise (counts1 : counts_so_far) -- Include "free" ticks
683 , guts { mg_binds = binds1, mg_rules = rules1 } )
684 else do {
685 -- Short out indirections
686 -- We do this *after* at least one run of the simplifier
687 -- because indirection-shorting uses the export flag on *occurrences*
688 -- and that isn't guaranteed to be ok until after the first run propagates
689 -- stuff from the binding site to its occurrences
690 --
691 -- ToDo: alas, this means that indirection-shorting does not happen at all
692 -- if the simplifier does nothing (not common, I know, but unsavoury)
693 let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ;
694
695 -- Dump the result of this iteration
696 dump_end_iteration dflags print_unqual iteration_no counts1 binds2 rules1 ;
697 lintPassResult hsc_env pass binds2 ;
698
699 -- Loop
700 do_iteration us2 (iteration_no + 1) (counts1:counts_so_far) binds2 rules1
701 } }
702 | otherwise = panic "do_iteration"
703 where
704 (us1, us2) = splitUniqSupply us
705
706 -- Remember the counts_so_far are reversed
707 totalise :: [SimplCount] -> SimplCount
708 totalise = foldr (\c acc -> acc `plusSimplCount` c)
709 (zeroSimplCount dflags)
710
711 simplifyPgmIO _ _ _ _ _ = panic "simplifyPgmIO"
712
713 -------------------
714 dump_end_iteration :: DynFlags -> PrintUnqualified -> Int
715 -> SimplCount -> CoreProgram -> [CoreRule] -> IO ()
716 dump_end_iteration dflags print_unqual iteration_no counts binds rules
717 = dumpPassResult dflags print_unqual mb_flag hdr pp_counts binds rules
718 where
719 mb_flag | dopt Opt_D_dump_simpl_iterations dflags = Just Opt_D_dump_simpl_iterations
720 | otherwise = Nothing
721 -- Show details if Opt_D_dump_simpl_iterations is on
722
723 hdr = text "Simplifier iteration=" <> int iteration_no
724 pp_counts = vcat [ text "---- Simplifier counts for" <+> hdr
725 , pprSimplCount counts
726 , text "---- End of simplifier counts for" <+> hdr ]
727
728 {-
729 ************************************************************************
730 * *
731 Shorting out indirections
732 * *
733 ************************************************************************
734
735 If we have this:
736
737 x_local = <expression>
738 ...bindings...
739 x_exported = x_local
740
741 where x_exported is exported, and x_local is not, then we replace it with this:
742
743 x_exported = <expression>
744 x_local = x_exported
745 ...bindings...
746
747 Without this we never get rid of the x_exported = x_local thing. This
748 save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
749 makes strictness information propagate better. This used to happen in
750 the final phase, but it's tidier to do it here.
751
752 Note [Transferring IdInfo]
753 ~~~~~~~~~~~~~~~~~~~~~~~~~~
754 We want to propagage any useful IdInfo on x_local to x_exported.
755
756 STRICTNESS: if we have done strictness analysis, we want the strictness info on
757 x_local to transfer to x_exported. Hence the copyIdInfo call.
758
759 RULES: we want to *add* any RULES for x_local to x_exported.
760
761
762 Note [Messing up the exported Id's RULES]
763 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
764 We must be careful about discarding (obviously) or even merging the
765 RULES on the exported Id. The example that went bad on me at one stage
766 was this one:
767
768 iterate :: (a -> a) -> a -> [a]
769 [Exported]
770 iterate = iterateList
771
772 iterateFB c f x = x `c` iterateFB c f (f x)
773 iterateList f x = x : iterateList f (f x)
774 [Not exported]
775
776 {-# RULES
777 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
778 "iterateFB" iterateFB (:) = iterateList
779 #-}
780
781 This got shorted out to:
782
783 iterateList :: (a -> a) -> a -> [a]
784 iterateList = iterate
785
786 iterateFB c f x = x `c` iterateFB c f (f x)
787 iterate f x = x : iterate f (f x)
788
789 {-# RULES
790 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
791 "iterateFB" iterateFB (:) = iterate
792 #-}
793
794 And now we get an infinite loop in the rule system
795 iterate f x -> build (\cn -> iterateFB c f x)
796 -> iterateFB (:) f x
797 -> iterate f x
798
799 Old "solution":
800 use rule switching-off pragmas to get rid
801 of iterateList in the first place
802
803 But in principle the user *might* want rules that only apply to the Id
804 he says. And inline pragmas are similar
805 {-# NOINLINE f #-}
806 f = local
807 local = <stuff>
808 Then we do not want to get rid of the NOINLINE.
809
810 Hence hasShortableIdinfo.
811
812
813 Note [Rules and indirection-zapping]
814 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
815 Problem: what if x_exported has a RULE that mentions something in ...bindings...?
816 Then the things mentioned can be out of scope! Solution
817 a) Make sure that in this pass the usage-info from x_exported is
818 available for ...bindings...
819 b) If there are any such RULES, rec-ify the entire top-level.
820 It'll get sorted out next time round
821
822 Other remarks
823 ~~~~~~~~~~~~~
824 If more than one exported thing is equal to a local thing (i.e., the
825 local thing really is shared), then we do one only:
826 \begin{verbatim}
827 x_local = ....
828 x_exported1 = x_local
829 x_exported2 = x_local
830 ==>
831 x_exported1 = ....
832
833 x_exported2 = x_exported1
834 \end{verbatim}
835
836 We rely on prior eta reduction to simplify things like
837 \begin{verbatim}
838 x_exported = /\ tyvars -> x_local tyvars
839 ==>
840 x_exported = x_local
841 \end{verbatim}
842 Hence,there's a possibility of leaving unchanged something like this:
843 \begin{verbatim}
844 x_local = ....
845 x_exported1 = x_local Int
846 \end{verbatim}
847 By the time we've thrown away the types in STG land this
848 could be eliminated. But I don't think it's very common
849 and it's dangerous to do this fiddling in STG land
850 because we might elminate a binding that's mentioned in the
851 unfolding for something.
852
853 Note [Indirection zapping and ticks]
854 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
855
856 Unfortunately this is another place where we need a special case for
857 ticks. The following happens quite regularly:
858
859 x_local = <expression>
860 x_exported = tick<x> x_local
861
862 Which we want to become:
863
864 x_exported = tick<x> <expression>
865
866 As it makes no sense to keep the tick and the expression on separate
867 bindings. Note however that that this might increase the ticks scoping
868 over the execution of x_local, so we can only do this for floatable
869 ticks. More often than not, other references will be unfoldings of
870 x_exported, and therefore carry the tick anyway.
871 -}
872
873 type IndEnv = IdEnv (Id, [Tickish Var]) -- Maps local_id -> exported_id, ticks
874
875 shortOutIndirections :: CoreProgram -> CoreProgram
876 shortOutIndirections binds
877 | isEmptyVarEnv ind_env = binds
878 | no_need_to_flatten = binds' -- See Note [Rules and indirect-zapping]
879 | otherwise = [Rec (flattenBinds binds')] -- for this no_need_to_flatten stuff
880 where
881 ind_env = makeIndEnv binds
882 -- These exported Ids are the subjects of the indirection-elimination
883 exp_ids = map fst $ varEnvElts ind_env
884 exp_id_set = mkVarSet exp_ids
885 no_need_to_flatten = all (null . ruleInfoRules . idSpecialisation) exp_ids
886 binds' = concatMap zap binds
887
888 zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)]
889 zap (Rec pairs) = [Rec (concatMap zapPair pairs)]
890
891 zapPair (bndr, rhs)
892 | bndr `elemVarSet` exp_id_set = []
893 | Just (exp_id, ticks) <- lookupVarEnv ind_env bndr
894 = [(transferIdInfo exp_id bndr,
895 mkTicks ticks rhs),
896 (bndr, Var exp_id)]
897 | otherwise = [(bndr,rhs)]
898
899 makeIndEnv :: [CoreBind] -> IndEnv
900 makeIndEnv binds
901 = foldr add_bind emptyVarEnv binds
902 where
903 add_bind :: CoreBind -> IndEnv -> IndEnv
904 add_bind (NonRec exported_id rhs) env = add_pair (exported_id, rhs) env
905 add_bind (Rec pairs) env = foldr add_pair env pairs
906
907 add_pair :: (Id,CoreExpr) -> IndEnv -> IndEnv
908 add_pair (exported_id, exported) env
909 | (ticks, Var local_id) <- stripTicksTop tickishFloatable exported
910 , shortMeOut env exported_id local_id
911 = extendVarEnv env local_id (exported_id, ticks)
912 add_pair _ env = env
913
914 -----------------
915 shortMeOut :: IndEnv -> Id -> Id -> Bool
916 shortMeOut ind_env exported_id local_id
917 -- The if-then-else stuff is just so I can get a pprTrace to see
918 -- how often I don't get shorting out because of IdInfo stuff
919 = if isExportedId exported_id && -- Only if this is exported
920
921 isLocalId local_id && -- Only if this one is defined in this
922 -- module, so that we *can* change its
923 -- binding to be the exported thing!
924
925 not (isExportedId local_id) && -- Only if this one is not itself exported,
926 -- since the transformation will nuke it
927
928 not (local_id `elemVarEnv` ind_env) -- Only if not already substituted for
929 then
930 if hasShortableIdInfo exported_id
931 then True -- See Note [Messing up the exported Id's IdInfo]
932 else WARN( True, text "Not shorting out:" <+> ppr exported_id )
933 False
934 else
935 False
936
937 -----------------
938 hasShortableIdInfo :: Id -> Bool
939 -- True if there is no user-attached IdInfo on exported_id,
940 -- so we can safely discard it
941 -- See Note [Messing up the exported Id's IdInfo]
942 hasShortableIdInfo id
943 = isEmptyRuleInfo (ruleInfo info)
944 && isDefaultInlinePragma (inlinePragInfo info)
945 && not (isStableUnfolding (unfoldingInfo info))
946 where
947 info = idInfo id
948
949 -----------------
950 transferIdInfo :: Id -> Id -> Id
951 -- See Note [Transferring IdInfo]
952 -- If we have
953 -- lcl_id = e; exp_id = lcl_id
954 -- and lcl_id has useful IdInfo, we don't want to discard it by going
955 -- gbl_id = e; lcl_id = gbl_id
956 -- Instead, transfer IdInfo from lcl_id to exp_id
957 -- Overwriting, rather than merging, seems to work ok.
958 transferIdInfo exported_id local_id
959 = modifyIdInfo transfer exported_id
960 where
961 local_info = idInfo local_id
962 transfer exp_info = exp_info `setStrictnessInfo` strictnessInfo local_info
963 `setUnfoldingInfo` unfoldingInfo local_info
964 `setInlinePragInfo` inlinePragInfo local_info
965 `setRuleInfo` addRuleInfo (ruleInfo exp_info) new_info
966 new_info = setRuleInfoHead (idName exported_id)
967 (ruleInfo local_info)
968 -- Remember to set the function-name field of the
969 -- rules as we transfer them from one function to another