Allow proper errors/warnings in core2core passes
[ghc.git] / compiler / simplCore / SimplCore.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 \section[SimplCore]{Driver for simplifying @Core@ programs}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplCore ( core2core, simplifyExpr ) where
10
11 #include "HsVersions.h"
12
13 import DynFlags
14 import CoreSyn
15 import HscTypes
16 import CSE ( cseProgram )
17 import Rules ( mkRuleBase, unionRuleBase,
18 extendRuleBaseList, ruleCheckProgram, addSpecInfo, )
19 import PprCore ( pprCoreBindings, pprCoreExpr )
20 import OccurAnal ( occurAnalysePgm, occurAnalyseExpr )
21 import IdInfo
22 import CoreStats ( coreBindsSize, coreBindsStats, exprSize )
23 import CoreUtils ( mkTicks, stripTicksTop )
24 import CoreLint ( showPass, endPass, lintPassResult, dumpPassResult,
25 lintAnnots )
26 import Simplify ( simplTopBinds, simplExpr, simplRules )
27 import SimplUtils ( simplEnvForGHCi, activeRule )
28 import SimplEnv
29 import SimplMonad
30 import CoreMonad
31 import qualified ErrUtils as Err
32 import FloatIn ( floatInwards )
33 import FloatOut ( floatOutwards )
34 import FamInstEnv
35 import Id
36 import BasicTypes ( CompilerPhase(..), isDefaultInlinePragma )
37 import VarSet
38 import VarEnv
39 import LiberateCase ( liberateCase )
40 import SAT ( doStaticArgs )
41 import Specialise ( specProgram)
42 import SpecConstr ( specConstrProgram)
43 import DmdAnal ( dmdAnalProgram )
44 import CallArity ( callArityAnalProgram )
45 import WorkWrap ( wwTopBinds )
46 import Vectorise ( vectorise )
47 import FastString
48 import SrcLoc
49 import Util
50 import Module
51
52 import Maybes
53 import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply )
54 import Outputable
55 import Control.Monad
56
57 #ifdef GHCI
58 import DynamicLoading ( loadPlugins )
59 import Plugins ( installCoreToDos )
60 #endif
61
62 {-
63 ************************************************************************
64 * *
65 \subsection{The driver for the simplifier}
66 * *
67 ************************************************************************
68 -}
69
70 core2core :: HscEnv -> ModGuts -> IO ModGuts
71 core2core hsc_env guts@(ModGuts { mg_module = mod
72 , mg_loc = loc
73 , mg_deps = deps
74 , mg_rdr_env = rdr_env })
75 = do { us <- mkSplitUniqSupply 's'
76 -- make sure all plugins are loaded
77
78 ; let builtin_passes = getCoreToDo dflags
79 orph_mods = mkModuleSet (mod : dep_orphs deps)
80 ;
81 ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base us mod
82 orph_mods print_unqual loc $
83 do { all_passes <- addPluginPasses builtin_passes
84 ; runCorePasses all_passes guts }
85
86 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_stats
87 "Grand total simplifier statistics"
88 (pprSimplCount stats)
89
90 ; return guts2 }
91 where
92 dflags = hsc_dflags hsc_env
93 home_pkg_rules = hptRules hsc_env (dep_mods deps)
94 hpt_rule_base = mkRuleBase home_pkg_rules
95 print_unqual = mkPrintUnqualified dflags rdr_env
96 -- mod: get the module out of the current HscEnv so we can retrieve it from the monad.
97 -- This is very convienent for the users of the monad (e.g. plugins do not have to
98 -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
99 -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
100 -- would mean our cached value would go out of date.
101
102 {-
103 ************************************************************************
104 * *
105 Generating the main optimisation pipeline
106 * *
107 ************************************************************************
108 -}
109
110 getCoreToDo :: DynFlags -> [CoreToDo]
111 getCoreToDo dflags
112 = core_todo
113 where
114 opt_level = optLevel dflags
115 phases = simplPhases dflags
116 max_iter = maxSimplIterations dflags
117 rule_check = ruleCheck dflags
118 call_arity = gopt Opt_CallArity dflags
119 strictness = gopt Opt_Strictness dflags
120 full_laziness = gopt Opt_FullLaziness dflags
121 do_specialise = gopt Opt_Specialise dflags
122 do_float_in = gopt Opt_FloatIn dflags
123 cse = gopt Opt_CSE dflags
124 spec_constr = gopt Opt_SpecConstr dflags
125 liberate_case = gopt Opt_LiberateCase dflags
126 late_dmd_anal = gopt Opt_LateDmdAnal dflags
127 static_args = gopt Opt_StaticArgumentTransformation dflags
128 rules_on = gopt Opt_EnableRewriteRules dflags
129 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
130
131 maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
132
133 maybe_strictness_before phase
134 = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness
135
136 base_mode = SimplMode { sm_phase = panic "base_mode"
137 , sm_names = []
138 , sm_rules = rules_on
139 , sm_eta_expand = eta_expand_on
140 , sm_inline = True
141 , sm_case_case = True }
142
143 simpl_phase phase names iter
144 = CoreDoPasses
145 $ [ maybe_strictness_before phase
146 , CoreDoSimplify iter
147 (base_mode { sm_phase = Phase phase
148 , sm_names = names })
149
150 , maybe_rule_check (Phase phase) ]
151
152 -- Vectorisation can introduce a fair few common sub expressions involving
153 -- DPH primitives. For example, see the Reverse test from dph-examples.
154 -- We need to eliminate these common sub expressions before their definitions
155 -- are inlined in phase 2. The CSE introduces lots of v1 = v2 bindings,
156 -- so we also run simpl_gently to inline them.
157 ++ (if gopt Opt_Vectorise dflags && phase == 3
158 then [CoreCSE, simpl_gently]
159 else [])
160
161 vectorisation
162 = runWhen (gopt Opt_Vectorise dflags) $
163 CoreDoPasses [ simpl_gently, CoreDoVectorisation ]
164
165 -- By default, we have 2 phases before phase 0.
166
167 -- Want to run with inline phase 2 after the specialiser to give
168 -- maximum chance for fusion to work before we inline build/augment
169 -- in phase 1. This made a difference in 'ansi' where an
170 -- overloaded function wasn't inlined till too late.
171
172 -- Need phase 1 so that build/augment get
173 -- inlined. I found that spectral/hartel/genfft lost some useful
174 -- strictness in the function sumcode' if augment is not inlined
175 -- before strictness analysis runs
176 simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
177 | phase <- [phases, phases-1 .. 1] ]
178
179
180 -- initial simplify: mk specialiser happy: minimum effort please
181 simpl_gently = CoreDoSimplify max_iter
182 (base_mode { sm_phase = InitialPhase
183 , sm_names = ["Gentle"]
184 , sm_rules = rules_on -- Note [RULEs enabled in SimplGently]
185 , sm_inline = False
186 , sm_case_case = False })
187 -- Don't do case-of-case transformations.
188 -- This makes full laziness work better
189
190 -- New demand analyser
191 demand_analyser = (CoreDoPasses ([
192 CoreDoStrictness,
193 CoreDoWorkerWrapper,
194 simpl_phase 0 ["post-worker-wrapper"] max_iter
195 ]))
196
197 core_todo =
198 if opt_level == 0 then
199 [ vectorisation
200 , CoreDoSimplify max_iter
201 (base_mode { sm_phase = Phase 0
202 , sm_names = ["Non-opt simplification"] })
203 ]
204
205 else {- opt_level >= 1 -} [
206
207 -- We want to do the static argument transform before full laziness as it
208 -- may expose extra opportunities to float things outwards. However, to fix
209 -- up the output of the transformation we need at do at least one simplify
210 -- after this before anything else
211 runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
212
213 -- We run vectorisation here for now, but we might also try to run
214 -- it later
215 vectorisation,
216
217 -- initial simplify: mk specialiser happy: minimum effort please
218 simpl_gently,
219
220 -- Specialisation is best done before full laziness
221 -- so that overloaded functions have all their dictionary lambdas manifest
222 runWhen do_specialise CoreDoSpecialising,
223
224 runWhen full_laziness $
225 CoreDoFloatOutwards FloatOutSwitches {
226 floatOutLambdas = Just 0,
227 floatOutConstants = True,
228 floatOutOverSatApps = False },
229 -- Was: gentleFloatOutSwitches
230 --
231 -- I have no idea why, but not floating constants to
232 -- top level is very bad in some cases.
233 --
234 -- Notably: p_ident in spectral/rewrite
235 -- Changing from "gentle" to "constantsOnly"
236 -- improved rewrite's allocation by 19%, and
237 -- made 0.0% difference to any other nofib
238 -- benchmark
239 --
240 -- Not doing floatOutOverSatApps yet, we'll do
241 -- that later on when we've had a chance to get more
242 -- accurate arity information. In fact it makes no
243 -- difference at all to performance if we do it here,
244 -- but maybe we save some unnecessary to-and-fro in
245 -- the simplifier.
246
247 simpl_phases,
248
249 -- Phase 0: allow all Ids to be inlined now
250 -- This gets foldr inlined before strictness analysis
251
252 -- At least 3 iterations because otherwise we land up with
253 -- huge dead expressions because of an infelicity in the
254 -- simpifier.
255 -- let k = BIG in foldr k z xs
256 -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
257 -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
258 -- Don't stop now!
259 simpl_phase 0 ["main"] (max max_iter 3),
260
261 runWhen do_float_in CoreDoFloatInwards,
262 -- Run float-inwards immediately before the strictness analyser
263 -- Doing so pushes bindings nearer their use site and hence makes
264 -- them more likely to be strict. These bindings might only show
265 -- up after the inlining from simplification. Example in fulsom,
266 -- Csg.calc, where an arg of timesDouble thereby becomes strict.
267
268 runWhen call_arity $ CoreDoPasses
269 [ CoreDoCallArity
270 , simpl_phase 0 ["post-call-arity"] max_iter
271 ],
272
273 runWhen strictness demand_analyser,
274
275 runWhen full_laziness $
276 CoreDoFloatOutwards FloatOutSwitches {
277 floatOutLambdas = floatLamArgs dflags,
278 floatOutConstants = True,
279 floatOutOverSatApps = True },
280 -- nofib/spectral/hartel/wang doubles in speed if you
281 -- do full laziness late in the day. It only happens
282 -- after fusion and other stuff, so the early pass doesn't
283 -- catch it. For the record, the redex is
284 -- f_el22 (f_el21 r_midblock)
285
286
287 runWhen cse CoreCSE,
288 -- We want CSE to follow the final full-laziness pass, because it may
289 -- succeed in commoning up things floated out by full laziness.
290 -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
291
292 runWhen do_float_in CoreDoFloatInwards,
293
294 maybe_rule_check (Phase 0),
295
296 -- Case-liberation for -O2. This should be after
297 -- strictness analysis and the simplification which follows it.
298 runWhen liberate_case (CoreDoPasses [
299 CoreLiberateCase,
300 simpl_phase 0 ["post-liberate-case"] max_iter
301 ]), -- Run the simplifier after LiberateCase to vastly
302 -- reduce the possiblility of shadowing
303 -- Reason: see Note [Shadowing] in SpecConstr.hs
304
305 runWhen spec_constr CoreDoSpecConstr,
306
307 maybe_rule_check (Phase 0),
308
309 -- Final clean-up simplification:
310 simpl_phase 0 ["final"] max_iter,
311
312 runWhen late_dmd_anal $ CoreDoPasses [
313 CoreDoStrictness,
314 CoreDoWorkerWrapper,
315 simpl_phase 0 ["post-late-ww"] max_iter
316 ],
317
318 maybe_rule_check (Phase 0)
319 ]
320
321 -- Loading plugins
322
323 addPluginPasses :: [CoreToDo] -> CoreM [CoreToDo]
324 #ifndef GHCI
325 addPluginPasses builtin_passes = return builtin_passes
326 #else
327 addPluginPasses builtin_passes
328 = do { hsc_env <- getHscEnv
329 ; named_plugins <- liftIO (loadPlugins hsc_env)
330 ; foldM query_plug builtin_passes named_plugins }
331 where
332 query_plug todos (_, plug, options) = installCoreToDos plug options todos
333 #endif
334
335 {-
336 ************************************************************************
337 * *
338 The CoreToDo interpreter
339 * *
340 ************************************************************************
341 -}
342
343 runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts
344 runCorePasses passes guts
345 = foldM do_pass guts passes
346 where
347 do_pass guts CoreDoNothing = return guts
348 do_pass guts (CoreDoPasses ps) = runCorePasses ps guts
349 do_pass guts pass
350 = do { showPass pass
351 ; guts' <- lintAnnots (ppr pass) (doCorePass pass) guts
352 ; endPass pass (mg_binds guts') (mg_rules guts')
353 ; return guts' }
354
355 doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts
356 doCorePass pass@(CoreDoSimplify {}) = {-# SCC "Simplify" #-}
357 simplifyPgm pass
358
359 doCorePass CoreCSE = {-# SCC "CommonSubExpr" #-}
360 doPass cseProgram
361
362 doCorePass CoreLiberateCase = {-# SCC "LiberateCase" #-}
363 doPassD liberateCase
364
365 doCorePass CoreDoFloatInwards = {-# SCC "FloatInwards" #-}
366 doPassD floatInwards
367
368 doCorePass (CoreDoFloatOutwards f) = {-# SCC "FloatOutwards" #-}
369 doPassDUM (floatOutwards f)
370
371 doCorePass CoreDoStaticArgs = {-# SCC "StaticArgs" #-}
372 doPassU doStaticArgs
373
374 doCorePass CoreDoCallArity = {-# SCC "CallArity" #-}
375 doPassD callArityAnalProgram
376
377 doCorePass CoreDoStrictness = {-# SCC "NewStranal" #-}
378 doPassDFM dmdAnalProgram
379
380 doCorePass CoreDoWorkerWrapper = {-# SCC "WorkWrap" #-}
381 doPassDFU wwTopBinds
382
383 doCorePass CoreDoSpecialising = {-# SCC "Specialise" #-}
384 specProgram
385
386 doCorePass CoreDoSpecConstr = {-# SCC "SpecConstr" #-}
387 specConstrProgram
388
389 doCorePass CoreDoVectorisation = {-# SCC "Vectorise" #-}
390 vectorise
391
392 doCorePass CoreDoPrintCore = observe printCore
393 doCorePass (CoreDoRuleCheck phase pat) = ruleCheckPass phase pat
394 doCorePass CoreDoNothing = return
395 doCorePass (CoreDoPasses passes) = runCorePasses passes
396
397 #ifdef GHCI
398 doCorePass (CoreDoPluginPass _ pass) = {-# SCC "Plugin" #-} pass
399 #endif
400
401 doCorePass pass = pprPanic "doCorePass" (ppr pass)
402
403 {-
404 ************************************************************************
405 * *
406 \subsection{Core pass combinators}
407 * *
408 ************************************************************************
409 -}
410
411 printCore :: DynFlags -> CoreProgram -> IO ()
412 printCore dflags binds
413 = Err.dumpIfSet dflags True "Print Core" (pprCoreBindings binds)
414
415 ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts
416 ruleCheckPass current_phase pat guts = do
417 rb <- getRuleBase
418 dflags <- getDynFlags
419 vis_orphs <- getVisibleOrphanMods
420 liftIO $ Err.showPass dflags "RuleCheck"
421 liftIO $ log_action dflags dflags Err.SevDump noSrcSpan defaultDumpStyle
422 (ruleCheckProgram current_phase pat
423 (RuleEnv rb vis_orphs) (mg_binds guts))
424 return guts
425
426
427 doPassDUM :: (DynFlags -> UniqSupply -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
428 doPassDUM do_pass = doPassM $ \binds -> do
429 dflags <- getDynFlags
430 us <- getUniqueSupplyM
431 liftIO $ do_pass dflags us binds
432
433 doPassDM :: (DynFlags -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
434 doPassDM do_pass = doPassDUM (\dflags -> const (do_pass dflags))
435
436 doPassD :: (DynFlags -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
437 doPassD do_pass = doPassDM (\dflags -> return . do_pass dflags)
438
439 doPassDU :: (DynFlags -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
440 doPassDU do_pass = doPassDUM (\dflags us -> return . do_pass dflags us)
441
442 doPassU :: (UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
443 doPassU do_pass = doPassDU (const do_pass)
444
445 doPassDFM :: (DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
446 doPassDFM do_pass guts = do
447 dflags <- getDynFlags
448 p_fam_env <- getPackageFamInstEnv
449 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
450 doPassM (liftIO . do_pass dflags fam_envs) guts
451
452 doPassDFU :: (DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
453 doPassDFU do_pass guts = do
454 dflags <- getDynFlags
455 us <- getUniqueSupplyM
456 p_fam_env <- getPackageFamInstEnv
457 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
458 doPass (do_pass dflags fam_envs us) guts
459
460 -- Most passes return no stats and don't change rules: these combinators
461 -- let us lift them to the full blown ModGuts+CoreM world
462 doPassM :: Monad m => (CoreProgram -> m CoreProgram) -> ModGuts -> m ModGuts
463 doPassM bind_f guts = do
464 binds' <- bind_f (mg_binds guts)
465 return (guts { mg_binds = binds' })
466
467 doPass :: (CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
468 doPass bind_f guts = return $ guts { mg_binds = bind_f (mg_binds guts) }
469
470 -- Observer passes just peek; don't modify the bindings at all
471 observe :: (DynFlags -> CoreProgram -> IO a) -> ModGuts -> CoreM ModGuts
472 observe do_pass = doPassM $ \binds -> do
473 dflags <- getDynFlags
474 _ <- liftIO $ do_pass dflags binds
475 return binds
476
477 {-
478 ************************************************************************
479 * *
480 Gentle simplification
481 * *
482 ************************************************************************
483 -}
484
485 simplifyExpr :: DynFlags -- includes spec of what core-to-core passes to do
486 -> CoreExpr
487 -> IO CoreExpr
488 -- simplifyExpr is called by the driver to simplify an
489 -- expression typed in at the interactive prompt
490 --
491 -- Also used by Template Haskell
492 simplifyExpr dflags expr
493 = do {
494 ; Err.showPass dflags "Simplify"
495
496 ; us <- mkSplitUniqSupply 's'
497
498 ; let sz = exprSize expr
499
500 ; (expr', counts) <- initSmpl dflags emptyRuleEnv
501 emptyFamInstEnvs us sz
502 (simplExprGently (simplEnvForGHCi dflags) expr)
503
504 ; Err.dumpIfSet dflags (dopt Opt_D_dump_simpl_stats dflags)
505 "Simplifier statistics" (pprSimplCount counts)
506
507 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl "Simplified expression"
508 (pprCoreExpr expr')
509
510 ; return expr'
511 }
512
513 simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr
514 -- Simplifies an expression
515 -- does occurrence analysis, then simplification
516 -- and repeats (twice currently) because one pass
517 -- alone leaves tons of crud.
518 -- Used (a) for user expressions typed in at the interactive prompt
519 -- (b) the LHS and RHS of a RULE
520 -- (c) Template Haskell splices
521 --
522 -- The name 'Gently' suggests that the SimplifierMode is SimplGently,
523 -- and in fact that is so.... but the 'Gently' in simplExprGently doesn't
524 -- enforce that; it just simplifies the expression twice
525
526 -- It's important that simplExprGently does eta reduction; see
527 -- Note [Simplifying the left-hand side of a RULE] above. The
528 -- simplifier does indeed do eta reduction (it's in Simplify.completeLam)
529 -- but only if -O is on.
530
531 simplExprGently env expr = do
532 expr1 <- simplExpr env (occurAnalyseExpr expr)
533 simplExpr env (occurAnalyseExpr expr1)
534
535 {-
536 ************************************************************************
537 * *
538 \subsection{The driver for the simplifier}
539 * *
540 ************************************************************************
541 -}
542
543 simplifyPgm :: CoreToDo -> ModGuts -> CoreM ModGuts
544 simplifyPgm pass guts
545 = do { hsc_env <- getHscEnv
546 ; us <- getUniqueSupplyM
547 ; rb <- getRuleBase
548 ; liftIOWithCount $
549 simplifyPgmIO pass hsc_env us rb guts }
550
551 simplifyPgmIO :: CoreToDo
552 -> HscEnv
553 -> UniqSupply
554 -> RuleBase
555 -> ModGuts
556 -> IO (SimplCount, ModGuts) -- New bindings
557
558 simplifyPgmIO pass@(CoreDoSimplify max_iterations mode)
559 hsc_env us hpt_rule_base
560 guts@(ModGuts { mg_module = this_mod
561 , mg_rdr_env = rdr_env
562 , mg_deps = deps
563 , mg_binds = binds, mg_rules = rules
564 , mg_fam_inst_env = fam_inst_env })
565 = do { (termination_msg, it_count, counts_out, guts')
566 <- do_iteration us 1 [] binds rules
567
568 ; Err.dumpIfSet dflags (dopt Opt_D_verbose_core2core dflags &&
569 dopt Opt_D_dump_simpl_stats dflags)
570 "Simplifier statistics for following pass"
571 (vcat [text termination_msg <+> text "after" <+> ppr it_count
572 <+> text "iterations",
573 blankLine,
574 pprSimplCount counts_out])
575
576 ; return (counts_out, guts')
577 }
578 where
579 dflags = hsc_dflags hsc_env
580 print_unqual = mkPrintUnqualified dflags rdr_env
581 simpl_env = mkSimplEnv mode
582 active_rule = activeRule simpl_env
583
584 do_iteration :: UniqSupply
585 -> Int -- Counts iterations
586 -> [SimplCount] -- Counts from earlier iterations, reversed
587 -> CoreProgram -- Bindings in
588 -> [CoreRule] -- and orphan rules
589 -> IO (String, Int, SimplCount, ModGuts)
590
591 do_iteration us iteration_no counts_so_far binds rules
592 -- iteration_no is the number of the iteration we are
593 -- about to begin, with '1' for the first
594 | iteration_no > max_iterations -- Stop if we've run out of iterations
595 = WARN( debugIsOn && (max_iterations > 2)
596 , hang (ptext (sLit "Simplifier bailing out after") <+> int max_iterations
597 <+> ptext (sLit "iterations")
598 <+> (brackets $ hsep $ punctuate comma $
599 map (int . simplCountN) (reverse counts_so_far)))
600 2 (ptext (sLit "Size =") <+> ppr (coreBindsStats binds)))
601
602 -- Subtract 1 from iteration_no to get the
603 -- number of iterations we actually completed
604 return ( "Simplifier baled out", iteration_no - 1
605 , totalise counts_so_far
606 , guts { mg_binds = binds, mg_rules = rules } )
607
608 -- Try and force thunks off the binds; significantly reduces
609 -- space usage, especially with -O. JRS, 000620.
610 | let sz = coreBindsSize binds
611 , () <- sz `seq` () -- Force it
612 = do {
613 -- Occurrence analysis
614 let { -- Note [Vectorisation declarations and occurrences]
615 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
616 -- During the 'InitialPhase' (i.e., before vectorisation), we need to make sure
617 -- that the right-hand sides of vectorisation declarations are taken into
618 -- account during occurrence analysis. After the 'InitialPhase', we need to ensure
619 -- that the binders representing variable vectorisation declarations are kept alive.
620 -- (In contrast to automatically vectorised variables, their unvectorised versions
621 -- don't depend on them.)
622 vectVars = mkVarSet $
623 catMaybes [ fmap snd $ lookupVarEnv (vectInfoVar (mg_vect_info guts)) bndr
624 | Vect bndr _ <- mg_vect_decls guts]
625 ++
626 catMaybes [ fmap snd $ lookupVarEnv (vectInfoVar (mg_vect_info guts)) bndr
627 | bndr <- bindersOfBinds binds]
628 -- FIXME: This second comprehensions is only needed as long as we
629 -- have vectorised bindings where we get "Could NOT call
630 -- vectorised from original version".
631 ; (maybeVects, maybeVectVars)
632 = case sm_phase mode of
633 InitialPhase -> (mg_vect_decls guts, vectVars)
634 _ -> ([], vectVars)
635 ; tagged_binds = {-# SCC "OccAnal" #-}
636 occurAnalysePgm this_mod active_rule rules
637 maybeVects maybeVectVars binds
638 } ;
639 Err.dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis"
640 (pprCoreBindings tagged_binds);
641
642 -- Get any new rules, and extend the rule base
643 -- See Note [Overall plumbing for rules] in Rules.hs
644 -- We need to do this regularly, because simplification can
645 -- poke on IdInfo thunks, which in turn brings in new rules
646 -- behind the scenes. Otherwise there's a danger we'll simply
647 -- miss the rules for Ids hidden inside imported inlinings
648 eps <- hscEPS hsc_env ;
649 let { rule_base1 = unionRuleBase hpt_rule_base (eps_rule_base eps)
650 ; rule_base2 = extendRuleBaseList rule_base1 rules
651 ; fam_envs = (eps_fam_inst_env eps, fam_inst_env)
652 ; vis_orphs = this_mod : dep_orphs deps } ;
653
654 -- Simplify the program
655 ((binds1, rules1), counts1) <-
656 initSmpl dflags (mkRuleEnv rule_base2 vis_orphs) fam_envs us1 sz $
657 do { env1 <- {-# SCC "SimplTopBinds" #-}
658 simplTopBinds simpl_env tagged_binds
659
660 -- Apply the substitution to rules defined in this module
661 -- for imported Ids. Eg RULE map my_f = blah
662 -- If we have a substitution my_f :-> other_f, we'd better
663 -- apply it to the rule to, or it'll never match
664 ; rules1 <- simplRules env1 Nothing rules
665
666 ; return (getFloatBinds env1, rules1) } ;
667
668 -- Stop if nothing happened; don't dump output
669 if isZeroSimplCount counts1 then
670 return ( "Simplifier reached fixed point", iteration_no
671 , totalise (counts1 : counts_so_far) -- Include "free" ticks
672 , guts { mg_binds = binds1, mg_rules = rules1 } )
673 else do {
674 -- Short out indirections
675 -- We do this *after* at least one run of the simplifier
676 -- because indirection-shorting uses the export flag on *occurrences*
677 -- and that isn't guaranteed to be ok until after the first run propagates
678 -- stuff from the binding site to its occurrences
679 --
680 -- ToDo: alas, this means that indirection-shorting does not happen at all
681 -- if the simplifier does nothing (not common, I know, but unsavoury)
682 let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ;
683
684 -- Dump the result of this iteration
685 dump_end_iteration dflags print_unqual iteration_no counts1 binds2 rules1 ;
686 lintPassResult hsc_env pass binds2 ;
687
688 -- Loop
689 do_iteration us2 (iteration_no + 1) (counts1:counts_so_far) binds2 rules1
690 } }
691 | otherwise = panic "do_iteration"
692 where
693 (us1, us2) = splitUniqSupply us
694
695 -- Remember the counts_so_far are reversed
696 totalise :: [SimplCount] -> SimplCount
697 totalise = foldr (\c acc -> acc `plusSimplCount` c)
698 (zeroSimplCount dflags)
699
700 simplifyPgmIO _ _ _ _ _ = panic "simplifyPgmIO"
701
702 -------------------
703 dump_end_iteration :: DynFlags -> PrintUnqualified -> Int
704 -> SimplCount -> CoreProgram -> [CoreRule] -> IO ()
705 dump_end_iteration dflags print_unqual iteration_no counts binds rules
706 = dumpPassResult dflags print_unqual mb_flag hdr pp_counts binds rules
707 where
708 mb_flag | dopt Opt_D_dump_simpl_iterations dflags = Just Opt_D_dump_simpl_iterations
709 | otherwise = Nothing
710 -- Show details if Opt_D_dump_simpl_iterations is on
711
712 hdr = ptext (sLit "Simplifier iteration=") <> int iteration_no
713 pp_counts = vcat [ ptext (sLit "---- Simplifier counts for") <+> hdr
714 , pprSimplCount counts
715 , ptext (sLit "---- End of simplifier counts for") <+> hdr ]
716
717 {-
718 ************************************************************************
719 * *
720 Shorting out indirections
721 * *
722 ************************************************************************
723
724 If we have this:
725
726 x_local = <expression>
727 ...bindings...
728 x_exported = x_local
729
730 where x_exported is exported, and x_local is not, then we replace it with this:
731
732 x_exported = <expression>
733 x_local = x_exported
734 ...bindings...
735
736 Without this we never get rid of the x_exported = x_local thing. This
737 save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
738 makes strictness information propagate better. This used to happen in
739 the final phase, but it's tidier to do it here.
740
741 Note [Transferring IdInfo]
742 ~~~~~~~~~~~~~~~~~~~~~~~~~~
743 We want to propagage any useful IdInfo on x_local to x_exported.
744
745 STRICTNESS: if we have done strictness analysis, we want the strictness info on
746 x_local to transfer to x_exported. Hence the copyIdInfo call.
747
748 RULES: we want to *add* any RULES for x_local to x_exported.
749
750
751 Note [Messing up the exported Id's RULES]
752 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
753 We must be careful about discarding (obviously) or even merging the
754 RULES on the exported Id. The example that went bad on me at one stage
755 was this one:
756
757 iterate :: (a -> a) -> a -> [a]
758 [Exported]
759 iterate = iterateList
760
761 iterateFB c f x = x `c` iterateFB c f (f x)
762 iterateList f x = x : iterateList f (f x)
763 [Not exported]
764
765 {-# RULES
766 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
767 "iterateFB" iterateFB (:) = iterateList
768 #-}
769
770 This got shorted out to:
771
772 iterateList :: (a -> a) -> a -> [a]
773 iterateList = iterate
774
775 iterateFB c f x = x `c` iterateFB c f (f x)
776 iterate f x = x : iterate f (f x)
777
778 {-# RULES
779 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
780 "iterateFB" iterateFB (:) = iterate
781 #-}
782
783 And now we get an infinite loop in the rule system
784 iterate f x -> build (\cn -> iterateFB c f x)
785 -> iterateFB (:) f x
786 -> iterate f x
787
788 Old "solution":
789 use rule switching-off pragmas to get rid
790 of iterateList in the first place
791
792 But in principle the user *might* want rules that only apply to the Id
793 he says. And inline pragmas are similar
794 {-# NOINLINE f #-}
795 f = local
796 local = <stuff>
797 Then we do not want to get rid of the NOINLINE.
798
799 Hence hasShortableIdinfo.
800
801
802 Note [Rules and indirection-zapping]
803 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
804 Problem: what if x_exported has a RULE that mentions something in ...bindings...?
805 Then the things mentioned can be out of scope! Solution
806 a) Make sure that in this pass the usage-info from x_exported is
807 available for ...bindings...
808 b) If there are any such RULES, rec-ify the entire top-level.
809 It'll get sorted out next time round
810
811 Other remarks
812 ~~~~~~~~~~~~~
813 If more than one exported thing is equal to a local thing (i.e., the
814 local thing really is shared), then we do one only:
815 \begin{verbatim}
816 x_local = ....
817 x_exported1 = x_local
818 x_exported2 = x_local
819 ==>
820 x_exported1 = ....
821
822 x_exported2 = x_exported1
823 \end{verbatim}
824
825 We rely on prior eta reduction to simplify things like
826 \begin{verbatim}
827 x_exported = /\ tyvars -> x_local tyvars
828 ==>
829 x_exported = x_local
830 \end{verbatim}
831 Hence,there's a possibility of leaving unchanged something like this:
832 \begin{verbatim}
833 x_local = ....
834 x_exported1 = x_local Int
835 \end{verbatim}
836 By the time we've thrown away the types in STG land this
837 could be eliminated. But I don't think it's very common
838 and it's dangerous to do this fiddling in STG land
839 because we might elminate a binding that's mentioned in the
840 unfolding for something.
841
842 Note [Indirection zapping and ticks]
843 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
844
845 Unfortunately this is another place where we need a special case for
846 ticks. The following happens quite regularly:
847
848 x_local = <expression>
849 x_exported = tick<x> x_local
850
851 Which we want to become:
852
853 x_exported = tick<x> <expression>
854
855 As it makes no sense to keep the tick and the expression on separate
856 bindings. Note however that that this might increase the ticks scoping
857 over the execution of x_local, so we can only do this for floatable
858 ticks. More often than not, other references will be unfoldings of
859 x_exported, and therefore carry the tick anyway.
860 -}
861
862 type IndEnv = IdEnv (Id, [Tickish Var]) -- Maps local_id -> exported_id, ticks
863
864 shortOutIndirections :: CoreProgram -> CoreProgram
865 shortOutIndirections binds
866 | isEmptyVarEnv ind_env = binds
867 | no_need_to_flatten = binds' -- See Note [Rules and indirect-zapping]
868 | otherwise = [Rec (flattenBinds binds')] -- for this no_need_to_flatten stuff
869 where
870 ind_env = makeIndEnv binds
871 -- These exported Ids are the subjects of the indirection-elimination
872 exp_ids = map fst $ varEnvElts ind_env
873 exp_id_set = mkVarSet exp_ids
874 no_need_to_flatten = all (null . specInfoRules . idSpecialisation) exp_ids
875 binds' = concatMap zap binds
876
877 zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)]
878 zap (Rec pairs) = [Rec (concatMap zapPair pairs)]
879
880 zapPair (bndr, rhs)
881 | bndr `elemVarSet` exp_id_set = []
882 | Just (exp_id, ticks) <- lookupVarEnv ind_env bndr
883 = [(transferIdInfo exp_id bndr,
884 mkTicks ticks rhs),
885 (bndr, Var exp_id)]
886 | otherwise = [(bndr,rhs)]
887
888 makeIndEnv :: [CoreBind] -> IndEnv
889 makeIndEnv binds
890 = foldr add_bind emptyVarEnv binds
891 where
892 add_bind :: CoreBind -> IndEnv -> IndEnv
893 add_bind (NonRec exported_id rhs) env = add_pair (exported_id, rhs) env
894 add_bind (Rec pairs) env = foldr add_pair env pairs
895
896 add_pair :: (Id,CoreExpr) -> IndEnv -> IndEnv
897 add_pair (exported_id, exported) env
898 | (ticks, Var local_id) <- stripTicksTop tickishFloatable exported
899 , shortMeOut env exported_id local_id
900 = extendVarEnv env local_id (exported_id, ticks)
901 add_pair _ env = env
902
903 -----------------
904 shortMeOut :: IndEnv -> Id -> Id -> Bool
905 shortMeOut ind_env exported_id local_id
906 -- The if-then-else stuff is just so I can get a pprTrace to see
907 -- how often I don't get shorting out because of IdInfo stuff
908 = if isExportedId exported_id && -- Only if this is exported
909
910 isLocalId local_id && -- Only if this one is defined in this
911 -- module, so that we *can* change its
912 -- binding to be the exported thing!
913
914 not (isExportedId local_id) && -- Only if this one is not itself exported,
915 -- since the transformation will nuke it
916
917 not (local_id `elemVarEnv` ind_env) -- Only if not already substituted for
918 then
919 if hasShortableIdInfo exported_id
920 then True -- See Note [Messing up the exported Id's IdInfo]
921 else WARN( True, ptext (sLit "Not shorting out:") <+> ppr exported_id )
922 False
923 else
924 False
925
926 -----------------
927 hasShortableIdInfo :: Id -> Bool
928 -- True if there is no user-attached IdInfo on exported_id,
929 -- so we can safely discard it
930 -- See Note [Messing up the exported Id's IdInfo]
931 hasShortableIdInfo id
932 = isEmptySpecInfo (specInfo info)
933 && isDefaultInlinePragma (inlinePragInfo info)
934 && not (isStableUnfolding (unfoldingInfo info))
935 where
936 info = idInfo id
937
938 -----------------
939 transferIdInfo :: Id -> Id -> Id
940 -- See Note [Transferring IdInfo]
941 -- If we have
942 -- lcl_id = e; exp_id = lcl_id
943 -- and lcl_id has useful IdInfo, we don't want to discard it by going
944 -- gbl_id = e; lcl_id = gbl_id
945 -- Instead, transfer IdInfo from lcl_id to exp_id
946 -- Overwriting, rather than merging, seems to work ok.
947 transferIdInfo exported_id local_id
948 = modifyIdInfo transfer exported_id
949 where
950 local_info = idInfo local_id
951 transfer exp_info = exp_info `setStrictnessInfo` strictnessInfo local_info
952 `setUnfoldingInfo` unfoldingInfo local_info
953 `setInlinePragInfo` inlinePragInfo local_info
954 `setSpecInfo` addSpecInfo (specInfo exp_info) new_info
955 new_info = setSpecInfoHead (idName exported_id)
956 (specInfo local_info)
957 -- Remember to set the function-name field of the
958 -- rules as we transfer them from one function to another