a6672507a93d3b8d56629a34e1ca7e362a6b6f04
[ghc.git] / compiler / simplCore / SimplCore.hs
1 {-
2 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4 \section[SimplCore]{Driver for simplifying @Core@ programs}
5 -}
6
7 {-# LANGUAGE CPP #-}
8
9 module SimplCore ( core2core, simplifyExpr ) where
10
11 #include "HsVersions.h"
12
13 import DynFlags
14 import CoreSyn
15 import HscTypes
16 import CSE ( cseProgram )
17 import Rules ( mkRuleBase, unionRuleBase,
18 extendRuleBaseList, ruleCheckProgram, addSpecInfo, )
19 import PprCore ( pprCoreBindings, pprCoreExpr )
20 import OccurAnal ( occurAnalysePgm, occurAnalyseExpr )
21 import IdInfo
22 import CoreStats ( coreBindsSize, coreBindsStats, exprSize )
23 import CoreUtils ( mkTicks, stripTicksTop )
24 import CoreLint ( showPass, endPass, lintPassResult, dumpPassResult,
25 lintAnnots )
26 import Simplify ( simplTopBinds, simplExpr, simplRule )
27 import SimplUtils ( simplEnvForGHCi, activeRule )
28 import SimplEnv
29 import SimplMonad
30 import CoreMonad
31 import qualified ErrUtils as Err
32 import FloatIn ( floatInwards )
33 import FloatOut ( floatOutwards )
34 import FamInstEnv
35 import Id
36 import BasicTypes ( CompilerPhase(..), isDefaultInlinePragma )
37 import VarSet
38 import VarEnv
39 import LiberateCase ( liberateCase )
40 import SAT ( doStaticArgs )
41 import Specialise ( specProgram)
42 import SpecConstr ( specConstrProgram)
43 import DmdAnal ( dmdAnalProgram )
44 import CallArity ( callArityAnalProgram )
45 import WorkWrap ( wwTopBinds )
46 import Vectorise ( vectorise )
47 import FastString
48 import SrcLoc
49 import Util
50 import Module
51
52 import Maybes
53 import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply )
54 import Outputable
55 import Control.Monad
56
57 #ifdef GHCI
58 import DynamicLoading ( loadPlugins )
59 import Plugins ( installCoreToDos )
60 #endif
61
62 {-
63 ************************************************************************
64 * *
65 \subsection{The driver for the simplifier}
66 * *
67 ************************************************************************
68 -}
69
70 core2core :: HscEnv -> ModGuts -> IO ModGuts
71 core2core hsc_env guts
72 = do { us <- mkSplitUniqSupply 's'
73 -- make sure all plugins are loaded
74
75 ; let builtin_passes = getCoreToDo dflags
76 orph_mods = mkModuleSet (mg_module guts : dep_orphs (mg_deps guts))
77 ;
78 ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base us mod
79 orph_mods print_unqual $
80 do { all_passes <- addPluginPasses builtin_passes
81 ; runCorePasses all_passes guts }
82
83 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_stats
84 "Grand total simplifier statistics"
85 (pprSimplCount stats)
86
87 ; return guts2 }
88 where
89 dflags = hsc_dflags hsc_env
90 home_pkg_rules = hptRules hsc_env (dep_mods (mg_deps guts))
91 hpt_rule_base = mkRuleBase home_pkg_rules
92 mod = mg_module guts
93 -- mod: get the module out of the current HscEnv so we can retrieve it from the monad.
94 -- This is very convienent for the users of the monad (e.g. plugins do not have to
95 -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
96 -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
97 -- would mean our cached value would go out of date.
98 print_unqual = mkPrintUnqualified dflags (mg_rdr_env guts)
99
100 {-
101 ************************************************************************
102 * *
103 Generating the main optimisation pipeline
104 * *
105 ************************************************************************
106 -}
107
108 getCoreToDo :: DynFlags -> [CoreToDo]
109 getCoreToDo dflags
110 = core_todo
111 where
112 opt_level = optLevel dflags
113 phases = simplPhases dflags
114 max_iter = maxSimplIterations dflags
115 rule_check = ruleCheck dflags
116 call_arity = gopt Opt_CallArity dflags
117 strictness = gopt Opt_Strictness dflags
118 full_laziness = gopt Opt_FullLaziness dflags
119 do_specialise = gopt Opt_Specialise dflags
120 do_float_in = gopt Opt_FloatIn dflags
121 cse = gopt Opt_CSE dflags
122 spec_constr = gopt Opt_SpecConstr dflags
123 liberate_case = gopt Opt_LiberateCase dflags
124 late_dmd_anal = gopt Opt_LateDmdAnal dflags
125 static_args = gopt Opt_StaticArgumentTransformation dflags
126 rules_on = gopt Opt_EnableRewriteRules dflags
127 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
128
129 maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
130
131 maybe_strictness_before phase
132 = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness
133
134 base_mode = SimplMode { sm_phase = panic "base_mode"
135 , sm_names = []
136 , sm_rules = rules_on
137 , sm_eta_expand = eta_expand_on
138 , sm_inline = True
139 , sm_case_case = True }
140
141 simpl_phase phase names iter
142 = CoreDoPasses
143 $ [ maybe_strictness_before phase
144 , CoreDoSimplify iter
145 (base_mode { sm_phase = Phase phase
146 , sm_names = names })
147
148 , maybe_rule_check (Phase phase) ]
149
150 -- Vectorisation can introduce a fair few common sub expressions involving
151 -- DPH primitives. For example, see the Reverse test from dph-examples.
152 -- We need to eliminate these common sub expressions before their definitions
153 -- are inlined in phase 2. The CSE introduces lots of v1 = v2 bindings,
154 -- so we also run simpl_gently to inline them.
155 ++ (if gopt Opt_Vectorise dflags && phase == 3
156 then [CoreCSE, simpl_gently]
157 else [])
158
159 vectorisation
160 = runWhen (gopt Opt_Vectorise dflags) $
161 CoreDoPasses [ simpl_gently, CoreDoVectorisation ]
162
163 -- By default, we have 2 phases before phase 0.
164
165 -- Want to run with inline phase 2 after the specialiser to give
166 -- maximum chance for fusion to work before we inline build/augment
167 -- in phase 1. This made a difference in 'ansi' where an
168 -- overloaded function wasn't inlined till too late.
169
170 -- Need phase 1 so that build/augment get
171 -- inlined. I found that spectral/hartel/genfft lost some useful
172 -- strictness in the function sumcode' if augment is not inlined
173 -- before strictness analysis runs
174 simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
175 | phase <- [phases, phases-1 .. 1] ]
176
177
178 -- initial simplify: mk specialiser happy: minimum effort please
179 simpl_gently = CoreDoSimplify max_iter
180 (base_mode { sm_phase = InitialPhase
181 , sm_names = ["Gentle"]
182 , sm_rules = rules_on -- Note [RULEs enabled in SimplGently]
183 , sm_inline = False
184 , sm_case_case = False })
185 -- Don't do case-of-case transformations.
186 -- This makes full laziness work better
187
188 -- New demand analyser
189 demand_analyser = (CoreDoPasses ([
190 CoreDoStrictness,
191 CoreDoWorkerWrapper,
192 simpl_phase 0 ["post-worker-wrapper"] max_iter
193 ]))
194
195 core_todo =
196 if opt_level == 0 then
197 [ vectorisation
198 , CoreDoSimplify max_iter
199 (base_mode { sm_phase = Phase 0
200 , sm_names = ["Non-opt simplification"] })
201 ]
202
203 else {- opt_level >= 1 -} [
204
205 -- We want to do the static argument transform before full laziness as it
206 -- may expose extra opportunities to float things outwards. However, to fix
207 -- up the output of the transformation we need at do at least one simplify
208 -- after this before anything else
209 runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
210
211 -- We run vectorisation here for now, but we might also try to run
212 -- it later
213 vectorisation,
214
215 -- initial simplify: mk specialiser happy: minimum effort please
216 simpl_gently,
217
218 -- Specialisation is best done before full laziness
219 -- so that overloaded functions have all their dictionary lambdas manifest
220 runWhen do_specialise CoreDoSpecialising,
221
222 runWhen full_laziness $
223 CoreDoFloatOutwards FloatOutSwitches {
224 floatOutLambdas = Just 0,
225 floatOutConstants = True,
226 floatOutOverSatApps = False },
227 -- Was: gentleFloatOutSwitches
228 --
229 -- I have no idea why, but not floating constants to
230 -- top level is very bad in some cases.
231 --
232 -- Notably: p_ident in spectral/rewrite
233 -- Changing from "gentle" to "constantsOnly"
234 -- improved rewrite's allocation by 19%, and
235 -- made 0.0% difference to any other nofib
236 -- benchmark
237 --
238 -- Not doing floatOutOverSatApps yet, we'll do
239 -- that later on when we've had a chance to get more
240 -- accurate arity information. In fact it makes no
241 -- difference at all to performance if we do it here,
242 -- but maybe we save some unnecessary to-and-fro in
243 -- the simplifier.
244
245 simpl_phases,
246
247 -- Phase 0: allow all Ids to be inlined now
248 -- This gets foldr inlined before strictness analysis
249
250 -- At least 3 iterations because otherwise we land up with
251 -- huge dead expressions because of an infelicity in the
252 -- simpifier.
253 -- let k = BIG in foldr k z xs
254 -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
255 -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
256 -- Don't stop now!
257 simpl_phase 0 ["main"] (max max_iter 3),
258
259 runWhen do_float_in CoreDoFloatInwards,
260 -- Run float-inwards immediately before the strictness analyser
261 -- Doing so pushes bindings nearer their use site and hence makes
262 -- them more likely to be strict. These bindings might only show
263 -- up after the inlining from simplification. Example in fulsom,
264 -- Csg.calc, where an arg of timesDouble thereby becomes strict.
265
266 runWhen call_arity $ CoreDoPasses
267 [ CoreDoCallArity
268 , simpl_phase 0 ["post-call-arity"] max_iter
269 ],
270
271 runWhen strictness demand_analyser,
272
273 runWhen full_laziness $
274 CoreDoFloatOutwards FloatOutSwitches {
275 floatOutLambdas = floatLamArgs dflags,
276 floatOutConstants = True,
277 floatOutOverSatApps = True },
278 -- nofib/spectral/hartel/wang doubles in speed if you
279 -- do full laziness late in the day. It only happens
280 -- after fusion and other stuff, so the early pass doesn't
281 -- catch it. For the record, the redex is
282 -- f_el22 (f_el21 r_midblock)
283
284
285 runWhen cse CoreCSE,
286 -- We want CSE to follow the final full-laziness pass, because it may
287 -- succeed in commoning up things floated out by full laziness.
288 -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
289
290 runWhen do_float_in CoreDoFloatInwards,
291
292 maybe_rule_check (Phase 0),
293
294 -- Case-liberation for -O2. This should be after
295 -- strictness analysis and the simplification which follows it.
296 runWhen liberate_case (CoreDoPasses [
297 CoreLiberateCase,
298 simpl_phase 0 ["post-liberate-case"] max_iter
299 ]), -- Run the simplifier after LiberateCase to vastly
300 -- reduce the possiblility of shadowing
301 -- Reason: see Note [Shadowing] in SpecConstr.hs
302
303 runWhen spec_constr CoreDoSpecConstr,
304
305 maybe_rule_check (Phase 0),
306
307 -- Final clean-up simplification:
308 simpl_phase 0 ["final"] max_iter,
309
310 runWhen late_dmd_anal $ CoreDoPasses [
311 CoreDoStrictness,
312 CoreDoWorkerWrapper,
313 simpl_phase 0 ["post-late-ww"] max_iter
314 ],
315
316 maybe_rule_check (Phase 0)
317 ]
318
319 -- Loading plugins
320
321 addPluginPasses :: [CoreToDo] -> CoreM [CoreToDo]
322 #ifndef GHCI
323 addPluginPasses builtin_passes = return builtin_passes
324 #else
325 addPluginPasses builtin_passes
326 = do { hsc_env <- getHscEnv
327 ; named_plugins <- liftIO (loadPlugins hsc_env)
328 ; foldM query_plug builtin_passes named_plugins }
329 where
330 query_plug todos (_, plug, options) = installCoreToDos plug options todos
331 #endif
332
333 {-
334 ************************************************************************
335 * *
336 The CoreToDo interpreter
337 * *
338 ************************************************************************
339 -}
340
341 runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts
342 runCorePasses passes guts
343 = foldM do_pass guts passes
344 where
345 do_pass guts CoreDoNothing = return guts
346 do_pass guts (CoreDoPasses ps) = runCorePasses ps guts
347 do_pass guts pass
348 = do { showPass pass
349 ; guts' <- lintAnnots (ppr pass) (doCorePass pass) guts
350 ; endPass pass (mg_binds guts') (mg_rules guts')
351 ; return guts' }
352
353 doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts
354 doCorePass pass@(CoreDoSimplify {}) = {-# SCC "Simplify" #-}
355 simplifyPgm pass
356
357 doCorePass CoreCSE = {-# SCC "CommonSubExpr" #-}
358 doPass cseProgram
359
360 doCorePass CoreLiberateCase = {-# SCC "LiberateCase" #-}
361 doPassD liberateCase
362
363 doCorePass CoreDoFloatInwards = {-# SCC "FloatInwards" #-}
364 doPassD floatInwards
365
366 doCorePass (CoreDoFloatOutwards f) = {-# SCC "FloatOutwards" #-}
367 doPassDUM (floatOutwards f)
368
369 doCorePass CoreDoStaticArgs = {-# SCC "StaticArgs" #-}
370 doPassU doStaticArgs
371
372 doCorePass CoreDoCallArity = {-# SCC "CallArity" #-}
373 doPassD callArityAnalProgram
374
375 doCorePass CoreDoStrictness = {-# SCC "NewStranal" #-}
376 doPassDFM dmdAnalProgram
377
378 doCorePass CoreDoWorkerWrapper = {-# SCC "WorkWrap" #-}
379 doPassDFU wwTopBinds
380
381 doCorePass CoreDoSpecialising = {-# SCC "Specialise" #-}
382 specProgram
383
384 doCorePass CoreDoSpecConstr = {-# SCC "SpecConstr" #-}
385 specConstrProgram
386
387 doCorePass CoreDoVectorisation = {-# SCC "Vectorise" #-}
388 vectorise
389
390 doCorePass CoreDoPrintCore = observe printCore
391 doCorePass (CoreDoRuleCheck phase pat) = ruleCheckPass phase pat
392 doCorePass CoreDoNothing = return
393 doCorePass (CoreDoPasses passes) = runCorePasses passes
394
395 #ifdef GHCI
396 doCorePass (CoreDoPluginPass _ pass) = {-# SCC "Plugin" #-} pass
397 #endif
398
399 doCorePass pass = pprPanic "doCorePass" (ppr pass)
400
401 {-
402 ************************************************************************
403 * *
404 \subsection{Core pass combinators}
405 * *
406 ************************************************************************
407 -}
408
409 printCore :: DynFlags -> CoreProgram -> IO ()
410 printCore dflags binds
411 = Err.dumpIfSet dflags True "Print Core" (pprCoreBindings binds)
412
413 ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts
414 ruleCheckPass current_phase pat guts = do
415 rb <- getRuleBase
416 dflags <- getDynFlags
417 vis_orphs <- getVisibleOrphanMods
418 liftIO $ Err.showPass dflags "RuleCheck"
419 liftIO $ log_action dflags dflags Err.SevDump noSrcSpan defaultDumpStyle
420 (ruleCheckProgram current_phase pat
421 (RuleEnv rb vis_orphs) (mg_binds guts))
422 return guts
423
424
425 doPassDUM :: (DynFlags -> UniqSupply -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
426 doPassDUM do_pass = doPassM $ \binds -> do
427 dflags <- getDynFlags
428 us <- getUniqueSupplyM
429 liftIO $ do_pass dflags us binds
430
431 doPassDM :: (DynFlags -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
432 doPassDM do_pass = doPassDUM (\dflags -> const (do_pass dflags))
433
434 doPassD :: (DynFlags -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
435 doPassD do_pass = doPassDM (\dflags -> return . do_pass dflags)
436
437 doPassDU :: (DynFlags -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
438 doPassDU do_pass = doPassDUM (\dflags us -> return . do_pass dflags us)
439
440 doPassU :: (UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
441 doPassU do_pass = doPassDU (const do_pass)
442
443 doPassDFM :: (DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
444 doPassDFM do_pass guts = do
445 dflags <- getDynFlags
446 p_fam_env <- getPackageFamInstEnv
447 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
448 doPassM (liftIO . do_pass dflags fam_envs) guts
449
450 doPassDFU :: (DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
451 doPassDFU do_pass guts = do
452 dflags <- getDynFlags
453 us <- getUniqueSupplyM
454 p_fam_env <- getPackageFamInstEnv
455 let fam_envs = (p_fam_env, mg_fam_inst_env guts)
456 doPass (do_pass dflags fam_envs us) guts
457
458 -- Most passes return no stats and don't change rules: these combinators
459 -- let us lift them to the full blown ModGuts+CoreM world
460 doPassM :: Monad m => (CoreProgram -> m CoreProgram) -> ModGuts -> m ModGuts
461 doPassM bind_f guts = do
462 binds' <- bind_f (mg_binds guts)
463 return (guts { mg_binds = binds' })
464
465 doPass :: (CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
466 doPass bind_f guts = return $ guts { mg_binds = bind_f (mg_binds guts) }
467
468 -- Observer passes just peek; don't modify the bindings at all
469 observe :: (DynFlags -> CoreProgram -> IO a) -> ModGuts -> CoreM ModGuts
470 observe do_pass = doPassM $ \binds -> do
471 dflags <- getDynFlags
472 _ <- liftIO $ do_pass dflags binds
473 return binds
474
475 {-
476 ************************************************************************
477 * *
478 Gentle simplification
479 * *
480 ************************************************************************
481 -}
482
483 simplifyExpr :: DynFlags -- includes spec of what core-to-core passes to do
484 -> CoreExpr
485 -> IO CoreExpr
486 -- simplifyExpr is called by the driver to simplify an
487 -- expression typed in at the interactive prompt
488 --
489 -- Also used by Template Haskell
490 simplifyExpr dflags expr
491 = do {
492 ; Err.showPass dflags "Simplify"
493
494 ; us <- mkSplitUniqSupply 's'
495
496 ; let sz = exprSize expr
497
498 ; (expr', counts) <- initSmpl dflags emptyRuleEnv
499 emptyFamInstEnvs us sz
500 (simplExprGently (simplEnvForGHCi dflags) expr)
501
502 ; Err.dumpIfSet dflags (dopt Opt_D_dump_simpl_stats dflags)
503 "Simplifier statistics" (pprSimplCount counts)
504
505 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl "Simplified expression"
506 (pprCoreExpr expr')
507
508 ; return expr'
509 }
510
511 simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr
512 -- Simplifies an expression
513 -- does occurrence analysis, then simplification
514 -- and repeats (twice currently) because one pass
515 -- alone leaves tons of crud.
516 -- Used (a) for user expressions typed in at the interactive prompt
517 -- (b) the LHS and RHS of a RULE
518 -- (c) Template Haskell splices
519 --
520 -- The name 'Gently' suggests that the SimplifierMode is SimplGently,
521 -- and in fact that is so.... but the 'Gently' in simplExprGently doesn't
522 -- enforce that; it just simplifies the expression twice
523
524 -- It's important that simplExprGently does eta reduction; see
525 -- Note [Simplifying the left-hand side of a RULE] above. The
526 -- simplifier does indeed do eta reduction (it's in Simplify.completeLam)
527 -- but only if -O is on.
528
529 simplExprGently env expr = do
530 expr1 <- simplExpr env (occurAnalyseExpr expr)
531 simplExpr env (occurAnalyseExpr expr1)
532
533 {-
534 ************************************************************************
535 * *
536 \subsection{The driver for the simplifier}
537 * *
538 ************************************************************************
539 -}
540
541 simplifyPgm :: CoreToDo -> ModGuts -> CoreM ModGuts
542 simplifyPgm pass guts
543 = do { hsc_env <- getHscEnv
544 ; us <- getUniqueSupplyM
545 ; rb <- getRuleBase
546 ; liftIOWithCount $
547 simplifyPgmIO pass hsc_env us rb guts }
548
549 simplifyPgmIO :: CoreToDo
550 -> HscEnv
551 -> UniqSupply
552 -> RuleBase
553 -> ModGuts
554 -> IO (SimplCount, ModGuts) -- New bindings
555
556 simplifyPgmIO pass@(CoreDoSimplify max_iterations mode)
557 hsc_env us hpt_rule_base
558 guts@(ModGuts { mg_module = this_mod
559 , mg_rdr_env = rdr_env
560 , mg_deps = deps
561 , mg_binds = binds, mg_rules = rules
562 , mg_fam_inst_env = fam_inst_env })
563 = do { (termination_msg, it_count, counts_out, guts')
564 <- do_iteration us 1 [] binds rules
565
566 ; Err.dumpIfSet dflags (dopt Opt_D_verbose_core2core dflags &&
567 dopt Opt_D_dump_simpl_stats dflags)
568 "Simplifier statistics for following pass"
569 (vcat [text termination_msg <+> text "after" <+> ppr it_count
570 <+> text "iterations",
571 blankLine,
572 pprSimplCount counts_out])
573
574 ; return (counts_out, guts')
575 }
576 where
577 dflags = hsc_dflags hsc_env
578 print_unqual = mkPrintUnqualified dflags rdr_env
579 simpl_env = mkSimplEnv mode
580 active_rule = activeRule simpl_env
581
582 do_iteration :: UniqSupply
583 -> Int -- Counts iterations
584 -> [SimplCount] -- Counts from earlier iterations, reversed
585 -> CoreProgram -- Bindings in
586 -> [CoreRule] -- and orphan rules
587 -> IO (String, Int, SimplCount, ModGuts)
588
589 do_iteration us iteration_no counts_so_far binds rules
590 -- iteration_no is the number of the iteration we are
591 -- about to begin, with '1' for the first
592 | iteration_no > max_iterations -- Stop if we've run out of iterations
593 = WARN( debugIsOn && (max_iterations > 2)
594 , hang (ptext (sLit "Simplifier bailing out after") <+> int max_iterations
595 <+> ptext (sLit "iterations")
596 <+> (brackets $ hsep $ punctuate comma $
597 map (int . simplCountN) (reverse counts_so_far)))
598 2 (ptext (sLit "Size =") <+> ppr (coreBindsStats binds)))
599
600 -- Subtract 1 from iteration_no to get the
601 -- number of iterations we actually completed
602 return ( "Simplifier baled out", iteration_no - 1
603 , totalise counts_so_far
604 , guts { mg_binds = binds, mg_rules = rules } )
605
606 -- Try and force thunks off the binds; significantly reduces
607 -- space usage, especially with -O. JRS, 000620.
608 | let sz = coreBindsSize binds
609 , () <- sz `seq` () -- Force it
610 = do {
611 -- Occurrence analysis
612 let { -- Note [Vectorisation declarations and occurrences]
613 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
614 -- During the 'InitialPhase' (i.e., before vectorisation), we need to make sure
615 -- that the right-hand sides of vectorisation declarations are taken into
616 -- account during occurrence analysis. After the 'InitialPhase', we need to ensure
617 -- that the binders representing variable vectorisation declarations are kept alive.
618 -- (In contrast to automatically vectorised variables, their unvectorised versions
619 -- don't depend on them.)
620 vectVars = mkVarSet $
621 catMaybes [ fmap snd $ lookupVarEnv (vectInfoVar (mg_vect_info guts)) bndr
622 | Vect bndr _ <- mg_vect_decls guts]
623 ++
624 catMaybes [ fmap snd $ lookupVarEnv (vectInfoVar (mg_vect_info guts)) bndr
625 | bndr <- bindersOfBinds binds]
626 -- FIXME: This second comprehensions is only needed as long as we
627 -- have vectorised bindings where we get "Could NOT call
628 -- vectorised from original version".
629 ; (maybeVects, maybeVectVars)
630 = case sm_phase mode of
631 InitialPhase -> (mg_vect_decls guts, vectVars)
632 _ -> ([], vectVars)
633 ; tagged_binds = {-# SCC "OccAnal" #-}
634 occurAnalysePgm this_mod active_rule rules
635 maybeVects maybeVectVars binds
636 } ;
637 Err.dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis"
638 (pprCoreBindings tagged_binds);
639
640 -- Get any new rules, and extend the rule base
641 -- See Note [Overall plumbing for rules] in Rules.hs
642 -- We need to do this regularly, because simplification can
643 -- poke on IdInfo thunks, which in turn brings in new rules
644 -- behind the scenes. Otherwise there's a danger we'll simply
645 -- miss the rules for Ids hidden inside imported inlinings
646 eps <- hscEPS hsc_env ;
647 let { rule_base1 = unionRuleBase hpt_rule_base (eps_rule_base eps)
648 ; rule_base2 = extendRuleBaseList rule_base1 rules
649 ; fam_envs = (eps_fam_inst_env eps, fam_inst_env)
650 ; vis_orphs = this_mod : dep_orphs deps } ;
651
652 -- Simplify the program
653 ((binds1, rules1), counts1) <-
654 initSmpl dflags (mkRuleEnv rule_base2 vis_orphs) fam_envs us1 sz $
655 do { env1 <- {-# SCC "SimplTopBinds" #-}
656 simplTopBinds simpl_env tagged_binds
657
658 -- Apply the substitution to rules defined in this module
659 -- for imported Ids. Eg RULE map my_f = blah
660 -- If we have a substitution my_f :-> other_f, we'd better
661 -- apply it to the rule to, or it'll never match
662 ; rules1 <- mapM (simplRule env1 Nothing) rules
663
664 ; return (getFloatBinds env1, rules1) } ;
665
666 -- Stop if nothing happened; don't dump output
667 if isZeroSimplCount counts1 then
668 return ( "Simplifier reached fixed point", iteration_no
669 , totalise (counts1 : counts_so_far) -- Include "free" ticks
670 , guts { mg_binds = binds1, mg_rules = rules1 } )
671 else do {
672 -- Short out indirections
673 -- We do this *after* at least one run of the simplifier
674 -- because indirection-shorting uses the export flag on *occurrences*
675 -- and that isn't guaranteed to be ok until after the first run propagates
676 -- stuff from the binding site to its occurrences
677 --
678 -- ToDo: alas, this means that indirection-shorting does not happen at all
679 -- if the simplifier does nothing (not common, I know, but unsavoury)
680 let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ;
681
682 -- Dump the result of this iteration
683 dump_end_iteration dflags print_unqual iteration_no counts1 binds2 rules1 ;
684 lintPassResult hsc_env pass binds2 ;
685
686 -- Loop
687 do_iteration us2 (iteration_no + 1) (counts1:counts_so_far) binds2 rules1
688 } }
689 | otherwise = panic "do_iteration"
690 where
691 (us1, us2) = splitUniqSupply us
692
693 -- Remember the counts_so_far are reversed
694 totalise :: [SimplCount] -> SimplCount
695 totalise = foldr (\c acc -> acc `plusSimplCount` c)
696 (zeroSimplCount dflags)
697
698 simplifyPgmIO _ _ _ _ _ = panic "simplifyPgmIO"
699
700 -------------------
701 dump_end_iteration :: DynFlags -> PrintUnqualified -> Int
702 -> SimplCount -> CoreProgram -> [CoreRule] -> IO ()
703 dump_end_iteration dflags print_unqual iteration_no counts binds rules
704 = dumpPassResult dflags print_unqual mb_flag hdr pp_counts binds rules
705 where
706 mb_flag | dopt Opt_D_dump_simpl_iterations dflags = Just Opt_D_dump_simpl_iterations
707 | otherwise = Nothing
708 -- Show details if Opt_D_dump_simpl_iterations is on
709
710 hdr = ptext (sLit "Simplifier iteration=") <> int iteration_no
711 pp_counts = vcat [ ptext (sLit "---- Simplifier counts for") <+> hdr
712 , pprSimplCount counts
713 , ptext (sLit "---- End of simplifier counts for") <+> hdr ]
714
715 {-
716 ************************************************************************
717 * *
718 Shorting out indirections
719 * *
720 ************************************************************************
721
722 If we have this:
723
724 x_local = <expression>
725 ...bindings...
726 x_exported = x_local
727
728 where x_exported is exported, and x_local is not, then we replace it with this:
729
730 x_exported = <expression>
731 x_local = x_exported
732 ...bindings...
733
734 Without this we never get rid of the x_exported = x_local thing. This
735 save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
736 makes strictness information propagate better. This used to happen in
737 the final phase, but it's tidier to do it here.
738
739 Note [Transferring IdInfo]
740 ~~~~~~~~~~~~~~~~~~~~~~~~~~
741 We want to propagage any useful IdInfo on x_local to x_exported.
742
743 STRICTNESS: if we have done strictness analysis, we want the strictness info on
744 x_local to transfer to x_exported. Hence the copyIdInfo call.
745
746 RULES: we want to *add* any RULES for x_local to x_exported.
747
748
749 Note [Messing up the exported Id's RULES]
750 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
751 We must be careful about discarding (obviously) or even merging the
752 RULES on the exported Id. The example that went bad on me at one stage
753 was this one:
754
755 iterate :: (a -> a) -> a -> [a]
756 [Exported]
757 iterate = iterateList
758
759 iterateFB c f x = x `c` iterateFB c f (f x)
760 iterateList f x = x : iterateList f (f x)
761 [Not exported]
762
763 {-# RULES
764 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
765 "iterateFB" iterateFB (:) = iterateList
766 #-}
767
768 This got shorted out to:
769
770 iterateList :: (a -> a) -> a -> [a]
771 iterateList = iterate
772
773 iterateFB c f x = x `c` iterateFB c f (f x)
774 iterate f x = x : iterate f (f x)
775
776 {-# RULES
777 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
778 "iterateFB" iterateFB (:) = iterate
779 #-}
780
781 And now we get an infinite loop in the rule system
782 iterate f x -> build (\cn -> iterateFB c f x)
783 -> iterateFB (:) f x
784 -> iterate f x
785
786 Old "solution":
787 use rule switching-off pragmas to get rid
788 of iterateList in the first place
789
790 But in principle the user *might* want rules that only apply to the Id
791 he says. And inline pragmas are similar
792 {-# NOINLINE f #-}
793 f = local
794 local = <stuff>
795 Then we do not want to get rid of the NOINLINE.
796
797 Hence hasShortableIdinfo.
798
799
800 Note [Rules and indirection-zapping]
801 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
802 Problem: what if x_exported has a RULE that mentions something in ...bindings...?
803 Then the things mentioned can be out of scope! Solution
804 a) Make sure that in this pass the usage-info from x_exported is
805 available for ...bindings...
806 b) If there are any such RULES, rec-ify the entire top-level.
807 It'll get sorted out next time round
808
809 Other remarks
810 ~~~~~~~~~~~~~
811 If more than one exported thing is equal to a local thing (i.e., the
812 local thing really is shared), then we do one only:
813 \begin{verbatim}
814 x_local = ....
815 x_exported1 = x_local
816 x_exported2 = x_local
817 ==>
818 x_exported1 = ....
819
820 x_exported2 = x_exported1
821 \end{verbatim}
822
823 We rely on prior eta reduction to simplify things like
824 \begin{verbatim}
825 x_exported = /\ tyvars -> x_local tyvars
826 ==>
827 x_exported = x_local
828 \end{verbatim}
829 Hence,there's a possibility of leaving unchanged something like this:
830 \begin{verbatim}
831 x_local = ....
832 x_exported1 = x_local Int
833 \end{verbatim}
834 By the time we've thrown away the types in STG land this
835 could be eliminated. But I don't think it's very common
836 and it's dangerous to do this fiddling in STG land
837 because we might elminate a binding that's mentioned in the
838 unfolding for something.
839
840 Note [Indirection zapping and ticks]
841 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
842
843 Unfortunately this is another place where we need a special case for
844 ticks. The following happens quite regularly:
845
846 x_local = <expression>
847 x_exported = tick<x> x_local
848
849 Which we want to become:
850
851 x_exported = tick<x> <expression>
852
853 As it makes no sense to keep the tick and the expression on separate
854 bindings. Note however that that this might increase the ticks scoping
855 over the execution of x_local, so we can only do this for floatable
856 ticks. More often than not, other references will be unfoldings of
857 x_exported, and therefore carry the tick anyway.
858 -}
859
860 type IndEnv = IdEnv (Id, [Tickish Var]) -- Maps local_id -> exported_id, ticks
861
862 shortOutIndirections :: CoreProgram -> CoreProgram
863 shortOutIndirections binds
864 | isEmptyVarEnv ind_env = binds
865 | no_need_to_flatten = binds' -- See Note [Rules and indirect-zapping]
866 | otherwise = [Rec (flattenBinds binds')] -- for this no_need_to_flatten stuff
867 where
868 ind_env = makeIndEnv binds
869 -- These exported Ids are the subjects of the indirection-elimination
870 exp_ids = map fst $ varEnvElts ind_env
871 exp_id_set = mkVarSet exp_ids
872 no_need_to_flatten = all (null . specInfoRules . idSpecialisation) exp_ids
873 binds' = concatMap zap binds
874
875 zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)]
876 zap (Rec pairs) = [Rec (concatMap zapPair pairs)]
877
878 zapPair (bndr, rhs)
879 | bndr `elemVarSet` exp_id_set = []
880 | Just (exp_id, ticks) <- lookupVarEnv ind_env bndr
881 = [(transferIdInfo exp_id bndr,
882 mkTicks ticks rhs),
883 (bndr, Var exp_id)]
884 | otherwise = [(bndr,rhs)]
885
886 makeIndEnv :: [CoreBind] -> IndEnv
887 makeIndEnv binds
888 = foldr add_bind emptyVarEnv binds
889 where
890 add_bind :: CoreBind -> IndEnv -> IndEnv
891 add_bind (NonRec exported_id rhs) env = add_pair (exported_id, rhs) env
892 add_bind (Rec pairs) env = foldr add_pair env pairs
893
894 add_pair :: (Id,CoreExpr) -> IndEnv -> IndEnv
895 add_pair (exported_id, exported) env
896 | (ticks, Var local_id) <- stripTicksTop tickishFloatable exported
897 , shortMeOut env exported_id local_id
898 = extendVarEnv env local_id (exported_id, ticks)
899 add_pair _ env = env
900
901 -----------------
902 shortMeOut :: IndEnv -> Id -> Id -> Bool
903 shortMeOut ind_env exported_id local_id
904 -- The if-then-else stuff is just so I can get a pprTrace to see
905 -- how often I don't get shorting out because of IdInfo stuff
906 = if isExportedId exported_id && -- Only if this is exported
907
908 isLocalId local_id && -- Only if this one is defined in this
909 -- module, so that we *can* change its
910 -- binding to be the exported thing!
911
912 not (isExportedId local_id) && -- Only if this one is not itself exported,
913 -- since the transformation will nuke it
914
915 not (local_id `elemVarEnv` ind_env) -- Only if not already substituted for
916 then
917 if hasShortableIdInfo exported_id
918 then True -- See Note [Messing up the exported Id's IdInfo]
919 else WARN( True, ptext (sLit "Not shorting out:") <+> ppr exported_id )
920 False
921 else
922 False
923
924 -----------------
925 hasShortableIdInfo :: Id -> Bool
926 -- True if there is no user-attached IdInfo on exported_id,
927 -- so we can safely discard it
928 -- See Note [Messing up the exported Id's IdInfo]
929 hasShortableIdInfo id
930 = isEmptySpecInfo (specInfo info)
931 && isDefaultInlinePragma (inlinePragInfo info)
932 && not (isStableUnfolding (unfoldingInfo info))
933 where
934 info = idInfo id
935
936 -----------------
937 transferIdInfo :: Id -> Id -> Id
938 -- See Note [Transferring IdInfo]
939 -- If we have
940 -- lcl_id = e; exp_id = lcl_id
941 -- and lcl_id has useful IdInfo, we don't want to discard it by going
942 -- gbl_id = e; lcl_id = gbl_id
943 -- Instead, transfer IdInfo from lcl_id to exp_id
944 -- Overwriting, rather than merging, seems to work ok.
945 transferIdInfo exported_id local_id
946 = modifyIdInfo transfer exported_id
947 where
948 local_info = idInfo local_id
949 transfer exp_info = exp_info `setStrictnessInfo` strictnessInfo local_info
950 `setUnfoldingInfo` unfoldingInfo local_info
951 `setInlinePragInfo` inlinePragInfo local_info
952 `setSpecInfo` addSpecInfo (specInfo exp_info) new_info
953 new_info = setSpecInfoHead (idName exported_id)
954 (specInfo local_info)
955 -- Remember to set the function-name field of the
956 -- rules as we transfer them from one function to another