Typofixes in comments and whitespace only [ci skip]
[ghc.git] / compiler / cmm / CmmPipeline.hs
1 {-# LANGUAGE BangPatterns #-}
2
3 module CmmPipeline (
4 -- | Converts C-- with an implicit stack and native C-- calls into
5 -- optimized, CPS converted and native-call-less C--. The latter
6 -- C-- can be used to generate assembly.
7 cmmPipeline
8 ) where
9
10 import GhcPrelude
11
12 import Cmm
13 import CmmLint
14 import CmmBuildInfoTables
15 import CmmCommonBlockElim
16 import CmmImplementSwitchPlans
17 import CmmProcPoint
18 import CmmContFlowOpt
19 import CmmLayoutStack
20 import CmmSink
21 import Hoopl.Collections
22
23 import UniqSupply
24 import DynFlags
25 import ErrUtils
26 import HscTypes
27 import Control.Monad
28 import Outputable
29 import Platform
30
31 -----------------------------------------------------------------------------
32 -- | Top level driver for C-- pipeline
33 -----------------------------------------------------------------------------
34
35 cmmPipeline
36 :: HscEnv -- Compilation env including
37 -- dynamic flags: -dcmm-lint -ddump-cmm-cps
38 -> ModuleSRTInfo -- Info about SRTs generated so far
39 -> CmmGroup -- Input C-- with Procedures
40 -> IO (ModuleSRTInfo, CmmGroup) -- Output CPS transformed C--
41
42 cmmPipeline hsc_env srtInfo prog =
43 do let dflags = hsc_dflags hsc_env
44
45 tops <- {-# SCC "tops" #-} mapM (cpsTop hsc_env) prog
46
47 (srtInfo, cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags srtInfo tops
48 dumpWith dflags Opt_D_dump_cmm_cps "Post CPS Cmm" (ppr cmms)
49
50 return (srtInfo, cmms)
51
52
53 cpsTop :: HscEnv -> CmmDecl -> IO (CAFEnv, [CmmDecl])
54 cpsTop _ p@(CmmData {}) = return (mapEmpty, [p])
55 cpsTop hsc_env proc =
56 do
57 ----------- Control-flow optimisations ----------------------------------
58
59 -- The first round of control-flow optimisation speeds up the
60 -- later passes by removing lots of empty blocks, so we do it
61 -- even when optimisation isn't turned on.
62 --
63 CmmProc h l v g <- {-# SCC "cmmCfgOpts(1)" #-}
64 return $ cmmCfgOptsProc splitting_proc_points proc
65 dump Opt_D_dump_cmm_cfg "Post control-flow optimisations" g
66
67 let !TopInfo {stack_info=StackInfo { arg_space = entry_off
68 , do_layout = do_layout }} = h
69
70 ----------- Eliminate common blocks -------------------------------------
71 g <- {-# SCC "elimCommonBlocks" #-}
72 condPass Opt_CmmElimCommonBlocks elimCommonBlocks g
73 Opt_D_dump_cmm_cbe "Post common block elimination"
74
75 -- Any work storing block Labels must be performed _after_
76 -- elimCommonBlocks
77
78 g <- {-# SCC "createSwitchPlans" #-}
79 runUniqSM $ cmmImplementSwitchPlans dflags g
80 dump Opt_D_dump_cmm_switch "Post switch plan" g
81
82 ----------- Proc points -------------------------------------------------
83 let call_pps = {-# SCC "callProcPoints" #-} callProcPoints g
84 proc_points <-
85 if splitting_proc_points
86 then do
87 pp <- {-# SCC "minimalProcPointSet" #-} runUniqSM $
88 minimalProcPointSet (targetPlatform dflags) call_pps g
89 dumpWith dflags Opt_D_dump_cmm_proc "Proc points"
90 (ppr l $$ ppr pp $$ ppr g)
91 return pp
92 else
93 return call_pps
94
95 ----------- Layout the stack and manifest Sp ----------------------------
96 (g, stackmaps) <-
97 {-# SCC "layoutStack" #-}
98 if do_layout
99 then runUniqSM $ cmmLayoutStack dflags proc_points entry_off g
100 else return (g, mapEmpty)
101 dump Opt_D_dump_cmm_sp "Layout Stack" g
102
103 ----------- Sink and inline assignments --------------------------------
104 g <- {-# SCC "sink" #-} -- See Note [Sinking after stack layout]
105 condPass Opt_CmmSink (cmmSink dflags) g
106 Opt_D_dump_cmm_sink "Sink assignments"
107
108 ------------- CAF analysis ----------------------------------------------
109 let cafEnv = {-# SCC "cafAnal" #-} cafAnal call_pps l g
110 dumpWith dflags Opt_D_dump_cmm_caf "CAFEnv" (ppr cafEnv)
111
112 g <- if splitting_proc_points
113 then do
114 ------------- Split into separate procedures -----------------------
115 let pp_map = {-# SCC "procPointAnalysis" #-}
116 procPointAnalysis proc_points g
117 dumpWith dflags Opt_D_dump_cmm_procmap "procpoint map" $
118 ppr pp_map
119 g <- {-# SCC "splitAtProcPoints" #-} runUniqSM $
120 splitAtProcPoints dflags l call_pps proc_points pp_map
121 (CmmProc h l v g)
122 dumps Opt_D_dump_cmm_split "Post splitting" g
123 return g
124 else do
125 -- attach info tables to return points
126 return $ [attachContInfoTables call_pps (CmmProc h l v g)]
127
128 ------------- Populate info tables with stack info -----------------
129 g <- {-# SCC "setInfoTableStackMap" #-}
130 return $ map (setInfoTableStackMap dflags stackmaps) g
131 dumps Opt_D_dump_cmm_info "after setInfoTableStackMap" g
132
133 ----------- Control-flow optimisations -----------------------------
134 g <- {-# SCC "cmmCfgOpts(2)" #-}
135 return $ if optLevel dflags >= 1
136 then map (cmmCfgOptsProc splitting_proc_points) g
137 else g
138 g <- return (map removeUnreachableBlocksProc g)
139 -- See Note [unreachable blocks]
140 dumps Opt_D_dump_cmm_cfg "Post control-flow optimisations" g
141
142 return (cafEnv, g)
143
144 where dflags = hsc_dflags hsc_env
145 platform = targetPlatform dflags
146 dump = dumpGraph dflags
147
148 dumps flag name
149 = mapM_ (dumpWith dflags flag name . ppr)
150
151 condPass flag pass g dumpflag dumpname =
152 if gopt flag dflags
153 then do
154 g <- return $ pass g
155 dump dumpflag dumpname g
156 return g
157 else return g
158
159
160 -- we don't need to split proc points for the NCG, unless
161 -- tablesNextToCode is off. The latter is because we have no
162 -- label to put on info tables for basic blocks that are not
163 -- the entry point.
164 splitting_proc_points = hscTarget dflags /= HscAsm
165 || not (tablesNextToCode dflags)
166 || -- Note [inconsistent-pic-reg]
167 usingInconsistentPicReg
168 usingInconsistentPicReg
169 = case (platformArch platform, platformOS platform, positionIndependent dflags)
170 of (ArchX86, OSDarwin, pic) -> pic
171 (ArchPPC, OSDarwin, pic) -> pic
172 _ -> False
173
174 -- Note [Sinking after stack layout]
175 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
176 --
177 -- In the past we considered running sinking pass also before stack
178 -- layout, but after making some measurements we realized that:
179 --
180 -- a) running sinking only before stack layout produces slower
181 -- code than running sinking only before stack layout
182 --
183 -- b) running sinking both before and after stack layout produces
184 -- code that has the same performance as when running sinking
185 -- only after stack layout.
186 --
187 -- In other words sinking before stack layout doesn't buy as anything.
188 --
189 -- An interesting question is "why is it better to run sinking after
190 -- stack layout"? It seems that the major reason are stores and loads
191 -- generated by stack layout. Consider this code before stack layout:
192 --
193 -- c1E:
194 -- _c1C::P64 = R3;
195 -- _c1B::P64 = R2;
196 -- _c1A::P64 = R1;
197 -- I64[(young<c1D> + 8)] = c1D;
198 -- call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
199 -- c1D:
200 -- R3 = _c1C::P64;
201 -- R2 = _c1B::P64;
202 -- R1 = _c1A::P64;
203 -- call (P64[(old + 8)])(R3, R2, R1) args: 8, res: 0, upd: 8;
204 --
205 -- Stack layout pass will save all local variables live across a call
206 -- (_c1C, _c1B and _c1A in this example) on the stack just before
207 -- making a call and reload them from the stack after returning from a
208 -- call:
209 --
210 -- c1E:
211 -- _c1C::P64 = R3;
212 -- _c1B::P64 = R2;
213 -- _c1A::P64 = R1;
214 -- I64[Sp - 32] = c1D;
215 -- P64[Sp - 24] = _c1A::P64;
216 -- P64[Sp - 16] = _c1B::P64;
217 -- P64[Sp - 8] = _c1C::P64;
218 -- Sp = Sp - 32;
219 -- call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
220 -- c1D:
221 -- _c1A::P64 = P64[Sp + 8];
222 -- _c1B::P64 = P64[Sp + 16];
223 -- _c1C::P64 = P64[Sp + 24];
224 -- R3 = _c1C::P64;
225 -- R2 = _c1B::P64;
226 -- R1 = _c1A::P64;
227 -- Sp = Sp + 32;
228 -- call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8;
229 --
230 -- If we don't run sinking pass after stack layout we are basically
231 -- left with such code. However, running sinking on this code can lead
232 -- to significant improvements:
233 --
234 -- c1E:
235 -- I64[Sp - 32] = c1D;
236 -- P64[Sp - 24] = R1;
237 -- P64[Sp - 16] = R2;
238 -- P64[Sp - 8] = R3;
239 -- Sp = Sp - 32;
240 -- call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
241 -- c1D:
242 -- R3 = P64[Sp + 24];
243 -- R2 = P64[Sp + 16];
244 -- R1 = P64[Sp + 8];
245 -- Sp = Sp + 32;
246 -- call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8;
247 --
248 -- Now we only have 9 assignments instead of 15.
249 --
250 -- There is one case when running sinking before stack layout could
251 -- be beneficial. Consider this:
252 --
253 -- L1:
254 -- x = y
255 -- call f() returns L2
256 -- L2: ...x...y...
257 --
258 -- Since both x and y are live across a call to f, they will be stored
259 -- on the stack during stack layout and restored after the call:
260 --
261 -- L1:
262 -- x = y
263 -- P64[Sp - 24] = L2
264 -- P64[Sp - 16] = x
265 -- P64[Sp - 8] = y
266 -- Sp = Sp - 24
267 -- call f() returns L2
268 -- L2:
269 -- y = P64[Sp + 16]
270 -- x = P64[Sp + 8]
271 -- Sp = Sp + 24
272 -- ...x...y...
273 --
274 -- However, if we run sinking before stack layout we would propagate x
275 -- to its usage place (both x and y must be local register for this to
276 -- be possible - global registers cannot be floated past a call):
277 --
278 -- L1:
279 -- x = y
280 -- call f() returns L2
281 -- L2: ...y...y...
282 --
283 -- Thus making x dead at the call to f(). If we ran stack layout now
284 -- we would generate less stores and loads:
285 --
286 -- L1:
287 -- x = y
288 -- P64[Sp - 16] = L2
289 -- P64[Sp - 8] = y
290 -- Sp = Sp - 16
291 -- call f() returns L2
292 -- L2:
293 -- y = P64[Sp + 8]
294 -- Sp = Sp + 16
295 -- ...y...y...
296 --
297 -- But since we don't see any benefits from running sinking befroe stack
298 -- layout, this situation probably doesn't arise too often in practice.
299 --
300
301 {- Note [inconsistent-pic-reg]
302
303 On x86/Darwin, PIC is implemented by inserting a sequence like
304
305 call 1f
306 1: popl %reg
307
308 at the proc entry point, and then referring to labels as offsets from
309 %reg. If we don't split proc points, then we could have many entry
310 points in a proc that would need this sequence, and each entry point
311 would then get a different value for %reg. If there are any join
312 points, then at the join point we don't have a consistent value for
313 %reg, so we don't know how to refer to labels.
314
315 Hence, on x86/Darwin, we have to split proc points, and then each proc
316 point will get its own PIC initialisation sequence.
317
318 The situation is the same for ppc/Darwin. We use essentially the same
319 sequence to load the program counter onto reg:
320
321 bcl 20,31,1f
322 1: mflr reg
323
324 This isn't an issue on x86/ELF, where the sequence is
325
326 call 1f
327 1: popl %reg
328 addl $_GLOBAL_OFFSET_TABLE_+(.-1b), %reg
329
330 so %reg always has a consistent value: the address of
331 _GLOBAL_OFFSET_TABLE_, regardless of which entry point we arrived via.
332
333 -}
334
335 {- Note [unreachable blocks]
336
337 The control-flow optimiser sometimes leaves unreachable blocks behind
338 containing junk code. These aren't necessarily a problem, but
339 removing them is good because it might save time in the native code
340 generator later.
341
342 -}
343
344 runUniqSM :: UniqSM a -> IO a
345 runUniqSM m = do
346 us <- mkSplitUniqSupply 'u'
347 return (initUs_ us m)
348
349
350 dumpGraph :: DynFlags -> DumpFlag -> String -> CmmGraph -> IO ()
351 dumpGraph dflags flag name g = do
352 when (gopt Opt_DoCmmLinting dflags) $ do_lint g
353 dumpWith dflags flag name (ppr g)
354 where
355 do_lint g = case cmmLintGraph dflags g of
356 Just err -> do { fatalErrorMsg dflags err
357 ; ghcExit dflags 1
358 }
359 Nothing -> return ()
360
361 dumpWith :: DynFlags -> DumpFlag -> String -> SDoc -> IO ()
362 dumpWith dflags flag txt sdoc = do
363 -- ToDo: No easy way of say "dump all the cmm, *and* split
364 -- them into files." Also, -ddump-cmm-verbose doesn't play
365 -- nicely with -ddump-to-file, since the headers get omitted.
366 dumpIfSet_dyn dflags flag txt sdoc
367 when (not (dopt flag dflags)) $
368 dumpIfSet_dyn dflags Opt_D_dump_cmm_verbose txt sdoc