Typos in comments [ci skip]
[ghc.git] / compiler / cmm / CmmPipeline.hs
1 {-# LANGUAGE BangPatterns #-}
2
3 module CmmPipeline (
4 -- | Converts C-- with an implicit stack and native C-- calls into
5 -- optimized, CPS converted and native-call-less C--. The latter
6 -- C-- can be used to generate assembly.
7 cmmPipeline
8 ) where
9
10 import Cmm
11 import CmmLint
12 import CmmBuildInfoTables
13 import CmmCommonBlockElim
14 import CmmImplementSwitchPlans
15 import CmmProcPoint
16 import CmmContFlowOpt
17 import CmmLayoutStack
18 import CmmSink
19 import Hoopl
20
21 import UniqSupply
22 import DynFlags
23 import ErrUtils
24 import HscTypes
25 import Control.Monad
26 import Outputable
27 import Platform
28
29 -----------------------------------------------------------------------------
30 -- | Top level driver for C-- pipeline
31 -----------------------------------------------------------------------------
32
-- | Top-level entry point of the C-- pipeline.
--
-- Each declaration of the input group is run through 'cpsTop'; the
-- per-declaration results are then handed to 'doSRTs' together with the
-- incoming 'TopSRT'.  The resulting group is dumped via 'dumpWith' under
-- 'Opt_D_dump_cmm_cps' before being returned.
cmmPipeline
  :: HscEnv                 -- ^ Compilation env including dynamic flags
                            --   (e.g. -dcmm-lint, -ddump-cmm-cps)
  -> TopSRT                 -- ^ SRT table and accumulating list of compiled procs
  -> CmmGroup               -- ^ Input C-- with procedures
  -> IO (TopSRT, CmmGroup)  -- ^ Updated SRT and output CPS transformed C--
cmmPipeline hsc_env topSRT prog = do
    perDecl <- {-# SCC "tops" #-} mapM (cpsTop hsc_env) prog

    result@(_, cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags topSRT perDecl
    dumpWith dflags Opt_D_dump_cmm_cps "Post CPS Cmm" (ppr cmms)

    return result
  where
    dflags = hsc_dflags hsc_env
48
49
-- | Run the per-declaration part of the pipeline.
--
-- Data declarations are returned unchanged together with an empty CAF
-- environment.  A procedure goes through, in this order: control-flow
-- optimisation, common block elimination (if 'Opt_CmmElimCommonBlocks'),
-- switch plan implementation, proc-point determination, stack layout
-- (if the proc's 'do_layout' field says so), sinking (if 'Opt_CmmSink'),
-- CAF analysis, proc-point splitting or attaching of continuation info
-- tables, stack-map population, a second control-flow optimisation (only
-- when @optLevel >= 1@) and removal of unreachable blocks.
--
-- The result is the CAF environment computed for the procedure and the
-- list of declarations it was turned into.
cpsTop :: HscEnv -> CmmDecl -> IO (CAFEnv, [CmmDecl])
cpsTop _ dataDecl@(CmmData {}) = return (mapEmpty, [dataDecl])
cpsTop hsc_env proc = do
    ----------- Control-flow optimisations ----------------------------------

    -- This first round is run even when optimisation isn't turned on:
    -- removing lots of empty blocks here speeds up the later passes.
    CmmProc info entryLbl liveRegs cfgGraph <-
        {-# SCC "cmmCfgOpts(1)" #-}
        return (cmmCfgOptsProc splitting_proc_points proc)
    dump Opt_D_dump_cmm_cfg "Post control-flow optimisations" cfgGraph

    -- Strict match: pull the entry offset and the layout switch out of
    -- the proc's stack info.
    let !TopInfo { stack_info = StackInfo { arg_space = entryOff
                                          , do_layout = wantLayout } } = info

    ----------- Eliminate common blocks -------------------------------------
    cbeGraph <-
        {-# SCC "elimCommonBlocks" #-}
        condPass Opt_CmmElimCommonBlocks elimCommonBlocks cfgGraph
                 Opt_D_dump_cmm_cbe "Post common block elimination"

    -- Any work storing block Labels must be performed _after_
    -- elimCommonBlocks

    switchGraph <-
        {-# SCC "createSwitchPlans" #-}
        runUniqSM (cmmImplementSwitchPlans dflags cbeGraph)
    dump Opt_D_dump_cmm_switch "Post switch plan" switchGraph

    ----------- Proc points -------------------------------------------------
    let callPPs = {-# SCC "callProcPoints" #-} callProcPoints switchGraph
    procPoints <-
        if splitting_proc_points
            then do
                minimalPPs <-
                    {-# SCC "minimalProcPointSet" #-}
                    runUniqSM (minimalProcPointSet platform callPPs switchGraph)
                dumpWith dflags Opt_D_dump_cmm_proc "Proc points"
                    (ppr entryLbl $$ ppr minimalPPs $$ ppr switchGraph)
                return minimalPPs
            else return callPPs

    ----------- Layout the stack and manifest Sp ----------------------------
    (stackGraph, stackmaps) <-
        {-# SCC "layoutStack" #-}
        if wantLayout
            then runUniqSM (cmmLayoutStack dflags procPoints entryOff switchGraph)
            else return (switchGraph, mapEmpty)
    dump Opt_D_dump_cmm_sp "Layout Stack" stackGraph

    ----------- Sink and inline assignments --------------------------------
    sunkGraph <-
        {-# SCC "sink" #-} -- See Note [Sinking after stack layout]
        condPass Opt_CmmSink (cmmSink dflags) stackGraph
                 Opt_D_dump_cmm_sink "Sink assignments"

    ------------- CAF analysis ----------------------------------------------
    let cafEnv = {-# SCC "cafAnal" #-} cafAnal sunkGraph
    dumpWith dflags Opt_D_dump_cmm_caf "CAFEnv" (ppr cafEnv)

    -- The procedure as it stands after sinking; both branches below
    -- start from it.
    let wholeProc = CmmProc info entryLbl liveRegs sunkGraph
    procs <-
        if splitting_proc_points
            then do
                ------------- Split into separate procedures ---------------
                let ppMap = {-# SCC "procPointAnalysis" #-}
                            procPointAnalysis procPoints sunkGraph
                dumpWith dflags Opt_D_dump_cmm_procmap "procpoint map" (ppr ppMap)
                splitProcs <-
                    {-# SCC "splitAtProcPoints" #-}
                    runUniqSM (splitAtProcPoints dflags entryLbl callPPs
                                                 procPoints ppMap wholeProc)
                dumps Opt_D_dump_cmm_split "Post splitting" splitProcs
                return splitProcs
            else
                -- attach info tables to return points
                return [attachContInfoTables callPPs wholeProc]

    ------------- Populate info tables with stack info -----------------
    let withStackMaps = {-# SCC "setInfoTableStackMap" #-}
                        map (setInfoTableStackMap dflags stackmaps) procs
    dumps Opt_D_dump_cmm_info "after setInfoTableStackMap" withStackMaps

    ----------- Control-flow optimisations -----------------------------
    let reoptimised = {-# SCC "cmmCfgOpts(2)" #-}
                      if optLevel dflags >= 1
                          then map (cmmCfgOptsProc splitting_proc_points) withStackMaps
                          else withStackMaps
        -- See Note [unreachable blocks]
        reachableOnly = map removeUnreachableBlocksProc reoptimised
    dumps Opt_D_dump_cmm_cfg "Post control-flow optimisations" reachableOnly

    return (cafEnv, reachableOnly)
  where
    dflags   = hsc_dflags hsc_env
    platform = targetPlatform dflags

    -- Dump a single graph (linting it first when linting is enabled).
    dump = dumpGraph dflags

    -- Dump every declaration of a list under the same flag and title.
    dumps flag name decls =
        forM_ decls (dumpWith dflags flag name . ppr)

    -- Run a pure graph pass only when its flag is set, dumping the
    -- result; otherwise hand the graph back untouched.
    condPass flag pass graph dumpflag dumpname
        | gopt flag dflags = do
            let graph' = pass graph
            dump dumpflag dumpname graph'
            return graph'
        | otherwise = return graph

    -- we don't need to split proc points for the NCG, unless
    -- tablesNextToCode is off.  The latter is because we have no
    -- label to put on info tables for basic blocks that are not
    -- the entry point.
    splitting_proc_points =
        hscTarget dflags /= HscAsm
        || not (tablesNextToCode dflags)
        || usingInconsistentPicReg -- Note [inconsistent-pic-reg]

    -- Only x86/Darwin and ppc/Darwin are affected, and only with -fPIC.
    usingInconsistentPicReg =
        case (platformArch platform, platformOS platform) of
            (ArchX86, OSDarwin) -> picEnabled
            (ArchPPC, OSDarwin) -> picEnabled
            _                   -> False

    picEnabled = gopt Opt_PIC dflags
170
171 -- Note [Sinking after stack layout]
172 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
173 --
174 -- In the past we considered running sinking pass also before stack
175 -- layout, but after making some measurements we realized that:
176 --
177 -- a) running sinking only before stack layout produces slower
178 -- code than running sinking only after stack layout
179 --
180 -- b) running sinking both before and after stack layout produces
181 -- code that has the same performance as when running sinking
182 -- only after stack layout.
183 --
184 -- In other words sinking before stack layout doesn't buy us anything.
185 --
186 -- An interesting question is "why is it better to run sinking after
187 -- stack layout"? It seems that the major reason is the stores and loads
188 -- generated by stack layout. Consider this code before stack layout:
189 --
190 -- c1E:
191 -- _c1C::P64 = R3;
192 -- _c1B::P64 = R2;
193 -- _c1A::P64 = R1;
194 -- I64[(young<c1D> + 8)] = c1D;
195 -- call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
196 -- c1D:
197 -- R3 = _c1C::P64;
198 -- R2 = _c1B::P64;
199 -- R1 = _c1A::P64;
200 -- call (P64[(old + 8)])(R3, R2, R1) args: 8, res: 0, upd: 8;
201 --
202 -- Stack layout pass will save all local variables live across a call
203 -- (_c1C, _c1B and _c1A in this example) on the stack just before
204 -- making a call and reload them from the stack after returning from a
205 -- call:
206 --
207 -- c1E:
208 -- _c1C::P64 = R3;
209 -- _c1B::P64 = R2;
210 -- _c1A::P64 = R1;
211 -- I64[Sp - 32] = c1D;
212 -- P64[Sp - 24] = _c1A::P64;
213 -- P64[Sp - 16] = _c1B::P64;
214 -- P64[Sp - 8] = _c1C::P64;
215 -- Sp = Sp - 32;
216 -- call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
217 -- c1D:
218 -- _c1A::P64 = P64[Sp + 8];
219 -- _c1B::P64 = P64[Sp + 16];
220 -- _c1C::P64 = P64[Sp + 24];
221 -- R3 = _c1C::P64;
222 -- R2 = _c1B::P64;
223 -- R1 = _c1A::P64;
224 -- Sp = Sp + 32;
225 -- call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8;
226 --
227 -- If we don't run sinking pass after stack layout we are basically
228 -- left with such code. However, running sinking on this code can lead
229 -- to significant improvements:
230 --
231 -- c1E:
232 -- I64[Sp - 32] = c1D;
233 -- P64[Sp - 24] = R1;
234 -- P64[Sp - 16] = R2;
235 -- P64[Sp - 8] = R3;
236 -- Sp = Sp - 32;
237 -- call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
238 -- c1D:
239 -- R3 = P64[Sp + 24];
240 -- R2 = P64[Sp + 16];
241 -- R1 = P64[Sp + 8];
242 -- Sp = Sp + 32;
243 -- call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8;
244 --
245 -- Now we only have 9 assignments instead of 15.
246 --
247 -- There is one case when running sinking before stack layout could
248 -- be beneficial. Consider this:
249 --
250 -- L1:
251 -- x = y
252 -- call f() returns L2
253 -- L2: ...x...y...
254 --
255 -- Since both x and y are live across a call to f, they will be stored
256 -- on the stack during stack layout and restored after the call:
257 --
258 -- L1:
259 -- x = y
260 -- P64[Sp - 24] = L2
261 -- P64[Sp - 16] = x
262 -- P64[Sp - 8] = y
263 -- Sp = Sp - 24
264 -- call f() returns L2
265 -- L2:
266 -- y = P64[Sp + 16]
267 -- x = P64[Sp + 8]
268 -- Sp = Sp + 24
269 -- ...x...y...
270 --
271 -- However, if we run sinking before stack layout we would propagate x
272 -- to its usage place (both x and y must be local registers for this to
273 -- be possible - global registers cannot be floated past a call):
274 --
275 -- L1:
276 -- x = y
277 -- call f() returns L2
278 -- L2: ...y...y...
279 --
280 -- Thus making x dead at the call to f(). If we ran stack layout now
281 -- we would generate fewer stores and loads:
282 --
283 -- L1:
284 -- x = y
285 -- P64[Sp - 16] = L2
286 -- P64[Sp - 8] = y
287 -- Sp = Sp - 16
288 -- call f() returns L2
289 -- L2:
290 -- y = P64[Sp + 8]
291 -- Sp = Sp + 16
292 -- ...y...y...
293 --
294 -- But since we don't see any benefits from running sinking before stack
295 -- layout, this situation probably doesn't arise too often in practice.
296 --
297
298 {- Note [inconsistent-pic-reg]
299
300 On x86/Darwin, PIC is implemented by inserting a sequence like
301
302 call 1f
303 1: popl %reg
304
305 at the proc entry point, and then referring to labels as offsets from
306 %reg. If we don't split proc points, then we could have many entry
307 points in a proc that would need this sequence, and each entry point
308 would then get a different value for %reg. If there are any join
309 points, then at the join point we don't have a consistent value for
310 %reg, so we don't know how to refer to labels.
311
312 Hence, on x86/Darwin, we have to split proc points, and then each proc
313 point will get its own PIC initialisation sequence.
314
315 The situation is the same for ppc/Darwin. We use essentially the same
316 sequence to load the program counter onto reg:
317
318 bcl 20,31,1f
319 1: mflr reg
320
321 This isn't an issue on x86/ELF, where the sequence is
322
323 call 1f
324 1: popl %reg
325 addl $_GLOBAL_OFFSET_TABLE_+(.-1b), %reg
326
327 so %reg always has a consistent value: the address of
328 _GLOBAL_OFFSET_TABLE_, regardless of which entry point we arrived via.
329
330 -}
331
332 {- Note [unreachable blocks]
333
334 The control-flow optimiser sometimes leaves unreachable blocks behind
335 containing junk code. These aren't necessarily a problem, but
336 removing them is good because it might save time in the native code
337 generator later.
338
339 -}
340
-- | Run a 'UniqSM' computation in 'IO', feeding it a unique supply
-- freshly created with @mkSplitUniqSupply 'u'@.
runUniqSM :: UniqSM a -> IO a
runUniqSM action = fmap (\supply -> initUs_ supply action) (mkSplitUniqSupply 'u')
345
346
-- | Dump a graph under the given dump flag and title.
--
-- When 'Opt_DoCmmLinting' is set the graph is linted first; a lint
-- failure is reported with 'fatalErrorMsg' and the compiler exits via
-- 'ghcExit' with code 1.
dumpGraph :: DynFlags -> DumpFlag -> String -> CmmGraph -> IO ()
dumpGraph dflags flag name graph = do
    when (gopt Opt_DoCmmLinting dflags) lintOrExit
    dumpWith dflags flag name (ppr graph)
  where
    -- 'Nothing' from the linter means there is nothing to report.
    lintOrExit = maybe (return ()) bail (cmmLintGraph dflags graph)

    bail err = fatalErrorMsg dflags err >> ghcExit dflags 1
357
-- | Dump a document under the given flag; if that flag is not set, offer
-- the same document under 'Opt_D_dump_cmm_verbose' instead.
dumpWith :: DynFlags -> DumpFlag -> String -> SDoc -> IO ()
dumpWith dflags flag txt sdoc = do
    -- ToDo: There is no easy way of saying "dump all the cmm, *and* split
    -- them into files."  Also, -ddump-cmm-verbose doesn't play
    -- nicely with -ddump-to-file, since the headers get omitted.
    dumpIfSet_dyn dflags flag txt sdoc
    unless (dopt flag dflags) verboseDump
  where
    verboseDump = dumpIfSet_dyn dflags Opt_D_dump_cmm_verbose txt sdoc