52b95a93ccfbaecf18b0800c86f2f775d1a526db
[ghc.git] / compiler / cmm / CmmContFlowOpt.hs
1 {-# LANGUAGE GADTs #-}
2 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
3 module CmmContFlowOpt
4 ( cmmCfgOpts
5 , cmmCfgOptsProc
6 , removeUnreachableBlocksProc
7 , replaceLabels
8 )
9 where
10
11 import Hoopl
12 import BlockId
13 import Cmm
14 import CmmUtils
15 import Maybes
16 import Panic
17
18 import Control.Monad
19 import Prelude hiding (succ, unzip, zip)
20
21
22 -- Note [What is shortcutting]
23 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
24 --
25 -- Consider this Cmm code:
26 --
27 -- L1: ...
28 -- goto L2;
29 -- L2: goto L3;
30 -- L3: ...
31 --
32 -- Here L2 is an empty block and contains only an unconditional branch
33 -- to L3. In this situation any block that jumps to L2 can jump
34 -- directly to L3:
35 --
36 -- L1: ...
37 -- goto L3;
38 -- L2: goto L3;
39 -- L3: ...
40 --
41 -- In this situation we say that we shortcut L2 to L3. One of
42 -- consequences of shortcutting is that some blocks of code may become
43 -- unreachable (in the example above this is true for L2).
44
45
46 -- Note [Control-flow optimisations]
47 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
48 --
49 -- This optimisation does four things:
50 --
51 -- - If a block finishes in an unconditonal branch to another block
52 -- and that is the only jump to that block we concatenate the
53 -- destination block at the end of the current one.
54 --
55 -- - If a block finishes in an unconditional branch, we may be able
56 -- to shortcut the destination block.
57 --
58 -- - If a block finishes in a call whose continuation block is a
59 -- goto, then we can shortcut the destination, making the
60 -- continuation block the destination of the goto - but see Note
61 -- [Shortcut call returns].
62 --
63 -- - For block finishing in conditional branch we try to invert the
64 -- condition and shortcut destination of alternatives.
65 --
66 -- - For any block that is not a call we try to shortcut the
67 -- destination(s).
68 --
69 -- Blocks are processed using postorder DFS traversal. A side effect
70 -- of determining traversal order with a graph search is elimination
71 -- of any blocks that are unreachable.
72 --
73 -- Transformations are improved by working from the end of the graph
74 -- towards the beginning, because we may be able to perform many
75 -- shortcuts in one go.
76
77
78 -- Note [Shortcut call returns]
79 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
80 --
81 -- We are going to maintain the "current" graph (BlockEnv CmmBlock) as
82 -- we go, and also a mapping from BlockId to BlockId, representing
83 -- continuation labels that we have renamed. This latter mapping is
84 -- important because we might shortcut a CmmCall continuation. For
85 -- example:
86 --
87 -- Sp[0] = L
88 -- call g returns to L
89 -- L: goto M
90 -- M: ...
91 --
92 -- So when we shortcut the L block, we need to replace not only
93 -- the continuation of the call, but also references to L in the
94 -- code (e.g. the assignment Sp[0] = L):
95 --
96 -- Sp[0] = M
97 -- call g returns to M
98 -- M: ...
99 --
100 -- So we keep track of which labels we have renamed and apply the mapping
101 -- at the end with replaceLabels.
102
103
104 -- Note [Shortcut call returns and proc-points]
105 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
106 --
107 -- Consider this code that you might get from a recursive
108 -- let-no-escape:
109 --
110 -- goto L1
111 -- L1:
112 -- if (Hp > HpLim) then L2 else L3
113 -- L2:
114 -- call stg_gc_noregs returns to L4
115 -- L4:
116 -- goto L1
117 -- L3:
118 -- ...
119 -- goto L1
120 --
121 -- Then the control-flow optimiser shortcuts L4. But that turns L1
122 -- into the call-return proc point, and every iteration of the loop
123 -- has to shuffle variables to and from the stack. So we must *not*
124 -- shortcut L4.
125 --
126 -- Moreover not shortcutting call returns is probably fine. If L4 can
127 -- concat with its branch target then it will still do so. And we
128 -- save some compile time because we don't have to traverse all the
129 -- code in replaceLabels.
130 --
131 -- However, we probably do want to do this if we are splitting proc
132 -- points, because L1 will be a proc-point anyway, so merging it with
133 -- L4 reduces the number of proc points. Unfortunately recursive
134 -- let-no-escapes won't generate very good code with proc-point
135 -- splitting on - we should probably compile them to explicitly use
136 -- the native calling convention instead.
137
138 cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
139 cmmCfgOpts split g = fst (blockConcat split g)
140
141 cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
142 cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
143 where (g', env) = blockConcat split g
144 info' = info{ info_tbls = new_info_tbls }
145 new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
146
147 -- If we changed any labels, then we have to update the info tables
148 -- too, except for the top-level info table because that might be
149 -- referred to by other procs.
150 upd_info (k,info)
151 | Just k' <- mapLookup k env
152 = (k', if k' == g_entry g'
153 then info
154 else info{ cit_lbl = infoTblLbl k' })
155 | otherwise
156 = (k,info)
157 cmmCfgOptsProc _ top = top
158
159
160 blockConcat :: Bool -> CmmGraph -> (CmmGraph, BlockEnv BlockId)
161 blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
162 = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
163 where
164 -- We might be able to shortcut the entry BlockId itself.
165 -- Remember to update the shortcut_map, since we also have to
166 -- update the info_tbls mapping now.
167 (new_entry, shortcut_map')
168 | Just entry_blk <- mapLookup entry_id new_blocks
169 , Just dest <- canShortcut entry_blk
170 = (dest, mapInsert entry_id dest shortcut_map)
171 | otherwise
172 = (entry_id, shortcut_map)
173
174 -- blocks is a list of blocks in DFS postorder, while blockmap is
175 -- a map of blocks. We process each element from blocks and update
176 -- blockmap accordingly
177 blocks = postorderDfs g
178 blockmap = foldr addBlock emptyBody blocks
179
180 -- Accumulator contains three components:
181 -- * map of blocks in a graph
182 -- * map of shortcut labels. See Note [Shortcut call returns]
183 -- * map containing number of predecessors for each block. We discard
184 -- it after we process all blocks.
185 (new_blocks, shortcut_map, _) =
186 foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks
187
188 -- Map of predecessors for initial graph. We increase number of
189 -- predecessors for entry block by one to denote that it is
190 -- target of a jump, even if no block in the current graph jumps
191 -- to it.
192 initialBackEdges = incPreds entry_id (predMap blocks)
193
194 maybe_concat :: CmmBlock
195 -> (BlockEnv CmmBlock, BlockEnv BlockId, BlockEnv Int)
196 -> (BlockEnv CmmBlock, BlockEnv BlockId, BlockEnv Int)
197 maybe_concat block (blocks, shortcut_map, backEdges)
198 -- If:
199 -- (1) current block ends with unconditional branch to b' and
200 -- (2) it has exactly one predecessor (namely, current block)
201 --
202 -- Then:
203 -- (1) append b' block at the end of current block
204 -- (2) remove b' from the map of blocks
205 -- (3) remove information about b' from predecessors map
206 --
207 -- This guard must be first so that we always eliminate blocks that have
208 -- only one predecessor. If we had a target block that is both
209 -- shorcutable and has only one predecessor and attempted to shortcut it
210 -- first we would make that block unreachable but would not remove it
211 -- from the graph.
212 --
213 -- Note that we always maintain an up-to-date list of predecessors, so
214 -- we can ignore the contents of shortcut_map
215 | CmmBranch b' <- last
216 , hasOnePredecessor b'
217 , Just blk' <- mapLookup b' blocks
218 = let bid' = entryLabel blk'
219 in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
220 , shortcut_map
221 , mapDelete b' backEdges )
222
223 -- If:
224 -- (1) current block ends with unconditional branch to b' and
225 -- (2) we can shortcut block b'
226 -- Then:
227 -- (1) concatenate b' at the end of current block, effectively
228 -- changing target of uncondtional jump from b' to dest
229 -- (2) increase number of predecessors of dest by 1
230 -- (3) decrease number of predecessors of b' by 1
231 | CmmBranch b' <- last
232 , Just blk' <- mapLookup b' blocks
233 , Just dest <- canShortcut blk'
234 = ( mapInsert bid (splice head blk') blocks, shortcut_map,
235 decPreds b' $ incPreds dest backEdges )
236
237 -- If:
238 -- (1) we are splitting proc points (see Note
239 -- [Shortcut call returns and proc-points]) and
240 -- (2) current block is a CmmCall or CmmForeignCall with
241 -- continuation b' and
242 -- (3) we can shortcut that continuation to dest
243 -- Then:
244 -- (1) we change continuation to point to b'
245 -- (2) create mapping from b' to dest
246 -- (3) increase number of predecessors of dest by 1
247 -- (4) decrease number of predecessors of b' by 1
248 --
249 -- Later we will use replaceLabels to substitute all occurrences of b'
250 -- with dest.
251 | splitting_procs
252 , Just b' <- callContinuation_maybe last
253 , Just blk' <- mapLookup b' blocks
254 , Just dest <- canShortcut blk'
255 = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
256 , mapInsert b' dest shortcut_map
257 , decPreds b' $ incPreds dest backEdges )
258
259 -- If:
260 -- (1) a block does not end with a call
261 -- Then:
262 -- (1) if it ends with a conditional attempt to invert the
263 -- conditional
264 -- (2) attempt to shortcut all destination blocks
265 -- (3) if new successors of a block are different from the old ones
266 -- we update the of predecessors accordingly
267 | Nothing <- callContinuation_maybe last
268 = let oldSuccs = successors last
269 newSuccs = successors swapcond_last
270 in ( mapInsert bid (blockJoinTail head swapcond_last) blocks
271 , shortcut_map
272 , if oldSuccs == newSuccs
273 then backEdges
274 else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
275
276 -- Otherwise don't do anything
277 | otherwise
278 = ( blocks, shortcut_map, backEdges )
279 where
280 (head, last) = blockSplitTail block
281 bid = entryLabel block
282
283 -- Changes continuation of a call to a specified label
284 update_cont dest =
285 case last of
286 CmmCall{} -> last { cml_cont = Just dest }
287 CmmForeignCall{} -> last { succ = dest }
288 _ -> panic "Can't shortcut continuation."
289
290 -- Attempts to shortcut successors of last node
291 shortcut_last = mapSuccessors shortcut last
292 where
293 shortcut l =
294 case mapLookup l blocks of
295 Just b | Just dest <- canShortcut b -> dest
296 _otherwise -> l
297
298 -- For a conditional, we invert the conditional if that would make it
299 -- more likely that the branch-not-taken case becomes a fallthrough.
300 -- This helps the native codegen a little bit, and probably has no
301 -- effect on LLVM. It's convenient to do it here, where we have the
302 -- information about predecessors.
303 swapcond_last
304 | CmmCondBranch cond t f <- shortcut_last
305 , numPreds f > 1
306 , hasOnePredecessor t
307 , Just cond' <- maybeInvertCmmExpr cond
308 = CmmCondBranch cond' f t
309
310 | otherwise
311 = shortcut_last
312
313 -- Number of predecessors for a block
314 numPreds bid = mapLookup bid backEdges `orElse` 0
315
316 hasOnePredecessor b = numPreds b == 1
317
318 -- Functions for incrementing and decrementing number of predecessors. If
319 -- decrementing would set the predecessor count to 0, we remove entry from the
320 -- map.
321 -- Invariant: if a block has no predecessors it should be dropped from the
322 -- graph because it is unreachable. maybe_concat is constructed to maintain
323 -- that invariant, but calling replaceLabels may introduce unreachable blocks.
324 -- We rely on subsequent passes in the Cmm pipeline to remove unreachable
325 -- blocks.
326 incPreds, decPreds :: BlockId -> BlockEnv Int -> BlockEnv Int
327 incPreds bid edges = mapInsertWith (+) bid 1 edges
328 decPreds bid edges = case mapLookup bid edges of
329 Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
330 Just _ -> mapDelete bid edges
331 _ -> edges
332
333
334 -- Checks if a block consists only of "goto dest". If it does than we return
335 -- "Just dest" label. See Note [What is shortcutting]
336 canShortcut :: CmmBlock -> Maybe BlockId
337 canShortcut block
338 | (_, middle, CmmBranch dest) <- blockSplit block
339 , isEmptyBlock middle
340 = Just dest
341 | otherwise
342 = Nothing
343
344
345 -- Concatenates two blocks. First one is assumed to be open on exit, the second
346 -- is assumed to be closed on entry (i.e. it has a label attached to it, which
347 -- the splice function removes by calling snd on result of blockSplitHead).
348 splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
349 splice head rest = head `blockAppend` snd (blockSplitHead rest)
350
351
352 -- If node is a call with continuation call return Just label of that
353 -- continuation. Otherwise return Nothing.
354 callContinuation_maybe :: CmmNode O C -> Maybe BlockId
355 callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
356 callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
357 callContinuation_maybe _ = Nothing
358
359
360 -- Map over the CmmGraph, replacing each label with its mapping in the
361 -- supplied BlockEnv.
362 replaceLabels :: BlockEnv BlockId -> CmmGraph -> CmmGraph
363 replaceLabels env g
364 | mapNull env = g
365 | otherwise = replace_eid $ mapGraphNodes1 txnode g
366 where
367 replace_eid g = g {g_entry = lookup (g_entry g)}
368 lookup id = mapLookup id env `orElse` id
369
370 txnode :: CmmNode e x -> CmmNode e x
371 txnode (CmmBranch bid) = CmmBranch (lookup bid)
372 txnode (CmmCondBranch p t f) = mkCmmCondBranch (exp p) (lookup t) (lookup f)
373 txnode (CmmSwitch e arms) = CmmSwitch (exp e) (map (liftM lookup) arms)
374 txnode (CmmCall t k rg a res r) = CmmCall (exp t) (liftM lookup k) rg a res r
375 txnode fc@CmmForeignCall{} = fc{ args = map exp (args fc)
376 , succ = lookup (succ fc) }
377 txnode other = mapExpDeep exp other
378
379 exp :: CmmExpr -> CmmExpr
380 exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
381 exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
382 exp e = e
383
384 mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C
385 mkCmmCondBranch p t f = if t == f then CmmBranch t else CmmCondBranch p t f
386
387 -- Build a map from a block to its set of predecessors.
388 predMap :: [CmmBlock] -> BlockEnv Int
389 predMap blocks = foldr add_preds mapEmpty blocks
390 where
391 add_preds block env = foldr add env (successors block)
392 where add lbl env = mapInsertWith (+) lbl 1 env
393
394 -- Removing unreachable blocks
395 removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
396 removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
397 | length used_blocks < mapSize (toBlockMap g)
398 = CmmProc info' lbl live g'
399 | otherwise
400 = proc
401 where
402 g' = ofBlockList (g_entry g) used_blocks
403 info' = info { info_tbls = keep_used (info_tbls info) }
404 -- Remove any info_tbls for unreachable
405
406 keep_used :: BlockEnv CmmInfoTable -> BlockEnv CmmInfoTable
407 keep_used bs = mapFoldWithKey keep emptyBlockMap bs
408
409 keep :: Label -> CmmInfoTable -> BlockEnv CmmInfoTable -> BlockEnv CmmInfoTable
410 keep l i env | l `setMember` used_lbls = mapInsert l i env
411 | otherwise = env
412
413 used_blocks :: [CmmBlock]
414 used_blocks = postorderDfs g
415
416 used_lbls :: LabelSet
417 used_lbls = foldr (setInsert . entryLabel) setEmpty used_blocks