Simplify Control Flow Optimisations Cmm pass
[ghc.git] / compiler / cmm / CmmContFlowOpt.hs
1 {-# LANGUAGE GADTs #-}
2 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
3 module CmmContFlowOpt
4 ( cmmCfgOpts
5 , cmmCfgOptsProc
6 , removeUnreachableBlocksProc
7 , replaceLabels
8 )
9 where
10
11 import Hoopl
12 import BlockId
13 import Cmm
14 import CmmUtils
15 import Maybes
16 import Panic
17
18 import Control.Monad
19 import Prelude hiding (succ, unzip, zip)
20
21
22 -- Note [What is shortcutting]
23 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
24 --
25 -- Consider this Cmm code:
26 --
27 -- L1: ...
28 -- goto L2;
29 -- L2: goto L3;
30 -- L3: ...
31 --
32 -- Here L2 is an empty block and contains only an unconditional branch
33 -- to L3. In this situation any block that jumps to L2 can jump
34 -- directly to L3:
35 --
36 -- L1: ...
37 -- goto L3;
38 -- L2: goto L3;
39 -- L3: ...
40 --
41 -- In this situation we say that we shortcut L2 to L3. One of
42 -- consequences of shortcutting is that some blocks of code may become
43 -- unreachable (in the example above this is true for L2).
44
45
46 -- Note [Control-flow optimisations]
47 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
48 --
49 -- This optimisation does three things:
50 --
51 -- - If a block finishes in an unconditonal branch to another block
52 -- and that is the only jump to that block we concatenate the
53 -- destination block at the end of the current one.
54 --
55 -- - If a block finishes in a call whose continuation block is a
56 -- goto, then we can shortcut the destination, making the
57 -- continuation block the destination of the goto - but see Note
58 -- [Shortcut call returns].
59 --
60 -- - For any block that is not a call we try to shortcut the
61 -- destination(s). Additionally, if a block ends with a
62 -- conditional branch we try to invert the condition.
63 --
64 -- Blocks are processed using postorder DFS traversal. A side effect
65 -- of determining traversal order with a graph search is elimination
66 -- of any blocks that are unreachable.
67 --
68 -- Transformations are improved by working from the end of the graph
69 -- towards the beginning, because we may be able to perform many
70 -- shortcuts in one go.
71
72
73 -- Note [Shortcut call returns]
74 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
75 --
76 -- We are going to maintain the "current" graph (BlockEnv CmmBlock) as
77 -- we go, and also a mapping from BlockId to BlockId, representing
78 -- continuation labels that we have renamed. This latter mapping is
79 -- important because we might shortcut a CmmCall continuation. For
80 -- example:
81 --
82 -- Sp[0] = L
83 -- call g returns to L
84 -- L: goto M
85 -- M: ...
86 --
87 -- So when we shortcut the L block, we need to replace not only
88 -- the continuation of the call, but also references to L in the
89 -- code (e.g. the assignment Sp[0] = L):
90 --
91 -- Sp[0] = M
92 -- call g returns to M
93 -- M: ...
94 --
95 -- So we keep track of which labels we have renamed and apply the mapping
96 -- at the end with replaceLabels.
97
98
99 -- Note [Shortcut call returns and proc-points]
100 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101 --
102 -- Consider this code that you might get from a recursive
103 -- let-no-escape:
104 --
105 -- goto L1
106 -- L1:
107 -- if (Hp > HpLim) then L2 else L3
108 -- L2:
109 -- call stg_gc_noregs returns to L4
110 -- L4:
111 -- goto L1
112 -- L3:
113 -- ...
114 -- goto L1
115 --
116 -- Then the control-flow optimiser shortcuts L4. But that turns L1
117 -- into the call-return proc point, and every iteration of the loop
118 -- has to shuffle variables to and from the stack. So we must *not*
119 -- shortcut L4.
120 --
121 -- Moreover not shortcutting call returns is probably fine. If L4 can
122 -- concat with its branch target then it will still do so. And we
123 -- save some compile time because we don't have to traverse all the
124 -- code in replaceLabels.
125 --
126 -- However, we probably do want to do this if we are splitting proc
127 -- points, because L1 will be a proc-point anyway, so merging it with
128 -- L4 reduces the number of proc points. Unfortunately recursive
129 -- let-no-escapes won't generate very good code with proc-point
130 -- splitting on - we should probably compile them to explicitly use
131 -- the native calling convention instead.
132
133 cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
134 cmmCfgOpts split g = fst (blockConcat split g)
135
136 cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
137 cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
138 where (g', env) = blockConcat split g
139 info' = info{ info_tbls = new_info_tbls }
140 new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
141
142 -- If we changed any labels, then we have to update the info tables
143 -- too, except for the top-level info table because that might be
144 -- referred to by other procs.
145 upd_info (k,info)
146 | Just k' <- mapLookup k env
147 = (k', if k' == g_entry g'
148 then info
149 else info{ cit_lbl = infoTblLbl k' })
150 | otherwise
151 = (k,info)
152 cmmCfgOptsProc _ top = top
153
154
155 blockConcat :: Bool -> CmmGraph -> (CmmGraph, BlockEnv BlockId)
156 blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
157 = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
158 where
159 -- We might be able to shortcut the entry BlockId itself.
160 -- Remember to update the shortcut_map, since we also have to
161 -- update the info_tbls mapping now.
162 (new_entry, shortcut_map')
163 | Just entry_blk <- mapLookup entry_id new_blocks
164 , Just dest <- canShortcut entry_blk
165 = (dest, mapInsert entry_id dest shortcut_map)
166 | otherwise
167 = (entry_id, shortcut_map)
168
169 -- blocks is a list of blocks in DFS postorder, while blockmap is
170 -- a map of blocks. We process each element from blocks and update
171 -- blockmap accordingly
172 blocks = postorderDfs g
173 blockmap = foldr addBlock emptyBody blocks
174
175 -- Accumulator contains three components:
176 -- * map of blocks in a graph
177 -- * map of shortcut labels. See Note [Shortcut call returns]
178 -- * map containing number of predecessors for each block. We discard
179 -- it after we process all blocks.
180 (new_blocks, shortcut_map, _) =
181 foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks
182
183 -- Map of predecessors for initial graph. We increase number of
184 -- predecessors for entry block by one to denote that it is
185 -- target of a jump, even if no block in the current graph jumps
186 -- to it.
187 initialBackEdges = incPreds entry_id (predMap blocks)
188
189 maybe_concat :: CmmBlock
190 -> (BlockEnv CmmBlock, BlockEnv BlockId, BlockEnv Int)
191 -> (BlockEnv CmmBlock, BlockEnv BlockId, BlockEnv Int)
192 maybe_concat block (blocks, shortcut_map, backEdges)
193 -- If:
194 -- (1) current block ends with unconditional branch to b' and
195 -- (2) it has exactly one predecessor (namely, current block)
196 --
197 -- Then:
198 -- (1) append b' block at the end of current block
199 -- (2) remove b' from the map of blocks
200 -- (3) remove information about b' from predecessors map
201 --
202 -- Since we know that the block has only one predecessor we call
203 -- mapDelete directly instead of calling decPreds.
204 --
205 -- Note that we always maintain an up-to-date list of predecessors, so
206 -- we can ignore the contents of shortcut_map
207 | CmmBranch b' <- last
208 , hasOnePredecessor b'
209 , Just blk' <- mapLookup b' blocks
210 = let bid' = entryLabel blk'
211 in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
212 , shortcut_map
213 , mapDelete b' backEdges )
214
215 -- If:
216 -- (1) we are splitting proc points (see Note
217 -- [Shortcut call returns and proc-points]) and
218 -- (2) current block is a CmmCall or CmmForeignCall with
219 -- continuation b' and
220 -- (3) we can shortcut that continuation to dest
221 -- Then:
222 -- (1) we change continuation to point to b'
223 -- (2) create mapping from b' to dest
224 -- (3) increase number of predecessors of dest by 1
225 -- (4) decrease number of predecessors of b' by 1
226 --
227 -- Later we will use replaceLabels to substitute all occurrences of b'
228 -- with dest.
229 | splitting_procs
230 , Just b' <- callContinuation_maybe last
231 , Just blk' <- mapLookup b' blocks
232 , Just dest <- canShortcut blk'
233 = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
234 , mapInsert b' dest shortcut_map
235 , decPreds b' $ incPreds dest backEdges )
236
237 -- If:
238 -- (1) a block does not end with a call
239 -- Then:
240 -- (1) if it ends with a conditional attempt to invert the
241 -- conditional
242 -- (2) attempt to shortcut all destination blocks
243 -- (3) if new successors of a block are different from the old ones
244 -- update the of predecessors accordingly
245 --
246 -- A special case of this is a situation when a block ends with an
247 -- unconditional jump to a block that can be shortcut.
248 | Nothing <- callContinuation_maybe last
249 = let oldSuccs = successors last
250 newSuccs = successors swapcond_last
251 in ( mapInsert bid (blockJoinTail head swapcond_last) blocks
252 , shortcut_map
253 , if oldSuccs == newSuccs
254 then backEdges
255 else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
256
257 -- Otherwise don't do anything
258 | otherwise
259 = ( blocks, shortcut_map, backEdges )
260 where
261 (head, last) = blockSplitTail block
262 bid = entryLabel block
263
264 -- Changes continuation of a call to a specified label
265 update_cont dest =
266 case last of
267 CmmCall{} -> last { cml_cont = Just dest }
268 CmmForeignCall{} -> last { succ = dest }
269 _ -> panic "Can't shortcut continuation."
270
271 -- Attempts to shortcut successors of last node
272 shortcut_last = mapSuccessors shortcut last
273 where
274 shortcut l =
275 case mapLookup l blocks of
276 Just b | Just dest <- canShortcut b -> dest
277 _otherwise -> l
278
279 -- For a conditional, we invert the conditional if that would make it
280 -- more likely that the branch-not-taken case becomes a fallthrough.
281 -- This helps the native codegen a little bit, and probably has no
282 -- effect on LLVM. It's convenient to do it here, where we have the
283 -- information about predecessors.
284 swapcond_last
285 | CmmCondBranch cond t f <- shortcut_last
286 , numPreds f > 1
287 , hasOnePredecessor t
288 , Just cond' <- maybeInvertCmmExpr cond
289 = CmmCondBranch cond' f t
290
291 | otherwise
292 = shortcut_last
293
294 -- Number of predecessors for a block
295 numPreds bid = mapLookup bid backEdges `orElse` 0
296
297 hasOnePredecessor b = numPreds b == 1
298
299 -- Functions for incrementing and decrementing number of predecessors. If
300 -- decrementing would set the predecessor count to 0, we remove entry from the
301 -- map.
302 -- Invariant: if a block has no predecessors it should be dropped from the
303 -- graph because it is unreachable. maybe_concat is constructed to maintain
304 -- that invariant, but calling replaceLabels may introduce unreachable blocks.
305 -- We rely on subsequent passes in the Cmm pipeline to remove unreachable
306 -- blocks.
307 incPreds, decPreds :: BlockId -> BlockEnv Int -> BlockEnv Int
308 incPreds bid edges = mapInsertWith (+) bid 1 edges
309 decPreds bid edges = case mapLookup bid edges of
310 Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
311 Just _ -> mapDelete bid edges
312 _ -> edges
313
314
315 -- Checks if a block consists only of "goto dest". If it does than we return
316 -- "Just dest" label. See Note [What is shortcutting]
317 canShortcut :: CmmBlock -> Maybe BlockId
318 canShortcut block
319 | (_, middle, CmmBranch dest) <- blockSplit block
320 , isEmptyBlock middle
321 = Just dest
322 | otherwise
323 = Nothing
324
325
326 -- Concatenates two blocks. First one is assumed to be open on exit, the second
327 -- is assumed to be closed on entry (i.e. it has a label attached to it, which
328 -- the splice function removes by calling snd on result of blockSplitHead).
329 splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
330 splice head rest = head `blockAppend` snd (blockSplitHead rest)
331
332
333 -- If node is a call with continuation call return Just label of that
334 -- continuation. Otherwise return Nothing.
335 callContinuation_maybe :: CmmNode O C -> Maybe BlockId
336 callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
337 callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
338 callContinuation_maybe _ = Nothing
339
340
341 -- Map over the CmmGraph, replacing each label with its mapping in the
342 -- supplied BlockEnv.
343 replaceLabels :: BlockEnv BlockId -> CmmGraph -> CmmGraph
344 replaceLabels env g
345 | mapNull env = g
346 | otherwise = replace_eid $ mapGraphNodes1 txnode g
347 where
348 replace_eid g = g {g_entry = lookup (g_entry g)}
349 lookup id = mapLookup id env `orElse` id
350
351 txnode :: CmmNode e x -> CmmNode e x
352 txnode (CmmBranch bid) = CmmBranch (lookup bid)
353 txnode (CmmCondBranch p t f) = mkCmmCondBranch (exp p) (lookup t) (lookup f)
354 txnode (CmmSwitch e arms) = CmmSwitch (exp e) (map (liftM lookup) arms)
355 txnode (CmmCall t k rg a res r) = CmmCall (exp t) (liftM lookup k) rg a res r
356 txnode fc@CmmForeignCall{} = fc{ args = map exp (args fc)
357 , succ = lookup (succ fc) }
358 txnode other = mapExpDeep exp other
359
360 exp :: CmmExpr -> CmmExpr
361 exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
362 exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
363 exp e = e
364
365 mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C
366 mkCmmCondBranch p t f = if t == f then CmmBranch t else CmmCondBranch p t f
367
368 -- Build a map from a block to its set of predecessors.
369 predMap :: [CmmBlock] -> BlockEnv Int
370 predMap blocks = foldr add_preds mapEmpty blocks
371 where
372 add_preds block env = foldr add env (successors block)
373 where add lbl env = mapInsertWith (+) lbl 1 env
374
375 -- Removing unreachable blocks
376 removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
377 removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
378 | length used_blocks < mapSize (toBlockMap g)
379 = CmmProc info' lbl live g'
380 | otherwise
381 = proc
382 where
383 g' = ofBlockList (g_entry g) used_blocks
384 info' = info { info_tbls = keep_used (info_tbls info) }
385 -- Remove any info_tbls for unreachable
386
387 keep_used :: BlockEnv CmmInfoTable -> BlockEnv CmmInfoTable
388 keep_used bs = mapFoldWithKey keep emptyBlockMap bs
389
390 keep :: Label -> CmmInfoTable -> BlockEnv CmmInfoTable -> BlockEnv CmmInfoTable
391 keep l i env | l `setMember` used_lbls = mapInsert l i env
392 | otherwise = env
393
394 used_blocks :: [CmmBlock]
395 used_blocks = postorderDfs g
396
397 used_lbls :: LabelSet
398 used_lbls = foldr (setInsert . entryLabel) setEmpty used_blocks