Remove unnecessary check in CmmContFlowOpt
[ghc.git] / compiler / cmm / CmmContFlowOpt.hs
1 {-# LANGUAGE GADTs #-}
2 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
3 module CmmContFlowOpt
4 ( cmmCfgOpts
5 , cmmCfgOptsProc
6 , removeUnreachableBlocksProc
7 , removeUnreachableBlocks
8 , replaceLabels
9 )
10 where
11
12 import Hoopl
13 import BlockId
14 import Cmm
15 import CmmUtils
16 import Maybes
17 import Panic
18
19 import Control.Monad
20 import Prelude hiding (succ, unzip, zip)
21
22
23 -- Note [What is shortcutting]
24 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
25 --
26 -- Consider this Cmm code:
27 --
28 -- L1: ...
29 -- goto L2;
30 -- L2: goto L3;
31 -- L3: ...
32 --
33 -- Here L2 is an empty block and contains only an unconditional branch
34 -- to L3. In this situation any block that jumps to L2 can jump
35 -- directly to L3:
36 --
37 -- L1: ...
38 -- goto L3;
39 -- L2: goto L3;
40 -- L3: ...
41 --
42 -- In this situation we say that we shortcut L2 to L3. One of
43 -- consequences of shortcutting is that some blocks of code may become
44 -- unreachable (in the example above this is true for L2).
45
46
47 -- Note [Control-flow optimisations]
48 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
49 --
50 -- This optimisation does four things:
51 --
52 -- - If a block finishes in an unconditonal branch to another block
53 -- and that is the only jump to that block we concatenate the
54 -- destination block at the end of the current one.
55 --
56 -- - If a block finishes in an unconditional branch, we may be able
57 -- to shortcut the destination block.
58 --
59 -- - If a block finishes in a call whose continuation block is a
60 -- goto, then we can shortcut the destination, making the
61 -- continuation block the destination of the goto - but see Note
62 -- [Shortcut call returns].
63 --
64 -- - For block finishing in conditional branch we try to invert the
65 -- condition and shortcut destination of alternatives.
66 --
67 -- - For any block that is not a call we try to shortcut the
68 -- destination(s).
69 --
70 -- Blocks are processed using postorder DFS traversal. A side effect
71 -- of determining traversal order with a graph search is elimination
72 -- of any blocks that are unreachable.
73 --
74 -- Transformations are improved by working from the end of the graph
75 -- towards the beginning, because we may be able to perform many
76 -- shortcuts in one go.
77
78
79 -- Note [Shortcut call returns]
80 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
81 --
82 -- We are going to maintain the "current" graph (BlockEnv CmmBlock) as
83 -- we go, and also a mapping from BlockId to BlockId, representing
84 -- continuation labels that we have renamed. This latter mapping is
85 -- important because we might shortcut a CmmCall continuation. For
86 -- example:
87 --
88 -- Sp[0] = L
89 -- call g returns to L
90 -- L: goto M
91 -- M: ...
92 --
93 -- So when we shortcut the L block, we need to replace not only
94 -- the continuation of the call, but also references to L in the
95 -- code (e.g. the assignment Sp[0] = L):
96 --
97 -- Sp[0] = M
98 -- call g returns to M
99 -- M: ...
100 --
101 -- So we keep track of which labels we have renamed and apply the mapping
102 -- at the end with replaceLabels.
103
104
105 -- Note [Shortcut call returns and proc-points]
106 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
107 --
108 -- Consider this code that you might get from a recursive
109 -- let-no-escape:
110 --
111 -- goto L1
112 -- L1:
113 -- if (Hp > HpLim) then L2 else L3
114 -- L2:
115 -- call stg_gc_noregs returns to L4
116 -- L4:
117 -- goto L1
118 -- L3:
119 -- ...
120 -- goto L1
121 --
122 -- Then the control-flow optimiser shortcuts L4. But that turns L1
123 -- into the call-return proc point, and every iteration of the loop
124 -- has to shuffle variables to and from the stack. So we must *not*
125 -- shortcut L4.
126 --
127 -- Moreover not shortcutting call returns is probably fine. If L4 can
128 -- concat with its branch target then it will still do so. And we
129 -- save some compile time because we don't have to traverse all the
130 -- code in replaceLabels.
131 --
132 -- However, we probably do want to do this if we are splitting proc
133 -- points, because L1 will be a proc-point anyway, so merging it with
134 -- L4 reduces the number of proc points. Unfortunately recursive
135 -- let-no-escapes won't generate very good code with proc-point
136 -- splitting on - we should probably compile them to explicitly use
137 -- the native calling convention instead.
138
139 cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
140 cmmCfgOpts split g = fst (blockConcat split g)
141
142 cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
143 cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
144 where (g', env) = blockConcat split g
145 info' = info{ info_tbls = new_info_tbls }
146 new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
147
148 -- If we changed any labels, then we have to update the info tables
149 -- too, except for the top-level info table because that might be
150 -- referred to by other procs.
151 upd_info (k,info)
152 | Just k' <- mapLookup k env
153 = (k', if k' == g_entry g'
154 then info
155 else info{ cit_lbl = infoTblLbl k' })
156 | otherwise
157 = (k,info)
158 cmmCfgOptsProc _ top = top
159
160
161 blockConcat :: Bool -> CmmGraph -> (CmmGraph, BlockEnv BlockId)
162 blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
163 = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
164 where
165 -- We might be able to shortcut the entry BlockId itself.
166 -- Remember to update the shortcut_map, since we also have to
167 -- update the info_tbls mapping now.
168 (new_entry, shortcut_map')
169 | Just entry_blk <- mapLookup entry_id new_blocks
170 , Just dest <- canShortcut entry_blk
171 = (dest, mapInsert entry_id dest shortcut_map)
172 | otherwise
173 = (entry_id, shortcut_map)
174
175 -- blocks is a list of blocks in DFS postorder, while blockmap is
176 -- a map of blocks. We process each element from blocks and update
177 -- blockmap accordingly
178 blocks = postorderDfs g
179 blockmap = foldr addBlock emptyBody blocks
180
181 -- Accumulator contains three components:
182 -- * map of blocks in a graph
183 -- * map of shortcut labels. See Note [Shortcut call returns]
184 -- * map containing number of predecessors for each block. We discard
185 -- it after we process all blocks.
186 (new_blocks, shortcut_map, _) =
187 foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks
188
189 -- Map of predecessors for initial graph. We increase number of
190 -- predecessors for entry block by one to denote that it is
191 -- target of a jump, even if no block in the current graph jumps
192 -- to it.
193 initialBackEdges = incPreds entry_id (predMap blocks)
194
195 maybe_concat :: CmmBlock
196 -> (BlockEnv CmmBlock, BlockEnv BlockId, BlockEnv Int)
197 -> (BlockEnv CmmBlock, BlockEnv BlockId, BlockEnv Int)
198 maybe_concat block (blocks, shortcut_map, backEdges)
199 -- If:
200 -- (1) current block ends with unconditional branch to b' and
201 -- (2) it has exactly one predecessor (namely, current block)
202 --
203 -- Then:
204 -- (1) append b' block at the end of current block
205 -- (2) remove b' from the map of blocks
206 -- (3) remove information about b' from predecessors map
207 --
208 -- This guard must be first so that we always eliminate blocks that have
209 -- only one predecessor. If we had a target block that is both
210 -- shorcutable and has only one predecessor and attempted to shortcut it
211 -- first we would make that block unreachable but would not remove it
212 -- from the graph.
213 --
214 -- Note that we always maintain an up-to-date list of predecessors, so
215 -- we can ignore the contents of shortcut_map
216 | CmmBranch b' <- last
217 , hasOnePredecessor b'
218 , Just blk' <- mapLookup b' blocks
219 = let bid' = entryLabel blk'
220 in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
221 , shortcut_map
222 , mapDelete b' backEdges )
223
224 -- If:
225 -- (1) current block ends with unconditional branch to b' and
226 -- (2) we can shortcut block b'
227 -- Then:
228 -- (1) concatenate b' at the end of current block, effectively
229 -- changing target of uncondtional jump from b' to dest
230 -- (2) increase number of predecessors of dest by 1
231 -- (3) decrease number of predecessors of b' by 1
232 | CmmBranch b' <- last
233 , Just blk' <- mapLookup b' blocks
234 , Just dest <- canShortcut blk'
235 = ( mapInsert bid (splice head blk') blocks, shortcut_map,
236 decPreds b' $ incPreds dest backEdges )
237
238 -- If:
239 -- (1) we are splitting proc points (see Note
240 -- [Shortcut call returns and proc-points]) and
241 -- (2) current block is a CmmCall or CmmForeignCall with
242 -- continuation b' and
243 -- (3) we can shortcut that continuation to dest
244 -- Then:
245 -- (1) we change continuation to point to b'
246 -- (2) create mapping from b' to dest
247 -- (3) increase number of predecessors of dest by 1
248 -- (4) decrease number of predecessors of b' by 1
249 --
250 -- Later we will use replaceLabels to substitute all occurences of b'
251 -- with dest.
252 | splitting_procs
253 , Just b' <- callContinuation_maybe last
254 , Just blk' <- mapLookup b' blocks
255 , Just dest <- canShortcut blk'
256 = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
257 , mapInsert b' dest shortcut_map
258 , decPreds b' $ incPreds dest backEdges )
259
260 -- If:
261 -- (1) a block does not end with a call
262 -- Then:
263 -- (1) if it ends with a conditional attempt to invert the
264 -- conditional
265 -- (2) attempt to shortcut all destination blocks
266 -- (3) if new successors of a block are different from the old ones
267 -- we update the of predecessors accordingly
268 | Nothing <- callContinuation_maybe last
269 = let oldSuccs = successors last
270 newSuccs = successors swapcond_last
271 in ( mapInsert bid (blockJoinTail head swapcond_last) blocks
272 , shortcut_map
273 , if oldSuccs == newSuccs
274 then backEdges
275 else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
276
277 -- Otherwise don't do anything
278 | otherwise
279 = ( blocks, shortcut_map, backEdges )
280 where
281 (head, last) = blockSplitTail block
282 bid = entryLabel block
283
284 -- Changes continuation of a call to a specified label
285 update_cont dest =
286 case last of
287 CmmCall{} -> last { cml_cont = Just dest }
288 CmmForeignCall{} -> last { succ = dest }
289 _ -> panic "Can't shortcut continuation."
290
291 -- Attempts to shortcut successors of last node
292 shortcut_last = mapSuccessors shortcut last
293 where
294 shortcut l =
295 case mapLookup l blocks of
296 Just b | Just dest <- canShortcut b -> dest
297 _otherwise -> l
298
299 -- For a conditional, we invert the conditional if that would make it
300 -- more likely that the branch-not-taken case becomes a fallthrough.
301 -- This helps the native codegen a little bit, and probably has no
302 -- effect on LLVM. It's convenient to do it here, where we have the
303 -- information about predecessors.
304 swapcond_last
305 | CmmCondBranch cond t f <- shortcut_last
306 , numPreds f > 1
307 , hasOnePredecessor t
308 , Just cond' <- maybeInvertCmmExpr cond
309 = CmmCondBranch cond' f t
310
311 | otherwise
312 = shortcut_last
313
314 -- Number of predecessors for a block
315 numPreds bid = mapLookup bid backEdges `orElse` 0
316
317 hasOnePredecessor b = numPreds b == 1
318
319 -- Functions for incrementing and decrementing number of predecessors. If
320 -- decrementing would set the predecessor count to 0, we remove entry from the
321 -- map.
322 -- Invariant: if a block has no predecessors it should be dropped from the
323 -- graph because it is unreachable. maybe_concat is constructed to maintain
324 -- that invariant, but calling replaceLabels may introduce unreachable blocks.
325 -- We rely on subsequent passes in the Cmm pipeline to remove unreachable
326 -- blocks.
327 incPreds, decPreds :: BlockId -> BlockEnv Int -> BlockEnv Int
328 incPreds bid edges = mapInsertWith (+) bid 1 edges
329 decPreds bid edges = case mapLookup bid edges of
330 Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
331 Just _ -> mapDelete bid edges
332 _ -> edges
333
334
335 -- Checks if a block consists only of "goto dest". If it does than we return
336 -- "Just dest" label. See Note [What is shortcutting]
337 canShortcut :: CmmBlock -> Maybe BlockId
338 canShortcut block
339 | (_, middle, CmmBranch dest) <- blockSplit block
340 , isEmptyBlock middle
341 = Just dest
342 | otherwise
343 = Nothing
344
345
346 -- Concatenates two blocks. First one is assumed to be open on exit, the second
347 -- is assumed to be closed on entry (i.e. it has a label attached to it, which
348 -- the splice function removes by calling snd on result of blockSplitHead).
349 splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
350 splice head rest = head `blockAppend` snd (blockSplitHead rest)
351
352
353 -- If node is a call with continuation call return Just label of that
354 -- continuation. Otherwise return Nothing.
355 callContinuation_maybe :: CmmNode O C -> Maybe BlockId
356 callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
357 callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
358 callContinuation_maybe _ = Nothing
359
360
361 -- Map over the CmmGraph, replacing each label with its mapping in the
362 -- supplied BlockEnv.
363 replaceLabels :: BlockEnv BlockId -> CmmGraph -> CmmGraph
364 replaceLabels env g
365 | mapNull env = g
366 | otherwise = replace_eid $ mapGraphNodes1 txnode g
367 where
368 replace_eid g = g {g_entry = lookup (g_entry g)}
369 lookup id = mapLookup id env `orElse` id
370
371 txnode :: CmmNode e x -> CmmNode e x
372 txnode (CmmBranch bid) = CmmBranch (lookup bid)
373 txnode (CmmCondBranch p t f) = mkCmmCondBranch (exp p) (lookup t) (lookup f)
374 txnode (CmmSwitch e arms) = CmmSwitch (exp e) (map (liftM lookup) arms)
375 txnode (CmmCall t k rg a res r) = CmmCall (exp t) (liftM lookup k) rg a res r
376 txnode fc@CmmForeignCall{} = fc{ args = map exp (args fc)
377 , succ = lookup (succ fc) }
378 txnode other = mapExpDeep exp other
379
380 exp :: CmmExpr -> CmmExpr
381 exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
382 exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
383 exp e = e
384
385 mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C
386 mkCmmCondBranch p t f = if t == f then CmmBranch t else CmmCondBranch p t f
387
388 -- Build a map from a block to its set of predecessors.
389 predMap :: [CmmBlock] -> BlockEnv Int
390 predMap blocks = foldr add_preds mapEmpty blocks
391 where
392 add_preds block env = foldr add env (successors block)
393 where add lbl env = mapInsertWith (+) lbl 1 env
394
395 -- Removing unreachable blocks
396 removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
397 removeUnreachableBlocksProc (CmmProc info lbl live g)
398 = CmmProc info lbl live (removeUnreachableBlocks g)
399
400 removeUnreachableBlocks :: CmmGraph -> CmmGraph
401 removeUnreachableBlocks g
402 | length blocks < mapSize (toBlockMap g) = ofBlockList (g_entry g) blocks
403 | otherwise = g
404 where blocks = postorderDfs g