Check if both branches of an Cmm if have the same target.
[ghc.git] / compiler / cmm / CmmContFlowOpt.hs
1 {-# LANGUAGE GADTs #-}
2 {-# LANGUAGE BangPatterns #-}
3 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
4 module CmmContFlowOpt
5 ( cmmCfgOpts
6 , cmmCfgOptsProc
7 , removeUnreachableBlocksProc
8 , replaceLabels
9 )
10 where
11
12 import GhcPrelude hiding (succ, unzip, zip)
13
14 import Hoopl.Block
15 import Hoopl.Collections
16 import Hoopl.Graph
17 import Hoopl.Label
18 import BlockId
19 import Cmm
20 import CmmUtils
21 import CmmSwitch (mapSwitchTargets)
22 import Maybes
23 import Panic
24 import Util
25
26 import Control.Monad
27 import Data.List
28
29
30 -- Note [What is shortcutting]
31 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
32 --
33 -- Consider this Cmm code:
34 --
35 -- L1: ...
36 -- goto L2;
37 -- L2: goto L3;
38 -- L3: ...
39 --
40 -- Here L2 is an empty block and contains only an unconditional branch
41 -- to L3. In this situation any block that jumps to L2 can jump
42 -- directly to L3:
43 --
44 -- L1: ...
45 -- goto L3;
46 -- L2: goto L3;
47 -- L3: ...
48 --
49 -- In this situation we say that we shortcut L2 to L3. One of
50 -- consequences of shortcutting is that some blocks of code may become
51 -- unreachable (in the example above this is true for L2).
52
53
54 -- Note [Control-flow optimisations]
55 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
56 --
57 -- This optimisation does three things:
58 --
59 -- - If a block finishes in an unconditional branch to another block
60 -- and that is the only jump to that block we concatenate the
61 -- destination block at the end of the current one.
62 --
63 -- - If a block finishes in a call whose continuation block is a
64 -- goto, then we can shortcut the destination, making the
65 -- continuation block the destination of the goto - but see Note
66 -- [Shortcut call returns].
67 --
68 -- - For any block that is not a call we try to shortcut the
69 -- destination(s). Additionally, if a block ends with a
70 -- conditional branch we try to invert the condition.
71 --
72 -- Blocks are processed using postorder DFS traversal. A side effect
73 -- of determining traversal order with a graph search is elimination
74 -- of any blocks that are unreachable.
75 --
76 -- Transformations are improved by working from the end of the graph
77 -- towards the beginning, because we may be able to perform many
78 -- shortcuts in one go.
79
80
81 -- Note [Shortcut call returns]
82 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
83 --
84 -- We are going to maintain the "current" graph (LabelMap CmmBlock) as
85 -- we go, and also a mapping from BlockId to BlockId, representing
86 -- continuation labels that we have renamed. This latter mapping is
87 -- important because we might shortcut a CmmCall continuation. For
88 -- example:
89 --
90 -- Sp[0] = L
91 -- call g returns to L
92 -- L: goto M
93 -- M: ...
94 --
95 -- So when we shortcut the L block, we need to replace not only
96 -- the continuation of the call, but also references to L in the
97 -- code (e.g. the assignment Sp[0] = L):
98 --
99 -- Sp[0] = M
100 -- call g returns to M
101 -- M: ...
102 --
103 -- So we keep track of which labels we have renamed and apply the mapping
104 -- at the end with replaceLabels.
105
106
107 -- Note [Shortcut call returns and proc-points]
108 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
109 --
110 -- Consider this code that you might get from a recursive
111 -- let-no-escape:
112 --
113 -- goto L1
114 -- L1:
115 -- if (Hp > HpLim) then L2 else L3
116 -- L2:
117 -- call stg_gc_noregs returns to L4
118 -- L4:
119 -- goto L1
120 -- L3:
121 -- ...
122 -- goto L1
123 --
124 -- Then the control-flow optimiser shortcuts L4. But that turns L1
125 -- into the call-return proc point, and every iteration of the loop
126 -- has to shuffle variables to and from the stack. So we must *not*
127 -- shortcut L4.
128 --
129 -- Moreover not shortcutting call returns is probably fine. If L4 can
130 -- concat with its branch target then it will still do so. And we
131 -- save some compile time because we don't have to traverse all the
132 -- code in replaceLabels.
133 --
134 -- However, we probably do want to do this if we are splitting proc
135 -- points, because L1 will be a proc-point anyway, so merging it with
136 -- L4 reduces the number of proc points. Unfortunately recursive
137 -- let-no-escapes won't generate very good code with proc-point
138 -- splitting on - we should probably compile them to explicitly use
139 -- the native calling convention instead.
140
141 cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
142 cmmCfgOpts split g = fst (blockConcat split g)
143
144 cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
145 cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
146 where (g', env) = blockConcat split g
147 info' = info{ info_tbls = new_info_tbls }
148 new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
149
150 -- If we changed any labels, then we have to update the info tables
151 -- too, except for the top-level info table because that might be
152 -- referred to by other procs.
153 upd_info (k,info)
154 | Just k' <- mapLookup k env
155 = (k', if k' == g_entry g'
156 then info
157 else info{ cit_lbl = infoTblLbl k' })
158 | otherwise
159 = (k,info)
160 cmmCfgOptsProc _ top = top
161
162
163 blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId)
164 blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
165 = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
166 where
167 -- We might be able to shortcut the entry BlockId itself.
168 -- Remember to update the shortcut_map, since we also have to
169 -- update the info_tbls mapping now.
170 (new_entry, shortcut_map')
171 | Just entry_blk <- mapLookup entry_id new_blocks
172 , Just dest <- canShortcut entry_blk
173 = (dest, mapInsert entry_id dest shortcut_map)
174 | otherwise
175 = (entry_id, shortcut_map)
176
177 -- blocks are sorted in reverse postorder, but we want to go from the exit
178 -- towards beginning, so we use foldr below.
179 blocks = revPostorder g
180 blockmap = foldl' (flip addBlock) emptyBody blocks
181
182 -- Accumulator contains three components:
183 -- * map of blocks in a graph
184 -- * map of shortcut labels. See Note [Shortcut call returns]
185 -- * map containing number of predecessors for each block. We discard
186 -- it after we process all blocks.
187 (new_blocks, shortcut_map, _) =
188 foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks
189
190 -- Map of predecessors for initial graph. We increase number of
191 -- predecessors for entry block by one to denote that it is
192 -- target of a jump, even if no block in the current graph jumps
193 -- to it.
194 initialBackEdges = incPreds entry_id (predMap blocks)
195
196 maybe_concat :: CmmBlock
197 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
198 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
199 maybe_concat block (!blocks, !shortcut_map, !backEdges)
200 -- If:
201 -- (1) current block ends with unconditional branch to b' and
202 -- (2) it has exactly one predecessor (namely, current block)
203 --
204 -- Then:
205 -- (1) append b' block at the end of current block
206 -- (2) remove b' from the map of blocks
207 -- (3) remove information about b' from predecessors map
208 --
209 -- Since we know that the block has only one predecessor we call
210 -- mapDelete directly instead of calling decPreds.
211 --
212 -- Note that we always maintain an up-to-date list of predecessors, so
213 -- we can ignore the contents of shortcut_map
214 | CmmBranch b' <- last
215 , hasOnePredecessor b'
216 , Just blk' <- mapLookup b' blocks
217 = let bid' = entryLabel blk'
218 in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
219 , shortcut_map
220 , mapDelete b' backEdges )
221
222 -- If:
223 -- (1) we are splitting proc points (see Note
224 -- [Shortcut call returns and proc-points]) and
225 -- (2) current block is a CmmCall or CmmForeignCall with
226 -- continuation b' and
227 -- (3) we can shortcut that continuation to dest
228 -- Then:
229 -- (1) we change continuation to point to b'
230 -- (2) create mapping from b' to dest
231 -- (3) increase number of predecessors of dest by 1
232 -- (4) decrease number of predecessors of b' by 1
233 --
234 -- Later we will use replaceLabels to substitute all occurrences of b'
235 -- with dest.
236 | splitting_procs
237 , Just b' <- callContinuation_maybe last
238 , Just blk' <- mapLookup b' blocks
239 , Just dest <- canShortcut blk'
240 = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
241 , mapInsert b' dest shortcut_map
242 , decPreds b' $ incPreds dest backEdges )
243
244 -- If:
245 -- (1) a block does not end with a call
246 -- Then:
247 -- (1) if it ends with a conditional attempt to invert the
248 -- conditional
249 -- (2) attempt to shortcut all destination blocks
250 -- (3) if new successors of a block are different from the old ones
251 -- update the of predecessors accordingly
252 --
253 -- A special case of this is a situation when a block ends with an
254 -- unconditional jump to a block that can be shortcut.
255 | Nothing <- callContinuation_maybe last
256 = let oldSuccs = successors last
257 newSuccs = successors rewrite_last
258 in ( mapInsert bid (blockJoinTail head rewrite_last) blocks
259 , shortcut_map
260 , if oldSuccs == newSuccs
261 then backEdges
262 else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
263
264 -- Otherwise don't do anything
265 | otherwise
266 = ( blocks, shortcut_map, backEdges )
267 where
268 (head, last) = blockSplitTail block
269 bid = entryLabel block
270
271 -- Changes continuation of a call to a specified label
272 update_cont dest =
273 case last of
274 CmmCall{} -> last { cml_cont = Just dest }
275 CmmForeignCall{} -> last { succ = dest }
276 _ -> panic "Can't shortcut continuation."
277
278 -- Attempts to shortcut successors of last node
279 shortcut_last = mapSuccessors shortcut last
280 where
281 shortcut l =
282 case mapLookup l blocks of
283 Just b | Just dest <- canShortcut b -> dest
284 _otherwise -> l
285
286 rewrite_last
287 -- Sometimes we can get rid of the conditional completely.
288 | CmmCondBranch _cond t f _l <- shortcut_last
289 , t == f
290 = CmmBranch t
291
292 -- See Note [Invert Cmm conditionals]
293 | CmmCondBranch cond t f l <- shortcut_last
294 , hasOnePredecessor t -- inverting will make t a fallthrough
295 , likelyTrue l || (numPreds f > 1)
296 , Just cond' <- maybeInvertCmmExpr cond
297 = CmmCondBranch cond' f t (invertLikeliness l)
298
299 | otherwise
300 = shortcut_last
301
302 likelyTrue (Just True) = True
303 likelyTrue _ = False
304
305 invertLikeliness :: Maybe Bool -> Maybe Bool
306 invertLikeliness = fmap not
307
308 -- Number of predecessors for a block
309 numPreds bid = mapLookup bid backEdges `orElse` 0
310
311 hasOnePredecessor b = numPreds b == 1
312
313 {-
314 Note [Invert Cmm conditionals]
315 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
316 The native code generator always produces jumps to the true branch.
317 Falling through to the false branch is however faster. So we try to
318 arrange for that to happen.
319 This means we invert the condition if:
320 * The likely path will become a fallthrough.
321 * We can't guarantee a fallthrough for the false branch but for the
322 true branch.
323
324 In some cases it's faster to avoid inverting when the false branch is likely.
325 However determining when that is the case is neither easy nor cheap so for
326 now we always invert as this produces smaller binaries and code that is
327 equally fast on average. (On an i7-6700K)
328
329 TODO:
330 There is also the edge case when both branches have multiple predecessors.
331 In this case we could assume that we will end up with a jump for BOTH
332 branches. In this case it might be best to put the likely path in the true
333 branch especially if there are large numbers of predecessors as this saves
334 us the jump thats not taken. However I haven't tested this and as of early
335 2018 we almost never generate cmm where this would apply.
336 -}
337
338 -- Functions for incrementing and decrementing number of predecessors. If
339 -- decrementing would set the predecessor count to 0, we remove entry from the
340 -- map.
341 -- Invariant: if a block has no predecessors it should be dropped from the
342 -- graph because it is unreachable. maybe_concat is constructed to maintain
343 -- that invariant, but calling replaceLabels may introduce unreachable blocks.
344 -- We rely on subsequent passes in the Cmm pipeline to remove unreachable
345 -- blocks.
346 incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int
347 incPreds bid edges = mapInsertWith (+) bid 1 edges
348 decPreds bid edges = case mapLookup bid edges of
349 Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
350 Just _ -> mapDelete bid edges
351 _ -> edges
352
353
354 -- Checks if a block consists only of "goto dest". If it does than we return
355 -- "Just dest" label. See Note [What is shortcutting]
356 canShortcut :: CmmBlock -> Maybe BlockId
357 canShortcut block
358 | (_, middle, CmmBranch dest) <- blockSplit block
359 , all dont_care $ blockToList middle
360 = Just dest
361 | otherwise
362 = Nothing
363 where dont_care CmmComment{} = True
364 dont_care CmmTick{} = True
365 dont_care _other = False
366
367 -- Concatenates two blocks. First one is assumed to be open on exit, the second
368 -- is assumed to be closed on entry (i.e. it has a label attached to it, which
369 -- the splice function removes by calling snd on result of blockSplitHead).
370 splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
371 splice head rest = entry `blockJoinHead` code0 `blockAppend` code1
372 where (CmmEntry lbl sc0, code0) = blockSplitHead head
373 (CmmEntry _ sc1, code1) = blockSplitHead rest
374 entry = CmmEntry lbl (combineTickScopes sc0 sc1)
375
376 -- If node is a call with continuation call return Just label of that
377 -- continuation. Otherwise return Nothing.
378 callContinuation_maybe :: CmmNode O C -> Maybe BlockId
379 callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
380 callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
381 callContinuation_maybe _ = Nothing
382
383
384 -- Map over the CmmGraph, replacing each label with its mapping in the
385 -- supplied LabelMap.
386 replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph
387 replaceLabels env g
388 | mapNull env = g
389 | otherwise = replace_eid $ mapGraphNodes1 txnode g
390 where
391 replace_eid g = g {g_entry = lookup (g_entry g)}
392 lookup id = mapLookup id env `orElse` id
393
394 txnode :: CmmNode e x -> CmmNode e x
395 txnode (CmmBranch bid) = CmmBranch (lookup bid)
396 txnode (CmmCondBranch p t f l) =
397 mkCmmCondBranch (exp p) (lookup t) (lookup f) l
398 txnode (CmmSwitch e ids) =
399 CmmSwitch (exp e) (mapSwitchTargets lookup ids)
400 txnode (CmmCall t k rg a res r) =
401 CmmCall (exp t) (liftM lookup k) rg a res r
402 txnode fc@CmmForeignCall{} =
403 fc{ args = map exp (args fc), succ = lookup (succ fc) }
404 txnode other = mapExpDeep exp other
405
406 exp :: CmmExpr -> CmmExpr
407 exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
408 exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
409 exp e = e
410
411 mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C
412 mkCmmCondBranch p t f l =
413 if t == f then CmmBranch t else CmmCondBranch p t f l
414
415 -- Build a map from a block to its set of predecessors.
416 predMap :: [CmmBlock] -> LabelMap Int
417 predMap blocks = foldr add_preds mapEmpty blocks
418 where
419 add_preds block env = foldr add env (successors block)
420 where add lbl env = mapInsertWith (+) lbl 1 env
421
422 -- Removing unreachable blocks
423 removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
424 removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
425 | used_blocks `lengthLessThan` mapSize (toBlockMap g)
426 = CmmProc info' lbl live g'
427 | otherwise
428 = proc
429 where
430 g' = ofBlockList (g_entry g) used_blocks
431 info' = info { info_tbls = keep_used (info_tbls info) }
432 -- Remove any info_tbls for unreachable
433
434 keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable
435 keep_used bs = mapFoldlWithKey keep mapEmpty bs
436
437 keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable
438 keep env l i | l `setMember` used_lbls = mapInsert l i env
439 | otherwise = env
440
441 used_blocks :: [CmmBlock]
442 used_blocks = revPostorder g
443
444 used_lbls :: LabelSet
445 used_lbls = setFromList $ map entryLabel used_blocks