Handle the likely:True case in CmmContFlowOpt
[ghc.git] / compiler / cmm / CmmContFlowOpt.hs
1 {-# LANGUAGE GADTs #-}
2 {-# LANGUAGE BangPatterns #-}
3 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
4 module CmmContFlowOpt
5 ( cmmCfgOpts
6 , cmmCfgOptsProc
7 , removeUnreachableBlocksProc
8 , replaceLabels
9 )
10 where
11
12 import GhcPrelude hiding (succ, unzip, zip)
13
14 import Hoopl.Block
15 import Hoopl.Collections
16 import Hoopl.Graph
17 import Hoopl.Label
18 import BlockId
19 import Cmm
20 import CmmUtils
21 import CmmSwitch (mapSwitchTargets)
22 import Maybes
23 import Panic
24 import Util
25
26 import Control.Monad
27
28
29 -- Note [What is shortcutting]
30 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
31 --
32 -- Consider this Cmm code:
33 --
34 -- L1: ...
35 -- goto L2;
36 -- L2: goto L3;
37 -- L3: ...
38 --
39 -- Here L2 is an empty block and contains only an unconditional branch
40 -- to L3. In this situation any block that jumps to L2 can jump
41 -- directly to L3:
42 --
43 -- L1: ...
44 -- goto L3;
45 -- L2: goto L3;
46 -- L3: ...
47 --
48 -- In this situation we say that we shortcut L2 to L3. One of
49 -- consequences of shortcutting is that some blocks of code may become
50 -- unreachable (in the example above this is true for L2).
51
52
53 -- Note [Control-flow optimisations]
54 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55 --
56 -- This optimisation does three things:
57 --
58 -- - If a block finishes in an unconditional branch to another block
59 -- and that is the only jump to that block we concatenate the
60 -- destination block at the end of the current one.
61 --
62 -- - If a block finishes in a call whose continuation block is a
63 -- goto, then we can shortcut the destination, making the
64 -- continuation block the destination of the goto - but see Note
65 -- [Shortcut call returns].
66 --
67 -- - For any block that is not a call we try to shortcut the
68 -- destination(s). Additionally, if a block ends with a
69 -- conditional branch we try to invert the condition.
70 --
71 -- Blocks are processed using postorder DFS traversal. A side effect
72 -- of determining traversal order with a graph search is elimination
73 -- of any blocks that are unreachable.
74 --
75 -- Transformations are improved by working from the end of the graph
76 -- towards the beginning, because we may be able to perform many
77 -- shortcuts in one go.
78
79
80 -- Note [Shortcut call returns]
81 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
82 --
83 -- We are going to maintain the "current" graph (LabelMap CmmBlock) as
84 -- we go, and also a mapping from BlockId to BlockId, representing
85 -- continuation labels that we have renamed. This latter mapping is
86 -- important because we might shortcut a CmmCall continuation. For
87 -- example:
88 --
89 -- Sp[0] = L
90 -- call g returns to L
91 -- L: goto M
92 -- M: ...
93 --
94 -- So when we shortcut the L block, we need to replace not only
95 -- the continuation of the call, but also references to L in the
96 -- code (e.g. the assignment Sp[0] = L):
97 --
98 -- Sp[0] = M
99 -- call g returns to M
100 -- M: ...
101 --
102 -- So we keep track of which labels we have renamed and apply the mapping
103 -- at the end with replaceLabels.
104
105
106 -- Note [Shortcut call returns and proc-points]
107 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
108 --
109 -- Consider this code that you might get from a recursive
110 -- let-no-escape:
111 --
112 -- goto L1
113 -- L1:
114 -- if (Hp > HpLim) then L2 else L3
115 -- L2:
116 -- call stg_gc_noregs returns to L4
117 -- L4:
118 -- goto L1
119 -- L3:
120 -- ...
121 -- goto L1
122 --
123 -- Then the control-flow optimiser shortcuts L4. But that turns L1
124 -- into the call-return proc point, and every iteration of the loop
125 -- has to shuffle variables to and from the stack. So we must *not*
126 -- shortcut L4.
127 --
128 -- Moreover not shortcutting call returns is probably fine. If L4 can
129 -- concat with its branch target then it will still do so. And we
130 -- save some compile time because we don't have to traverse all the
131 -- code in replaceLabels.
132 --
133 -- However, we probably do want to do this if we are splitting proc
134 -- points, because L1 will be a proc-point anyway, so merging it with
135 -- L4 reduces the number of proc points. Unfortunately recursive
136 -- let-no-escapes won't generate very good code with proc-point
137 -- splitting on - we should probably compile them to explicitly use
138 -- the native calling convention instead.
139
140 cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
141 cmmCfgOpts split g = fst (blockConcat split g)
142
143 cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
144 cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
145 where (g', env) = blockConcat split g
146 info' = info{ info_tbls = new_info_tbls }
147 new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
148
149 -- If we changed any labels, then we have to update the info tables
150 -- too, except for the top-level info table because that might be
151 -- referred to by other procs.
152 upd_info (k,info)
153 | Just k' <- mapLookup k env
154 = (k', if k' == g_entry g'
155 then info
156 else info{ cit_lbl = infoTblLbl k' })
157 | otherwise
158 = (k,info)
159 cmmCfgOptsProc _ top = top
160
161
162 blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId)
163 blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
164 = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
165 where
166 -- We might be able to shortcut the entry BlockId itself.
167 -- Remember to update the shortcut_map, since we also have to
168 -- update the info_tbls mapping now.
169 (new_entry, shortcut_map')
170 | Just entry_blk <- mapLookup entry_id new_blocks
171 , Just dest <- canShortcut entry_blk
172 = (dest, mapInsert entry_id dest shortcut_map)
173 | otherwise
174 = (entry_id, shortcut_map)
175
176 -- blocks is a list of blocks in DFS postorder, while blockmap is
177 -- a map of blocks. We process each element from blocks and update
178 -- blockmap accordingly
179 blocks = postorderDfs g
180 blockmap = foldr addBlock emptyBody blocks
181
182 -- Accumulator contains three components:
183 -- * map of blocks in a graph
184 -- * map of shortcut labels. See Note [Shortcut call returns]
185 -- * map containing number of predecessors for each block. We discard
186 -- it after we process all blocks.
187 (new_blocks, shortcut_map, _) =
188 foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks
189
190 -- Map of predecessors for initial graph. We increase number of
191 -- predecessors for entry block by one to denote that it is
192 -- target of a jump, even if no block in the current graph jumps
193 -- to it.
194 initialBackEdges = incPreds entry_id (predMap blocks)
195
196 maybe_concat :: CmmBlock
197 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
198 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
199 maybe_concat block (!blocks, !shortcut_map, !backEdges)
200 -- If:
201 -- (1) current block ends with unconditional branch to b' and
202 -- (2) it has exactly one predecessor (namely, current block)
203 --
204 -- Then:
205 -- (1) append b' block at the end of current block
206 -- (2) remove b' from the map of blocks
207 -- (3) remove information about b' from predecessors map
208 --
209 -- Since we know that the block has only one predecessor we call
210 -- mapDelete directly instead of calling decPreds.
211 --
212 -- Note that we always maintain an up-to-date list of predecessors, so
213 -- we can ignore the contents of shortcut_map
214 | CmmBranch b' <- last
215 , hasOnePredecessor b'
216 , Just blk' <- mapLookup b' blocks
217 = let bid' = entryLabel blk'
218 in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
219 , shortcut_map
220 , mapDelete b' backEdges )
221
222 -- If:
223 -- (1) we are splitting proc points (see Note
224 -- [Shortcut call returns and proc-points]) and
225 -- (2) current block is a CmmCall or CmmForeignCall with
226 -- continuation b' and
227 -- (3) we can shortcut that continuation to dest
228 -- Then:
229 -- (1) we change continuation to point to b'
230 -- (2) create mapping from b' to dest
231 -- (3) increase number of predecessors of dest by 1
232 -- (4) decrease number of predecessors of b' by 1
233 --
234 -- Later we will use replaceLabels to substitute all occurrences of b'
235 -- with dest.
236 | splitting_procs
237 , Just b' <- callContinuation_maybe last
238 , Just blk' <- mapLookup b' blocks
239 , Just dest <- canShortcut blk'
240 = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
241 , mapInsert b' dest shortcut_map
242 , decPreds b' $ incPreds dest backEdges )
243
244 -- If:
245 -- (1) a block does not end with a call
246 -- Then:
247 -- (1) if it ends with a conditional attempt to invert the
248 -- conditional
249 -- (2) attempt to shortcut all destination blocks
250 -- (3) if new successors of a block are different from the old ones
251 -- update the of predecessors accordingly
252 --
253 -- A special case of this is a situation when a block ends with an
254 -- unconditional jump to a block that can be shortcut.
255 | Nothing <- callContinuation_maybe last
256 = let oldSuccs = successors last
257 newSuccs = successors swapcond_last
258 in ( mapInsert bid (blockJoinTail head swapcond_last) blocks
259 , shortcut_map
260 , if oldSuccs == newSuccs
261 then backEdges
262 else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
263
264 -- Otherwise don't do anything
265 | otherwise
266 = ( blocks, shortcut_map, backEdges )
267 where
268 (head, last) = blockSplitTail block
269 bid = entryLabel block
270
271 -- Changes continuation of a call to a specified label
272 update_cont dest =
273 case last of
274 CmmCall{} -> last { cml_cont = Just dest }
275 CmmForeignCall{} -> last { succ = dest }
276 _ -> panic "Can't shortcut continuation."
277
278 -- Attempts to shortcut successors of last node
279 shortcut_last = mapSuccessors shortcut last
280 where
281 shortcut l =
282 case mapLookup l blocks of
283 Just b | Just dest <- canShortcut b -> dest
284 _otherwise -> l
285
286 -- See Note [Invert Cmm conditionals]
287 swapcond_last
288 | CmmCondBranch cond t f l <- shortcut_last
289 , hasOnePredecessor t -- inverting will make t a fallthrough
290 , likelyTrue l || (numPreds f > 1)
291 , Just cond' <- maybeInvertCmmExpr cond
292 = CmmCondBranch cond' f t (invertLikeliness l)
293
294 | otherwise
295 = shortcut_last
296
297 likelyTrue (Just True) = True
298 likelyTrue _ = False
299
300 invertLikeliness :: Maybe Bool -> Maybe Bool
301 invertLikeliness = fmap not
302
303 -- Number of predecessors for a block
304 numPreds bid = mapLookup bid backEdges `orElse` 0
305
306 hasOnePredecessor b = numPreds b == 1
307
308 {-
309 Note [Invert Cmm conditionals]
310 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
311 The native code generator always produces jumps to the true branch.
312 Falling through to the false branch is however faster. So we try to
313 arrange for that to happen.
314 This means we invert the condition if:
315 * The likely path will become a fallthrough.
316 * We can't guarantee a fallthrough for the false branch but for the
317 true branch.
318
319 In some cases it's faster to avoid inverting when the false branch is likely.
320 However determining when that is the case is neither easy nor cheap so for
321 now we always invert as this produces smaller binaries and code that is
322 equally fast on average. (On an i7-6700K)
323
324 TODO:
325 There is also the edge case when both branches have multiple predecessors.
326 In this case we could assume that we will end up with a jump for BOTH
327 branches. In this case it might be best to put the likely path in the true
328 branch especially if there are large numbers of predecessors as this saves
329 us the jump thats not taken. However I haven't tested this and as of early
330 2018 we almost never generate cmm where this would apply.
331 -}
332
333 -- Functions for incrementing and decrementing number of predecessors. If
334 -- decrementing would set the predecessor count to 0, we remove entry from the
335 -- map.
336 -- Invariant: if a block has no predecessors it should be dropped from the
337 -- graph because it is unreachable. maybe_concat is constructed to maintain
338 -- that invariant, but calling replaceLabels may introduce unreachable blocks.
339 -- We rely on subsequent passes in the Cmm pipeline to remove unreachable
340 -- blocks.
341 incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int
342 incPreds bid edges = mapInsertWith (+) bid 1 edges
343 decPreds bid edges = case mapLookup bid edges of
344 Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
345 Just _ -> mapDelete bid edges
346 _ -> edges
347
348
349 -- Checks if a block consists only of "goto dest". If it does than we return
350 -- "Just dest" label. See Note [What is shortcutting]
351 canShortcut :: CmmBlock -> Maybe BlockId
352 canShortcut block
353 | (_, middle, CmmBranch dest) <- blockSplit block
354 , all dont_care $ blockToList middle
355 = Just dest
356 | otherwise
357 = Nothing
358 where dont_care CmmComment{} = True
359 dont_care CmmTick{} = True
360 dont_care _other = False
361
362 -- Concatenates two blocks. First one is assumed to be open on exit, the second
363 -- is assumed to be closed on entry (i.e. it has a label attached to it, which
364 -- the splice function removes by calling snd on result of blockSplitHead).
365 splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
366 splice head rest = entry `blockJoinHead` code0 `blockAppend` code1
367 where (CmmEntry lbl sc0, code0) = blockSplitHead head
368 (CmmEntry _ sc1, code1) = blockSplitHead rest
369 entry = CmmEntry lbl (combineTickScopes sc0 sc1)
370
371 -- If node is a call with continuation call return Just label of that
372 -- continuation. Otherwise return Nothing.
373 callContinuation_maybe :: CmmNode O C -> Maybe BlockId
374 callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
375 callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
376 callContinuation_maybe _ = Nothing
377
378
379 -- Map over the CmmGraph, replacing each label with its mapping in the
380 -- supplied LabelMap.
381 replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph
382 replaceLabels env g
383 | mapNull env = g
384 | otherwise = replace_eid $ mapGraphNodes1 txnode g
385 where
386 replace_eid g = g {g_entry = lookup (g_entry g)}
387 lookup id = mapLookup id env `orElse` id
388
389 txnode :: CmmNode e x -> CmmNode e x
390 txnode (CmmBranch bid) = CmmBranch (lookup bid)
391 txnode (CmmCondBranch p t f l) =
392 mkCmmCondBranch (exp p) (lookup t) (lookup f) l
393 txnode (CmmSwitch e ids) =
394 CmmSwitch (exp e) (mapSwitchTargets lookup ids)
395 txnode (CmmCall t k rg a res r) =
396 CmmCall (exp t) (liftM lookup k) rg a res r
397 txnode fc@CmmForeignCall{} =
398 fc{ args = map exp (args fc), succ = lookup (succ fc) }
399 txnode other = mapExpDeep exp other
400
401 exp :: CmmExpr -> CmmExpr
402 exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
403 exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
404 exp e = e
405
406 mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C
407 mkCmmCondBranch p t f l =
408 if t == f then CmmBranch t else CmmCondBranch p t f l
409
410 -- Build a map from a block to its set of predecessors.
411 predMap :: [CmmBlock] -> LabelMap Int
412 predMap blocks = foldr add_preds mapEmpty blocks
413 where
414 add_preds block env = foldr add env (successors block)
415 where add lbl env = mapInsertWith (+) lbl 1 env
416
417 -- Removing unreachable blocks
418 removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
419 removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
420 | used_blocks `lengthLessThan` mapSize (toBlockMap g)
421 = CmmProc info' lbl live g'
422 | otherwise
423 = proc
424 where
425 g' = ofBlockList (g_entry g) used_blocks
426 info' = info { info_tbls = keep_used (info_tbls info) }
427 -- Remove any info_tbls for unreachable
428
429 keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable
430 keep_used bs = mapFoldWithKey keep mapEmpty bs
431
432 keep :: Label -> CmmInfoTable -> LabelMap CmmInfoTable -> LabelMap CmmInfoTable
433 keep l i env | l `setMember` used_lbls = mapInsert l i env
434 | otherwise = env
435
436 used_blocks :: [CmmBlock]
437 used_blocks = postorderDfs g
438
439 used_lbls :: LabelSet
440 used_lbls = setFromList $ map entryLabel used_blocks