Pull recent Hadrian changes from upstream
[ghc.git] / compiler / cmm / CmmContFlowOpt.hs
1 {-# LANGUAGE GADTs #-}
2 {-# LANGUAGE BangPatterns #-}
3 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
4 module CmmContFlowOpt
5 ( cmmCfgOpts
6 , cmmCfgOptsProc
7 , removeUnreachableBlocksProc
8 , replaceLabels
9 )
10 where
11
12 import GhcPrelude hiding (succ, unzip, zip)
13
14 import Hoopl.Block
15 import Hoopl.Collections
16 import Hoopl.Graph
17 import Hoopl.Label
18 import BlockId
19 import Cmm
20 import CmmUtils
21 import CmmSwitch (mapSwitchTargets)
22 import Maybes
23 import Panic
24 import Util
25
26 import Control.Monad
27
28
29 -- Note [What is shortcutting]
30 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
31 --
32 -- Consider this Cmm code:
33 --
34 -- L1: ...
35 -- goto L2;
36 -- L2: goto L3;
37 -- L3: ...
38 --
39 -- Here L2 is an empty block and contains only an unconditional branch
40 -- to L3. In this situation any block that jumps to L2 can jump
41 -- directly to L3:
42 --
43 -- L1: ...
44 -- goto L3;
45 -- L2: goto L3;
46 -- L3: ...
47 --
48 -- In this situation we say that we shortcut L2 to L3. One of
49 -- consequences of shortcutting is that some blocks of code may become
50 -- unreachable (in the example above this is true for L2).
51
52
53 -- Note [Control-flow optimisations]
54 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55 --
56 -- This optimisation does three things:
57 --
58 -- - If a block finishes in an unconditional branch to another block
59 -- and that is the only jump to that block we concatenate the
60 -- destination block at the end of the current one.
61 --
62 -- - If a block finishes in a call whose continuation block is a
63 -- goto, then we can shortcut the destination, making the
64 -- continuation block the destination of the goto - but see Note
65 -- [Shortcut call returns].
66 --
67 -- - For any block that is not a call we try to shortcut the
68 -- destination(s). Additionally, if a block ends with a
69 -- conditional branch we try to invert the condition.
70 --
71 -- Blocks are processed using postorder DFS traversal. A side effect
72 -- of determining traversal order with a graph search is elimination
73 -- of any blocks that are unreachable.
74 --
75 -- Transformations are improved by working from the end of the graph
76 -- towards the beginning, because we may be able to perform many
77 -- shortcuts in one go.
78
79
80 -- Note [Shortcut call returns]
81 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
82 --
83 -- We are going to maintain the "current" graph (LabelMap CmmBlock) as
84 -- we go, and also a mapping from BlockId to BlockId, representing
85 -- continuation labels that we have renamed. This latter mapping is
86 -- important because we might shortcut a CmmCall continuation. For
87 -- example:
88 --
89 -- Sp[0] = L
90 -- call g returns to L
91 -- L: goto M
92 -- M: ...
93 --
94 -- So when we shortcut the L block, we need to replace not only
95 -- the continuation of the call, but also references to L in the
96 -- code (e.g. the assignment Sp[0] = L):
97 --
98 -- Sp[0] = M
99 -- call g returns to M
100 -- M: ...
101 --
102 -- So we keep track of which labels we have renamed and apply the mapping
103 -- at the end with replaceLabels.
104
105
106 -- Note [Shortcut call returns and proc-points]
107 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
108 --
109 -- Consider this code that you might get from a recursive
110 -- let-no-escape:
111 --
112 -- goto L1
113 -- L1:
114 -- if (Hp > HpLim) then L2 else L3
115 -- L2:
116 -- call stg_gc_noregs returns to L4
117 -- L4:
118 -- goto L1
119 -- L3:
120 -- ...
121 -- goto L1
122 --
123 -- Then the control-flow optimiser shortcuts L4. But that turns L1
124 -- into the call-return proc point, and every iteration of the loop
125 -- has to shuffle variables to and from the stack. So we must *not*
126 -- shortcut L4.
127 --
128 -- Moreover not shortcutting call returns is probably fine. If L4 can
129 -- concat with its branch target then it will still do so. And we
130 -- save some compile time because we don't have to traverse all the
131 -- code in replaceLabels.
132 --
133 -- However, we probably do want to do this if we are splitting proc
134 -- points, because L1 will be a proc-point anyway, so merging it with
135 -- L4 reduces the number of proc points. Unfortunately recursive
136 -- let-no-escapes won't generate very good code with proc-point
137 -- splitting on - we should probably compile them to explicitly use
138 -- the native calling convention instead.
139
140 cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
141 cmmCfgOpts split g = fst (blockConcat split g)
142
143 cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
144 cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
145 where (g', env) = blockConcat split g
146 info' = info{ info_tbls = new_info_tbls }
147 new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
148
149 -- If we changed any labels, then we have to update the info tables
150 -- too, except for the top-level info table because that might be
151 -- referred to by other procs.
152 upd_info (k,info)
153 | Just k' <- mapLookup k env
154 = (k', if k' == g_entry g'
155 then info
156 else info{ cit_lbl = infoTblLbl k' })
157 | otherwise
158 = (k,info)
159 cmmCfgOptsProc _ top = top
160
161
162 blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId)
163 blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
164 = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
165 where
166 -- We might be able to shortcut the entry BlockId itself.
167 -- Remember to update the shortcut_map, since we also have to
168 -- update the info_tbls mapping now.
169 (new_entry, shortcut_map')
170 | Just entry_blk <- mapLookup entry_id new_blocks
171 , Just dest <- canShortcut entry_blk
172 = (dest, mapInsert entry_id dest shortcut_map)
173 | otherwise
174 = (entry_id, shortcut_map)
175
176 -- blocks is a list of blocks in DFS postorder, while blockmap is
177 -- a map of blocks. We process each element from blocks and update
178 -- blockmap accordingly
179 blocks = postorderDfs g
180 blockmap = foldr addBlock emptyBody blocks
181
182 -- Accumulator contains three components:
183 -- * map of blocks in a graph
184 -- * map of shortcut labels. See Note [Shortcut call returns]
185 -- * map containing number of predecessors for each block. We discard
186 -- it after we process all blocks.
187 (new_blocks, shortcut_map, _) =
188 foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks
189
190 -- Map of predecessors for initial graph. We increase number of
191 -- predecessors for entry block by one to denote that it is
192 -- target of a jump, even if no block in the current graph jumps
193 -- to it.
194 initialBackEdges = incPreds entry_id (predMap blocks)
195
196 maybe_concat :: CmmBlock
197 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
198 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
199 maybe_concat block (!blocks, !shortcut_map, !backEdges)
200 -- If:
201 -- (1) current block ends with unconditional branch to b' and
202 -- (2) it has exactly one predecessor (namely, current block)
203 --
204 -- Then:
205 -- (1) append b' block at the end of current block
206 -- (2) remove b' from the map of blocks
207 -- (3) remove information about b' from predecessors map
208 --
209 -- Since we know that the block has only one predecessor we call
210 -- mapDelete directly instead of calling decPreds.
211 --
212 -- Note that we always maintain an up-to-date list of predecessors, so
213 -- we can ignore the contents of shortcut_map
214 | CmmBranch b' <- last
215 , hasOnePredecessor b'
216 , Just blk' <- mapLookup b' blocks
217 = let bid' = entryLabel blk'
218 in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
219 , shortcut_map
220 , mapDelete b' backEdges )
221
222 -- If:
223 -- (1) we are splitting proc points (see Note
224 -- [Shortcut call returns and proc-points]) and
225 -- (2) current block is a CmmCall or CmmForeignCall with
226 -- continuation b' and
227 -- (3) we can shortcut that continuation to dest
228 -- Then:
229 -- (1) we change continuation to point to b'
230 -- (2) create mapping from b' to dest
231 -- (3) increase number of predecessors of dest by 1
232 -- (4) decrease number of predecessors of b' by 1
233 --
234 -- Later we will use replaceLabels to substitute all occurrences of b'
235 -- with dest.
236 | splitting_procs
237 , Just b' <- callContinuation_maybe last
238 , Just blk' <- mapLookup b' blocks
239 , Just dest <- canShortcut blk'
240 = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
241 , mapInsert b' dest shortcut_map
242 , decPreds b' $ incPreds dest backEdges )
243
244 -- If:
245 -- (1) a block does not end with a call
246 -- Then:
247 -- (1) if it ends with a conditional attempt to invert the
248 -- conditional
249 -- (2) attempt to shortcut all destination blocks
250 -- (3) if new successors of a block are different from the old ones
251 -- update the of predecessors accordingly
252 --
253 -- A special case of this is a situation when a block ends with an
254 -- unconditional jump to a block that can be shortcut.
255 | Nothing <- callContinuation_maybe last
256 = let oldSuccs = successors last
257 newSuccs = successors swapcond_last
258 in ( mapInsert bid (blockJoinTail head swapcond_last) blocks
259 , shortcut_map
260 , if oldSuccs == newSuccs
261 then backEdges
262 else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
263
264 -- Otherwise don't do anything
265 | otherwise
266 = ( blocks, shortcut_map, backEdges )
267 where
268 (head, last) = blockSplitTail block
269 bid = entryLabel block
270
271 -- Changes continuation of a call to a specified label
272 update_cont dest =
273 case last of
274 CmmCall{} -> last { cml_cont = Just dest }
275 CmmForeignCall{} -> last { succ = dest }
276 _ -> panic "Can't shortcut continuation."
277
278 -- Attempts to shortcut successors of last node
279 shortcut_last = mapSuccessors shortcut last
280 where
281 shortcut l =
282 case mapLookup l blocks of
283 Just b | Just dest <- canShortcut b -> dest
284 _otherwise -> l
285
286 -- For a conditional, we invert the conditional if that would make it
287 -- more likely that the branch-not-taken case becomes a fallthrough.
288 -- This helps the native codegen a little bit, and probably has no
289 -- effect on LLVM. It's convenient to do it here, where we have the
290 -- information about predecessors.
291 swapcond_last
292 | CmmCondBranch cond t f l <- shortcut_last
293 , likelyFalse l
294 , numPreds f > 1
295 , hasOnePredecessor t
296 , Just cond' <- maybeInvertCmmExpr cond
297 = CmmCondBranch cond' f t (invertLikeliness l)
298
299 | otherwise
300 = shortcut_last
301
302 likelyFalse (Just False) = True
303 likelyFalse Nothing = True
304 likelyFalse _ = False
305
306 invertLikeliness (Just b) = Just (not b)
307 invertLikeliness Nothing = Nothing
308
309 -- Number of predecessors for a block
310 numPreds bid = mapLookup bid backEdges `orElse` 0
311
312 hasOnePredecessor b = numPreds b == 1
313
314 -- Functions for incrementing and decrementing number of predecessors. If
315 -- decrementing would set the predecessor count to 0, we remove entry from the
316 -- map.
317 -- Invariant: if a block has no predecessors it should be dropped from the
318 -- graph because it is unreachable. maybe_concat is constructed to maintain
319 -- that invariant, but calling replaceLabels may introduce unreachable blocks.
320 -- We rely on subsequent passes in the Cmm pipeline to remove unreachable
321 -- blocks.
322 incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int
323 incPreds bid edges = mapInsertWith (+) bid 1 edges
324 decPreds bid edges = case mapLookup bid edges of
325 Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
326 Just _ -> mapDelete bid edges
327 _ -> edges
328
329
330 -- Checks if a block consists only of "goto dest". If it does than we return
331 -- "Just dest" label. See Note [What is shortcutting]
332 canShortcut :: CmmBlock -> Maybe BlockId
333 canShortcut block
334 | (_, middle, CmmBranch dest) <- blockSplit block
335 , all dont_care $ blockToList middle
336 = Just dest
337 | otherwise
338 = Nothing
339 where dont_care CmmComment{} = True
340 dont_care CmmTick{} = True
341 dont_care _other = False
342
343 -- Concatenates two blocks. First one is assumed to be open on exit, the second
344 -- is assumed to be closed on entry (i.e. it has a label attached to it, which
345 -- the splice function removes by calling snd on result of blockSplitHead).
346 splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
347 splice head rest = entry `blockJoinHead` code0 `blockAppend` code1
348 where (CmmEntry lbl sc0, code0) = blockSplitHead head
349 (CmmEntry _ sc1, code1) = blockSplitHead rest
350 entry = CmmEntry lbl (combineTickScopes sc0 sc1)
351
352 -- If node is a call with continuation call return Just label of that
353 -- continuation. Otherwise return Nothing.
354 callContinuation_maybe :: CmmNode O C -> Maybe BlockId
355 callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
356 callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
357 callContinuation_maybe _ = Nothing
358
359
360 -- Map over the CmmGraph, replacing each label with its mapping in the
361 -- supplied LabelMap.
362 replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph
363 replaceLabels env g
364 | mapNull env = g
365 | otherwise = replace_eid $ mapGraphNodes1 txnode g
366 where
367 replace_eid g = g {g_entry = lookup (g_entry g)}
368 lookup id = mapLookup id env `orElse` id
369
370 txnode :: CmmNode e x -> CmmNode e x
371 txnode (CmmBranch bid) = CmmBranch (lookup bid)
372 txnode (CmmCondBranch p t f l) =
373 mkCmmCondBranch (exp p) (lookup t) (lookup f) l
374 txnode (CmmSwitch e ids) =
375 CmmSwitch (exp e) (mapSwitchTargets lookup ids)
376 txnode (CmmCall t k rg a res r) =
377 CmmCall (exp t) (liftM lookup k) rg a res r
378 txnode fc@CmmForeignCall{} =
379 fc{ args = map exp (args fc), succ = lookup (succ fc) }
380 txnode other = mapExpDeep exp other
381
382 exp :: CmmExpr -> CmmExpr
383 exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
384 exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
385 exp e = e
386
387 mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C
388 mkCmmCondBranch p t f l =
389 if t == f then CmmBranch t else CmmCondBranch p t f l
390
391 -- Build a map from a block to its set of predecessors.
392 predMap :: [CmmBlock] -> LabelMap Int
393 predMap blocks = foldr add_preds mapEmpty blocks
394 where
395 add_preds block env = foldr add env (successors block)
396 where add lbl env = mapInsertWith (+) lbl 1 env
397
398 -- Removing unreachable blocks
399 removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
400 removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
401 | used_blocks `lengthLessThan` mapSize (toBlockMap g)
402 = CmmProc info' lbl live g'
403 | otherwise
404 = proc
405 where
406 g' = ofBlockList (g_entry g) used_blocks
407 info' = info { info_tbls = keep_used (info_tbls info) }
408 -- Remove any info_tbls for unreachable
409
410 keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable
411 keep_used bs = mapFoldWithKey keep mapEmpty bs
412
413 keep :: Label -> CmmInfoTable -> LabelMap CmmInfoTable -> LabelMap CmmInfoTable
414 keep l i env | l `setMember` used_lbls = mapInsert l i env
415 | otherwise = env
416
417 used_blocks :: [CmmBlock]
418 used_blocks = postorderDfs g
419
420 used_lbls :: LabelSet
421 used_lbls = setFromList $ map entryLabel used_blocks