d8740df3f2cb94d1e897b0d020b87d2d6fc28890
[ghc.git] / compiler / cmm / CmmContFlowOpt.hs
1 {-# LANGUAGE GADTs #-}
2 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
3 module CmmContFlowOpt
4 ( cmmCfgOpts
5 , cmmCfgOptsProc
6 , removeUnreachableBlocksProc
7 , replaceLabels
8 )
9 where
10
11 import Hoopl
12 import BlockId
13 import Cmm
14 import CmmUtils
15 import CmmSwitch (mapSwitchTargets)
16 import Maybes
17 import Panic
18
19 import Control.Monad
20 import Prelude hiding (succ, unzip, zip)
21
22
23 -- Note [What is shortcutting]
24 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
25 --
26 -- Consider this Cmm code:
27 --
28 -- L1: ...
29 -- goto L2;
30 -- L2: goto L3;
31 -- L3: ...
32 --
33 -- Here L2 is an empty block and contains only an unconditional branch
34 -- to L3. In this situation any block that jumps to L2 can jump
35 -- directly to L3:
36 --
37 -- L1: ...
38 -- goto L3;
39 -- L2: goto L3;
40 -- L3: ...
41 --
42 -- In this situation we say that we shortcut L2 to L3. One of
43 -- consequences of shortcutting is that some blocks of code may become
44 -- unreachable (in the example above this is true for L2).
45
46
47 -- Note [Control-flow optimisations]
48 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
49 --
50 -- This optimisation does three things:
51 --
52 -- - If a block finishes in an unconditonal branch to another block
53 -- and that is the only jump to that block we concatenate the
54 -- destination block at the end of the current one.
55 --
56 -- - If a block finishes in a call whose continuation block is a
57 -- goto, then we can shortcut the destination, making the
58 -- continuation block the destination of the goto - but see Note
59 -- [Shortcut call returns].
60 --
61 -- - For any block that is not a call we try to shortcut the
62 -- destination(s). Additionally, if a block ends with a
63 -- conditional branch we try to invert the condition.
64 --
65 -- Blocks are processed using postorder DFS traversal. A side effect
66 -- of determining traversal order with a graph search is elimination
67 -- of any blocks that are unreachable.
68 --
69 -- Transformations are improved by working from the end of the graph
70 -- towards the beginning, because we may be able to perform many
71 -- shortcuts in one go.
72
73
74 -- Note [Shortcut call returns]
75 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
76 --
77 -- We are going to maintain the "current" graph (LabelMap CmmBlock) as
78 -- we go, and also a mapping from BlockId to BlockId, representing
79 -- continuation labels that we have renamed. This latter mapping is
80 -- important because we might shortcut a CmmCall continuation. For
81 -- example:
82 --
83 -- Sp[0] = L
84 -- call g returns to L
85 -- L: goto M
86 -- M: ...
87 --
88 -- So when we shortcut the L block, we need to replace not only
89 -- the continuation of the call, but also references to L in the
90 -- code (e.g. the assignment Sp[0] = L):
91 --
92 -- Sp[0] = M
93 -- call g returns to M
94 -- M: ...
95 --
96 -- So we keep track of which labels we have renamed and apply the mapping
97 -- at the end with replaceLabels.
98
99
100 -- Note [Shortcut call returns and proc-points]
101 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
102 --
103 -- Consider this code that you might get from a recursive
104 -- let-no-escape:
105 --
106 -- goto L1
107 -- L1:
108 -- if (Hp > HpLim) then L2 else L3
109 -- L2:
110 -- call stg_gc_noregs returns to L4
111 -- L4:
112 -- goto L1
113 -- L3:
114 -- ...
115 -- goto L1
116 --
117 -- Then the control-flow optimiser shortcuts L4. But that turns L1
118 -- into the call-return proc point, and every iteration of the loop
119 -- has to shuffle variables to and from the stack. So we must *not*
120 -- shortcut L4.
121 --
122 -- Moreover not shortcutting call returns is probably fine. If L4 can
123 -- concat with its branch target then it will still do so. And we
124 -- save some compile time because we don't have to traverse all the
125 -- code in replaceLabels.
126 --
127 -- However, we probably do want to do this if we are splitting proc
128 -- points, because L1 will be a proc-point anyway, so merging it with
129 -- L4 reduces the number of proc points. Unfortunately recursive
130 -- let-no-escapes won't generate very good code with proc-point
131 -- splitting on - we should probably compile them to explicitly use
132 -- the native calling convention instead.
133
134 cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
135 cmmCfgOpts split g = fst (blockConcat split g)
136
137 cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
138 cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
139 where (g', env) = blockConcat split g
140 info' = info{ info_tbls = new_info_tbls }
141 new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
142
143 -- If we changed any labels, then we have to update the info tables
144 -- too, except for the top-level info table because that might be
145 -- referred to by other procs.
146 upd_info (k,info)
147 | Just k' <- mapLookup k env
148 = (k', if k' == g_entry g'
149 then info
150 else info{ cit_lbl = infoTblLbl k' })
151 | otherwise
152 = (k,info)
153 cmmCfgOptsProc _ top = top
154
155
156 blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId)
157 blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
158 = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
159 where
160 -- We might be able to shortcut the entry BlockId itself.
161 -- Remember to update the shortcut_map, since we also have to
162 -- update the info_tbls mapping now.
163 (new_entry, shortcut_map')
164 | Just entry_blk <- mapLookup entry_id new_blocks
165 , Just dest <- canShortcut entry_blk
166 = (dest, mapInsert entry_id dest shortcut_map)
167 | otherwise
168 = (entry_id, shortcut_map)
169
170 -- blocks is a list of blocks in DFS postorder, while blockmap is
171 -- a map of blocks. We process each element from blocks and update
172 -- blockmap accordingly
173 blocks = postorderDfs g
174 blockmap = foldr addBlock emptyBody blocks
175
176 -- Accumulator contains three components:
177 -- * map of blocks in a graph
178 -- * map of shortcut labels. See Note [Shortcut call returns]
179 -- * map containing number of predecessors for each block. We discard
180 -- it after we process all blocks.
181 (new_blocks, shortcut_map, _) =
182 foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks
183
184 -- Map of predecessors for initial graph. We increase number of
185 -- predecessors for entry block by one to denote that it is
186 -- target of a jump, even if no block in the current graph jumps
187 -- to it.
188 initialBackEdges = incPreds entry_id (predMap blocks)
189
190 maybe_concat :: CmmBlock
191 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
192 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
193 maybe_concat block (blocks, shortcut_map, backEdges)
194 -- If:
195 -- (1) current block ends with unconditional branch to b' and
196 -- (2) it has exactly one predecessor (namely, current block)
197 --
198 -- Then:
199 -- (1) append b' block at the end of current block
200 -- (2) remove b' from the map of blocks
201 -- (3) remove information about b' from predecessors map
202 --
203 -- Since we know that the block has only one predecessor we call
204 -- mapDelete directly instead of calling decPreds.
205 --
206 -- Note that we always maintain an up-to-date list of predecessors, so
207 -- we can ignore the contents of shortcut_map
208 | CmmBranch b' <- last
209 , hasOnePredecessor b'
210 , Just blk' <- mapLookup b' blocks
211 = let bid' = entryLabel blk'
212 in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
213 , shortcut_map
214 , mapDelete b' backEdges )
215
216 -- If:
217 -- (1) we are splitting proc points (see Note
218 -- [Shortcut call returns and proc-points]) and
219 -- (2) current block is a CmmCall or CmmForeignCall with
220 -- continuation b' and
221 -- (3) we can shortcut that continuation to dest
222 -- Then:
223 -- (1) we change continuation to point to b'
224 -- (2) create mapping from b' to dest
225 -- (3) increase number of predecessors of dest by 1
226 -- (4) decrease number of predecessors of b' by 1
227 --
228 -- Later we will use replaceLabels to substitute all occurrences of b'
229 -- with dest.
230 | splitting_procs
231 , Just b' <- callContinuation_maybe last
232 , Just blk' <- mapLookup b' blocks
233 , Just dest <- canShortcut blk'
234 = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
235 , mapInsert b' dest shortcut_map
236 , decPreds b' $ incPreds dest backEdges )
237
238 -- If:
239 -- (1) a block does not end with a call
240 -- Then:
241 -- (1) if it ends with a conditional attempt to invert the
242 -- conditional
243 -- (2) attempt to shortcut all destination blocks
244 -- (3) if new successors of a block are different from the old ones
245 -- update the of predecessors accordingly
246 --
247 -- A special case of this is a situation when a block ends with an
248 -- unconditional jump to a block that can be shortcut.
249 | Nothing <- callContinuation_maybe last
250 = let oldSuccs = successors last
251 newSuccs = successors swapcond_last
252 in ( mapInsert bid (blockJoinTail head swapcond_last) blocks
253 , shortcut_map
254 , if oldSuccs == newSuccs
255 then backEdges
256 else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
257
258 -- Otherwise don't do anything
259 | otherwise
260 = ( blocks, shortcut_map, backEdges )
261 where
262 (head, last) = blockSplitTail block
263 bid = entryLabel block
264
265 -- Changes continuation of a call to a specified label
266 update_cont dest =
267 case last of
268 CmmCall{} -> last { cml_cont = Just dest }
269 CmmForeignCall{} -> last { succ = dest }
270 _ -> panic "Can't shortcut continuation."
271
272 -- Attempts to shortcut successors of last node
273 shortcut_last = mapSuccessors shortcut last
274 where
275 shortcut l =
276 case mapLookup l blocks of
277 Just b | Just dest <- canShortcut b -> dest
278 _otherwise -> l
279
280 -- For a conditional, we invert the conditional if that would make it
281 -- more likely that the branch-not-taken case becomes a fallthrough.
282 -- This helps the native codegen a little bit, and probably has no
283 -- effect on LLVM. It's convenient to do it here, where we have the
284 -- information about predecessors.
285 swapcond_last
286 | CmmCondBranch cond t f l <- shortcut_last
287 , likelyFalse l
288 , numPreds f > 1
289 , hasOnePredecessor t
290 , Just cond' <- maybeInvertCmmExpr cond
291 = CmmCondBranch cond' f t (invertLikeliness l)
292
293 | otherwise
294 = shortcut_last
295
296 likelyFalse (Just False) = True
297 likelyFalse Nothing = True
298 likelyFalse _ = False
299
300 invertLikeliness (Just b) = Just (not b)
301 invertLikeliness Nothing = Nothing
302
303 -- Number of predecessors for a block
304 numPreds bid = mapLookup bid backEdges `orElse` 0
305
306 hasOnePredecessor b = numPreds b == 1
307
308 -- Functions for incrementing and decrementing number of predecessors. If
309 -- decrementing would set the predecessor count to 0, we remove entry from the
310 -- map.
311 -- Invariant: if a block has no predecessors it should be dropped from the
312 -- graph because it is unreachable. maybe_concat is constructed to maintain
313 -- that invariant, but calling replaceLabels may introduce unreachable blocks.
314 -- We rely on subsequent passes in the Cmm pipeline to remove unreachable
315 -- blocks.
316 incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int
317 incPreds bid edges = mapInsertWith (+) bid 1 edges
318 decPreds bid edges = case mapLookup bid edges of
319 Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
320 Just _ -> mapDelete bid edges
321 _ -> edges
322
323
324 -- Checks if a block consists only of "goto dest". If it does than we return
325 -- "Just dest" label. See Note [What is shortcutting]
326 canShortcut :: CmmBlock -> Maybe BlockId
327 canShortcut block
328 | (_, middle, CmmBranch dest) <- blockSplit block
329 , all dont_care $ blockToList middle
330 = Just dest
331 | otherwise
332 = Nothing
333 where dont_care CmmComment{} = True
334 dont_care CmmTick{} = True
335 dont_care _other = False
336
337 -- Concatenates two blocks. First one is assumed to be open on exit, the second
338 -- is assumed to be closed on entry (i.e. it has a label attached to it, which
339 -- the splice function removes by calling snd on result of blockSplitHead).
340 splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
341 splice head rest = entry `blockJoinHead` code0 `blockAppend` code1
342 where (CmmEntry lbl sc0, code0) = blockSplitHead head
343 (CmmEntry _ sc1, code1) = blockSplitHead rest
344 entry = CmmEntry lbl (combineTickScopes sc0 sc1)
345
346 -- If node is a call with continuation call return Just label of that
347 -- continuation. Otherwise return Nothing.
348 callContinuation_maybe :: CmmNode O C -> Maybe BlockId
349 callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
350 callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
351 callContinuation_maybe _ = Nothing
352
353
354 -- Map over the CmmGraph, replacing each label with its mapping in the
355 -- supplied LabelMap.
356 replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph
357 replaceLabels env g
358 | mapNull env = g
359 | otherwise = replace_eid $ mapGraphNodes1 txnode g
360 where
361 replace_eid g = g {g_entry = lookup (g_entry g)}
362 lookup id = mapLookup id env `orElse` id
363
364 txnode :: CmmNode e x -> CmmNode e x
365 txnode (CmmBranch bid) = CmmBranch (lookup bid)
366 txnode (CmmCondBranch p t f l) =
367 mkCmmCondBranch (exp p) (lookup t) (lookup f) l
368 txnode (CmmSwitch e ids) =
369 CmmSwitch (exp e) (mapSwitchTargets lookup ids)
370 txnode (CmmCall t k rg a res r) =
371 CmmCall (exp t) (liftM lookup k) rg a res r
372 txnode fc@CmmForeignCall{} =
373 fc{ args = map exp (args fc), succ = lookup (succ fc) }
374 txnode other = mapExpDeep exp other
375
376 exp :: CmmExpr -> CmmExpr
377 exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
378 exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
379 exp e = e
380
381 mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C
382 mkCmmCondBranch p t f l =
383 if t == f then CmmBranch t else CmmCondBranch p t f l
384
385 -- Build a map from a block to its set of predecessors.
386 predMap :: [CmmBlock] -> LabelMap Int
387 predMap blocks = foldr add_preds mapEmpty blocks
388 where
389 add_preds block env = foldr add env (successors block)
390 where add lbl env = mapInsertWith (+) lbl 1 env
391
392 -- Removing unreachable blocks
393 removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
394 removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
395 | length used_blocks < mapSize (toBlockMap g)
396 = CmmProc info' lbl live g'
397 | otherwise
398 = proc
399 where
400 g' = ofBlockList (g_entry g) used_blocks
401 info' = info { info_tbls = keep_used (info_tbls info) }
402 -- Remove any info_tbls for unreachable
403
404 keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable
405 keep_used bs = mapFoldWithKey keep mapEmpty bs
406
407 keep :: Label -> CmmInfoTable -> LabelMap CmmInfoTable -> LabelMap CmmInfoTable
408 keep l i env | l `setMember` used_lbls = mapInsert l i env
409 | otherwise = env
410
411 used_blocks :: [CmmBlock]
412 used_blocks = postorderDfs g
413
414 used_lbls :: LabelSet
415 used_lbls = foldr (setInsert . entryLabel) setEmpty used_blocks