Hoopl: remove dependency on Hoopl package
[ghc.git] / compiler / cmm / CmmContFlowOpt.hs
1 {-# LANGUAGE GADTs #-}
2 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
3 module CmmContFlowOpt
4 ( cmmCfgOpts
5 , cmmCfgOptsProc
6 , removeUnreachableBlocksProc
7 , replaceLabels
8 )
9 where
10
11 import Hoopl.Block
12 import Hoopl.Collections
13 import Hoopl.Graph
14 import Hoopl.Label
15 import BlockId
16 import Cmm
17 import CmmUtils
18 import CmmSwitch (mapSwitchTargets)
19 import Maybes
20 import Panic
21 import Util
22
23 import Control.Monad
24 import Prelude hiding (succ, unzip, zip)
25
26
27 -- Note [What is shortcutting]
28 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
29 --
30 -- Consider this Cmm code:
31 --
32 -- L1: ...
33 -- goto L2;
34 -- L2: goto L3;
35 -- L3: ...
36 --
37 -- Here L2 is an empty block and contains only an unconditional branch
38 -- to L3. In this situation any block that jumps to L2 can jump
39 -- directly to L3:
40 --
41 -- L1: ...
42 -- goto L3;
43 -- L2: goto L3;
44 -- L3: ...
45 --
46 -- In this situation we say that we shortcut L2 to L3. One of
47 -- consequences of shortcutting is that some blocks of code may become
48 -- unreachable (in the example above this is true for L2).
49
50
51 -- Note [Control-flow optimisations]
52 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
53 --
54 -- This optimisation does three things:
55 --
56 -- - If a block finishes in an unconditonal branch to another block
57 -- and that is the only jump to that block we concatenate the
58 -- destination block at the end of the current one.
59 --
60 -- - If a block finishes in a call whose continuation block is a
61 -- goto, then we can shortcut the destination, making the
62 -- continuation block the destination of the goto - but see Note
63 -- [Shortcut call returns].
64 --
65 -- - For any block that is not a call we try to shortcut the
66 -- destination(s). Additionally, if a block ends with a
67 -- conditional branch we try to invert the condition.
68 --
69 -- Blocks are processed using postorder DFS traversal. A side effect
70 -- of determining traversal order with a graph search is elimination
71 -- of any blocks that are unreachable.
72 --
73 -- Transformations are improved by working from the end of the graph
74 -- towards the beginning, because we may be able to perform many
75 -- shortcuts in one go.
76
77
78 -- Note [Shortcut call returns]
79 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
80 --
81 -- We are going to maintain the "current" graph (LabelMap CmmBlock) as
82 -- we go, and also a mapping from BlockId to BlockId, representing
83 -- continuation labels that we have renamed. This latter mapping is
84 -- important because we might shortcut a CmmCall continuation. For
85 -- example:
86 --
87 -- Sp[0] = L
88 -- call g returns to L
89 -- L: goto M
90 -- M: ...
91 --
92 -- So when we shortcut the L block, we need to replace not only
93 -- the continuation of the call, but also references to L in the
94 -- code (e.g. the assignment Sp[0] = L):
95 --
96 -- Sp[0] = M
97 -- call g returns to M
98 -- M: ...
99 --
100 -- So we keep track of which labels we have renamed and apply the mapping
101 -- at the end with replaceLabels.
102
103
104 -- Note [Shortcut call returns and proc-points]
105 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
106 --
107 -- Consider this code that you might get from a recursive
108 -- let-no-escape:
109 --
110 -- goto L1
111 -- L1:
112 -- if (Hp > HpLim) then L2 else L3
113 -- L2:
114 -- call stg_gc_noregs returns to L4
115 -- L4:
116 -- goto L1
117 -- L3:
118 -- ...
119 -- goto L1
120 --
121 -- Then the control-flow optimiser shortcuts L4. But that turns L1
122 -- into the call-return proc point, and every iteration of the loop
123 -- has to shuffle variables to and from the stack. So we must *not*
124 -- shortcut L4.
125 --
126 -- Moreover not shortcutting call returns is probably fine. If L4 can
127 -- concat with its branch target then it will still do so. And we
128 -- save some compile time because we don't have to traverse all the
129 -- code in replaceLabels.
130 --
131 -- However, we probably do want to do this if we are splitting proc
132 -- points, because L1 will be a proc-point anyway, so merging it with
133 -- L4 reduces the number of proc points. Unfortunately recursive
134 -- let-no-escapes won't generate very good code with proc-point
135 -- splitting on - we should probably compile them to explicitly use
136 -- the native calling convention instead.
137
138 cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
139 cmmCfgOpts split g = fst (blockConcat split g)
140
141 cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
142 cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
143 where (g', env) = blockConcat split g
144 info' = info{ info_tbls = new_info_tbls }
145 new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
146
147 -- If we changed any labels, then we have to update the info tables
148 -- too, except for the top-level info table because that might be
149 -- referred to by other procs.
150 upd_info (k,info)
151 | Just k' <- mapLookup k env
152 = (k', if k' == g_entry g'
153 then info
154 else info{ cit_lbl = infoTblLbl k' })
155 | otherwise
156 = (k,info)
157 cmmCfgOptsProc _ top = top
158
159
160 blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId)
161 blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
162 = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
163 where
164 -- We might be able to shortcut the entry BlockId itself.
165 -- Remember to update the shortcut_map, since we also have to
166 -- update the info_tbls mapping now.
167 (new_entry, shortcut_map')
168 | Just entry_blk <- mapLookup entry_id new_blocks
169 , Just dest <- canShortcut entry_blk
170 = (dest, mapInsert entry_id dest shortcut_map)
171 | otherwise
172 = (entry_id, shortcut_map)
173
174 -- blocks is a list of blocks in DFS postorder, while blockmap is
175 -- a map of blocks. We process each element from blocks and update
176 -- blockmap accordingly
177 blocks = postorderDfs g
178 blockmap = foldr addBlock emptyBody blocks
179
180 -- Accumulator contains three components:
181 -- * map of blocks in a graph
182 -- * map of shortcut labels. See Note [Shortcut call returns]
183 -- * map containing number of predecessors for each block. We discard
184 -- it after we process all blocks.
185 (new_blocks, shortcut_map, _) =
186 foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks
187
188 -- Map of predecessors for initial graph. We increase number of
189 -- predecessors for entry block by one to denote that it is
190 -- target of a jump, even if no block in the current graph jumps
191 -- to it.
192 initialBackEdges = incPreds entry_id (predMap blocks)
193
194 maybe_concat :: CmmBlock
195 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
196 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
197 maybe_concat block (blocks, shortcut_map, backEdges)
198 -- If:
199 -- (1) current block ends with unconditional branch to b' and
200 -- (2) it has exactly one predecessor (namely, current block)
201 --
202 -- Then:
203 -- (1) append b' block at the end of current block
204 -- (2) remove b' from the map of blocks
205 -- (3) remove information about b' from predecessors map
206 --
207 -- Since we know that the block has only one predecessor we call
208 -- mapDelete directly instead of calling decPreds.
209 --
210 -- Note that we always maintain an up-to-date list of predecessors, so
211 -- we can ignore the contents of shortcut_map
212 | CmmBranch b' <- last
213 , hasOnePredecessor b'
214 , Just blk' <- mapLookup b' blocks
215 = let bid' = entryLabel blk'
216 in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
217 , shortcut_map
218 , mapDelete b' backEdges )
219
220 -- If:
221 -- (1) we are splitting proc points (see Note
222 -- [Shortcut call returns and proc-points]) and
223 -- (2) current block is a CmmCall or CmmForeignCall with
224 -- continuation b' and
225 -- (3) we can shortcut that continuation to dest
226 -- Then:
227 -- (1) we change continuation to point to b'
228 -- (2) create mapping from b' to dest
229 -- (3) increase number of predecessors of dest by 1
230 -- (4) decrease number of predecessors of b' by 1
231 --
232 -- Later we will use replaceLabels to substitute all occurrences of b'
233 -- with dest.
234 | splitting_procs
235 , Just b' <- callContinuation_maybe last
236 , Just blk' <- mapLookup b' blocks
237 , Just dest <- canShortcut blk'
238 = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
239 , mapInsert b' dest shortcut_map
240 , decPreds b' $ incPreds dest backEdges )
241
242 -- If:
243 -- (1) a block does not end with a call
244 -- Then:
245 -- (1) if it ends with a conditional attempt to invert the
246 -- conditional
247 -- (2) attempt to shortcut all destination blocks
248 -- (3) if new successors of a block are different from the old ones
249 -- update the of predecessors accordingly
250 --
251 -- A special case of this is a situation when a block ends with an
252 -- unconditional jump to a block that can be shortcut.
253 | Nothing <- callContinuation_maybe last
254 = let oldSuccs = successors last
255 newSuccs = successors swapcond_last
256 in ( mapInsert bid (blockJoinTail head swapcond_last) blocks
257 , shortcut_map
258 , if oldSuccs == newSuccs
259 then backEdges
260 else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
261
262 -- Otherwise don't do anything
263 | otherwise
264 = ( blocks, shortcut_map, backEdges )
265 where
266 (head, last) = blockSplitTail block
267 bid = entryLabel block
268
269 -- Changes continuation of a call to a specified label
270 update_cont dest =
271 case last of
272 CmmCall{} -> last { cml_cont = Just dest }
273 CmmForeignCall{} -> last { succ = dest }
274 _ -> panic "Can't shortcut continuation."
275
276 -- Attempts to shortcut successors of last node
277 shortcut_last = mapSuccessors shortcut last
278 where
279 shortcut l =
280 case mapLookup l blocks of
281 Just b | Just dest <- canShortcut b -> dest
282 _otherwise -> l
283
284 -- For a conditional, we invert the conditional if that would make it
285 -- more likely that the branch-not-taken case becomes a fallthrough.
286 -- This helps the native codegen a little bit, and probably has no
287 -- effect on LLVM. It's convenient to do it here, where we have the
288 -- information about predecessors.
289 swapcond_last
290 | CmmCondBranch cond t f l <- shortcut_last
291 , likelyFalse l
292 , numPreds f > 1
293 , hasOnePredecessor t
294 , Just cond' <- maybeInvertCmmExpr cond
295 = CmmCondBranch cond' f t (invertLikeliness l)
296
297 | otherwise
298 = shortcut_last
299
300 likelyFalse (Just False) = True
301 likelyFalse Nothing = True
302 likelyFalse _ = False
303
304 invertLikeliness (Just b) = Just (not b)
305 invertLikeliness Nothing = Nothing
306
307 -- Number of predecessors for a block
308 numPreds bid = mapLookup bid backEdges `orElse` 0
309
310 hasOnePredecessor b = numPreds b == 1
311
312 -- Functions for incrementing and decrementing number of predecessors. If
313 -- decrementing would set the predecessor count to 0, we remove entry from the
314 -- map.
315 -- Invariant: if a block has no predecessors it should be dropped from the
316 -- graph because it is unreachable. maybe_concat is constructed to maintain
317 -- that invariant, but calling replaceLabels may introduce unreachable blocks.
318 -- We rely on subsequent passes in the Cmm pipeline to remove unreachable
319 -- blocks.
320 incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int
321 incPreds bid edges = mapInsertWith (+) bid 1 edges
322 decPreds bid edges = case mapLookup bid edges of
323 Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
324 Just _ -> mapDelete bid edges
325 _ -> edges
326
327
328 -- Checks if a block consists only of "goto dest". If it does than we return
329 -- "Just dest" label. See Note [What is shortcutting]
330 canShortcut :: CmmBlock -> Maybe BlockId
331 canShortcut block
332 | (_, middle, CmmBranch dest) <- blockSplit block
333 , all dont_care $ blockToList middle
334 = Just dest
335 | otherwise
336 = Nothing
337 where dont_care CmmComment{} = True
338 dont_care CmmTick{} = True
339 dont_care _other = False
340
341 -- Concatenates two blocks. First one is assumed to be open on exit, the second
342 -- is assumed to be closed on entry (i.e. it has a label attached to it, which
343 -- the splice function removes by calling snd on result of blockSplitHead).
344 splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
345 splice head rest = entry `blockJoinHead` code0 `blockAppend` code1
346 where (CmmEntry lbl sc0, code0) = blockSplitHead head
347 (CmmEntry _ sc1, code1) = blockSplitHead rest
348 entry = CmmEntry lbl (combineTickScopes sc0 sc1)
349
350 -- If node is a call with continuation call return Just label of that
351 -- continuation. Otherwise return Nothing.
352 callContinuation_maybe :: CmmNode O C -> Maybe BlockId
353 callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
354 callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
355 callContinuation_maybe _ = Nothing
356
357
358 -- Map over the CmmGraph, replacing each label with its mapping in the
359 -- supplied LabelMap.
360 replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph
361 replaceLabels env g
362 | mapNull env = g
363 | otherwise = replace_eid $ mapGraphNodes1 txnode g
364 where
365 replace_eid g = g {g_entry = lookup (g_entry g)}
366 lookup id = mapLookup id env `orElse` id
367
368 txnode :: CmmNode e x -> CmmNode e x
369 txnode (CmmBranch bid) = CmmBranch (lookup bid)
370 txnode (CmmCondBranch p t f l) =
371 mkCmmCondBranch (exp p) (lookup t) (lookup f) l
372 txnode (CmmSwitch e ids) =
373 CmmSwitch (exp e) (mapSwitchTargets lookup ids)
374 txnode (CmmCall t k rg a res r) =
375 CmmCall (exp t) (liftM lookup k) rg a res r
376 txnode fc@CmmForeignCall{} =
377 fc{ args = map exp (args fc), succ = lookup (succ fc) }
378 txnode other = mapExpDeep exp other
379
380 exp :: CmmExpr -> CmmExpr
381 exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
382 exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
383 exp e = e
384
385 mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C
386 mkCmmCondBranch p t f l =
387 if t == f then CmmBranch t else CmmCondBranch p t f l
388
389 -- Build a map from a block to its set of predecessors.
390 predMap :: [CmmBlock] -> LabelMap Int
391 predMap blocks = foldr add_preds mapEmpty blocks
392 where
393 add_preds block env = foldr add env (successors block)
394 where add lbl env = mapInsertWith (+) lbl 1 env
395
396 -- Removing unreachable blocks
397 removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
398 removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
399 | used_blocks `lengthLessThan` mapSize (toBlockMap g)
400 = CmmProc info' lbl live g'
401 | otherwise
402 = proc
403 where
404 g' = ofBlockList (g_entry g) used_blocks
405 info' = info { info_tbls = keep_used (info_tbls info) }
406 -- Remove any info_tbls for unreachable
407
408 keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable
409 keep_used bs = mapFoldWithKey keep mapEmpty bs
410
411 keep :: Label -> CmmInfoTable -> LabelMap CmmInfoTable -> LabelMap CmmInfoTable
412 keep l i env | l `setMember` used_lbls = mapInsert l i env
413 | otherwise = env
414
415 used_blocks :: [CmmBlock]
416 used_blocks = postorderDfs g
417
418 used_lbls :: LabelSet
419 used_lbls = foldr (setInsert . entryLabel) setEmpty used_blocks