Remove unnecessary warnings suppressions, fixes ticket #7756; thanks monoidal for...
[ghc.git] / compiler / cmm / CmmContFlowOpt.hs
1 {-# LANGUAGE GADTs #-}
2 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
3 module CmmContFlowOpt
4 ( cmmCfgOpts
5 , cmmCfgOptsProc
6 , removeUnreachableBlocksProc
7 , removeUnreachableBlocks
8 , replaceLabels
9 )
10 where
11
12 import Hoopl
13 import BlockId
14 import Cmm
15 import CmmUtils
16 import Maybes
17
18 import Control.Monad
19 import Prelude hiding (succ, unzip, zip)
20
21 -----------------------------------------------------------------------------
22 --
23 -- Control-flow optimisations
24 --
25 -----------------------------------------------------------------------------
26
-- | Run the control-flow optimiser on a graph, discarding the label
-- substitution that 'blockConcat' also computes.  The Bool indicates
-- whether we are splitting proc points.
cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
cmmCfgOpts split = fst . blockConcat split
29
-- | Run the control-flow optimiser over one 'CmmDecl'.  For a proc we
-- optimise its graph and rewrite its info-table keys to track any
-- continuation labels that were renamed; anything else passes through
-- unchanged.
cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
cmmCfgOptsProc split (CmmProc info lbl live g) =
    CmmProc (info { info_tbls = tbls' }) lbl live g'
  where
    (g', env) = blockConcat split g

    tbls' = mapFromList [ rename_tbl kv | kv <- mapToList (info_tbls info) ]

    -- If we changed any labels, then we have to update the info tables
    -- too, except for the top-level info table because that might be
    -- referred to by other procs.
    rename_tbl (k, tbl) =
      case mapLookup k env of
        Nothing -> (k, tbl)
        Just k'
          | k' == g_entry g' -> (k', tbl)
          | otherwise        -> (k', tbl { cit_lbl = infoTblLbl k' })
cmmCfgOptsProc _ top = top
48
49
50 -----------------------------------------------------------------------------
51 --
52 -- Block concatenation
53 --
54 -----------------------------------------------------------------------------
55
56 -- This optimisation does three things:
57 --
58 -- - If a block finishes with an unconditional branch, then we may
59 -- be able to concatenate the block it points to and remove the
60 -- branch. We do this either if the destination block is small
61 -- (e.g. just another branch), or if this is the only jump to
62 -- this particular destination block.
63 --
64 -- - If a block finishes in a call whose continuation block is a
65 -- goto, then we can shortcut the destination, making the
66 -- continuation block the destination of the goto.
67 -- (but see Note [shortcut call returns])
68 --
69 -- - removes any unreachable blocks from the graph. This is a side
70 -- effect of starting with a postorder DFS traversal of the graph
71 --
72
73 -- Both transformations are improved by working from the end of the
74 -- graph towards the beginning, because we may be able to perform many
75 -- shortcuts in one go.
76
77
78 -- We need to walk over the blocks from the end back to the
79 -- beginning. We are going to maintain the "current" graph
80 -- (BlockEnv CmmBlock) as we go, and also a mapping from BlockId
81 -- to BlockId, representing continuation labels that we have
82 -- renamed. This latter mapping is important because we might
83 -- shortcut a CmmCall continuation. For example:
84 --
85 -- Sp[0] = L
86 -- call g returns to L
87 --
88 -- L: goto M
89 --
90 -- M: ...
91 --
92 -- So when we shortcut the L block, we need to replace not only
93 -- the continuation of the call, but also references to L in the
94 -- code (e.g. the assignment Sp[0] = L). So we keep track of
95 -- which labels we have renamed and apply the mapping at the end
96 -- with replaceLabels.
97
-- | The main transformation: walk the blocks of the graph (obtained by
-- a postorder DFS, which also discards unreachable blocks) and
--
--   * concatenate a block into its unconditional-branch target when the
--     target is small or has a unique predecessor ('shouldConcatWith');
--   * shortcut call continuations, but only when splitting proc points
--     (see Note [shortcut call returns]);
--   * shortcut the successors of non-call blocks, and invert a
--     conditional when that is likely to create a fallthrough.
--
-- Returns the new graph together with the mapping of renamed
-- continuation labels; the caller uses it to update the info tables.
blockConcat :: Bool -> CmmGraph -> (CmmGraph, BlockEnv BlockId)
blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
  = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
  where
    -- we might be able to shortcut the entry BlockId itself.
    -- remember to update the shortcut_map', since we also have to
    -- update the info_tbls mapping now.
    (new_entry, shortcut_map')
      | Just entry_blk <- mapLookup entry_id new_blocks
      , Just dest <- canShortcut entry_blk
      = (dest, mapInsert entry_id dest shortcut_map)
      | otherwise
      = (entry_id, shortcut_map)

    blocks = postorderDfs g
    blockmap = foldr addBlock emptyBody blocks
    -- the initial blockmap is constructed from the postorderDfs result,
    -- so that we automatically throw away unreachable blocks.

    -- fold from the end of the postorder list back towards the
    -- beginning, threading the current block map and the
    -- label-renaming map through as accumulators.
    (new_blocks, shortcut_map) =
        foldr maybe_concat (blockmap, mapEmpty) blocks

    -- Process a single block.  NB: the 'blocks' and 'shortcut_map'
    -- parameters deliberately shadow the outer bindings; they are the
    -- fold's accumulators.
    maybe_concat :: CmmBlock
                 -> (BlockEnv CmmBlock, BlockEnv BlockId)
                 -> (BlockEnv CmmBlock, BlockEnv BlockId)
    maybe_concat block (blocks, shortcut_map)
      -- unconditional branch: splice the target block onto this one
      -- when 'shouldConcatWith' says it is profitable
      | CmmBranch b' <- last
      , Just blk' <- mapLookup b' blocks
      , shouldConcatWith b' blk'
      = (mapInsert bid (splice head blk') blocks, shortcut_map)

      -- calls: if we can shortcut the continuation label, then
      -- we must *also* remember to substitute for the label in the
      -- code, because we will push it somewhere.
      | splitting_procs         -- Note [shortcut call returns]
      , Just b' <- callContinuation_maybe last
      , Just blk' <- mapLookup b' blocks
      , Just dest <- canShortcut blk'
      = (blocks, mapInsert b' dest shortcut_map)
        -- replaceLabels will substitute dest for b' everywhere, later

      -- non-calls: see if we can shortcut any of the successors,
      -- and check whether we should invert the conditional
      | Nothing <- callContinuation_maybe last
      = ( mapInsert bid (blockJoinTail head swapcond_last) blocks
        , shortcut_map )

      | otherwise
      = (blocks, shortcut_map)
      where
        (head, last) = blockSplitTail block
        bid = entryLabel block

        -- redirect each successor that points at a block which is
        -- itself nothing but an unconditional branch
        shortcut_last = mapSuccessors shortcut last
          where
            shortcut l =
              case mapLookup l blocks of
                Just b | Just dest <- canShortcut b -> dest
                _otherwise -> l

        -- for a conditional, we invert the conditional if that
        -- would make it more likely that the branch-not-taken case
        -- becomes a fallthrough.  This helps the native codegen a
        -- little bit, and probably has no effect on LLVM.  It's
        -- convenient to do it here, where we have the information
        -- about predecessors.
        swapcond_last
          | CmmCondBranch cond t f <- shortcut_last
          , numPreds f > 1
          , numPreds t == 1
          , Just cond' <- maybeInvertCmmExpr cond
          = CmmCondBranch cond' f t
          | otherwise
          = shortcut_last

    -- Concatenating is worthwhile if the target is tiny (duplicating
    -- it is cheap) or has no other predecessors (nothing is duplicated
    -- at all).
    shouldConcatWith b block
      | okToDuplicate block = True -- short enough to duplicate
      | numPreds b == 1     = True -- only one predecessor: go for it
      | otherwise           = False

    -- NOTE(review): predecessor counts come from 'backEdges', which is
    -- computed once from the original postorder block list; they are
    -- not refreshed as blocks are concatenated during the fold.
    numPreds bid = mapLookup bid backEdges `orElse` 0

    -- a block can be shortcut when its body is empty and it ends in an
    -- unconditional branch: jumping to it is the same as jumping past it
    canShortcut :: CmmBlock -> Maybe BlockId
    canShortcut block
      | (_, middle, CmmBranch dest) <- blockSplit block
      , isEmptyBlock middle
      = Just dest
      | otherwise
      = Nothing

    backEdges :: BlockEnv Int -- number of predecessors for each block
    backEdges = mapInsertWith (+) entry_id 1 $ -- add 1 for the entry id
                predMap blocks

    -- append 'rest' (minus its entry label) onto the open-ended 'head'
    splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
    splice head rest = head `blockAppend` snd (blockSplitHead rest)
197
198
-- | If the block-ending node is a call carrying a continuation label,
-- return that label; otherwise Nothing.
callContinuation_maybe :: CmmNode O C -> Maybe BlockId
callContinuation_maybe node =
  case node of
    CmmCall { cml_cont = mb_cont } -> mb_cont
    CmmForeignCall { succ = cont } -> Just cont
    _                              -> Nothing
203
-- | Is this block cheap enough to duplicate when concatenating?
--
-- Currently: only an empty block ending in an unconditional branch.
-- Cheap and cheerful; we might expand this in the future to e.g. spot
-- blocks that represent a single instruction or two.  Be careful: a
-- CmmCall can be more than one instruction, it has a CmmExpr inside it.
okToDuplicate :: CmmBlock -> Bool
okToDuplicate block
  | (_, middle, CmmBranch _) <- blockSplit block = isEmptyBlock middle
  | otherwise                                    = False
213
214
215 {- Note [shortcut call returns]
216
217 Consider this code that you might get from a recursive let-no-escape:
218
219 goto L1
220 L1:
221 if (Hp > HpLim) then L2 else L3
222 L2:
223 call stg_gc_noregs returns to L4
224 L4:
225 goto L1
226 L3:
227 ...
228 goto L1
229
230 Then the control-flow optimiser shortcuts L4. But that turns L1
231 into the call-return proc point, and every iteration of the loop
232 has to shuffle variables to and from the stack. So we must *not*
233 shortcut L4.
234
235 Moreover not shortcutting call returns is probably fine. If L4 can
236 concat with its branch target then it will still do so. And we
237 save some compile time because we don't have to traverse all the
238 code in replaceLabels.
239
240 However, we probably do want to do this if we are splitting proc
241 points, because L1 will be a proc-point anyway, so merging it with L4
242 reduces the number of proc points. Unfortunately recursive
243 let-no-escapes won't generate very good code with proc-point splitting
244 on - we should probably compile them to explicitly use the native
245 calling convention instead.
246 -}
247
248 ------------------------------------------------------------------------
249 -- Map over the CmmGraph, replacing each label with its mapping in the
250 -- supplied BlockEnv.
251
-- | Map over the CmmGraph, replacing each label by its image under the
-- supplied substitution; labels absent from the map are left alone.
replaceLabels :: BlockEnv BlockId -> CmmGraph -> CmmGraph
replaceLabels env g
  | mapNull env = g                 -- empty substitution: nothing to do
  | otherwise   = fix_entry (mapGraphNodes1 tx_node g)
  where
    -- the entry label itself may have been renamed
    fix_entry graph = graph { g_entry = new_label (g_entry graph) }

    new_label :: BlockId -> BlockId
    new_label l = mapLookup l env `orElse` l

    -- rewrite the labels in every flavour of node
    tx_node :: CmmNode e x -> CmmNode e x
    tx_node (CmmBranch bid) = CmmBranch (new_label bid)
    tx_node (CmmCondBranch p t f) =
        mkCmmCondBranch (tx_exp p) (new_label t) (new_label f)
    tx_node (CmmSwitch e arms) =
        CmmSwitch (tx_exp e) (map (fmap new_label) arms)
    tx_node (CmmCall t k rg a res r) =
        CmmCall (tx_exp t) (fmap new_label k) rg a res r
    tx_node fc@CmmForeignCall{} =
        fc { args = map tx_exp (args fc), succ = new_label (succ fc) }
    tx_node other = mapExpDeep tx_exp other

    -- labels can also occur inside expressions (e.g. Sp[0] = L)
    tx_exp :: CmmExpr -> CmmExpr
    tx_exp (CmmLit (CmmBlock bid))     = CmmLit (CmmBlock (new_label bid))
    tx_exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (new_label id)) i
    tx_exp e                           = e
273
-- | Build a conditional branch, collapsing it to an unconditional one
-- when both targets are the same label.
mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C
mkCmmCondBranch p t f
  | t == f    = CmmBranch t
  | otherwise = CmmCondBranch p t f
276
277 ----------------------------------------------------------------
278 -- Build a map from a block to its set of predecessors. Very useful.
279
predMap :: [CmmBlock] -> BlockEnv Int
predMap = foldr add_preds mapEmpty
  where
    -- every successor edge contributes one predecessor to its target
    add_preds block env = foldr bump env (successors block)
      where bump lbl = mapInsertWith (+) lbl 1
285
286 -----------------------------------------------------------------------------
287 --
288 -- Removing unreachable blocks
289
-- | Remove unreachable blocks from the graph of a proc.  'CmmData'
-- (and any other non-proc) declarations carry no graph and are returned
-- unchanged.  Previously the non-proc case was a pattern-match failure,
-- masked by this module's -fno-warn-incomplete-patterns; the catch-all
-- makes the function total and consistent with 'cmmCfgOptsProc'.
removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
removeUnreachableBlocksProc (CmmProc info lbl live g)
  = CmmProc info lbl live (removeUnreachableBlocks g)
removeUnreachableBlocksProc top = top
293
-- | Drop blocks that are not reachable from the entry, using a
-- postorder depth-first search to find the reachable set.  When nothing
-- was unreachable we return the original graph, preserving sharing.
removeUnreachableBlocks :: CmmGraph -> CmmGraph
removeUnreachableBlocks g
  | pruned    = ofBlockList (g_entry g) reachable
  | otherwise = g
  where
    reachable = postorderDfs g
    pruned    = length reachable < mapSize (toBlockMap g)