Merge branch 'refs/heads/vect-avoid' into vect-avoid-merge
[ghc.git] / compiler / cmm / CmmContFlowOpt.hs
1 {-# LANGUAGE GADTs #-}
2 {-# OPTIONS_GHC -fno-warn-warnings-deprecations -fno-warn-incomplete-patterns #-}
3
4 module CmmContFlowOpt
5 ( cmmCfgOpts
6 , cmmCfgOptsProc
7 , removeUnreachableBlocksProc
8 , removeUnreachableBlocks
9 , replaceLabels
10 )
11 where
12
13 import Hoopl
14 import BlockId
15 import Cmm
16 import CmmUtils
17 import Maybes
18
19 import Control.Monad
20 import Prelude hiding (succ, unzip, zip)
21
22 -----------------------------------------------------------------------------
23 --
24 -- Control-flow optimisations
25 --
26 -----------------------------------------------------------------------------
27
-- | Run the control-flow optimiser on a graph, discarding the
-- label-renaming environment that 'blockConcat' also produces.
cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
cmmCfgOpts split = fst . blockConcat split
30
-- | Run the control-flow optimiser over a single 'CmmDecl'.  When the
-- optimiser renames block labels, the info-table mapping must be
-- renamed in step; the entry block's table keeps its old label because
-- other procedures may refer to it.  Non-proc declarations pass
-- through unchanged.
cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
cmmCfgOptsProc split (CmmProc info lbl live g) =
    CmmProc (info { info_tbls = tbls' }) lbl live g'
  where
    (g', env) = blockConcat split g

    tbls' = mapFromList [ rename_tbl kv | kv <- mapToList (info_tbls info) ]

    -- Rewrite one (label, info-table) pair through the renaming
    -- environment.  The table attached to the (new) entry block is left
    -- untouched apart from its key, since its label might be referred
    -- to by other procs.
    rename_tbl (k, tbl) =
      case mapLookup k env of
        Nothing -> (k, tbl)
        Just k'
          | k' == g_entry g' -> (k', tbl)
          | otherwise        -> (k', tbl { cit_lbl = infoTblLbl k' })

cmmCfgOptsProc _ top = top
49
50
51 -----------------------------------------------------------------------------
52 --
53 -- Block concatenation
54 --
55 -----------------------------------------------------------------------------
56
57 -- This optimisation does three things:
58 --
59 -- - If a block finishes with an unconditional branch, then we may
60 -- be able to concatenate the block it points to and remove the
61 -- branch. We do this either if the destination block is small
62 -- (e.g. just another branch), or if this is the only jump to
63 -- this particular destination block.
64 --
65 -- - If a block finishes in a call whose continuation block is a
66 -- goto, then we can shortcut the destination, making the
67 -- continuation block the destination of the goto.
68 -- (but see Note [shortcut call returns])
69 --
70 -- - removes any unreachable blocks from the graph. This is a side
71 -- effect of starting with a postorder DFS traversal of the graph
72 --
73
74 -- Both transformations are improved by working from the end of the
75 -- graph towards the beginning, because we may be able to perform many
76 -- shortcuts in one go.
77
78
79 -- We need to walk over the blocks from the end back to the
80 -- beginning. We are going to maintain the "current" graph
81 -- (BlockEnv CmmBlock) as we go, and also a mapping from BlockId
82 -- to BlockId, representing continuation labels that we have
83 -- renamed. This latter mapping is important because we might
84 -- shortcut a CmmCall continuation. For example:
85 --
86 -- Sp[0] = L
87 -- call g returns to L
88 --
89 -- L: goto M
90 --
91 -- M: ...
92 --
93 -- So when we shortcut the L block, we need to replace not only
94 -- the continuation of the call, but also references to L in the
95 -- code (e.g. the assignment Sp[0] = L). So we keep track of
96 -- which labels we have renamed and apply the mapping at the end
97 -- with replaceLabels.
98
-- Returns the optimised graph together with the label renaming that
-- was performed, so the caller ('cmmCfgOptsProc') can fix up info
-- tables keyed on the old labels.
blockConcat :: Bool -> CmmGraph -> (CmmGraph, BlockEnv BlockId)
blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
  = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
  where
     -- we might be able to shortcut the entry BlockId itself.
     -- remember to update the shortcut_map', since we also have to
     -- update the info_tbls mapping now.
     (new_entry, shortcut_map')
       | Just entry_blk <- mapLookup entry_id new_blocks
       , Just dest <- canShortcut entry_blk
       = (dest, mapInsert entry_id dest shortcut_map)
       | otherwise
       = (entry_id, shortcut_map)

     blocks = postorderDfs g
     blockmap = foldr addBlock emptyBody blocks
     -- the initial blockmap is constructed from the postorderDfs result,
     -- so that we automatically throw away unreachable blocks.

     -- Fold from the end of the postorder list back towards the entry,
     -- threading both the current block map and the accumulated label
     -- renaming, so that earlier blocks see shortcuts already made in
     -- later ones (see the comment block above about working backwards).
     (new_blocks, shortcut_map) =
           foldr maybe_concat (blockmap, mapEmpty) blocks

     maybe_concat :: CmmBlock
                  -> (BlockEnv CmmBlock, BlockEnv BlockId)
                  -> (BlockEnv CmmBlock, BlockEnv BlockId)
     maybe_concat block (blocks, shortcut_map)
        -- unconditional branch: concatenate the destination block onto
        -- this one when it is cheap to do so (see shouldConcatWith)
        | CmmBranch b' <- last
        , Just blk' <- mapLookup b' blocks
        , shouldConcatWith b' blk'
        = (mapInsert bid (splice head blk') blocks, shortcut_map)

        -- calls: if we can shortcut the continuation label, then
        -- we must *also* remember to substitute for the label in the
        -- code, because we will push it somewhere.
        | splitting_procs -- Note [shortcut call returns]
        , Just b' <- callContinuation_maybe last
        , Just blk' <- mapLookup b' blocks
        , Just dest <- canShortcut blk'
        = (blocks, mapInsert b' dest shortcut_map)
           -- replaceLabels will substitute dest for b' everywhere, later

        -- non-calls: see if we can shortcut any of the successors,
        -- and check whether we should invert the conditional
        | Nothing <- callContinuation_maybe last
        = ( mapInsert bid (blockJoinTail head swapcond_last) blocks
          , shortcut_map )

        | otherwise
        = (blocks, shortcut_map)
        where
          (head, last) = blockSplitTail block
          bid = entryLabel block

          -- the last node with every shortcuttable successor replaced
          -- by its ultimate destination
          shortcut_last = mapSuccessors shortcut last
            where
              shortcut l =
                 case mapLookup l blocks of
                   Just b | Just dest <- canShortcut b  -> dest
                   _otherwise -> l

          -- for a conditional, we invert the conditional if that
          -- would make it more likely that the branch-not-taken case
          -- becomes a fallthrough. This helps the native codegen a
          -- little bit, and probably has no effect on LLVM. It's
          -- convenient to do it here, where we have the information
          -- about predecessors.
          --
          swapcond_last
            | CmmCondBranch cond t f <- shortcut_last
            , numPreds f > 1
            , numPreds t == 1
            , Just cond' <- maybeInvertCmmExpr cond
            = CmmCondBranch cond' f t

            | otherwise
            = shortcut_last

     -- Concatenate when the destination is trivially duplicable, or
     -- when we hold its only incoming edge (no code growth either way).
     shouldConcatWith b block
       | okToDuplicate block = True -- short enough to duplicate
       | numPreds b == 1     = True -- only one predecessor: go for it
       | otherwise           = False

     numPreds bid = mapLookup bid backEdges `orElse` 0

     -- A block can be shortcut when it consists solely of an
     -- unconditional branch: jumps to it may go straight to its target.
     canShortcut :: CmmBlock -> Maybe BlockId
     canShortcut block
       | (_, middle, CmmBranch dest) <- blockSplit block
       , isEmptyBlock middle
       = Just dest
       | otherwise
       = Nothing

     backEdges :: BlockEnv Int -- number of predecessors for each block
     backEdges = mapInsertWith (+) entry_id 1 $ -- add 1 for the entry id
                   predMap blocks

     -- Append 'rest' (with its entry label stripped) onto 'head'.
     splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
     splice head rest = head `blockAppend` snd (blockSplitHead rest)
198
199
-- | The continuation label of a call node, if it has one.
callContinuation_maybe :: CmmNode O C -> Maybe BlockId
callContinuation_maybe node = case node of
  CmmCall { cml_cont = mb_cont } -> mb_cont
  CmmForeignCall { succ = k }    -> Just k
  _                              -> Nothing
204
-- | Is this block small enough to duplicate into each predecessor?
okToDuplicate :: CmmBlock -> Bool
okToDuplicate block
  | (_, middle, CmmBranch _) <- blockSplit block = isEmptyBlock middle
  | otherwise                                    = False
  -- cheap and cheerful; we might expand this in the future to
  -- e.g. spot blocks that represent a single instruction or two.
  -- Be careful: a CmmCall can be more than one instruction, it
  -- has a CmmExpr inside it.
214
215
216 {- Note [shortcut call returns]
217
218 Consider this code that you might get from a recursive let-no-escape:
219
220 goto L1
221 L1:
222 if (Hp > HpLim) then L2 else L3
223 L2:
224 call stg_gc_noregs returns to L4
225 L4:
226 goto L1
227 L3:
228 ...
229 goto L1
230
231 Then the control-flow optimiser shortcuts L4. But that turns L1
232 into the call-return proc point, and every iteration of the loop
233 has to shuffle variables to and from the stack. So we must *not*
234 shortcut L4.
235
236 Moreover not shortcutting call returns is probably fine. If L4 can
237 concat with its branch target then it will still do so. And we
238 save some compile time because we don't have to traverse all the
239 code in replaceLabels.
240
241 However, we probably do want to do this if we are splitting proc
242 points, because L1 will be a proc-point anyway, so merging it with L4
243 reduces the number of proc points. Unfortunately recursive
244 let-no-escapes won't generate very good code with proc-point splitting
245 on - we should probably compile them to explicitly use the native
246 calling convention instead.
247 -}
248
249 ------------------------------------------------------------------------
250 -- Map over the CmmGraph, replacing each label with its mapping in the
251 -- supplied BlockEnv.
252
-- | Rewrite every label occurrence in the graph through the supplied
-- environment: successor labels, call continuations, block literals,
-- young stack-slot areas, and the graph entry point.  Labels absent
-- from the environment are left alone; an empty environment is a
-- no-op.
replaceLabels :: BlockEnv BlockId -> CmmGraph -> CmmGraph
replaceLabels env g
  | mapNull env = g
  | otherwise   = fix_entry (mapGraphNodes1 fix_node g)
  where
    fix_entry gr = gr { g_entry = subst (g_entry gr) }

    -- look a label up in the renaming, defaulting to itself
    subst bid = mapLookup bid env `orElse` bid

    fix_node :: CmmNode e x -> CmmNode e x
    fix_node node = case node of
      CmmBranch bid          -> CmmBranch (subst bid)
      CmmCondBranch p t f    -> mkCmmCondBranch (fix_expr p) (subst t) (subst f)
      CmmSwitch e arms       -> CmmSwitch (fix_expr e) (map (fmap subst) arms)
      CmmCall t k rg a res r -> CmmCall (fix_expr t) (fmap subst k) rg a res r
      fc@CmmForeignCall{}    -> fc { args = map fix_expr (args fc)
                                   , succ = subst (succ fc) }
      other                  -> mapExpDeep fix_expr other

    -- labels also occur inside expressions, as block literals and as
    -- young stack-slot areas
    fix_expr :: CmmExpr -> CmmExpr
    fix_expr (CmmLit (CmmBlock bid))      = CmmLit (CmmBlock (subst bid))
    fix_expr (CmmStackSlot (Young bid) i) = CmmStackSlot (Young (subst bid)) i
    fix_expr e                            = e
274
-- | Build a conditional branch, collapsing it to an unconditional
-- branch when both arms target the same label.
mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C
mkCmmCondBranch p t f
  | t == f    = CmmBranch t
  | otherwise = CmmCondBranch p t f
277
278 ----------------------------------------------------------------
279 -- Build a map from a block to its set of predecessors. Very useful.
280
-- | Count, for each block that appears as a successor, how many
-- predecessor edges it has among the given blocks.  Blocks with no
-- predecessors are simply absent from the result.
predMap :: [CmmBlock] -> BlockEnv Int
predMap = foldr count_block mapEmpty
  where
    count_block block env = foldr bump env (successors block)
    bump lbl = mapInsertWith (+) lbl 1
286
287 -----------------------------------------------------------------------------
288 --
289 -- Removing unreachable blocks
290
-- | Remove unreachable blocks from a procedure's graph.  Non-proc
-- declarations (e.g. CmmData) pass through unchanged, mirroring the
-- fall-through clause of 'cmmCfgOptsProc'; previously a non-proc
-- argument would hit an incomplete pattern match and crash at runtime.
removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
removeUnreachableBlocksProc (CmmProc info lbl live g)
   = CmmProc info lbl live (removeUnreachableBlocks g)
removeUnreachableBlocksProc top = top
294
-- | Keep only the blocks reachable from the graph entry; the graph is
-- returned untouched when every block is already reachable.
removeUnreachableBlocks :: CmmGraph -> CmmGraph
removeUnreachableBlocks g
  | mapSize (toBlockMap g) > length reachable
              = ofBlockList (g_entry g) reachable
  | otherwise = g
  where
    reachable = postorderDfs g  -- DFS from the entry visits only live blocks