Remove special case in SRT generation with -split-sections
[ghc.git] / compiler / cmm / CmmContFlowOpt.hs
1 {-# LANGUAGE GADTs #-}
2 {-# LANGUAGE BangPatterns #-}
3 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
4 module CmmContFlowOpt
5 ( cmmCfgOpts
6 , cmmCfgOptsProc
7 , removeUnreachableBlocksProc
8 , replaceLabels
9 )
10 where
11
12 import GhcPrelude hiding (succ, unzip, zip)
13
14 import Hoopl.Block
15 import Hoopl.Collections
16 import Hoopl.Graph
17 import Hoopl.Label
18 import BlockId
19 import Cmm
20 import CmmUtils
21 import CmmSwitch (mapSwitchTargets, switchTargetsToList)
22 import Maybes
23 import Panic
24 import Util
25
26 import Control.Monad
27
28
29 -- Note [What is shortcutting]
30 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
31 --
32 -- Consider this Cmm code:
33 --
34 -- L1: ...
35 -- goto L2;
36 -- L2: goto L3;
37 -- L3: ...
38 --
39 -- Here L2 is an empty block and contains only an unconditional branch
40 -- to L3. In this situation any block that jumps to L2 can jump
41 -- directly to L3:
42 --
43 -- L1: ...
44 -- goto L3;
45 -- L2: goto L3;
46 -- L3: ...
47 --
48 -- In this situation we say that we shortcut L2 to L3. One of
49 -- consequences of shortcutting is that some blocks of code may become
50 -- unreachable (in the example above this is true for L2).
51
52
53 -- Note [Control-flow optimisations]
54 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55 --
56 -- This optimisation does three things:
57 --
58 -- - If a block finishes in an unconditional branch to another block
59 -- and that is the only jump to that block we concatenate the
60 -- destination block at the end of the current one.
61 --
62 -- - If a block finishes in a call whose continuation block is a
63 -- goto, then we can shortcut the destination, making the
64 -- continuation block the destination of the goto - but see Note
65 -- [Shortcut call returns].
66 --
67 -- - For any block that is not a call we try to shortcut the
68 -- destination(s). Additionally, if a block ends with a
69 -- conditional branch we try to invert the condition.
70 --
71 -- Blocks are processed using postorder DFS traversal. A side effect
72 -- of determining traversal order with a graph search is elimination
73 -- of any blocks that are unreachable.
74 --
75 -- Transformations are improved by working from the end of the graph
76 -- towards the beginning, because we may be able to perform many
77 -- shortcuts in one go.
78
79
80 -- Note [Shortcut call returns]
81 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
82 --
83 -- We are going to maintain the "current" graph (LabelMap CmmBlock) as
84 -- we go, and also a mapping from BlockId to BlockId, representing
85 -- continuation labels that we have renamed. This latter mapping is
86 -- important because we might shortcut a CmmCall continuation. For
87 -- example:
88 --
89 -- Sp[0] = L
90 -- call g returns to L
91 -- L: goto M
92 -- M: ...
93 --
94 -- So when we shortcut the L block, we need to replace not only
95 -- the continuation of the call, but also references to L in the
96 -- code (e.g. the assignment Sp[0] = L):
97 --
98 -- Sp[0] = M
99 -- call g returns to M
100 -- M: ...
101 --
102 -- So we keep track of which labels we have renamed and apply the mapping
103 -- at the end with replaceLabels.
104
105
106 -- Note [Shortcut call returns and proc-points]
107 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
108 --
109 -- Consider this code that you might get from a recursive
110 -- let-no-escape:
111 --
112 -- goto L1
113 -- L1:
114 -- if (Hp > HpLim) then L2 else L3
115 -- L2:
116 -- call stg_gc_noregs returns to L4
117 -- L4:
118 -- goto L1
119 -- L3:
120 -- ...
121 -- goto L1
122 --
123 -- Then the control-flow optimiser shortcuts L4. But that turns L1
124 -- into the call-return proc point, and every iteration of the loop
125 -- has to shuffle variables to and from the stack. So we must *not*
126 -- shortcut L4.
127 --
128 -- Moreover not shortcutting call returns is probably fine. If L4 can
129 -- concat with its branch target then it will still do so. And we
130 -- save some compile time because we don't have to traverse all the
131 -- code in replaceLabels.
132 --
133 -- However, we probably do want to do this if we are splitting proc
134 -- points, because L1 will be a proc-point anyway, so merging it with
135 -- L4 reduces the number of proc points. Unfortunately recursive
136 -- let-no-escapes won't generate very good code with proc-point
137 -- splitting on - we should probably compile them to explicitly use
138 -- the native calling convention instead.
139
140 cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
141 cmmCfgOpts split g = fst (blockConcat split g)
142
143 cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
144 cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
145 where (g', env) = blockConcat split g
146 info' = info{ info_tbls = new_info_tbls }
147 new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
148
149 -- If we changed any labels, then we have to update the info tables
150 -- too, except for the top-level info table because that might be
151 -- referred to by other procs.
152 upd_info (k,info)
153 | Just k' <- mapLookup k env
154 = (k', if k' == g_entry g'
155 then info
156 else info{ cit_lbl = infoTblLbl k' })
157 | otherwise
158 = (k,info)
159 cmmCfgOptsProc _ top = top
160
161
162 blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId)
163 blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
164 = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
165 where
166 -- We might be able to shortcut the entry BlockId itself.
167 -- Remember to update the shortcut_map, since we also have to
168 -- update the info_tbls mapping now.
169 (new_entry, shortcut_map')
170 | Just entry_blk <- mapLookup entry_id new_blocks
171 , Just dest <- canShortcut entry_blk
172 = (dest, mapInsert entry_id dest shortcut_map)
173 | otherwise
174 = (entry_id, shortcut_map)
175
176 -- blocks are sorted in reverse postorder, but we want to go from the exit
177 -- towards beginning, so we use foldr below.
178 blocks = revPostorder g
179 blockmap = foldl' (flip addBlock) emptyBody blocks
180
181 -- Accumulator contains three components:
182 -- * map of blocks in a graph
183 -- * map of shortcut labels. See Note [Shortcut call returns]
184 -- * map containing number of predecessors for each block. We discard
185 -- it after we process all blocks.
186 (new_blocks, shortcut_map, _) =
187 foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks
188
189 -- Map of predecessors for initial graph. We increase number of
190 -- predecessors for entry block by one to denote that it is
191 -- target of a jump, even if no block in the current graph jumps
192 -- to it.
193 initialBackEdges = incPreds entry_id (predMap blocks)
194
195 maybe_concat :: CmmBlock
196 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
197 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
198 maybe_concat block (!blocks, !shortcut_map, !backEdges)
199 -- If:
200 -- (1) current block ends with unconditional branch to b' and
201 -- (2) it has exactly one predecessor (namely, current block)
202 --
203 -- Then:
204 -- (1) append b' block at the end of current block
205 -- (2) remove b' from the map of blocks
206 -- (3) remove information about b' from predecessors map
207 --
208 -- Since we know that the block has only one predecessor we call
209 -- mapDelete directly instead of calling decPreds.
210 --
211 -- Note that we always maintain an up-to-date list of predecessors, so
212 -- we can ignore the contents of shortcut_map
213 | CmmBranch b' <- last
214 , hasOnePredecessor b'
215 , Just blk' <- mapLookup b' blocks
216 = let bid' = entryLabel blk'
217 in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
218 , shortcut_map
219 , mapDelete b' backEdges )
220
221 -- If:
222 -- (1) we are splitting proc points (see Note
223 -- [Shortcut call returns and proc-points]) and
224 -- (2) current block is a CmmCall or CmmForeignCall with
225 -- continuation b' and
226 -- (3) we can shortcut that continuation to dest
227 -- Then:
228 -- (1) we change continuation to point to b'
229 -- (2) create mapping from b' to dest
230 -- (3) increase number of predecessors of dest by 1
231 -- (4) decrease number of predecessors of b' by 1
232 --
233 -- Later we will use replaceLabels to substitute all occurrences of b'
234 -- with dest.
235 | splitting_procs
236 , Just b' <- callContinuation_maybe last
237 , Just blk' <- mapLookup b' blocks
238 , Just dest <- canShortcut blk'
239 = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
240 , mapInsert b' dest shortcut_map
241 , decPreds b' $ incPreds dest backEdges )
242
243 -- If:
244 -- (1) a block does not end with a call
245 -- Then:
246 -- (1) if it ends with a conditional attempt to invert the
247 -- conditional
248 -- (2) attempt to shortcut all destination blocks
249 -- (3) if new successors of a block are different from the old ones
250 -- update the of predecessors accordingly
251 --
252 -- A special case of this is a situation when a block ends with an
253 -- unconditional jump to a block that can be shortcut.
254 | Nothing <- callContinuation_maybe last
255 = let oldSuccs = successors last
256 newSuccs = successors rewrite_last
257 in ( mapInsert bid (blockJoinTail head rewrite_last) blocks
258 , shortcut_map
259 , if oldSuccs == newSuccs
260 then backEdges
261 else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
262
263 -- Otherwise don't do anything
264 | otherwise
265 = ( blocks, shortcut_map, backEdges )
266 where
267 (head, last) = blockSplitTail block
268 bid = entryLabel block
269
270 -- Changes continuation of a call to a specified label
271 update_cont dest =
272 case last of
273 CmmCall{} -> last { cml_cont = Just dest }
274 CmmForeignCall{} -> last { succ = dest }
275 _ -> panic "Can't shortcut continuation."
276
277 -- Attempts to shortcut successors of last node
278 shortcut_last = mapSuccessors shortcut last
279 where
280 shortcut l =
281 case mapLookup l blocks of
282 Just b | Just dest <- canShortcut b -> dest
283 _otherwise -> l
284
285 rewrite_last
286 -- Sometimes we can get rid of the conditional completely.
287 | CmmCondBranch _cond t f _l <- shortcut_last
288 , t == f
289 = CmmBranch t
290
291 -- See Note [Invert Cmm conditionals]
292 | CmmCondBranch cond t f l <- shortcut_last
293 , hasOnePredecessor t -- inverting will make t a fallthrough
294 , likelyTrue l || (numPreds f > 1)
295 , Just cond' <- maybeInvertCmmExpr cond
296 = CmmCondBranch cond' f t (invertLikeliness l)
297
298 -- If all jump destinations of a switch go to the
299 -- same target eliminate the switch.
300 | CmmSwitch _expr targets <- shortcut_last
301 , (t:ts) <- switchTargetsToList targets
302 , all (== t) ts
303 = CmmBranch t
304
305 | otherwise
306 = shortcut_last
307
308 likelyTrue (Just True) = True
309 likelyTrue _ = False
310
311 invertLikeliness :: Maybe Bool -> Maybe Bool
312 invertLikeliness = fmap not
313
314 -- Number of predecessors for a block
315 numPreds bid = mapLookup bid backEdges `orElse` 0
316
317 hasOnePredecessor b = numPreds b == 1
318
319 {-
320 Note [Invert Cmm conditionals]
321 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
322 The native code generator always produces jumps to the true branch.
323 Falling through to the false branch is however faster. So we try to
324 arrange for that to happen.
325 This means we invert the condition if:
326 * The likely path will become a fallthrough.
327 * We can't guarantee a fallthrough for the false branch but for the
328 true branch.
329
330 In some cases it's faster to avoid inverting when the false branch is likely.
331 However determining when that is the case is neither easy nor cheap so for
332 now we always invert as this produces smaller binaries and code that is
333 equally fast on average. (On an i7-6700K)
334
335 TODO:
336 There is also the edge case when both branches have multiple predecessors.
337 In this case we could assume that we will end up with a jump for BOTH
338 branches. In this case it might be best to put the likely path in the true
339 branch especially if there are large numbers of predecessors as this saves
340 us the jump thats not taken. However I haven't tested this and as of early
341 2018 we almost never generate cmm where this would apply.
342 -}
343
344 -- Functions for incrementing and decrementing number of predecessors. If
345 -- decrementing would set the predecessor count to 0, we remove entry from the
346 -- map.
347 -- Invariant: if a block has no predecessors it should be dropped from the
348 -- graph because it is unreachable. maybe_concat is constructed to maintain
349 -- that invariant, but calling replaceLabels may introduce unreachable blocks.
350 -- We rely on subsequent passes in the Cmm pipeline to remove unreachable
351 -- blocks.
352 incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int
353 incPreds bid edges = mapInsertWith (+) bid 1 edges
354 decPreds bid edges = case mapLookup bid edges of
355 Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
356 Just _ -> mapDelete bid edges
357 _ -> edges
358
359
360 -- Checks if a block consists only of "goto dest". If it does than we return
361 -- "Just dest" label. See Note [What is shortcutting]
362 canShortcut :: CmmBlock -> Maybe BlockId
363 canShortcut block
364 | (_, middle, CmmBranch dest) <- blockSplit block
365 , all dont_care $ blockToList middle
366 = Just dest
367 | otherwise
368 = Nothing
369 where dont_care CmmComment{} = True
370 dont_care CmmTick{} = True
371 dont_care _other = False
372
373 -- Concatenates two blocks. First one is assumed to be open on exit, the second
374 -- is assumed to be closed on entry (i.e. it has a label attached to it, which
375 -- the splice function removes by calling snd on result of blockSplitHead).
376 splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
377 splice head rest = entry `blockJoinHead` code0 `blockAppend` code1
378 where (CmmEntry lbl sc0, code0) = blockSplitHead head
379 (CmmEntry _ sc1, code1) = blockSplitHead rest
380 entry = CmmEntry lbl (combineTickScopes sc0 sc1)
381
382 -- If node is a call with continuation call return Just label of that
383 -- continuation. Otherwise return Nothing.
384 callContinuation_maybe :: CmmNode O C -> Maybe BlockId
385 callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
386 callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
387 callContinuation_maybe _ = Nothing
388
389
390 -- Map over the CmmGraph, replacing each label with its mapping in the
391 -- supplied LabelMap.
392 replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph
393 replaceLabels env g
394 | mapNull env = g
395 | otherwise = replace_eid $ mapGraphNodes1 txnode g
396 where
397 replace_eid g = g {g_entry = lookup (g_entry g)}
398 lookup id = mapLookup id env `orElse` id
399
400 txnode :: CmmNode e x -> CmmNode e x
401 txnode (CmmBranch bid) = CmmBranch (lookup bid)
402 txnode (CmmCondBranch p t f l) =
403 mkCmmCondBranch (exp p) (lookup t) (lookup f) l
404 txnode (CmmSwitch e ids) =
405 CmmSwitch (exp e) (mapSwitchTargets lookup ids)
406 txnode (CmmCall t k rg a res r) =
407 CmmCall (exp t) (liftM lookup k) rg a res r
408 txnode fc@CmmForeignCall{} =
409 fc{ args = map exp (args fc), succ = lookup (succ fc) }
410 txnode other = mapExpDeep exp other
411
412 exp :: CmmExpr -> CmmExpr
413 exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
414 exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
415 exp e = e
416
417 mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C
418 mkCmmCondBranch p t f l =
419 if t == f then CmmBranch t else CmmCondBranch p t f l
420
421 -- Build a map from a block to its set of predecessors.
422 predMap :: [CmmBlock] -> LabelMap Int
423 predMap blocks = foldr add_preds mapEmpty blocks
424 where
425 add_preds block env = foldr add env (successors block)
426 where add lbl env = mapInsertWith (+) lbl 1 env
427
428 -- Removing unreachable blocks
429 removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
430 removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
431 | used_blocks `lengthLessThan` mapSize (toBlockMap g)
432 = CmmProc info' lbl live g'
433 | otherwise
434 = proc
435 where
436 g' = ofBlockList (g_entry g) used_blocks
437 info' = info { info_tbls = keep_used (info_tbls info) }
438 -- Remove any info_tbls for unreachable
439
440 keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable
441 keep_used bs = mapFoldlWithKey keep mapEmpty bs
442
443 keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable
444 keep env l i | l `setMember` used_lbls = mapInsert l i env
445 | otherwise = env
446
447 used_blocks :: [CmmBlock]
448 used_blocks = revPostorder g
449
450 used_lbls :: LabelSet
451 used_lbls = setFromList $ map entryLabel used_blocks