2ff9b98d2a1e4ee233b3611fec1613a3c5d7de7d
[ghc.git] / compiler / cmm / CmmSink.hs
1 {-# LANGUAGE GADTs #-}
2 module CmmSink (
3 cmmSink
4 ) where
5
6 import CodeGen.Platform (callerSaves)
7
8 import Cmm
9 import BlockId
10 import CmmLive
11 import CmmUtils
12 import Hoopl
13
14 import DynFlags
15 import UniqFM
16 -- import PprCmm ()
17 -- import Outputable
18
19 import Data.List (partition)
20 import qualified Data.Set as Set
21
22 -- -----------------------------------------------------------------------------
23 -- Sinking and inlining
24
25 -- This is an optimisation pass that
26 -- (a) moves assignments closer to their uses, to reduce register pressure
27 -- (b) pushes assignments into a single branch of a conditional if possible
28 -- (c) inlines assignments to registers that are mentioned only once
29 -- (d) discards dead assignments
30 --
31 -- This tightens up lots of register-heavy code. It is particularly
32 -- helpful in the Cmm generated by the Stg->Cmm code generator, in
33 -- which every function starts with a copyIn sequence like:
34 --
35 -- x1 = R1
36 -- x2 = Sp[8]
37 -- x3 = Sp[16]
38 -- if (Sp - 32 < SpLim) then L1 else L2
39 --
40 -- we really want to push the x1..x3 assignments into the L2 branch.
41 --
42 -- Algorithm:
43 --
44 -- * Start by doing liveness analysis.
45 --
46 -- * Keep a list of assignments A; earlier ones may refer to later ones
47 --
48 -- * Walk forwards through the graph, look at each node N:
49 -- * If any assignments in A (1) occur only once in N, and (2) are
50 -- not live after N, inline the assignment and remove it
51 -- from A.
52 -- * If N is an assignment:
53 -- * If the register is not live after N, discard it
54 -- * otherwise pick up the assignment and add it to A
55 -- * If N is a non-assignment node:
56 -- * remove any assignments from A that conflict with N, and
57 -- place them before N in the current block. (we call this
58 -- "dropping" the assignments).
59 -- * An assignment conflicts with N if it:
60 -- - assigns to a register mentioned in N
61 -- - mentions a register assigned by N
62 -- - reads from memory written by N
63 -- * do this recursively, dropping dependent assignments
64 -- * At a multi-way branch:
65 -- * drop any assignments that are live on more than one branch
66 -- * if any successor has more than one predecessor (a
67 -- join-point), drop everything live in that successor
68 --
69 -- As a side-effect we'll delete some dead assignments (transitively,
70 -- even). This isn't as good as removeDeadAssignments, but it's much
71 -- cheaper.
72
73 -- If we do this *before* stack layout, we might be able to avoid
74 -- saving some things across calls/procpoints.
75 --
76 -- *but*, that will invalidate the liveness analysis, and we'll have
77 -- to re-do it.
78
79 -- TODO: things that we aren't optimising very well yet.
80 --
81 -- From GHC's FastString.hashStr:
82 --
83 -- s2ay:
84 -- if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp;
85 -- c2gn:
86 -- R1 = _s2au::I64;
87 -- call (I64[Sp])(R1) args: 8, res: 0, upd: 8;
88 -- c2gp:
89 -- _s2cO::I64 = %MO_S_Rem_W64(%MO_UU_Conv_W8_W64(I8[_s2aq::I64 + (_s2an::I64 << 0)]) + _s2au::I64 * 128,
90 -- 4091);
91 -- _s2an::I64 = _s2an::I64 + 1;
92 -- _s2au::I64 = _s2cO::I64;
93 -- goto s2ay;
94 --
95 -- a nice loop, but we didn't eliminate the silly assignment at the end.
96 -- See Note [dependent assignments], which would probably fix this.
97 --
98
-- | An assignment that we are carrying forward while sinking: the local
-- register assigned, its right-hand side, and a cached 'AbsMem'
-- abstraction of the memory read by the right-hand side (cached so we
-- do not recompute it on every conflict test).
type Assignment = (LocalReg, CmmExpr, AbsMem)
102
-- | Sink and inline assignments throughout a 'CmmGraph', guided by a
-- liveness analysis.  See the block comment at the top of this module
-- for the algorithm.
cmmSink :: DynFlags -> CmmGraph -> CmmGraph
cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
  where
  -- liveness: registers live on entry to each block
  liveness = cmmLiveness graph
  getLive l = mapFindWithDefault Set.empty l liveness

  blocks = postorderDfs graph

  -- blocks with more than one predecessor; we may not sink into these
  join_pts = findJoinPoints blocks

  -- 'sunk' maps a block label to the assignments that have been sunk
  -- into that block from its (already-processed) predecessor.
  sink :: BlockEnv [Assignment] -> [CmmBlock] -> [CmmBlock]
  sink _ [] = []
  sink sunk (b:bs) =
    -- pprTrace "sink" (ppr lbl) $
    blockJoin first final_middle final_last : sink sunk' bs
    where
      lbl = entryLabel b
      (first, middle, last) = blockSplit b

      succs = successors last

      -- Annotate the middle nodes with the registers live *after*
      -- the node.  This will help us decide whether we can inline
      -- an assignment in the current node or not.
      live = Set.unions (map getLive succs)
      live_middle = gen_kill last live
      ann_middles = annotate live_middle (blockToList middle)

      -- Now sink and inline in this block
      (middle', assigs) = walk dflags ann_middles (mapFindWithDefault [] lbl sunk)
      (final_last, assigs') = tryToInline dflags live last assigs

      -- We cannot sink into join points (successors with more than
      -- one predecessor), so identify the join points and the set
      -- of registers live in them.
      (joins, nonjoins) = partition (`mapMember` join_pts) succs
      live_in_joins = Set.unions (map getLive joins)

      -- We do not want to sink an assignment into multiple branches,
      -- so identify the set of registers live in multiple successors.
      -- This is made more complicated because when we sink an assignment
      -- into one branch, this might change the set of registers that are
      -- now live in multiple branches.
      init_live_sets = map getLive nonjoins
      live_in_multi live_sets r =
        case filter (Set.member r) live_sets of
          (_one:_two:_) -> True
          _ -> False

      -- Now, drop any assignments that we will not sink any further.
      (dropped_last, assigs'') = dropAssignments dflags drop_if init_live_sets assigs'

      -- Decide whether assignment @a@ must be dropped before the last
      -- node, threading the per-successor live sets as state: when we
      -- keep sinking @r = rhs@, the registers read by @rhs@ become live
      -- wherever @r@ was live.
      drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets')
        where
          should_drop = conflicts dflags a final_last
                     || {- not (isSmall rhs) && -} live_in_multi live_sets r
                     || r `Set.member` live_in_joins

          live_sets' | should_drop = live_sets
                     | otherwise   = map upd live_sets

          upd set | r `Set.member` set = set `Set.union` live_rhs
                  | otherwise          = set

          live_rhs = foldRegsUsed extendRegSet emptyRegSet rhs

      -- emit the dropped assignments at the end of the middle section
      final_middle = foldl blockSnoc middle' dropped_last

      -- record what we are sinking into each successor (filtered to
      -- what is still needed there)
      sunk' = mapUnion sunk $
              mapFromList [ (l, filterAssignments dflags (getLive l) assigs'')
                          | l <- succs ]
174
175 {- TODO: enable this later, when we have some good tests in place to
176 measure the effect and tune it.
177
178 -- small: an expression we don't mind duplicating
179 isSmall :: CmmExpr -> Bool
180 isSmall (CmmReg (CmmLocal _)) = True -- not globals, we want to coalesce them instead
181 isSmall (CmmLit _) = True
182 isSmall (CmmMachOp (MO_Add _) [x,y]) = isTrivial x && isTrivial y
183 isSmall (CmmRegOff (CmmLocal _) _) = True
184 isSmall _ = False
185
186 isTrivial :: CmmExpr -> Bool
187 isTrivial (CmmReg (CmmLocal _)) = True
188 isTrivial (CmmLit _) = True
189 isTrivial _ = False
190 -}
191
--
-- Pair each node with the set of registers live *after* it, computed
-- by walking backwards from the liveness at the end of the node list.
--
annotate :: RegSet -> [CmmNode O O] -> [(RegSet, CmmNode O O)]
annotate live nodes = snd (foldr step (live, []) nodes)
  where
    step node (liveAfter, annotated) =
      (gen_kill node liveAfter, (liveAfter, node) : annotated)
198
--
-- Find the join points: blocks that are the target of more than one
-- branch (i.e. have more than one predecessor).
--
findJoinPoints :: [CmmBlock] -> BlockEnv Int
findJoinPoints blocks = mapFilter (> 1) predecessor_counts
  where
    -- count, for every successor label, how many branches target it
    predecessor_counts :: BlockEnv Int
    predecessor_counts = foldr bump mapEmpty (concatMap successors blocks)

    bump lbl env = mapInsertWith (+) lbl 1 env
209
--
-- Filter the list of assignments, removing any whose register is not
-- live in the continuation -- except that we must keep an assignment
-- if some assignment we have already kept still refers to it.
--
filterAssignments :: DynFlags -> RegSet -> [Assignment] -> [Assignment]
filterAssignments dflags live assigs = reverse (loop assigs [])
  where
    loop [] kept = kept
    loop (a@(r, _, _) : rest) kept
      | keep_it   = loop rest (a : kept)
      | otherwise = loop rest kept
      where
        -- Keep if the register is live, or if any already-kept
        -- assignment mentions it.  (Remember: the input list is in
        -- reverse order, so 'kept' holds *later* assignments that may
        -- depend on this one.)
        keep_it = r `Set.member` live
               || any (\k -> conflicts dflags a (toNode k)) kept
225
226 -- -----------------------------------------------------------------------------
227 -- Walk through the nodes of a block, sinking and inlining assignments
228 -- as we go.
229
-- | Walk through the nodes of a block, sinking and inlining
-- assignments as we go, and rebuild the block.
walk :: DynFlags
     -> [(RegSet, CmmNode O O)]   -- nodes of the block, annotated with
                                  -- the set of registers live *after*
                                  -- this node.

     -> [Assignment]              -- The current list of
                                  -- assignments we are sinking.
                                  -- Later assignments may refer
                                  -- to earlier ones.

     -> ( Block CmmNode O O       -- The new block
        , [Assignment]            -- Assignments to sink further
        )

walk dflags nodes assigs = go nodes emptyBlock assigs
  where
    go [] block as = (block, as)
    go ((live,node):ns) block as
      -- dead assignment (or "r = r"): drop the node entirely
      | shouldDiscard node live = go ns block as
      -- an assignment to a local register: pick it up and carry it
      -- forward (after inlining into it)
      | Just a <- shouldSink dflags node1 = go ns block (a : as1)
      -- otherwise: emit the node, preceded by any carried assignments
      -- that conflict with it
      | otherwise = go ns block' as'
      where
        (node1, as1) = tryToInline dflags live node as

        (dropped, as') = dropAssignmentsSimple dflags (\a -> conflicts dflags a node1) as1
        block' = foldl blockSnoc block dropped `blockSnoc` node1
256
--
-- Heuristic to decide whether to pick up and sink an assignment.
-- Currently we pick up all assignments to local registers.  It might
-- be profitable to sink assignments to global regs too, but the
-- liveness analysis doesn't track those (yet) so we can't.
--
shouldSink :: DynFlags -> CmmNode e x -> Maybe Assignment
shouldSink dflags node
  | CmmAssign (CmmLocal r) rhs <- node, sinkable
  = Just (r, rhs, exprMem dflags rhs)
  | otherwise
  = Nothing
  where
    sinkable = True -- foldRegsUsed (\_ _ -> False) True e
267
--
-- Discard dead assignments.  This doesn't do as good a job as
-- removeDeadAssignments, because it would need multiple passes
-- to get all the dead code, but it catches the common case of
-- superfluous reloads from the stack that the stack allocator
-- leaves behind.
--
-- Also we catch "r = r" here.  You might think it would fall
-- out of inlining, but the inliner will see that r is live
-- after the instruction and choose not to inline r in the rhs.
--
shouldDiscard :: CmmNode e x -> RegSet -> Bool
shouldDiscard (CmmAssign r (CmmReg r')) _ | r == r' = True   -- "r = r"
shouldDiscard (CmmAssign (CmmLocal r) _) live = not (r `Set.member` live)
shouldDiscard _ _ = False
285
286
-- Turn a carried assignment back into an assignment node.
toNode :: Assignment -> CmmNode O O
toNode (reg, rhs, _mem) = CmmAssign (CmmLocal reg) rhs
289
-- Drop assignments using a stateless predicate.
dropAssignmentsSimple :: DynFlags -> (Assignment -> Bool) -> [Assignment]
                      -> ([CmmNode O O], [Assignment])
dropAssignmentsSimple dflags p assigs =
  dropAssignments dflags (\a s -> (p a, s)) () assigs
293
-- Partition the carried assignments into those that must be dropped
-- here (returned as nodes, to be emitted now) and those that we can
-- keep sinking.  The caller's 'should_drop' decision threads a state
-- value; additionally, an assignment must be dropped if it conflicts
-- with any assignment already dropped, because those will be emitted
-- in front of it.
dropAssignments :: DynFlags -> (Assignment -> s -> (Bool, s)) -> s -> [Assignment]
                -> ([CmmNode O O], [Assignment])
dropAssignments dflags should_drop initial assigs = (dropped_nodes, reverse kept_rev)
  where
    (dropped_nodes, kept_rev) = go initial assigs [] []

    go _ [] dropped kept = (dropped, kept)
    go st (a : rest) dropped kept
      | must_drop = go st' rest (toNode a : dropped) kept
      | otherwise = go st' rest dropped (a : kept)
      where
        (drop_here, st') = should_drop a st
        must_drop = drop_here || any (conflicts dflags a) dropped
308
309
310 -- -----------------------------------------------------------------------------
311 -- Try to inline assignments into a node.
312
-- | Try to inline the carried assignments into a node.  An assignment
-- can be inlined only if its register is dead after the node, is used
-- exactly once, does not depend on an assignment we skipped, and is
-- allowed by 'okToInline'.
tryToInline
   :: DynFlags
   -> RegSet            -- set of registers live after this
                        -- node.  We cannot inline anything
                        -- that is live after the node, unless
                        -- it is small enough to duplicate.
   -> CmmNode O x       -- The node to inline into
   -> [Assignment]      -- Assignments to inline
   -> (
        CmmNode O x     -- New node
      , [Assignment]    -- Remaining assignments
      )

tryToInline dflags live node assigs = go usages node [] assigs
 where
  -- how many times each local register is used in the node
  usages :: UniqFM Int
  usages = foldRegsUsed addUsage emptyUFM node

  go _usages node _skipped [] = (node, [])

  go usages node skipped (a@(l,rhs,_) : rest)
   | can_inline              = inline_and_discard
   | False {- isTiny rhs -}  = inline_and_keep
   --  ^^ seems to make things slightly worse
   -- NB. when both guards fail we fall through to the next equation
   -- below, which keeps the assignment and marks l as skipped.
   where
        inline_and_discard = go usages' node' skipped rest

        inline_and_keep = (node'', a : rest')
          where (node'',rest') = go usages' node' (l:skipped) rest

        can_inline =
            not (l `elemRegSet` live)
         && not (skipped `regsUsedIn` rhs) -- Note [dependent assignments]
         && okToInline dflags rhs node
         && lookupUFM usages l == Just 1

        -- the RHS is now part of the node, so its registers' uses count
        usages' = foldRegsUsed addUsage usages rhs

        -- substitute rhs for every occurrence of l in the node
        node' = mapExpDeep inline node
           where inline (CmmReg (CmmLocal l'))        | l == l' = rhs
                 inline (CmmRegOff (CmmLocal l') off) | l == l'
                                = cmmOffset dflags rhs off
                 inline other = other

  go usages node skipped (assig@(l,rhs,_) : rest)
    = (node', assig : rest')
    where (node', rest') = go usages' node (l:skipped) rest
          usages' = foldRegsUsed (\m r -> addToUFM m r 2) usages rhs
          -- we must not inline anything that is mentioned in the RHS
          -- of a binding that we have already skipped, so we set the
          -- usages of the regs on the RHS to 2.
364
365 -- Note [dependent assignments]
366 --
367 -- If our assignment list looks like
368 --
369 -- [ y = e, x = ... y ... ]
370 --
371 -- We cannot inline x. Remember this list is really in reverse order,
372 -- so it means x = ... y ...; y = e
373 --
374 -- Hence if we inline x, the outer assignment to y will capture the
375 -- reference in x's right hand side.
376 --
377 -- In this case we should rename the y in x's right-hand side,
378 -- i.e. change the list to [ y = e, x = ... y1 ..., y1 = y ]
379 -- Now we can go ahead and inline x.
380 --
381 -- For now we do nothing, because this would require putting
382 -- everything inside UniqSM.
383
-- Record one more use of a local register in the usage map.
addUsage :: UniqFM Int -> LocalReg -> UniqFM Int
addUsage counts reg = addToUFM_C (+) counts reg 1
386
-- Does the expression mention any of the given local registers
-- (directly or via CmmRegOff)?
regsUsedIn :: [LocalReg] -> CmmExpr -> Bool
regsUsedIn [] _ = False
regsUsedIn regs expr = wrapRecExpf check expr False
  where
    check (CmmReg (CmmLocal r))      _ | r `elem` regs = True
    check (CmmRegOff (CmmLocal r) _) _ | r `elem` regs = True
    check _ found = found
393
-- We don't inline into CmmUnsafeForeignCall if the expression refers
-- to global registers.  This is a HACK to avoid global registers
-- clashing with C argument-passing registers; really the back-end
-- ought to be able to handle it properly, but currently neither PprC
-- nor the NCG can do it.  See Note [Register parameter passing].
-- See also StgCmmForeign:load_args_into_temps.
okToInline :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
okToInline dflags expr node =
  case node of
    CmmUnsafeForeignCall{} -> not (anyCallerSavesRegs dflags expr)
    _                      -> True
403
404 -- -----------------------------------------------------------------------------
405
-- | @conflicts (r,e) stmt@ is @False@ if and only if the assignment
-- @r = e@ can be safely commuted past @stmt@.
--
-- We only sink "r = G" assignments right now, so conflicts is very simple:
--
conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool
conflicts dflags (r, rhs, addr) node

  -- (1) the node assigns to a register that the RHS reads, or the
  --     node itself uses the register the assignment defines
  | CmmAssign reg _ <- node, reg `regUsedIn` rhs = True
  | foldRegsUsed (\b r' -> r == r' || b) False node = True

  -- (2) a store to an address conflicts with a read of the same memory
  | CmmStore addr' e <- node, memConflicts addr (loadAddr dflags addr' (cmmExprWidth dflags e)) = True

  -- (3) an assignment to Hp/Sp conflicts with a heap/stack read respectively
  | HeapMem <- addr, CmmAssign (CmmGlobal Hp) _ <- node = True
  | StackMem <- addr, CmmAssign (CmmGlobal Sp) _ <- node = True
  | SpMem{} <- addr, CmmAssign (CmmGlobal Sp) _ <- node = True

  -- (4) assignments that read caller-saves GlobalRegs conflict with a
  -- foreign call.  See Note [foreign calls clobber GlobalRegs].
  | CmmUnsafeForeignCall{} <- node, anyCallerSavesRegs dflags rhs = True

  -- (5) foreign calls clobber memory, but not heap/stack memory
  | CmmUnsafeForeignCall{} <- node, AnyMem <- addr = True

  -- (6) native calls clobber any memory
  | CmmCall{} <- node, memConflicts addr AnyMem = True

  -- (7) otherwise, no conflict
  | otherwise = False
438
439
-- Does the expression read any global register that is caller-saved
-- on the target platform?
anyCallerSavesRegs :: DynFlags -> CmmExpr -> Bool
anyCallerSavesRegs dflags expr = wrapRecExpf scan expr False
  where
    scan (CmmReg (CmmGlobal g)) _
      | callerSaves (targetPlatform dflags) g = True
    scan _ acc = acc
445
-- An abstraction of memory read or written.
data AbsMem
  = NoMem            -- no memory accessed
  | AnyMem           -- arbitrary memory
  | HeapMem          -- definitely heap memory
  | StackMem         -- definitely stack memory
  | SpMem            -- a known stack slot: <size>[Sp+n]
       {-# UNPACK #-} !Int    -- n: byte offset from Sp (see 'regAddr')
       {-# UNPACK #-} !Int    -- size: width of the access in bytes
455
456 -- Having SpMem is important because it lets us float loads from Sp
457 -- past stores to Sp as long as they don't overlap, and this helps to
458 -- unravel some long sequences of
459 -- x1 = [Sp + 8]
460 -- x2 = [Sp + 16]
461 -- ...
462 -- [Sp + 8] = xi
463 -- [Sp + 16] = xj
464 --
465 -- Note that SpMem is invalidated if Sp is changed, but the definition
466 -- of 'conflicts' above handles that.
467
468 -- ToDo: this won't currently fix the following commonly occurring code:
469 -- x1 = [R1 + 8]
470 -- x2 = [R1 + 16]
471 -- ..
472 -- [Hp - 8] = x1
473 -- [Hp - 16] = x2
474 -- ..
475
476 -- because [R1 + 8] and [Hp - 8] are both HeapMem. We know that
477 -- assignments to [Hp + n] do not conflict with any other heap memory,
478 -- but this is tricky to nail down. What if we had
479 --
480 -- x = Hp + n
481 -- [x] = ...
482 --
483 -- the store to [x] should be "new heap", not "old heap".
484 -- Furthermore, you could imagine that if we started inlining
485 -- functions in Cmm then there might well be reads of heap memory
486 -- that was written in the same basic block. To take advantage of
487 -- non-aliasing of heap memory we will have to be more clever.
488
-- Combine two memory abstractions: an upper bound covering whatever
-- either side might touch.
bothMems :: AbsMem -> AbsMem -> AbsMem
bothMems m1 m2 = case (m1, m2) of
  (NoMem, y)                  -> y
  (x, NoMem)                  -> x
  (HeapMem, HeapMem)          -> HeapMem
  (StackMem, StackMem)        -> StackMem
  (SpMem o1 w1, SpMem o2 w2)
    | o1 == o2                -> SpMem o1 (max w1 w2)
    | otherwise               -> StackMem  -- different slots: just "stack"
  (SpMem{}, StackMem)         -> StackMem
  (StackMem, SpMem{})         -> StackMem
  _                           -> AnyMem
500
-- Could these two memory abstractions refer to overlapping memory?
memConflicts :: AbsMem -> AbsMem -> Bool
memConflicts m1 m2 = case (m1, m2) of
  (NoMem, _)          -> False
  (_, NoMem)          -> False
  (HeapMem, StackMem) -> False   -- heap and stack are disjoint
  (StackMem, HeapMem) -> False
  (SpMem{}, HeapMem)  -> False
  (HeapMem, SpMem{})  -> False
  (SpMem o1 w1, SpMem o2 w2)
    -- two known stack slots conflict only if their byte ranges overlap
    | o1 < o2         -> o1 + w1 > o2
    | otherwise       -> o2 + w2 > o1
  _                   -> True
512
-- The abstract memory read by an expression.
exprMem :: DynFlags -> CmmExpr -> AbsMem
exprMem dflags expr = case expr of
  CmmLoad addr ty  -> bothMems (loadAddr dflags addr (typeWidth ty))
                               (exprMem dflags addr)
  CmmMachOp _ args -> foldr (bothMems . exprMem dflags) NoMem args
  _                -> NoMem
517
-- Abstract the memory touched by a load of the given width at the
-- given address expression.
loadAddr :: DynFlags -> CmmExpr -> Width -> AbsMem
loadAddr dflags (CmmReg r)      w = regAddr dflags r 0 w
loadAddr dflags (CmmRegOff r i) w = regAddr dflags r i w
loadAddr _ other _
  | CmmGlobal Sp `regUsedIn` other = StackMem
  | otherwise                      = AnyMem
525
-- Abstract a register-plus-offset address: accesses via Sp become a
-- known stack slot, accesses via Hp or via any GC-pointer-typed
-- register are heap memory, and anything else is unknown.
regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem
regAddr _ (CmmGlobal Sp) offset w = SpMem offset (widthInBytes w)
regAddr _ (CmmGlobal Hp) _ _ = HeapMem
regAddr dflags reg _ _
  | isGcPtrType (cmmRegType dflags reg) = HeapMem -- yay! GCPtr pays for itself
  | otherwise                           = AnyMem