{-# LANGUAGE GADTs #-}
module CmmSink (
     cmmSink
  ) where

import CodeGen.Platform (callerSaves)

import Cmm
import CmmOpt
import BlockId
import CmmLive
import CmmUtils
import Hoopl

import DynFlags
import UniqFM
import PprCmm ()

import Data.List (partition)
import qualified Data.Set as Set

-- -----------------------------------------------------------------------------
-- Sinking and inlining

-- This is an optimisation pass that
--  (a) moves assignments closer to their uses, to reduce register pressure
--  (b) pushes assignments into a single branch of a conditional if possible
--  (c) inlines assignments to registers that are mentioned only once
--  (d) discards dead assignments
--
-- This tightens up lots of register-heavy code. It is particularly
-- helpful in the Cmm generated by the Stg->Cmm code generator, in
-- which every function starts with a copyIn sequence like:
--
--    x1 = R1
--    x2 = Sp[8]
--    x3 = Sp[16]
--    if (Sp - 32 < SpLim) then L1 else L2
--
-- we really want to push the x1..x3 assignments into the L2 branch.
--
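-- After sinking, those assignments appear only on the L2 path; roughly
-- (an illustrative sketch of the intended result, labels as above):
--
--    if (Sp - 32 < SpLim) then L1 else L2
--  L2:
--    x1 = R1
--    x2 = Sp[8]
--    x3 = Sp[16]
--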
-- Algorithm:
--
--  * Start by doing liveness analysis.
--
--  * Keep a list of assignments A; earlier ones may refer to later ones
--
--  * Walk forwards through the graph, look at each node N:
--     * If any assignments in A (1) occur only once in N, and (2) are
--       not live after N, inline the assignment and remove it
--       from A.
--     * If N is an assignment:
--       * If the register is not live after N, discard it
--       * otherwise pick up the assignment and add it to A
--     * If N is a non-assignment node:
--       * remove any assignments from A that conflict with N, and
--         place them before N in the current block. (we call this
--         "dropping" the assignments).
--       * An assignment conflicts with N if it:
--         - assigns to a register mentioned in N
--         - mentions a register assigned by N
--         - reads from memory written by N
--       * do this recursively, dropping dependent assignments
--     * At a multi-way branch:
--       * drop any assignments that are live on more than one branch
--       * if any successor has more than one predecessor (a
--         join-point), drop everything live in that successor
--
-- As a side-effect we'll delete some dead assignments (transitively,
-- even). This isn't as good as removeDeadAssignments, but it's much
-- cheaper.

-- If we do this *before* stack layout, we might be able to avoid
-- saving some things across calls/procpoints.
--
-- *but*, that will invalidate the liveness analysis, and we'll have
-- to re-do it.

-- -----------------------------------------------------------------------------
-- things that we aren't optimising very well yet.
--
-- -----------
-- (1) From GHC's FastString.hashStr:
--
--  s2ay:
--      if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp;
--  c2gn:
--      R1 = _s2au::I64;
--      call (I64[Sp])(R1) args: 8, res: 0, upd: 8;
--  c2gp:
--      _s2cO::I64 = %MO_S_Rem_W64(%MO_UU_Conv_W8_W64(I8[_s2aq::I64 + (_s2an::I64 << 0)]) + _s2au::I64 * 128,
--                                 4091);
--      _s2an::I64 = _s2an::I64 + 1;
--      _s2au::I64 = _s2cO::I64;
--      goto s2ay;
--
-- a nice loop, but we didn't eliminate the silly assignment at the end.
-- See Note [dependent assignments], which would probably fix this.
--
-- -----------
-- (2) From stg_atomically_frame in PrimOps.cmm
--
-- We have a diamond control flow:
--
--      x = ...
--        |
--       / \
--      A   B
--       \ /
--        |
--     use of x
--
-- Now x won't be sunk down to its use, because we won't push it into
-- both branches of the conditional. We certainly do have to check
-- that we can sink it past all the code in both A and B, but having
-- discovered that, we could sink it to its use.
--

-- -----------------------------------------------------------------------------

type Assignment = (LocalReg, CmmExpr, AbsMem)
  -- Assignment caches AbsMem, an abstraction of the memory read by
  -- the RHS of the assignment.

cmmSink :: DynFlags -> CmmGraph -> CmmGraph
cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
  where
  liveness = cmmLocalLiveness dflags graph
  getLive l = mapFindWithDefault Set.empty l liveness

  blocks = postorderDfs graph

  join_pts = findJoinPoints blocks

  sink :: BlockEnv [Assignment] -> [CmmBlock] -> [CmmBlock]
  sink _ [] = []
  sink sunk (b:bs) =
    -- pprTrace "sink" (ppr lbl) $
    blockJoin first final_middle final_last : sink sunk' bs
    where
      lbl = entryLabel b
      (first, middle, last) = blockSplit b

      succs = successors last

      -- Annotate the middle nodes with the registers live *after*
      -- the node. This will help us decide whether we can inline
      -- an assignment in the current node or not.
      live = Set.unions (map getLive succs)
      live_middle = gen_kill dflags last live
      ann_middles = annotate dflags live_middle (blockToList middle)

      -- Now sink and inline in this block
      (middle', assigs) = walk dflags ann_middles (mapFindWithDefault [] lbl sunk)
      fold_last = constantFold dflags last
      (final_last, assigs') = tryToInline dflags live fold_last assigs

      -- We cannot sink into join points (successors with more than
      -- one predecessor), so identify the join points and the set
      -- of registers live in them.
      (joins, nonjoins) = partition (`mapMember` join_pts) succs
      live_in_joins = Set.unions (map getLive joins)

      -- We do not want to sink an assignment into multiple branches,
      -- so identify the set of registers live in multiple successors.
      -- This is made more complicated because when we sink an assignment
      -- into one branch, this might change the set of registers that are
      -- now live in multiple branches.
      init_live_sets = map getLive nonjoins
      live_in_multi live_sets r =
         case filter (Set.member r) live_sets of
           (_one:_two:_) -> True
           _             -> False

      -- Now, drop any assignments that we will not sink any further.
      (dropped_last, assigs'') = dropAssignments dflags drop_if init_live_sets assigs'

      drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets')
          where
            should_drop =  conflicts dflags a final_last
                        || not (isTrivial rhs) && live_in_multi live_sets r
                        || r `Set.member` live_in_joins

            live_sets' | should_drop = live_sets
                       | otherwise   = map upd live_sets

            upd set | r `Set.member` set = set `Set.union` live_rhs
                    | otherwise          = set

            live_rhs = foldRegsUsed dflags extendRegSet emptyRegSet rhs

      final_middle = foldl blockSnoc middle' dropped_last

      sunk' = mapUnion sunk $
                 mapFromList [ (l, filterAssignments dflags (getLive l) assigs'')
                             | l <- succs ]

{- TODO: enable this later, when we have some good tests in place to
   measure the effect and tune it.

-- small: an expression we don't mind duplicating
isSmall :: CmmExpr -> Bool
isSmall (CmmReg (CmmLocal _)) = True -- not globals, we want to coalesce them instead
isSmall (CmmLit _) = True
isSmall (CmmMachOp (MO_Add _) [x,y]) = isTrivial x && isTrivial y
isSmall (CmmRegOff (CmmLocal _) _) = True
isSmall _ = False
-}

isTrivial :: CmmExpr -> Bool
isTrivial (CmmReg (CmmLocal _)) = True
-- isTrivial (CmmLit _) = True
isTrivial _ = False

--
-- annotate each node with the set of registers live *after* the node
--
annotate :: DynFlags -> LocalRegSet -> [CmmNode O O] -> [(LocalRegSet, CmmNode O O)]
annotate dflags live nodes = snd $ foldr ann (live,[]) nodes
  where ann n (live,nodes) = (gen_kill dflags n live, (live,n) : nodes)

--
-- Find the blocks that have multiple predecessors (join points)
--
findJoinPoints :: [CmmBlock] -> BlockEnv Int
findJoinPoints blocks = mapFilter (>1) succ_counts
  where
    all_succs = concatMap successors blocks

    succ_counts :: BlockEnv Int
    succ_counts = foldr (\l -> mapInsertWith (+) l 1) mapEmpty all_succs
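
-- For example, if two different blocks both end in "goto L", then L occurs
-- twice in all_succs and is mapped to 2 here, so 'cmmSink' above treats it
-- as a join point and will not sink assignments into it.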

--
-- filter the list of assignments to remove any assignments that
-- are not live in a continuation.
--
filterAssignments :: DynFlags -> LocalRegSet -> [Assignment] -> [Assignment]
filterAssignments dflags live assigs = reverse (go assigs [])
  where go []             kept = kept
        go (a@(r,_,_):as) kept | needed    = go as (a:kept)
                               | otherwise = go as kept
           where
              needed = r `Set.member` live
                       || any (conflicts dflags a) (map toNode kept)
                       -- Note that we must keep assignments that are
                       -- referred to by other assignments we have
                       -- already kept.

-- -----------------------------------------------------------------------------
-- Walk through the nodes of a block, sinking and inlining assignments
-- as we go.

walk :: DynFlags
     -> [(LocalRegSet, CmmNode O O)]    -- nodes of the block, annotated with
                                        -- the set of registers live *after*
                                        -- this node.

     -> [Assignment]                    -- The current list of
                                        -- assignments we are sinking.
                                        -- Later assignments may refer
                                        -- to earlier ones.

     -> ( Block CmmNode O O             -- The new block
        , [Assignment]                  -- Assignments to sink further
        )

walk dflags nodes assigs = go nodes emptyBlock assigs
  where
    go []               block as = (block, as)
    go ((live,node):ns) block as
      | shouldDiscard node live           = go ns block as
      | Just a <- shouldSink dflags node2 = go ns block (a : as1)
      | otherwise                         = go ns block' as'
      where
        node1 = constantFold dflags node

        (node2, as1) = tryToInline dflags live node1 as

        (dropped, as') = dropAssignmentsSimple dflags
                            (\a -> conflicts dflags a node2) as1

        block' = foldl blockSnoc block dropped `blockSnoc` node2


constantFold :: DynFlags -> CmmNode e x -> CmmNode e x
constantFold dflags node = mapExpDeep f node
  where f (CmmMachOp op args) = cmmMachOpFold dflags op args
        f (CmmRegOff r 0) = CmmReg r
        f e = e

--
-- Heuristic to decide whether to pick up and sink an assignment
-- Currently we pick up all assignments to local registers. It might
-- be profitable to sink assignments to global regs too, but the
-- liveness analysis doesn't track those (yet) so we can't.
--
shouldSink :: DynFlags -> CmmNode e x -> Maybe Assignment
shouldSink dflags (CmmAssign (CmmLocal r) e) | no_local_regs = Just (r, e, exprMem dflags e)
  where no_local_regs = True -- foldRegsUsed (\_ _ -> False) True e
shouldSink _ _other = Nothing
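
-- For example (illustrative, assuming a 64-bit target): "x = Sp[8]", an
-- assignment to a local register, is picked up as the Assignment
-- (x, Sp[8], SpMem 8 8), whereas "R1 = e" is not picked up, because R1 is
-- a global register.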

--
-- discard dead assignments. This doesn't do as good a job as
-- removeDeadAssignments, because it would need multiple passes
-- to get all the dead code, but it catches the common case of
-- superfluous reloads from the stack that the stack allocator
-- leaves behind.
--
-- Also we catch "r = r" here. You might think it would fall
-- out of inlining, but the inliner will see that r is live
-- after the instruction and choose not to inline r in the rhs.
--
shouldDiscard :: CmmNode e x -> LocalRegSet -> Bool
shouldDiscard node live
   = case node of
       CmmAssign r (CmmReg r') | r == r' -> True
       CmmAssign (CmmLocal r) _          -> not (r `Set.member` live)
       _otherwise                        -> False

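-- For example (illustrative):
--   * a self-assignment "r = r" is discarded whatever the liveness;
--   * a reload "r = Sp[8]" left behind by the stack allocator is discarded
--     exactly when r is not live after the node.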

toNode :: Assignment -> CmmNode O O
toNode (r,rhs,_) = CmmAssign (CmmLocal r) rhs

dropAssignmentsSimple :: DynFlags -> (Assignment -> Bool) -> [Assignment]
                      -> ([CmmNode O O], [Assignment])
dropAssignmentsSimple dflags f = dropAssignments dflags (\a _ -> (f a, ())) ()

dropAssignments :: DynFlags -> (Assignment -> s -> (Bool, s)) -> s -> [Assignment]
                -> ([CmmNode O O], [Assignment])
dropAssignments dflags should_drop state assigs
 = (dropped, reverse kept)
 where
   (dropped,kept) = go state assigs [] []

   go _     []             dropped kept = (dropped, kept)
   go state (assig : rest) dropped kept
      | conflict  = go state' rest (toNode assig : dropped) kept
      | otherwise = go state' rest dropped (assig:kept)
      where
        (dropit, state') = should_drop assig state
        conflict = dropit || any (conflicts dflags assig) dropped


-- -----------------------------------------------------------------------------
-- Try to inline assignments into a node.

tryToInline
   :: DynFlags
   -> LocalRegSet               -- set of registers live after this
                                -- node. We cannot inline anything
                                -- that is live after the node, unless
                                -- it is small enough to duplicate.
   -> CmmNode O x               -- The node to inline into
   -> [Assignment]              -- Assignments to inline
   -> (
        CmmNode O x             -- New node
      , [Assignment]            -- Remaining assignments
      )

tryToInline dflags live node assigs = go usages node [] assigs
  where
    usages :: UniqFM Int
    usages = foldRegsUsed dflags addUsage emptyUFM node

    go _usages node _skipped [] = (node, [])

    go usages node skipped (a@(l,rhs,_) : rest)
      | cannot_inline = dont_inline
      | occurs_once   = inline_and_discard
      | isTrivial rhs = inline_and_keep
      | otherwise     = dont_inline
      where
        inline_and_discard = go usages' inl_node skipped rest
          where usages' = foldRegsUsed dflags addUsage usages rhs

        dont_inline     = keep node     -- don't inline the assignment, keep it
        inline_and_keep = keep inl_node -- inline the assignment, keep it

        keep node' = (final_node, a : rest')
          where (final_node, rest') = go usages' node' (l:skipped) rest
                usages' = foldLocalRegsUsed dflags (\m r -> addToUFM m r 2) usages rhs
                -- we must not inline anything that is mentioned in the RHS
                -- of a binding that we have already skipped, so we set the
                -- usages of the regs on the RHS to 2.

        cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
                        || l `elem` skipped
                        || not (okToInline dflags rhs node)

        occurs_once = not (l `elemRegSet` live)
                      && lookupUFM usages l == Just 1

        inl_node = mapExpDeep inline node
          where inline (CmmReg (CmmLocal l'))          | l == l' = rhs
                inline (CmmRegOff (CmmLocal l') off)   | l == l'
                            = cmmOffset dflags rhs off
                -- re-constant fold after inlining
                inline (CmmMachOp op args) = cmmMachOpFold dflags op args
                inline other = other

-- Note [dependent assignments]
--
-- If our assignment list looks like
--
--    [ y = e,  x = ... y ... ]
--
-- We cannot inline x. Remember this list is really in reverse order,
-- so it means  x = ... y ...; y = e
--
-- Hence if we inline x, the outer assignment to y will capture the
-- reference in x's right hand side.
--
-- In this case we should rename the y in x's right-hand side,
-- i.e. change the list to [ y = e, x = ... y1 ..., y1 = y ]
-- Now we can go ahead and inline x.
--
-- For now we do nothing, because this would require putting
-- everything inside UniqSM.
--
-- One more variant of this (#7366):
--
--    [ y = e, y = z ]
--
-- If we don't want to inline y = e, because y is used many times, we
-- might still be tempted to inline y = z (because we always inline
-- trivial rhs's). But of course we can't, because y is equal to e,
-- not z.

addUsage :: UniqFM Int -> LocalReg -> UniqFM Int
addUsage m r = addToUFM_C (+) m r 1

regsUsedIn :: [LocalReg] -> CmmExpr -> Bool
regsUsedIn [] _ = False
regsUsedIn ls e = wrapRecExpf f e False
  where f (CmmReg (CmmLocal l))      _ | l `elem` ls = True
        f (CmmRegOff (CmmLocal l) _) _ | l `elem` ls = True
        f _ z = z

-- we don't inline into CmmUnsafeForeignCall if the expression refers
-- to global registers. This is a HACK to avoid global registers
-- clashing with C argument-passing registers, really the back-end
-- ought to be able to handle it properly, but currently neither PprC
-- nor the NCG can do it. See Note [Register parameter passing]
-- See also StgCmmForeign:load_args_into_temps.
okToInline :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
okToInline dflags expr CmmUnsafeForeignCall{} = not (anyCallerSavesRegs dflags expr)
okToInline _ _ _ = True

-- -----------------------------------------------------------------------------

-- | @conflicts (r,e) stmt@ is @False@ if and only if the assignment
-- @r = e@ can be safely commuted past @stmt@.
--
-- We only sink "r = G" assignments right now, so conflicts is very simple:
--
conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool
conflicts dflags (r, rhs, addr) node

  -- (1) an assignment to a register conflicts with a use of the register
  | CmmAssign reg _ <- node, reg `regUsedIn` rhs                 = True
  | foldRegsUsed dflags (\b r' -> r == r' || b) False node       = True

  -- (2) a store to an address conflicts with a read of the same memory
  | CmmStore addr' e <- node
  , memConflicts addr (loadAddr dflags addr' (cmmExprWidth dflags e)) = True

  -- (3) an assignment to Hp/Sp conflicts with a heap/stack read respectively
  | HeapMem  <- addr, CmmAssign (CmmGlobal Hp) _ <- node          = True
  | StackMem <- addr, CmmAssign (CmmGlobal Sp) _ <- node          = True
  | SpMem{}  <- addr, CmmAssign (CmmGlobal Sp) _ <- node          = True

  -- (4) assignments that read caller-saves GlobalRegs conflict with a
  -- foreign call. See Note [foreign calls clobber GlobalRegs].
  | CmmUnsafeForeignCall{} <- node, anyCallerSavesRegs dflags rhs = True

  -- (5) foreign calls clobber heap: see Note [foreign calls clobber heap]
  | CmmUnsafeForeignCall{} <- node, memConflicts addr AnyMem      = True

  -- (6) native calls clobber any memory
  | CmmCall{} <- node, memConflicts addr AnyMem                   = True

  -- (7) otherwise, no conflict
  | otherwise = False

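-- A worked example (illustrative, assuming a 64-bit target): suppose we are
-- sinking the assignment "x = Sp[8]", i.e. (x, Sp[8], SpMem 8 8). By clause
-- (2) it conflicts with a store to Sp[8], but it commutes past a store to
-- Sp[16] (the two SpMem ranges do not overlap, assuming the stored value
-- does not mention x), and by clause (3) it conflicts with any assignment
-- to Sp itself.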

anyCallerSavesRegs :: DynFlags -> CmmExpr -> Bool
anyCallerSavesRegs dflags e = wrapRecExpf f e False
  where f (CmmReg (CmmGlobal r)) _
         | callerSaves (targetPlatform dflags) r = True
        f _ z = z

-- An abstraction of memory read or written.
data AbsMem
  = NoMem            -- no memory accessed
  | AnyMem           -- arbitrary memory
  | HeapMem          -- definitely heap memory
  | StackMem         -- definitely stack memory
  | SpMem            -- <size>[Sp+n]
       {-# UNPACK #-} !Int
       {-# UNPACK #-} !Int

-- Having SpMem is important because it lets us float loads from Sp
-- past stores to Sp as long as they don't overlap, and this helps to
-- unravel some long sequences of
--    x1 = [Sp + 8]
--    x2 = [Sp + 16]
--    ...
--    [Sp + 8]  = xi
--    [Sp + 16] = xj
--
-- Note that SpMem is invalidated if Sp is changed, but the definition
-- of 'conflicts' above handles that.

-- ToDo: this won't currently fix the following commonly occurring code:
--    x1 = [R1 + 8]
--    x2 = [R1 + 16]
--    ..
--    [Hp - 8]  = x1
--    [Hp - 16] = x2
--    ..

-- because [R1 + 8] and [Hp - 8] are both HeapMem. We know that
-- assignments to [Hp + n] do not conflict with any other heap memory,
-- but this is tricky to nail down. What if we had
--
--   x = Hp + n
--   [x] = ...
--
-- the store to [x] should be "new heap", not "old heap".
-- Furthermore, you could imagine that if we started inlining
-- functions in Cmm then there might well be reads of heap memory
-- that was written in the same basic block. To take advantage of
-- non-aliasing of heap memory we will have to be more clever.

-- Note [foreign calls clobber]
--
-- It is tempting to say that foreign calls clobber only
-- non-heap/stack memory, but unfortunately we break this invariant in
-- the RTS. For example, in stg_catch_retry_frame we call
-- stmCommitNestedTransaction() which modifies the contents of the
-- TRec it is passed (this actually caused incorrect code to be
-- generated).
--
-- Since the invariant is true for the majority of foreign calls,
-- perhaps we ought to have a special annotation for calls that can
-- modify heap/stack memory. For now we just use the conservative
-- definition here.


bothMems :: AbsMem -> AbsMem -> AbsMem
bothMems NoMem    x         = x
bothMems x        NoMem     = x
bothMems HeapMem  HeapMem   = HeapMem
bothMems StackMem StackMem  = StackMem
bothMems (SpMem o1 w1) (SpMem o2 w2)
  | o1 == o2  = SpMem o1 (max w1 w2)
  | otherwise = StackMem
bothMems SpMem{}  StackMem  = StackMem
bothMems StackMem SpMem{}   = StackMem
bothMems _        _         = AnyMem

memConflicts :: AbsMem -> AbsMem -> Bool
memConflicts NoMem    _        = False
memConflicts _        NoMem    = False
memConflicts HeapMem  StackMem = False
memConflicts StackMem HeapMem  = False
memConflicts SpMem{}  HeapMem  = False
memConflicts HeapMem  SpMem{}  = False
memConflicts (SpMem o1 w1) (SpMem o2 w2)
  | o1 < o2   = o1 + w1 > o2
  | otherwise = o2 + w2 > o1
memConflicts _ _ = True
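
-- A small executable sketch of the expected behaviour of 'memConflicts'
-- (the binding '_absMemExamples' is purely illustrative and not used by
-- the pass; offsets and widths are in bytes):
_absMemExamples :: Bool
_absMemExamples =
       not (memConflicts (SpMem 8 8) (SpMem 16 8))   -- disjoint Sp slots
    && memConflicts (SpMem 8 8) (SpMem 12 8)         -- overlapping Sp slots
    && not (memConflicts (SpMem 8 8) HeapMem)        -- Sp slot never aliases heap
    && memConflicts AnyMem HeapMem                   -- arbitrary memory may alias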

exprMem :: DynFlags -> CmmExpr -> AbsMem
exprMem dflags (CmmLoad addr w)  = bothMems (loadAddr dflags addr (typeWidth w)) (exprMem dflags addr)
exprMem dflags (CmmMachOp _ es)  = foldr bothMems NoMem (map (exprMem dflags) es)
exprMem _      _                 = NoMem

loadAddr :: DynFlags -> CmmExpr -> Width -> AbsMem
loadAddr dflags e w =
  case e of
    CmmReg r      -> regAddr dflags r 0 w
    CmmRegOff r i -> regAddr dflags r i w
    _other | CmmGlobal Sp `regUsedIn` e -> StackMem
           | otherwise                  -> AnyMem

regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem
regAddr _      (CmmGlobal Sp) i w = SpMem i (widthInBytes w)
regAddr _      (CmmGlobal Hp) _ _ = HeapMem
regAddr _      (CmmGlobal CurrentTSO) _ _ = HeapMem -- important for PrimOps
regAddr dflags r _ _ | isGcPtrType (cmmRegType dflags r) = HeapMem -- yay! GCPtr pays for itself
regAddr _      _ _ _ = AnyMem
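
-- Putting these together (illustrative, assuming a 64-bit target where
-- widthInBytes W64 == 8):
--
--   loadAddr dflags (CmmRegOff (CmmGlobal Sp) 8) W64      ==>  SpMem 8 8
--   loadAddr dflags (CmmRegOff (CmmGlobal Hp) (-8)) W64   ==>  HeapMem
--   exprMem  dflags (CmmLoad (CmmRegOff (CmmGlobal Sp) 8) b64)  ==>  SpMem 8 8
--
-- so a load from Sp[8] can float past stores to other, non-overlapping Sp
-- slots, while a load through Hp is classified as HeapMem and therefore
-- conflicts with any heap store.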