Fold testsuite.git into ghc.git (re #8545)
[ghc.git] / compiler / cmm / CmmSink.hs
1 {-# LANGUAGE GADTs #-}
2 module CmmSink (
3 cmmSink
4 ) where
5
6 import Cmm
7 import CmmOpt
8 import BlockId
9 import CmmLive
10 import CmmUtils
11 import Hoopl
12
13 import DynFlags
14 import UniqFM
15 import PprCmm ()
16
17 import Data.List (partition)
18 import qualified Data.Set as Set
19
20 -- -----------------------------------------------------------------------------
21 -- Sinking and inlining
22
23 -- This is an optimisation pass that
24 -- (a) moves assignments closer to their uses, to reduce register pressure
25 -- (b) pushes assignments into a single branch of a conditional if possible
26 -- (c) inlines assignments to registers that are mentioned only once
27 -- (d) discards dead assignments
28 --
29 -- This tightens up lots of register-heavy code. It is particularly
30 -- helpful in the Cmm generated by the Stg->Cmm code generator, in
31 -- which every function starts with a copyIn sequence like:
32 --
33 -- x1 = R1
34 -- x2 = Sp[8]
35 -- x3 = Sp[16]
36 -- if (Sp - 32 < SpLim) then L1 else L2
37 --
38 -- we really want to push the x1..x3 assignments into the L2 branch.
39 --
40 -- Algorithm:
41 --
42 -- * Start by doing liveness analysis.
43 --
44 -- * Keep a list of assignments A; earlier ones may refer to later ones.
45 -- Currently we only sink assignments to local registers, because we don't
46 -- have liveness information about global registers.
47 --
48 -- * Walk forwards through the graph, look at each node N:
49 --
50 -- * If it is a dead assignment, i.e. assignment to a register that is
51 -- not used after N, discard it.
52 --
53 -- * Try to inline based on current list of assignments
54 -- * If any assignments in A (1) occur only once in N, and (2) are
55 -- not live after N, inline the assignment and remove it
56 -- from A.
57 --
58 -- * If an assignment in A is cheap (RHS is local register), then
59 -- inline the assignment and keep it in A in case it is used afterwards.
60 --
61 -- * Otherwise don't inline.
62 --
63 -- * If N is assignment to a local register pick up the assignment
64 -- and add it to A.
65 --
66 -- * If N is not an assignment to a local register:
67 -- * remove any assignments from A that conflict with N, and
68 -- place them before N in the current block. We call this
69 -- "dropping" the assignments.
70 --
71 -- * An assignment conflicts with N if it:
72 -- - assigns to a register mentioned in N
73 -- - mentions a register assigned by N
74 -- - reads from memory written by N
75 -- * do this recursively, dropping dependent assignments
76 --
77 -- * At an exit node:
78 -- * drop any assignments that are live on more than one successor
79 -- and are not trivial
80 -- * if any successor has more than one predecessor (a join-point),
81 -- drop everything live in that successor. Since we only propagate
82 -- assignments that are not dead at the successor, we will therefore
83 -- eliminate all assignments dead at this point. Thus analysis of a
84 -- join-point will always begin with an empty list of assignments.
85 --
86 --
87 -- As a result of above algorithm, sinking deletes some dead assignments
88 -- (transitively, even). This isn't as good as removeDeadAssignments,
89 -- but it's much cheaper.
90
91 -- -----------------------------------------------------------------------------
92 -- things that we aren't optimising very well yet.
93 --
94 -- -----------
95 -- (1) From GHC's FastString.hashStr:
96 --
97 -- s2ay:
98 -- if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp;
99 -- c2gn:
100 -- R1 = _s2au::I64;
101 -- call (I64[Sp])(R1) args: 8, res: 0, upd: 8;
102 -- c2gp:
103 -- _s2cO::I64 = %MO_S_Rem_W64(%MO_UU_Conv_W8_W64(I8[_s2aq::I64 + (_s2an::I64 << 0)]) + _s2au::I64 * 128,
104 -- 4091);
105 -- _s2an::I64 = _s2an::I64 + 1;
106 -- _s2au::I64 = _s2cO::I64;
107 -- goto s2ay;
108 --
109 -- a nice loop, but we didn't eliminate the silly assignment at the end.
110 -- See Note [dependent assignments], which would probably fix this.
111 -- This is #8336 on Trac.
112 --
113 -- -----------
114 -- (2) From stg_atomically_frame in PrimOps.cmm
115 --
116 -- We have a diamond control flow:
117 --
118 -- x = ...
119 -- |
120 -- / \
121 -- A B
122 -- \ /
123 -- |
124 -- use of x
125 --
126 -- Now x won't be sunk down to its use, because we won't push it into
127 -- both branches of the conditional. We certainly do have to check
128 -- that we can sink it past all the code in both A and B, but having
129 -- discovered that, we could sink it to its use.
130 --
131
132 -- -----------------------------------------------------------------------------
133
type Assignment = (LocalReg, CmmExpr, AbsMem)
  -- Assignment caches AbsMem, an abstraction of the memory read by
  -- the RHS of the assignment, so that 'conflicts' does not have to
  -- recompute it at every node the assignment is commuted past.

type Assignments = [Assignment]
  -- A sequence of assignments; kept in *reverse* order
  -- So the list [ x=e1, y=e2 ] means the sequence of assignments
  --     y = e2
  --     x = e1
143
-- | The sink/inline pass: process the blocks of the graph in
-- postorder-DFS order, carrying the assignments sunk out of each block
-- into its successors (the @sunk@ map), per the algorithm described at
-- the top of this module.
cmmSink :: DynFlags -> CmmGraph -> CmmGraph
cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
  where
  liveness = cmmLocalLiveness dflags graph
  getLive l = mapFindWithDefault Set.empty l liveness

  blocks = postorderDfs graph

  join_pts = findJoinPoints blocks

  sink :: BlockEnv Assignments -> [CmmBlock] -> [CmmBlock]
  sink _ [] = []
  sink sunk (b:bs) =
    -- pprTrace "sink" (ppr lbl) $
    blockJoin first final_middle final_last : sink sunk' bs
    where
      lbl = entryLabel b
      (first, middle, last) = blockSplit b

      succs = successors last

      -- Annotate the middle nodes with the registers live *after*
      -- the node. This will help us decide whether we can inline
      -- an assignment in the current node or not.
      live = Set.unions (map getLive succs)
      live_middle = gen_kill dflags last live
      ann_middles = annotate dflags live_middle (blockToList middle)

      -- Now sink and inline in this block, starting from any
      -- assignments sunk into this block from its predecessors.
      (middle', assigs) = walk dflags ann_middles (mapFindWithDefault [] lbl sunk)
      fold_last = constantFold dflags last
      (final_last, assigs') = tryToInline dflags live fold_last assigs

      -- We cannot sink into join points (successors with more than
      -- one predecessor), so identify the join points and the set
      -- of registers live in them.
      (joins, nonjoins) = partition (`mapMember` join_pts) succs
      live_in_joins = Set.unions (map getLive joins)

      -- We do not want to sink an assignment into multiple branches,
      -- so identify the set of registers live in multiple successors.
      -- This is made more complicated because when we sink an assignment
      -- into one branch, this might change the set of registers that are
      -- now live in multiple branches.
      init_live_sets = map getLive nonjoins
      live_in_multi live_sets r =
          case filter (Set.member r) live_sets of
            (_one:_two:_) -> True
            _ -> False

      -- Now, drop any assignments that we will not sink any further.
      (dropped_last, assigs'') = dropAssignments dflags drop_if init_live_sets assigs'

      -- An assignment is dropped (emitted at the end of this block) if it
      -- conflicts with the exit node, would be duplicated into multiple
      -- branches, or is live in a join point.  When we keep sinking an
      -- assignment, the registers its RHS reads become live in every
      -- successor it is sunk into, which is why the live sets are updated.
      drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets')
          where
            should_drop = conflicts dflags a final_last
                       || not (isTrivial rhs) && live_in_multi live_sets r
                       || r `Set.member` live_in_joins

            live_sets' | should_drop = live_sets
                       | otherwise   = map upd live_sets

            upd set | r `Set.member` set = set `Set.union` live_rhs
                    | otherwise          = set

            live_rhs = foldRegsUsed dflags extendRegSet emptyRegSet rhs

      -- Append the dropped assignments to the end of the middle section.
      final_middle = foldl blockSnoc middle' dropped_last

      -- Propagate the surviving assignments to each successor, filtered
      -- to those actually needed (live, or depended on) there.
      sunk' = mapUnion sunk $
                mapFromList [ (l, filterAssignments dflags (getLive l) assigs'')
                            | l <- succs ]
216
217 {- TODO: enable this later, when we have some good tests in place to
218 measure the effect and tune it.
219
220 -- small: an expression we don't mind duplicating
221 isSmall :: CmmExpr -> Bool
222 isSmall (CmmReg (CmmLocal _)) = True -- not globals, we want to coalesce them instead* See below
223 isSmall (CmmLit _) = True
224 isSmall (CmmMachOp (MO_Add _) [x,y]) = isTrivial x && isTrivial y
225 isSmall (CmmRegOff (CmmLocal _) _) = True
226 isSmall _ = False
227
228 Coalesce global registers? What does that mean? We observed no decrease
in performance coming from inlining of global registers, hence we do it now
230 (see isTrivial function). Ideally we'd like to measure performance using
231 some tool like perf or VTune and make decisions what to inline based on that.
232 -}
233
--
-- Trivial expressions are those we do not mind duplicating when
-- inlining: plain register reads (both local and global) and literals.
--
isTrivial :: CmmExpr -> Bool
isTrivial expr = case expr of
  CmmReg _ -> True
  CmmLit _ -> True
  _        -> False
242
--
-- Annotate each node with the set of registers live *after* the node,
-- working backwards from the liveness at the end of the node list.
--
annotate :: DynFlags -> LocalRegSet -> [CmmNode O O] -> [(LocalRegSet, CmmNode O O)]
annotate dflags live_at_exit nodes = snd (go nodes)
  where
    -- 'go ns' yields the liveness *before* ns, and each node of ns
    -- paired with the liveness *after* that node.
    go []     = (live_at_exit, [])
    go (n:ns) = (gen_kill dflags n live_after, (live_after, n) : annotated)
      where (live_after, annotated) = go ns
249
250 --
-- Find the blocks that have more than one predecessor (join points)
252 --
findJoinPoints :: [CmmBlock] -> BlockEnv Int
findJoinPoints blocks = mapFilter (> 1) predecessor_counts
  where
    -- How many times each label occurs as a successor, i.e. how many
    -- predecessors it has; labels with more than one are join points.
    predecessor_counts :: BlockEnv Int
    predecessor_counts = foldr bump mapEmpty (concatMap successors blocks)

    bump lbl env = mapInsertWith (+) lbl 1 env
260
261 --
262 -- filter the list of assignments to remove any assignments that
263 -- are not live in a continuation.
264 --
filterAssignments :: DynFlags -> LocalRegSet -> Assignments -> Assignments
filterAssignments dflags live assigs = reverse (sift assigs [])
  where
    sift [] kept = kept
    sift (a@(reg, _, _) : rest) kept
      | keep_it   = sift rest (a : kept)
      | otherwise = sift rest kept
      where
        -- Keep an assignment if its target is live in the continuation,
        -- or if some assignment we have already decided to keep depends
        -- on it (conflicts with it).
        keep_it = reg `Set.member` live
               || any (conflicts dflags a . toNode) kept
276
277 -- -----------------------------------------------------------------------------
278 -- Walk through the nodes of a block, sinking and inlining assignments
279 -- as we go.
280 --
281 -- On input we pass in a:
282 -- * list of nodes in the block
283 -- * a list of assignments that appeared *before* this block and
284 -- that are being sunk.
285 --
286 -- On output we get:
287 -- * a new block
288 -- * a list of assignments that will be placed *after* that block.
289 --
290
walk :: DynFlags
     -> [(LocalRegSet, CmmNode O O)]    -- nodes of the block, annotated with
                                        -- the set of registers live *after*
                                        -- this node.

     -> Assignments                     -- The current list of
                                        -- assignments we are sinking.
                                        -- Earlier assignments may refer
                                        -- to later ones.

     -> ( Block CmmNode O O             -- The new block
        , Assignments                   -- Assignments to sink further
        )

walk dflags nodes assigs = go nodes emptyBlock assigs
  where
    go [] block as = (block, as)
    go ((live,node):ns) block as
      -- dead assignment (or "r = r"): drop the node entirely
      | shouldDiscard node live           = go ns block as
      -- the node is itself a local-register assignment: pick it up and
      -- carry it forward instead of emitting it here
      | Just a <- shouldSink dflags node2 = go ns block (a : as1)
      -- otherwise emit the node, preceded by any carried assignments
      -- that conflict with it
      | otherwise                         = go ns block' as'
      where
        -- constant-fold first so that inlining sees simplified expressions
        node1 = constantFold dflags node

        -- inline carried assignments into this node where profitable
        (node2, as1) = tryToInline dflags live node1 as

        -- assignments that cannot commute past node2 must be dropped
        -- (emitted) just before it
        (dropped, as') = dropAssignmentsSimple dflags
                            (\a -> conflicts dflags a node2) as1

        block' = foldl blockSnoc block dropped `blockSnoc` node2
322
323
-- | Constant-fold all expressions in a node: apply 'cmmMachOpFold' to
-- every machine operation and simplify zero register offsets.
constantFold :: DynFlags -> CmmNode e x -> CmmNode e x
constantFold dflags = mapExpDeep simplify
  where
    simplify expr = case expr of
      CmmMachOp op args -> cmmMachOpFold dflags op args
      CmmRegOff reg 0   -> CmmReg reg
      _                 -> expr
329
--
-- Heuristic to decide whether to pick up and sink an assignment.
-- Currently we pick up all assignments to local registers. It might
-- be profitable to sink assignments to global regs too, but the
-- liveness analysis doesn't track those (yet) so we can't.
--
shouldSink :: DynFlags -> CmmNode e x -> Maybe Assignment
shouldSink dflags (CmmAssign (CmmLocal r) e)
  | pick_it = Just (r, e, exprMem dflags e)
  where
    -- currently always True; an earlier experiment restricted this
    pick_it = True -- foldRegsUsed (\_ _ -> False) True e
shouldSink _ _other = Nothing
340
--
-- Discard dead assignments. This doesn't do as good a job as
-- removeDeadAssignments, because it would need multiple passes
-- to get all the dead code, but it catches the common case of
-- superfluous reloads from the stack that the stack allocator
-- leaves behind.
--
-- Also we catch "r = r" here. You might think it would fall
-- out of inlining, but the inliner will see that r is live
-- after the instruction and choose not to inline r in the rhs.
--
shouldDiscard :: CmmNode e x -> LocalRegSet -> Bool
shouldDiscard (CmmAssign r (CmmReg r')) _ | r == r' = True   -- no-op "r = r"
shouldDiscard (CmmAssign (CmmLocal r) _) live = not (r `Set.member` live)
shouldDiscard _ _ = False
358
359
-- | View a carried assignment as the Cmm node it stands for.
toNode :: Assignment -> CmmNode O O
toNode (reg, rhs, _mem) = CmmAssign (CmmLocal reg) rhs
362
-- | Stateless wrapper around 'dropAssignments': drop exactly those
-- assignments satisfying the predicate (plus their dependents).
dropAssignmentsSimple :: DynFlags -> (Assignment -> Bool) -> Assignments
                      -> ([CmmNode O O], Assignments)
dropAssignmentsSimple dflags p =
  dropAssignments dflags (\a s -> (p a, s)) ()
366
-- | Partition the carried assignments into those to drop (emit as nodes
-- now) and those to keep sinking, threading a caller-supplied state
-- through the should_drop decisions.  An assignment is also dropped,
-- recursively, if it conflicts with any assignment already dropped.
--
-- Note on ordering: 'assigs' is newest-first (the Assignments
-- invariant).  'dropped' is built by prepending, so it comes out
-- oldest-first, i.e. in program order, ready to be appended to a block.
-- 'kept' is built the same way and reversed at the end to restore the
-- newest-first invariant.
dropAssignments :: DynFlags -> (Assignment -> s -> (Bool, s)) -> s -> Assignments
                -> ([CmmNode O O], Assignments)
dropAssignments dflags should_drop state assigs
  = (dropped, reverse kept)
  where
    (dropped,kept) = go state assigs [] []

    go _ [] dropped kept = (dropped, kept)
    go state (assig : rest) dropped kept
      | conflict  = go state' rest (toNode assig : dropped) kept
      | otherwise = go state' rest dropped (assig:kept)
      where
        (dropit, state') = should_drop assig state
        -- dependents of a dropped assignment must be dropped too
        conflict = dropit || any (conflicts dflags assig) dropped
381
382
383 -- -----------------------------------------------------------------------------
384 -- Try to inline assignments into a node.
385
-- | Try to inline the carried assignments into a node.  Walks the
-- assignment list (newest first), maintaining a usage count for each
-- register mentioned in the node so far, and a list of assignments
-- already skipped (which later assignments must not be inlined past).
tryToInline
   :: DynFlags
   -> LocalRegSet               -- set of registers live after this
                                -- node. We cannot inline anything
                                -- that is live after the node, unless
                                -- it is small enough to duplicate.
   -> CmmNode O x               -- The node to inline into
   -> Assignments               -- Assignments to inline
   -> (
        CmmNode O x             -- New node
      , Assignments             -- Remaining assignments
      )

tryToInline dflags live node assigs = go usages node [] assigs
  where
    usages :: UniqFM Int -- Maps each LocalReg to a count of how often it is used
    usages = foldLocalRegsUsed dflags addUsage emptyUFM node

    go _usages node _skipped [] = (node, [])

    go usages node skipped (a@(l,rhs,_) : rest)
      | cannot_inline = dont_inline
      | occurs_none   = discard        -- Note [discard during inlining]
      | occurs_once   = inline_and_discard
      | isTrivial rhs = inline_and_keep
      | otherwise     = dont_inline
      where
        -- after inlining, the registers read by rhs are now read by the
        -- node, so fold them into the usage counts
        inline_and_discard = go usages' inl_node skipped rest
          where usages' = foldLocalRegsUsed dflags addUsage usages rhs

        discard = go usages node skipped rest

        dont_inline     = keep node     -- don't inline the assignment, keep it
        inline_and_keep = keep inl_node -- inline the assignment, keep it

        keep node' = (final_node, a : rest')
          where (final_node, rest') = go usages' node' (l:skipped) rest
                usages' = foldLocalRegsUsed dflags (\m r -> addToUFM m r 2)
                                            usages rhs
                -- we must not inline anything that is mentioned in the RHS
                -- of a binding that we have already skipped, so we set the
                -- usages of the regs on the RHS to 2.

        cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
                        || l `elem` skipped
                        || not (okToInline dflags rhs node)

        l_usages = lookupUFM usages l
        l_live   = l `elemRegSet` live

        -- "occurs" counts refer to occurrences in the (current) node;
        -- a register live after the node can never be discarded
        occurs_once = not l_live && l_usages == Just 1
        occurs_none = not l_live && l_usages == Nothing

        inl_node = mapExpDeep inline node
                   -- mapExpDeep is where the inlining actually takes place!
          where inline (CmmReg    (CmmLocal l'))     | l == l' = rhs
                inline (CmmRegOff (CmmLocal l') off) | l == l'
                      = cmmOffset dflags rhs off
                -- re-constant fold after inlining
                inline (CmmMachOp op args) = cmmMachOpFold dflags op args
                inline other = other
447
448 -- Note [dependent assignments]
449 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
450 --
451 -- If our assignment list looks like
452 --
453 -- [ y = e, x = ... y ... ]
454 --
455 -- We cannot inline x. Remember this list is really in reverse order,
456 -- so it means x = ... y ...; y = e
457 --
458 -- Hence if we inline x, the outer assignment to y will capture the
459 -- reference in x's right hand side.
460 --
461 -- In this case we should rename the y in x's right-hand side,
462 -- i.e. change the list to [ y = e, x = ... y1 ..., y1 = y ]
463 -- Now we can go ahead and inline x.
464 --
465 -- For now we do nothing, because this would require putting
466 -- everything inside UniqSM.
467 --
468 -- One more variant of this (#7366):
469 --
470 -- [ y = e, y = z ]
471 --
472 -- If we don't want to inline y = e, because y is used many times, we
473 -- might still be tempted to inline y = z (because we always inline
474 -- trivial rhs's). But of course we can't, because y is equal to e,
475 -- not z.
476
477 -- Note [discard during inlining]
478 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
479 -- Opportunities to discard assignments sometimes appear after we've
480 -- done some inlining. Here's an example:
481 --
482 -- x = R1;
483 -- y = P64[x + 7];
484 -- z = P64[x + 15];
485 -- /* z is dead */
486 -- R1 = y & (-8);
487 --
488 -- The x assignment is trivial, so we inline it in the RHS of y, and
489 -- keep both x and y. z gets dropped because it is dead, then we
490 -- inline y, and we have a dead assignment to x. If we don't notice
491 -- that x is dead in tryToInline, we end up retaining it.
492
-- | Bump a register's usage count by one (absent registers count as zero).
addUsage :: UniqFM Int -> LocalReg -> UniqFM Int
addUsage counts reg = addToUFM_C (+) counts reg 1
495
-- | Does the expression read (directly or via a register offset) any of
-- the given local registers?
regsUsedIn :: [LocalReg] -> CmmExpr -> Bool
regsUsedIn [] _ = False   -- fast path: nothing to look for
regsUsedIn regs expr = wrapRecExpf hit expr False
  where
    hit (CmmReg    (CmmLocal r))   _ | r `elem` regs = True
    hit (CmmRegOff (CmmLocal r) _) _ | r `elem` regs = True
    hit _ found = found
502
-- we don't inline into CmmUnsafeForeignCall if the expression refers
-- to global registers. This is a HACK to avoid global registers
-- clashing with C argument-passing registers, really the back-end
-- ought to be able to handle it properly, but currently neither PprC
-- nor the NCG can do it. See Note [Register parameter passing]
-- See also StgCmmForeign:load_args_into_temps.
okToInline :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
okToInline dflags expr node =
  case node of
    CmmUnsafeForeignCall{} -> not (globalRegistersConflict dflags expr node)
    _                      -> True
513
514 -- -----------------------------------------------------------------------------
515
-- | @conflicts (r,e) stmt@ is @False@ if and only if the assignment
-- @r = e@ can be safely commuted past @stmt@.
--
-- We only sink "r = G" assignments right now, so conflicts is very simple:
--
conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool
conflicts dflags (r, rhs, addr) node

  -- (1) node defines registers used by rhs of assignment. This catches
  -- assignments and all three kinds of calls. See Note [Sinking and calls]
  | globalRegistersConflict dflags rhs node                       = True
  | localRegistersConflict  dflags rhs node                       = True

  -- (2) node uses register defined by assignment
  | foldRegsUsed dflags (\b r' -> r == r' || b) False node        = True

  -- (3) a store to an address conflicts with a read of the same memory
  -- (addr is the cached AbsMem of the assignment's RHS)
  | CmmStore addr' e <- node
  , memConflicts addr (loadAddr dflags addr' (cmmExprWidth dflags e)) = True

  -- (4) an assignment to Hp/Sp conflicts with a heap/stack read respectively
  -- (SpMem is only valid relative to the current Sp, so moving Sp kills it)
  | HeapMem  <- addr, CmmAssign (CmmGlobal Hp) _ <- node          = True
  | StackMem <- addr, CmmAssign (CmmGlobal Sp) _ <- node          = True
  | SpMem{}  <- addr, CmmAssign (CmmGlobal Sp) _ <- node          = True

  -- (5) foreign calls clobber heap: see Note [Foreign calls clobber heap]
  | CmmUnsafeForeignCall{} <- node, memConflicts addr AnyMem      = True

  -- (6) native calls clobber any memory
  | CmmCall{} <- node, memConflicts addr AnyMem                   = True

  -- (7) otherwise, no conflict
  | otherwise = False
549
-- Returns True if node defines any global registers that are used in the
-- Cmm expression
globalRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
globalRegistersConflict dflags expr node =
    foldRegsDefd dflags clash False node
  where
    clash found reg = found || CmmGlobal reg `regUsedIn` expr
555
-- Returns True if node defines any local registers that are used in the
-- Cmm expression
localRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
localRegistersConflict dflags expr node =
    foldRegsDefd dflags clash False node
  where
    clash found reg = found || CmmLocal reg `regUsedIn` expr
561
562 -- Note [Sinking and calls]
563 -- ~~~~~~~~~~~~~~~~~~~~~~~~
564 --
565 -- We have three kinds of calls: normal (CmmCall), safe foreign (CmmForeignCall)
566 -- and unsafe foreign (CmmUnsafeForeignCall). We perform sinking pass after
567 -- stack layout (see Note [Sinking after stack layout]) which leads to two
568 -- invariants related to calls:
569 --
570 -- a) during stack layout phase all safe foreign calls are turned into
571 -- unsafe foreign calls (see Note [Lower safe foreign calls]). This
572 -- means that we will never encounter CmmForeignCall node when running
573 -- sinking after stack layout
574 --
575 -- b) stack layout saves all variables live across a call on the stack
576 -- just before making a call (remember we are not sinking assignments to
577 -- stack):
578 --
579 -- L1:
580 -- x = R1
581 -- P64[Sp - 16] = L2
582 -- P64[Sp - 8] = x
583 -- Sp = Sp - 16
584 -- call f() returns L2
585 -- L2:
586 --
587 -- We will attempt to sink { x = R1 } but we will detect conflict with
588 -- { P64[Sp - 8] = x } and hence we will drop { x = R1 } without even
589 -- checking whether it conflicts with { call f() }. In this way we will
590 -- never need to check any assignment conflicts with CmmCall. Remember
591 -- that we still need to check for potential memory conflicts.
592 --
593 -- So the result is that we only need to worry about CmmUnsafeForeignCall nodes
594 -- when checking conflicts (see Note [Unsafe foreign calls clobber caller-save registers]).
595 -- This assumption holds only when we do sinking after stack layout. If we run
596 -- it before stack layout we need to check for possible conflicts with all three
597 -- kinds of calls. Our `conflicts` function does that by using a generic
598 -- foldRegsDefd and foldRegsUsed functions defined in DefinerOfRegs and
599 -- UserOfRegs typeclasses.
600 --
601
-- An abstraction of memory read or written.
data AbsMem
  = NoMem            -- no memory accessed
  | AnyMem           -- arbitrary memory
  | HeapMem          -- definitely heap memory
  | StackMem         -- definitely stack memory
  | SpMem            -- <size>[Sp+n]
       {-# UNPACK #-} !Int   -- n: byte offset from Sp (see 'regAddr')
       {-# UNPACK #-} !Int   -- size of the access in bytes
611
612 -- Having SpMem is important because it lets us float loads from Sp
613 -- past stores to Sp as long as they don't overlap, and this helps to
614 -- unravel some long sequences of
615 -- x1 = [Sp + 8]
616 -- x2 = [Sp + 16]
617 -- ...
618 -- [Sp + 8] = xi
619 -- [Sp + 16] = xj
620 --
621 -- Note that SpMem is invalidated if Sp is changed, but the definition
622 -- of 'conflicts' above handles that.
623
624 -- ToDo: this won't currently fix the following commonly occurring code:
625 -- x1 = [R1 + 8]
626 -- x2 = [R1 + 16]
627 -- ..
628 -- [Hp - 8] = x1
629 -- [Hp - 16] = x2
630 -- ..
631
632 -- because [R1 + 8] and [Hp - 8] are both HeapMem. We know that
633 -- assignments to [Hp + n] do not conflict with any other heap memory,
634 -- but this is tricky to nail down. What if we had
635 --
636 -- x = Hp + n
637 -- [x] = ...
638 --
639 -- the store to [x] should be "new heap", not "old heap".
640 -- Furthermore, you could imagine that if we started inlining
641 -- functions in Cmm then there might well be reads of heap memory
642 -- that was written in the same basic block. To take advantage of
643 -- non-aliasing of heap memory we will have to be more clever.
644
645 -- Note [Foreign calls clobber heap]
646 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
647 --
648 -- It is tempting to say that foreign calls clobber only
649 -- non-heap/stack memory, but unfortunately we break this invariant in
650 -- the RTS. For example, in stg_catch_retry_frame we call
651 -- stmCommitNestedTransaction() which modifies the contents of the
652 -- TRec it is passed (this actually caused incorrect code to be
653 -- generated).
654 --
655 -- Since the invariant is true for the majority of foreign calls,
656 -- perhaps we ought to have a special annotation for calls that can
657 -- modify heap/stack memory. For now we just use the conservative
658 -- definition here.
659
660
-- | Combine two memory abstractions into the smallest one that covers
-- both accesses (NoMem is the identity; AnyMem the catch-all).
bothMems :: AbsMem -> AbsMem -> AbsMem
bothMems NoMem y = y
bothMems x NoMem = x
bothMems HeapMem  HeapMem  = HeapMem
bothMems StackMem StackMem = StackMem
bothMems (SpMem offA wA) (SpMem offB wB)
  | offA == offB = SpMem offA (max wA wB)   -- same slot: widen the access
  | otherwise    = StackMem                 -- different slots: lose precision
bothMems SpMem{}  StackMem = StackMem
bothMems StackMem SpMem{}  = StackMem
bothMems _ _ = AnyMem
672
-- | Can the two abstract memory accesses refer to overlapping memory?
-- Heap and stack are assumed disjoint; two Sp slots conflict only when
-- their byte ranges overlap.
memConflicts :: AbsMem -> AbsMem -> Bool
memConflicts NoMem _ = False
memConflicts _ NoMem = False
memConflicts HeapMem  StackMem = False
memConflicts StackMem HeapMem  = False
memConflicts SpMem{}  HeapMem  = False
memConflicts HeapMem  SpMem{}  = False
memConflicts (SpMem offA wA) (SpMem offB wB)
  | offA < offB = offA + wA > offB
  | otherwise   = offB + wB > offA
memConflicts _ _ = True
684
-- | Abstract the memory read by an expression: loads contribute both
-- the loaded location and whatever their address expression reads.
exprMem :: DynFlags -> CmmExpr -> AbsMem
exprMem dflags expr =
  case expr of
    CmmLoad addr ty  -> loadAddr dflags addr (typeWidth ty)
                          `bothMems` exprMem dflags addr
    CmmMachOp _ args -> foldr (bothMems . exprMem dflags) NoMem args
    _                -> NoMem
689
-- | Abstract a load of the given width from the given address expression.
loadAddr :: DynFlags -> CmmExpr -> Width -> AbsMem
loadAddr dflags addr w
  | CmmReg r      <- addr           = regAddr dflags r 0 w
  | CmmRegOff r i <- addr           = regAddr dflags r i w
  | CmmGlobal Sp `regUsedIn` addr   = StackMem  -- Sp-derived but not a simple offset
  | otherwise                       = AnyMem
697
-- | Abstract a memory access at register + offset, of the given width.
regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem
regAddr _      (CmmGlobal Sp)         off w = SpMem off (widthInBytes w)
regAddr _      (CmmGlobal Hp)         _   _ = HeapMem
regAddr _      (CmmGlobal CurrentTSO) _   _ = HeapMem  -- important for PrimOps
regAddr dflags reg _ _
  | isGcPtrType (cmmRegType dflags reg)     = HeapMem  -- yay! GCPtr pays for itself
regAddr _ _ _ _                             = AnyMem