-- Commit subject: Remove unnecessary LANGUAGE pragma
-- Source: ghc.git, compiler/cmm/CmmSink.hs
1 {-# LANGUAGE GADTs #-}
2 module CmmSink (
3 cmmSink
4 ) where
5
6 import Cmm
7 import CmmOpt
8 import BlockId
9 import CmmLive
10 import CmmUtils
11 import Hoopl
12
13 import DynFlags
14 import UniqFM
15 import PprCmm ()
16
17 import Data.List (partition)
18 import qualified Data.Set as Set
19
20 -- -----------------------------------------------------------------------------
21 -- Sinking and inlining
22
23 -- This is an optimisation pass that
24 -- (a) moves assignments closer to their uses, to reduce register pressure
25 -- (b) pushes assignments into a single branch of a conditional if possible
26 -- (c) inlines assignments to registers that are mentioned only once
27 -- (d) discards dead assignments
28 --
29 -- This tightens up lots of register-heavy code. It is particularly
30 -- helpful in the Cmm generated by the Stg->Cmm code generator, in
31 -- which every function starts with a copyIn sequence like:
32 --
33 -- x1 = R1
34 -- x2 = Sp[8]
35 -- x3 = Sp[16]
36 -- if (Sp - 32 < SpLim) then L1 else L2
37 --
38 -- we really want to push the x1..x3 assignments into the L2 branch.
39 --
40 -- Algorithm:
41 --
42 -- * Start by doing liveness analysis.
43 --
44 -- * Keep a list of assignments A; earlier ones may refer to later ones.
45 -- Currently we only sink assignments to local registers, because we don't
46 -- have liveness information about global registers.
47 --
48 -- * Walk forwards through the graph, look at each node N:
49 --
50 -- * If it is a dead assignment, i.e. assignment to a register that is
51 -- not used after N, discard it.
52 --
53 -- * Try to inline based on current list of assignments
54 -- * If any assignments in A (1) occur only once in N, and (2) are
55 -- not live after N, inline the assignment and remove it
56 -- from A.
57 --
58 -- * If an assignment in A is cheap (RHS is local register), then
59 -- inline the assignment and keep it in A in case it is used afterwards.
60 --
61 -- * Otherwise don't inline.
62 --
63 -- * If N is assignment to a local register pick up the assignment
64 -- and add it to A.
65 --
66 -- * If N is not an assignment to a local register:
67 -- * remove any assignments from A that conflict with N, and
68 -- place them before N in the current block. We call this
69 -- "dropping" the assignments.
70 --
71 -- * An assignment conflicts with N if it:
72 -- - assigns to a register mentioned in N
73 -- - mentions a register assigned by N
74 -- - reads from memory written by N
75 -- * do this recursively, dropping dependent assignments
76 --
77 -- * At an exit node:
78 -- * drop any assignments that are live on more than one successor
79 -- and are not trivial
80 -- * if any successor has more than one predecessor (a join-point),
81 -- drop everything live in that successor. Since we only propagate
82 -- assignments that are not dead at the successor, we will therefore
83 -- eliminate all assignments dead at this point. Thus analysis of a
84 -- join-point will always begin with an empty list of assignments.
85 --
86 --
87 -- As a result of above algorithm, sinking deletes some dead assignments
88 -- (transitively, even). This isn't as good as removeDeadAssignments,
89 -- but it's much cheaper.
90
91 -- -----------------------------------------------------------------------------
92 -- things that we aren't optimising very well yet.
93 --
94 -- -----------
95 -- (1) From GHC's FastString.hashStr:
96 --
97 -- s2ay:
98 -- if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp;
99 -- c2gn:
100 -- R1 = _s2au::I64;
101 -- call (I64[Sp])(R1) args: 8, res: 0, upd: 8;
102 -- c2gp:
103 -- _s2cO::I64 = %MO_S_Rem_W64(%MO_UU_Conv_W8_W64(I8[_s2aq::I64 + (_s2an::I64 << 0)]) + _s2au::I64 * 128,
104 -- 4091);
105 -- _s2an::I64 = _s2an::I64 + 1;
106 -- _s2au::I64 = _s2cO::I64;
107 -- goto s2ay;
108 --
109 -- a nice loop, but we didn't eliminate the silly assignment at the end.
110 -- See Note [dependent assignments], which would probably fix this.
111 -- This is #8336 on Trac.
112 --
113 -- -----------
114 -- (2) From stg_atomically_frame in PrimOps.cmm
115 --
116 -- We have a diamond control flow:
117 --
118 -- x = ...
119 -- |
120 -- / \
121 -- A B
122 -- \ /
123 -- |
124 -- use of x
125 --
126 -- Now x won't be sunk down to its use, because we won't push it into
127 -- both branches of the conditional. We certainly do have to check
128 -- that we can sink it past all the code in both A and B, but having
129 -- discovered that, we could sink it to its use.
130 --
131
132 -- -----------------------------------------------------------------------------
133
-- | A single sinkable assignment: the target local register, its
-- right-hand side, and a cached abstraction ('AbsMem') of the memory
-- read by that right-hand side.
type Assignment = (LocalReg, CmmExpr, AbsMem)

-- | A sequence of assignments, kept in *reverse* program order, so
-- earlier list elements may refer to later ones.  The list
-- [ x=e1, y=e2 ] means the sequence of assignments
--     y = e2
--     x = e1
type Assignments = [Assignment]
143
-- | The sinking pass: walk the graph's blocks in postorder-DFS order,
-- picking up assignments to local registers and pushing them forwards
-- towards their uses, inlining or discarding them along the way.
cmmSink :: DynFlags -> CmmGraph -> CmmGraph
cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
  where
  liveness = cmmLocalLiveness dflags graph
  getLive l = mapFindWithDefault Set.empty l liveness

  blocks = postorderDfs graph

  join_pts = findJoinPoints blocks

  sink :: BlockEnv Assignments -> [CmmBlock] -> [CmmBlock]
  sink _ [] = []
  sink sunk (b:bs) =
    -- pprTrace "sink" (ppr lbl) $
    blockJoin first final_middle final_last : sink sunk' bs
    where
      lbl = entryLabel b
      (first, middle, last) = blockSplit b

      succs = successors last

      -- Annotate the middle nodes with the registers live *after*
      -- the node.  This will help us decide whether we can inline
      -- an assignment in the current node or not.
      live        = Set.unions (map getLive succs)
      live_middle = gen_kill dflags last live
      ann_middles = annotate dflags live_middle (blockToList middle)

      -- Now sink and inline in this block
      (middle', assigs)      = walk dflags ann_middles (mapFindWithDefault [] lbl sunk)
      fold_last              = constantFoldNode dflags last
      (final_last, assigs')  = tryToInline dflags live fold_last assigs

      -- We cannot sink into join points (successors with more than
      -- one predecessor), so identify the join points and the set
      -- of registers live in them.
      (joins, nonjoins) = partition (`mapMember` join_pts) succs
      live_in_joins     = Set.unions (map getLive joins)

      -- We do not want to sink an assignment into multiple branches,
      -- so identify the set of registers live in multiple successors.
      -- This is made more complicated because when we sink an assignment
      -- into one branch, this might change the set of registers that are
      -- now live in multiple branches.
      init_live_sets = map getLive nonjoins
      live_in_multi live_sets r =
          case filter (Set.member r) live_sets of
            (_one:_two:_) -> True
            _             -> False

      -- Now, drop any assignments that we will not sink any further.
      (dropped_last, assigs'') = dropAssignments dflags drop_if init_live_sets assigs'

      -- The stateful drop predicate: drop an assignment if it conflicts
      -- with the block's last node, would be duplicated into multiple
      -- branches, or is live in a join point.  The state tracks how
      -- sinking one assignment changes liveness in the successors.
      drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets')
          where
            should_drop =  conflicts dflags a final_last
                        || not (isTrivial rhs) && live_in_multi live_sets r
                        || r `Set.member` live_in_joins

            live_sets' | should_drop = live_sets
                       | otherwise   = map upd live_sets

            upd set | r `Set.member` set = set `Set.union` live_rhs
                    | otherwise          = set

            live_rhs = foldRegsUsed dflags extendRegSet emptyRegSet rhs

      final_middle = foldl blockSnoc middle' dropped_last

      -- Pass the surviving assignments on to every successor block.
      sunk' = mapUnion sunk $
                 mapFromList [ (l, filterAssignments dflags (getLive l) assigs'')
                             | l <- succs ]
216
217 {- TODO: enable this later, when we have some good tests in place to
218 measure the effect and tune it.
219
220 -- small: an expression we don't mind duplicating
221 isSmall :: CmmExpr -> Bool
222 isSmall (CmmReg (CmmLocal _)) = True -- not globals, we want to coalesce them instead* See below
223 isSmall (CmmLit _) = True
224 isSmall (CmmMachOp (MO_Add _) [x,y]) = isTrivial x && isTrivial y
225 isSmall (CmmRegOff (CmmLocal _) _) = True
226 isSmall _ = False
227
228 Coalesce global registers? What does that mean? We observed no decrease
229 in performance coming from inlining of global registers, hence we do it now
230 (see isTrivial function). Ideally we'd like to measure performance using
231 some tool like perf or VTune and make decisions what to inline based on that.
232 -}
233
--
-- An expression we are happy to duplicate freely: a reference to a
-- register (both local and global) or a literal.
--
isTrivial :: CmmExpr -> Bool
isTrivial expr = case expr of
  CmmReg _ -> True
  CmmLit _ -> True
  _        -> False
242
--
-- Annotate each node with the set of registers live *after* the node.
--
annotate :: DynFlags -> LocalRegSet -> [CmmNode O O] -> [(LocalRegSet, CmmNode O O)]
annotate dflags liveOut nodes = snd (foldr step (liveOut, []) nodes)
  where
    -- Walking backwards: 'liveAfter' is the liveness after the current
    -- node, and gen_kill produces the liveness after the previous one.
    step node (liveAfter, acc) =
      (gen_kill dflags node liveAfter, (liveAfter, node) : acc)
249
--
-- Find the join points: blocks that are the target of more than one
-- branch, i.e. have multiple predecessors.  (We count, for every
-- successor edge in the graph, how often each label occurs.)
--
findJoinPoints :: [CmmBlock] -> BlockEnv Int
findJoinPoints blocks = mapFilter (> 1) predecessor_counts
  where
    predecessor_counts :: BlockEnv Int
    predecessor_counts =
      foldr (\lbl env -> mapInsertWith (+) lbl 1 env) mapEmpty
            (concatMap successors blocks)
260
--
-- Filter the list of assignments, keeping only those that are either
-- live in the continuation or needed by another assignment we keep.
--
filterAssignments :: DynFlags -> LocalRegSet -> Assignments -> Assignments
filterAssignments dflags live assigs = reverse (go assigs [])
  where
    go [] kept = kept
    go (a@(r, _, _) : rest) kept
      | needed    = go rest (a : kept)
      | otherwise = go rest kept
      where
        -- Note that we must keep assignments that are referred to by
        -- other assignments we have already kept, hence the conflicts
        -- check against every kept assignment.
        needed = r `Set.member` live
              || any (conflicts dflags a . toNode) kept
276
277 -- -----------------------------------------------------------------------------
278 -- Walk through the nodes of a block, sinking and inlining assignments
279 -- as we go.
280 --
281 -- On input we pass in a:
282 -- * list of nodes in the block
283 -- * a list of assignments that appeared *before* this block and
284 -- that are being sunk.
285 --
286 -- On output we get:
287 -- * a new block
288 -- * a list of assignments that will be placed *after* that block.
289 --
290
walk :: DynFlags
     -> [(LocalRegSet, CmmNode O O)]   -- nodes of the block, annotated with
                                       -- the set of registers live *after*
                                       -- this node.

     -> Assignments                    -- The current list of
                                       -- assignments we are sinking.
                                       -- Earlier assignments may refer
                                       -- to later ones.

     -> ( Block CmmNode O O            -- The new block
        , Assignments                  -- Assignments to sink further
        )

walk dflags nodes assigs = go nodes emptyBlock assigs
  where
    go [] block as = (block, as)
    go ((live, node) : ns) block as
      -- dead assignment: drop it on the floor
      | shouldDiscard node live           = go ns block as
      -- assignment to a local register: pick it up and carry it along
      | Just a <- shouldSink dflags node2 = go ns block (a : as1)
      -- anything else: emit conflicting assignments, then the node
      | otherwise                         = go ns block' as'
      where
        -- constant-fold first so inlining sees the simplified node
        node1 = constantFoldNode dflags node

        (node2, as1) = tryToInline dflags live node1 as

        (dropped, as') = dropAssignmentsSimple dflags
                            (\a -> conflicts dflags a node2) as1

        block' = foldl blockSnoc block dropped `blockSnoc` node2
322
323
--
-- Heuristic to decide whether to pick up and sink an assignment.
-- Currently we pick up all assignments to local registers.  It might
-- be profitable to sink assignments to global regs too, but the
-- liveness analysis doesn't track those (yet) so we can't.
--
shouldSink :: DynFlags -> CmmNode e x -> Maybe Assignment
shouldSink dflags (CmmAssign (CmmLocal r) e)
  | no_local_regs = Just (r, e, exprMem dflags e)
  where
    -- currently always True; a stricter condition was once considered:
    no_local_regs = True -- foldRegsUsed (\_ _ -> False) True e
shouldSink _ _other = Nothing
334
335 --
336 -- discard dead assignments. This doesn't do as good a job as
337 -- removeDeadAssignments, because it would need multiple passes
338 -- to get all the dead code, but it catches the common case of
339 -- superfluous reloads from the stack that the stack allocator
340 -- leaves behind.
341 --
342 -- Also we catch "r = r" here. You might think it would fall
343 -- out of inlining, but the inliner will see that r is live
344 -- after the instruction and choose not to inline r in the rhs.
345 --
shouldDiscard :: CmmNode e x -> LocalRegSet -> Bool
shouldDiscard node live
  -- a self-assignment "r = r" is always discardable
  | CmmAssign r (CmmReg r') <- node, r == r' = True
  -- an assignment to a local register that is dead afterwards
  | CmmAssign (CmmLocal r) _ <- node         = not (r `Set.member` live)
  | otherwise                                = False
352
353
-- | Turn a picked-up assignment back into a Cmm assignment node.
toNode :: Assignment -> CmmNode O O
toNode (reg, rhs, _mem) = CmmAssign (CmmLocal reg) rhs
356
-- | 'dropAssignments' specialised to a stateless drop predicate.
dropAssignmentsSimple :: DynFlags -> (Assignment -> Bool) -> Assignments
                      -> ([CmmNode O O], Assignments)
dropAssignmentsSimple dflags p = dropAssignments dflags (\a s -> (p a, s)) ()
360
-- | Split the assignment list into those that must be dropped (emitted
-- as nodes) and those we keep sinking.  An assignment is dropped when
-- the caller's (stateful) predicate says so, or when it conflicts with
-- an assignment that was already dropped.
dropAssignments :: DynFlags -> (Assignment -> s -> (Bool, s)) -> s -> Assignments
                -> ([CmmNode O O], Assignments)
dropAssignments dflags should_drop state0 assigs
  = (dropped, reverse kept)
  where
    (dropped, kept) = go state0 assigs [] []

    go _ [] drops keeps = (drops, keeps)
    go st (assig : rest) drops keeps
      | conflict  = go st' rest (toNode assig : drops) keeps
      | otherwise = go st' rest drops (assig : keeps)
      where
        (dropit, st') = should_drop assig st
        conflict      = dropit || any (conflicts dflags assig) drops
375
376
377 -- -----------------------------------------------------------------------------
378 -- Try to inline assignments into a node.
379
tryToInline
   :: DynFlags
   -> LocalRegSet              -- set of registers live after this
                               -- node.  We cannot inline anything
                               -- that is live after the node, unless
                               -- it is small enough to duplicate.
   -> CmmNode O x              -- The node to inline into
   -> Assignments              -- Assignments to inline
   -> (
        CmmNode O x            -- New node
      , Assignments            -- Remaining assignments
      )

tryToInline dflags live node assigs = go usages node [] assigs
 where
  usages :: UniqFM Int -- Maps each LocalReg to a count of how often it is used
  usages = foldLocalRegsUsed dflags addUsage emptyUFM node

  go _usages node _skipped [] = (node, [])

  go usages node skipped (a@(l,rhs,_) : rest)
   | cannot_inline = dont_inline
   | occurs_none   = discard            -- Note [discard during inlining]
   | occurs_once   = inline_and_discard
   | isTrivial rhs = inline_and_keep
   | otherwise     = dont_inline
   where
        inline_and_discard = go usages' inl_node skipped rest
          where usages' = foldLocalRegsUsed dflags addUsage usages rhs

        discard = go usages node skipped rest

        dont_inline     = keep node      -- don't inline the assignment, keep it
        inline_and_keep = keep inl_node  -- inline the assignment, keep it

        keep node' = (final_node, a : rest')
          where (final_node, rest') = go usages' node' (l:skipped) rest
                usages' = foldLocalRegsUsed dflags (\m r -> addToUFM m r 2)
                                            usages rhs
                -- we must not inline anything that is mentioned in the RHS
                -- of a binding that we have already skipped, so we set the
                -- usages of the regs on the RHS to 2.

        cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
                        || l `elem` skipped
                        || not (okToInline dflags rhs node)

        l_usages = lookupUFM usages l
        l_live   = l `elemRegSet` live

        occurs_once = not l_live && l_usages == Just 1
        occurs_none = not l_live && l_usages == Nothing

        inl_node = mapExpDeep inline node
                   -- mapExpDeep is where the inlining actually takes place!
           where inline (CmmReg    (CmmLocal l'))     | l == l' = rhs
                 inline (CmmRegOff (CmmLocal l') off) | l == l'
                    = cmmOffset dflags rhs off
                 -- re-constant fold after inlining
                 inline (CmmMachOp op args) = cmmMachOpFold dflags op args
                 inline other = other
441
442 -- Note [dependent assignments]
443 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
444 --
445 -- If our assignment list looks like
446 --
447 -- [ y = e, x = ... y ... ]
448 --
449 -- We cannot inline x. Remember this list is really in reverse order,
450 -- so it means x = ... y ...; y = e
451 --
452 -- Hence if we inline x, the outer assignment to y will capture the
453 -- reference in x's right hand side.
454 --
455 -- In this case we should rename the y in x's right-hand side,
456 -- i.e. change the list to [ y = e, x = ... y1 ..., y1 = y ]
457 -- Now we can go ahead and inline x.
458 --
459 -- For now we do nothing, because this would require putting
460 -- everything inside UniqSM.
461 --
462 -- One more variant of this (#7366):
463 --
464 -- [ y = e, y = z ]
465 --
466 -- If we don't want to inline y = e, because y is used many times, we
467 -- might still be tempted to inline y = z (because we always inline
468 -- trivial rhs's). But of course we can't, because y is equal to e,
469 -- not z.
470
471 -- Note [discard during inlining]
472 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
473 -- Opportunities to discard assignments sometimes appear after we've
474 -- done some inlining. Here's an example:
475 --
476 -- x = R1;
477 -- y = P64[x + 7];
478 -- z = P64[x + 15];
479 -- /* z is dead */
480 -- R1 = y & (-8);
481 --
482 -- The x assignment is trivial, so we inline it in the RHS of y, and
483 -- keep both x and y. z gets dropped because it is dead, then we
484 -- inline y, and we have a dead assignment to x. If we don't notice
485 -- that x is dead in tryToInline, we end up retaining it.
486
-- | Bump a register's usage count by one (counts start at one).
addUsage :: UniqFM Int -> LocalReg -> UniqFM Int
addUsage counts reg = addToUFM_C (+) counts reg 1
489
-- | Does the expression mention any of the given local registers,
-- either directly or through a 'CmmRegOff'?
regsUsedIn :: [LocalReg] -> CmmExpr -> Bool
regsUsedIn [] _ = False            -- fast path for the common empty case
regsUsedIn regs expr = wrapRecExpf check expr False
  where
    check (CmmReg    (CmmLocal r))   _ | r `elem` regs = True
    check (CmmRegOff (CmmLocal r) _) _ | r `elem` regs = True
    check _ acc = acc
496
-- We don't inline into CmmUnsafeForeignCall if the expression refers
-- to global registers.  This is a HACK to avoid global registers
-- clashing with C argument-passing registers; really the back-end
-- ought to be able to handle it properly, but currently neither PprC
-- nor the NCG can do it.  See Note [Register parameter passing]
-- See also StgCmmForeign:load_args_into_temps.
okToInline :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
okToInline dflags expr node = case node of
  CmmUnsafeForeignCall{} -> not (globalRegistersConflict dflags expr node)
  _                      -> True
507
508 -- -----------------------------------------------------------------------------
509
-- | @conflicts (r,e) stmt@ is @False@ if and only if the assignment
-- @r = e@ can be safely commuted past @stmt@.
--
-- We only sink "r = G" assignments right now, so conflicts is very simple:
--
conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool
conflicts dflags (r, rhs, addr) node

  -- (1) node defines registers used by rhs of assignment. This catches
  -- assignments and all three kinds of calls. See Note [Sinking and calls]
  | globalRegistersConflict dflags rhs node                     = True
  | localRegistersConflict  dflags rhs node                     = True

  -- (2) node uses register defined by assignment
  | foldRegsUsed dflags (\b r' -> r == r' || b) False node      = True

  -- (3) a store to an address conflicts with a read of the same memory
  | CmmStore addr' e <- node
  , memConflicts addr (loadAddr dflags addr' (cmmExprWidth dflags e)) = True

  -- (4) an assignment to Hp/Sp conflicts with a heap/stack read respectively
  | HeapMem  <- addr, CmmAssign (CmmGlobal Hp) _ <- node        = True
  | StackMem <- addr, CmmAssign (CmmGlobal Sp) _ <- node        = True
  | SpMem{}  <- addr, CmmAssign (CmmGlobal Sp) _ <- node        = True

  -- (5) foreign calls clobber heap: see Note [Foreign calls clobber heap]
  | CmmUnsafeForeignCall{} <- node, memConflicts addr AnyMem    = True

  -- (6) native calls clobber any memory
  | CmmCall{} <- node, memConflicts addr AnyMem                 = True

  -- (7) otherwise, no conflict
  | otherwise = False
543
-- | True if the node defines any global register that is used in the
-- given Cmm expression.
globalRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
globalRegistersConflict dflags expr node =
    foldRegsDefd dflags
                 (\found reg -> found || CmmGlobal reg `regUsedIn` expr)
                 False node
549
-- | True if the node defines any local register that is used in the
-- given Cmm expression.
localRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
localRegistersConflict dflags expr node =
    foldRegsDefd dflags
                 (\found reg -> found || CmmLocal reg `regUsedIn` expr)
                 False node
555
556 -- Note [Sinking and calls]
557 -- ~~~~~~~~~~~~~~~~~~~~~~~~
558 --
559 -- We have three kinds of calls: normal (CmmCall), safe foreign (CmmForeignCall)
560 -- and unsafe foreign (CmmUnsafeForeignCall). We perform sinking pass after
561 -- stack layout (see Note [Sinking after stack layout]) which leads to two
562 -- invariants related to calls:
563 --
564 -- a) during stack layout phase all safe foreign calls are turned into
565 -- unsafe foreign calls (see Note [Lower safe foreign calls]). This
566 -- means that we will never encounter CmmForeignCall node when running
567 -- sinking after stack layout
568 --
569 -- b) stack layout saves all variables live across a call on the stack
570 -- just before making a call (remember we are not sinking assignments to
571 -- stack):
572 --
573 -- L1:
574 -- x = R1
575 -- P64[Sp - 16] = L2
576 -- P64[Sp - 8] = x
577 -- Sp = Sp - 16
578 -- call f() returns L2
579 -- L2:
580 --
581 -- We will attempt to sink { x = R1 } but we will detect conflict with
582 -- { P64[Sp - 8] = x } and hence we will drop { x = R1 } without even
583 -- checking whether it conflicts with { call f() }. In this way we will
584 -- never need to check any assignment conflicts with CmmCall. Remember
585 -- that we still need to check for potential memory conflicts.
586 --
587 -- So the result is that we only need to worry about CmmUnsafeForeignCall nodes
588 -- when checking conflicts (see Note [Unsafe foreign calls clobber caller-save registers]).
589 -- This assumption holds only when we do sinking after stack layout. If we run
590 -- it before stack layout we need to check for possible conflicts with all three
591 -- kinds of calls. Our `conflicts` function does that by using a generic
592 -- foldRegsDefd and foldRegsUsed functions defined in DefinerOfRegs and
593 -- UserOfRegs typeclasses.
594 --
595
-- | An abstraction of the memory read or written by an expression
-- or node.
data AbsMem
  = NoMem           -- ^ no memory accessed
  | AnyMem          -- ^ arbitrary memory
  | HeapMem         -- ^ definitely heap memory
  | StackMem        -- ^ definitely stack memory
  | SpMem           -- ^ @\<size\>[Sp+n]@: a known stack slot
      {-# UNPACK #-} !Int   -- offset n from Sp
      {-# UNPACK #-} !Int   -- width in bytes
605
606 -- Having SpMem is important because it lets us float loads from Sp
607 -- past stores to Sp as long as they don't overlap, and this helps to
608 -- unravel some long sequences of
609 -- x1 = [Sp + 8]
610 -- x2 = [Sp + 16]
611 -- ...
612 -- [Sp + 8] = xi
613 -- [Sp + 16] = xj
614 --
615 -- Note that SpMem is invalidated if Sp is changed, but the definition
616 -- of 'conflicts' above handles that.
617
618 -- ToDo: this won't currently fix the following commonly occurring code:
619 -- x1 = [R1 + 8]
620 -- x2 = [R1 + 16]
621 -- ..
622 -- [Hp - 8] = x1
623 -- [Hp - 16] = x2
624 -- ..
625
626 -- because [R1 + 8] and [Hp - 8] are both HeapMem. We know that
627 -- assignments to [Hp + n] do not conflict with any other heap memory,
628 -- but this is tricky to nail down. What if we had
629 --
630 -- x = Hp + n
631 -- [x] = ...
632 --
633 -- the store to [x] should be "new heap", not "old heap".
634 -- Furthermore, you could imagine that if we started inlining
635 -- functions in Cmm then there might well be reads of heap memory
636 -- that was written in the same basic block. To take advantage of
637 -- non-aliasing of heap memory we will have to be more clever.
638
639 -- Note [Foreign calls clobber heap]
640 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
641 --
642 -- It is tempting to say that foreign calls clobber only
643 -- non-heap/stack memory, but unfortunately we break this invariant in
644 -- the RTS. For example, in stg_catch_retry_frame we call
645 -- stmCommitNestedTransaction() which modifies the contents of the
646 -- TRec it is passed (this actually caused incorrect code to be
647 -- generated).
648 --
649 -- Since the invariant is true for the majority of foreign calls,
650 -- perhaps we ought to have a special annotation for calls that can
651 -- modify heap/stack memory. For now we just use the conservative
652 -- definition here.
653
654
-- | Combine two memory abstractions into one covering both accesses.
bothMems :: AbsMem -> AbsMem -> AbsMem
bothMems m1 m2 = case (m1, m2) of
  (NoMem, y)                  -> y
  (x, NoMem)                  -> x
  (HeapMem, HeapMem)          -> HeapMem
  (StackMem, StackMem)        -> StackMem
  (SpMem o1 w1, SpMem o2 w2)
    | o1 == o2                -> SpMem o1 (max w1 w2)
    | otherwise               -> StackMem
  (SpMem{}, StackMem)         -> StackMem
  (StackMem, SpMem{})         -> StackMem
  _                           -> AnyMem
666
-- | Can an access of the first abstract memory conflict with an
-- access of the second?
memConflicts :: AbsMem -> AbsMem -> Bool
memConflicts m1 m2 = case (m1, m2) of
  (NoMem, _)          -> False
  (_, NoMem)          -> False
  (HeapMem, StackMem) -> False
  (StackMem, HeapMem) -> False
  (SpMem{}, HeapMem)  -> False
  (HeapMem, SpMem{})  -> False
  (SpMem o1 w1, SpMem o2 w2)     -- overlap test on two known stack slots
    | o1 < o2   -> o1 + w1 > o2
    | otherwise -> o2 + w2 > o1
  _ -> True
678
-- | The abstract memory read by a Cmm expression.
exprMem :: DynFlags -> CmmExpr -> AbsMem
exprMem dflags expr = case expr of
  CmmLoad addr ty -> bothMems (loadAddr dflags addr (typeWidth ty))
                              (exprMem dflags addr)
  CmmMachOp _ es  -> foldr (bothMems . exprMem dflags) NoMem es
  _               -> NoMem
683
-- | The abstract memory read by a load of the given width at the
-- given address expression.
loadAddr :: DynFlags -> CmmExpr -> Width -> AbsMem
loadAddr dflags addr w = case addr of
  CmmReg r      -> regAddr dflags r 0 w
  CmmRegOff r i -> regAddr dflags r i w
  _ | CmmGlobal Sp `regUsedIn` addr -> StackMem
    | otherwise                     -> AnyMem
691
-- | The abstract memory addressed by register + offset, for an access
-- of the given width.
regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem
regAddr _ (CmmGlobal Sp) off width = SpMem off (widthInBytes width)
regAddr _ (CmmGlobal Hp) _ _       = HeapMem
regAddr _ (CmmGlobal CurrentTSO) _ _ = HeapMem  -- important for PrimOps
regAddr dflags r _ _
  | isGcPtrType (cmmRegType dflags r) = HeapMem -- yay! GCPtr pays for itself
regAddr _ _ _ _ = AnyMem