-- 80 columns
-- [ghc.git] / compiler / cmm / CmmSink.hs
1 {-# LANGUAGE GADTs #-}
2 module CmmSink (
3 cmmSink
4 ) where
5
6 import Cmm
7 import CmmOpt
8 import BlockId
9 import CmmLive
10 import CmmUtils
11 import Hoopl
12
13 import DynFlags
14 import UniqFM
15 import PprCmm ()
16
17 import Data.List (partition)
18 import qualified Data.Set as Set
19
20 -- -----------------------------------------------------------------------------
21 -- Sinking and inlining
22
23 -- This is an optimisation pass that
24 -- (a) moves assignments closer to their uses, to reduce register pressure
25 -- (b) pushes assignments into a single branch of a conditional if possible
26 -- (c) inlines assignments to registers that are mentioned only once
27 -- (d) discards dead assignments
28 --
29 -- This tightens up lots of register-heavy code. It is particularly
30 -- helpful in the Cmm generated by the Stg->Cmm code generator, in
31 -- which every function starts with a copyIn sequence like:
32 --
33 -- x1 = R1
34 -- x2 = Sp[8]
35 -- x3 = Sp[16]
36 -- if (Sp - 32 < SpLim) then L1 else L2
37 --
38 -- we really want to push the x1..x3 assignments into the L2 branch.
39 --
40 -- Algorithm:
41 --
42 -- * Start by doing liveness analysis.
43 --
44 -- * Keep a list of assignments A; earlier ones may refer to later ones.
45 -- Currently we only sink assignments to local registers, because we don't
46 -- have liveness information about global registers.
47 --
48 -- * Walk forwards through the graph, look at each node N:
49 --
50 -- * If it is a dead assignment, i.e. assignment to a register that is
51 -- not used after N, discard it.
52 --
53 -- * Try to inline based on current list of assignments
54 -- * If any assignments in A (1) occur only once in N, and (2) are
55 -- not live after N, inline the assignment and remove it
56 -- from A.
57 --
58 -- * If an assignment in A is cheap (RHS is local register), then
59 -- inline the assignment and keep it in A in case it is used afterwards.
60 --
61 -- * Otherwise don't inline.
62 --
63 -- * If N is assignment to a local register pick up the assignment
64 -- and add it to A.
65 --
66 -- * If N is not an assignment to a local register:
67 -- * remove any assignments from A that conflict with N, and
68 -- place them before N in the current block. We call this
69 -- "dropping" the assignments.
70 --
71 -- * An assignment conflicts with N if it:
72 -- - assigns to a register mentioned in N
73 -- - mentions a register assigned by N
74 -- - reads from memory written by N
75 -- * do this recursively, dropping dependent assignments
76 --
77 -- * At an exit node:
78 -- * drop any assignments that are live on more than one successor
79 -- and are not trivial
80 -- * if any successor has more than one predecessor (a join-point),
81 -- drop everything live in that successor. Since we only propagate
82 -- assignments that are not dead at the successor, we will therefore
83 -- eliminate all assignments dead at this point. Thus analysis of a
84 -- join-point will always begin with an empty list of assignments.
85 --
86 --
87 -- As a result of above algorithm, sinking deletes some dead assignments
88 -- (transitively, even). This isn't as good as removeDeadAssignments,
89 -- but it's much cheaper.
90
91 -- -----------------------------------------------------------------------------
92 -- things that we aren't optimising very well yet.
93 --
94 -- -----------
95 -- (1) From GHC's FastString.hashStr:
96 --
97 -- s2ay:
98 -- if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp;
99 -- c2gn:
100 -- R1 = _s2au::I64;
101 -- call (I64[Sp])(R1) args: 8, res: 0, upd: 8;
102 -- c2gp:
103 -- _s2cO::I64 = %MO_S_Rem_W64(%MO_UU_Conv_W8_W64(I8[_s2aq::I64 + (_s2an::I64 << 0)]) + _s2au::I64 * 128,
104 -- 4091);
105 -- _s2an::I64 = _s2an::I64 + 1;
106 -- _s2au::I64 = _s2cO::I64;
107 -- goto s2ay;
108 --
109 -- a nice loop, but we didn't eliminate the silly assignment at the end.
110 -- See Note [dependent assignments], which would probably fix this.
111 --
112 -- -----------
113 -- (2) From stg_atomically_frame in PrimOps.cmm
114 --
115 -- We have a diamond control flow:
116 --
117 -- x = ...
118 -- |
119 -- / \
120 -- A B
121 -- \ /
122 -- |
123 -- use of x
124 --
125 -- Now x won't be sunk down to its use, because we won't push it into
126 -- both branches of the conditional. We certainly do have to check
127 -- that we can sink it past all the code in both A and B, but having
128 -- discovered that, we could sink it to its use.
129 --
130
131 -- -----------------------------------------------------------------------------
132
-- | An assignment being sunk: the target local register, its RHS, and
-- an abstraction ('AbsMem') of the memory read by the RHS.  Caching
-- the AbsMem avoids recomputing it each time we test for conflicts.
type Assignment = (LocalReg, CmmExpr, AbsMem)

-- | A sequence of assignments; kept in *reverse* order.
-- So the list [ x=e1, y=e2 ] means the sequence of assignments
--     y = e2
--     x = e1
type Assignments = [Assignment]
-- | Sink and inline assignments in a Cmm graph.  See the algorithm
-- description in the header comment of this module: we walk the blocks
-- in postorder-DFS, carrying along assignments that may still be sunk
-- into successors.
cmmSink :: DynFlags -> CmmGraph -> CmmGraph
cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
  where
  liveness  = cmmLocalLiveness dflags graph
  getLive l = mapFindWithDefault Set.empty l liveness

  blocks = postorderDfs graph

  join_pts = findJoinPoints blocks

  -- The environment maps a block label to the assignments sunk into it
  -- from its predecessor.
  sink :: BlockEnv Assignments -> [CmmBlock] -> [CmmBlock]
  sink _ [] = []
  sink sunk (b:bs) =
    -- pprTrace "sink" (ppr lbl) $
    blockJoin first final_middle final_last : sink sunk' bs
    where
      lbl = entryLabel b
      (first, middle, last) = blockSplit b

      succs = successors last

      -- Annotate the middle nodes with the registers live *after*
      -- the node.  This will help us decide whether we can inline
      -- an assignment in the current node or not.
      live        = Set.unions (map getLive succs)
      live_middle = gen_kill dflags last live
      ann_middles = annotate dflags live_middle (blockToList middle)

      -- Now sink and inline in this block
      (middle', assigs) = walk dflags ann_middles (mapFindWithDefault [] lbl sunk)
      fold_last = constantFold dflags last
      (final_last, assigs') = tryToInline dflags live fold_last assigs

      -- We cannot sink into join points (successors with more than
      -- one predecessor), so identify the join points and the set
      -- of registers live in them.
      (joins, nonjoins) = partition (`mapMember` join_pts) succs
      live_in_joins = Set.unions (map getLive joins)

      -- We do not want to sink an assignment into multiple branches,
      -- so identify the set of registers live in multiple successors.
      -- This is made more complicated because when we sink an assignment
      -- into one branch, this might change the set of registers that are
      -- now live in multiple branches.
      init_live_sets = map getLive nonjoins
      live_in_multi live_sets r =
        case filter (Set.member r) live_sets of
          (_one:_two:_) -> True
          _             -> False

      -- Now, drop any assignments that we will not sink any further.
      (dropped_last, assigs'') = dropAssignments dflags drop_if init_live_sets assigs'

      drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets')
        where
          should_drop = conflicts dflags a final_last
                     || not (isTrivial rhs) && live_in_multi live_sets r
                     || r `Set.member` live_in_joins

          live_sets' | should_drop = live_sets
                     | otherwise   = map upd live_sets

          -- sinking the assignment into a branch makes the registers
          -- read by its RHS live in that branch too
          upd set | r `Set.member` set = set `Set.union` live_rhs
                  | otherwise          = set

          live_rhs = foldRegsUsed dflags extendRegSet emptyRegSet rhs

      final_middle = foldl blockSnoc middle' dropped_last

      sunk' = mapUnion sunk $
              mapFromList [ (l, filterAssignments dflags (getLive l) assigs'')
                          | l <- succs ]
215
{- TODO: enable this later, when we have some good tests in place to
   measure the effect and tune it.

-- small: an expression we don't mind duplicating
isSmall :: CmmExpr -> Bool
isSmall (CmmReg (CmmLocal _)) = True -- not globals, we want to coalesce them instead* See below
isSmall (CmmLit _) = True
isSmall (CmmMachOp (MO_Add _) [x,y]) = isTrivial x && isTrivial y
isSmall (CmmRegOff (CmmLocal _) _) = True
isSmall _ = False

Coalesce global registers? What does that mean? We observed no decrease
in performance coming from inlining of global registers, hence we do it now
(see isTrivial function). Ideally we'd like to measure performance using
some tool like perf or VTune and make decisions what to inline based on that.
-}
232
--
-- | We allow duplication of trivial expressions: registers (both local
-- and global) and literals.
--
isTrivial :: CmmExpr -> Bool
isTrivial (CmmReg _) = True
isTrivial (CmmLit _) = True
isTrivial _          = False
241
--
-- | Annotate each node with the set of registers live *after* the node.
-- Implemented as a backwards fold: the incoming 'live' set is the set
-- of registers live after the final node.
--
annotate :: DynFlags -> LocalRegSet -> [CmmNode O O] -> [(LocalRegSet, CmmNode O O)]
annotate dflags live nodes = snd $ foldr ann (live,[]) nodes
  where ann n (live,nodes) = (gen_kill dflags n live, (live,n) : nodes)
248
--
-- | Find the join points: blocks that have more than one predecessor,
-- i.e. appear as a successor of more than one block.  (The original
-- comment said "multiple successors", but the counting below is of how
-- often each label occurs as a successor, which is its predecessor
-- count.)  Each join point is mapped to that count.
--
findJoinPoints :: [CmmBlock] -> BlockEnv Int
findJoinPoints blocks = mapFilter (>1) succ_counts
  where
    all_succs = concatMap successors blocks

    succ_counts :: BlockEnv Int
    succ_counts = foldr (\l -> mapInsertWith (+) l 1) mapEmpty all_succs
259
--
-- | Filter the list of assignments to remove any assignments that
-- are not live in a continuation (and are not needed by an assignment
-- that we keep).
--
filterAssignments :: DynFlags -> LocalRegSet -> Assignments -> Assignments
filterAssignments dflags live assigs = reverse (go assigs [])
  where go []             kept = kept
        go (a@(r,_,_):as) kept | needed    = go as (a:kept)
                               | otherwise = go as kept
           where
              needed = r `Set.member` live
                       || any (conflicts dflags a) (map toNode kept)
                       -- Note that we must keep assignments that are
                       -- referred to by other assignments we have
                       -- already kept.
275
-- -----------------------------------------------------------------------------
-- Walk through the nodes of a block, sinking and inlining assignments
-- as we go.
--
-- On input we pass in a:
--    * list of nodes in the block
--    * a list of assignments that appeared *before* this block and
--      that are being sunk.
--
-- On output we get:
--    * a new block
--    * a list of assignments that will be placed *after* that block.
--

walk :: DynFlags
     -> [(LocalRegSet, CmmNode O O)]    -- nodes of the block, annotated with
                                        -- the set of registers live *after*
                                        -- this node.

     -> Assignments                     -- The current list of
                                        -- assignments we are sinking.
                                        -- Later assignments may refer
                                        -- to earlier ones.

     -> ( Block CmmNode O O             -- The new block
        , Assignments                   -- Assignments to sink further
        )

walk dflags nodes assigs = go nodes emptyBlock assigs
 where
   go []               block as = (block, as)
   go ((live,node):ns) block as
     -- discard dead assignment
     | shouldDiscard node live           = go ns block as
     -- pick up assignments to local registers and carry them along
     | Just a <- shouldSink dflags node2 = go ns block (a : as1)
     | otherwise                         = go ns block' as'
     where
       node1 = constantFold dflags node

       (node2, as1) = tryToInline dflags live node1 as

       -- drop (emit before node2) any carried assignments that
       -- conflict with node2
       (dropped, as') = dropAssignmentsSimple dflags
                           (\a -> conflicts dflags a node2) as1

       block' = foldl blockSnoc block dropped `blockSnoc` node2
321
322
-- | Constant-fold all expressions in a node: fold machops via
-- 'cmmMachOpFold', and rewrite @CmmRegOff r 0@ to a plain 'CmmReg'.
constantFold :: DynFlags -> CmmNode e x -> CmmNode e x
constantFold dflags node = mapExpDeep f node
  where f (CmmMachOp op args) = cmmMachOpFold dflags op args
        f (CmmRegOff r 0)     = CmmReg r
        f e                   = e
328
--
-- Heuristic to decide whether to pick up and sink an assignment.
-- Currently we pick up all assignments to local registers.  It might
-- be profitable to sink assignments to global regs too, but the
-- liveness analysis doesn't track those (yet) so we can't.
--
shouldSink :: DynFlags -> CmmNode e x -> Maybe Assignment
shouldSink dflags (CmmAssign (CmmLocal r) e) | no_local_regs = Just (r, e, exprMem dflags e)
  where no_local_regs = True -- foldRegsUsed (\_ _ -> False) True e
shouldSink _ _other = Nothing
339
--
-- Discard dead assignments.  This doesn't do as good a job as
-- removeDeadAssignments, because it would need multiple passes
-- to get all the dead code, but it catches the common case of
-- superfluous reloads from the stack that the stack allocator
-- leaves behind.
--
-- Also we catch "r = r" here.  You might think it would fall
-- out of inlining, but the inliner will see that r is live
-- after the instruction and choose not to inline r in the rhs.
--
shouldDiscard :: CmmNode e x -> LocalRegSet -> Bool
shouldDiscard node live
   = case node of
       CmmAssign r (CmmReg r') | r == r' -> True
       CmmAssign (CmmLocal r) _          -> not (r `Set.member` live)
       _otherwise                        -> False
357
358
-- | Turn a carried assignment back into a Cmm assignment node.
toNode :: Assignment -> CmmNode O O
toNode (r,rhs,_) = CmmAssign (CmmLocal r) rhs
361
-- | 'dropAssignments' specialised to a stateless drop predicate.
dropAssignmentsSimple :: DynFlags -> (Assignment -> Bool) -> Assignments
                      -> ([CmmNode O O], Assignments)
dropAssignmentsSimple dflags f = dropAssignments dflags (\a _ -> (f a, ())) ()
365
-- | Split the carried assignments into those to drop (emitted now, as
-- nodes, in original order) and those to keep sinking.  The predicate
-- threads a state.  An assignment is also dropped if it conflicts with
-- one already dropped, so dependent assignments are dropped together.
dropAssignments :: DynFlags -> (Assignment -> s -> (Bool, s)) -> s -> Assignments
                -> ([CmmNode O O], Assignments)
dropAssignments dflags should_drop state assigs
 = (dropped, reverse kept)
 where
   (dropped,kept) = go state assigs [] []

   go _     []             dropped kept = (dropped, kept)
   go state (assig : rest) dropped kept
      | conflict  = go state' rest (toNode assig : dropped) kept
      | otherwise = go state' rest dropped (assig:kept)
      where
        (dropit, state') = should_drop assig state
        conflict = dropit || any (conflicts dflags assig) dropped
380
381
-- -----------------------------------------------------------------------------
-- Try to inline assignments into a node.

tryToInline
   :: DynFlags
   -> LocalRegSet               -- set of registers live after this
                                -- node.  We cannot inline anything
                                -- that is live after the node, unless
                                -- it is small enough to duplicate.
   -> CmmNode O x               -- The node to inline into
   -> Assignments               -- Assignments to inline
   -> (
        CmmNode O x             -- New node
      , Assignments             -- Remaining assignments
      )

tryToInline dflags live node assigs = go usages node [] assigs
 where
  usages :: UniqFM Int -- Maps each LocalReg to a count of how often it is used
  usages = foldLocalRegsUsed dflags addUsage emptyUFM node

  go _usages node _skipped [] = (node, [])

  go usages node skipped (a@(l,rhs,_) : rest)
   | cannot_inline = dont_inline
   | occurs_once   = inline_and_discard
   | isTrivial rhs = inline_and_keep
   | otherwise     = dont_inline
   where
        inline_and_discard = go usages' inl_node skipped rest
          where usages' = foldLocalRegsUsed dflags addUsage usages rhs

        dont_inline     = keep node     -- don't inline the assignment, keep it
        inline_and_keep = keep inl_node -- inline the assignment, keep it

        keep node' = (final_node, a : rest')
          where (final_node, rest') = go usages' node' (l:skipped) rest
                usages' = foldLocalRegsUsed dflags (\m r -> addToUFM m r 2)
                                            usages rhs
                -- we must not inline anything that is mentioned in the RHS
                -- of a binding that we have already skipped, so we set the
                -- usages of the regs on the RHS to 2.

        cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
                     || l `elem` skipped
                     || not (okToInline dflags rhs node)

        occurs_once = not (l `elemRegSet` live)
                   && lookupUFM usages l == Just 1

        inl_node = mapExpDeep inline node
                   -- mapExpDeep is where the inlining actually takes place!
           where inline (CmmReg    (CmmLocal l'))     | l == l' = rhs
                 inline (CmmRegOff (CmmLocal l') off) | l == l'
                    = cmmOffset dflags rhs off
                 -- re-constant fold after inlining
                 inline (CmmMachOp op args) = cmmMachOpFold dflags op args
                 inline other = other
440
441 -- Note [dependent assignments]
442 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
443 --
444 -- If our assignment list looks like
445 --
446 -- [ y = e, x = ... y ... ]
447 --
448 -- We cannot inline x. Remember this list is really in reverse order,
449 -- so it means x = ... y ...; y = e
450 --
451 -- Hence if we inline x, the outer assignment to y will capture the
452 -- reference in x's right hand side.
453 --
454 -- In this case we should rename the y in x's right-hand side,
455 -- i.e. change the list to [ y = e, x = ... y1 ..., y1 = y ]
456 -- Now we can go ahead and inline x.
457 --
458 -- For now we do nothing, because this would require putting
459 -- everything inside UniqSM.
460 --
461 -- One more variant of this (#7366):
462 --
463 -- [ y = e, y = z ]
464 --
465 -- If we don't want to inline y = e, because y is used many times, we
466 -- might still be tempted to inline y = z (because we always inline
467 -- trivial rhs's). But of course we can't, because y is equal to e,
468 -- not z.
469
-- | Bump the usage count of a register by one.
addUsage :: UniqFM Int -> LocalReg -> UniqFM Int
addUsage m r = addToUFM_C (+) m r 1
472
-- | Is any of the given local registers mentioned (read) by the
-- expression?  The empty-list case short-circuits the traversal.
regsUsedIn :: [LocalReg] -> CmmExpr -> Bool
regsUsedIn [] _ = False
regsUsedIn ls e = wrapRecExpf f e False
  where f (CmmReg    (CmmLocal l))   _ | l `elem` ls = True
        f (CmmRegOff (CmmLocal l) _) _ | l `elem` ls = True
        f _ z = z
479
-- we don't inline into CmmUnsafeForeignCall if the expression refers
-- to global registers.  This is a HACK to avoid global registers
-- clashing with C argument-passing registers, really the back-end
-- ought to be able to handle it properly, but currently neither PprC
-- nor the NCG can do it.  See Note [Register parameter passing]
-- See also StgCmmForeign:load_args_into_temps.
okToInline :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
okToInline dflags expr node@(CmmUnsafeForeignCall{}) =
    not (globalRegistersConflict dflags expr node)
okToInline _ _ _ = True
490
-- -----------------------------------------------------------------------------

-- | @conflicts (r,e) stmt@ is @False@ if and only if the assignment
-- @r = e@ can be safely commuted past @stmt@.
--
-- We only sink "r = G" assignments right now, so conflicts is very simple:
--
conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool
conflicts dflags (r, rhs, addr) node

  -- (1) node defines registers used by rhs of assignment. This catches
  -- assignments and all three kinds of calls. See Note [Sinking and calls]
  | globalRegistersConflict dflags rhs node                       = True
  | localRegistersConflict  dflags rhs node                       = True

  -- (2) node uses register defined by assignment
  | foldRegsUsed dflags (\b r' -> r == r' || b) False node        = True

  -- (3) a store to an address conflicts with a read of the same memory
  | CmmStore addr' e <- node
  , memConflicts addr (loadAddr dflags addr' (cmmExprWidth dflags e)) = True

  -- (4) an assignment to Hp/Sp conflicts with a heap/stack read respectively
  | HeapMem  <- addr, CmmAssign (CmmGlobal Hp) _ <- node          = True
  | StackMem <- addr, CmmAssign (CmmGlobal Sp) _ <- node          = True
  | SpMem{}  <- addr, CmmAssign (CmmGlobal Sp) _ <- node          = True

  -- (5) foreign calls clobber heap: see Note [Foreign calls clobber heap]
  | CmmUnsafeForeignCall{} <- node, memConflicts addr AnyMem      = True

  -- (6) native calls clobber any memory
  | CmmCall{} <- node, memConflicts addr AnyMem                   = True

  -- (7) otherwise, no conflict
  | otherwise = False
526
-- Returns True if node defines any global registers that are used in the
-- Cmm expression
globalRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
globalRegistersConflict dflags expr node =
    foldRegsDefd dflags (\b r -> b || (CmmGlobal r) `regUsedIn` expr) False node
532
-- Returns True if node defines any local registers that are used in the
-- Cmm expression
localRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
localRegistersConflict dflags expr node =
    foldRegsDefd dflags (\b r -> b || (CmmLocal r) `regUsedIn` expr) False node
538
-- Note [Sinking and calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~
--
-- We have three kinds of calls: normal (CmmCall), safe foreign (CmmForeignCall)
-- and unsafe foreign (CmmUnsafeForeignCall). We perform sinking pass after
-- stack layout (see Note [Sinking after stack layout]) which leads to two
-- invariants related to calls:
--
--   a) during stack layout phase all safe foreign calls are turned into
--      unsafe foreign calls (see Note [Lower safe foreign calls]). This
--      means that we will never encounter CmmForeignCall node when running
--      sinking after stack layout
--
--   b) stack layout saves all variables live across a call on the stack
--      just before making a call (remember we are not sinking assignments to
--      stack):
--
--       L1:
--          x = R1
--          P64[Sp - 16] = L2
--          P64[Sp - 8]  = x
--          Sp = Sp - 16
--          call f() returns L2
--       L2:
--
--      We will attempt to sink { x = R1 } but we will detect conflict with
--      { P64[Sp - 8] = x } and hence we will drop { x = R1 } without even
--      checking whether it conflicts with { call f() }. In this way we will
--      never need to check any assignment conflicts with CmmCall. Remember
--      that we still need to check for potential memory conflicts.
--
-- So the result is that we only need to worry about CmmUnsafeForeignCall nodes
-- when checking conflicts (see Note [Unsafe foreign calls clobber caller-save registers]).
-- This assumption holds only when we do sinking after stack layout. If we run
-- it before stack layout we need to check for possible conflicts with all three
-- kinds of calls. Our `conflicts` function does that by using a generic
-- foldRegsDefd and foldRegsUsed functions defined in DefinerOfRegs and
-- UserOfRegs typeclasses.
--
577 --
578
-- An abstraction of memory read or written.
data AbsMem
  = NoMem            -- no memory accessed
  | AnyMem           -- arbitrary memory
  | HeapMem          -- definitely heap memory
  | StackMem         -- definitely stack memory
  | SpMem            -- <size>[Sp+n]
       {-# UNPACK #-} !Int     -- offset n from Sp
       {-# UNPACK #-} !Int     -- width of the access, in bytes
588
589 -- Having SpMem is important because it lets us float loads from Sp
590 -- past stores to Sp as long as they don't overlap, and this helps to
591 -- unravel some long sequences of
592 -- x1 = [Sp + 8]
593 -- x2 = [Sp + 16]
594 -- ...
595 -- [Sp + 8] = xi
596 -- [Sp + 16] = xj
597 --
598 -- Note that SpMem is invalidated if Sp is changed, but the definition
599 -- of 'conflicts' above handles that.
600
601 -- ToDo: this won't currently fix the following commonly occurring code:
602 -- x1 = [R1 + 8]
603 -- x2 = [R1 + 16]
604 -- ..
605 -- [Hp - 8] = x1
606 -- [Hp - 16] = x2
607 -- ..
608
609 -- because [R1 + 8] and [Hp - 8] are both HeapMem. We know that
610 -- assignments to [Hp + n] do not conflict with any other heap memory,
611 -- but this is tricky to nail down. What if we had
612 --
613 -- x = Hp + n
614 -- [x] = ...
615 --
616 -- the store to [x] should be "new heap", not "old heap".
617 -- Furthermore, you could imagine that if we started inlining
618 -- functions in Cmm then there might well be reads of heap memory
619 -- that was written in the same basic block. To take advantage of
620 -- non-aliasing of heap memory we will have to be more clever.
621
622 -- Note [Foreign calls clobber heap]
623 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
624 --
625 -- It is tempting to say that foreign calls clobber only
626 -- non-heap/stack memory, but unfortunately we break this invariant in
627 -- the RTS. For example, in stg_catch_retry_frame we call
628 -- stmCommitNestedTransaction() which modifies the contents of the
629 -- TRec it is passed (this actually caused incorrect code to be
630 -- generated).
631 --
632 -- Since the invariant is true for the majority of foreign calls,
633 -- perhaps we ought to have a special annotation for calls that can
634 -- modify heap/stack memory. For now we just use the conservative
635 -- definition here.
636
637
-- | Combine two memory abstractions: the least precise description
-- covering an access that may touch either.
bothMems :: AbsMem -> AbsMem -> AbsMem
bothMems NoMem    x         = x
bothMems x        NoMem     = x
bothMems HeapMem  HeapMem   = HeapMem
bothMems StackMem StackMem  = StackMem
bothMems (SpMem o1 w1) (SpMem o2 w2)
  | o1 == o2  = SpMem o1 (max w1 w2)
  | otherwise = StackMem
bothMems SpMem{}  StackMem  = StackMem
bothMems StackMem SpMem{}   = StackMem
bothMems _        _         = AnyMem
649
-- | Do two abstract memory accesses possibly overlap?  Heap and stack
-- never alias; two Sp-relative accesses conflict only if their byte
-- ranges overlap.
memConflicts :: AbsMem -> AbsMem -> Bool
memConflicts NoMem    _        = False
memConflicts _        NoMem    = False
memConflicts HeapMem  StackMem = False
memConflicts StackMem HeapMem  = False
memConflicts SpMem{}  HeapMem  = False
memConflicts HeapMem  SpMem{}  = False
memConflicts (SpMem o1 w1) (SpMem o2 w2)
  | o1 < o2   = o1 + w1 > o2
  | otherwise = o2 + w2 > o1
memConflicts _ _ = True
661
-- | Abstract the memory read by an expression: combine the memory read
-- by every load in it (including loads inside the address expression).
exprMem :: DynFlags -> CmmExpr -> AbsMem
exprMem dflags (CmmLoad addr w)  = bothMems (loadAddr dflags addr (typeWidth w)) (exprMem dflags addr)
exprMem dflags (CmmMachOp _ es)  = foldr bothMems NoMem (map (exprMem dflags) es)
exprMem _      _                 = NoMem
666
-- | Abstract the memory accessed by a load of the given width at the
-- given address expression.
loadAddr :: DynFlags -> CmmExpr -> Width -> AbsMem
loadAddr dflags e w =
  case e of
   CmmReg r       -> regAddr dflags r 0 w
   CmmRegOff r i  -> regAddr dflags r i w
   _other | CmmGlobal Sp `regUsedIn` e -> StackMem
          | otherwise                  -> AnyMem
674
-- | Abstract the memory accessed through a base register plus offset.
regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem
regAddr _      (CmmGlobal Sp) i w = SpMem i (widthInBytes w)
regAddr _      (CmmGlobal Hp) _ _ = HeapMem
regAddr _      (CmmGlobal CurrentTSO) _ _ = HeapMem -- important for PrimOps
regAddr dflags r _ _ | isGcPtrType (cmmRegType dflags r) = HeapMem -- yay! GCPtr pays for itself
regAddr _      _ _ _ = AnyMem