7b5aaa6affccb96965427ed859e93a08ff9f9d10
[ghc.git] / compiler / cmm / CmmSink.hs
1 {-# LANGUAGE GADTs #-}
2 module CmmSink (
3 cmmSink
4 ) where
5
6 import Cmm
7 import CmmOpt
8 import BlockId
9 import CmmLive
10 import CmmUtils
11 import Hoopl
12
13 import DynFlags
14 import UniqFM
15 import PprCmm ()
16
17 import Data.List (partition)
18 import qualified Data.Set as Set
19
20 -- -----------------------------------------------------------------------------
21 -- Sinking and inlining
22
23 -- This is an optimisation pass that
24 -- (a) moves assignments closer to their uses, to reduce register pressure
25 -- (b) pushes assignments into a single branch of a conditional if possible
26 -- (c) inlines assignments to registers that are mentioned only once
27 -- (d) discards dead assignments
28 --
29 -- This tightens up lots of register-heavy code. It is particularly
30 -- helpful in the Cmm generated by the Stg->Cmm code generator, in
31 -- which every function starts with a copyIn sequence like:
32 --
33 -- x1 = R1
34 -- x2 = Sp[8]
35 -- x3 = Sp[16]
36 -- if (Sp - 32 < SpLim) then L1 else L2
37 --
38 -- we really want to push the x1..x3 assignments into the L2 branch.
39 --
40 -- Algorithm:
41 --
42 -- * Start by doing liveness analysis.
43 --
44 -- * Keep a list of assignments A; earlier ones may refer to later ones.
45 -- Currently we only sink assignments to local registers, because we don't
46 -- have liveness information about global registers.
47 --
48 -- * Walk forwards through the graph, look at each node N:
49 --
50 -- * If it is a dead assignment, i.e. assignment to a register that is
51 -- not used after N, discard it.
52 --
53 -- * Try to inline based on current list of assignments
54 -- * If any assignments in A (1) occur only once in N, and (2) are
55 -- not live after N, inline the assignment and remove it
56 -- from A.
57 --
58 -- * If an assignment in A is cheap (RHS is local register), then
59 -- inline the assignment and keep it in A in case it is used afterwards.
60 --
61 -- * Otherwise don't inline.
62 --
63 -- * If N is assignment to a local register pick up the assignment
64 -- and add it to A.
65 --
66 -- * If N is not an assignment to a local register:
67 -- * remove any assignments from A that conflict with N, and
68 -- place them before N in the current block. We call this
69 -- "dropping" the assignments.
70 --
71 -- * An assignment conflicts with N if it:
72 -- - assigns to a register mentioned in N
73 -- - mentions a register assigned by N
74 -- - reads from memory written by N
75 -- * do this recursively, dropping dependent assignments
76 --
77 -- * At an exit node:
78 -- * drop any assignments that are live on more than one successor
79 -- and are not trivial
80 -- * if any successor has more than one predecessor (a join-point),
81 -- drop everything live in that successor. Since we only propagate
82 -- assignments that are not dead at the successor, we will therefore
83 -- eliminate all assignments dead at this point. Thus analysis of a
84 -- join-point will always begin with an empty list of assignments.
85 --
86 --
-- As a result of the above algorithm, sinking deletes some dead assignments
88 -- (transitively, even). This isn't as good as removeDeadAssignments,
89 -- but it's much cheaper.
90
91 -- -----------------------------------------------------------------------------
92 -- things that we aren't optimising very well yet.
93 --
94 -- -----------
95 -- (1) From GHC's FastString.hashStr:
96 --
97 -- s2ay:
98 -- if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp;
99 -- c2gn:
100 -- R1 = _s2au::I64;
101 -- call (I64[Sp])(R1) args: 8, res: 0, upd: 8;
102 -- c2gp:
103 -- _s2cO::I64 = %MO_S_Rem_W64(%MO_UU_Conv_W8_W64(I8[_s2aq::I64 + (_s2an::I64 << 0)]) + _s2au::I64 * 128,
104 -- 4091);
105 -- _s2an::I64 = _s2an::I64 + 1;
106 -- _s2au::I64 = _s2cO::I64;
107 -- goto s2ay;
108 --
109 -- a nice loop, but we didn't eliminate the silly assignment at the end.
110 -- See Note [dependent assignments], which would probably fix this.
111 --
112 -- -----------
113 -- (2) From stg_atomically_frame in PrimOps.cmm
114 --
115 -- We have a diamond control flow:
116 --
117 -- x = ...
118 -- |
119 -- / \
120 -- A B
121 -- \ /
122 -- |
123 -- use of x
124 --
125 -- Now x won't be sunk down to its use, because we won't push it into
126 -- both branches of the conditional. We certainly do have to check
127 -- that we can sink it past all the code in both A and B, but having
128 -- discovered that, we could sink it to its use.
129 --
130
131 -- -----------------------------------------------------------------------------
132
type Assignment = (LocalReg, CmmExpr, AbsMem)
  -- Assignment caches AbsMem, an abstraction of the memory read by
  -- the RHS of the assignment.

type Assignments = [Assignment]
  -- A sequence of assignments; kept in *reverse* order
  -- So the list [ x=e1, y=e2 ] means the sequence of assignments
  --     y = e2
  --     x = e1
-- | The entry point of the sinking pass: walk the graph in
-- postorder-DFS order, sinking and inlining local-register assignments
-- block by block and handing pending assignments on to successors.
cmmSink :: DynFlags -> CmmGraph -> CmmGraph
cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
  where
  liveness = cmmLocalLiveness dflags graph
  getLive l = mapFindWithDefault Set.empty l liveness

  blocks = postorderDfs graph

  join_pts = findJoinPoints blocks

  -- 'sunk' maps a block label to the assignments sunk into that block
  -- from a predecessor.  postorderDfs order guarantees we process a
  -- predecessor before the successor it feeds assignments to.
  sink :: BlockEnv Assignments -> [CmmBlock] -> [CmmBlock]
  sink _ [] = []
  sink sunk (b:bs) =
    -- pprTrace "sink" (ppr lbl) $
    blockJoin first final_middle final_last : sink sunk' bs
    where
      lbl = entryLabel b
      (first, middle, last) = blockSplit b

      succs = successors last

      -- Annotate the middle nodes with the registers live *after*
      -- the node. This will help us decide whether we can inline
      -- an assignment in the current node or not.
      live = Set.unions (map getLive succs)
      live_middle = gen_kill dflags last live
      ann_middles = annotate dflags live_middle (blockToList middle)

      -- Now sink and inline in this block
      (middle', assigs) = walk dflags ann_middles (mapFindWithDefault [] lbl sunk)
      fold_last = constantFold dflags last
      (final_last, assigs') = tryToInline dflags live fold_last assigs

      -- We cannot sink into join points (successors with more than
      -- one predecessor), so identify the join points and the set
      -- of registers live in them.
      (joins, nonjoins) = partition (`mapMember` join_pts) succs
      live_in_joins = Set.unions (map getLive joins)

      -- We do not want to sink an assignment into multiple branches,
      -- so identify the set of registers live in multiple successors.
      -- This is made more complicated because when we sink an assignment
      -- into one branch, this might change the set of registers that are
      -- now live in multiple branches.
      init_live_sets = map getLive nonjoins
      live_in_multi live_sets r =
          case filter (Set.member r) live_sets of
            (_one:_two:_) -> True
            _ -> False

      -- Now, drop any assignments that we will not sink any further.
      (dropped_last, assigs'') = dropAssignments dflags drop_if init_live_sets assigs'

      -- An assignment is dropped (emitted at the end of this block) if it
      -- conflicts with the exit node, would be duplicated into several
      -- branches, or is live in a join point we cannot sink into.
      drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets')
          where
            should_drop =  conflicts dflags a final_last
                        || not (isTrivial rhs) && live_in_multi live_sets r
                        || r `Set.member` live_in_joins

            live_sets' | should_drop = live_sets
                       | otherwise   = map upd live_sets

            -- Sinking 'a' into a branch makes everything its RHS reads
            -- live in that branch too.
            upd set | r `Set.member` set = set `Set.union` live_rhs
                    | otherwise          = set

            live_rhs = foldRegsUsed dflags extendRegSet emptyRegSet rhs

      final_middle = foldl blockSnoc middle' dropped_last

      sunk' = mapUnion sunk $
                mapFromList [ (l, filterAssignments dflags (getLive l) assigs'')
                            | l <- succs ]
215
216 {- TODO: enable this later, when we have some good tests in place to
217 measure the effect and tune it.
218
219 -- small: an expression we don't mind duplicating
220 isSmall :: CmmExpr -> Bool
221 isSmall (CmmReg (CmmLocal _)) = True -- not globals, we want to coalesce them instead* See below
222 isSmall (CmmLit _) = True
223 isSmall (CmmMachOp (MO_Add _) [x,y]) = isTrivial x && isTrivial y
224 isSmall (CmmRegOff (CmmLocal _) _) = True
225 isSmall _ = False
226
227 Coalesce global registers? What does that mean? We observed no decrease
in performance coming from inlining of global registers, hence we do it now
229 (see isTrivial function). Ideally we'd like to measure performance using
230 some tool like perf or VTune and make decisions what to inline based on that.
231 -}
232
-- | Is an expression cheap enough that we don't mind duplicating it?
-- Trivial expressions are registers (both local and global) and
-- literals.
isTrivial :: CmmExpr -> Bool
isTrivial expr = case expr of
  CmmReg _ -> True
  CmmLit _ -> True
  _        -> False
241
-- | Annotate each node with the set of registers live *after* the node.
-- We walk the node list backwards, threading the live-out set through
-- 'gen_kill'.
annotate :: DynFlags -> LocalRegSet -> [CmmNode O O] -> [(LocalRegSet, CmmNode O O)]
annotate dflags live_out nodes = snd (foldr step (live_out, []) nodes)
  where
    -- 'liveAfter' is the set live after the node being visited; it
    -- becomes the annotation, and gen_kill rolls it back over the node.
    step node (liveAfter, acc) =
      (gen_kill dflags node liveAfter, (liveAfter, node) : acc)
248
-- | Find the blocks that have multiple predecessors (join points):
-- count how many times each label appears as a successor, and keep
-- the labels that appear more than once.
findJoinPoints :: [CmmBlock] -> BlockEnv Int
findJoinPoints blocks = mapFilter (> 1) pred_counts
  where
    pred_counts :: BlockEnv Int
    pred_counts = foldr bump mapEmpty (concatMap successors blocks)

    bump lbl env = mapInsertWith (+) lbl 1 env
259
--
-- Filter the list of assignments to remove any assignments that
-- are not live in a continuation.
--
filterAssignments :: DynFlags -> LocalRegSet -> Assignments -> Assignments
filterAssignments dflags live assigs = reverse (go assigs [])
  -- 'assigs' is in reverse order, so 'go' visits later assignments
  -- first; 'kept' accumulates survivors in forward order, and the
  -- final 'reverse' restores the reverse-order invariant.
  where go []             kept = kept
        go (a@(r,_,_):as) kept | needed    = go as (a:kept)
                               | otherwise = go as kept
           where
              needed = r `Set.member` live
                       || any (conflicts dflags a) (map toNode kept)
                       -- Note that we must keep assignments that are
                       -- referred to by other assignments we have
                       -- already kept.
275
-- -----------------------------------------------------------------------------
-- Walk through the nodes of a block, sinking and inlining assignments
-- as we go.
--
-- On input we pass in a:
--    * list of nodes in the block
--    * a list of assignments that appeared *before* this block and
--      that are being sunk.
--
-- On output we get:
--    * a new block
--    * a list of assignments that will be placed *after* that block.
--

walk :: DynFlags
     -> [(LocalRegSet, CmmNode O O)]    -- nodes of the block, annotated with
                                        -- the set of registers live *after*
                                        -- this node.

     -> Assignments                     -- The current list of
                                        -- assignments we are sinking.
                                        -- Later assignments may refer
                                        -- to earlier ones.

     -> ( Block CmmNode O O             -- The new block
        , Assignments                   -- Assignments to sink further
        )

walk dflags nodes assigs = go nodes emptyBlock assigs
 where
   go []               block as = (block, as)
   go ((live,node):ns) block as
    -- dead assignment: drop the node entirely
    | shouldDiscard node live           = go ns block as
    -- an assignment to a local register: pick it up (after constant
    -- folding and inlining into it) and carry it forward in the list
    | Just a <- shouldSink dflags node2 = go ns block (a : as1)
    -- any other node: emit conflicting assignments in front of it,
    -- then emit the (folded, inlined) node itself
    | otherwise                         = go ns block' as'
    where
      node1 = constantFold dflags node

      (node2, as1) = tryToInline dflags live node1 as

      (dropped, as') = dropAssignmentsSimple dflags
                          (\a -> conflicts dflags a node2) as1

      block' = foldl blockSnoc block dropped `blockSnoc` node2
320
321
-- | Simplify every expression inside a node: fold constant MachOps
-- and turn a zero-offset @CmmRegOff@ back into a plain register read.
constantFold :: DynFlags -> CmmNode e x -> CmmNode e x
constantFold dflags = mapExpDeep simplify
  where
    simplify (CmmMachOp op args) = cmmMachOpFold dflags op args
    simplify (CmmRegOff reg 0)   = CmmReg reg
    simplify other               = other
327
--
-- Heuristic to decide whether to pick up and sink an assignment
-- Currently we pick up all assignments to local registers.  It might
-- be profitable to sink assignments to global regs too, but the
-- liveness analysis doesn't track those (yet) so we can't.
--
shouldSink :: DynFlags -> CmmNode e x -> Maybe Assignment
shouldSink dflags (CmmAssign (CmmLocal r) e) | no_local_regs = Just (r, e, exprMem dflags e)
  -- no_local_regs is currently always True; the commented-out fold
  -- would restrict sinking to RHSs that mention no registers at all.
  where no_local_regs = True -- foldRegsUsed (\_ _ -> False) True e
shouldSink _ _other = Nothing
338
--
-- Decide whether an assignment node is dead and can be dropped
-- outright.  This doesn't do as good a job as removeDeadAssignments,
-- because it would need multiple passes to get all the dead code, but
-- it catches the common case of superfluous reloads from the stack
-- that the stack allocator leaves behind.
--
-- We also catch "r = r" here.  You might think it would fall out of
-- inlining, but the inliner will see that r is live after the
-- instruction and choose not to inline r in the rhs.
--
shouldDiscard :: CmmNode e x -> LocalRegSet -> Bool
shouldDiscard node live
  | CmmAssign r (CmmReg r') <- node, r == r' = True   -- self-assignment
  | CmmAssign (CmmLocal r) _ <- node         = not (r `Set.member` live)
  | otherwise                                = False
356
357
-- | View an 'Assignment' as the Cmm assignment node it represents
-- (the cached 'AbsMem' plays no part here).
toNode :: Assignment -> CmmNode O O
toNode (reg, rhs, _mem) = CmmAssign (CmmLocal reg) rhs
360
-- | 'dropAssignments' specialised to a stateless drop predicate.
dropAssignmentsSimple :: DynFlags -> (Assignment -> Bool) -> Assignments
                      -> ([CmmNode O O], Assignments)
dropAssignmentsSimple dflags p =
  dropAssignments dflags (\assig () -> (p assig, ())) ()
364
-- | Split a (reverse-ordered) assignment list into the nodes that must
-- be emitted now ("dropped") and the assignments that may continue to
-- sink.  The predicate threads a state; an assignment is also dropped
-- if it conflicts with anything already dropped (transitively).
dropAssignments :: DynFlags -> (Assignment -> s -> (Bool, s)) -> s -> Assignments
                -> ([CmmNode O O], Assignments)
dropAssignments dflags should_drop state assigs
 = (dropped, reverse kept)
   -- 'kept' is built in forward order, so reverse it to restore the
   -- reverse-order invariant of Assignments.
 where
   (dropped,kept) = go state assigs [] []

   go _     []             dropped kept = (dropped, kept)
   go state (assig : rest) dropped kept
      | conflict  = go state' rest (toNode assig : dropped) kept
      | otherwise = go state' rest dropped (assig:kept)
      where
        (dropit, state') = should_drop assig state
        -- once something is dropped, anything it depends on must be
        -- dropped too (it will be placed *before* in the block)
        conflict = dropit || any (conflicts dflags assig) dropped
379
380
-- -----------------------------------------------------------------------------
-- Try to inline assignments into a node.

tryToInline
   :: DynFlags
   -> LocalRegSet               -- set of registers live after this
                                -- node.  We cannot inline anything
                                -- that is live after the node, unless
                                -- it is small enough to duplicate.
   -> CmmNode O x               -- The node to inline into
   -> Assignments               -- Assignments to inline
   -> (
        CmmNode O x             -- New node
      , Assignments             -- Remaining assignments
      )

tryToInline dflags live node assigs = go usages node [] assigs
 where
  usages :: UniqFM Int -- Maps each LocalReg to a count of how often it is used
  usages = foldLocalRegsUsed dflags addUsage emptyUFM node

  -- 'skipped' collects the registers of assignments we decided to keep;
  -- nothing mentioned by a skipped binding may be inlined afterwards.
  go _usages node _skipped [] = (node, [])

  go usages node skipped (a@(l,rhs,_) : rest)
   | cannot_inline           = dont_inline
   | occurs_once             = inline_and_discard
   | isTrivial rhs           = inline_and_keep
   | otherwise               = dont_inline
   where
        inline_and_discard = go usages' inl_node skipped rest
          where usages' = foldLocalRegsUsed dflags addUsage usages rhs

        dont_inline        = keep node    -- don't inline the assignment, keep it
        inline_and_keep    = keep inl_node -- inline the assignment, keep it

        keep node' = (final_node, a : rest')
          where (final_node, rest') = go usages' node' (l:skipped) rest
                usages' = foldLocalRegsUsed dflags (\m r -> addToUFM m r 2) usages rhs
                -- we must not inline anything that is mentioned in the RHS
                -- of a binding that we have already skipped, so we set the
                -- usages of the regs on the RHS to 2.

        cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
                        || l `elem` skipped
                        || not (okToInline dflags rhs node)

        occurs_once = not (l `elemRegSet` live)
                      && lookupUFM usages l == Just 1

        inl_node = mapExpDeep inline node
                   -- mapExpDeep is where the inlining actually takes place!
           where inline (CmmReg    (CmmLocal l'))     | l == l' = rhs
                 inline (CmmRegOff (CmmLocal l') off) | l == l'
                    = cmmOffset dflags rhs off
                 -- re-constant fold after inlining
                 inline (CmmMachOp op args) = cmmMachOpFold dflags op args
                 inline other = other
437
438 -- Note [dependent assignments]
439 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
440 --
441 -- If our assignment list looks like
442 --
443 -- [ y = e, x = ... y ... ]
444 --
445 -- We cannot inline x. Remember this list is really in reverse order,
446 -- so it means x = ... y ...; y = e
447 --
448 -- Hence if we inline x, the outer assignment to y will capture the
449 -- reference in x's right hand side.
450 --
451 -- In this case we should rename the y in x's right-hand side,
452 -- i.e. change the list to [ y = e, x = ... y1 ..., y1 = y ]
453 -- Now we can go ahead and inline x.
454 --
455 -- For now we do nothing, because this would require putting
456 -- everything inside UniqSM.
457 --
458 -- One more variant of this (#7366):
459 --
460 -- [ y = e, y = z ]
461 --
462 -- If we don't want to inline y = e, because y is used many times, we
463 -- might still be tempted to inline y = z (because we always inline
464 -- trivial rhs's). But of course we can't, because y is equal to e,
465 -- not z.
466
-- | Bump a register's use count by one, starting at 1 if absent.
addUsage :: UniqFM Int -> LocalReg -> UniqFM Int
addUsage counts reg = addToUFM_C (+) counts reg 1
469
-- | Does the expression read (directly or via 'CmmRegOff') any of the
-- given local registers?  The empty list short-circuits to False.
regsUsedIn :: [LocalReg] -> CmmExpr -> Bool
regsUsedIn [] _ = False
regsUsedIn ls e = wrapRecExpf check e False
  where
    check expr found = case expr of
      CmmReg    (CmmLocal l)   | l `elem` ls -> True
      CmmRegOff (CmmLocal l) _ | l `elem` ls -> True
      _                                      -> found
476
-- We don't inline into CmmUnsafeForeignCall if the expression refers
-- to global registers.  This is a HACK to avoid global registers
-- clashing with C argument-passing registers; really the back-end
-- ought to be able to handle it properly, but currently neither PprC
-- nor the NCG can do it.  See Note [Register parameter passing]
-- See also StgCmmForeign:load_args_into_temps.
okToInline :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
okToInline dflags expr node = case node of
  CmmUnsafeForeignCall{} -> not (globalRegistersConflict dflags expr node)
  _                      -> True
487
-- -----------------------------------------------------------------------------

-- | @conflicts (r,e) stmt@ is @False@ if and only if the assignment
-- @r = e@ can be safely commuted past @stmt@.
--
-- We only sink "r = G" assignments right now, so conflicts is very simple:
--
conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool
conflicts dflags (r, rhs, addr) node

  -- (1) node defines registers used by rhs of assignment. This catches
  -- assignments and all three kinds of calls. See Note [Sinking and calls]
  | globalRegistersConflict dflags rhs node                = True
  | localRegistersConflict  dflags rhs node                = True

  -- (2) node uses register defined by assignment
  | foldRegsUsed dflags (\b r' -> r == r' || b) False node = True

  -- (3) a store to an address conflicts with a read of the same memory
  | CmmStore addr' e <- node
  , memConflicts addr (loadAddr dflags addr' (cmmExprWidth dflags e)) = True

  -- (4) an assignment to Hp/Sp conflicts with a heap/stack read respectively
  | HeapMem    <- addr, CmmAssign (CmmGlobal Hp) _ <- node = True
  | StackMem   <- addr, CmmAssign (CmmGlobal Sp) _ <- node = True
  | SpMem{}    <- addr, CmmAssign (CmmGlobal Sp) _ <- node = True

  -- (5) foreign calls clobber heap: see Note [Foreign calls clobber heap]
  | CmmUnsafeForeignCall{} <- node, memConflicts addr AnyMem = True

  -- (6) native calls clobber any memory
  | CmmCall{} <- node, memConflicts addr AnyMem            = True

  -- (7) otherwise, no conflict
  | otherwise = False
523
-- | Returns True if the node defines any global register that is used
-- in the Cmm expression.
globalRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
globalRegistersConflict dflags expr node =
  foldRegsDefd dflags check False node
  where
    check found reg = found || CmmGlobal reg `regUsedIn` expr
529
-- | Returns True if the node defines any local register that is used
-- in the Cmm expression.
localRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
localRegistersConflict dflags expr node =
  foldRegsDefd dflags check False node
  where
    check found reg = found || CmmLocal reg `regUsedIn` expr
535
536 -- Note [Sinking and calls]
537 -- ~~~~~~~~~~~~~~~~~~~~~~~~
538 --
539 -- We have three kinds of calls: normal (CmmCall), safe foreign (CmmForeignCall)
540 -- and unsafe foreign (CmmUnsafeForeignCall). We perform sinking pass after
541 -- stack layout (see Note [Sinking after stack layout]) which leads to two
542 -- invariants related to calls:
543 --
544 -- a) during stack layout phase all safe foreign calls are turned into
545 -- unsafe foreign calls (see Note [Lower safe foreign calls]). This
546 -- means that we will never encounter CmmForeignCall node when running
547 -- sinking after stack layout
548 --
549 -- b) stack layout saves all variables live across a call on the stack
550 -- just before making a call (remember we are not sinking assignments to
551 -- stack):
552 --
553 -- L1:
554 -- x = R1
555 -- P64[Sp - 16] = L2
556 -- P64[Sp - 8] = x
557 -- Sp = Sp - 16
558 -- call f() returns L2
559 -- L2:
560 --
561 -- We will attempt to sink { x = R1 } but we will detect conflict with
562 -- { P64[Sp - 8] = x } and hence we will drop { x = R1 } without even
563 -- checking whether it conflicts with { call f() }. In this way we will
-- never need to check any assignment conflicts with CmmCall. Remember
565 -- that we still need to check for potential memory conflicts.
566 --
567 -- So the result is that we only need to worry about CmmUnsafeForeignCall nodes
568 -- when checking conflicts (see Note [Unsafe foreign calls clobber caller-save registers]).
569 -- This assumption holds only when we do sinking after stack layout. If we run
570 -- it before stack layout we need to check for possible conflicts with all three
571 -- kinds of calls. Our `conflicts` function does that by using a generic
572 -- foldRegsDefd and foldRegsUsed functions defined in DefinerOfRegs and
573 -- UserOfRegs typeclasses.
574 --
575
-- An abstraction of memory read or written.
data AbsMem
  = NoMem            -- no memory accessed
  | AnyMem           -- arbitrary memory
  | HeapMem          -- definitely heap memory
  | StackMem         -- definitely stack memory
  | SpMem            -- <size>[Sp+n]
       {-# UNPACK #-} !Int     -- n: byte offset from Sp (see regAddr)
       {-# UNPACK #-} !Int     -- size of the access in bytes

-- Having SpMem is important because it lets us float loads from Sp
-- past stores to Sp as long as they don't overlap, and this helps to
-- unravel some long sequences of
--    x1 = [Sp + 8]
--    x2 = [Sp + 16]
--    ...
--    [Sp + 8]  = xi
--    [Sp + 16] = xj
--
-- Note that SpMem is invalidated if Sp is changed, but the definition
-- of 'conflicts' above handles that.
597
598 -- ToDo: this won't currently fix the following commonly occurring code:
599 -- x1 = [R1 + 8]
600 -- x2 = [R1 + 16]
601 -- ..
602 -- [Hp - 8] = x1
603 -- [Hp - 16] = x2
604 -- ..
605
606 -- because [R1 + 8] and [Hp - 8] are both HeapMem. We know that
607 -- assignments to [Hp + n] do not conflict with any other heap memory,
608 -- but this is tricky to nail down. What if we had
609 --
610 -- x = Hp + n
611 -- [x] = ...
612 --
613 -- the store to [x] should be "new heap", not "old heap".
614 -- Furthermore, you could imagine that if we started inlining
615 -- functions in Cmm then there might well be reads of heap memory
616 -- that was written in the same basic block. To take advantage of
617 -- non-aliasing of heap memory we will have to be more clever.
618
619 -- Note [Foreign calls clobber heap]
620 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
621 --
622 -- It is tempting to say that foreign calls clobber only
623 -- non-heap/stack memory, but unfortunately we break this invariant in
624 -- the RTS. For example, in stg_catch_retry_frame we call
625 -- stmCommitNestedTransaction() which modifies the contents of the
626 -- TRec it is passed (this actually caused incorrect code to be
627 -- generated).
628 --
629 -- Since the invariant is true for the majority of foreign calls,
630 -- perhaps we ought to have a special annotation for calls that can
631 -- modify heap/stack memory. For now we just use the conservative
632 -- definition here.
633
634
-- | Combine two abstract memory accesses into the smallest abstraction
-- that covers both.  NoMem is the identity; incompatible combinations
-- widen (SpMem to StackMem, anything else to AnyMem).
bothMems :: AbsMem -> AbsMem -> AbsMem
bothMems m1 m2 = case (m1, m2) of
  (NoMem, y)                 -> y
  (x, NoMem)                 -> x
  (HeapMem, HeapMem)         -> HeapMem
  (StackMem, StackMem)       -> StackMem
  (SpMem o1 w1, SpMem o2 w2)
    | o1 == o2               -> SpMem o1 (max w1 w2)
    | otherwise              -> StackMem
  (SpMem{}, StackMem)        -> StackMem
  (StackMem, SpMem{})        -> StackMem
  _                          -> AnyMem
646
-- | Can the two abstract accesses refer to overlapping memory?
-- Heap and stack never alias each other; two Sp-relative accesses
-- conflict exactly when their byte ranges overlap.
memConflicts :: AbsMem -> AbsMem -> Bool
memConflicts m1 m2 = case (m1, m2) of
  (NoMem, _)                 -> False
  (_, NoMem)                 -> False
  (HeapMem, StackMem)        -> False
  (StackMem, HeapMem)        -> False
  (SpMem{}, HeapMem)         -> False
  (HeapMem, SpMem{})         -> False
  (SpMem o1 w1, SpMem o2 w2)
    | o1 < o2                -> o1 + w1 > o2
    | otherwise              -> o2 + w2 > o1
  _                          -> True
658
-- | The abstract memory read by an expression: loads contribute the
-- loaded location (and any memory read to compute the address);
-- everything else reads no memory.
exprMem :: DynFlags -> CmmExpr -> AbsMem
exprMem dflags (CmmLoad addr w) =
  bothMems (loadAddr dflags addr (typeWidth w)) (exprMem dflags addr)
exprMem dflags (CmmMachOp _ es) =
  foldr (bothMems . exprMem dflags) NoMem es
exprMem _ _ = NoMem
663
-- | Abstract the location of a load of the given width at the given
-- address expression.
loadAddr :: DynFlags -> CmmExpr -> Width -> AbsMem
loadAddr dflags addrE w = case addrE of
  CmmReg r      -> regAddr dflags r 0 w
  CmmRegOff r i -> regAddr dflags r i w
  other
    | CmmGlobal Sp `regUsedIn` other -> StackMem
    | otherwise                      -> AnyMem
671
-- | Abstract a register-plus-offset address of a given access width.
-- Note the equations are ordered: the Sp/Hp/CurrentTSO cases must be
-- tried before the generic GC-pointer test.
regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem
regAddr _ (CmmGlobal Sp) off w = SpMem off (widthInBytes w)
regAddr _ (CmmGlobal Hp) _ _ = HeapMem
regAddr _ (CmmGlobal CurrentTSO) _ _ = HeapMem -- important for PrimOps
regAddr dflags reg _ _
  | isGcPtrType (cmmRegType dflags reg) = HeapMem -- yay! GCPtr pays for itself
regAddr _ _ _ _ = AnyMem