1 {-# LANGUAGE ViewPatterns #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE FlexibleContexts #-}
4
5 {-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
6
7 -- TODO: Get rid of this flag:
8 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
9
10 -- This module implements generalized code motion for assignments to
11 -- local registers, inlining and sinking when possible. It also does
12 -- some amount of rewriting for stores to register slots, which are
13 -- effectively equivalent to local registers.
14 module CmmRewriteAssignments
15 ( rewriteAssignments
16 ) where
17
18 import Cmm
19 import CmmExpr
20 import CmmOpt
21 import OptimizationFuel
22 import StgCmmUtils
23
24 import Control.Monad
25 import UniqFM
26 import Unique
27 import BlockId
28
29 import Compiler.Hoopl hiding (Unique)
30 import Data.Maybe
31 import Prelude hiding (succ, zip)
32
33 ----------------------------------------------------------------
34 --- Main function
35
36 rewriteAssignments :: CmmGraph -> FuelUniqSM CmmGraph
37 rewriteAssignments g = do
38 -- Because we need to act on forwards and backwards information, we
39 -- first perform usage analysis and bake this information into the
40 -- graph (backwards transform), and then do a forwards transform
41 -- to actually perform inlining and sinking.
42 g' <- annotateUsage g
43 g'' <- liftM fst $ dataflowPassFwd g' [(g_entry g, fact_bot assignmentLattice)] $
44 analRewFwd assignmentLattice
45 assignmentTransfer
46 (assignmentRewrite `thenFwdRw` machOpFoldRewrite)
47 return (modifyGraph eraseRegUsage g'')
48
49 ----------------------------------------------------------------
50 --- Usage information
51
52 -- We decorate all register assignments with approximate usage
53 -- information, that is, the maximum number of times the register is
54 -- referenced while it is live along all outgoing control paths.
55 -- The count is an upper bound on the true usage, so if it says a
56 -- register is never referenced, the defining assignment is certainly
57 -- dead and can be removed.
58 --
59 -- This analysis is very similar to liveness analysis; we just keep a
60 -- little extra info. (Maybe we should move it to CmmLive, and subsume
61 -- the old liveness analysis.)
62 --
63 -- There are a few subtleties here:
64 --
65 -- - If a register goes dead, and then becomes live again, the usages
66 -- of the disjoint live range don't count towards the original range.
67 --
68 -- a = 1; // used once
69 -- b = a;
70 -- a = 2; // used once
71 -- c = a;
72 --
73 -- - A register may be used multiple times, but these uses may all reside
74 -- in different control paths, such that any given execution only uses
75 -- it once. In that case, the usage count may still be 1.
76 --
77 -- a = 1; // used once
78 -- if (b) {
79 -- c = a + 3;
80 -- } else {
81 -- c = a + 1;
82 -- }
83 --
84 -- This policy corresponds to an inlining strategy that does not
85 -- duplicate computation but may increase binary size.
86 --
87 -- - If we naively implement a usage count, we have a counting to
88 -- infinity problem across joins. Furthermore, knowing that
89 -- something is used 2 or more times in one runtime execution isn't
90 -- particularly useful for optimizations (inlining may be beneficial,
91 -- but there's no way of knowing that without register pressure
92 -- information.)
93 --
94 -- while (...) {
95 -- // first iteration, b used once
96 -- // second iteration, b used twice
97 -- // third iteration ...
98 -- a = b;
99 -- }
100 -- // b used zero times
101 --
102 -- There is an orthogonal question, which is that for every runtime
103 -- execution, the register may be used only once, but if we inline it
104 -- in every conditional path, the binary size might increase a lot.
105 -- But tracking this information would be tricky, because it violates
106 -- the finite lattice restriction Hoopl requires for termination;
107 -- we'd thus need to supply an alternate proof, which is probably
108 -- something we should defer until we actually have an optimization
109 -- that would take advantage of this. (This might also interact
110 -- strangely with liveness information.)
111 --
112 -- a = ...;
113 -- // a is used one time, but in X different paths
114 -- case (b) of
115 -- 1 -> ... a ...
116 -- 2 -> ... a ...
117 -- 3 -> ... a ...
118 -- ...
119 --
120 -- - Memory stores to local register slots (CmmStore (CmmStackSlot
121 -- (RegSlot _) 0) _) have similar behavior to local registers,
122 -- in that these locations are all disjoint from each other. Thus,
123 -- we attempt to inline them too. Note that because these are only
124 -- generated as part of the spilling process, most of the time this
125 -- will refer to a local register and the assignment will immediately
126 -- die on the subsequent call. However, if we manage to replace that
127 -- local register with a memory location, it means that we've managed
128 -- to preserve a value on the stack without having to move it to
129 -- another memory location again! We collect usage information just
130 -- to be safe in case extra computation is involved.
131
132 data RegUsage = SingleUse | ManyUse
133 deriving (Ord, Eq, Show)
134 -- Absence in map = ZeroUse
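
-- The two-point domain above, together with a saturating "bump" on each
-- reference and a max join at control-flow merges, is what avoids the
-- counting-to-infinity problem described earlier.  A minimal standalone
-- sketch (hypothetical names, not used by this module; the real pass does
-- the same thing with addToUFM_C in 'increaseUsage' and joinUFM in
-- 'usageLattice' below):
{-
-- A further reference saturates at ManyUse instead of counting upwards
-- forever around a loop.
bumpUsage :: Maybe RegUsage -> RegUsage
bumpUsage Nothing  = SingleUse   -- first reference seen on this path
bumpUsage (Just _) = ManyUse     -- any subsequent reference

-- Facts from different control paths are combined with max, so a register
-- referenced once on each arm of a conditional still joins to SingleUse,
-- while SingleUse meeting ManyUse joins to ManyUse.
joinUsage :: RegUsage -> RegUsage -> RegUsage
joinUsage = max
-}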
135
136 {-
137 -- minBound is bottom, maxBound is top, least-upper-bound is max
138 -- ToDo: Put this in Hoopl. Note that this isn't as useful as I
139 -- originally hoped, because you usually want to leave out the bottom
140 -- element when you have things like this put in maps. Maybe f is
141 -- useful on its own as a combining function.
142 boundedOrdLattice :: (Bounded a, Ord a) => String -> DataflowLattice a
143 boundedOrdLattice n = DataflowLattice n minBound f
144 where f _ (OldFact x) (NewFact y)
145 | x >= y = (NoChange, x)
146 | otherwise = (SomeChange, y)
147 -}
148
149 -- Custom node type we'll rewrite to. CmmAssign nodes to local
150 -- registers are replaced with AssignLocal nodes.
151 data WithRegUsage n e x where
152 -- Plain will not contain CmmAssign nodes immediately after
153 -- transformation, but as we rewrite assignments, we may have
154 -- assignments here: these are assignments that should not be
155 -- rewritten!
156 Plain :: n e x -> WithRegUsage n e x
157 AssignLocal :: LocalReg -> CmmExpr -> RegUsage -> WithRegUsage n O O
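
-- For example (illustrative; 'x' and 'e' are stand-ins): after the usage
-- analysis below has run, a single-use assignment
--     Plain (CmmAssign (CmmLocal x) e)
-- becomes
--     AssignLocal x e SingleUse
-- and an assignment to a register that is never referenced is deleted
-- outright (see usageRewrite).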
158
159 instance UserOfLocalRegs (n e x) => UserOfLocalRegs (WithRegUsage n e x) where
160 foldRegsUsed f z (Plain n) = foldRegsUsed f z n
161 foldRegsUsed f z (AssignLocal _ e _) = foldRegsUsed f z e
162
163 instance DefinerOfLocalRegs (n e x) => DefinerOfLocalRegs (WithRegUsage n e x) where
164 foldRegsDefd f z (Plain n) = foldRegsDefd f z n
165 foldRegsDefd f z (AssignLocal r _ _) = foldRegsDefd f z r
166
167 instance NonLocal n => NonLocal (WithRegUsage n) where
168 entryLabel (Plain n) = entryLabel n
169 successors (Plain n) = successors n
170
171 liftRegUsage :: Graph n e x -> Graph (WithRegUsage n) e x
172 liftRegUsage = mapGraph Plain
173
174 eraseRegUsage :: Graph (WithRegUsage CmmNode) e x -> Graph CmmNode e x
175 eraseRegUsage = mapGraph f
176 where f :: WithRegUsage CmmNode e x -> CmmNode e x
177 f (AssignLocal l e _) = CmmAssign (CmmLocal l) e
178 f (Plain n) = n
179
180 type UsageMap = UniqFM RegUsage
181
182 usageLattice :: DataflowLattice UsageMap
183 usageLattice = DataflowLattice "usage counts for registers" emptyUFM (joinUFM f)
184 where f _ (OldFact x) (NewFact y)
185 | x >= y = (NoChange, x)
186 | otherwise = (SomeChange, y)
187
188 -- We reuse the names 'gen' and 'kill', although we're doing something
189 -- slightly different from the Dragon Book.
190 usageTransfer :: BwdTransfer (WithRegUsage CmmNode) UsageMap
191 usageTransfer = mkBTransfer3 first middle last
192 where first _ f = f
193 middle :: WithRegUsage CmmNode O O -> UsageMap -> UsageMap
194 middle n f = gen_kill n f
195 last :: WithRegUsage CmmNode O C -> FactBase UsageMap -> UsageMap
196 -- Checking for CmmCall/CmmForeignCall is unnecessary, because
197 -- spills/reloads have already occurred by the time we do this
198 -- analysis.
199 -- XXX Deprecated warning is puzzling: what label are we
200 -- supposed to use?
201 -- ToDo: With a bit more cleverness here, we could avoid the
202 -- disappointment and heartbreak associated with the inability
203 -- to inline into CmmCall and CmmForeignCall, by over-estimating
204 -- the usage of registers they reference to be ManyUse.
205 last n f = gen_kill n (joinOutFacts usageLattice n f)
206 gen_kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
207 gen_kill a = gen a . kill a
208 gen :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
209 gen a f = foldRegsUsed increaseUsage f a
210 kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
211 kill a f = foldRegsDefd delFromUFM f a
212 increaseUsage f r = addToUFM_C combine f r SingleUse
213 where combine _ _ = ManyUse
214
215 usageRewrite :: BwdRewrite FuelUniqSM (WithRegUsage CmmNode) UsageMap
216 usageRewrite = mkBRewrite3 first middle last
217 where first _ _ = return Nothing
218 middle :: Monad m => WithRegUsage CmmNode O O -> UsageMap -> m (Maybe (Graph (WithRegUsage CmmNode) O O))
219 middle (Plain (CmmAssign (CmmLocal l) e)) f
220 = return . Just
221 $ case lookupUFM f l of
222 Nothing -> emptyGraph
223 Just usage -> mkMiddle (AssignLocal l e usage)
224 middle _ _ = return Nothing
225 last _ _ = return Nothing
226
227 type CmmGraphWithRegUsage = GenCmmGraph (WithRegUsage CmmNode)
228 annotateUsage :: CmmGraph -> FuelUniqSM (CmmGraphWithRegUsage)
229 annotateUsage vanilla_g =
230 let g = modifyGraph liftRegUsage vanilla_g
231 in liftM fst $ dataflowPassBwd g [(g_entry g, fact_bot usageLattice)] $
232 analRewBwd usageLattice usageTransfer usageRewrite
233
234 ----------------------------------------------------------------
235 --- Assignment tracking
236
237 -- The idea is to maintain a map of local registers to expressions,
238 -- such that the value of that register is the same as the value of that
239 -- expression at any given time. We can then do several things,
240 -- as described by Assignment.
241
242 -- Assignment describes the various optimizations that are valid
243 -- at a given point in the program.
244 data Assignment =
245 -- This assignment can always be inlined. It is cheap or single-use.
246 AlwaysInline CmmExpr
247 -- This assignment should be sunk down to its first use. (This will
248 -- increase code size if the register is used in multiple control flow
249 -- paths, but won't increase execution time, and the reduction of
250 -- register pressure is worth it, I think.)
251 | AlwaysSink CmmExpr
252 -- We cannot safely optimize occurrences of this local register. (This
253 -- corresponds to top in the lattice structure.)
254 | NeverOptimize
255
256 -- Extract the expression that is being assigned to
257 xassign :: Assignment -> Maybe CmmExpr
258 xassign (AlwaysInline e) = Just e
259 xassign (AlwaysSink e) = Just e
260 xassign NeverOptimize = Nothing
261
262 -- Extracts the expressions, but only if both use the same constructor
263 xassign2 :: (Assignment, Assignment) -> Maybe (CmmExpr, CmmExpr)
264 xassign2 (AlwaysInline e, AlwaysInline e') = Just (e, e')
265 xassign2 (AlwaysSink e, AlwaysSink e') = Just (e, e')
266 xassign2 _ = Nothing
267
268 -- Note: We'd like to make decisions about "not optimizing" as soon as
269 -- possible, because this will make running the transfer function more
270 -- efficient.
271 type AssignmentMap = UniqFM Assignment
272
273 assignmentLattice :: DataflowLattice AssignmentMap
274 assignmentLattice = DataflowLattice "assignments for registers" emptyUFM (joinUFM add)
275 where add _ (OldFact old) (NewFact new)
276 = case (old, new) of
277 (NeverOptimize, _) -> (NoChange, NeverOptimize)
278 (_, NeverOptimize) -> (SomeChange, NeverOptimize)
279 (xassign2 -> Just (e, e'))
280 | e == e' -> (NoChange, old)
281 | otherwise -> (SomeChange, NeverOptimize)
282 _ -> (SomeChange, NeverOptimize)
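
-- For example (illustrative; e1 and e2 stand for distinct CmmExprs),
-- joining the facts for a single register coming from two predecessors:
--
--   old = NeverOptimize,    new = anything         =>  NoChange,   NeverOptimize
--   old = AlwaysInline e1,  new = AlwaysInline e1  =>  NoChange,   AlwaysInline e1
--   old = AlwaysInline e1,  new = AlwaysInline e2  =>  SomeChange, NeverOptimize
--   old = AlwaysSink e1,    new = AlwaysInline e1  =>  SomeChange, NeverOptimize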
283
284 -- Deletes sinks from the assignment map, because /this/ node is the place
285 -- where they will be sunk to.
286 deleteSinks :: UserOfLocalRegs n => n -> AssignmentMap -> AssignmentMap
287 deleteSinks n m = foldRegsUsed (adjustUFM f) m n
288 where f (AlwaysSink _) = NeverOptimize
289 f old = old
290
291 -- Invalidates any expressions that use the given register.
292 invalidateUsersOf :: CmmReg -> AssignmentMap -> AssignmentMap
293 -- foldUFM_Directly :: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a
294 invalidateUsersOf reg m = foldUFM_Directly f m m -- [foldUFM performance]
295 where f u (xassign -> Just e) m | reg `regUsedIn` e = addToUFM_Directly m u NeverOptimize
296 f _ _ m = m
297 {- This requires the entire spine of the map to be continually rebuilt,
298 - which causes crazy memory usage!
299 invalidateUsersOf reg = mapUFM (invalidateUsers' reg)
300 where invalidateUsers' reg (xassign -> Just e) | reg `regUsedIn` e = NeverOptimize
301 invalidateUsers' _ old = old
302 -}
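
-- For example (illustrative): when middleAssignment (below) processes an
-- assignment to the global register R1, invalidateUsersOf downgrades a
-- fact such as
--     x |-> AlwaysInline (R1 + 8)
-- to NeverOptimize (inlining R1 + 8 past this point would read the new
-- value of R1), whereas
--     y |-> AlwaysInline (Sp + 4)
-- does not mention R1 and is left alone.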
303
304 -- Note [foldUFM performance]
305 -- These calls to foldUFM no longer leak memory, but they do cause
306 -- pretty killer amounts of allocation. So they'll be something to
307 -- optimize; we need an algorithmic change to prevent us from having to
308 -- traverse the /entire/ map continually.
309
310 middleAssignment :: WithRegUsage CmmNode O O -> AssignmentMap -> AssignmentMap
311
312 -- Algorithm for annotated assignments:
313 -- 1. Delete any sinking assignments that were used by this instruction
314 -- 2. Add the assignment to our list of valid local assignments with
315 -- the correct optimization policy.
316 -- 3. Look for all assignments that reference that register and
317 -- invalidate them.
318 middleAssignment n@(AssignLocal r e usage) assign
319 = invalidateUsersOf (CmmLocal r) . add . deleteSinks n $ assign
320 where add m = addToUFM m r
321 $ case usage of
322 SingleUse -> AlwaysInline e
323 ManyUse -> decide e
324 decide CmmLit{} = AlwaysInline e
325 decide CmmReg{} = AlwaysInline e
326 decide CmmLoad{} = AlwaysSink e
327 decide CmmStackSlot{} = AlwaysSink e
328 decide CmmMachOp{} = AlwaysSink e
329 -- We'll always inline simple operations on the global
330 -- registers, to reduce register pressure: Sp - 4 or Hp - 8
331 -- EZY: Justify this optimization more carefully.
332 decide CmmRegOff{} = AlwaysInline e
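
-- For example (illustrative, in informal Cmm-style pseudocode):
--
--   x = R1 + 8;        // single use           => AlwaysInline (R1 + 8)
--   y = I32[Sp + 4];   // used more than once  => AlwaysSink (the load)
--   z = Sp - 16;       // used more than once  => AlwaysInline (cheap reg-plus-offset)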
333
334 -- Algorithm for unannotated assignments of global registers:
335 -- 1. Delete any sinking assignments that were used by this instruction
336 -- 2. Look for all assignments that reference this register and
337 -- invalidate them.
338 middleAssignment (Plain n@(CmmAssign reg@(CmmGlobal _) _)) assign
339 = invalidateUsersOf reg . deleteSinks n $ assign
340
341 -- Algorithm for unannotated assignments of *local* registers: do
342 -- nothing (it's a reload, so no state should have changed)
343 middleAssignment (Plain (CmmAssign (CmmLocal _) _)) assign = assign
344
345 -- Algorithm for stores:
346 -- 1. Delete any sinking assignments that were used by this instruction
347 -- 2. Look for all assignments that load from memory locations that
348 -- were clobbered by this store and invalidate them.
349 middleAssignment (Plain n@(CmmStore lhs rhs)) assign
350 = let m = deleteSinks n assign
351 in foldUFM_Directly f m m -- [foldUFM performance]
352 where f u (xassign -> Just x) m | (lhs, rhs) `clobbers` (u, x) = addToUFM_Directly m u NeverOptimize
353 f _ _ m = m
354 {- Also leaky
355 = mapUFM_Directly p . deleteSinks n $ assign
356 -- ToDo: There's a missed opportunity here: even if a memory
357 -- access we're attempting to sink gets clobbered at some
358 -- location, it's still /better/ to sink it to right before the
359 -- point where it gets clobbered. How might we do this?
360 -- Unfortunately, it's too late to change the assignment...
361 where p r (xassign -> Just x) | (lhs, rhs) `clobbers` (r, x) = NeverOptimize
362 p _ old = old
363 -}
364
365 -- Assumption: Unsafe foreign calls don't clobber memory
366 -- Since foreign calls clobber caller-saved registers, we need to
367 -- invalidate any assignments that reference those global registers.
368 -- This is kind of expensive. (One way to optimize this might be to
369 -- store extra information about expressions that allow this and other
370 -- checks to be done cheaply.)
371 middleAssignment (Plain n@(CmmUnsafeForeignCall{})) assign
372 = deleteCallerSaves (foldRegsDefd (\m r -> addToUFM m r NeverOptimize) (deleteSinks n assign) n)
373 where deleteCallerSaves m = foldUFM_Directly f m m
374 f u (xassign -> Just x) m | wrapRecExpf g x False = addToUFM_Directly m u NeverOptimize
375 f _ _ m = m
376 g (CmmReg (CmmGlobal r)) _ | callerSaves r = True
377 g (CmmRegOff (CmmGlobal r) _) _ | callerSaves r = True
378 g _ b = b
379
380 middleAssignment (Plain (CmmComment {})) assign
381 = assign
382
383 -- Assumptions:
384 -- * Writes using Hp do not overlap with any other memory locations
385 -- (An important invariant being relied on here is that we only ever
386 -- use Hp to allocate values on the heap, which appears to be the
387 -- case given hpReg usage, and that our heap writing code doesn't
388 -- do anything stupid like overlapping writes.)
389 -- * Stack slots do not overlap with any other memory locations
390 -- * Stack slots for different areas do not overlap
391 -- * Stack slots within the same area and different offsets may
392 -- overlap; we need to do a size check (see 'overlaps').
393 -- * Register slots only overlap with themselves. (But this shouldn't
394 -- happen in practice, because we'll fail to inline a reload across
395 -- the next spill.)
396 -- * Non stack-slot stores always conflict with each other. (This is
397 -- not always the case; we could probably do something special for Hp)
398 clobbers :: (CmmExpr, CmmExpr) -- (lhs, rhs) of clobbering CmmStore
399 -> (Unique, CmmExpr) -- (register, expression) that may be clobbered
400 -> Bool
401 clobbers (CmmRegOff (CmmGlobal Hp) _, _) (_, _) = False
402 clobbers (CmmReg (CmmGlobal Hp), _) (_, _) = False
403 -- ToDo: Also catch MachOp case
404 clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _)
405 | getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?)
406 clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
407 where f (CmmLoad (CmmStackSlot (CallArea a') o') t)
408 = (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))
409 f (CmmLoad e _) = containsStackSlot e
410 f (CmmMachOp _ es) = or (map f es)
411 f _ = False
412 -- Maybe there's an invariant broken if this actually ever
413 -- returns True
414 containsStackSlot (CmmLoad{}) = True -- load of a load, all bets off
415 containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)
416 containsStackSlot (CmmStackSlot{}) = True
417 containsStackSlot _ = False
418 clobbers (CmmStackSlot (RegSlot l) _, _) (_, expr) = f expr
419 where f (CmmLoad (CmmStackSlot (RegSlot l') _) _) = l == l'
420 f _ = False
421 clobbers _ (_, e) = f e
422 where f (CmmLoad (CmmStackSlot _ _) _) = False
423 f (CmmLoad{}) = True -- conservative
424 f (CmmMachOp _ es) = or (map f es)
425 f _ = False
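
-- For example (illustrative): a 32-bit store to young<k>+8 clobbers a
-- tracked F64[young<k>+12] (the byte ranges [4,8) and [4,12) overlap)
-- but not a tracked I32[young<k>+16] (byte range [12,16)); a store
-- through Hp clobbers nothing; and any other store conservatively
-- clobbers every tracked expression that contains a non-stack-slot load.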
426
427 -- Check for memory overlapping.
428 -- Diagram:
429 --        4       8      12
430 --        s  -w-  o
431 --        [  I32  ]
432 --        [      F64     ]
433 --        s' ----w'----   o'
434 type CallSubArea = (AreaId, Int, Int) -- area, offset, width
435 overlaps :: CallSubArea -> CallSubArea -> Bool
436 overlaps (a, _, _) (a', _, _) | a /= a' = False
437 overlaps (_, o, w) (_, o', w') =
438 let s = o - w
439 s' = o' - w'
440 in (s' < o) && (s < o') -- Not LTE, because [ I32 ][ I32 ] is OK
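
-- A standalone worked instance of this check (hypothetical names, not
-- used by this module), with the numbers from the diagram above:
{-
-- A slot is (offset, width); it covers the half-open byte range
-- [offset - width, offset).
spansOverlap :: (Int, Int) -> (Int, Int) -> Bool
spansOverlap (o, w) (o', w') = (o' - w' < o) && (o - w < o')

diagramCase, adjacentCase :: Bool
diagramCase  = spansOverlap (8, 4) (12, 8)  -- True:  [4,8) intersects [4,12)
adjacentCase = spansOverlap (8, 4) (12, 4)  -- False: [4,8) merely touches [8,12)
-}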
441
442 lastAssignment :: WithRegUsage CmmNode O C -> AssignmentMap -> [(Label, AssignmentMap)]
443 lastAssignment (Plain (CmmCall _ (Just k) _ _ _)) assign = [(k, invalidateVolatile k assign)]
444 lastAssignment (Plain (CmmForeignCall {succ=k})) assign = [(k, invalidateVolatile k assign)]
445 lastAssignment l assign = map (\id -> (id, deleteSinks l assign)) $ successors l
446
447 -- Invalidates any expressions that have volatile contents: essentially,
448 -- all terminals are volatile except for literals and loads of stack slots
449 -- that do not correspond to the call area for 'k' (the current call
450 -- area is volatile because overflow return parameters may be written
451 -- there.)
452 -- Note: mapUFM could be expensive, but hopefully block boundaries
453 -- aren't too common. If it is a problem, replace with something more
454 -- clever.
455 invalidateVolatile :: BlockId -> AssignmentMap -> AssignmentMap
456 invalidateVolatile k m = mapUFM p m
457 where p (AlwaysInline e) = if exp e then AlwaysInline e else NeverOptimize
458 where exp CmmLit{} = True
459 exp (CmmLoad (CmmStackSlot (CallArea (Young k')) _) _)
460 | k' == k = False
461 exp (CmmLoad (CmmStackSlot _ _) _) = True
462 exp (CmmMachOp _ es) = and (map exp es)
463 exp _ = False
464 p _ = NeverOptimize -- probably shouldn't happen with AlwaysSink
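
-- For example (illustrative), on entry to the continuation 'k' of a call:
--
--   x |-> AlwaysInline 42                    -- kept: literals are not volatile
--   y |-> AlwaysInline I32[old-area slot]    -- kept: not the current call area
--   z |-> AlwaysInline I32[young<k> slot]    -- dropped: the call may overwrite it
--   w |-> AlwaysInline (R1 + 8)              -- dropped: R1 is a volatile terminal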
465
466 assignmentTransfer :: FwdTransfer (WithRegUsage CmmNode) AssignmentMap
467 assignmentTransfer = mkFTransfer3 (flip const) middleAssignment ((mkFactBase assignmentLattice .) . lastAssignment)
468
469 -- Note [Soundness of inlining]
470 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
471 -- In the Hoopl paper, the soundness condition on rewrite functions is
472 -- described as follows:
473 --
474 -- "If it replaces a node n by a replacement graph g, then g must
475 -- be observationally equivalent to n under the assumptions
476 -- expressed by the incoming dataflow fact f. Moreover, analysis of
477 -- g must produce output fact(s) that are at least as informative
478 -- as the fact(s) produced by applying the transfer function to n."
479 --
480 -- We consider the second condition in more detail here. It says that,
481 -- given the rewrite R(n, f) = g, for any incoming fact f' consistent
482 -- with f (f' >= f), running the transfer function gives T(f', n) <= T(f', g).
483 -- For inlining this is not necessarily the case:
484 --
485 -- n = "x = a + 2"
486 -- f = f' = {a = y}
487 -- g = "x = y + 2"
488 -- T(f', n) = {x = a + 2, a = y}
489 -- T(f', g) = {x = y + 2, a = y}
490 --
491 -- y + 2 and a + 2 are not obviously comparable, and a naive
492 -- implementation of the lattice would say they are incomparable.
493 -- At best, this means we may be over-conservative, at worst, it means
494 -- we may not terminate.
495 --
496 -- However, in the original Lerner-Grove-Chambers paper, soundness and
497 -- termination are separated, and only equivalence of facts is required
498 -- for soundness. Monotonicity of the transfer function is not required
499 -- for termination (as the calculation of least-upper-bound prevents
500 -- this from being a problem), but it means we won't necessarily find
501 -- the least fixed point.
502
503 -- Note [Coherency of annotations]
504 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
505 -- Is it possible for our usage annotations to become invalid after we
506 -- start performing transformations? As the usage info only provides
507 -- an upper bound, we only need to consider cases where the usages of
508 -- a register may increase due to transformations--e.g. any reference
509 -- to a local register in an AlwaysInline or AlwaysSink instruction, whose
510 -- originating assignment was single use (we don't care about the
511 -- many use case, because it is the top of the lattice). But such a
512 -- case is not possible, because we always inline any single use
513 -- register. QED.
514 --
515 -- TODO: A useful lint option would be to check this invariant that
516 -- there is never a local register in the assignment map that is
517 -- single-use.
518
519 -- Note [Soundness of store rewriting]
520 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
521 -- Its soundness depends on the invariant that no assignment is made to
522 -- the local register before its store is accessed. This is clearly
523 -- true with unoptimized spill-reload code, and as the store will always
524 -- be rewritten first (if possible), there is no chance of it being
525 -- propagated down before getting written (possibly with incorrect
526 -- values from the assignment map, due to reassignment of the local
527 -- register.) This is probably not locally sound.
528
529 assignmentRewrite :: FwdRewrite FuelUniqSM (WithRegUsage CmmNode) AssignmentMap
530 assignmentRewrite = mkFRewrite3 first middle last
531 where
532 first _ _ = return Nothing
533 middle :: WithRegUsage CmmNode O O -> AssignmentMap -> GenCmmReplGraph (WithRegUsage CmmNode) O O
534 middle (Plain m) assign = return $ rewrite assign (precompute assign m) mkMiddle m
535 middle (AssignLocal l e u) assign = return $ rewriteLocal assign (precompute assign (CmmAssign (CmmLocal l) e)) l e u
536 last (Plain l) assign = return $ rewrite assign (precompute assign l) mkLast l
537 -- Tuple is (inline?, reloads for sinks)
538 precompute :: AssignmentMap -> CmmNode O x -> (Bool, [WithRegUsage CmmNode O O])
539 precompute assign n = foldRegsUsed f (False, []) n -- duplicates are harmless
540 where f (i, l) r = case lookupUFM assign r of
541 Just (AlwaysSink e) -> (i, (Plain (CmmAssign (CmmLocal r) e)):l)
542 Just (AlwaysInline _) -> (True, l)
543 Just NeverOptimize -> (i, l)
544 -- This case can show up when we have
545 -- limited optimization fuel.
546 Nothing -> (i, l)
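          -- For example (illustrative): if a node reads registers x, y
          -- and z, and the current facts are
          --     x |-> AlwaysInline e1, y |-> AlwaysSink e2, z |-> NeverOptimize
          -- then precompute returns (True, [Plain (CmmAssign (CmmLocal y) e2)]):
          -- inline into the node, and materialise the reload of y
          -- immediately before it.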
547 rewrite :: AssignmentMap
548 -> (Bool, [WithRegUsage CmmNode O O])
549 -> (WithRegUsage CmmNode O x -> Graph (WithRegUsage CmmNode) O x)
550 -> CmmNode O x
551 -> Maybe (Graph (WithRegUsage CmmNode) O x)
552 rewrite _ (False, []) _ _ = Nothing
553 -- Note [CmmCall Inline Hack]
554 -- Conservative hack: don't do any inlining on what will
555 -- be translated into OldCmm CmmCalls, since the code
556 -- produced here tends to be unproblematic and I need to write
557 -- lint passes to ensure that we don't put anything in the
558 -- arguments that could be construed as a global register by
559 -- some later translation pass. (For example, slots will turn
560 -- into dereferences of Sp). See [Register parameter passing].
561 -- ToDo: Fix this up to only bug out if all inlines were for
562 -- CmmExprs with global registers (we can't use the
563 -- straightforward mapExpDeep call, in this case.) ToDo: We miss
564 -- an opportunity here, where all possible inlinings should
565 -- instead be sunk.
566 rewrite _ (True, []) _ n | not (inlinable n) = Nothing -- see [CmmCall Inline Hack]
567 rewrite assign (i, xs) mk n = Just $ mkMiddles xs <*> mk (Plain (inline i assign n))
568
569 rewriteLocal :: AssignmentMap
570 -> (Bool, [WithRegUsage CmmNode O O])
571 -> LocalReg -> CmmExpr -> RegUsage
572 -> Maybe (Graph (WithRegUsage CmmNode) O O)
573 rewriteLocal _ (False, []) _ _ _ = Nothing
574 rewriteLocal assign (i, xs) l e u = Just $ mkMiddles xs <*> mkMiddle n'
575 where n' = AssignLocal l e' u
576 e' = if i then wrapRecExp (inlineExp assign) e else e
577 -- inlinable check omitted, since we can always inline into
578 -- assignments.
579
580 inline :: Bool -> AssignmentMap -> CmmNode e x -> CmmNode e x
581 inline False _ n = n
582 inline True _ n | not (inlinable n) = n -- see [CmmCall Inline Hack]
583 inline True assign n = mapExpDeep (inlineExp assign) n
584
585 inlineExp assign old@(CmmReg (CmmLocal r))
586 = case lookupUFM assign r of
587 Just (AlwaysInline x) -> x
588 _ -> old
589 inlineExp assign old@(CmmRegOff (CmmLocal r) i)
590 = case lookupUFM assign r of
591 Just (AlwaysInline x) ->
592 case x of
593 (CmmRegOff r' i') -> CmmRegOff r' (i + i')
594 _ -> CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
595 where rep = typeWidth (localRegType r)
596 _ -> old
597 -- See Note [Soundness of store rewriting]
598 inlineExp assign old@(CmmLoad (CmmStackSlot (RegSlot r) _) _)
599 = case lookupUFM assign r of
600 Just (AlwaysInline x) -> x
601 _ -> old
602 inlineExp _ old = old
603
604 inlinable :: CmmNode e x -> Bool
605 inlinable (CmmCall{}) = False
606 inlinable (CmmForeignCall{}) = False
607 inlinable (CmmUnsafeForeignCall{}) = False
608 inlinable _ = True
609
610 -- Need to interleave this with inlining, because MachOp folding results
611 -- in literals, which we can inline more aggressively, and inlining
612 -- gives us opportunities for more folding. However, we don't need any
613 -- facts to do MachOp folding.
614 machOpFoldRewrite :: FwdRewrite FuelUniqSM (WithRegUsage CmmNode) a
615 machOpFoldRewrite = mkFRewrite3 first middle last
616 where first _ _ = return Nothing
617 middle :: WithRegUsage CmmNode O O -> a -> GenCmmReplGraph (WithRegUsage CmmNode) O O
618 middle (Plain m) _ = return (fmap (mkMiddle . Plain) (foldNode m))
619 middle (AssignLocal l e r) _ = return (fmap f (wrapRecExpM foldExp e))
620 where f e' = mkMiddle (AssignLocal l e' r)
621 last :: WithRegUsage CmmNode O C -> a -> GenCmmReplGraph (WithRegUsage CmmNode) O C
622 last (Plain l) _ = return (fmap (mkLast . Plain) (foldNode l))
623 foldNode :: CmmNode e x -> Maybe (CmmNode e x)
624 foldNode n = mapExpDeepM foldExp n
625 foldExp (CmmMachOp op args) = cmmMachOpFoldM op args
626 foldExp _ = Nothing
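
-- A standalone sketch (hypothetical mini-language, not the real CmmExpr)
-- of why the two rewrites are interleaved: folding exposes literals,
-- literals can always be inlined, and inlining exposes further folds.
-- The real pass uses cmmMachOpFoldM together with the AlwaysInline
-- policy above.
{-
data E = Lit Int | Var String | Add E E

foldE :: E -> E
foldE (Add a b) = case (foldE a, foldE b) of
                    (Lit x, Lit y) -> Lit (x + y)
                    (a', b')       -> Add a' b'
foldE e = e

inlineVar :: String -> E -> E -> E
inlineVar x rhs (Var y) | x == y = rhs
inlineVar x rhs (Add a b) = Add (inlineVar x rhs a) (inlineVar x rhs b)
inlineVar _ _ e = e

-- foldE (Add (Lit 4) (Lit 8))                            == Lit 12
-- foldE (inlineVar "x" (Lit 12) (Add (Var "x") (Lit 1))) == Lit 13
-}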
627
628 -- ToDo: Outputable instance for UsageMap and AssignmentMap