{-# OPTIONS_GHC -XGADTs -XNoMonoLocalBinds #-}
-- Norman likes local bindings
-- If this module lives on I'd like to get rid of this flag in due course

-- Todo: remove
{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}

{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
#if __GLASGOW_HASKELL__ >= 703
-- GHC 7.0.1 improved incomplete pattern warnings with GADTs
{-# OPTIONS_GHC -fwarn-incomplete-patterns #-}
#endif

module CmmStackLayout
    ( SlotEnv, liveSlotAnal, liveSlotTransfers, removeLiveSlotDefs
    , getSpEntryMap, layout, manifestSP, igraph, areaBuilder
    , stubSlotsOnDeath ) -- to help crash early during debugging
where

import Constants
import Prelude hiding (succ, zip, unzip, last)

import BlockId
import Cmm
import CmmExpr
import CmmProcPoint
import Maybes
import MkGraph (stackStubExpr)
import Control.Monad
import OptimizationFuel
import Outputable
import SMRep (ByteOff)

import Compiler.Hoopl

import Data.Map (Map)
import qualified Data.Map as Map
import qualified FiniteMap as Map

------------------------------------------------------------------------
--                         Stack Layout                               --
------------------------------------------------------------------------

-- | Before we lay out the stack, we need to know something about the
-- liveness of the stack slots. In particular, to decide whether we can
-- reuse a stack location to hold multiple stack slots, we need to know
-- when each of the stack slots is used.
-- Although we might be tempted to use something simpler, we really need a full
-- interference graph. Consider the following case:
--   case <...> of
--     1 -> <spill x>; // y is dead out
--     2 -> <spill y>; // x is dead out
--     3 -> <spill x and y>
-- If we consider the arms in order and we use just the deadness information given by a
-- dataflow analysis, we might decide to allocate the stack slots for x and y
-- to the same stack location, which will lead to incorrect code in the third arm.
-- We won't make this mistake with an interference graph.

-- First, the liveness analysis.
-- We represent a slot with an area, an offset into the area, and a width.
-- Tracking the live slots is a bit tricky because there may be loads and stores
-- into only a part of a stack slot (e.g. loading the low word of a 2-word long);
-- for example, Slot A 0 8 overlaps with Slot A 4 4.
--
-- The definition of a slot set is intended to reduce the number of overlap
-- checks we have to make. There's no reason to check for overlap between
-- slots in different areas, so we segregate the map by Area's.
-- We expect few slots in each Area, so we collect them in an unordered list.
-- To keep these lists short, any contiguous live slots are coalesced into
-- a single slot, on insertion.

slotLattice :: DataflowLattice SubAreaSet
slotLattice = DataflowLattice "live slots" Map.empty add
  where add _ (OldFact old) (NewFact new) = case Map.foldRightWithKey addArea (False, old) new of
                                              (change, x) -> (changeIf change, x)
        addArea a newSlots z = foldr (addSlot a) z newSlots
        addSlot a slot (changed, map) =
          let (c, live) = liveGen slot $ Map.findWithDefault [] a map
          in (c || changed, Map.insert a live map)

slotLatticeJoin :: [SubAreaSet] -> SubAreaSet
slotLatticeJoin facts = foldr extend (fact_bot slotLattice) facts
  where extend fact res = snd $ fact_join slotLattice undefined (OldFact fact) (NewFact res)

type SlotEnv = BlockEnv SubAreaSet
  -- The sub-areas live on entry to the block

liveSlotAnal :: CmmGraph -> FuelUniqSM SlotEnv
liveSlotAnal g = liftM snd $ dataflowPassBwd g [] $ analBwd slotLattice liveSlotTransfers

-- Add the subarea s to the subareas in the list-set (possibly coalescing it with
-- adjacent subareas), and also return whether s was a new addition.
liveGen :: SubArea -> [SubArea] -> (Bool, [SubArea])
liveGen s set = liveGen' s set []
  where liveGen' s [] z = (True, s : z)
        liveGen' s@(a, hi, w) (s'@(a', hi', w') : rst) z =
          if a /= a' || hi < lo' || lo > hi' then   -- no overlap
            liveGen' s rst (s' : z)
          else if s' `contains` s then              -- old contains new
            (False, set)
          else                                      -- overlap: coalesce the slots
            let new_hi = max hi hi'
                new_lo = min lo lo'
            in liveGen' (a, new_hi, new_hi - new_lo) rst z
          where lo  = hi  - w  -- remember: areas grow down
                lo' = hi' - w'
                contains (a, hi, w) (a', hi', w') =
                  a == a' && hi >= hi' && hi - w <= hi' - w'
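
-- Worked example (illustrative, not part of the original source): with some
-- area `a`, the sub-area (a, 8, 4) covers bytes 4..8 and (a, 4, 4) covers
-- bytes 0..4 (remember that areas grow down). The two are contiguous, so
-- liveGen coalesces them and reports an addition:
--
--   liveGen (a, 8, 4) [(a, 4, 4)]  ==  (True, [(a, 8, 8)])
--
-- whereas inserting a sub-area that is already covered leaves the set
-- unchanged and reports no addition:
--
--   liveGen (a, 6, 2) [(a, 8, 8)]  ==  (False, [(a, 8, 8)])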

liveKill :: SubArea -> [SubArea] -> [SubArea]
liveKill (a, hi, w) set = -- pprTrace "killing slots in area" (ppr a) $
  liveKill' set []
  where liveKill' [] z = z
        liveKill' (s'@(a', hi', w') : rst) z =
          if a /= a' || hi < lo' || lo > hi' then   -- no overlap
            liveKill' rst (s' : z)
          else                                      -- overlap: split the old slot
            let z'  = if hi' > hi then (a, hi', hi' - hi) : z  else z
                z'' = if lo > lo' then (a, lo,  lo - lo') : z' else z'
            in liveKill' rst z''
          where lo  = hi  - w  -- remember: areas grow down
                lo' = hi' - w'
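
-- Worked example (illustrative, not part of the original source): killing the
-- sub-area (a, 8, 4) (bytes 4..8) out of a live set containing (a, 12, 12)
-- (bytes 0..12) splits the old slot into the parts above and below the kill:
--
--   liveKill (a, 8, 4) [(a, 12, 12)]  ==  [(a, 4, 4), (a, 12, 4)]
--
-- i.e. bytes 0..4 and bytes 8..12 remain live.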

-- Note: the stack slots that hold variables returned on the stack are not
-- considered live-in to the block -- we treat the first node as a definition site.
-- BEWARE: Am I being a little careless here in failing to check for the
-- entry Id (which would use the CallArea Old)?
liveSlotTransfers :: BwdTransfer CmmNode SubAreaSet
liveSlotTransfers = mkBTransfer3 frt mid lst
  where frt :: CmmNode C O -> SubAreaSet -> SubAreaSet
        frt (CmmEntry l) f = Map.delete (CallArea (Young l)) f

        mid :: CmmNode O O -> SubAreaSet -> SubAreaSet
        mid n f = foldSlotsUsed addSlot (removeLiveSlotDefs f n) n
        lst :: CmmNode O C -> FactBase SubAreaSet -> SubAreaSet
        lst n f = liveInSlots n $ case n of
            CmmCall {cml_cont=Nothing, cml_args=args} -> add_area (CallArea Old) args out
            CmmCall {cml_cont=Just k, cml_args=args}  -> add_area (CallArea Old) args (add_area (CallArea (Young k)) args out)
            CmmForeignCall {succ=k, updfr=oldend}     -> add_area (CallArea Old) oldend (add_area (CallArea (Young k)) wORD_SIZE out)
            _ -> out
          where out = joinOutFacts slotLattice n f
                add_area _ n live | n == 0 = live
                add_area a n live = Map.insert a (snd $ liveGen (a, n, n) $ Map.findWithDefault [] a live) live

-- Slot sets: adding slots, removing slots, and checking for membership.
liftToArea :: Area -> ([SubArea] -> [SubArea]) -> SubAreaSet -> SubAreaSet
addSlot, removeSlot :: SubAreaSet -> SubArea -> SubAreaSet
elemSlot :: SubAreaSet -> SubArea -> Bool
liftToArea a f map = Map.insert a (f (Map.findWithDefault [] a map)) map
addSlot    live (a, i, w) = liftToArea a (snd . liveGen (a, i, w)) live
removeSlot live (a, i, w) = liftToArea a (liveKill (a, i, w)) live
elemSlot   live (a, i, w) =
  not $ fst $ liveGen (a, i, w) (Map.findWithDefault [] a live)
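-- Note that liveGen returns False precisely when the given sub-area is already
-- contained in an existing sub-area, so elemSlot tests membership simply by
-- checking that inserting the sub-area would change nothing.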

removeLiveSlotDefs :: (DefinerOfSlots s, UserOfSlots s) => SubAreaSet -> s -> SubAreaSet
removeLiveSlotDefs = foldSlotsDefd removeSlot

liveInSlots :: (DefinerOfSlots s, UserOfSlots s) => s -> SubAreaSet -> SubAreaSet
liveInSlots x live = foldSlotsUsed addSlot (removeLiveSlotDefs live x) x

liveLastIn :: CmmNode O C -> (BlockId -> SubAreaSet) -> SubAreaSet
liveLastIn l env = liveInSlots l (liveLastOut env l)

-- Don't forget to keep the outgoing parameters in the CallArea live,
-- as well as the update frame.
-- Note: We have to keep the update frame live at a call because of the
-- case where the function doesn't return -- in that case, there won't
-- be a return to keep the update frame live. We'd still better keep the
-- info pointer in the update frame live at any call site;
-- otherwise we could screw up the garbage collector.
liveLastOut :: (BlockId -> SubAreaSet) -> CmmNode O C -> SubAreaSet
liveLastOut env l =
  case l of
    CmmCall _ Nothing n _ _ ->
      add_area (CallArea Old) n out -- add outgoing args (includes upd frame)
    CmmCall _ (Just k) n _ _ ->
      add_area (CallArea Old) n (add_area (CallArea (Young k)) n out)
    CmmForeignCall { succ = k, updfr = oldend } ->
      add_area (CallArea Old) oldend (add_area (CallArea (Young k)) wORD_SIZE out)
    _ -> out
  where out = slotLatticeJoin $ map env $ successors l
        add_area _ n live | n == 0 = live
        add_area a n live =
          Map.insert a (snd $ liveGen (a, n, n) $ Map.findWithDefault [] a live) live

-- The liveness analysis must be precise: otherwise, we won't know if a definition
-- should really kill a live-out stack slot.
-- But the interference graph does not have to be precise -- it might decide that
-- any live areas interfere. To maintain both a precise analysis and an imprecise
-- interference graph, we need to convert the live-out stack slots to graph nodes
-- at each and every instruction; rather than reconstruct a new list of nodes
-- every time, I provide a function to fold over the nodes, which should be a
-- reasonably efficient approach for the implementations we envision.
-- Of course, it will probably be much easier to program if we just return a list...
type Set x = Map x ()
data IGraphBuilder n =
  Builder { foldNodes :: forall z. SubArea -> (n -> z -> z) -> z -> z
          , _wordsOccupied :: AreaSizeMap -> AreaMap -> n -> [Int]
          }

areaBuilder :: IGraphBuilder Area
areaBuilder = Builder fold words
  where fold (a, _, _) f z = f a z
        words areaSize areaMap a =
          case Map.lookup a areaMap of
            Just addr -> [addr .. addr + (Map.lookup a areaSize `orElse`
                                          pprPanic "wordsOccupied: unknown area" (ppr areaSize <+> ppr a))]
            Nothing   -> []
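
-- Illustrative sketch (not from the original source): areaBuilder treats each
-- Area as a single interference-graph node, so folding over the nodes of a
-- sub-area just visits its Area once:
--
--   foldNodes areaBuilder (a, 8, 4) (:) []  ==  [a]
--
-- A finer-grained builder (e.g. the commented-out slotBuilder below) could
-- instead produce one node per slot within an Area.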

--slotBuilder :: IGraphBuilder (Area, Int)
--slotBuilder = undefined

-- Now, we can build the interference graph.
-- The usual story: a definition interferes with all live outs and all other
-- definitions.
type IGraph x = Map x (Set x)
type IGPair x = (IGraph x, IGraphBuilder x)
igraph :: (Ord x) => IGraphBuilder x -> SlotEnv -> CmmGraph -> IGraph x
igraph builder env g = foldr interfere Map.empty (postorderDfs g)
  where foldN = foldNodes builder
        interfere block igraph = foldBlockNodesB3 (first, middle, last) block igraph
          where first _ (igraph, _) = igraph
                middle node (igraph, liveOut) =
                  (addEdges igraph node liveOut, liveInSlots node liveOut)
                last node igraph =
                  (addEdges igraph node $ liveLastOut env' node, liveLastIn node env')

                -- add edges between a def and the other defs and liveouts
                addEdges igraph i out = fst $ foldSlotsDefd addDef (igraph, out) i
                addDef (igraph, out) def@(a, _, _) =
                  (foldN def (addDefN out) igraph,
                   Map.insert a (snd $ liveGen def (Map.findWithDefault [] a out)) out)
                addDefN out n igraph =
                  let addEdgeNO o igraph = foldN o addEdgeNN igraph
                      addEdgeNN n' igraph = addEdgeNN' n n' $ addEdgeNN' n' n igraph
                      addEdgeNN' n n' igraph = Map.insert n (Map.insert n' () set) igraph
                        where set = Map.findWithDefault Map.empty n igraph
                  in Map.foldRightWithKey (\ _ os igraph -> foldr addEdgeNO igraph os) igraph out
        env' bid = mapLookup bid env `orElse` panic "unknown blockId in igraph"

-- Before allocating stack slots, we need to collect one more piece of information:
-- what's the highest offset (in bytes) used in each Area?
-- We'll need to allocate that much space for each Area.

-- Mapping of areas to area sizes (not offsets!)
type AreaSizeMap = AreaMap

-- JD: WHY CAN'T THIS COME FROM THE slot-liveness info?
getAreaSize :: ByteOff -> CmmGraph -> AreaSizeMap
-- The domain of the returned mapping consists only of Areas
-- used for (a) variable spill slots, and (b) parameter passing areas for calls
getAreaSize entry_off g =
  foldGraphBlocks (foldBlockNodesF3 (first, add_regslots, last))
                  (Map.singleton (CallArea Old) entry_off) g
  where first _ z = z
        last :: CmmNode O C -> Map Area Int -> Map Area Int
        last l@(CmmCall _ Nothing args res _) z = add_regslots l (add (add z area args) area res)
          where area = CallArea Old
        last l@(CmmCall _ (Just k) args res _) z = add_regslots l (add (add z area args) area res)
          where area = CallArea (Young k)
        last l@(CmmForeignCall {succ = k}) z = add_regslots l (add z area wORD_SIZE)
          where area = CallArea (Young k)
        last l z = add_regslots l z
        add_regslots i z = foldSlotsUsed addSlot (foldSlotsDefd addSlot z i) i
        addSlot z (a@(RegSlot (LocalReg _ ty)), _, _) =
          add z a $ widthInBytes $ typeWidth ty
        addSlot z _ = z
        add z a off = Map.insert a (max off (Map.findWithDefault 0 a z)) z
        -- The 'max' is important. Two calls, to f and g, might share a common
        -- continuation (and hence a common CallArea), but their number of overflow
        -- parameters might differ.
        -- EZY: Ought to use insert with combining function...
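        -- Illustrative example (not from the original source): if a call to f
        -- passes 8 bytes of overflow arguments to continuation k and a call to
        -- g passes 16 bytes to the same k, then CallArea (Young k) ends up with
        -- size  max 8 16 = 16.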


-- Find the Stack slots occupied by the subarea's conflicts
conflictSlots :: Ord x => IGPair x -> AreaSizeMap -> AreaMap -> SubArea -> Set Int
conflictSlots (ig, Builder foldNodes wordsOccupied) areaSize areaMap subarea =
  foldNodes subarea foldNode Map.empty
  where foldNode n set = Map.foldRightWithKey conflict set $ Map.findWithDefault Map.empty n ig
        conflict n' () set = liveInSlots areaMap n' set
        -- Add stack slots occupied by igraph node n
        liveInSlots areaMap n set = foldr setAdd set (wordsOccupied areaSize areaMap n)
        setAdd w s = Map.insert w () s

-- Find any open space for 'area' on the stack, starting from the
-- 'offset'. If the area is a CallArea or a spill slot for a pointer,
-- then it must be word-aligned.
freeSlotFrom :: Ord x => IGPair x -> AreaSizeMap -> Int -> AreaMap -> Area -> Int
freeSlotFrom ig areaSize offset areaMap area =
  let size = Map.lookup area areaSize `orElse` 0
      conflicts = conflictSlots ig areaSize areaMap (area, size, size)
      -- CallAreas and Ptrs need to be word-aligned (round up!)
      align = case area of
                CallArea _ -> align'
                RegSlot r | isGcPtrType (localRegType r) -> align'
                RegSlot _ -> id
      align' n = (n + (wORD_SIZE - 1)) `div` wORD_SIZE * wORD_SIZE
      -- Find a space big enough to hold the area
      findSpace curr 0 = curr
      findSpace curr cnt = -- part of target slot, # of bytes left to check
        if Map.member curr conflicts then
          findSpace (align (curr + size)) size -- try the next (possibly) open space
        else findSpace (curr - 1) (cnt - 1)
  in findSpace (align (offset + size)) size
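
-- Worked example (illustrative, and assuming wORD_SIZE = 4): align' rounds a
-- byte offset up to the next word boundary, e.g.
--
--   align' 5  ==  (5 + 3) `div` 4 * 4  ==  8
--   align' 8  ==  (8 + 3) `div` 4 * 4  ==  8
--
-- so freeSlotFrom uses align to round each candidate starting point up to a
-- word boundary for CallAreas and pointer spill slots.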

-- Find an open space on the stack, and assign it to the area.
allocSlotFrom :: Ord x => IGPair x -> AreaSizeMap -> Int -> AreaMap -> Area -> AreaMap
allocSlotFrom ig areaSize from areaMap area =
  if Map.member area areaMap then areaMap
  else Map.insert area (freeSlotFrom ig areaSize from areaMap area) areaMap

-- Figure out all of the offsets from the slot location; this will be
-- non-zero for procpoints.
type SpEntryMap = BlockEnv Int
getSpEntryMap :: Int -> CmmGraph -> SpEntryMap
getSpEntryMap entry_off g@(CmmGraph {g_entry = entry})
    = foldGraphBlocks add_sp_off (mapInsert entry entry_off emptyBlockMap) g
  where add_sp_off :: CmmBlock -> BlockEnv Int -> BlockEnv Int
        add_sp_off b env =
          case lastNode b of
            CmmCall {cml_cont=Just succ, cml_ret_args=off} -> mapInsert succ off env
            CmmForeignCall {succ=succ}                     -> mapInsert succ wORD_SIZE env
            _                                              -> env

-- | Greedy stack layout.
-- Compute liveness, build the interference graph, and allocate slots for the areas.
-- We visit each basic block in a (generally) forward order.

-- At each instruction that names a register subarea r, we immediately allocate
-- any available slot on the stack by the following procedure:
--   1. Find the sub-areas S that conflict with r
--   2. Find the stack slots used for S
--   3. Choose a contiguous stack space s not in S (s must be large enough to hold r)

-- For a CallArea, we allocate the stack space only when we reach a function
-- call that returns to the CallArea's blockId.
-- Then, we allocate the Area subject to the following constraints:
--   a) It must be younger than all the sub-areas that are live on entry to the block
--      This constraint is only necessary for the successor of a call
--   b) It must not overlap with any already-allocated Area with which it conflicts
--      (i.e. any Area that, at some point, not necessarily now, is live at the same time)
-- Part (b) is just steps 1-3 above.

-- Note: The stack pointer only has to be younger than the youngest live stack slot
-- at proc points. Otherwise, the stack pointer can point anywhere.

layout :: ProcPointSet -> SpEntryMap -> SlotEnv -> ByteOff -> CmmGraph -> AreaMap
-- The domain of the returned map includes an Area for EVERY block
-- including each block that is not the successor of a call (ie is not a proc-point)
-- That's how we return the info of what the SP should be at the entry of every non
-- procpoint block. However, note that procpoint blocks have their
-- /slot/ stored, which is not necessarily the value of the SP on entry
-- to the block (in fact, it probably isn't, due to argument passing).
-- See [Procpoint Sp offset]

layout procPoints spEntryMap env entry_off g =
  let ig = (igraph areaBuilder env g, areaBuilder)
      env' bid = mapLookup bid env `orElse` panic "unknown blockId in igraph"
      areaSize = getAreaSize entry_off g

      -- Find the youngest live stack slot that has already been allocated
      youngest_live :: AreaMap     -- Already allocated
                    -> SubAreaSet  -- Sub-areas live here
                    -> ByteOff     -- Offset of the youngest byte of any
                                   --   already-allocated, live sub-area
      youngest_live areaMap live = fold_subareas young_slot live 0
        where young_slot (a, o, _) z =
                case Map.lookup a areaMap of
                  Just top -> max z $ top + o
                  Nothing  -> z
              fold_subareas f m z = Map.foldRightWithKey (\_ s z -> foldr f z s) z m

      -- Allocate space for spill slots and call areas
      allocVarSlot = allocSlotFrom ig areaSize 0

      -- Update the successor's incoming SP.
      setSuccSPs inSp bid areaMap =
        case (Map.lookup area areaMap, mapLookup bid (toBlockMap g)) of
          (Just _, _) -> areaMap -- succ already knows incoming SP
          (Nothing, Just _) ->
            if setMember bid procPoints then
              let young = youngest_live areaMap $ env' bid
                  -- start = case returnOff stackInfo of Just b  -> max b young
                  --                                     Nothing -> young
                  start = young -- maybe wrong, but I don't understand
                                -- why the preceding is necessary...
              in allocSlotFrom ig areaSize start areaMap area
            else Map.insert area inSp areaMap
          (_, Nothing) -> panic "Block not found in cfg"
        where area = CallArea (Young bid)

      layoutAreas areaMap block = foldBlockNodesF3 (flip const, allocMid, allocLast (entryLabel block)) block areaMap
      allocMid m areaMap = foldSlotsDefd alloc' (foldSlotsUsed alloc' areaMap m) m
      allocLast bid l areaMap =
        foldr (setSuccSPs inSp) areaMap' (successors l)
        where inSp = slot + spOffset -- [Procpoint Sp offset]
              -- If it's not in the map, we should use our previous
              -- calculation unchanged.
              spOffset = mapLookup bid spEntryMap `orElse` 0
              slot = expectJust "slot in" $ Map.lookup (CallArea (Young bid)) areaMap
              areaMap' = foldSlotsDefd alloc' (foldSlotsUsed alloc' areaMap l) l
      alloc' areaMap (a@(RegSlot _), _, _) = allocVarSlot areaMap a
      alloc' areaMap _ = areaMap

      initMap = Map.insert (CallArea (Young (g_entry g))) 0
              . Map.insert (CallArea Old) 0
              $ Map.empty

      areaMap = foldl layoutAreas initMap (postorderDfs g)
  in -- pprTrace "ProcPoints" (ppr procPoints) $
     -- pprTrace "Area SizeMap" (ppr areaSize) $
     -- pprTrace "Entry offset" (ppr entry_off) $
     -- pprTrace "Area Map" (ppr areaMap) $
     areaMap

{- Note [Procpoint Sp offset]

The calculation of inSp is a little tricky. (Un)fortunately, if you get
it wrong, you will get inefficient but correct code. You know you've
got it wrong if the generated stack pointer bounces up and down for no
good reason.

Why can't we just set inSp to the location of the slot? (This is what
the code used to do.) The trouble is when we actually hit the proc
point the start of the slot will not be the same as the actual Sp due
to argument passing:

  a:
    I32[(young<b> + 4)] = cde;
    // Stack pointer is moved to young end (bottom) of young<b> for call
    // +-------+
    // | arg 1 |
    // +-------+ <- Sp
    call (I32[foobar::I32])(...) returns to Just b (4) (4) with update frame 4;
  b:
    // After call, stack pointer is above the old end (top) of
    // young<b> (the difference is spOffset)
    // +-------+ <- Sp
    // | arg 1 |
    // +-------+

If we blithely set the Sp to be the same as the slot (the young end of
young<b>), an adjustment will be necessary when we go to the next block.
This is wasteful. So, instead, for the next block after a procpoint,
the actual Sp should be set to the same as the true Sp when we just
entered the procpoint. Then manifestSP will automatically do the right
thing.

Questions you may ask:

1. Why don't we need to change the mapping for the procpoint itself?
   Because manifestSP does its own calculation of the true stack value,
   manifestSP will notice the discrepancy between the actual stack
   pointer and the slot start, and adjust all of its memory accesses
   accordingly. So the only problem is when we adjust the Sp in
   preparation for the successor block; that's why this code is here and
   not in setSuccSPs.

2. Why don't we make the procpoint call area and the true offset match
   up? If we did that, we would never use memory above the true value
   of the stack pointer, thus wasting all of the stack we used to store
   arguments. You might think that some clever changes to the slot
   offsets, using negative offsets, might fix it, but this does not make
   semantic sense.

3. If manifestSP is already calculating the true stack value, why can't we
   do this trick inside manifestSP itself? The reason is that if two
   branches join with inconsistent SPs, one of them has to be fixed: we
   can't know what the fix should be without already knowing what the
   chosen location of SP is on the next successor (this is
   the "succ already knows incoming SP" case). This calculation cannot
   be easily done in manifestSP, since it processes the nodes
   /backwards/. So we need to have figured this out before we hit
   manifestSP.
-}

-- After determining the stack layout, we can:
-- 1. Replace references to stack Areas with addresses relative to the stack
--    pointer.
-- 2. Insert adjustments to the stack pointer to ensure that it is at a
--    conventional location at each proc point.
--    Because we don't take interrupts on the execution stack, we only need the
--    stack pointer to be younger than the live values on the stack at proc points.
-- 3. Compute the maximum stack offset used in the procedure and replace
--    the stack high-water mark with that offset.
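--
-- Illustrative arithmetic (not from the original source): a reference to
-- CmmStackSlot a i is rewritten to Sp plus (spOff - (slot a + i)), where
-- spOff is the virtual Sp at that point in the block and slot a is the byte
-- offset allocated to the area. For instance, assuming spOff = 32,
-- slot a = 24 and i = 4, the slot is addressed as Sp + 4.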
manifestSP :: SpEntryMap -> AreaMap -> ByteOff -> CmmGraph -> FuelUniqSM CmmGraph
manifestSP spEntryMap areaMap entry_off g@(CmmGraph {g_entry=entry}) =
  ofBlockMap entry `liftM` foldl replB (return mapEmpty) (postorderDfs g)
  where slot a = -- pprTrace "slot" (ppr a) $
                 Map.lookup a areaMap `orElse` panic "unallocated Area"
        slot' (Just id) = slot $ CallArea (Young id)
        slot' Nothing   = slot $ CallArea Old
        sp_high = maxSlot slot g
        proc_entry_sp = slot (CallArea Old) + entry_off

        spOffset id = mapLookup id spEntryMap `orElse` 0

        sp_on_entry id | id == entry = proc_entry_sp
        sp_on_entry id = slot' (Just id) + spOffset id

        -- On entry to procpoints, the stack pointer is conventional;
        -- otherwise, we check the SP set by predecessors.
        replB :: FuelUniqSM (BlockEnv CmmBlock) -> CmmBlock -> FuelUniqSM (BlockEnv CmmBlock)
        replB blocks block =
          do let (head, middles, JustC tail :: MaybeC C (CmmNode O C)) = blockToNodeList block
                 middles' = map (middle spIn) middles
             bs <- replLast head middles' tail
             flip (foldr insertBlock) bs `liftM` blocks
          where spIn = sp_on_entry (entryLabel block)

                middle spOff m = mapExpDeep (replSlot spOff) m
                -- XXX there shouldn't be any global registers in the
                -- CmmCall, so there shouldn't be any slots in
                -- CmmCall... check that...
                last   spOff l = mapExpDeep (replSlot spOff) l
                replSlot spOff (CmmStackSlot a i) = CmmRegOff (CmmGlobal Sp) (spOff - (slot a + i))
                replSlot _ (CmmLit CmmHighStackMark) = -- replacing the high water mark
                  CmmLit (CmmInt (toInteger (max 0 (sp_high - proc_entry_sp))) (typeWidth bWord))
                -- Invariant: Sp is always greater than SpLim. Thus, if
                -- the high water mark is zero, we can optimize away the
                -- conditional branch. Relies on dead code elimination
                -- to get rid of the dead GC blocks.
                -- EZY: Maybe turn this into a guard that checks if a
                -- statement is stack-check ish? Maybe we should make
                -- an actual mach-op for it, so there's no chance of
                -- mixing this up with something else...
                replSlot _ (CmmMachOp (MO_U_Lt _)
                              [CmmMachOp (MO_Sub _)
                                 [ CmmReg (CmmGlobal Sp)
                                 , CmmLit (CmmInt 0 _)],
                               CmmReg (CmmGlobal SpLim)]) = CmmLit (CmmInt 0 wordWidth)
                replSlot _ e = e

                replLast :: MaybeC C (CmmNode C O) -> [CmmNode O O] -> CmmNode O C -> FuelUniqSM [CmmBlock]
                replLast h m l@(CmmCall _ k n _ _)       = updSp (slot' k + n) h m l
                -- JD: LastForeignCall probably ought to have an outgoing
                -- arg size, just like LastCall
                replLast h m l@(CmmForeignCall {succ=k}) = updSp (slot' (Just k) + wORD_SIZE) h m l
                replLast h m l@(CmmBranch k)             = updSp (sp_on_entry k) h m l
                replLast h m l = uncurry (:) `liftM` foldr succ (return (b, [])) (successors l)
                  where b :: CmmBlock
                        b = updSp' spIn h m l
                        succ succId z =
                          let succSp = sp_on_entry succId in
                          if succSp /= spIn then
                            do (b,  bs)  <- z
                               (b', bs') <- insertBetween b (adjustSp succSp) succId
                               return (b', bs' ++ bs)
                          else z

                updSp sp h m l = return [updSp' sp h m l]
                updSp' sp h m l | sp == spIn = blockOfNodeList (h, m, JustC $ last sp l)
                                | otherwise  = blockOfNodeList (h, m ++ adjustSp sp, JustC $ last sp l)
                adjustSp sp = [CmmAssign (CmmGlobal Sp) e]
                  where e = CmmMachOp (MO_Add wordWidth) [CmmReg (CmmGlobal Sp), off]
                        off = CmmLit $ CmmInt (toInteger $ spIn - sp) wordWidth


-- To compute the stack high-water mark, we fold over the graph and
-- compute the highest slot offset.
maxSlot :: (Area -> Int) -> CmmGraph -> Int
maxSlot slotOff g = foldGraphBlocks (foldBlockNodesF3 (flip const, highSlot, highSlot)) 0 g
  where highSlot i z = foldSlotsUsed add (foldSlotsDefd add z i) i
        add z (a, i, _) = max z (slotOff a + i)

-----------------------------------------------------------------------------
-- | Sanity check: stub pointers immediately after they die
-----------------------------------------------------------------------------
-- This will miss stack slots that are last used in a Last node,
-- but it should do pretty well...

stubSlotsOnDeath :: CmmGraph -> FuelUniqSM CmmGraph
stubSlotsOnDeath g = liftM fst $ dataflowPassBwd g [] $ analRewBwd slotLattice
                                                                   liveSlotTransfers
                                                                   rewrites
  where rewrites = mkBRewrite3 frt mid lst
        frt _ _ = return Nothing
        mid m liveSlots = return $ foldSlotsUsed (stub liveSlots m) Nothing m
        lst _ _ = return Nothing
        stub liveSlots m rst subarea@(a, off, w) =
          if elemSlot liveSlots subarea then rst
          else let store = mkMiddle $ CmmStore (CmmStackSlot a off)
                                               (stackStubExpr (widthFromBytes w))
               in case rst of Nothing -> Just (mkMiddle m <*> store)
                              Just g  -> Just (g <*> store)