Build info tables with the new stack layout code
[ghc.git] / compiler / cmm / CmmLayoutStack.hs
1 {-# LANGUAGE RecordWildCards, GADTs #-}
2 module CmmLayoutStack (
3 cmmLayoutStack, setInfoTableStackMap
4 ) where
5
6 import Cmm
7 import BlockId
8 import CmmUtils
9 import CmmLive
10 import CmmProcPoint
11 import SMRep
12 import Hoopl
13 import OptimizationFuel
14 import Constants
15 import UniqSupply
16 import Maybes
17 import UniqFM
18 import Util
19
20 import FastString
21 import Outputable
22 import Data.Map (Map)
23 import qualified Data.Map as Map
24 import qualified Data.Set as Set
25 import Control.Monad.Fix
26 import Data.Array as Array
27
28 #include "HsVersions.h"
29
30
31 data StackSlot = Occupied | Empty
32 -- Occupied: a return address or part of an update frame
33
34 instance Outputable StackSlot where
35 ppr Occupied = ptext (sLit "XXX")
36 ppr Empty = ptext (sLit "---")
37
38 -- All stack locations are expressed as positive byte offsets from the
39 -- "base", which is defined to be the address above the return address
40 -- on the stack on entry to this CmmProc.
41 --
42 -- Lower addresses have higher StackLocs.
43 --
44 type StackLoc = ByteOff
45
46 {-
47 A StackMap describes the stack at any given point. At a continuation
48 it has a particular layout, like this:
49
50 | | <- base
51 |-------------|
52 | ret0 | <- base + 8
53 |-------------|
54 . upd frame . <- base + sm_ret_off
55 |-------------|
56 | |
57 . vars .
58 . (live/dead) .
59 | | <- base + sm_sp - sm_args
60 |-------------|
61 | ret1 |
62 . ret vals . <- base + sm_sp (<--- Sp points here)
63 |-------------|
64
65 Why do we include the final return address (ret0) in our stack map? I
66 have absolutely no idea, but it seems to be done that way consistently
67 in the rest of the code generator, so I played along here. --SDM
68
69 Note that we will be constructing an info table for the continuation
70 (ret1), which needs to describe the stack down to, but not including,
71 the update frame (or ret0, if there is no update frame).
72 -}
73
74 data StackMap = StackMap
75 { sm_sp :: StackLoc
76 -- ^ the offset of Sp relative to the base on entry
77 -- to this block.
78 , sm_args :: ByteOff
79 -- ^ the number of bytes of arguments in the area for this block
80 -- Defn: the offset of young(L) relative to the base is given by
81 -- (sm_sp - sm_args) of the StackMap for block L.
82 , sm_ret_off :: ByteOff
83 -- ^ Number of words of stack that we do not describe with an info
84 -- table, because it contains an update frame.
85 , sm_regs :: UniqFM (LocalReg,StackLoc)
86 -- ^ regs on the stack
87 }
88
89 instance Outputable StackMap where
90 ppr StackMap{..} =
91 text "Sp = " <> int sm_sp $$
92 text "sm_args = " <> int sm_args $$
93 text "sm_ret_off = " <> int sm_ret_off $$
94 text "sm_regs = " <> ppr (eltsUFM sm_regs)
95
96
97 cmmLayoutStack :: ProcPointSet -> ByteOff -> CmmGraph
98 -> FuelUniqSM (CmmGraph, BlockEnv StackMap)
99 cmmLayoutStack procpoints entry_args
100 graph@(CmmGraph { g_entry = entry })
101 = do
102 pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()
103 liveness <- cmmLiveness graph
104 pprTrace "liveness" (ppr liveness) $ return ()
105 let blocks = postorderDfs graph
106
107 (final_stackmaps, final_high_sp, new_blocks) <- liftUniq $
108 mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
109 layout procpoints liveness entry entry_args
110 rec_stackmaps rec_high_sp blocks
111
112 pprTrace ("Sp HWM") (ppr final_high_sp) $
113 return (ofBlockList entry new_blocks, final_stackmaps)
114
115
116
117 layout :: BlockSet -- proc points
118 -> BlockEnv CmmLive -- liveness
119 -> BlockId -- entry
120 -> ByteOff -- stack args on entry
121
122 -> BlockEnv StackMap -- [final] stack maps
123 -> ByteOff -- [final] Sp high water mark
124
125 -> [CmmBlock] -- [in] blocks
126
127 -> UniqSM
128 ( BlockEnv StackMap -- [out] stack maps
129 , ByteOff -- [out] Sp high water mark
130 , [CmmBlock] -- [out] new blocks
131 )
132
133 layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks
134 = go blocks init_stackmap entry_args []
135 where
136 sp_high = final_hwm - entry_args
137 -- The stack check value is adjusted by the Sp offset on
138 -- entry to the proc, which is entry_args. We are
139 -- assuming that we only do a stack check at the
140 -- beginning of a proc, and we don't modify Sp before the
141 -- check.
142
143 (updfr, cont_info) = collectContInfo blocks
144
145 init_stackmap = mapSingleton entry StackMap{ sm_sp = entry_args
146 , sm_args = entry_args
147 , sm_ret_off = updfr
148 , sm_regs = emptyUFM
149 }
150
151 go [] acc_stackmaps acc_hwm acc_blocks
152 = return (acc_stackmaps, acc_hwm, acc_blocks)
153
154 go (b0 : bs) acc_stackmaps acc_hwm acc_blocks
155 = do
156 let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0
157
158 let stack0@StackMap { sm_sp = sp0 }
159 = mapFindWithDefault
160 (pprPanic "no stack map for" (ppr entry_lbl))
161 entry_lbl acc_stackmaps
162
163 pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return ()
164
165 -- Update the stack map to include the effects of assignments
166 -- in this block
167 let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0
168
169 -- Insert assignments to reload all the live variables if this
170 -- is a proc point
171 let middle1 = if entry_lbl `setMember` procpoints
172 then foldr blockCons middle0 (insertReloads stack0)
173 else middle0
174
175 -- Look at the last node and if we are making a call or jumping to
176 -- a proc point, we must save the live variables, adjust Sp, and
177 -- construct the StackMaps for each of the successor blocks.
178 -- See handleLastNode for details.
179 (saves, out, sp_off, last1, fixup_blocks)
180 <- handleLastNode procpoints liveness cont_info
181 acc_stackmaps stack1 last0
182
183 let hwm' = maximum (acc_hwm : map sm_sp (mapElems out))
184 middle2 = maybeAddSpAdj sp_off $ foldl blockSnoc middle1 saves
185
186 -- manifest Sp: turn all CmmStackSlots into actual loads
187 fiddle_middle = mapExpDeep (areaToSp sp0 sp_high final_stackmaps)
188 fiddle_last = mapExpDeep (areaToSp (sp0 - sp_off) sp_high
189 final_stackmaps)
190
191 stackmaps' = mapUnion acc_stackmaps out
192 newblock = blockJoin entry0 middle2 last1
193 newblock' = blockMapNodes3 (id, fiddle_middle, fiddle_last) newblock
194 fixup_blocks' = map (blockMapNodes3 (id, fiddle_middle, id))
195 fixup_blocks
196
197 pprTrace "layout(out)" (ppr out) $ return ()
198
199 go bs stackmaps' hwm' (newblock' : fixup_blocks' ++ acc_blocks)
200
201
202 -- This doesn't seem right somehow. We need to find out whether this
203 -- proc will push some update frame material at some point, so that we
204 -- can avoid using that area of the stack for spilling. The
205 -- updfr_space field of the CmmProc *should* tell us, but it doesn't
206 -- (I think maybe it gets filled in later when we do proc-point
207 -- splitting).
208 --
209 -- So we'll just take the max of all the cml_ret_offs. This could be
210 -- unnecessarily pessimistic, but probably not in the code we
211 -- generate.
212
213 collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff)
214 collectContInfo blocks
215 = (maximum ret_offs, mapFromList (catMaybes mb_argss))
216 where
217 (mb_argss, ret_offs) = mapAndUnzip get_cont blocks
218
219 get_cont b =
220 case lastNode b of
221 CmmCall { cml_cont = Just l, .. }
222 -> (Just (l, cml_ret_args), cml_ret_off)
223 CmmForeignCall { .. }
224 -> (Just (succ, 0), updfr) -- ??
225 _other -> (Nothing, 0)
226
227
228 maybeAddSpAdj :: ByteOff -> Block CmmNode O O -> Block CmmNode O O
229 maybeAddSpAdj 0 block = block
230 maybeAddSpAdj sp_off block
231 = block `blockSnoc` CmmAssign spReg (cmmOffset (CmmReg spReg) sp_off)
232
233
234 procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap
235 procMiddle stackmaps node sm
236 = case node of
237 CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) t)
238 -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) }
239 where loc = getStackLoc area off stackmaps
240 CmmAssign (CmmLocal r) _other
241 -> sm { sm_regs = delFromUFM (sm_regs sm) r }
242 _other
243 -> sm
244
245 getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc
246 getStackLoc Old n _ = n
247 getStackLoc (Young l) n stackmaps =
248 case mapLookup l stackmaps of
249 Nothing -> pprPanic "getStackLoc" (ppr l)
250 Just sm -> sm_sp sm - sm_args sm + n
251
252 -- -----------------------------------------------------------------------------
253 -- Handling stack allocation for a last node
254
255 handleLastNode
256 :: ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff
257 -> BlockEnv StackMap -> StackMap
258 -> CmmNode O C
259 -> UniqSM
260 ( [CmmNode O O] -- assignments to save live variables
261 , BlockEnv StackMap -- stackmaps for the continuations
262 , ByteOff -- amount to adjust Sp before the jump
263 , CmmNode O C -- new last node
264 , [CmmBlock] -- new blocks
265 )
266
267 handleLastNode procpoints liveness cont_info stackmaps
268 stack0@StackMap { sm_sp = sp0 } last
269 = case last of
270 -- At each return / tail call,
271 -- adjust Sp to point to the last argument pushed, which
272 -- is cml_args, after popping any other junk from the stack.
273 CmmCall{ cml_cont = Nothing, .. } -> do
274 let sp_off = sp0 - cml_args
275 return ([], mapEmpty, sp_off, last, [])
276
277 -- At each CmmCall with a continuation:
278 CmmCall{ cml_cont = Just cont_lbl, .. }
279 -- If we have already seen this continuation before, then
280 -- we just have to make the stack look the same:
281 | Just cont_stack <- mapLookup cont_lbl stackmaps
282 ->
283 return ( fixupStack stack0 cont_stack
284 , stackmaps
285 , sp0 - sm_sp cont_stack
286 , last
287 , [] )
288
289 -- a continuation we haven't seen before:
290 -- allocate the stack frame for it.
291 | otherwise -> do
292
293 -- get the set of LocalRegs live in the continuation
294 let target_live = mapFindWithDefault Set.empty cont_lbl
295 liveness
296
297 -- the stack from the base to cml_ret_off is off-limits.
298 -- our new stack frame contains:
299 -- * saved live variables
300 -- * the return address [young(C) + 8]
301 -- * the args for the call,
302 -- which are replaced by the return values at the return
303 -- point.
304
305 -- everything up to cml_ret_off is off-limits: mark it Occupied
306 -- stack2 contains cml_ret_off, plus everything we need to save
307 (stack2, assigs) = allocate cml_ret_off target_live stack0
308
309 -- Sp is currently pointing to sp0,
310 -- we want it to point to (sm_sp stack2 + cml_args)
311 -- so the difference is sp0 - (sm_sp stack2 + cml_args)
312 sp_off = sp0 - (sm_sp stack2 + cml_args)
313
314 -- And the Sp at the continuation is:
315 -- sm_sp stack2 + cml_ret_args
316 cont_stack = stack2{ sm_sp = sm_sp stack2 + cml_ret_args
317 , sm_args = cml_ret_args
318 , sm_ret_off = cml_ret_off
319 }
320
321 -- emit the necessary assignments of LocalRegs to stack slots
322 -- emit an Sp adjustment, taking into account the call area
323 --
324 return ( assigs
325 , mapSingleton cont_lbl cont_stack
326 , sp_off
327 , last
328 , [] -- no new blocks
329 )
330
331 CmmBranch{..} -> handleProcPoints
332 CmmCondBranch{..} -> handleProcPoints
333 CmmSwitch{..} -> handleProcPoints
334
335 where
336 handleProcPoints :: UniqSM ( [CmmNode O O]
337 , BlockEnv StackMap
338 , ByteOff
339 , CmmNode O C
340 , [CmmBlock] )
341
342 handleProcPoints = do
343 pps <- mapM handleProcPoint (successors last)
344 let lbl_map :: LabelMap Label
345 lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
346 fix_lbl l = mapLookup l lbl_map `orElse` l
347 return ( []
348 , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ]
349 , 0
350 , mapSuccessors fix_lbl last
351 , concat [ blk | (_,_,_,blk) <- pps ] )
352
353 -- For each proc point that is a successor of this block, we need to
354 -- (a) if the proc point already has a stackmap, we need to
355 -- shuffle the current stack to make it look the same.
356 -- We have to insert a new block to make this happen.
357 -- (b) otherwise, call "allocate live stack0" to make the
358 -- stack map for the proc point
359 handleProcPoint :: BlockId
360 -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock])
361 handleProcPoint l
362 | not (l `setMember` procpoints) = return (l, l, stack0, [])
363 | otherwise = do
364 tmp <- getUniqueM
365 let tmp_lbl = mkBlockId tmp
366 (assigs, stack3) = case mapLookup l stackmaps of
367 Just pp_sm -> (fixupStack stack0 pp_sm, pp_sm)
368 Nothing -> pprTrace "first visit to proc point" (ppr l <+> ppr live $$ ppr stack1) $ (assigs, stack2)
369 where
370 live = mapFindWithDefault Set.empty l liveness
371 (stack1, assigs) = allocate (sm_ret_off stack0) live stack0
372 cont_args = mapFindWithDefault 0 l cont_info
373 stack2 = stack1 { sm_sp = sm_sp stack1 + cont_args
374 , sm_args = cont_args
375 }
376
377 sp_off = sp0 - sm_sp stack3
378
379 block = blockJoin
380 (CmmEntry tmp_lbl)
381 (maybeAddSpAdj sp_off (blockFromList assigs))
382 (CmmBranch l)
383 --
384 return (l, tmp_lbl, stack3, [block])
385
386
387 passthrough :: BlockEnv StackMap
388 passthrough = mapFromList (zip (successors last) (repeat stack0))
389
390
391 -- | create a sequence of assignments to establish the new StackMap,
392 -- given the old StackMap.
393 fixupStack :: StackMap -> StackMap -> [CmmNode O O]
394 fixupStack old_stack new_stack = concatMap move new_locs
395 where
396 old_map :: Map LocalReg ByteOff
397 old_map = Map.fromList (stackSlotRegs old_stack)
398 new_locs = stackSlotRegs new_stack
399
400 move (r,n)
401 | Just m <- Map.lookup r old_map, n == m = []
402 | otherwise = [CmmStore (CmmStackSlot Old n)
403 (CmmReg (CmmLocal r))]
404
405 -- -----------------------------------------------------------------------------
406 -- Updating references to CallAreas
407
408 {-
409 After running layout, we need to update all the references to stack areas.
410
411 Sp(L) is the Sp offset on entry to block L relative to the base of the
412 OLD area.
413
414 SpArgs(L) is the size of the young area for L, i.e. the number of
415 arguments.
416
417 - in block L, each reference to (OldArea[N]) turns into
418 [Sp + Sp(L) - N]
419
420 - in block L, each reference to (Young(L')[N]) turns into
421 [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]
422
423 - be careful with the last node of each block: Sp has already been adjusted
424 to be Sp + Sp(L) - Sp(L')
425 -}
426
427 areaToSp :: ByteOff -> ByteOff -> BlockEnv StackMap -> CmmExpr -> CmmExpr
428 areaToSp sp_old _sp_hwm stackmaps (CmmStackSlot area n) =
429 cmmOffset (CmmReg spReg) (sp_old - area_off - n)
430 where
431 area_off = case area of
432 Old -> 0
433 Young l ->
434 case mapLookup l stackmaps of
435 Just sm -> sm_sp sm - sm_args sm
436 Nothing -> pprPanic "areaToSp(2)" (ppr l)
437 areaToSp _ sp_hwm _ (CmmLit CmmHighStackMark) = CmmLit (mkIntCLit sp_hwm)
438 areaToSp _ _ _ other = other
439
440
441 -- -----------------------------------------------------------------------------
442 -- Saving live registers
443
444 -- | Given a set of live registers and a StackMap, save all the registers
445 -- on the stack and return the new StackMap and the assignments to do
446 -- the saving.
447 --
448 allocate :: ByteOff -> RegSet -> StackMap -> (StackMap, [CmmNode O O])
449 allocate ret_off live stackmap@StackMap{ sm_sp = sp0
450 , sm_regs = regs0 }
451 =
452 pprTrace "allocate" (ppr live $$ ppr stackmap) $
453
454 -- we only have to save regs that are not already in a slot
455 let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
456 regs1 = filterUFM (\(r,_) -> elemRegSet r live) regs0
457 in
458
459 -- make a map of the stack
460 let stack = reverse $ Array.elems $
461 accumArray (\_ x -> x) Empty (1, toWords (max sp0 ret_off)) $
462 ret_words ++ live_words
463 where ret_words =
464 [ (x, Occupied)
465 | x <- [ 1 .. toWords ret_off] ]
466 live_words =
467 [ (toWords x, Occupied)
468 | (r,off) <- eltsUFM regs1,
469 let w = localRegBytes r,
470 x <- [ off, off-wORD_SIZE .. off - w + 1] ]
471 in
472
473 -- Pass over the stack: find slots to save all the new live variables,
474 -- choosing the oldest slots first (hence a foldr).
475 let
476 save slot ([], stack, n, assigs, regs) -- no more regs to save
477 = ([], slot:stack, n `plusW` 1, assigs, regs)
478 save slot (to_save, stack, n, assigs, regs)
479 = case slot of
480 Occupied -> (to_save, Occupied:stack, n `plusW` 1, assigs, regs)
481 Empty
482 | Just (stack', r, to_save') <-
483 select_save to_save (slot:stack)
484 -> let assig = CmmStore (CmmStackSlot Old n')
485 (CmmReg (CmmLocal r))
486 n' = n `plusW` 1
487 in
488 (to_save', stack', n', assig : assigs, (r,(r,n')):regs)
489
490 | otherwise
491 -> (to_save, slot:stack, n `plusW` 1, assigs, regs)
492
493 -- we should do better here: right now we'll fit the smallest first,
494 -- but it would make more sense to fit the biggest first.
495 select_save :: [LocalReg] -> [StackSlot]
496 -> Maybe ([StackSlot], LocalReg, [LocalReg])
497 select_save regs stack = go regs []
498 where go [] no_fit = Nothing
499 go (r:rs) no_fit
500 | Just rest <- dropEmpty words stack
501 = Just (replicate words Occupied ++ rest, r, rs++no_fit)
502 | otherwise
503 = go rs (r:no_fit)
504 where words = localRegWords r
505
506 -- fill in empty slots as much as possible
507 (still_to_save, save_stack, n, save_assigs, save_regs)
508 = foldr save (to_save, [], 0, [], []) stack
509
510 -- push any remaining live vars on the stack
511 (push_sp, push_assigs, push_regs)
512 = foldr push (n, [], []) still_to_save
513 where
514 push r (n, assigs, regs)
515 = (n', assig : assigs, (r,(r,n')) : regs)
516 where
517 w = typeWidth (localRegType r)
518 n' = n + widthInBytes w
519 assig = CmmStore (CmmStackSlot Old n')
520 (CmmReg (CmmLocal r))
521
522 trim_sp
523 | not (null push_regs) = push_sp
524 | otherwise
525 = case break notEmpty save_stack of
526 (empties, rest) -> n `plusW` (- length empties)
527
528 final_regs = regs1 `addListToUFM` push_regs
529 `addListToUFM` save_regs
530
531 in
532 -- XXX should be an assert
533 if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else
534
535 ( stackmap { sm_regs = final_regs , sm_sp = trim_sp }
536 , push_assigs ++ save_assigs )
537
538
539 -- -----------------------------------------------------------------------------
540 -- Update info tables to include stack liveness
541
542
543 setInfoTableStackMap :: BlockEnv StackMap -> CmmDecl -> CmmDecl
544 setInfoTableStackMap stackmaps
545 (CmmProc top_info@TopInfo{..} l g@CmmGraph{g_entry = eid})
546 = CmmProc top_info{ info_tbl = fix_info info_tbl } l g
547 where
548 fix_info info_tbl@CmmInfoTable{ cit_rep = StackRep _ } =
549 info_tbl { cit_rep = StackRep (get_liveness eid) }
550 fix_info other = other
551
552 get_liveness :: BlockId -> Liveness
553 get_liveness lbl
554 = case mapLookup lbl stackmaps of
555 Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl)
556 Just sm -> stackMapToLiveness sm
557
558 stackMapToLiveness :: StackMap -> Liveness
559 stackMapToLiveness StackMap{..} =
560 reverse $ Array.elems $
561 accumArray (\_ x -> x) True (toWords sm_ret_off + 1,
562 toWords (sm_sp - sm_args)) live_words
563 where
564 live_words = [ (toWords off, False)
565 | (r,off) <- eltsUFM sm_regs, isGcPtrType (localRegType r) ]
566
567
568 -- -----------------------------------------------------------------------------
569
570 plusW :: ByteOff -> WordOff -> ByteOff
571 plusW b w = b + w * wORD_SIZE
572
573 dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot]
574 dropEmpty 0 ss = Just ss
575 dropEmpty n (Empty : ss) = dropEmpty (n-1) ss
576 dropEmpty n _ = Nothing
577
578 pushEmpty :: ByteOff -> [StackSlot] -> [StackSlot]
579 pushEmpty n stack = replicate (toWords n) Empty ++ stack
580
581 notEmpty :: StackSlot -> Bool
582 notEmpty Empty = False
583 notEmpty _ = True
584
585 localRegBytes :: LocalReg -> ByteOff
586 localRegBytes r = widthInBytes (typeWidth (localRegType r))
587
588 localRegWords :: LocalReg -> WordOff
589 localRegWords = toWords . localRegBytes
590
591 toWords :: ByteOff -> WordOff
592 toWords x = x `quot` wORD_SIZE
593
594
595 insertReloads :: StackMap -> [CmmNode O O]
596 insertReloads stackmap =
597 [ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot Old sp)
598 (localRegType r))
599 | (r,sp) <- stackSlotRegs stackmap
600 ]
601
602
603 stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
604 stackSlotRegs sm = eltsUFM (sm_regs sm)