Add type signature necessary for GHC 7.0.4
[ghc.git] / compiler / cmm / CmmLayoutStack.hs
1 {-# LANGUAGE RecordWildCards, GADTs #-}
2 module CmmLayoutStack (
3 cmmLayoutStack, setInfoTableStackMap
4 ) where
5
6 import StgCmmUtils ( callerSaveVolatileRegs ) -- XXX
7 import StgCmmForeign ( saveThreadState, loadThreadState ) -- XXX
8
9 import Cmm
10 import BlockId
11 import CLabel
12 import CmmUtils
13 import MkGraph
14 import Module
15 import ForeignCall
16 import CmmLive
17 import CmmProcPoint
18 import SMRep
19 import Hoopl hiding ((<*>), mkLast, mkMiddle)
20 import OptimizationFuel
21 import Constants
22 import UniqSupply
23 import Maybes
24 import UniqFM
25 import Util
26
27 import FastString
28 import Outputable
29 import Data.Map (Map)
30 import qualified Data.Map as Map
31 import qualified Data.Set as Set
32 import Control.Monad.Fix
33 import Data.Array as Array
34 import Data.Bits
35 import Data.List (nub)
36 import Control.Monad (liftM)
37
38 #include "HsVersions.h"
39
40
-- | The contents of one word of stack: either in use or free.
data StackSlot = Occupied | Empty
     -- Occupied: a return address or part of an update frame
43
-- | Debug rendering for a stack slot: occupied slots print as "XXX",
-- empty ones as "---", so a whole stack prints as a compact strip.
instance Outputable StackSlot where
  ppr slot = ptext $ case slot of
                       Occupied -> sLit "XXX"
                       Empty    -> sLit "---"
47
-- All stack locations are expressed as positive byte offsets from the
-- "base", which is defined to be the address above the return address
-- on the stack on entry to this CmmProc.
--
-- Lower addresses have higher StackLocs.
--
-- | A stack location: a byte offset below the base (see above).
type StackLoc = ByteOff
55
56 {-
57 A StackMap describes the stack at any given point. At a continuation
58 it has a particular layout, like this:
59
60 | | <- base
61 |-------------|
62 | ret0 | <- base + 8
63 |-------------|
64 . upd frame . <- base + sm_ret_off
65 |-------------|
66 | |
67 . vars .
68 . (live/dead) .
69 | | <- base + sm_sp - sm_args
70 |-------------|
71 | ret1 |
72 . ret vals . <- base + sm_sp (<--- Sp points here)
73 |-------------|
74
75 Why do we include the final return address (ret0) in our stack map? I
76 have absolutely no idea, but it seems to be done that way consistently
77 in the rest of the code generator, so I played along here. --SDM
78
79 Note that we will be constructing an info table for the continuation
80 (ret1), which needs to describe the stack down to, but not including,
81 the update frame (or ret0, if there is no update frame).
82 -}
83
-- | A 'StackMap' describes the stack layout at a particular program
-- point: where Sp is relative to the base, the size of the argument
-- area, how much stack is reserved for an update frame, and which
-- local variables currently live in stack slots.  See the diagram in
-- the comment above.
data StackMap = StackMap
 { sm_sp   :: StackLoc
       -- ^ the offset of Sp relative to the base on entry
       -- to this block.
 , sm_args :: ByteOff
       -- ^ the number of bytes of arguments in the area for this block
       -- Defn: the offset of young(L) relative to the base is given by
       -- (sm_sp - sm_args) of the StackMap for block L.
 , sm_ret_off :: ByteOff
       -- ^ Number of words of stack that we do not describe with an info
       -- table, because it contains an update frame.
 , sm_regs :: UniqFM (LocalReg,StackLoc)
       -- ^ regs on the stack
 }
98
-- | Debug rendering of a 'StackMap': one field per line.
instance Outputable StackMap where
  ppr sm =
     text "Sp = "         <> int (sm_sp sm)      $$
     text "sm_args = "    <> int (sm_args sm)    $$
     text "sm_ret_off = " <> int (sm_ret_off sm) $$
     text "sm_regs = "    <> ppr (eltsUFM (sm_regs sm))
105
106
-- | Lay out the stack for one 'CmmGraph': compute a 'StackMap' for
-- every block, rewrite abstract 'CmmStackSlot' references into concrete
-- Sp-relative loads/stores, and lower safe foreign calls.
--
-- NOTE: this uses 'mfix' to tie a knot: 'layout' needs the *final*
-- stack maps and the final Sp high-water mark (for CmmHighStackMark)
-- while it is still producing them.  The lazy pattern in the mfix
-- lambda is essential; forcing rec_stackmaps or rec_high_sp too early
-- would loop.
--
-- The pprTrace calls are debug output only (this is work-in-progress
-- code, see the XXX imports above).
cmmLayoutStack :: ProcPointSet -> ByteOff -> CmmGraph
               -> FuelUniqSM (CmmGraph, BlockEnv StackMap)
cmmLayoutStack procpoints entry_args
               graph@(CmmGraph { g_entry = entry })
  = do
    pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()
    liveness <- cmmLiveness graph
    pprTrace "liveness" (ppr liveness) $ return ()
    let blocks = postorderDfs graph

    -- Knot-tied: rec_stackmaps/rec_high_sp are the results being built.
    (final_stackmaps, final_high_sp, new_blocks) <- liftUniq $
          mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
            layout procpoints liveness entry entry_args
                   rec_stackmaps rec_high_sp blocks

    new_blocks' <- liftUniq $ mapM lowerSafeForeignCall new_blocks

    pprTrace ("Sp HWM") (ppr final_high_sp) $
       return (ofBlockList entry new_blocks', final_stackmaps)
126
127
128
-- | The main worklist loop: process each block in DFS order, computing
-- its StackMap and rewriting it to have a manifest Sp.
--
-- The [final] arguments are the *results* of this very function, fed
-- back via mfix in 'cmmLayoutStack'; they must only be used lazily
-- (final_stackmaps is consulted by manifestSp for forward references,
-- final_hwm becomes the CmmHighStackMark value).
layout :: BlockSet                      -- proc points
       -> BlockEnv CmmLive              -- liveness
       -> BlockId                       -- entry
       -> ByteOff                       -- stack args on entry

       -> BlockEnv StackMap             -- [final] stack maps
       -> ByteOff                       -- [final] Sp high water mark

       -> [CmmBlock]                    -- [in] blocks

       -> UniqSM
          ( BlockEnv StackMap           -- [out] stack maps
          , ByteOff                     -- [out] Sp high water mark
          , [CmmBlock]                  -- [out] new blocks
          )

layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks
  = go blocks init_stackmap entry_args []
  where
    (updfr, cont_info) = collectContInfo blocks

    -- The entry block starts with sm_sp = sm_args = entry_args:
    -- Sp points just below the incoming arguments.
    init_stackmap = mapSingleton entry StackMap{ sm_sp   = entry_args
                                               , sm_args = entry_args
                                               , sm_ret_off = updfr
                                               , sm_regs = emptyUFM
                                               }

    go [] acc_stackmaps acc_hwm acc_blocks
      = return (acc_stackmaps, acc_hwm, acc_blocks)

    go (b0 : bs) acc_stackmaps acc_hwm acc_blocks
      = do
       let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0

       -- DFS order guarantees a predecessor has already filled in our
       -- stack map (the entry block is seeded in init_stackmap).
       let stack0@StackMap { sm_sp = sp0 }
               = mapFindWithDefault
                     (pprPanic "no stack map for" (ppr entry_lbl))
                     entry_lbl acc_stackmaps

       pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return ()

       -- (a) Update the stack map to include the effects of
       --     assignments in this block
       let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0

       -- (b) Insert assignments to reload all the live variables if this
       --     block is a proc point
       let middle1 = if entry_lbl `setMember` procpoints
                        then foldr blockCons middle0 (insertReloads stack0)
                        else middle0

       -- (c) Look at the last node and if we are making a call or
       --     jumping to a proc point, we must save the live
       --     variables, adjust Sp, and construct the StackMaps for
       --     each of the successor blocks.  See handleLastNode for
       --     details.
       (middle2, sp_off, last1, fixup_blocks, out)
           <- handleLastNode procpoints liveness cont_info
                             acc_stackmaps stack1 middle0 last0

       pprTrace "layout(out)" (ppr out) $ return ()

       -- (d) Manifest Sp: run over the nodes in the block and replace
       --     CmmStackSlot with CmmLoad from Sp with a concrete offset.
       --
       -- our block:
       --    middle1          -- the original middle nodes
       --    middle2          -- live variable saves from handleLastNode
       --    Sp = Sp + sp_off -- Sp adjustment goes here
       --    last1            -- the last node
       --
       let middle_pre = blockToList $ foldl blockSnoc middle1 middle2

           sp_high = final_hwm - entry_args
               -- The stack check value is adjusted by the Sp offset on
               -- entry to the proc, which is entry_args.  We are
               -- assuming that we only do a stack check at the
               -- beginning of a proc, and we don't modify Sp before the
               -- check.

           final_blocks = manifestSp final_stackmaps stack0 sp0 sp_high entry0
                              middle_pre sp_off last1 fixup_blocks

           acc_stackmaps' = mapUnion acc_stackmaps out

           -- Track the deepest Sp seen so far (sp0 - sp_off is Sp after
           -- this block's adjustment; out covers the continuations).
           hwm' = maximum (acc_hwm : (sp0 - sp_off) : map sm_sp (mapElems out))

       go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks)
217
218
219 -- -----------------------------------------------------------------------------
220
221 -- This doesn't seem right somehow. We need to find out whether this
222 -- proc will push some update frame material at some point, so that we
223 -- can avoid using that area of the stack for spilling. The
224 -- updfr_space field of the CmmProc *should* tell us, but it doesn't
225 -- (I think maybe it gets filled in later when we do proc-point
226 -- splitting).
227 --
228 -- So we'll just take the max of all the cml_ret_offs. This could be
229 -- unnecessarily pessimistic, but probably not in the code we
230 -- generate.
231
-- | Scan all blocks, collecting (a) the maximum cml_ret_off of any
-- call (a conservative bound on the update-frame area; see the comment
-- above) and (b) a map from each continuation label to the number of
-- bytes of return values it receives.
--
-- NB: 'maximum' assumes a non-empty block list (every graph has at
-- least its entry block).  RecordWildCards binds cml_ret_args,
-- cml_ret_off, succ and updfr implicitly from the matched nodes.
collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff)
collectContInfo blocks
  = (maximum ret_offs, mapFromList (catMaybes mb_argss))
 where
  (mb_argss, ret_offs) = mapAndUnzip get_cont blocks

  get_cont b =
     case lastNode b of
        CmmCall { cml_cont = Just l, .. }
           -> (Just (l, cml_ret_args), cml_ret_off)
        CmmForeignCall { .. }
           -> (Just (succ, 0), updfr) -- ??
        _other -> (Nothing, 0)
245
246
247 -- -----------------------------------------------------------------------------
248 -- Updating the StackMap from middle nodes
249
250 -- Look for loads from stack slots, and update the StackMap. This is
-- purely for optimisation reasons, so that we can avoid saving a
252 -- variable back to a different stack slot if it is already on the
253 -- stack.
254 --
255 -- This happens a lot: for example when function arguments are passed
256 -- on the stack and need to be immediately saved across a call, we
257 -- want to just leave them where they are on the stack.
258 --
-- | Transfer function for a single (open/open or open/closed) node:
-- update the StackMap's register-to-slot tracking.
--
--   * a load from a stack slot into r records that r lives in that slot
--   * any other assignment to r invalidates r's slot (the slot no
--     longer holds r's current value)
--   * everything else leaves the map unchanged
procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap
procMiddle stackmaps node sm
  = case node of
     CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _)
       -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) }
       where loc = getStackLoc area off stackmaps
     CmmAssign (CmmLocal r) _other
       -> sm { sm_regs = delFromUFM (sm_regs sm) r }
     _other
       -> sm
269
-- | Translate an area-relative byte offset into a base-relative
-- 'StackLoc'.  Offsets into the Old area are already base-relative;
-- an offset into young(l) is shifted by l's frame position, which we
-- read from l's StackMap.  Panics if l has no StackMap yet.
getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc
getStackLoc area n stackmaps =
  case area of
    Old     -> n
    Young l
      | Just sm <- mapLookup l stackmaps -> sm_sp sm - sm_args sm + n
      | otherwise                        -> pprPanic "getStackLoc" (ppr l)
276
277
278 -- -----------------------------------------------------------------------------
279 -- Handling stack allocation for a last node
280
281 -- We take a single last node and turn it into:
282 --
283 -- C1 (some statements)
284 -- Sp = Sp + N
285 -- C2 (some more statements)
286 -- call f() -- the actual last node
287 --
288 -- plus possibly some more blocks (we may have to add some fixup code
289 -- between the last node and the continuation).
290 --
291 -- C1: is the code for saving the variables across this last node onto
292 -- the stack, if the continuation is a call or jumps to a proc point.
293 --
294 -- C2: if the last node is a safe foreign call, we have to inject some
295 -- extra code that goes *after* the Sp adjustment.
296
handleLastNode
   :: ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff
   -> BlockEnv StackMap -> StackMap
   -> Block CmmNode O O
   -> CmmNode O C
   -> UniqSM
      ( [CmmNode O O]      -- nodes to go *before* the Sp adjustment
      , ByteOff            -- amount to adjust Sp
      , CmmNode O C        -- new last node
      , [CmmBlock]         -- new blocks
      , BlockEnv StackMap  -- stackmaps for the continuations
      )

handleLastNode procpoints liveness cont_info stackmaps
               stack0@StackMap { sm_sp = sp0 } middle last
 = case last of
    --  At each return / tail call,
    --  adjust Sp to point to the last argument pushed, which
    --  is cml_args, after popping any other junk from the stack.
    CmmCall{ cml_cont = Nothing, .. } -> do
      let sp_off = sp0 - cml_args
      return ([], sp_off, last, [], mapEmpty)

    --  At each CmmCall with a continuation:
    CmmCall{ cml_cont = Just cont_lbl, .. } ->
       return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off

    CmmForeignCall{ succ = cont_lbl, .. } -> do
       return $ lastCall cont_lbl wORD_SIZE wORD_SIZE (sm_ret_off stack0)
            -- one word each for args and results: the return address

    CmmBranch{..}     ->  handleProcPoints
    CmmCondBranch{..} ->  handleProcPoints
    CmmSwitch{..}     ->  handleProcPoints

  where
     -- Calls and ForeignCalls are handled the same way:
     -- build the continuation's stack frame (or match an existing one),
     -- and record the continuation's StackMap as the single outgoing map.
     lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
              -> ( [CmmNode O O]
                 , ByteOff
                 , CmmNode O C
                 , [CmmBlock]
                 , BlockEnv StackMap
                 )
     lastCall lbl cml_args cml_ret_args cml_ret_off
      =  ( assignments
         , spOffsetForCall sp0 cont_stack cml_args
         , last
         , [] -- no new blocks
         , mapSingleton lbl cont_stack )
      where
         (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off

     -- Produce the save assignments and the continuation's StackMap.
     prepareStack lbl cml_ret_args cml_ret_off
       | Just cont_stack <- mapLookup lbl stackmaps
             -- If we have already seen this continuation before, then
             -- we just have to make the stack look the same:
       = (fixupStack stack0 cont_stack, cont_stack)
             -- Otherwise, we have to allocate the stack frame
       | otherwise
       = (save_assignments, new_cont_stack)
       where
        (new_cont_stack, save_assignments)
           = setupStackFrame lbl liveness cml_ret_off cml_ret_args stack0

     -- A branch to a proc point: we have to set up the stack to match
     -- what the proc point is expecting.
     --
     handleProcPoints :: UniqSM ( [CmmNode O O]
                                , ByteOff
                                , CmmNode O C
                                , [CmmBlock]
                                , BlockEnv StackMap )

     handleProcPoints
          -- Note [diamond proc point]: if this block has already stored
          -- a return address for l, and l is the only proc-point
          -- successor, allocate l's frame right here in this block.
       | Just l <- futureContinuation middle
       , (nub $ filter (`setMember` procpoints) $ successors last) == [l]
       = do
         let cont_args = mapFindWithDefault 0 l cont_info
             (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0)
             out = mapFromList [ (l', cont_stack)
                               | l' <- successors last ]
         return ( assigs
                , spOffsetForCall sp0 cont_stack wORD_SIZE
                , last
                , []
                , out)

       -- Generic case: fix up the stack on each proc-point edge via a
       -- freshly inserted block, and retarget the last node at them.
       | otherwise = do
          pps <- mapM handleProcPoint (successors last)
          let lbl_map :: LabelMap Label
              lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
              fix_lbl l = mapLookup l lbl_map `orElse` l
          return ( []
                 , 0
                 , mapSuccessors fix_lbl last
                 , concat [ blk | (_,_,_,blk) <- pps ]
                 , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] )

     -- For each proc point that is a successor of this block
     --   (a) if the proc point already has a stackmap, we need to
     --       shuffle the current stack to make it look the same.
     --       We have to insert a new block to make this happen.
     --   (b) otherwise, call "allocate live stack0" to make the
     --       stack map for the proc point
     handleProcPoint :: BlockId
                     -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock])
     handleProcPoint l
       | not (l `setMember` procpoints) = return (l, l, stack0, [])
       | otherwise = do
          tmp_lbl <- liftM mkBlockId $ getUniqueM
          let
             -- NB: in the Nothing branch, (stack1, assigs) come from the
             -- case-alternative's where clause; that inner 'assigs'
             -- shadows the outer pattern variable of the same name.
             (stack2, assigs) =
                  case mapLookup l stackmaps of
                    Just pp_sm -> (pp_sm, fixupStack stack0 pp_sm)
                    Nothing ->
                      pprTrace "first visit to proc point"
                                   (ppr l <+> ppr stack1) $
                      (stack1, assigs)
                      where
                       cont_args = mapFindWithDefault 0 l cont_info
                       (stack1, assigs) =
                          setupStackFrame l liveness (sm_ret_off stack0)
                                            cont_args stack0

             sp_off = sp0 - sm_sp stack2

             -- the fixup block: do the shuffle, adjust Sp, jump to l
             block = blockJoin (CmmEntry tmp_lbl)
                               (maybeAddSpAdj sp_off (blockFromList assigs))
                               (CmmBranch l)
          --
          return (l, tmp_lbl, stack2, [block])
432
433
434
435 -- Sp is currently pointing to current_sp,
436 -- we want it to point to
437 -- (sm_sp cont_stack - sm_args cont_stack + args)
438 -- so the difference is
439 -- sp0 - (sm_sp cont_stack - sm_args cont_stack + args)
-- | How much to add to Sp before a call so that it points at the
-- continuation's argument area:
--     current_sp - (sm_sp cont_stack - sm_args cont_stack + args)
spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff
spOffsetForCall current_sp cont_stack args = current_sp - cont_sp
  where
    -- Sp at the continuation points at the bottom of its arg area
    cont_sp = sm_sp cont_stack - sm_args cont_stack + args
443
444
-- | Create a sequence of assignments to establish the new StackMap,
-- given the old StackMap: store each register into its new slot,
-- skipping registers that are already in the right place.
fixupStack :: StackMap -> StackMap -> [CmmNode O O]
fixupStack old_stack new_stack =
    [ CmmStore (CmmStackSlot Old n) (CmmReg (CmmLocal r))
    | (r, n) <- stackSlotRegs new_stack
    , Map.lookup r old_map /= Just n       -- already there: no store needed
    ]
  where
    old_map :: Map LocalReg ByteOff
    old_map = Map.fromList (stackSlotRegs old_stack)
458
459
460
461 setupStackFrame
462 :: BlockId -- label of continuation
463 -> BlockEnv CmmLive -- liveness
464 -> ByteOff -- updfr
465 -> ByteOff -- bytes of return values on stack
466 -> StackMap -- current StackMap
467 -> (StackMap, [CmmNode O O])
468
469 setupStackFrame lbl liveness updfr_off ret_args stack0
470 = (cont_stack, assigs)
471 where
472 -- get the set of LocalRegs live in the continuation
473 live = mapFindWithDefault Set.empty lbl liveness
474
475 -- the stack from the base to updfr_off is off-limits.
476 -- our new stack frame contains:
477 -- * saved live variables
478 -- * the return address [young(C) + 8]
479 -- * the args for the call,
480 -- which are replaced by the return values at the return
481 -- point.
482
483 -- everything up to updfr_off is off-limits
484 -- stack1 contains updfr_off, plus everything we need to save
485 (stack1, assigs) = allocate updfr_off live stack0
486
487 -- And the Sp at the continuation is:
488 -- sm_sp stack1 + ret_args
489 cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args
490 , sm_args = ret_args
491 , sm_ret_off = updfr_off
492 }
493
494
495 -- -----------------------------------------------------------------------------
496 -- Note [diamond proc point]
497 --
498 -- This special case looks for the pattern we get from a typical
499 -- tagged case expression:
500 --
501 -- Sp[young(L1)] = L1
502 -- if (R1 & 7) != 0 goto L1 else goto L2
503 -- L2:
504 -- call [R1] returns to L1
505 -- L1: live: {y}
506 -- x = R1
507 --
508 --
509 -- If we let the generic case handle this, we get
510 --
511 -- Sp[-16] = L1
512 -- if (R1 & 7) != 0 goto L1a else goto L2
513 -- L2:
514 -- Sp[-8] = y
515 -- Sp = Sp - 16
516 -- call [R1] returns to L1
517 -- L1a:
518 -- Sp[-8] = y
519 -- Sp = Sp - 16
520 -- goto L1
521 -- L1:
522 -- x = R1
523 --
524 -- The code for saving the live vars is duplicated in each branch, and
525 -- furthermore there is an extra jump (assuming L1 is a proc point,
526 -- which it probably is if there is a heap check).
527 --
528 -- So to fix this we look for
529 -- (1) a block containing an assignment of a return address L
530 -- (2) ending in a branch where one (and only) continuation goes to L,
531 -- and no other continuations go to proc points.
532 --
533 -- If this happens, then we allocate the stack frame for L in the
534 -- current block.
535 --
536 -- We know that it is safe to allocate the stack frame and save the
537 -- live variables after the assignment of the return address, because
538 -- stack areas are defined as overlapping, so there can be no reads
539 -- from other stack areas after the return address assignment.
540 --
541 -- We could generalise (2), but that would make it a bit more
542 -- complicated to handle, and this currently catches the common case.
543
-- | Look through the middle nodes for a store of a return address
-- (a block label) into a young area: Sp[young(l)] = l.  Returns the
-- *last* such l (foldBlockNodesB folds backwards, and the first match
-- it reaches wins), or Nothing.  Used to spot the pattern described in
-- Note [diamond proc point].
futureContinuation :: Block CmmNode O O -> Maybe BlockId
futureContinuation middle = foldBlockNodesB f middle Nothing
   where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId
         f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _
               = Just l
         f _ r = r
550
551 -- -----------------------------------------------------------------------------
552 -- Saving live registers
553
554 -- | Given a set of live registers and a StackMap, save all the registers
555 -- on the stack and return the new StackMap and the assignments to do
556 -- the saving.
557 --
-- | Allocate stack slots for the given live registers.  The area below
-- ret_off is reserved (update frame); registers already in a suitable
-- slot are kept where they are.  Returns the updated StackMap and the
-- store instructions needed to save the newly-placed registers.
allocate :: ByteOff -> RegSet -> StackMap -> (StackMap, [CmmNode O O])
allocate ret_off live stackmap@StackMap{ sm_sp = sp0
                                       , sm_regs = regs0 }
 =
  pprTrace "allocate" (ppr live $$ ppr stackmap) $  -- debug only

   -- we only have to save regs that are not already in a slot
   let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
       regs1   = filterUFM (\(r,_) -> elemRegSet r live) regs0
   in

   -- make a map of the stack: one StackSlot per word from the base
   -- down to max sp0 ret_off, marking the update-frame words and the
   -- words of already-saved live registers as Occupied.
   let stack = reverse $ Array.elems $
               accumArray (\_ x -> x) Empty (1, toWords (max sp0 ret_off)) $
                 ret_words ++ live_words
            where ret_words =
                   [ (x, Occupied)
                   | x <- [ 1 .. toWords ret_off] ]
                  live_words =
                   [ (toWords x, Occupied)
                   | (r,off) <- eltsUFM regs1,
                     let w = localRegBytes r,
                     x <- [ off, off-wORD_SIZE .. off - w + 1] ]
   in

   -- Pass over the stack: find slots to save all the new live variables,
   -- choosing the oldest slots first (hence a foldr).
   let
       -- accumulator: (regs still to save, stack seen so far,
       --               current byte offset, stores so far, placements)
       save slot ([], stack, n, assigs, regs) -- no more regs to save
          = ([], slot:stack, n `plusW` 1, assigs, regs)

       save slot (to_save, stack, n, assigs, regs)
          = case slot of
               Occupied ->  (to_save, Occupied:stack, n `plusW` 1, assigs, regs)
               Empty
                 | Just (stack', r, to_save') <-
                       select_save to_save (slot:stack)
                 -> let assig = CmmStore (CmmStackSlot Old n')
                                         (CmmReg (CmmLocal r))
                        n' = n `plusW` 1
                   in
                        (to_save', stack', n', assig : assigs, (r,(r,n')):regs)

                 | otherwise
                 -> (to_save, slot:stack, n `plusW` 1, assigs, regs)

       -- we should do better here: right now we'll fit the smallest first,
       -- but it would make more sense to fit the biggest first.
       select_save :: [LocalReg] -> [StackSlot]
                   -> Maybe ([StackSlot], LocalReg, [LocalReg])
       select_save regs stack = go regs []
         where go []     _no_fit = Nothing
               go (r:rs) no_fit
                 | Just rest <- dropEmpty words stack
                 = Just (replicate words Occupied ++ rest, r, rs++no_fit)
                 | otherwise
                 = go rs (r:no_fit)
                 where words = localRegWords r

       -- fill in empty slots as much as possible
       (still_to_save, save_stack, n, save_assigs, save_regs)
          = foldr save (to_save, [], 0, [], []) stack

       -- push any remaining live vars on the stack
       (push_sp, push_assigs, push_regs)
          = foldr push (n, [], []) still_to_save
          where
              push r (n, assigs, regs)
                = (n', assig : assigs, (r,(r,n')) : regs)
                where
                  n' = n + localRegBytes r
                  assig = CmmStore (CmmStackSlot Old n')
                                   (CmmReg (CmmLocal r))

       -- if nothing was pushed we can trim trailing empty slots off Sp
       trim_sp
          | not (null push_regs) = push_sp
          | otherwise
          = n `plusW` (- length (takeWhile isEmpty save_stack))

       final_regs = regs1 `addListToUFM` push_regs
                          `addListToUFM` save_regs

   in
  -- XXX should be an assert
   if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else

   if (trim_sp .&. (wORD_SIZE - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else

   ( stackmap { sm_regs = final_regs , sm_sp = trim_sp }
   , push_assigs ++ save_assigs )
647
648
649 -- -----------------------------------------------------------------------------
650 -- Manifesting Sp
651
652 -- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp. The
653 -- block looks like this:
654 --
655 -- middle_pre -- the middle nodes
656 -- Sp = Sp + sp_off -- Sp adjustment goes here
657 -- last -- the last node
658 --
659 -- And we have some extra blocks too (that don't contain Sp adjustments)
660 --
661 -- The adjustment for middle_pre will be different from that for
662 -- middle_post, because the Sp adjustment intervenes.
663 --
manifestSp
   :: BlockEnv StackMap  -- StackMaps for other blocks
   -> StackMap           -- StackMap for this block
   -> ByteOff            -- Sp on entry to the block
   -> ByteOff            -- SpHigh
   -> CmmNode C O        -- first node
   -> [CmmNode O O]      -- middle
   -> ByteOff            -- sp_off
   -> CmmNode O C        -- last node
   -> [CmmBlock]         -- new blocks
   -> [CmmBlock]         -- final blocks with Sp manifest

manifestSp stackmaps stack0 sp0 sp_high
           first middle_pre sp_off last fixup_blocks
  = final_block : fixup_blocks'
  where
    area_off = getAreaOff stackmaps

    -- Nodes before the Sp adjustment see Sp = sp0; nodes after it
    -- (the last node, and the fixup blocks) see Sp = sp0 - sp_off.
    adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x
    adj_pre_sp  = mapExpDeep (areaToSp sp0            sp_high area_off)
    adj_post_sp = mapExpDeep (areaToSp (sp0 - sp_off) sp_high area_off)

    -- also drop stores that are already satisfied (elimStackStores)
    final_middle = maybeAddSpAdj sp_off $
                   blockFromList $
                   map adj_pre_sp $
                   elimStackStores stack0 stackmaps area_off $
                   middle_pre

    final_last    = optStackCheck (adj_post_sp last)

    final_block   = blockJoin first final_middle final_last

    fixup_blocks' = map (blockMapNodes3 (id, adj_post_sp, id)) fixup_blocks
697
698
-- | Base-relative offset of an area: the Old area starts at the base
-- (offset 0); young(l) starts at l's frame position, read from l's
-- StackMap.  Panics if l's StackMap is missing.
getAreaOff :: BlockEnv StackMap -> (Area -> StackLoc)
getAreaOff stackmaps area =
  case area of
    Old     -> 0
    Young l -> maybe (pprPanic "getAreaOff" (ppr l))
                     young_off
                     (mapLookup l stackmaps)
  where
    young_off sm = sm_sp sm - sm_args sm
705
706
-- | Append @Sp = Sp + sp_off@ to the block, unless the offset is zero
-- (in which case the block is returned unchanged).
maybeAddSpAdj :: ByteOff -> Block CmmNode O O -> Block CmmNode O O
maybeAddSpAdj sp_off block
  | sp_off == 0 = block
  | otherwise   = block `blockSnoc` adjust
  where
    adjust = CmmAssign spReg (cmmOffset (CmmReg spReg) sp_off)
711
712
713 {-
714 Sp(L) is the Sp offset on entry to block L relative to the base of the
715 OLD area.
716
717 SpArgs(L) is the size of the young area for L, i.e. the number of
718 arguments.
719
720 - in block L, each reference to [old + N] turns into
721 [Sp + Sp(L) - N]
722
723 - in block L, each reference to [young(L') + N] turns into
724 [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]
725
726 - be careful with the last node of each block: Sp has already been adjusted
727 to be Sp + Sp(L) - Sp(L')
728 -}
729
-- | Expression rewriter used by manifestSp (see the comment above for
-- the arithmetic):
--   * CmmStackSlot becomes a concrete Sp-relative offset
--   * CmmHighStackMark becomes the Sp high-water-mark literal
--   * a stack check of the exact shape (Sp - 0 < SpLim) is replaced by
--     literal False -- see Note [null stack check]
--   * anything else passes through unchanged
areaToSp :: ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
areaToSp sp_old _sp_hwm area_off (CmmStackSlot area n) =
  cmmOffset (CmmReg spReg) (sp_old - area_off area - n)
areaToSp _ sp_hwm _ (CmmLit CmmHighStackMark) = CmmLit (mkIntCLit sp_hwm)
areaToSp _ _ _ (CmmMachOp (MO_U_Lt _)  -- Note [null stack check]
                  [CmmMachOp (MO_Sub _)
                          [ CmmReg (CmmGlobal Sp)
                          , CmmLit (CmmInt 0 _)],
                   CmmReg (CmmGlobal SpLim)]) = CmmLit (CmmInt 0 wordWidth)
areaToSp _ _ _ other = other
740
741 -- -----------------------------------------------------------------------------
742 -- Note [null stack check]
743 --
744 -- If the high-water Sp is zero, then we end up with
745 --
746 -- if (Sp - 0 < SpLim) then .. else ..
747 --
748 -- and possibly some dead code for the failure case. Optimising this
749 -- away depends on knowing that SpLim <= Sp, so it is really the job
750 -- of the stack layout algorithm, hence we do it now. This is also
751 -- convenient because control-flow optimisation later will drop the
752 -- dead code.
753
-- | If areaToSp rewrote a stack check's condition to the literal 0
-- (false), collapse the conditional into an unconditional branch to
-- the false successor.  See Note [null stack check].
optStackCheck :: CmmNode O C -> CmmNode O C
optStackCheck n
  | CmmCondBranch (CmmLit (CmmInt 0 _)) _true false <- n = CmmBranch false
  | otherwise                                            = n
759
760
761 -- -----------------------------------------------------------------------------
762
763 -- | Eliminate stores of the form
764 --
765 -- Sp[area+n] = r
766 --
767 -- when we know that r is already in the same slot as Sp[area+n]. We
768 -- could do this in a later optimisation pass, but that would involve
769 -- a separate analysis and we already have the information to hand
770 -- here. It helps clean up some extra stack stores in common cases.
771 --
772 -- Note that we may have to modify the StackMap as we walk through the
773 -- code using procMiddle, since an assignment to a variable in the
774 -- StackMap will invalidate its mapping there.
775 --
elimStackStores :: StackMap
                -> BlockEnv StackMap
                -> (Area -> ByteOff)
                -> [CmmNode O O]
                -> [CmmNode O O]
-- | Walk the nodes, dropping stores Sp[area+m] = r when the StackMap
-- says r is already in that exact slot.  The StackMap is threaded
-- through with procMiddle so later assignments invalidate stale slot
-- info (see the comment above).
elimStackStores stackmap stackmaps area_off nodes
  = go stackmap nodes
  where
    go _stackmap [] = []
    go stackmap (n:ns)
     = case n of
         CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r))
            | Just (_,off) <- lookupUFM (sm_regs stackmap) r
            , area_off area + m == off    -- same slot: the store is a no-op
            -> pprTrace "eliminated a node!" (ppr r) $ go stackmap ns
         _otherwise
            -> n : go (procMiddle stackmaps n stackmap) ns
793
794
795 -- -----------------------------------------------------------------------------
796 -- Update info tables to include stack liveness
797
798
-- | Fill in the stack liveness in a proc's info table (only for
-- StackRep info tables), derived from the entry block's StackMap.
-- Non-proc declarations are returned unchanged.
setInfoTableStackMap :: BlockEnv StackMap -> CmmDecl -> CmmDecl
setInfoTableStackMap stackmaps
     (CmmProc top_info@TopInfo{..} l g@CmmGraph{g_entry = eid})
  = CmmProc top_info{ info_tbl = fix_info info_tbl } l g
  where
    fix_info info_tbl@CmmInfoTable{ cit_rep = StackRep _ } =
       info_tbl { cit_rep = StackRep (get_liveness eid) }
    fix_info other = other

    get_liveness :: BlockId -> Liveness
    get_liveness lbl
      = case mapLookup lbl stackmaps of
          Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl)
          Just sm -> stackMapToLiveness sm

setInfoTableStackMap _ d = d
815
816
-- | Convert a StackMap to per-word liveness for an info table: True
-- means "not a pointer / dead", False means a live GC pointer.  Covers
-- the words from just above the update frame (sm_ret_off) down to the
-- top of the frame excluding the argument area, matching the region an
-- info table describes (see the StackMap diagram).
stackMapToLiveness :: StackMap -> Liveness
stackMapToLiveness StackMap{..} =
   reverse $ Array.elems $
        accumArray (\_ x -> x) True (toWords sm_ret_off + 1,
                                     toWords (sm_sp - sm_args)) live_words
   where
     live_words =  [ (toWords off, False)
                   | (r,off) <- eltsUFM sm_regs, isGcPtrType (localRegType r) ]
825
826
827 -- -----------------------------------------------------------------------------
828 -- Lowering safe foreign calls
829
830 {-
831 Note [lower safe foreign calls]
832
833 We start with
834
835 Sp[young(L1)] = L1
836 ,-----------------------
837 | r1 = foo(x,y,z) returns to L1
838 '-----------------------
839 L1:
840 R1 = r1 -- copyIn, inserted by mkSafeCall
841 ...
842
843 the stack layout algorithm will arrange to save and reload everything
844 live across the call. Our job now is to expand the call so we get
845
846 Sp[young(L1)] = L1
847 ,-----------------------
848 | SAVE_THREAD_STATE()
849 | token = suspendThread(BaseReg, interruptible)
850 | r = foo(x,y,z)
851 | BaseReg = resumeThread(token)
852 | LOAD_THREAD_STATE()
853 | R1 = r -- copyOut
854 | jump L1
855 '-----------------------
856 L1:
857 r = R1 -- copyIn, inserted by mkSafeCall
858 ...
859
860 Note the copyOut, which saves the results in the places that L1 is
expecting them (see Note [safe foreign call convention]).
862 -}
863
-- | Expand a block ending in a safe foreign call into the
-- suspend/call/resume sequence described in
-- Note [lower safe foreign calls].  Blocks ending in anything else are
-- returned unchanged.  RecordWildCards binds tgt, res, args, succ,
-- updfr and intrbl from the matched CmmForeignCall.
lowerSafeForeignCall :: CmmBlock -> UniqSM CmmBlock
lowerSafeForeignCall block
  | (entry, middle, CmmForeignCall { .. }) <- blockSplit block
  = do
    -- Both 'id' and 'new_base' are KindNonPtr because they're
    -- RTS-only objects and are not subject to garbage collection
    id <- newTemp bWord
    new_base <- newTemp (cmmRegType (CmmGlobal BaseReg))
    let (caller_save, caller_load) = callerSaveVolatileRegs
    load_tso <- newTemp gcWord
    load_stack <- newTemp gcWord
    let suspend = saveThreadState <*>
                  caller_save <*>
                  mkMiddle (callSuspendThread id intrbl)
        midCall = mkUnsafeCall tgt res args
        resume  = mkMiddle (callResumeThread new_base id) <*>
                  -- Assign the result to BaseReg: we
                  -- might now have a different Capability!
                  mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
                  caller_load <*>
                  loadThreadState load_tso load_stack
        -- Note: The successor must be a procpoint, and we have already split,
        --       so we use a jump, not a branch.
        succLbl = CmmLit (CmmLabel (infoTblLbl succ))

        -- copy the results to where the continuation expects them
        (ret_args, copyout) = copyOutOflow NativeReturn Jump (Young succ)
                                  (map (CmmReg . CmmLocal) res)
                                  updfr (0, [])

        jump = CmmCall { cml_target   = succLbl
                       , cml_cont     = Just succ
                       , cml_args     = widthInBytes wordWidth
                       , cml_ret_args = ret_args
                       , cml_ret_off  = updfr }

    graph' <- lgraphOfAGraph $ suspend <*>
                               midCall <*>
                               resume  <*>
                               copyout <*>
                               mkLast jump

    -- The generated sequence must be a single block; splice its middle
    -- back onto the original entry and middle.
    case toBlockList graph' of
      [one] -> let (_, middle', last) = blockSplit one
               in return (blockJoin entry (middle `blockAppend` middle') last)
      _ -> panic "lowerSafeForeignCall0"

  -- Block doesn't end in a safe foreign call:
  | otherwise = return block
912
913
-- | A reference to an RTS C entry point with the given name.
foreignLbl :: FastString -> CmmExpr
foreignLbl name = CmmLit lbl
  where lbl = CmmLabel (mkCmmCodeLabel rtsPackageId name)
916
-- | Make a fresh local register of the given type.
newTemp :: CmmType -> UniqSM LocalReg
newTemp rep = do
  uniq <- getUniqueM
  return (LocalReg uniq rep)
919
-- | Build the unsafe call @id = suspendThread(BaseReg, intrbl)@, which
-- releases the Capability before a safe foreign call; the returned
-- token is later passed to resumeThread.
callSuspendThread :: LocalReg -> Bool -> CmmNode O O
callSuspendThread id intrbl =
  CmmUnsafeForeignCall
       (ForeignTarget (foreignLbl (fsLit "suspendThread"))
            (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint]))
       [id] [CmmReg (CmmGlobal BaseReg), CmmLit (mkIntCLit (fromEnum intrbl))]
926
-- | Build the unsafe call @new_base = resumeThread(id)@, which
-- re-acquires a Capability after a safe foreign call (possibly a
-- different one, hence the result is assigned back to BaseReg by the
-- caller).
callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
callResumeThread new_base id =
  CmmUnsafeForeignCall
       (ForeignTarget (foreignLbl (fsLit "resumeThread"))
            (ForeignConvention CCallConv [AddrHint] [AddrHint]))
       [new_base] [CmmReg (CmmLocal id)]
933
934 -- -----------------------------------------------------------------------------
935
-- | Advance a byte offset by a number of machine words.
plusW :: ByteOff -> WordOff -> ByteOff
plusW bytes words = bytes + wORD_SIZE * words
938
-- | Drop exactly n leading Empty slots; Nothing if any of the first n
-- slots is Occupied (or the list is too short).
dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot]
dropEmpty n slots
  | n == 0                   = Just slots
  | Empty : rest <- slots    = dropEmpty (n - 1) rest
  | otherwise                = Nothing
943
-- | Is this stack slot free?
isEmpty :: StackSlot -> Bool
isEmpty slot = case slot of
                 Empty -> True
                 _     -> False
947
-- | Size of a local register on the stack, in bytes, rounded up to a
-- whole number of words.
localRegBytes :: LocalReg -> ByteOff
localRegBytes = roundUpToWords . widthInBytes . typeWidth . localRegType
950
-- | Size of a local register on the stack, in whole words.
localRegWords :: LocalReg -> WordOff
localRegWords r = toWords (localRegBytes r)
953
-- | Convert a byte offset to a word offset (truncating; callers are
-- expected to pass word-aligned offsets).
toWords :: ByteOff -> WordOff
toWords bytes = bytes `quot` wORD_SIZE
956
957
-- | One reload assignment per register recorded in the StackMap:
-- r = Sp[slot], used at proc points where everything lives on the
-- stack.
insertReloads :: StackMap -> [CmmNode O O]
insertReloads stackmap = map reload (stackSlotRegs stackmap)
  where
    reload (r, sp) =
      CmmAssign (CmmLocal r)
                (CmmLoad (CmmStackSlot Old sp) (localRegType r))
964
965
-- | All (register, stack location) pairs recorded in a StackMap.
stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
stackSlotRegs = eltsUFM . sm_regs