{-# LANGUAGE RecordWildCards, GADTs #-}
module CmmLayoutStack (
       cmmLayoutStack, setInfoTableStackMap
  ) where

import StgCmmUtils      ( callerSaveVolatileRegs ) -- XXX
import StgCmmForeign    ( saveThreadState, loadThreadState ) -- XXX

import Cmm
import BlockId
import CLabel
import CmmUtils
import MkGraph
import Module
import ForeignCall
import CmmLive
import CmmProcPoint
import SMRep
import Hoopl
import UniqSupply
import Maybes
import UniqFM
import Util

import DynFlags
import FastString
import Outputable
import qualified Data.Set as Set
import Control.Monad.Fix
import Data.Array as Array
import Data.Bits
import Data.List (nub)
import Control.Monad (liftM)

#include "HsVersions.h"


data StackSlot = Occupied | Empty
     -- Occupied: a return address or part of an update frame

instance Outputable StackSlot where
  ppr Occupied = ptext (sLit "XXX")
  ppr Empty    = ptext (sLit "---")

-- All stack locations are expressed as positive byte offsets from the
-- "base", which is defined to be the address above the return address
-- on the stack on entry to this CmmProc.
--
-- Lower addresses have higher StackLocs.
--
type StackLoc = ByteOff

{-
 A StackMap describes the stack at any given point.  At a continuation
 it has a particular layout, like this:

         |             | <- base
         |-------------|
         |  ret0       | <- base + 8
         |-------------|
         .  upd frame  . <- base + sm_ret_off
         |-------------|
         |             |
         .    vars     .
         . (live/dead) .
         |             | <- base + sm_sp - sm_args
         |-------------|
         |  ret1       |
         .  ret vals   . <- base + sm_sp    (<--- Sp points here)
         |-------------|

Why do we include the final return address (ret0) in our stack map?  I
have absolutely no idea, but it seems to be done that way consistently
in the rest of the code generator, so I played along here. --SDM

Note that we will be constructing an info table for the continuation
(ret1), which needs to describe the stack down to, but not including,
the update frame (or ret0, if there is no update frame).
-}
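
-- For illustration only (invented numbers, 8-byte words): a frame
-- with sm_sp = 32 and sm_args = 8 keeps its young area (ret1 plus the
-- return values) at byte offsets 24..32 from the base, since young(L)
-- starts at sm_sp - sm_args = 24, and Sp on entry points at base + 32.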

data StackMap = StackMap
 { sm_sp   :: StackLoc
       -- ^ the offset of Sp relative to the base on entry
       -- to this block.
 , sm_args :: ByteOff
       -- ^ the number of bytes of arguments in the area for this block
       -- Defn: the offset of young(L) relative to the base is given by
       -- (sm_sp - sm_args) of the StackMap for block L.
 , sm_ret_off :: ByteOff
       -- ^ Number of bytes of stack that we do not describe with an info
       -- table, because it contains an update frame.
 , sm_regs :: UniqFM (LocalReg,StackLoc)
       -- ^ regs on the stack
 }

instance Outputable StackMap where
  ppr StackMap{..} =
     text "Sp = " <> int sm_sp $$
     text "sm_args = " <> int sm_args $$
     text "sm_ret_off = " <> int sm_ret_off $$
     text "sm_regs = " <> ppr (eltsUFM sm_regs)


cmmLayoutStack :: DynFlags -> ProcPointSet -> ByteOff -> CmmGraph
               -> UniqSM (CmmGraph, BlockEnv StackMap)
cmmLayoutStack dflags procpoints entry_args
               graph0@(CmmGraph { g_entry = entry })
  = do
    -- pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()

    -- We need liveness info.  We could do removeDeadAssignments at
    -- the same time, but it buys nothing over doing cmmSink later,
    -- and costs a lot more than just cmmLiveness.
    -- (graph, liveness) <- removeDeadAssignments graph0
    let (graph, liveness) = (graph0, cmmLiveness graph0)

    -- pprTrace "liveness" (ppr liveness) $ return ()
    let blocks = postorderDfs graph

    (final_stackmaps, _final_high_sp, new_blocks) <-
          mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
            layout dflags procpoints liveness entry entry_args
                   rec_stackmaps rec_high_sp blocks

    new_blocks' <- mapM (lowerSafeForeignCall dflags) new_blocks

    -- pprTrace ("Sp HWM") (ppr _final_high_sp) $ return ()
    return (ofBlockList entry new_blocks', final_stackmaps)



layout :: DynFlags
       -> BlockSet                      -- proc points
       -> BlockEnv CmmLive              -- liveness
       -> BlockId                       -- entry
       -> ByteOff                       -- stack args on entry

       -> BlockEnv StackMap             -- [final] stack maps
       -> ByteOff                       -- [final] Sp high water mark

       -> [CmmBlock]                    -- [in] blocks

       -> UniqSM
          ( BlockEnv StackMap           -- [out] stack maps
          , ByteOff                     -- [out] Sp high water mark
          , [CmmBlock]                  -- [out] new blocks
          )

layout dflags procpoints liveness entry entry_args final_stackmaps final_hwm blocks
  = go blocks init_stackmap entry_args []
  where
    (updfr, cont_info) = collectContInfo blocks

    init_stackmap = mapSingleton entry StackMap{ sm_sp   = entry_args
                                               , sm_args = entry_args
                                               , sm_ret_off = updfr
                                               , sm_regs = emptyUFM
                                               }

    go [] acc_stackmaps acc_hwm acc_blocks
      = return (acc_stackmaps, acc_hwm, acc_blocks)

    go (b0 : bs) acc_stackmaps acc_hwm acc_blocks
      = do
       let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0

       let stack0@StackMap { sm_sp = sp0 }
               = mapFindWithDefault
                     (pprPanic "no stack map for" (ppr entry_lbl))
                     entry_lbl acc_stackmaps

       -- pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return ()

       -- (a) Update the stack map to include the effects of
       --     assignments in this block
       let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0

       -- (b) Insert assignments to reload all the live variables if this
       --     block is a proc point
       let middle1 = if entry_lbl `setMember` procpoints
                        then foldr blockCons middle0 (insertReloads stack0)
                        else middle0

       -- (c) Look at the last node and if we are making a call or
       --     jumping to a proc point, we must save the live
       --     variables, adjust Sp, and construct the StackMaps for
       --     each of the successor blocks.  See handleLastNode for
       --     details.
       (middle2, sp_off, last1, fixup_blocks, out)
           <- handleLastNode dflags procpoints liveness cont_info
                             acc_stackmaps stack1 middle0 last0

       -- pprTrace "layout(out)" (ppr out) $ return ()

       -- (d) Manifest Sp: run over the nodes in the block and replace
       --     CmmStackSlot with CmmLoad from Sp with a concrete offset.
       --
       -- our block:
       --    middle1          -- the original middle nodes
       --    middle2          -- live variable saves from handleLastNode
       --    Sp = Sp + sp_off -- Sp adjustment goes here
       --    last1            -- the last node
       --
       let middle_pre = blockToList $ foldl blockSnoc middle1 middle2

           sp_high = final_hwm - entry_args
               -- The stack check value is adjusted by the Sp offset on
               -- entry to the proc, which is entry_args.  We are
               -- assuming that we only do a stack check at the
               -- beginning of a proc, and we don't modify Sp before the
               -- check.

           final_blocks = manifestSp dflags final_stackmaps stack0 sp0 sp_high entry0
                              middle_pre sp_off last1 fixup_blocks

           acc_stackmaps' = mapUnion acc_stackmaps out

           -- If this block jumps to the GC, then we do not take its
           -- stack usage into account for the high-water mark.
           -- Otherwise, if the only stack usage is in the stack-check
           -- failure block itself, we will do a redundant stack
           -- check.  The stack has a buffer designed to accommodate
           -- the largest amount of stack needed for calling the GC.
           --
           this_sp_hwm | isGcJump last0 = 0
                       | otherwise      = sp0 - sp_off

           hwm' = maximum (acc_hwm : this_sp_hwm : map sm_sp (mapElems out))

       go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks)


-- -----------------------------------------------------------------------------

-- Not foolproof, but GCFun is the culprit we most want to catch
isGcJump :: CmmNode O C -> Bool
isGcJump (CmmCall { cml_target = CmmReg (CmmGlobal l) })
  = l == GCFun || l == GCEnter1
isGcJump _something_else = False

-- -----------------------------------------------------------------------------

-- This doesn't seem right somehow.  We need to find out whether this
-- proc will push some update frame material at some point, so that we
-- can avoid using that area of the stack for spilling.  The
-- updfr_space field of the CmmProc *should* tell us, but it doesn't
-- (I think maybe it gets filled in later when we do proc-point
-- splitting).
--
-- So we'll just take the max of all the cml_ret_offs.  This could be
-- unnecessarily pessimistic, but probably not in the code we
-- generate.

collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff)
collectContInfo blocks
  = (maximum ret_offs, mapFromList (catMaybes mb_argss))
 where
  (mb_argss, ret_offs) = mapAndUnzip get_cont blocks

  get_cont b =
     case lastNode b of
        CmmCall { cml_cont = Just l, .. }
           -> (Just (l, cml_ret_args), cml_ret_off)
        CmmForeignCall { .. }
           -> (Just (succ, 0), updfr) -- ??
        _other -> (Nothing, 0)


-- -----------------------------------------------------------------------------
-- Updating the StackMap from middle nodes

-- Look for loads from stack slots, and update the StackMap.  This is
-- purely for optimisation reasons, so that we can avoid saving a
-- variable back to a different stack slot if it is already on the
-- stack.
--
-- This happens a lot: for example when function arguments are passed
-- on the stack and need to be immediately saved across a call, we
-- want to just leave them where they are on the stack.
--
procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap
procMiddle stackmaps node sm
  = case node of
     CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _)
       -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) }
        where loc = getStackLoc area off stackmaps
     CmmAssign (CmmLocal r) _other
       -> sm { sm_regs = delFromUFM (sm_regs sm) r }
     _other
       -> sm

getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc
getStackLoc Old       n _         = n
getStackLoc (Young l) n stackmaps =
  case mapLookup l stackmaps of
    Nothing -> pprPanic "getStackLoc" (ppr l)
    Just sm -> sm_sp sm - sm_args sm + n
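
-- For example, with invented numbers: if block l has sm_sp = 32 and
-- sm_args = 8, then young(l) begins at byte offset 24 from the base,
-- so getStackLoc (Young l) 8 stackmaps = 32 - 8 + 8 = 32.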


-- -----------------------------------------------------------------------------
-- Handling stack allocation for a last node

-- We take a single last node and turn it into:
--
--    C1 (some statements)
--    Sp = Sp + N
--    C2 (some more statements)
--    call f()          -- the actual last node
--
-- plus possibly some more blocks (we may have to add some fixup code
-- between the last node and the continuation).
--
-- C1: is the code for saving the variables across this last node onto
-- the stack, if the continuation is a call or jumps to a proc point.
--
-- C2: if the last node is a safe foreign call, we have to inject some
-- extra code that goes *after* the Sp adjustment.

handleLastNode
   :: DynFlags -> ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff
   -> BlockEnv StackMap -> StackMap
   -> Block CmmNode O O
   -> CmmNode O C
   -> UniqSM
      ( [CmmNode O O]      -- nodes to go *before* the Sp adjustment
      , ByteOff            -- amount to adjust Sp
      , CmmNode O C        -- new last node
      , [CmmBlock]         -- new blocks
      , BlockEnv StackMap  -- stackmaps for the continuations
      )

handleLastNode dflags procpoints liveness cont_info stackmaps
               stack0@StackMap { sm_sp = sp0 } middle last
 = case last of
    --  At each return / tail call,
    --  adjust Sp to point to the last argument pushed, which
    --  is cml_args, after popping any other junk from the stack.
    CmmCall{ cml_cont = Nothing, .. } -> do
      let sp_off = sp0 - cml_args
      return ([], sp_off, last, [], mapEmpty)

    --  At each CmmCall with a continuation:
    CmmCall{ cml_cont = Just cont_lbl, .. } ->
       return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off

    CmmForeignCall{ succ = cont_lbl, .. } -> do
       return $ lastCall cont_lbl (wORD_SIZE dflags) (wORD_SIZE dflags) (sm_ret_off stack0)
            -- one word each for args and results: the return address

    CmmBranch{..}     -> handleBranches
    CmmCondBranch{..} -> handleBranches
    CmmSwitch{..}     -> handleBranches

  where
     -- Calls and ForeignCalls are handled the same way:
     lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
              -> ( [CmmNode O O]
                 , ByteOff
                 , CmmNode O C
                 , [CmmBlock]
                 , BlockEnv StackMap
                 )
     lastCall lbl cml_args cml_ret_args cml_ret_off
      =  ( assignments
         , spOffsetForCall sp0 cont_stack cml_args
         , last
         , [] -- no new blocks
         , mapSingleton lbl cont_stack )
      where
         (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off


     prepareStack lbl cml_ret_args cml_ret_off
       | Just cont_stack <- mapLookup lbl stackmaps
             -- If we have already seen this continuation before, then
             -- we just have to make the stack look the same:
       = (fixupStack stack0 cont_stack, cont_stack)
             -- Otherwise, we have to allocate the stack frame
       | otherwise
       = (save_assignments, new_cont_stack)
       where
        (new_cont_stack, save_assignments)
           = setupStackFrame dflags lbl liveness cml_ret_off cml_ret_args stack0


     -- For other last nodes (branches), if any of the targets is a
     -- proc point, we have to set up the stack to match what the proc
     -- point is expecting.
     --
     handleBranches :: UniqSM ( [CmmNode O O]
                              , ByteOff
                              , CmmNode O C
                              , [CmmBlock]
                              , BlockEnv StackMap )

     handleBranches
         -- Note [diamond proc point]
       | Just l <- futureContinuation middle
       , (nub $ filter (`setMember` procpoints) $ successors last) == [l]
       = do
         let cont_args = mapFindWithDefault 0 l cont_info
             (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0)
             out = mapFromList [ (l', cont_stack)
                               | l' <- successors last ]
         return ( assigs
                , spOffsetForCall sp0 cont_stack (wORD_SIZE dflags)
                , last
                , []
                , out)

       | otherwise = do
         pps <- mapM handleBranch (successors last)
         let lbl_map :: LabelMap Label
             lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
             fix_lbl l = mapFindWithDefault l l lbl_map
         return ( []
                , 0
                , mapSuccessors fix_lbl last
                , concat [ blk | (_,_,_,blk) <- pps ]
                , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] )

     -- For each successor of this block
     handleBranch :: BlockId -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock])
     handleBranch l
        --   (a) if the successor already has a stackmap, we need to
        --       shuffle the current stack to make it look the same.
        --       We have to insert a new block to make this happen.
        | Just stack2 <- mapLookup l stackmaps
        = do
             let assigs = fixupStack stack0 stack2
             (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 assigs
             return (l, tmp_lbl, stack2, block)

        --   (b) if the successor is a proc point, save everything
        --       on the stack.
        | l `setMember` procpoints
        = do
             let cont_args = mapFindWithDefault 0 l cont_info
                 (stack2, assigs) =
                      --pprTrace "first visit to proc point"
                      --             (ppr l <+> ppr stack1) $
                      setupStackFrame dflags l liveness (sm_ret_off stack0)
                                                        cont_args stack0
             --
             (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 assigs
             return (l, tmp_lbl, stack2, block)

        --   (c) otherwise, the current StackMap is the StackMap for
        --       the continuation.  But we must remember to remove any
        --       variables from the StackMap that are *not* live at
        --       the destination, because this StackMap might be used
        --       by fixupStack if this is a join point.
        | otherwise = return (l, l, stack1, [])
        where live   = mapFindWithDefault (panic "handleBranch") l liveness
              stack1 = stack0 { sm_regs = filterUFM is_live (sm_regs stack0) }
              is_live (r,_) = r `elemRegSet` live


makeFixupBlock :: DynFlags -> ByteOff -> Label -> StackMap -> [CmmNode O O]
               -> UniqSM (Label, [CmmBlock])
makeFixupBlock dflags sp0 l stack assigs
  | null assigs && sp0 == sm_sp stack = return (l, [])
  | otherwise = do
      tmp_lbl <- liftM mkBlockId $ getUniqueM
      let sp_off = sp0 - sm_sp stack
          block  = blockJoin (CmmEntry tmp_lbl)
                             (maybeAddSpAdj dflags sp_off (blockFromList assigs))
                             (CmmBranch l)
      return (tmp_lbl, [block])


-- Sp is currently pointing to current_sp,
-- we want it to point to
--    (sm_sp cont_stack - sm_args cont_stack + args)
-- so the difference is
--    sp0 - (sm_sp cont_stack - sm_args cont_stack + args)
spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff
spOffsetForCall current_sp cont_stack args
  = current_sp - (sm_sp cont_stack - sm_args cont_stack + args)
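
-- For example, with invented numbers: if sp0 = 40 and the
-- continuation has sm_sp = 48 and sm_args = 8, then a call pushing
-- args = 8 bytes needs Sp at byte offset 48 - 8 + 8 = 48 from the
-- base, so spOffsetForCall returns 40 - 48 = -8 and the adjustment
-- emitted later is Sp = Sp + (-8).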


-- | create a sequence of assignments to establish the new StackMap,
-- given the old StackMap.
fixupStack :: StackMap -> StackMap -> [CmmNode O O]
fixupStack old_stack new_stack = concatMap move new_locs
 where
     old_map  = sm_regs old_stack
     new_locs = stackSlotRegs new_stack

     move (r,n)
       | Just (_,m) <- lookupUFM old_map r, n == m = []
       | otherwise = [CmmStore (CmmStackSlot Old n)
                               (CmmReg (CmmLocal r))]



setupStackFrame
         :: DynFlags
         -> BlockId                 -- label of continuation
         -> BlockEnv CmmLive        -- liveness
         -> ByteOff                 -- updfr
         -> ByteOff                 -- bytes of return values on stack
         -> StackMap                -- current StackMap
         -> (StackMap, [CmmNode O O])

setupStackFrame dflags lbl liveness updfr_off ret_args stack0
  = (cont_stack, assignments)
  where
      -- get the set of LocalRegs live in the continuation
      live = mapFindWithDefault Set.empty lbl liveness

      -- the stack from the base to updfr_off is off-limits.
      -- our new stack frame contains:
      --   * saved live variables
      --   * the return address [young(C) + 8]
      --   * the args for the call,
      --     which are replaced by the return values at the return
      --     point.

      -- everything up to updfr_off is off-limits
      -- stack1 contains updfr_off, plus everything we need to save
      (stack1, assignments) = allocate dflags updfr_off live stack0

      -- And the Sp at the continuation is:
      --   sm_sp stack1 + ret_args
      cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args
                         , sm_args = ret_args
                         , sm_ret_off = updfr_off
                         }


-- -----------------------------------------------------------------------------
-- Note [diamond proc point]
--
-- This special case looks for the pattern we get from a typical
-- tagged case expression:
--
--    Sp[young(L1)] = L1
--    if (R1 & 7) != 0 goto L1 else goto L2
--  L2:
--    call [R1] returns to L1
--  L1: live: {y}
--    x = R1
--
-- If we let the generic case handle this, we get
--
--    Sp[-16] = L1
--    if (R1 & 7) != 0 goto L1a else goto L2
--  L2:
--    Sp[-8] = y
--    Sp = Sp - 16
--    call [R1] returns to L1
--  L1a:
--    Sp[-8] = y
--    Sp = Sp - 16
--    goto L1
--  L1:
--    x = R1
--
-- The code for saving the live vars is duplicated in each branch, and
-- furthermore there is an extra jump in the fast path (assuming L1 is
-- a proc point, which it probably is if there is a heap check).
--
-- So to fix this we want to set up the stack frame before the
-- conditional jump.  How do we know when to do this, and when it is
-- safe?  The basic idea is, when we see the assignment
--
--   Sp[young(L)] = L
--
-- we know that
--   * we are definitely heading for L
--   * there can be no more reads from another stack area, because young(L)
--     overlaps with it.
--
-- We don't necessarily know that everything live at L is live now
-- (some might be assigned between here and the jump to L).  So we
-- simplify and only do the optimisation when we see
--
--   (1) a block containing an assignment of a return address L
--   (2) ending in a branch where one (and only one) continuation goes to L,
--       and no other continuations go to proc points.
--
-- then we allocate the stack frame for L at the end of the block,
-- before the branch.
--
-- We could generalise (2), but that would make it a bit more
-- complicated to handle, and this currently catches the common case.

futureContinuation :: Block CmmNode O O -> Maybe BlockId
futureContinuation middle = foldBlockNodesB f middle Nothing
   where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId
         f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _
               = Just l
         f _ r = r

-- -----------------------------------------------------------------------------
-- Saving live registers

-- | Given a set of live registers and a StackMap, save all the registers
-- on the stack and return the new StackMap and the assignments to do
-- the saving.
--
allocate :: DynFlags -> ByteOff -> RegSet -> StackMap
         -> (StackMap, [CmmNode O O])
allocate dflags ret_off live stackmap@StackMap{ sm_sp = sp0
                                              , sm_regs = regs0 }
 =
  -- pprTrace "allocate" (ppr live $$ ppr stackmap) $

   -- we only have to save regs that are not already in a slot
   let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
       regs1   = filterUFM (\(r,_) -> elemRegSet r live) regs0
   in

   -- make a map of the stack
   let stack = reverse $ Array.elems $
               accumArray (\_ x -> x) Empty (1, toWords dflags (max sp0 ret_off)) $
                 ret_words ++ live_words
            where ret_words =
                   [ (x, Occupied)
                   | x <- [ 1 .. toWords dflags ret_off] ]
                  live_words =
                   [ (toWords dflags x, Occupied)
                   | (r,off) <- eltsUFM regs1,
                     let w = localRegBytes dflags r,
                     x <- [ off, off - wORD_SIZE dflags .. off - w + 1] ]
   in

   -- Pass over the stack: find slots to save all the new live variables,
   -- choosing the oldest slots first (hence a foldr).
   let
       save slot ([], stack, n, assigs, regs) -- no more regs to save
          = ([], slot:stack, plusW dflags n 1, assigs, regs)

       save slot (to_save, stack, n, assigs, regs)
          = case slot of
               Occupied -> (to_save, Occupied:stack, plusW dflags n 1, assigs, regs)
               Empty
                 | Just (stack', r, to_save') <-
                       select_save to_save (slot:stack)
                 -> let assig = CmmStore (CmmStackSlot Old n')
                                         (CmmReg (CmmLocal r))
                        n' = plusW dflags n 1
                    in
                        (to_save', stack', n', assig : assigs, (r,(r,n')):regs)

                 | otherwise
                 -> (to_save, slot:stack, plusW dflags n 1, assigs, regs)

       -- we should do better here: right now we'll fit the smallest first,
       -- but it would make more sense to fit the biggest first.
       select_save :: [LocalReg] -> [StackSlot]
                   -> Maybe ([StackSlot], LocalReg, [LocalReg])
       select_save regs stack = go regs []
          where go []     _no_fit = Nothing
                go (r:rs) no_fit
                  | Just rest <- dropEmpty words stack
                  = Just (replicate words Occupied ++ rest, r, rs++no_fit)
                  | otherwise
                  = go rs (r:no_fit)
                  where words = localRegWords dflags r

       -- fill in empty slots as much as possible
       (still_to_save, save_stack, n, save_assigs, save_regs)
          = foldr save (to_save, [], 0, [], []) stack

       -- push any remaining live vars on the stack
       (push_sp, push_assigs, push_regs)
          = foldr push (n, [], []) still_to_save
          where
              push r (n, assigs, regs)
                = (n', assig : assigs, (r,(r,n')) : regs)
                where
                  n' = n + localRegBytes dflags r
                  assig = CmmStore (CmmStackSlot Old n')
                                   (CmmReg (CmmLocal r))

       trim_sp
          | not (null push_regs) = push_sp
          | otherwise
          = plusW dflags n (- length (takeWhile isEmpty save_stack))

       final_regs = regs1 `addListToUFM` push_regs
                          `addListToUFM` save_regs

   in
  -- XXX should be an assert
   if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else

   if (trim_sp .&. (wORD_SIZE dflags - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else

   ( stackmap { sm_regs = final_regs , sm_sp = trim_sp }
   , push_assigs ++ save_assigs )
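
-- An illustrative run, with invented numbers (8-byte words): suppose
-- ret_off = 0, sp0 = 16, the word at offset 16 is Occupied, the word
-- at offset 8 is Empty, and one one-word reg x must be saved.  The
-- foldr reaches the Empty word first (oldest slots first),
-- select_save fits x into it, and we emit Sp[old + 8] = x; nothing
-- new is pushed, so sm_sp stays at 16.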


-- -----------------------------------------------------------------------------
-- Manifesting Sp

-- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp.  The
-- block looks like this:
--
--    middle_pre       -- the middle nodes
--    Sp = Sp + sp_off -- Sp adjustment goes here
--    last             -- the last node
--
-- And we have some extra blocks too (that don't contain Sp adjustments)
--
-- The adjustment for middle_pre will be different from that for
-- middle_post, because the Sp adjustment intervenes.
--
manifestSp
   :: DynFlags
   -> BlockEnv StackMap  -- StackMaps for other blocks
   -> StackMap           -- StackMap for this block
   -> ByteOff            -- Sp on entry to the block
   -> ByteOff            -- SpHigh
   -> CmmNode C O        -- first node
   -> [CmmNode O O]      -- middle
   -> ByteOff            -- sp_off
   -> CmmNode O C        -- last node
   -> [CmmBlock]         -- new blocks
   -> [CmmBlock]         -- final blocks with Sp manifest

manifestSp dflags stackmaps stack0 sp0 sp_high
           first middle_pre sp_off last fixup_blocks
  = final_block : fixup_blocks'
  where
    area_off = getAreaOff stackmaps

    adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x
    adj_pre_sp  = mapExpDeep (areaToSp dflags sp0            sp_high area_off)
    adj_post_sp = mapExpDeep (areaToSp dflags (sp0 - sp_off) sp_high area_off)

    final_middle = maybeAddSpAdj dflags sp_off $
                   blockFromList $
                   map adj_pre_sp $
                   elimStackStores stack0 stackmaps area_off $
                   middle_pre

    final_last    = optStackCheck (adj_post_sp last)

    final_block   = blockJoin first final_middle final_last

    fixup_blocks' = map (mapBlock3' (id, adj_post_sp, id)) fixup_blocks


getAreaOff :: BlockEnv StackMap -> (Area -> StackLoc)
getAreaOff _ Old = 0
getAreaOff stackmaps (Young l) =
  case mapLookup l stackmaps of
    Just sm -> sm_sp sm - sm_args sm
    Nothing -> pprPanic "getAreaOff" (ppr l)


maybeAddSpAdj :: DynFlags -> ByteOff -> Block CmmNode O O -> Block CmmNode O O
maybeAddSpAdj _      0      block = block
maybeAddSpAdj dflags sp_off block
   = block `blockSnoc` CmmAssign spReg (cmmOffset dflags (CmmReg spReg) sp_off)


{-
Sp(L) is the Sp offset on entry to block L relative to the base of the
OLD area.

SpArgs(L) is the size of the young area for L, i.e. the number of
arguments.

 - in block L, each reference to [old + N] turns into
   [Sp + Sp(L) - N]

 - in block L, each reference to [young(L') + N] turns into
   [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]

 - be careful with the last node of each block: Sp has already been adjusted
   to be Sp + Sp(L) - Sp(L')
-}
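
-- A worked example, with invented numbers (8-byte words): if
-- Sp(L) = 24, then [old + 8] becomes [Sp + 24 - 8] = [Sp + 16]; and
-- if Sp(L') = 40 with SpArgs(L') = 8, then [young(L') + 8] becomes
-- [Sp + 24 - 40 + 8 - 8] = [Sp - 16].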

areaToSp :: DynFlags -> ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
areaToSp dflags sp_old _sp_hwm area_off (CmmStackSlot area n) =
  cmmOffset dflags (CmmReg spReg) (sp_old - area_off area - n)
areaToSp dflags _ sp_hwm _ (CmmLit CmmHighStackMark) = mkIntExpr dflags sp_hwm
areaToSp dflags _ _ _ (CmmMachOp (MO_U_Lt _)  -- Note [null stack check]
                          [CmmMachOp (MO_Sub _)
                                  [ CmmReg (CmmGlobal Sp)
                                  , CmmLit (CmmInt 0 _)],
                           CmmReg (CmmGlobal SpLim)]) = zeroExpr dflags
areaToSp _ _ _ _ other = other

-- -----------------------------------------------------------------------------
-- Note [null stack check]
--
-- If the high-water Sp is zero, then we end up with
--
--   if (Sp - 0 < SpLim) then .. else ..
--
-- and possibly some dead code for the failure case.  Optimising this
-- away depends on knowing that SpLim <= Sp, so it is really the job
-- of the stack layout algorithm, hence we do it now.  This is also
-- convenient because control-flow optimisation later will drop the
-- dead code.

optStackCheck :: CmmNode O C -> CmmNode O C
optStackCheck n = -- Note [null stack check]
 case n of
   CmmCondBranch (CmmLit (CmmInt 0 _)) _true false -> CmmBranch false
   other -> other


-- -----------------------------------------------------------------------------

-- | Eliminate stores of the form
--
--    Sp[area+n] = r
--
-- when we know that r is already in the same slot as Sp[area+n].  We
-- could do this in a later optimisation pass, but that would involve
-- a separate analysis and we already have the information to hand
-- here.  It helps clean up some extra stack stores in common cases.
--
-- Note that we may have to modify the StackMap as we walk through the
-- code using procMiddle, since an assignment to a variable in the
-- StackMap will invalidate its mapping there.
--
elimStackStores :: StackMap
                -> BlockEnv StackMap
                -> (Area -> ByteOff)
                -> [CmmNode O O]
                -> [CmmNode O O]
elimStackStores stackmap stackmaps area_off nodes
  = go stackmap nodes
  where
    go _stackmap [] = []
    go stackmap (n:ns)
     = case n of
         CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r))
            | Just (_,off) <- lookupUFM (sm_regs stackmap) r
            , area_off area + m == off
            -> -- pprTrace "eliminated a node!" (ppr r) $
               go stackmap ns
         _otherwise
            -> n : go (procMiddle stackmaps n stackmap) ns


-- -----------------------------------------------------------------------------
-- Update info tables to include stack liveness


setInfoTableStackMap :: DynFlags -> BlockEnv StackMap -> CmmDecl -> CmmDecl
setInfoTableStackMap dflags stackmaps (CmmProc top_info@TopInfo{..} l g)
  = CmmProc top_info{ info_tbls = mapMapWithKey fix_info info_tbls } l g
  where
    fix_info lbl info_tbl@CmmInfoTable{ cit_rep = StackRep _ } =
       info_tbl { cit_rep = StackRep (get_liveness lbl) }
    fix_info _ other = other

    get_liveness :: BlockId -> Liveness
    get_liveness lbl
      = case mapLookup lbl stackmaps of
          Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl <+> ppr info_tbls)
          Just sm -> stackMapToLiveness dflags sm

setInfoTableStackMap _ _ d = d


stackMapToLiveness :: DynFlags -> StackMap -> Liveness
stackMapToLiveness dflags StackMap{..} =
   reverse $ Array.elems $
        accumArray (\_ x -> x) True (toWords dflags sm_ret_off + 1,
                                     toWords dflags (sm_sp - sm_args)) live_words
   where
     live_words = [ (toWords dflags off, False)
                  | (r,off) <- eltsUFM sm_regs, isGcPtrType (localRegType r) ]
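
-- A small example, with invented numbers (8-byte words): for a frame
-- with sm_ret_off = 0, sm_sp = 24 and sm_args = 8 where a single GC
-- pointer is saved at byte offset 16, the array covers words 1..2 and
-- the resulting list is [False, True]: False marks the word holding
-- the live pointer, True a word with no pointer to describe.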


-- -----------------------------------------------------------------------------
-- Lowering safe foreign calls

{-
Note [lower safe foreign calls]

We start with

   Sp[young(L1)] = L1
 ,-----------------------
 | r1 = foo(x,y,z) returns to L1
 '-----------------------
 L1:
   r1 = R1 -- copyIn, inserted by mkSafeCall
   ...

the stack layout algorithm will arrange to save and reload everything
live across the call.  Our job now is to expand the call so we get

   Sp[young(L1)] = L1
 ,-----------------------
 | SAVE_THREAD_STATE()
 | token = suspendThread(BaseReg, interruptible)
 | r = foo(x,y,z)
 | BaseReg = resumeThread(token)
 | LOAD_THREAD_STATE()
 | R1 = r  -- copyOut
 | jump Sp[0]
 '-----------------------
 L1:
   r = R1 -- copyIn, inserted by mkSafeCall
   ...

Note the copyOut, which saves the results in the places that L1 is
expecting them (see Note [safe foreign call convention]).
-}

lowerSafeForeignCall :: DynFlags -> CmmBlock -> UniqSM CmmBlock
lowerSafeForeignCall dflags block
  | (entry, middle, CmmForeignCall { .. }) <- blockSplit block
  = do
    -- Both 'id' and 'new_base' are KindNonPtr because they're
    -- RTS-only objects and are not subject to garbage collection
    id <- newTemp (bWord dflags)
    new_base <- newTemp (cmmRegType dflags (CmmGlobal BaseReg))
    let (caller_save, caller_load) = callerSaveVolatileRegs dflags
    load_tso <- newTemp (gcWord dflags)
    load_stack <- newTemp (gcWord dflags)
    let suspend = saveThreadState dflags <*>
                  caller_save <*>
                  mkMiddle (callSuspendThread dflags id intrbl)
        midCall = mkUnsafeCall tgt res args
        resume  = mkMiddle (callResumeThread new_base id) <*>
                  -- Assign the result to BaseReg: we
                  -- might now have a different Capability!
                  mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
                  caller_load <*>
                  loadThreadState dflags load_tso load_stack

        (ret_args, regs, copyout) = copyOutOflow dflags NativeReturn Jump (Young succ)
                                                 (map (CmmReg . CmmLocal) res)
                                                 updfr (0, [])

        -- NB. after resumeThread returns, the top-of-stack probably contains
        -- the stack frame for succ, but it might not: if the current thread
        -- received an exception during the call, then the stack might be
        -- different.  Hence we continue by jumping to the top stack frame,
        -- not by jumping to succ.
        jump = CmmCall { cml_target    = CmmLoad (CmmReg spReg) (bWord dflags)
                       , cml_cont      = Just succ
                       , cml_args_regs = regs
                       , cml_args      = widthInBytes (wordWidth dflags)
                       , cml_ret_args  = ret_args
                       , cml_ret_off   = updfr }

    graph' <- lgraphOfAGraph $ suspend <*>
                               midCall <*>
                               resume  <*>
                               copyout <*>
                               mkLast jump

    case toBlockList graph' of
      [one] -> let (_, middle', last) = blockSplit one
               in return (blockJoin entry (middle `blockAppend` middle') last)
      _ -> panic "lowerSafeForeignCall0"

  -- Block doesn't end in a safe foreign call:
  | otherwise = return block


foreignLbl :: FastString -> CmmExpr
foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId name))

newTemp :: CmmType -> UniqSM LocalReg
newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep)

callSuspendThread :: DynFlags -> LocalReg -> Bool -> CmmNode O O
callSuspendThread dflags id intrbl =
  CmmUnsafeForeignCall
       (ForeignTarget (foreignLbl (fsLit "suspendThread"))
            (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint]))
       [id] [CmmReg (CmmGlobal BaseReg), mkIntExpr dflags (fromEnum intrbl)]

callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
callResumeThread new_base id =
  CmmUnsafeForeignCall
       (ForeignTarget (foreignLbl (fsLit "resumeThread"))
            (ForeignConvention CCallConv [AddrHint] [AddrHint]))
       [new_base] [CmmReg (CmmLocal id)]

-- -----------------------------------------------------------------------------

plusW :: DynFlags -> ByteOff -> WordOff -> ByteOff
plusW dflags b w = b + w * wORD_SIZE dflags

dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot]
dropEmpty 0 ss           = Just ss
dropEmpty n (Empty : ss) = dropEmpty (n-1) ss
dropEmpty _ _            = Nothing

isEmpty :: StackSlot -> Bool
isEmpty Empty = True
isEmpty _     = False

localRegBytes :: DynFlags -> LocalReg -> ByteOff
localRegBytes dflags r
    = roundUpToWords dflags (widthInBytes (typeWidth (localRegType r)))

localRegWords :: DynFlags -> LocalReg -> WordOff
localRegWords dflags = toWords dflags . localRegBytes dflags

toWords :: DynFlags -> ByteOff -> WordOff
toWords dflags x = x `quot` wORD_SIZE dflags


insertReloads :: StackMap -> [CmmNode O O]
insertReloads stackmap =
   [ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot Old sp)
                                     (localRegType r))
   | (r,sp) <- stackSlotRegs stackmap
   ]


stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
stackSlotRegs sm = eltsUFM (sm_regs sm)