Produce new-style Cmm from the Cmm parser
[ghc.git] / compiler / cmm / CmmLayoutStack.hs
1 {-# LANGUAGE RecordWildCards, GADTs #-}
2 module CmmLayoutStack (
3 cmmLayoutStack, setInfoTableStackMap
4 ) where
5
6 import StgCmmUtils ( callerSaveVolatileRegs ) -- XXX layering violation
7 import StgCmmForeign ( saveThreadState, loadThreadState ) -- XXX layering violation
8 import StgCmmLayout ( entryCode ) -- XXX layering violation
9
10 import Cmm
11 import BlockId
12 import CLabel
13 import CmmUtils
14 import MkGraph
15 import Module
16 import ForeignCall
17 import CmmLive
18 import CmmProcPoint
19 import SMRep
20 import Hoopl
21 import UniqSupply
22 import Maybes
23 import UniqFM
24 import Util
25
26 import DynFlags
27 import FastString
28 import Outputable
29 import qualified Data.Set as Set
30 import Control.Monad.Fix
31 import Data.Array as Array
32 import Data.Bits
33 import Data.List (nub)
34 import Control.Monad (liftM)
35
36 #include "HsVersions.h"
37
38
-- | One word-sized slot in our model of the stack during layout.
data StackSlot = Occupied | Empty
     -- Occupied: a return address or part of an update frame
     -- Empty: available; 'allocate' may spill a live variable here
41
-- Compact debug rendering: "XXX" for an occupied slot, "---" for an
-- empty one, so a stack dump reads as a column of slots.
instance Outputable StackSlot where
  ppr Occupied = ptext (sLit "XXX")
  ppr Empty    = ptext (sLit "---")
45
-- All stack locations are expressed as positive byte offsets from the
-- "base", which is defined to be the address above the return address
-- on the stack on entry to this CmmProc.
--
-- Lower addresses have higher StackLocs.
--
-- (A plain alias for ByteOff; it exists purely for readability.)
type StackLoc = ByteOff
53
54 {-
55 A StackMap describes the stack at any given point. At a continuation
56 it has a particular layout, like this:
57
58 | | <- base
59 |-------------|
60 | ret0 | <- base + 8
61 |-------------|
62 . upd frame . <- base + sm_ret_off
63 |-------------|
64 | |
65 . vars .
66 . (live/dead) .
67 | | <- base + sm_sp - sm_args
68 |-------------|
69 | ret1 |
70 . ret vals . <- base + sm_sp (<--- Sp points here)
71 |-------------|
72
73 Why do we include the final return address (ret0) in our stack map? I
74 have absolutely no idea, but it seems to be done that way consistently
75 in the rest of the code generator, so I played along here. --SDM
76
77 Note that we will be constructing an info table for the continuation
78 (ret1), which needs to describe the stack down to, but not including,
79 the update frame (or ret0, if there is no update frame).
80 -}
81
-- | A description of the stack contents at a particular program
-- point.  See the diagram in the comment above for how the fields
-- relate to the stack picture.
data StackMap = StackMap
 { sm_sp   :: StackLoc
       -- ^ the offset of Sp relative to the base on entry
       -- to this block.
 , sm_args :: ByteOff
       -- ^ the number of bytes of arguments in the area for this block
       -- Defn: the offset of young(L) relative to the base is given by
       -- (sm_sp - sm_args) of the StackMap for block L.
 , sm_ret_off :: ByteOff
       -- ^ Number of words of stack that we do not describe with an info
       -- table, because it contains an update frame.
 , sm_regs :: UniqFM (LocalReg,StackLoc)
       -- ^ regs on the stack, keyed by the register itself
 }
96
-- Debug rendering of a StackMap, one field per line.
instance Outputable StackMap where
  ppr sm = vcat
     [ text "Sp = " <> int (sm_sp sm)
     , text "sm_args = " <> int (sm_args sm)
     , text "sm_ret_off = " <> int (sm_ret_off sm)
     , text "sm_regs = " <> ppr (eltsUFM (sm_regs sm))
     ]
103
104
-- | The entry point of the pass: lay out the stack for one CmmGraph,
-- turning abstract stack slots into concrete offsets, and return the
-- rewritten graph together with a StackMap per block (used later by
-- 'setInfoTableStackMap' to build info tables).
cmmLayoutStack :: DynFlags -> ProcPointSet -> ByteOff -> CmmGraph
               -> UniqSM (CmmGraph, BlockEnv StackMap)
cmmLayoutStack dflags procpoints entry_args
               graph0@(CmmGraph { g_entry = entry })
  = do
    -- pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()

    -- We need liveness info.  We could do removeDeadAssignments at
    -- the same time, but it buys nothing over doing cmmSink later,
    -- and costs a lot more than just cmmLiveness.
    -- (graph, liveness) <- removeDeadAssignments graph0
    let (graph, liveness) = (graph0, cmmLiveness graph0)

    -- pprTrace "liveness" (ppr liveness) $ return ()
    let blocks = postorderDfs graph

    -- NB. knot-tying: the final stack maps and the final Sp
    -- high-water mark are fed back (lazily, note the lazy pattern)
    -- into 'layout' itself, so 'layout' must not force them eagerly.
    (final_stackmaps, _final_high_sp, new_blocks) <-
          mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
            layout dflags procpoints liveness entry entry_args
                   rec_stackmaps rec_high_sp blocks

    new_blocks' <- mapM (lowerSafeForeignCall dflags) new_blocks

    -- pprTrace ("Sp HWM") (ppr _final_high_sp) $ return ()
    return (ofBlockList entry new_blocks', final_stackmaps)
130
131
132
-- | Walk the blocks in dependency (DFS) order, threading an
-- accumulating map of StackMaps and the Sp high-water mark.
--
-- NB. the [final] arguments below are this function's own results,
-- fed back lazily via mfix in 'cmmLayoutStack'; they must only be
-- used in positions that do not force them during the traversal.
layout :: DynFlags
       -> BlockSet                      -- proc points
       -> BlockEnv CmmLive              -- liveness
       -> BlockId                       -- entry
       -> ByteOff                       -- stack args on entry

       -> BlockEnv StackMap             -- [final] stack maps
       -> ByteOff                       -- [final] Sp high water mark

       -> [CmmBlock]                    -- [in] blocks

       -> UniqSM
          ( BlockEnv StackMap           -- [out] stack maps
          , ByteOff                     -- [out] Sp high water mark
          , [CmmBlock]                  -- [out] new blocks
          )

layout dflags procpoints liveness entry entry_args final_stackmaps final_hwm blocks
  = go blocks init_stackmap entry_args []
  where
    (updfr, cont_info) = collectContInfo blocks

    -- The entry block's stack is just the incoming arguments.
    init_stackmap = mapSingleton entry StackMap{ sm_sp   = entry_args
                                               , sm_args = entry_args
                                               , sm_ret_off = updfr
                                               , sm_regs = emptyUFM
                                               }

    go [] acc_stackmaps acc_hwm acc_blocks
      = return (acc_stackmaps, acc_hwm, acc_blocks)

    go (b0 : bs) acc_stackmaps acc_hwm acc_blocks
      = do
       let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0

       let stack0@StackMap { sm_sp = sp0 }
               = mapFindWithDefault
                     (pprPanic "no stack map for" (ppr entry_lbl))
                     entry_lbl acc_stackmaps

       -- pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return ()

       -- (a) Update the stack map to include the effects of
       --     assignments in this block
       let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0

       -- (b) Insert assignments to reload all the live variables if this
       --     block is a proc point
       let middle1 = if entry_lbl `setMember` procpoints
                        then foldr blockCons middle0 (insertReloads stack0)
                        else middle0

       -- (c) Look at the last node and if we are making a call or
       --     jumping to a proc point, we must save the live
       --     variables, adjust Sp, and construct the StackMaps for
       --     each of the successor blocks.  See handleLastNode for
       --     details.
       (middle2, sp_off, last1, fixup_blocks, out)
           <- handleLastNode dflags procpoints liveness cont_info
                             acc_stackmaps stack1 middle0 last0

       -- pprTrace "layout(out)" (ppr out) $ return ()

       -- (d) Manifest Sp: run over the nodes in the block and replace
       --     CmmStackSlot with CmmLoad from Sp with a concrete offset.
       --
       -- our block:
       --    middle1          -- the original middle nodes
       --    middle2          -- live variable saves from handleLastNode
       --    Sp = Sp + sp_off -- Sp adjustment goes here
       --    last1            -- the last node
       --
       let middle_pre = blockToList $ foldl blockSnoc middle1 middle2

           sp_high = final_hwm - entry_args
               -- The stack check value is adjusted by the Sp offset on
               -- entry to the proc, which is entry_args.  We are
               -- assuming that we only do a stack check at the
               -- beginning of a proc, and we don't modify Sp before the
               -- check.

           final_blocks = manifestSp dflags final_stackmaps stack0 sp0 sp_high entry0
                              middle_pre sp_off last1 fixup_blocks

           acc_stackmaps' = mapUnion acc_stackmaps out

           -- If this block jumps to the GC, then we do not take its
           -- stack usage into account for the high-water mark.
           -- Otherwise, if the only stack usage is in the stack-check
           -- failure block itself, we will do a redundant stack
           -- check.  The stack has a buffer designed to accommodate
           -- the largest amount of stack needed for calling the GC.
           --
           this_sp_hwm | isGcJump last0 = 0
                       | otherwise      = sp0 - sp_off

           hwm' = maximum (acc_hwm : this_sp_hwm : map sm_sp (mapElems out))

       go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks)
232
233
234 -- -----------------------------------------------------------------------------
235
-- | Is this last node a tail call into the garbage collector?
-- Not foolproof, but GCFun is the culprit we most want to catch.
isGcJump :: CmmNode O C -> Bool
isGcJump node =
  case node of
    CmmCall { cml_target = CmmReg (CmmGlobal gr) }
        -> gr `elem` [GCFun, GCEnter1]
    _other_node
        -> False
241
242 -- -----------------------------------------------------------------------------
243
244 -- This doesn't seem right somehow. We need to find out whether this
245 -- proc will push some update frame material at some point, so that we
246 -- can avoid using that area of the stack for spilling. The
247 -- updfr_space field of the CmmProc *should* tell us, but it doesn't
248 -- (I think maybe it gets filled in later when we do proc-point
249 -- splitting).
250 --
251 -- So we'll just take the max of all the cml_ret_offs. This could be
252 -- unnecessarily pessimistic, but probably not in the code we
253 -- generate.
254
-- | Scan every block's last node, returning the maximum update-frame
-- offset seen across all calls, together with a map from each
-- continuation label to the bytes of return values it receives on
-- the stack.  (See the comment above for why we take the max.)
collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff)
collectContInfo blocks
  = (maximum ret_offs, mapFromList (catMaybes mb_argss))
  where
    (mb_argss, ret_offs) = unzip (map get_cont blocks)

    get_cont blk =
      case lastNode blk of
        CmmCall { cml_cont     = Just l
                , cml_ret_args = ret_args
                , cml_ret_off  = ret_off }
            -> (Just (l, ret_args), ret_off)
        CmmForeignCall { succ = cont, updfr = upd }
            -> (Just (cont, 0), upd) -- ??
        _other
            -> (Nothing, 0)
268
269
270 -- -----------------------------------------------------------------------------
271 -- Updating the StackMap from middle nodes
272
273 -- Look for loads from stack slots, and update the StackMap. This is
274 -- purely for optimisation reasons, so that we can avoid saving a
275 -- variable back to a different stack slot if it is already on the
276 -- stack.
277 --
278 -- This happens a lot: for example when function arguments are passed
279 -- on the stack and need to be immediately saved across a call, we
280 -- want to just leave them where they are on the stack.
281 --
-- | Update a StackMap with the effect of one node: loading a stack
-- slot into a register records where that register lives, while any
-- other assignment to a register invalidates its recorded slot.
procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap
procMiddle stackmaps (CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _)) sm
  = sm { sm_regs = addToUFM (sm_regs sm) r (r, loc) }
  where loc = getStackLoc area off stackmaps
procMiddle _ (CmmAssign (CmmLocal r) _other) sm
  = sm { sm_regs = delFromUFM (sm_regs sm) r }
procMiddle _ _other sm
  = sm
292
-- | Resolve an (Area, offset) pair to an absolute StackLoc; young
-- areas are located via the continuation's StackMap.
getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc
getStackLoc Old       n _         = n
getStackLoc (Young l) n stackmaps =
  maybe (pprPanic "getStackLoc" (ppr l))
        (\sm -> sm_sp sm - sm_args sm + n)
        (mapLookup l stackmaps)
299
300
301 -- -----------------------------------------------------------------------------
302 -- Handling stack allocation for a last node
303
304 -- We take a single last node and turn it into:
305 --
306 -- C1 (some statements)
307 -- Sp = Sp + N
308 -- C2 (some more statements)
309 -- call f() -- the actual last node
310 --
311 -- plus possibly some more blocks (we may have to add some fixup code
312 -- between the last node and the continuation).
313 --
314 -- C1: is the code for saving the variables across this last node onto
315 -- the stack, if the continuation is a call or jumps to a proc point.
316 --
317 -- C2: if the last node is a safe foreign call, we have to inject some
318 -- extra code that goes *after* the Sp adjustment.
319
-- | Deal with the last node of a block: see the large comment above
-- for the C1/C2 decomposition this produces.
handleLastNode
   :: DynFlags -> ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff
   -> BlockEnv StackMap -> StackMap
   -> Block CmmNode O O
   -> CmmNode O C
   -> UniqSM
      ( [CmmNode O O]      -- nodes to go *before* the Sp adjustment
      , ByteOff            -- amount to adjust Sp
      , CmmNode O C        -- new last node
      , [CmmBlock]         -- new blocks
      , BlockEnv StackMap  -- stackmaps for the continuations
      )

handleLastNode dflags procpoints liveness cont_info stackmaps
               stack0@StackMap { sm_sp = sp0 } middle last
 = case last of
    --  At each return / tail call,
    --  adjust Sp to point to the last argument pushed, which
    --  is cml_args, after popping any other junk from the stack.
    CmmCall{ cml_cont = Nothing, .. } -> do
      let sp_off = sp0 - cml_args
      return ([], sp_off, last, [], mapEmpty)

    --  At each CmmCall with a continuation:
    CmmCall{ cml_cont = Just cont_lbl, .. } ->
       return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off

    CmmForeignCall{ succ = cont_lbl, .. } -> do
       return $ lastCall cont_lbl (wORD_SIZE dflags) (wORD_SIZE dflags) (sm_ret_off stack0)
            -- one word each for args and results: the return address

    CmmBranch{..}     ->  handleBranches
    CmmCondBranch{..} ->  handleBranches
    CmmSwitch{..}     ->  handleBranches

  where
     -- Calls and ForeignCalls are handled the same way:
     lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
              -> ( [CmmNode O O]
                 , ByteOff
                 , CmmNode O C
                 , [CmmBlock]
                 , BlockEnv StackMap
                 )
     lastCall lbl cml_args cml_ret_args cml_ret_off
      =  ( assignments
         , spOffsetForCall sp0 cont_stack cml_args
         , last
         , [] -- no new blocks
         , mapSingleton lbl cont_stack )
      where
         (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off

     -- Compute the continuation's StackMap and the saves needed to
     -- establish it, reusing an existing StackMap when the
     -- continuation has been seen before.
     prepareStack lbl cml_ret_args cml_ret_off
       | Just cont_stack <- mapLookup lbl stackmaps
             -- If we have already seen this continuation before, then
             -- we just have to make the stack look the same:
       = (fixupStack stack0 cont_stack, cont_stack)
             -- Otherwise, we have to allocate the stack frame
       | otherwise
       = (save_assignments, new_cont_stack)
       where
        (new_cont_stack, save_assignments)
           = setupStackFrame dflags lbl liveness cml_ret_off cml_ret_args stack0


     -- For other last nodes (branches), if any of the targets is a
     -- proc point, we have to set up the stack to match what the proc
     -- point is expecting.
     --
     handleBranches :: UniqSM ( [CmmNode O O]
                              , ByteOff
                              , CmmNode O C
                              , [CmmBlock]
                              , BlockEnv StackMap )

     handleBranches
         -- Note [diamond proc point]
       | Just l <- futureContinuation middle
       , (nub $ filter (`setMember` procpoints) $ successors last) == [l]
       = do
         let cont_args = mapFindWithDefault 0 l cont_info
             (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0)
             out = mapFromList [ (l', cont_stack)
                               | l' <- successors last ]
         return ( assigs
                , spOffsetForCall sp0 cont_stack (wORD_SIZE dflags)
                , last
                , []
                , out)

        | otherwise = do
          -- Handle each successor independently, then retarget the
          -- last node at any fixup blocks we had to create.
          pps <- mapM handleBranch (successors last)
          let lbl_map :: LabelMap Label
              lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
              fix_lbl l = mapFindWithDefault l l lbl_map
          return ( []
                 , 0
                 , mapSuccessors fix_lbl last
                 , concat [ blk | (_,_,_,blk) <- pps ]
                 , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] )

     -- For each successor of this block
     handleBranch :: BlockId -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock])
     handleBranch l
        --   (a) if the successor already has a stackmap, we need to
        --       shuffle the current stack to make it look the same.
        --       We have to insert a new block to make this happen.
        | Just stack2 <- mapLookup l stackmaps
        = do
             let assigs = fixupStack stack0 stack2
             (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 assigs
             return (l, tmp_lbl, stack2, block)

        --   (b) if the successor is a proc point, save everything
        --       on the stack.
        | l `setMember` procpoints
        = do
             let cont_args = mapFindWithDefault 0 l cont_info
                 (stack2, assigs) =
                      --pprTrace "first visit to proc point"
                      --             (ppr l <+> ppr stack1) $
                      setupStackFrame dflags l liveness (sm_ret_off stack0)
                                                       cont_args stack0
             --
             (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 assigs
             return (l, tmp_lbl, stack2, block)

        --   (c) otherwise, the current StackMap is the StackMap for
        --       the continuation.  But we must remember to remove any
        --       variables from the StackMap that are *not* live at
        --       the destination, because this StackMap might be used
        --       by fixupStack if this is a join point.
        | otherwise = return (l, l, stack1, [])
        where live = mapFindWithDefault (panic "handleBranch") l liveness
              stack1 = stack0 { sm_regs = filterUFM is_live (sm_regs stack0) }
              is_live (r,_) = r `elemRegSet` live
458
459
-- | Build a fixup block: a fresh block that performs @assigs@,
-- adjusts Sp, and then jumps to @l@.  If there is nothing to do we
-- return the original label and no new blocks.
makeFixupBlock :: DynFlags -> ByteOff -> Label -> StackMap -> [CmmNode O O]
               -> UniqSM (Label, [CmmBlock])
makeFixupBlock dflags sp0 l stack assigs
  | null assigs && sp0 == sm_sp stack = return (l, [])
  | otherwise = do
     uniq <- getUniqueM
     let tmp_lbl = mkBlockId uniq
         sp_off  = sp0 - sm_sp stack
         blk     = blockJoin (CmmEntry tmp_lbl)
                             (maybeAddSpAdj dflags sp_off (blockFromList assigs))
                             (CmmBranch l)
     return (tmp_lbl, [blk])
471
472
473 -- Sp is currently pointing to current_sp,
474 -- we want it to point to
475 -- (sm_sp cont_stack - sm_args cont_stack + args)
476 -- so the difference is
477 -- sp0 - (sm_sp cont_stack - sm_args cont_stack + args)
spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff
spOffsetForCall current_sp cont_stack args
  = current_sp - wanted_sp
  where
    -- the Sp value the continuation expects on entry
    wanted_sp = sm_sp cont_stack - sm_args cont_stack + args
481
482
-- | Create the sequence of stores needed to turn the old stack
-- layout into the new one.  A register already sitting in the slot
-- the new StackMap expects needs no store at all.
fixupStack :: StackMap -> StackMap -> [CmmNode O O]
fixupStack old_stack new_stack =
  [ CmmStore (CmmStackSlot Old n) (CmmReg (CmmLocal r))
  | (r, n) <- stackSlotRegs new_stack
  , not (alreadyInPlace r n)
  ]
  where
    old_map = sm_regs old_stack

    -- is r already saved at offset n in the old layout?
    alreadyInPlace r n =
      case lookupUFM old_map r of
        Just (_, m) -> m == n
        Nothing     -> False
495
496
497
-- | Allocate a fresh stack frame for a continuation: save the live
-- variables, then describe the Sp position at the continuation.
setupStackFrame
             :: DynFlags
             -> BlockId                 -- label of continuation
             -> BlockEnv CmmLive        -- liveness
             -> ByteOff                 -- updfr
             -> ByteOff                 -- bytes of return values on stack
             -> StackMap                -- current StackMap
             -> (StackMap, [CmmNode O O])

setupStackFrame dflags lbl liveness updfr_off ret_args stack0
  = (cont_stack, assignments)
  where
      -- get the set of LocalRegs live in the continuation
      live = mapFindWithDefault Set.empty lbl liveness

      -- the stack from the base to updfr_off is off-limits.
      -- our new stack frame contains:
      --   * saved live variables
      --   * the return address [young(C) + 8]
      --   * the args for the call,
      --     which are replaced by the return values at the return
      --     point.

      -- everything up to updfr_off is off-limits
      -- stack1 contains updfr_off, plus everything we need to save
      (stack1, assignments) = allocate dflags updfr_off live stack0

      -- And the Sp at the continuation is:
      --   sm_sp stack1 + ret_args
      cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args
                         , sm_args = ret_args
                         , sm_ret_off = updfr_off
                         }
531
532
533 -- -----------------------------------------------------------------------------
534 -- Note [diamond proc point]
535 --
536 -- This special case looks for the pattern we get from a typical
537 -- tagged case expression:
538 --
539 -- Sp[young(L1)] = L1
540 -- if (R1 & 7) != 0 goto L1 else goto L2
541 -- L2:
542 -- call [R1] returns to L1
543 -- L1: live: {y}
544 -- x = R1
545 --
546 -- If we let the generic case handle this, we get
547 --
548 -- Sp[-16] = L1
549 -- if (R1 & 7) != 0 goto L1a else goto L2
550 -- L2:
551 -- Sp[-8] = y
552 -- Sp = Sp - 16
553 -- call [R1] returns to L1
554 -- L1a:
555 -- Sp[-8] = y
556 -- Sp = Sp - 16
557 -- goto L1
558 -- L1:
559 -- x = R1
560 --
561 -- The code for saving the live vars is duplicated in each branch, and
562 -- furthermore there is an extra jump in the fast path (assuming L1 is
563 -- a proc point, which it probably is if there is a heap check).
564 --
565 -- So to fix this we want to set up the stack frame before the
566 -- conditional jump. How do we know when to do this, and when it is
567 -- safe? The basic idea is, when we see the assignment
568 --
569 -- Sp[young(L)] = L
570 --
571 -- we know that
572 -- * we are definitely heading for L
573 -- * there can be no more reads from another stack area, because young(L)
574 -- overlaps with it.
575 --
576 -- We don't necessarily know that everything live at L is live now
577 -- (some might be assigned between here and the jump to L). So we
578 -- simplify and only do the optimisation when we see
579 --
580 -- (1) a block containing an assignment of a return address L
581 -- (2) ending in a branch where one (and only) continuation goes to L,
582 -- and no other continuations go to proc points.
583 --
584 -- then we allocate the stack frame for L at the end of the block,
585 -- before the branch.
586 --
587 -- We could generalise (2), but that would make it a bit more
588 -- complicated to handle, and this currently catches the common case.
589
-- | Scan the middle nodes for a store of a return address into a
-- young area (Sp[young(L)] = L); if one is found, return L.  Used to
-- detect the pattern in Note [diamond proc point] above.
futureContinuation :: Block CmmNode O O -> Maybe BlockId
futureContinuation middle = foldBlockNodesB f middle Nothing
    where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId
          -- a matching store overrides whatever was accumulated so far
          f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _
              = Just l
          f _ r = r
596
597 -- -----------------------------------------------------------------------------
598 -- Saving live registers
599
600 -- | Given a set of live registers and a StackMap, save all the registers
601 -- on the stack and return the new StackMap and the assignments to do
602 -- the saving.
603 --
allocate :: DynFlags -> ByteOff -> RegSet -> StackMap
         -> (StackMap, [CmmNode O O])
allocate dflags ret_off live stackmap@StackMap{ sm_sp = sp0
                                              , sm_regs = regs0 }
 =
  -- pprTrace "allocate" (ppr live $$ ppr stackmap) $

   -- we only have to save regs that are not already in a slot
   let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
       regs1   = filterUFM (\(r,_) -> elemRegSet r live) regs0
   in

   -- make a map of the stack: a list of slots, starting from the
   -- *oldest* slot (hence the reverse), marking the return-address /
   -- update-frame area and already-saved live variables as Occupied
   let stack = reverse $ Array.elems $
               accumArray (\_ x -> x) Empty (1, toWords dflags (max sp0 ret_off)) $
                 ret_words ++ live_words
            where ret_words =
                   [ (x, Occupied)
                   | x <- [ 1 .. toWords dflags ret_off] ]
                  live_words =
                   [ (toWords dflags x, Occupied)
                   | (r,off) <- eltsUFM regs1,
                     let w = localRegBytes dflags r,
                     x <- [ off, off - wORD_SIZE dflags .. off - w + 1] ]
   in

   -- Pass over the stack: find slots to save all the new live variables,
   -- choosing the oldest slots first (hence a foldr).
   let
       -- state: (regs still to save, rebuilt stack, current offset,
       --         accumulated store nodes, accumulated (reg,slot) pairs)
       save slot ([], stack, n, assigs, regs) -- no more regs to save
          = ([], slot:stack, plusW dflags n 1, assigs, regs)
       save slot (to_save, stack, n, assigs, regs)
          = case slot of
               Occupied ->  (to_save, Occupied:stack, plusW dflags n 1, assigs, regs)
               Empty
                 | Just (stack', r, to_save') <-
                       select_save to_save (slot:stack)
                 -> let assig = CmmStore (CmmStackSlot Old n')
                                         (CmmReg (CmmLocal r))
                        n' = plusW dflags n 1
                   in
                        (to_save', stack', n', assig : assigs, (r,(r,n')):regs)

                 | otherwise
                 -> (to_save, slot:stack, plusW dflags n 1, assigs, regs)

       -- we should do better here: right now we'll fit the smallest first,
       -- but it would make more sense to fit the biggest first.
       select_save :: [LocalReg] -> [StackSlot]
                   -> Maybe ([StackSlot], LocalReg, [LocalReg])
       select_save regs stack = go regs []
          where go []     _no_fit = Nothing
                go (r:rs) no_fit
                  | Just rest <- dropEmpty words stack
                  = Just (replicate words Occupied ++ rest, r, rs++no_fit)
                  | otherwise
                  = go rs (r:no_fit)
                  where words = localRegWords dflags r

       -- fill in empty slots as much as possible
       (still_to_save, save_stack, n, save_assigs, save_regs)
          = foldr save (to_save, [], 0, [], []) stack

       -- push any remaining live vars on the stack
       (push_sp, push_assigs, push_regs)
          = foldr push (n, [], []) still_to_save
          where
              push r (n, assigs, regs)
                = (n', assig : assigs, (r,(r,n')) : regs)
                where
                  n' = n + localRegBytes dflags r
                  assig = CmmStore (CmmStackSlot Old n')
                                   (CmmReg (CmmLocal r))

       -- if we pushed nothing, trim Sp back past any trailing empty
       -- slots we did not need
       trim_sp
          | not (null push_regs) = push_sp
          | otherwise
          = plusW dflags n (- length (takeWhile isEmpty save_stack))

       final_regs = regs1 `addListToUFM` push_regs
                          `addListToUFM` save_regs

   in
  -- XXX should be an assert
   if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else

   -- sanity: the new Sp must be word-aligned
   if (trim_sp .&. (wORD_SIZE dflags - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else

   ( stackmap { sm_regs = final_regs , sm_sp = trim_sp }
   , push_assigs ++ save_assigs )
694
695
696 -- -----------------------------------------------------------------------------
697 -- Manifesting Sp
698
699 -- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp. The
700 -- block looks like this:
701 --
702 -- middle_pre -- the middle nodes
703 -- Sp = Sp + sp_off -- Sp adjustment goes here
704 -- last -- the last node
705 --
706 -- And we have some extra blocks too (that don't contain Sp adjustments)
707 --
708 -- The adjustment for middle_pre will be different from that for
709 -- middle_post, because the Sp adjustment intervenes.
710 --
manifestSp
   :: DynFlags
   -> BlockEnv StackMap  -- StackMaps for other blocks
   -> StackMap           -- StackMap for this block
   -> ByteOff            -- Sp on entry to the block
   -> ByteOff            -- SpHigh
   -> CmmNode C O        -- first node
   -> [CmmNode O O]      -- middle
   -> ByteOff            -- sp_off
   -> CmmNode O C        -- last node
   -> [CmmBlock]         -- new blocks
   -> [CmmBlock]         -- final blocks with Sp manifest

manifestSp dflags stackmaps stack0 sp0 sp_high
           first middle_pre sp_off last fixup_blocks
  = final_block : fixup_blocks'
  where
    area_off = getAreaOff stackmaps

    -- Nodes before the Sp adjustment use the entry Sp (sp0); the
    -- last node and fixup blocks see the adjusted Sp (sp0 - sp_off).
    adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x
    adj_pre_sp  = mapExpDeep (areaToSp dflags sp0            sp_high area_off)
    adj_post_sp = mapExpDeep (areaToSp dflags (sp0 - sp_off) sp_high area_off)

    final_middle = maybeAddSpAdj dflags sp_off $
                   blockFromList $
                   map adj_pre_sp $
                   elimStackStores stack0 stackmaps area_off $
                   middle_pre

    final_last    = optStackCheck (adj_post_sp last)

    final_block   = blockJoin first final_middle final_last

    fixup_blocks' = map (mapBlock3' (id, adj_post_sp, id)) fixup_blocks
745
746
-- | The byte offset of an Area relative to the base: the Old area
-- starts at 0; a young area is located via its block's StackMap.
getAreaOff :: BlockEnv StackMap -> (Area -> StackLoc)
getAreaOff _ Old = 0
getAreaOff stackmaps (Young l) =
  maybe (pprPanic "getAreaOff" (ppr l))
        (\sm -> sm_sp sm - sm_args sm)
        (mapLookup l stackmaps)
753
754
-- | Append an Sp := Sp + sp_off adjustment to the block, unless the
-- offset is zero, in which case the block is returned untouched.
maybeAddSpAdj :: DynFlags -> ByteOff -> Block CmmNode O O -> Block CmmNode O O
maybeAddSpAdj dflags sp_off block
  | sp_off == 0 = block
  | otherwise   = block `blockSnoc` adj
  where
    adj = CmmAssign spReg (cmmOffset dflags (CmmReg spReg) sp_off)
759
760
761 {-
762 Sp(L) is the Sp offset on entry to block L relative to the base of the
763 OLD area.
764
765 SpArgs(L) is the size of the young area for L, i.e. the number of
766 arguments.
767
768 - in block L, each reference to [old + N] turns into
769 [Sp + Sp(L) - N]
770
771 - in block L, each reference to [young(L') + N] turns into
772 [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]
773
774 - be careful with the last node of each block: Sp has already been adjusted
775 to be Sp + Sp(L) - Sp(L')
776 -}
777
-- | Rewrite one expression: turn abstract stack slots into concrete
-- Sp-relative addresses (see the offset algebra in the comment
-- above), replace CmmHighStackMark with the computed high-water
-- mark, and constant-fold the degenerate stack check.
areaToSp :: DynFlags -> ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
areaToSp dflags sp_old _sp_hwm area_off (CmmStackSlot area n) =
  cmmOffset dflags (CmmReg spReg) (sp_old - area_off area - n)
areaToSp dflags _ sp_hwm _ (CmmLit CmmHighStackMark) = mkIntExpr dflags sp_hwm
areaToSp dflags _ _ _ (CmmMachOp (MO_U_Lt _)  -- Note [null stack check]
                          [CmmMachOp (MO_Sub _)
                              [ CmmReg (CmmGlobal Sp)
                              , CmmLit (CmmInt 0 _)],
                           CmmReg (CmmGlobal SpLim)]) = zeroExpr dflags
areaToSp _ _ _ _ other = other
788
789 -- -----------------------------------------------------------------------------
790 -- Note [null stack check]
791 --
792 -- If the high-water Sp is zero, then we end up with
793 --
794 -- if (Sp - 0 < SpLim) then .. else ..
795 --
796 -- and possibly some dead code for the failure case. Optimising this
797 -- away depends on knowing that SpLim <= Sp, so it is really the job
798 -- of the stack layout algorithm, hence we do it now. This is also
799 -- convenient because control-flow optimisation later will drop the
800 -- dead code.
801
-- | See Note [null stack check]: when the stack-check condition has
-- been folded to the constant zero, drop the dead conditional and
-- branch straight to the "no overflow" continuation.
optStackCheck :: CmmNode O C -> CmmNode O C
optStackCheck (CmmCondBranch (CmmLit (CmmInt 0 _)) _true false)
  = CmmBranch false
optStackCheck other
  = other
807
808
809 -- -----------------------------------------------------------------------------
810
811 -- | Eliminate stores of the form
812 --
813 -- Sp[area+n] = r
814 --
815 -- when we know that r is already in the same slot as Sp[area+n]. We
816 -- could do this in a later optimisation pass, but that would involve
817 -- a separate analysis and we already have the information to hand
818 -- here. It helps clean up some extra stack stores in common cases.
819 --
820 -- Note that we may have to modify the StackMap as we walk through the
821 -- code using procMiddle, since an assignment to a variable in the
822 -- StackMap will invalidate its mapping there.
823 --
elimStackStores :: StackMap
                -> BlockEnv StackMap
                -> (Area -> ByteOff)
                -> [CmmNode O O]
                -> [CmmNode O O]
elimStackStores stackmap stackmaps area_off nodes
  = go stackmap nodes
  where
    go _stackmap [] = []
    go stackmap (n:ns)
     = case n of
         -- drop a store whose register is already recorded at exactly
         -- this slot in the current StackMap
         CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r))
            | Just (_,off) <- lookupUFM (sm_regs stackmap) r
            , area_off area + m == off
            -> -- pprTrace "eliminated a node!" (ppr r) $
               go stackmap ns
         _otherwise
            -- keep the node, updating the StackMap for its effects
            -> n : go (procMiddle stackmaps n stackmap) ns
842
843
844 -- -----------------------------------------------------------------------------
845 -- Update info tables to include stack liveness
846
847
-- | Fill in the stack liveness of each StackRep info table in a
-- CmmProc, using the StackMaps computed by the layout pass.
-- Non-proc declarations are returned unchanged.
setInfoTableStackMap :: DynFlags -> BlockEnv StackMap -> CmmDecl -> CmmDecl
setInfoTableStackMap dflags stackmaps (CmmProc top_info@TopInfo{..} l g)
  = CmmProc top_info{ info_tbls = mapMapWithKey fix_info info_tbls } l g
  where
    fix_info lbl info_tbl@CmmInfoTable{ cit_rep = StackRep _ } =
       info_tbl { cit_rep = StackRep (get_liveness lbl) }
    fix_info _ other = other

    get_liveness :: BlockId -> Liveness
    get_liveness lbl
      = case mapLookup lbl stackmaps of
          Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl <+> ppr info_tbls)
          Just sm -> stackMapToLiveness dflags sm

setInfoTableStackMap _ _ d = d
863
864
-- | Convert a StackMap to per-word liveness for the info table: one
-- Bool per word from just above the update frame up to the young
-- area.  Words holding live GC pointers get False; every other word
-- defaults to True.
stackMapToLiveness :: DynFlags -> StackMap -> Liveness
stackMapToLiveness dflags StackMap{..} =
   reverse $ Array.elems $
        accumArray (\_ x -> x) True (toWords dflags sm_ret_off + 1,
                                     toWords dflags (sm_sp - sm_args)) live_words
   where
     live_words =  [ (toWords dflags off, False)
                   | (r,off) <- eltsUFM sm_regs, isGcPtrType (localRegType r) ]
873
874
875 -- -----------------------------------------------------------------------------
876 -- Lowering safe foreign calls
877
878 {-
879 Note [lower safe foreign calls]
880
881 We start with
882
883 Sp[young(L1)] = L1
884 ,-----------------------
885 | r1 = foo(x,y,z) returns to L1
886 '-----------------------
887 L1:
888 R1 = r1 -- copyIn, inserted by mkSafeCall
889 ...
890
891 the stack layout algorithm will arrange to save and reload everything
892 live across the call. Our job now is to expand the call so we get
893
894 Sp[young(L1)] = L1
895 ,-----------------------
896 | SAVE_THREAD_STATE()
897 | token = suspendThread(BaseReg, interruptible)
898 | r = foo(x,y,z)
899 | BaseReg = resumeThread(token)
900 | LOAD_THREAD_STATE()
901 | R1 = r -- copyOut
902 | jump Sp[0]
903 '-----------------------
904 L1:
905 r = R1 -- copyIn, inserted by mkSafeCall
906 ...
907
908 Note the copyOut, which saves the results in the places that L1 is
expecting them (see Note [safe foreign call convention]).
910 -}
911
-- | Expand a block ending in a safe foreign call into the
-- suspend/call/resume sequence; see Note [lower safe foreign calls].
-- NB. the (<*>) below is MkGraph's graph-concatenation operator, not
-- Applicative.
lowerSafeForeignCall :: DynFlags -> CmmBlock -> UniqSM CmmBlock
lowerSafeForeignCall dflags block
  | (entry, middle, CmmForeignCall { .. }) <- blockSplit block
  = do
    -- Both 'id' and 'new_base' are KindNonPtr because they're
    -- RTS-only objects and are not subject to garbage collection
    id <- newTemp (bWord dflags)
    new_base <- newTemp (cmmRegType dflags (CmmGlobal BaseReg))
    let (caller_save, caller_load) = callerSaveVolatileRegs dflags
    load_tso <- newTemp (gcWord dflags)
    load_stack <- newTemp (gcWord dflags)
    let suspend = saveThreadState dflags <*>
                  caller_save <*>
                  mkMiddle (callSuspendThread dflags id intrbl)
        midCall = mkUnsafeCall tgt res args
        resume  = mkMiddle (callResumeThread new_base id) <*>
                  -- Assign the result to BaseReg: we
                  -- might now have a different Capability!
                  mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
                  caller_load <*>
                  loadThreadState dflags load_tso load_stack

        -- the copyOut: put the call results where the continuation
        -- expects them (see Note [lower safe foreign calls])
        (ret_args, regs, copyout) = copyOutOflow dflags NativeReturn Jump (Young succ)
                                         (map (CmmReg . CmmLocal) res)
                                         updfr []

        -- NB. after resumeThread returns, the top-of-stack probably contains
        -- the stack frame for succ, but it might not: if the current thread
        -- received an exception during the call, then the stack might be
        -- different.  Hence we continue by jumping to the top stack frame,
        -- not by jumping to succ.
        jump = CmmCall { cml_target    = entryCode dflags $
                                         CmmLoad (CmmReg spReg) (bWord dflags)
                       , cml_cont      = Just succ
                       , cml_args_regs = regs
                       , cml_args      = widthInBytes (wordWidth dflags)
                       , cml_ret_args  = ret_args
                       , cml_ret_off   = updfr }

    graph' <- lgraphOfAGraph $ suspend <*>
                               midCall <*>
                               resume  <*>
                               copyout <*>
                               mkLast jump

    -- the expansion must stay a single block so we can splice it back
    -- in place of the original
    case toBlockList graph' of
      [one] -> let (_, middle', last) = blockSplit one
               in return (blockJoin entry (middle `blockAppend` middle') last)
      _ -> panic "lowerSafeForeignCall0"

  -- Block doesn't end in a safe foreign call:
  | otherwise = return block
964
965
-- | A named code label in the RTS package, as a Cmm expression.
foreignLbl :: FastString -> CmmExpr
foreignLbl name = CmmLit $ CmmLabel $ mkCmmCodeLabel rtsPackageId name
968
-- | Allocate a fresh local register of the given type.
newTemp :: CmmType -> UniqSM LocalReg
newTemp rep = do
  uniq <- getUniqueM
  return (LocalReg uniq rep)
971
-- | An unsafe foreign call to the RTS's suspendThread, binding the
-- returned token to @id@.
callSuspendThread :: DynFlags -> LocalReg -> Bool -> CmmNode O O
callSuspendThread dflags id intrbl
  = CmmUnsafeForeignCall target [id] actuals
  where
    target  = ForeignTarget (foreignLbl (fsLit "suspendThread")) conv
    conv    = ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint] CmmMayReturn
    actuals = [CmmReg (CmmGlobal BaseReg), mkIntExpr dflags (fromEnum intrbl)]
978
-- | An unsafe foreign call to the RTS's resumeThread, passing the
-- token from suspendThread and binding the new BaseReg to @new_base@.
callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
callResumeThread new_base id
  = CmmUnsafeForeignCall target [new_base] actuals
  where
    target  = ForeignTarget (foreignLbl (fsLit "resumeThread")) conv
    conv    = ForeignConvention CCallConv [AddrHint] [AddrHint] CmmMayReturn
    actuals = [CmmReg (CmmLocal id)]
985
986 -- -----------------------------------------------------------------------------
987
-- | Advance a byte offset by a number of machine words.
plusW :: DynFlags -> ByteOff -> WordOff -> ByteOff
plusW dflags b w = b + wORD_SIZE dflags * w
990
-- | Drop the first @n@ slots, succeeding only if all of them are Empty.
dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot]
dropEmpty n slots
  | n == 0    = Just slots
  | otherwise = case slots of
                  Empty : rest -> dropEmpty (n - 1) rest
                  _            -> Nothing
995
-- | Is this stack slot free?
isEmpty :: StackSlot -> Bool
isEmpty slot = case slot of
                 Empty -> True
                 _     -> False
999
-- | Bytes a local register occupies on the stack, rounded up to a
-- whole number of words.
localRegBytes :: DynFlags -> LocalReg -> ByteOff
localRegBytes dflags =
  roundUpToWords dflags . widthInBytes . typeWidth . localRegType
1003
-- | Words a local register occupies on the stack.
localRegWords :: DynFlags -> LocalReg -> WordOff
localRegWords dflags r = toWords dflags (localRegBytes dflags r)
1006
-- | Convert a byte offset to whole machine words (truncating).
toWords :: DynFlags -> ByteOff -> WordOff
toWords dflags x = quot x (wORD_SIZE dflags)
1009
1010
-- | Assignments reloading every register recorded in the StackMap
-- from its slot in the Old area.
insertReloads :: StackMap -> [CmmNode O O]
insertReloads stackmap = map reload (stackSlotRegs stackmap)
  where
    reload (r, sp) = CmmAssign (CmmLocal r)
                               (CmmLoad (CmmStackSlot Old sp)
                                        (localRegType r))
1017
1018
-- | All (register, slot) pairs recorded in a StackMap.
stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
stackSlotRegs = eltsUFM . sm_regs
1021