{-# LANGUAGE RecordWildCards, GADTs #-}
module CmmLayoutStack (
       cmmLayoutStack, setInfoTableStackMap
  ) where

import StgCmmUtils      ( callerSaveVolatileRegs ) -- XXX layering violation
import StgCmmForeign    ( saveThreadState, loadThreadState ) -- XXX layering violation

import BasicTypes
import Cmm
import CmmInfo
import BlockId
import CLabel
import CmmUtils
import MkGraph
import ForeignCall
import CmmLive
import CmmProcPoint
import SMRep
import Hoopl
import UniqSupply
import Maybes
import UniqFM
import Util

import DynFlags
import FastString
import Outputable
import qualified Data.Set as Set
import Control.Monad.Fix
import Data.Array as Array
import Data.Bits
import Data.List (nub)
import Control.Monad (liftM)

#include "HsVersions.h"


data StackSlot = Occupied | Empty
     -- Occupied: a return address or part of an update frame

instance Outputable StackSlot where
  ppr Occupied = ptext (sLit "XXX")
  ppr Empty    = ptext (sLit "---")

-- All stack locations are expressed as positive byte offsets from the
-- "base", which is defined to be the address above the return address
-- on the stack on entry to this CmmProc.
--
-- Lower addresses have higher StackLocs.
--
type StackLoc = ByteOff
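
-- An illustrative sketch (not from the original source): on a 64-bit
-- target with a descending stack, a slot at StackLoc 16 lives 16 bytes
-- below the base address, i.e.
--
--   slot_addr = base_addr - 16
--
-- and StackLoc 24 names the next word further from the base.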

{-
A StackMap describes the stack at any given point.  At a continuation
it has a particular layout, like this:

         |             | <- base
         |-------------|
         |     ret0    | <- base + 8
         |-------------|
         .  upd frame  . <- base + sm_ret_off
         |-------------|
         |             |
         .    vars     .
         . (live/dead) .
         |             | <- base + sm_sp - sm_args
         |-------------|
         |    ret1     |
         .  ret vals   . <- base + sm_sp    (<--- Sp points here)
         |-------------|

Why do we include the final return address (ret0) in our stack map?  I
have absolutely no idea, but it seems to be done that way consistently
in the rest of the code generator, so I played along here. --SDM

Note that we will be constructing an info table for the continuation
(ret1), which needs to describe the stack down to, but not including,
the update frame (or ret0, if there is no update frame).
-}

data StackMap = StackMap
 {  sm_sp   :: StackLoc
       -- ^ the offset of Sp relative to the base on entry
       -- to this block.
 ,  sm_args :: ByteOff
       -- ^ the number of bytes of arguments in the area for this block
       -- Defn: the offset of young(L) relative to the base is given by
       -- (sm_sp - sm_args) of the StackMap for block L.
 ,  sm_ret_off :: ByteOff
       -- ^ Number of bytes of stack that we do not describe with an info
       -- table, because it contains an update frame.
 ,  sm_regs :: UniqFM (LocalReg,StackLoc)
       -- ^ regs on the stack
 }

instance Outputable StackMap where
  ppr StackMap{..} =
     text "Sp = " <> int sm_sp $$
     text "sm_args = " <> int sm_args $$
     text "sm_ret_off = " <> int sm_ret_off $$
     text "sm_regs = " <> ppr (eltsUFM sm_regs)


cmmLayoutStack :: DynFlags -> ProcPointSet -> ByteOff -> CmmGraph
               -> UniqSM (CmmGraph, BlockEnv StackMap)
cmmLayoutStack dflags procpoints entry_args
               graph0@(CmmGraph { g_entry = entry })
  = do
    -- pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()

    -- We need liveness info.  We could do removeDeadAssignments at
    -- the same time, but it buys nothing over doing cmmSink later,
    -- and costs a lot more than just cmmLocalLiveness.
    -- (graph, liveness) <- removeDeadAssignments graph0
    let (graph, liveness) = (graph0, cmmLocalLiveness dflags graph0)

    -- pprTrace "liveness" (ppr liveness) $ return ()
    let blocks = postorderDfs graph

    (final_stackmaps, _final_high_sp, new_blocks) <-
          mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
            layout dflags procpoints liveness entry entry_args
                   rec_stackmaps rec_high_sp blocks

    new_blocks' <- mapM (lowerSafeForeignCall dflags) new_blocks

    -- pprTrace ("Sp HWM") (ppr _final_high_sp) $ return ()
    return (ofBlockList entry new_blocks', final_stackmaps)



layout :: DynFlags
       -> BlockSet                      -- proc points
       -> BlockEnv CmmLocalLive         -- liveness
       -> BlockId                       -- entry
       -> ByteOff                       -- stack args on entry

       -> BlockEnv StackMap             -- [final] stack maps
       -> ByteOff                       -- [final] Sp high water mark

       -> [CmmBlock]                    -- [in] blocks

       -> UniqSM
          ( BlockEnv StackMap           -- [out] stack maps
          , ByteOff                     -- [out] Sp high water mark
          , [CmmBlock]                  -- [out] new blocks
          )

layout dflags procpoints liveness entry entry_args final_stackmaps final_sp_high blocks
  = go blocks init_stackmap entry_args []
  where
    (updfr, cont_info) = collectContInfo blocks

    init_stackmap = mapSingleton entry StackMap{ sm_sp   = entry_args
                                               , sm_args = entry_args
                                               , sm_ret_off = updfr
                                               , sm_regs = emptyUFM
                                               }

    go [] acc_stackmaps acc_hwm acc_blocks
      = return (acc_stackmaps, acc_hwm, acc_blocks)

    go (b0 : bs) acc_stackmaps acc_hwm acc_blocks
      = do
       let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0

       let stack0@StackMap { sm_sp = sp0 }
               = mapFindWithDefault
                     (pprPanic "no stack map for" (ppr entry_lbl))
                     entry_lbl acc_stackmaps

       -- pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return ()

       -- (a) Update the stack map to include the effects of
       --     assignments in this block
       let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0

       -- (b) Insert assignments to reload all the live variables if this
       --     block is a proc point
       let middle1 = if entry_lbl `setMember` procpoints
                        then foldr blockCons middle0 (insertReloads stack0)
                        else middle0

       -- (c) Look at the last node and if we are making a call or
       --     jumping to a proc point, we must save the live
       --     variables, adjust Sp, and construct the StackMaps for
       --     each of the successor blocks.  See handleLastNode for
       --     details.
       (middle2, sp_off, last1, fixup_blocks, out)
           <- handleLastNode dflags procpoints liveness cont_info
                             acc_stackmaps stack1 middle0 last0

       -- pprTrace "layout(out)" (ppr out) $ return ()

       -- (d) Manifest Sp: run over the nodes in the block and replace
       --     CmmStackSlot with CmmLoad from Sp with a concrete offset.
       --
       -- our block:
       --    middle1          -- the original middle nodes
       --    middle2          -- live variable saves from handleLastNode
       --    Sp = Sp + sp_off -- Sp adjustment goes here
       --    last1            -- the last node
       --
       let middle_pre = blockToList $ foldl blockSnoc middle1 middle2

           final_blocks = manifestSp dflags final_stackmaps stack0 sp0 final_sp_high entry0
                              middle_pre sp_off last1 fixup_blocks

           acc_stackmaps' = mapUnion acc_stackmaps out

           -- If this block jumps to the GC, then we do not take its
           -- stack usage into account for the high-water mark.
           -- Otherwise, if the only stack usage is in the stack-check
           -- failure block itself, we will do a redundant stack
           -- check.  The stack has a buffer designed to accommodate
           -- the largest amount of stack needed for calling the GC.
           --
           this_sp_hwm | isGcJump last0 = 0
                       | otherwise      = sp0 - sp_off

           hwm' = maximum (acc_hwm : this_sp_hwm : map sm_sp (mapElems out))

       go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks)


-- -----------------------------------------------------------------------------

-- Not foolproof, but GCFun is the culprit we most want to catch
isGcJump :: CmmNode O C -> Bool
isGcJump (CmmCall { cml_target = CmmReg (CmmGlobal l) })
  = l == GCFun || l == GCEnter1
isGcJump _something_else = False

-- -----------------------------------------------------------------------------

-- This doesn't seem right somehow.  We need to find out whether this
-- proc will push some update frame material at some point, so that we
-- can avoid using that area of the stack for spilling.  The
-- updfr_space field of the CmmProc *should* tell us, but it doesn't
-- (I think maybe it gets filled in later when we do proc-point
-- splitting).
--
-- So we'll just take the max of all the cml_ret_offs.  This could be
-- unnecessarily pessimistic, but probably not in the code we
-- generate.

collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff)
collectContInfo blocks
  = (maximum ret_offs, mapFromList (catMaybes mb_argss))
 where
  (mb_argss, ret_offs) = mapAndUnzip get_cont blocks

  get_cont :: Block CmmNode x C -> (Maybe (Label, ByteOff), ByteOff)
  get_cont b =
     case lastNode b of
        CmmCall { cml_cont = Just l, .. }
           -> (Just (l, cml_ret_args), cml_ret_off)
        CmmForeignCall { .. }
           -> (Just (succ, ret_args), ret_off)
        _other -> (Nothing, 0)


-- -----------------------------------------------------------------------------
-- Updating the StackMap from middle nodes

-- Look for loads from stack slots, and update the StackMap.  This is
-- purely for optimisation reasons, so that we can avoid saving a
-- variable back to a different stack slot if it is already on the
-- stack.
--
-- This happens a lot: for example when function arguments are passed
-- on the stack and need to be immediately saved across a call, we
-- want to just leave them where they are on the stack.
--
procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap
procMiddle stackmaps node sm
  = case node of
     CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _)
       -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) }
       where loc = getStackLoc area off stackmaps
     CmmAssign (CmmLocal r) _other
       -> sm { sm_regs = delFromUFM (sm_regs sm) r }
     _other
       -> sm

getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc
getStackLoc Old       n _         = n
getStackLoc (Young l) n stackmaps =
  case mapLookup l stackmaps of
    Nothing -> pprPanic "getStackLoc" (ppr l)
    Just sm -> sm_sp sm - sm_args sm + n
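
-- An illustrative example (made-up numbers): if block L has a StackMap
-- with sm_sp = 32 and sm_args = 8, then (Young L) at byte offset 4
-- denotes the StackLoc 32 - 8 + 4 = 28, i.e. 28 bytes below the base.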


-- -----------------------------------------------------------------------------
-- Handling stack allocation for a last node

-- We take a single last node and turn it into:
--
--    C1 (some statements)
--    Sp = Sp + N
--    C2 (some more statements)
--    call f()          -- the actual last node
--
-- plus possibly some more blocks (we may have to add some fixup code
-- between the last node and the continuation).
--
-- C1: is the code for saving the variables across this last node onto
-- the stack, if the continuation is a call or jumps to a proc point.
--
-- C2: if the last node is a safe foreign call, we have to inject some
-- extra code that goes *after* the Sp adjustment.

handleLastNode
   :: DynFlags -> ProcPointSet -> BlockEnv CmmLocalLive -> BlockEnv ByteOff
   -> BlockEnv StackMap -> StackMap
   -> Block CmmNode O O
   -> CmmNode O C
   -> UniqSM
      ( [CmmNode O O]      -- nodes to go *before* the Sp adjustment
      , ByteOff            -- amount to adjust Sp
      , CmmNode O C        -- new last node
      , [CmmBlock]         -- new blocks
      , BlockEnv StackMap  -- stackmaps for the continuations
      )

handleLastNode dflags procpoints liveness cont_info stackmaps
               stack0@StackMap { sm_sp = sp0 } middle last
 = case last of
    --  At each return / tail call,
    --  adjust Sp to point to the last argument pushed, which
    --  is cml_args, after popping any other junk from the stack.
    CmmCall{ cml_cont = Nothing, .. } -> do
      let sp_off = sp0 - cml_args
      return ([], sp_off, last, [], mapEmpty)

    --  At each CmmCall with a continuation:
    CmmCall{ cml_cont = Just cont_lbl, .. } ->
       return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off

    CmmForeignCall{ succ = cont_lbl, .. } -> do
       return $ lastCall cont_lbl (wORD_SIZE dflags) ret_args ret_off
            -- one word of args: the return address

    CmmBranch{..}     ->  handleBranches
    CmmCondBranch{..} ->  handleBranches
    CmmSwitch{..}     ->  handleBranches

  where
     -- Calls and ForeignCalls are handled the same way:
     lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
              -> ( [CmmNode O O]
                 , ByteOff
                 , CmmNode O C
                 , [CmmBlock]
                 , BlockEnv StackMap
                 )
     lastCall lbl cml_args cml_ret_args cml_ret_off
      =  ( assignments
         , spOffsetForCall sp0 cont_stack cml_args
         , last
         , [] -- no new blocks
         , mapSingleton lbl cont_stack )
      where
         (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off


     prepareStack lbl cml_ret_args cml_ret_off
       | Just cont_stack <- mapLookup lbl stackmaps
             -- If we have already seen this continuation before, then
             -- we just have to make the stack look the same:
       = (fixupStack stack0 cont_stack, cont_stack)
             -- Otherwise, we have to allocate the stack frame
       | otherwise
       = (save_assignments, new_cont_stack)
       where
        (new_cont_stack, save_assignments)
           = setupStackFrame dflags lbl liveness cml_ret_off cml_ret_args stack0


     -- For other last nodes (branches), if any of the targets is a
     -- proc point, we have to set up the stack to match what the proc
     -- point is expecting.
     --
     handleBranches :: UniqSM ( [CmmNode O O]
                              , ByteOff
                              , CmmNode O C
                              , [CmmBlock]
                              , BlockEnv StackMap )

     handleBranches
         -- Note [diamond proc point]
       | Just l <- futureContinuation middle
       , (nub $ filter (`setMember` procpoints) $ successors last) == [l]
       = do
         let cont_args = mapFindWithDefault 0 l cont_info
             (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0)
             out = mapFromList [ (l', cont_stack)
                               | l' <- successors last ]
         return ( assigs
                , spOffsetForCall sp0 cont_stack (wORD_SIZE dflags)
                , last
                , []
                , out)

       | otherwise = do
         pps <- mapM handleBranch (successors last)
         let lbl_map :: LabelMap Label
             lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
             fix_lbl l = mapFindWithDefault l l lbl_map
         return ( []
                , 0
                , mapSuccessors fix_lbl last
                , concat [ blk | (_,_,_,blk) <- pps ]
                , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] )

     -- For each successor of this block
     handleBranch :: BlockId -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock])
     handleBranch l
        --   (a) if the successor already has a stackmap, we need to
        --       shuffle the current stack to make it look the same.
        --       We have to insert a new block to make this happen.
        | Just stack2 <- mapLookup l stackmaps
        = do
             let assigs = fixupStack stack0 stack2
             (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 assigs
             return (l, tmp_lbl, stack2, block)

        --   (b) if the successor is a proc point, save everything
        --       on the stack.
        | l `setMember` procpoints
        = do
             let cont_args = mapFindWithDefault 0 l cont_info
                 (stack2, assigs) =
                      --pprTrace "first visit to proc point"
                      --             (ppr l <+> ppr stack1) $
                      setupStackFrame dflags l liveness (sm_ret_off stack0)
                                                        cont_args stack0
             --
             (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 assigs
             return (l, tmp_lbl, stack2, block)

        --   (c) otherwise, the current StackMap is the StackMap for
        --       the continuation.  But we must remember to remove any
        --       variables from the StackMap that are *not* live at
        --       the destination, because this StackMap might be used
        --       by fixupStack if this is a join point.
        | otherwise = return (l, l, stack1, [])
        where live = mapFindWithDefault (panic "handleBranch") l liveness
              stack1 = stack0 { sm_regs = filterUFM is_live (sm_regs stack0) }
              is_live (r,_) = r `elemRegSet` live


makeFixupBlock :: DynFlags -> ByteOff -> Label -> StackMap -> [CmmNode O O]
               -> UniqSM (Label, [CmmBlock])
makeFixupBlock dflags sp0 l stack assigs
  | null assigs && sp0 == sm_sp stack = return (l, [])
  | otherwise = do
    tmp_lbl <- liftM mkBlockId $ getUniqueM
    let sp_off = sp0 - sm_sp stack
        block = blockJoin (CmmEntry tmp_lbl)
                          (maybeAddSpAdj dflags sp_off (blockFromList assigs))
                          (CmmBranch l)
    return (tmp_lbl, [block])


-- Sp is currently pointing to current_sp,
-- we want it to point to
--    (sm_sp cont_stack - sm_args cont_stack + args)
-- so the difference is
--    sp0 - (sm_sp cont_stack - sm_args cont_stack + args)
spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff
spOffsetForCall current_sp cont_stack args
  = current_sp - (sm_sp cont_stack - sm_args cont_stack + args)
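
-- An illustrative example (made-up numbers, 64-bit words): if sp0 = 32,
-- the continuation's StackMap has sm_sp = 24 and sm_args = 8, and the
-- call pushes one word of args (args = 8), then
--
--   sp_off = 32 - (24 - 8 + 8) = 8
--
-- and the emitted adjustment Sp = Sp + 8 pops 8 bytes before the call.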


-- | create a sequence of assignments to establish the new StackMap,
-- given the old StackMap.
fixupStack :: StackMap -> StackMap -> [CmmNode O O]
fixupStack old_stack new_stack = concatMap move new_locs
 where
     old_map  = sm_regs old_stack
     new_locs = stackSlotRegs new_stack

     move (r,n)
       | Just (_,m) <- lookupUFM old_map r, n == m = []
       | otherwise = [CmmStore (CmmStackSlot Old n)
                               (CmmReg (CmmLocal r))]
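
-- An illustrative example: if the old StackMap has r1 at StackLoc 8,
-- and the new StackMap wants r1 at 8 and r2 at 16, then the move for
-- r1 is a no-op and we emit a single store:
--
--   CmmStore (CmmStackSlot Old 16) (CmmReg (CmmLocal r2))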



setupStackFrame
             :: DynFlags
             -> BlockId                 -- label of continuation
             -> BlockEnv CmmLocalLive   -- liveness
             -> ByteOff                 -- updfr
             -> ByteOff                 -- bytes of return values on stack
             -> StackMap                -- current StackMap
             -> (StackMap, [CmmNode O O])

setupStackFrame dflags lbl liveness updfr_off ret_args stack0
  = (cont_stack, assignments)
  where
      -- get the set of LocalRegs live in the continuation
      live = mapFindWithDefault Set.empty lbl liveness

      -- the stack from the base to updfr_off is off-limits.
      -- our new stack frame contains:
      --   * saved live variables
      --   * the return address [young(C) + 8]
      --   * the args for the call,
      --     which are replaced by the return values at the return
      --     point.

      -- everything up to updfr_off is off-limits
      -- stack1 contains updfr_off, plus everything we need to save
      (stack1, assignments) = allocate dflags updfr_off live stack0

      -- And the Sp at the continuation is:
      --   sm_sp stack1 + ret_args
      cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args
                         , sm_args = ret_args
                         , sm_ret_off = updfr_off
                         }


-- -----------------------------------------------------------------------------
-- Note [diamond proc point]
--
-- This special case looks for the pattern we get from a typical
-- tagged case expression:
--
--    Sp[young(L1)] = L1
--    if (R1 & 7) != 0 goto L1 else goto L2
--  L2:
--    call [R1] returns to L1
--  L1: live: {y}
--    x = R1
--
-- If we let the generic case handle this, we get
--
--    Sp[-16] = L1
--    if (R1 & 7) != 0 goto L1a else goto L2
--  L2:
--    Sp[-8] = y
--    Sp = Sp - 16
--    call [R1] returns to L1
--  L1a:
--    Sp[-8] = y
--    Sp = Sp - 16
--    goto L1
--  L1:
--    x = R1
--
-- The code for saving the live vars is duplicated in each branch, and
-- furthermore there is an extra jump in the fast path (assuming L1 is
-- a proc point, which it probably is if there is a heap check).
--
-- So to fix this we want to set up the stack frame before the
-- conditional jump.  How do we know when to do this, and when it is
-- safe?  The basic idea is, when we see the assignment
--
--    Sp[young(L)] = L
--
-- we know that
--   * we are definitely heading for L
--   * there can be no more reads from another stack area, because young(L)
--     overlaps with it.
--
-- We don't necessarily know that everything live at L is live now
-- (some might be assigned between here and the jump to L).  So we
-- simplify and only do the optimisation when we see
--
-- (1) a block containing an assignment of a return address L
-- (2) ending in a branch where one (and only one) continuation goes to L,
--     and no other continuations go to proc points.
--
-- then we allocate the stack frame for L at the end of the block,
-- before the branch.
--
-- We could generalise (2), but that would make it a bit more
-- complicated to handle, and this currently catches the common case.

futureContinuation :: Block CmmNode O O -> Maybe BlockId
futureContinuation middle = foldBlockNodesB f middle Nothing
   where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId
         f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _
             = Just l
         f _ r = r

-- -----------------------------------------------------------------------------
-- Saving live registers

-- | Given a set of live registers and a StackMap, save all the registers
-- on the stack and return the new StackMap and the assignments to do
-- the saving.
--
allocate :: DynFlags -> ByteOff -> LocalRegSet -> StackMap
         -> (StackMap, [CmmNode O O])
allocate dflags ret_off live stackmap@StackMap{ sm_sp = sp0
                                              , sm_regs = regs0 }
 =
  -- pprTrace "allocate" (ppr live $$ ppr stackmap) $

   -- we only have to save regs that are not already in a slot
   let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
       regs1   = filterUFM (\(r,_) -> elemRegSet r live) regs0
   in

   -- make a map of the stack
   let stack = reverse $ Array.elems $
               accumArray (\_ x -> x) Empty (1, toWords dflags (max sp0 ret_off)) $
                 ret_words ++ live_words
            where ret_words =
                   [ (x, Occupied)
                   | x <- [ 1 .. toWords dflags ret_off] ]
                  live_words =
                   [ (toWords dflags x, Occupied)
                   | (r,off) <- eltsUFM regs1,
                     let w = localRegBytes dflags r,
                     x <- [ off, off - wORD_SIZE dflags .. off - w + 1] ]
   in

   -- Pass over the stack: find slots to save all the new live variables,
   -- choosing the oldest slots first (hence a foldr).
   let
       save slot ([], stack, n, assigs, regs) -- no more regs to save
          = ([], slot:stack, plusW dflags n 1, assigs, regs)
       save slot (to_save, stack, n, assigs, regs)
          = case slot of
               Occupied -> (to_save, Occupied:stack, plusW dflags n 1, assigs, regs)
               Empty
                 | Just (stack', r, to_save') <-
                       select_save to_save (slot:stack)
                 -> let assig = CmmStore (CmmStackSlot Old n')
                                         (CmmReg (CmmLocal r))
                        n' = plusW dflags n 1
                    in
                        (to_save', stack', n', assig : assigs, (r,(r,n')):regs)

                 | otherwise
                 -> (to_save, slot:stack, plusW dflags n 1, assigs, regs)

       -- we should do better here: right now we'll fit the smallest first,
       -- but it would make more sense to fit the biggest first.
       select_save :: [LocalReg] -> [StackSlot]
                   -> Maybe ([StackSlot], LocalReg, [LocalReg])
       select_save regs stack = go regs []
         where go []     _no_fit = Nothing
               go (r:rs) no_fit
                 | Just rest <- dropEmpty words stack
                 = Just (replicate words Occupied ++ rest, r, rs++no_fit)
                 | otherwise
                 = go rs (r:no_fit)
                 where words = localRegWords dflags r

       -- fill in empty slots as much as possible
       (still_to_save, save_stack, n, save_assigs, save_regs)
          = foldr save (to_save, [], 0, [], []) stack

       -- push any remaining live vars on the stack
       (push_sp, push_assigs, push_regs)
          = foldr push (n, [], []) still_to_save
          where
            push r (n, assigs, regs)
              = (n', assig : assigs, (r,(r,n')) : regs)
              where
                n' = n + localRegBytes dflags r
                assig = CmmStore (CmmStackSlot Old n')
                                 (CmmReg (CmmLocal r))

       trim_sp
          | not (null push_regs) = push_sp
          | otherwise
          = plusW dflags n (- length (takeWhile isEmpty save_stack))

       final_regs = regs1 `addListToUFM` push_regs
                          `addListToUFM` save_regs

   in
  -- XXX should be an assert
   if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else

   if (trim_sp .&. (wORD_SIZE dflags - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else

   ( stackmap { sm_regs = final_regs , sm_sp = trim_sp }
   , push_assigs ++ save_assigs )


-- -----------------------------------------------------------------------------
-- Manifesting Sp

-- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp.  The
-- block looks like this:
--
--    middle_pre       -- the middle nodes
--    Sp = Sp + sp_off -- Sp adjustment goes here
--    last             -- the last node
--
-- And we have some extra blocks too (that don't contain Sp adjustments)
--
-- The adjustment for middle_pre will be different from that for
-- middle_post, because the Sp adjustment intervenes.
--
manifestSp
   :: DynFlags
   -> BlockEnv StackMap  -- StackMaps for other blocks
   -> StackMap           -- StackMap for this block
   -> ByteOff            -- Sp on entry to the block
   -> ByteOff            -- SpHigh
   -> CmmNode C O        -- first node
   -> [CmmNode O O]      -- middle
   -> ByteOff            -- sp_off
   -> CmmNode O C        -- last node
   -> [CmmBlock]         -- new blocks
   -> [CmmBlock]         -- final blocks with Sp manifest

manifestSp dflags stackmaps stack0 sp0 sp_high
           first middle_pre sp_off last fixup_blocks
  = final_block : fixup_blocks'
  where
    area_off = getAreaOff stackmaps

    adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x
    adj_pre_sp  = mapExpDeep (areaToSp dflags sp0            sp_high area_off)
    adj_post_sp = mapExpDeep (areaToSp dflags (sp0 - sp_off) sp_high area_off)

    final_middle = maybeAddSpAdj dflags sp_off $
                   blockFromList $
                   map adj_pre_sp $
                   elimStackStores stack0 stackmaps area_off $
                   middle_pre

    final_last    = optStackCheck (adj_post_sp last)

    final_block   = blockJoin first final_middle final_last

    fixup_blocks' = map (mapBlock3' (id, adj_post_sp, id)) fixup_blocks


getAreaOff :: BlockEnv StackMap -> (Area -> StackLoc)
getAreaOff _ Old = 0
getAreaOff stackmaps (Young l) =
  case mapLookup l stackmaps of
    Just sm -> sm_sp sm - sm_args sm
    Nothing -> pprPanic "getAreaOff" (ppr l)


maybeAddSpAdj :: DynFlags -> ByteOff -> Block CmmNode O O -> Block CmmNode O O
maybeAddSpAdj _ 0 block = block
maybeAddSpAdj dflags sp_off block
   = block `blockSnoc` CmmAssign spReg (cmmOffset dflags (CmmReg spReg) sp_off)


{-
Sp(L) is the Sp offset on entry to block L relative to the base of the
OLD area.

SpArgs(L) is the size of the young area for L, i.e. the number of
arguments.

 - in block L, each reference to [old + N] turns into
   [Sp + Sp(L) - N]

 - in block L, each reference to [young(L') + N] turns into
   [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]

 - be careful with the last node of each block: Sp has already been adjusted
   to be Sp + Sp(L) - Sp(L')
-}
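
-- An illustrative example (made-up numbers): with Sp(L) = 32, a
-- reference to [old + 8] in block L becomes [Sp + 32 - 8] = [Sp + 24];
-- and with Sp(L') = 24, SpArgs(L') = 8, a reference to [young(L') + 8]
-- becomes [Sp + 32 - 24 + 8 - 8] = [Sp + 8].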

areaToSp :: DynFlags -> ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr

areaToSp dflags sp_old _sp_hwm area_off (CmmStackSlot area n)
  = cmmOffset dflags (CmmReg spReg) (sp_old - area_off area - n)
    -- Replace (CmmStackSlot area n) with an offset from Sp

areaToSp dflags _ sp_hwm _ (CmmLit CmmHighStackMark)
  = mkIntExpr dflags sp_hwm
    -- Replace CmmHighStackMark with the number of bytes of stack used,
    -- the sp_hwm.  See Note [Stack usage] in StgCmmHeap

areaToSp dflags _ _ _ (CmmMachOp (MO_U_Lt _)
                          [CmmMachOp (MO_Sub _)
                              [ CmmRegOff (CmmGlobal Sp) x_off
                              , CmmLit (CmmInt y_lit _)],
                           CmmReg (CmmGlobal SpLim)])
  | fromIntegral x_off >= y_lit
  = zeroExpr dflags
    -- Replace a stack-overflow test that cannot fail with a no-op
    -- See Note [Always false stack check]

areaToSp _ _ _ _ other = other

-- Note [Always false stack check]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- We can optimise stack checks of the form
--
--   if ((Sp + x) - y < SpLim) then .. else ..
--
-- where x and y are non-negative integer byte offsets.  Since we know
-- that SpLim <= Sp (remember the stack grows downwards), this test must
-- yield False whenever (x >= y), so we can rewrite the comparison to
-- False.  A subsequent sinking pass will then drop the dead code.
-- Optimising this away depends on knowing that SpLim <= Sp, so it is
-- really the job of the stack layout algorithm, hence we do it now.
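--
-- An illustrative instance (made-up offsets): with x = 16 and y = 8,
--
--   (Sp + 16) - 8 = Sp + 8 >= Sp >= SpLim
--
-- so the test (Sp + 16) - 8 < SpLim is always False, and areaToSp
-- rewrites it to a literal zero, which optStackCheck below turns into
-- an unconditional branch to the "no overflow" successor.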

optStackCheck :: CmmNode O C -> CmmNode O C
optStackCheck n = -- Note [Always false stack check]
 case n of
   CmmCondBranch (CmmLit (CmmInt 0 _)) _true false -> CmmBranch false
   other -> other


-- -----------------------------------------------------------------------------

-- | Eliminate stores of the form
--
--    Sp[area+n] = r
--
-- when we know that r is already in the same slot as Sp[area+n].  We
-- could do this in a later optimisation pass, but that would involve
-- a separate analysis and we already have the information to hand
-- here.  It helps clean up some extra stack stores in common cases.
--
-- Note that we may have to modify the StackMap as we walk through the
-- code using procMiddle, since an assignment to a variable in the
-- StackMap will invalidate its mapping there.
--
elimStackStores :: StackMap
                -> BlockEnv StackMap
                -> (Area -> ByteOff)
                -> [CmmNode O O]
                -> [CmmNode O O]
elimStackStores stackmap stackmaps area_off nodes
  = go stackmap nodes
  where
    go _stackmap [] = []
    go stackmap (n:ns)
     = case n of
         CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r))
            | Just (_,off) <- lookupUFM (sm_regs stackmap) r
            , area_off area + m == off
            -> -- pprTrace "eliminated a node!" (ppr r) $
               go stackmap ns
         _otherwise
            -> n : go (procMiddle stackmaps n stackmap) ns


-- -----------------------------------------------------------------------------
-- Update info tables to include stack liveness


setInfoTableStackMap :: DynFlags -> BlockEnv StackMap -> CmmDecl -> CmmDecl
setInfoTableStackMap dflags stackmaps (CmmProc top_info@TopInfo{..} l v g)
  = CmmProc top_info{ info_tbls = mapMapWithKey fix_info info_tbls } l v g
  where
    fix_info lbl info_tbl@CmmInfoTable{ cit_rep = StackRep _ } =
       info_tbl { cit_rep = StackRep (get_liveness lbl) }
    fix_info _ other = other

    get_liveness :: BlockId -> Liveness
    get_liveness lbl
      = case mapLookup lbl stackmaps of
          Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl <+> ppr info_tbls)
          Just sm -> stackMapToLiveness dflags sm

setInfoTableStackMap _ _ d = d


stackMapToLiveness :: DynFlags -> StackMap -> Liveness
stackMapToLiveness dflags StackMap{..} =
   reverse $ Array.elems $
        accumArray (\_ x -> x) True (toWords dflags sm_ret_off + 1,
                                     toWords dflags (sm_sp - sm_args)) live_words
   where
     live_words = [ (toWords dflags off, False)
                  | (r,off) <- eltsUFM sm_regs, isGcPtrType (localRegType r) ]


-- -----------------------------------------------------------------------------
-- Lowering safe foreign calls

{-
Note [Lower safe foreign calls]

We start with

   Sp[young(L1)] = L1
 ,-----------------------
 | r1 = foo(x,y,z) returns to L1
 '-----------------------
 L1:
   R1 = r1 -- copyIn, inserted by mkSafeCall
   ...

the stack layout algorithm will arrange to save and reload everything
live across the call.  Our job now is to expand the call so we get

   Sp[young(L1)] = L1
 ,-----------------------
 | SAVE_THREAD_STATE()
 | token = suspendThread(BaseReg, interruptible)
 | r = foo(x,y,z)
 | BaseReg = resumeThread(token)
 | LOAD_THREAD_STATE()
 | R1 = r  -- copyOut
 | jump Sp[0]
 '-----------------------
 L1:
   r = R1 -- copyIn, inserted by mkSafeCall
   ...

Note the copyOut, which saves the results in the places that L1 is
expecting them (see Note [safe foreign call convention]).  Note also
that the safe foreign call is replaced by an unsafe one in the Cmm graph.
-}

lowerSafeForeignCall :: DynFlags -> CmmBlock -> UniqSM CmmBlock
lowerSafeForeignCall dflags block
  | (entry, middle, CmmForeignCall { .. }) <- blockSplit block
  = do
    -- Both 'id' and 'new_base' are KindNonPtr because they're
    -- RTS-only objects and are not subject to garbage collection
    id <- newTemp (bWord dflags)
    new_base <- newTemp (cmmRegType dflags (CmmGlobal BaseReg))
    let (caller_save, caller_load) = callerSaveVolatileRegs dflags
    load_tso <- newTemp (gcWord dflags)
    load_stack <- newTemp (gcWord dflags)
    let suspend = saveThreadState dflags <*>
                  caller_save <*>
                  mkMiddle (callSuspendThread dflags id intrbl)
        midCall = mkUnsafeCall tgt res args
        resume  = mkMiddle (callResumeThread new_base id) <*>
                  -- Assign the result to BaseReg: we
                  -- might now have a different Capability!
                  mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
                  caller_load <*>
                  loadThreadState dflags load_tso load_stack

        (_, regs, copyout) =
             copyOutOflow dflags NativeReturn Jump (Young succ)
                            (map (CmmReg . CmmLocal) res)
                            ret_off []

        -- NB. after resumeThread returns, the top-of-stack probably contains
        -- the stack frame for succ, but it might not: if the current thread
        -- received an exception during the call, then the stack might be
        -- different.  Hence we continue by jumping to the top stack frame,
        -- not by jumping to succ.
        jump = CmmCall { cml_target    = entryCode dflags $
                                         CmmLoad (CmmReg spReg) (bWord dflags)
                       , cml_cont      = Just succ
                       , cml_args_regs = regs
                       , cml_args      = widthInBytes (wordWidth dflags)
                       , cml_ret_args  = ret_args
                       , cml_ret_off   = ret_off }

    graph' <- lgraphOfAGraph $ suspend <*>
                               midCall <*>
                               resume  <*>
                               copyout <*>
                               mkLast jump

    case toBlockList graph' of
      [one] -> let (_, middle', last) = blockSplit one
               in return (blockJoin entry (middle `blockAppend` middle') last)
      _ -> panic "lowerSafeForeignCall0"

  -- Block doesn't end in a safe foreign call:
  | otherwise = return block


foreignLbl :: FastString -> CmmExpr
foreignLbl name = CmmLit (CmmLabel (mkForeignLabel name Nothing ForeignLabelInExternalPackage IsFunction))

newTemp :: CmmType -> UniqSM LocalReg
newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep)

callSuspendThread :: DynFlags -> LocalReg -> Bool -> CmmNode O O
callSuspendThread dflags id intrbl =
  CmmUnsafeForeignCall
       (ForeignTarget (foreignLbl (fsLit "suspendThread"))
            (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint] CmmMayReturn))
       [id] [CmmReg (CmmGlobal BaseReg), mkIntExpr dflags (fromEnum intrbl)]

callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
callResumeThread new_base id =
  CmmUnsafeForeignCall
       (ForeignTarget (foreignLbl (fsLit "resumeThread"))
            (ForeignConvention CCallConv [AddrHint] [AddrHint] CmmMayReturn))
       [new_base] [CmmReg (CmmLocal id)]

-- -----------------------------------------------------------------------------

plusW :: DynFlags -> ByteOff -> WordOff -> ByteOff
plusW dflags b w = b + w * wORD_SIZE dflags

dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot]
dropEmpty 0 ss           = Just ss
dropEmpty n (Empty : ss) = dropEmpty (n-1) ss
dropEmpty _ _            = Nothing
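
-- For example: dropEmpty 2 [Empty, Empty, Occupied] == Just [Occupied],
-- but dropEmpty 2 [Empty, Occupied] == Nothing, so a two-word register
-- only fits where two consecutive Empty slots are available.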

isEmpty :: StackSlot -> Bool
isEmpty Empty = True
isEmpty _     = False

localRegBytes :: DynFlags -> LocalReg -> ByteOff
localRegBytes dflags r
    = roundUpToWords dflags (widthInBytes (typeWidth (localRegType r)))

localRegWords :: DynFlags -> LocalReg -> WordOff
localRegWords dflags = toWords dflags . localRegBytes dflags

toWords :: DynFlags -> ByteOff -> WordOff
toWords dflags x = x `quot` wORD_SIZE dflags
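
-- For example, on a 64-bit target (wORD_SIZE dflags == 8):
-- plusW dflags 16 2 == 32, and toWords dflags 32 == 4.  Note that
-- toWords truncates, so callers are expected to pass word-aligned
-- byte offsets.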


insertReloads :: StackMap -> [CmmNode O O]
insertReloads stackmap =
   [ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot Old sp)
                                     (localRegType r))
   | (r,sp) <- stackSlotRegs stackmap
   ]


stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
stackSlotRegs sm = eltsUFM (sm_regs sm)