-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmHeap (
        getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset, hpRel,

        entryHeapCheck, altHeapCheck,

        mkVirtHeapOffsets, mkVirtConstrOffsets,
        mkStaticClosureFields, mkStaticClosure,

        allocDynClosure, allocDynClosureCmm, emitSetDynHdr
    ) where

#include "HsVersions.h"

import CmmType
import StgSyn
import CLabel
import StgCmmLayout
import StgCmmUtils
import StgCmmMonad
import StgCmmProf
import StgCmmTicky
import StgCmmGran
import StgCmmClosure
import StgCmmEnv

import MkGraph

import SMRep
import Cmm
import CmmUtils
import CostCentre
import Outputable
import IdInfo( CafInfo(..), mayHaveCafRefs )
import Module
import FastString( mkFastString, fsLit )
import Constants
import DynFlags

-----------------------------------------------------------
--      Initialise dynamic heap objects
-----------------------------------------------------------

allocDynClosure
        :: CmmInfoTable
        -> LambdaFormInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
        -> FCode (LocalReg, CmmAGraph)

allocDynClosureCmm
        :: CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
        -> [(CmmExpr, VirtualHpOffset)]
        -> FCode (LocalReg, CmmAGraph)

-- allocDynClosure allocates the thing in the heap,
-- and modifies the virtual Hp to account for this.
-- The second return value is the graph that sets the value of the
-- returned LocalReg, which should point to the closure after executing
-- the graph.

-- Note [Return a LocalReg]
-- ~~~~~~~~~~~~~~~~~~~~~~~~
-- allocDynClosure returns a LocalReg, not a (Hp+8) CmmExpr.
-- Reason:
--      ...allocate object...
--      obj = Hp + 8
--      y = f(z)
--      ...here obj is still valid,
--      but Hp+8 means something quite different...


allocDynClosure info_tbl lf_info use_cc _blame_cc args_w_offsets
  = do  { let (args, offsets) = unzip args_w_offsets
        ; cmm_args <- mapM getArgAmode args     -- No void args
        ; allocDynClosureCmm info_tbl lf_info
                             use_cc _blame_cc (zip cmm_args offsets)
        }

allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets
  = do  { virt_hp <- getVirtHp

        -- SAY WHAT WE ARE ABOUT TO DO
        ; let rep = cit_rep info_tbl
        ; tickyDynAlloc rep lf_info
        ; profDynAlloc rep use_cc

        -- FIND THE OFFSET OF THE INFO-PTR WORD
        ; let   info_offset = virt_hp + 1
                -- info_offset is the VirtualHpOffset of the first
                -- word of the new object
                -- Remember, virtHp points to last allocated word,
                -- ie 1 *before* the info-ptr word of new object.

                info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))

        -- ALLOCATE THE OBJECT
        ; base <- getHpRelOffset info_offset
        ; emitComment $ mkFastString "allocDynClosure"
        ; emitSetDynHdr base info_ptr use_cc
        ; let (cmm_args, offsets) = unzip amodes_w_offsets
        ; hpStore base cmm_args offsets

        -- BUMP THE VIRTUAL HEAP POINTER
        ; setVirtHp (virt_hp + heapClosureSize rep)

        -- Assign to a temporary and return
        -- Note [Return a LocalReg]
        ; hp_rel <- getHpRelOffset info_offset
        ; getCodeR $ assignTemp hp_rel }

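-- As a hedged illustration (not the verbatim output), allocating a
-- two-field closure when virt_hp = v walks through the steps above
-- roughly as:
--      info_offset = v + 1                 -- first word of the new object
--      base[0]     = <info-table label>    -- emitSetDynHdr (+ profiling header, if enabled)
--      base[1]     = arg1                  -- hpStore at the given offsets
--      base[2]     = arg2
--      virtHp     := v + heapClosureSize rep
--      obj         = base                  -- assignTemp; see Note [Return a LocalReg]
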
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs
  = hpStore base header [0..]
  where
    header :: [CmmExpr]
    header = [info_ptr] ++ dynProfHdr ccs
        -- ToDo: Gransim stuff
        -- ToDo: Parallel stuff
        -- No ticky header

hpStore :: CmmExpr -> [CmmExpr] -> [VirtualHpOffset] -> FCode ()
-- For each (val, off) in zip vals offs, store val at base[off]
hpStore base vals offs
  = emit (catAGraphs (zipWith mk_store vals offs))
  where
    mk_store val off = mkStore (cmmOffsetW base off) val

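-- For example (a sketch): hpStore base [x, y] [1, 2] emits the two
-- word-scaled stores
--      W_[base + 1*wORD_SIZE] = x;
--      W_[base + 2*wORD_SIZE] = y;
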
-----------------------------------------------------------
--      Layout of static closures
-----------------------------------------------------------

-- Make a static closure, adding on any extra padding needed for CAFs,
-- and adding a static link field if necessary.

mkStaticClosureFields
        :: CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields info_tbl ccs caf_refs payload
  = mkStaticClosure info_lbl ccs payload padding
        static_link_field saved_info_field
  where
    info_lbl = cit_lbl info_tbl

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- the static_link and saved_info fields must always be in the
    -- same place.  So we use isThunkRep rather than closureUpdReqd
    -- here:

    is_caf = isThunkRep (cit_rep info_tbl)

    padding
        | not is_caf = []
        | otherwise  = ASSERT(null payload) [mkIntCLit 0]

    static_link_field
        | is_caf || staticClosureNeedsLink info_tbl = [static_link_value]
        | otherwise                                 = []

    saved_info_field
        | is_caf    = [mkIntCLit 0]
        | otherwise = []

        -- For a static constructor which has NoCafRefs, we set the
        -- static link field to a non-zero value so the garbage
        -- collector will ignore it.
    static_link_value
        | mayHaveCafRefs caf_refs = mkIntCLit 0
        | otherwise               = mkIntCLit 1  -- No CAF refs


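-- For instance (a sketch, following the CAF layout above): a CAF with
-- empty payload and MayHaveCafRefs comes out as
--      [info_lbl, <variable headers>, 0 {indirectee}, 0 {static link}, 0 {saved info}]
-- while a static constructor with NoCafRefs that needs a link field
-- gets 1 in the static link slot, telling the GC to ignore it.
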
mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure info_lbl ccs payload padding static_link_field saved_info_field
  =  [CmmLabel info_lbl]
  ++ variable_header_words
  ++ concatMap padLitToWord payload
  ++ padding
  ++ static_link_field
  ++ saved_info_field
  where
    variable_header_words
        =  staticGranHdr
        ++ staticParHdr
        ++ staticProfHdr ccs
        ++ staticTickyHdr

-- JD: Simon had elided this padding, but without it the C back end
-- fails an assertion.  Maybe it's a bad assertion, and this padding is
-- indeed unnecessary?
padLitToWord :: CmmLit -> [CmmLit]
padLitToWord lit = lit : padding pad_length
  where width = typeWidth (cmmLitType lit)
        pad_length = wORD_SIZE - widthInBytes width :: Int

        padding n | n <= 0         = []
                  | n `rem` 2 /= 0 = CmmInt 0 W8  : padding (n-1)
                  | n `rem` 4 /= 0 = CmmInt 0 W16 : padding (n-2)
                  | n `rem` 8 /= 0 = CmmInt 0 W32 : padding (n-4)
                  | otherwise      = CmmInt 0 W64 : padding (n-8)

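-- A worked example (assuming a 64-bit target, wORD_SIZE = 8): a W32
-- literal gives pad_length = 8 - 4 = 4; the first two guards fail
-- (4 `rem` 2 == 0, 4 `rem` 4 == 0), the third fires, and we emit a
-- single W32 zero, so the result [lit, CmmInt 0 W32] fills one word.
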
-----------------------------------------------------------
--      Heap overflow checking
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
        r = gc_1p( r )
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.

  * A series of canned entry points like
        gcfun_2p( f, x, y )
    where f is a function closure of arity 2.
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y).

These are used in the following circumstances:

* entryHeapCheck: Function entry
    (a) With a canned GC entry sequence
        f( f_clo, x:ptr, y:ptr ) {
             Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             jump gcfun_2p( f_clo, x, y ) }
        Note the tail call to the garbage collector;
        it should do no register shuffling

    (b) No canned sequence
        f( f_clo, x:ptr, y:ptr, ...etc... ) {
          T: Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             call gc()   -- Needs an info table
             goto T }

* altHeapCheck: Immediately following an eval
  Started as
        case f x y of r { (p,q) -> rhs }
    (a) With a canned sequence for the results of f
        (which is the very common case, since
        all boxed cases return just one pointer)
           ...
           r = f( x, y )
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...

        L: r = gc_1p( r )
           goto K }

        Here, the info table needed by the call
        to gc_1p should be the *same* as the
        one for the call to f; the C-- optimiser
        spots this sharing opportunity.

    (b) No canned sequence for results of f
        Note second info table
           ...
           (r1,r2,r3) = call f( x, y )
        K:
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...

        L: call gc()    -- Extra info table here
           goto K

* generalHeapCheck: Anywhere else
  e.g. entry to thunk
       case branch *not* following eval,
       or let-no-escape
  Exactly the same as the previous case:

        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...

        L: call gc()
           goto K
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.

entryHeapCheck :: ClosureInfo
               -> Int            -- Arg Offset
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not same as len args b/c of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()

entryHeapCheck cl_info offset nodeSet arity args code
  = do dflags <- getDynFlags

       let platform = targetPlatform dflags

           is_thunk = arity == 0
           is_fastf = case closureFunInfo cl_info of
                           Just (_, ArgGen _) -> False
                           _otherwise         -> True

           args' = map (CmmReg . CmmLocal) args
           setN  = case nodeSet of
                        Just n  -> mkAssign nodeReg (CmmReg $ CmmLocal n)
                        Nothing -> mkAssign nodeReg $
                            CmmLit (CmmLabel $ staticClosureLabel platform cl_info)

           {- Thunks:          Set R1 = node, jump GCEnter1
              Function (fast): Set R1 = node, jump GCFun
              Function (slow): Set R1 = node, call generic_gc -}
           gc_call upd = setN <*> gc_lbl upd
           gc_lbl upd
               | is_thunk  = mkDirectJump (CmmReg $ CmmGlobal GCEnter1) [] sp
               | is_fastf  = mkDirectJump (CmmReg $ CmmGlobal GCFun) [] sp
               | otherwise = mkForeignJump Slow (CmmReg $ CmmGlobal GCFun) args' upd
               where sp = max offset upd
           {- DT (12/08/10) This is a little fishy, mainly the sp fix-up amount.
            - This is because the ncg inserts spills before the stack/heap check.
            - That should be fixed, and then we won't need to fix up Sp on
            - GC calls; but until then this fishy code works. -}

       updfr_sz <- getUpdFrameOff
       heapCheck True (gc_call updfr_sz) code

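-- To illustrate (a sketch, not the literal output): for a thunk the
-- gc_call built above amounts to
--      R1 = node;
--      jump <GCEnter1>;
-- while a slow-call function instead makes a Slow foreign jump to
-- <GCFun>, passing args' so the argument pointers stay alive across GC.
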
{-
-- This code is slightly outdated now and we could easily keep the above
-- GC methods. However, there may be some performance gains to be made by
-- using more specialised GC entry points. Since the semi-generic GCFun
-- entry needs to check the node and figure out what registers to save...
-- if we provided and used more specialised GC entry points then these
-- runtime decisions could be turned into compile-time decisions.

    args' = case fun of Just f  -> f : args
                        Nothing -> args
    arg_exprs = map (CmmReg . CmmLocal) args'
    gc_call updfr_sz
        | arity == 0 = mkJumpGC (CmmReg (CmmGlobal GCEnter1)) arg_exprs updfr_sz
        | otherwise =
            case gc_lbl args' of
                Just _lbl -> panic "StgCmmHeap.entryHeapCheck: not finished"
                    -- mkJumpGC (CmmLit (CmmLabel (mkRtsCodeLabel lbl)))
                    --          arg_exprs updfr_sz
                Nothing   -> mkCall generic_gc (GC, GC) [] [] updfr_sz

    gc_lbl :: [LocalReg] -> Maybe FastString
    gc_lbl [reg]
        | isGcPtrType ty = Just (sLit "stg_gc_unpt_r1") -- "stg_gc_fun_1p"
        | isFloatType ty = case width of
                               W32    -> Just (sLit "stg_gc_f1")
                               W64    -> Just (sLit "stg_gc_d1")
                               _other -> Nothing
        | width == wordWidth = Just (mkGcLabel "stg_gc_unbx_r1")
        | width == W64       = Just (mkGcLabel "stg_gc_l1")
        | otherwise          = Nothing
        where
            ty = localRegType reg
            width = typeWidth ty

    gc_lbl regs = gc_lbl_ptrs (map (isGcPtrType . localRegType) regs)

    gc_lbl_ptrs :: [Bool] -> Maybe FastString
    -- JD: TEMPORARY -- UNTIL THESE FUNCTIONS EXIST...
    --gc_lbl_ptrs [True,True]      = Just (sLit "stg_gc_fun_2p")
    --gc_lbl_ptrs [True,True,True] = Just (sLit "stg_gc_fun_3p")
    gc_lbl_ptrs _ = Nothing
-}


--------------------------------------------------------------
-- A heap/stack check in a case alternative

altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck regs code
  = do updfr_sz <- getUpdFrameOff
       gc_call_code <- gc_call updfr_sz
       heapCheck False gc_call_code code

  where
    reg_exprs = map (CmmReg . CmmLocal) regs

    gc_call sp =
        case rts_label regs of
             Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp (0,[])
             Nothing -> mkCall generic_gc (GC, GC) [] [] sp (0,[])

    rts_label [reg]
        | isGcPtrType ty = Just (mkGcLabel "stg_gc_unpt_r1")
        | isFloatType ty = case width of
                                W32 -> Just (mkGcLabel "stg_gc_f1")
                                W64 -> Just (mkGcLabel "stg_gc_d1")
                                _   -> Nothing

        | width == wordWidth = Just (mkGcLabel "stg_gc_unbx_r1")
        | width == W64       = Just (mkGcLabel "stg_gc_l1")
        | otherwise          = Nothing
        where
            ty = localRegType reg
            width = typeWidth ty

    rts_label _ = Nothing

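-- So, for example (a sketch): altHeapCheck [r] for a single result r
-- of GC'd-pointer type picks the canned entry from the table above and
-- emits, roughly,
--      r = stg_gc_unpt_r1( r );
-- whereas several live results fall through to generic_gc
-- (stg_gc_noregs), which takes and returns no arguments.
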
-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
generic_gc = CmmLit $ mkGcLabel "stg_gc_noregs"

-- | Create a CLabel for calling a garbage collector entry point
mkGcLabel :: String -> CmmLit
mkGcLabel = (CmmLabel . (mkCmmCodeLabel rtsPackageId) . fsLit)

-------------------------------
heapCheck :: Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack do_gc code
  = getHeapUsage $ \ hpHw ->
    -- Emit heap checks, but be sure to do it lazily so
    -- that the conditionals on hpHw don't cause a black hole
    do  { codeOnly $ do_checks checkStack hpHw do_gc
        ; tickyAllocHeap hpHw
        ; doGranAllocate hpHw
        ; setRealHp hpHw
        ; code }

do_checks :: Bool       -- Should we check the stack?
          -> WordOff    -- Heap headroom
          -> CmmAGraph  -- What to do on failure
          -> FCode ()
do_checks checkStack alloc do_gc = do
  loop_id <- newLabelC
  gc_id   <- newLabelC
  emitLabel loop_id
  hp_check <- if alloc == 0
                 then return mkNop
                 else do
                   ifthen <- mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
                   return (mkAssign hpReg bump_hp <*> ifthen)

  if checkStack
     then emit =<< mkCmmIfThenElse sp_oflo (mkBranch gc_id) hp_check
     else emit hp_check

  emit $ mkComment (mkFastString "outOfLine should follow:")

  emitOutOfLine gc_id $
     mkComment (mkFastString "outOfLine here") <*>
     do_gc <*>
     mkBranch loop_id
  -- Test for stack pointer exhaustion, then
  -- bump heap pointer, and test for heap exhaustion
  -- Note that we don't move the heap pointer unless the
  -- stack check succeeds.  Otherwise we might end up
  -- with slop at the end of the current block, which can
  -- confuse the LDV profiler.
  where
    alloc_lit = CmmLit (mkIntCLit (alloc*wORD_SIZE)) -- Bytes
    bump_hp   = cmmOffsetExprB (CmmReg hpReg) alloc_lit

    -- Sp overflow if (Sp - CmmHighStack < SpLim)
    sp_oflo = CmmMachOp mo_wordULt
                  [CmmMachOp (MO_Sub (typeWidth (cmmRegType spReg)))
                             [CmmReg spReg, CmmLit CmmHighStackMark],
                   CmmReg spLimReg]

    -- Hp overflow if (Hp > HpLim)
    -- (Hp has been incremented by now)
    -- HpLim points to the LAST WORD of valid allocation space.
    hp_oflo = CmmMachOp mo_wordUGt
                  [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]

    alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit

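-- As a hedged sketch, with checkStack = True and alloc > 0 the emitted
-- shape is roughly:
--      loop:
--        if ((Sp - <max stack usage>) < SpLim) goto gc          -- sp_oflo
--        Hp = Hp + <alloc bytes>                                -- bump_hp
--        if (Hp > HpLim) { HpAlloc = <alloc bytes>; goto gc }   -- hp_oflo, alloc_n
--        <code>
--      gc:            -- out of line
--        <do_gc>
--        goto loop
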
{-

{- Unboxed tuple alternatives and let-no-escapes (the two most annoying
constructs to generate code for!)  For unboxed tuple returns, there
are an arbitrary number of possibly unboxed return values, some of
which will be in registers, and the others will be on the stack.  We
always organise the stack-resident fields into pointers &
non-pointers, and pass the number of each to the heap check code. -}

unbxTupleHeapCheck
        :: [(Id, GlobalReg)]    -- Live registers
        -> WordOff              -- no. of stack slots containing ptrs
        -> WordOff              -- no. of stack slots containing nonptrs
        -> CmmAGraph            -- code to insert in the failure path
        -> FCode ()
        -> FCode ()

unbxTupleHeapCheck regs ptrs nptrs fail_code code
  -- We can't manage more than 255 pointers/non-pointers
  -- in a generic heap check.
  | ptrs > 255 || nptrs > 255 = panic "altHeapCheck"
  | otherwise
  = initHeapUsage $ \ hpHw -> do
        { codeOnly $ do { do_checks 0 {- no stack check -} hpHw
                                    full_fail_code rts_label
                        ; tickyAllocHeap hpHw }
        ; setRealHp hpHw
        ; code }
  where
    full_fail_code  = fail_code `plusStmts` oneStmt assign_liveness
    assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9))  -- Ho ho ho!
                                (CmmLit (mkWordCLit liveness))
    liveness        = mkRegLiveness regs ptrs nptrs
    rts_label       = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_ut")))


{- Old Gransim comment -- I have no idea whether it still makes sense (SLPJ Sep07)
For GrAnSim the code for doing a heap check and doing a context switch
has been separated. Especially, the HEAP_CHK macro only performs a
heap check. THREAD_CONTEXT_SWITCH should be used for doing a context
switch. GRAN_FETCH_AND_RESCHEDULE must be put at the beginning of
every slow entry code in order to simulate the fetching of
closures. If fetching is necessary (i.e. current closure is not local)
then an automatic context switch is done. -}


When failing a check, we save a return address on the stack and
jump to a pre-compiled code fragment that saves the live registers
and returns to the scheduler.

The return address in most cases will be the beginning of the basic
block in which the check resides, since we need to perform the check
again on re-entry because someone else might have stolen the resource
in the meantime.

%************************************************************************
%*                                                                      *
     Generic Heap/Stack Checks - used in the RTS
%*                                                                      *
%************************************************************************

\begin{code}
hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
hpChkGen bytes liveness reentry
  = do_checks' bytes True assigns stg_gc_gen
  where
    assigns = mkStmts [
                  CmmAssign (CmmGlobal (VanillaReg 9))  liveness,
                  CmmAssign (CmmGlobal (VanillaReg 10)) reentry
              ]

-- a heap check where R1 points to the closure to enter on return, and
-- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP).
hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> FCode ()
hpChkNodePointsAssignSp0 bytes sp0
  = do_checks' bytes True assign stg_gc_enter1
  where assign = oneStmt (CmmStore (CmmReg spReg) sp0)

stg_gc_gen = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_gen")))
\end{code}

-}