690b0a9622b23a86b8d074ed7d977add4197e463
[ghc.git] / compiler / codeGen / StgCmmHeap.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C--: heap management functions
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmHeap (
10 getVirtHp, setVirtHp, setRealHp,
11 getHpRelOffset, hpRel,
12
13 entryHeapCheck, altHeapCheck,
14
15 mkVirtHeapOffsets, mkVirtConstrOffsets,
16 mkStaticClosureFields, mkStaticClosure,
17
18 allocDynClosure, allocDynClosureCmm, emitSetDynHdr
19 ) where
20
21 #include "HsVersions.h"
22
23 import CmmType
24 import StgSyn
25 import CLabel
26 import StgCmmLayout
27 import StgCmmUtils
28 import StgCmmMonad
29 import StgCmmProf
30 import StgCmmTicky
31 import StgCmmGran
32 import StgCmmClosure
33 import StgCmmEnv
34
35 import MkGraph
36
37 import SMRep
38 import Cmm
39 import CmmUtils
40 import CostCentre
41 import Outputable
42 import IdInfo( CafInfo(..), mayHaveCafRefs )
43 import Module
44 import FastString( mkFastString, fsLit )
45 import Constants
46 import DynFlags
47
48 -----------------------------------------------------------
49 -- Initialise dynamic heap objects
50 -----------------------------------------------------------
51
-- | Allocate a dynamic closure in the heap and bump the virtual Hp
-- to account for it.  The returned 'LocalReg' will point at the new
-- closure once the returned graph has been executed.
allocDynClosure
        :: CmmInfoTable
        -> LambdaFormInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")
        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
        -> FCode (LocalReg, CmmAGraph)

allocDynClosureCmm
        :: CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
        -> [(CmmExpr, VirtualHpOffset)]
        -> FCode (LocalReg, CmmAGraph)

-- Note [Return a LocalReg]
-- ~~~~~~~~~~~~~~~~~~~~~~~~
-- allocDynClosure returns a LocalReg, not a (Hp+8) CmmExpr.
-- Reason:
--      ...allocate object...
--      obj = Hp + 8
--      y = f(z)
--      ...here obj is still valid,
--      but Hp+8 means something quite different...

-- Convert the STG arguments to Cmm amodes, then defer to the
-- Cmm-level allocator.
allocDynClosure info_tbl lf_info use_cc _blame_cc args_w_offsets
  = do  { let (args, offsets) = unzip args_w_offsets
        ; arg_amodes <- mapM getArgAmode args   -- No void args
        ; allocDynClosureCmm info_tbl lf_info
                             use_cc _blame_cc (zip arg_amodes offsets)
        }
92
allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets
  = do  { virt_hp <- getVirtHp

        -- Record the allocation for ticky and cost-centre profiling
        ; let rep = cit_rep info_tbl
        ; tickyDynAlloc rep lf_info
        ; profDynAlloc rep use_cc

        -- The virtual Hp points to the last allocated word, i.e. one
        -- word *before* the info-pointer word of the new object, so
        -- the new object starts at virt_hp + 1.
        ; let info_offset = virt_hp + 1
              info_ptr    = CmmLit (CmmLabel (cit_lbl info_tbl))

        -- Allocate: header first, then the payload at its offsets
        ; base <- getHpRelOffset info_offset
        ; emit (mkComment $ mkFastString "allocDynClosure")
        ; emitSetDynHdr base info_ptr use_cc
        ; let (cmm_args, offsets) = unzip amodes_w_offsets
        ; hpStore base cmm_args offsets

        -- Bump the virtual heap pointer past the whole object
        ; setVirtHp (virt_hp + heapClosureSize rep)

        -- Assign the closure address to a temporary and return it;
        -- see Note [Return a LocalReg]
        ; hp_rel <- getHpRelOffset info_offset
        ; getCodeR $ assignTemp hp_rel }
124
-- | Write the fixed header of a new dynamic closure at @base@: the
-- info pointer, followed by the profiling header (when profiling).
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs
  = hpStore base header [0..]
  where
    header :: [CmmExpr]
    header = info_ptr : dynProfHdr ccs
        -- ToDo: Gransim stuff
        -- ToDo: Parallel stuff
        -- No ticky header
134
hpStore :: CmmExpr -> [CmmExpr] -> [VirtualHpOffset] -> FCode ()
-- ^ Store each value @val@ at word offset @off@ from @base@,
-- i.e. base[off] := val, for the paired lists of values and offsets.
hpStore base vals offs
  = emit (catAGraphs stores)
  where
    stores = [ mkStore (cmmOffsetW base off) val
             | (val, off) <- zip vals offs ]
141
142
143 -----------------------------------------------------------
144 -- Layout of static closures
145 -----------------------------------------------------------
146
147 -- Make a static closure, adding on any extra padding needed for CAFs,
148 -- and adding a static link field if necessary.
149
mkStaticClosureFields
        :: CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields info_tbl ccs caf_refs payload
  = mkStaticClosure info_lbl ccs payload padding
                    static_link_field saved_info_field
  where
    info_lbl = cit_lbl info_tbl

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- the static_link and saved_info fields must always be in the
    -- same place.  So we use isThunkRep rather than closureUpdReqd
    -- here:

    is_caf = isThunkRep (cit_rep info_tbl)

    -- CAFs carry a zero-word placeholder (the indirectee slot);
    -- their payload must be empty.
    padding
      | is_caf    = ASSERT(null payload) [mkIntCLit 0]
      | otherwise = []

    static_link_field
      | is_caf || staticClosureNeedsLink info_tbl = [static_link_value]
      | otherwise                                 = []

    saved_info_field
      | is_caf    = [mkIntCLit 0]
      | otherwise = []

    -- For a static constructor which has NoCafRefs, we set the
    -- static link field to a non-zero value so the garbage
    -- collector will ignore it.
    static_link_value
      | mayHaveCafRefs caf_refs = mkIntCLit 0
      | otherwise               = mkIntCLit 1  -- No CAF refs
195
-- | Assemble the literal words of a static closure: info-table label,
-- any variable header words (GranSim, par, profiling, ticky), the
-- payload with each field padded to a full word, then the CAF
-- padding, static-link and saved-info fields computed by
-- 'mkStaticClosureFields'.
mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit]
                -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure info_lbl ccs payload padding static_link_field saved_info_field
  = concat [ [CmmLabel info_lbl]
           , variable_header_words
           , concatMap padLitToWord payload
           , padding
           , static_link_field
           , saved_info_field
           ]
  where
    variable_header_words
      = staticGranHdr ++ staticParHdr ++ staticProfHdr ccs ++ staticTickyHdr
211
212 -- JD: Simon had elided this padding, but without it the C back end asserts
213 -- failure. Maybe it's a bad assertion, and this padding is indeed unnecessary?
padLitToWord :: CmmLit -> [CmmLit]
-- ^ Follow a literal with zero-literals until the total size is a
-- whole word.  The filler widens as alignment allows: W8 until the
-- remaining gap is even, W16 until it is a multiple of 4, W32 until
-- a multiple of 8, then W64 chunks.
padLitToWord lit = lit : padding pad_length
  where
    width      = typeWidth (cmmLitType lit)
    pad_length = wORD_SIZE - widthInBytes width :: Int

    padding n
      | n <= 0         = []
      | n `rem` 2 /= 0 = CmmInt 0 W8  : padding (n - 1)
      | n `rem` 4 /= 0 = CmmInt 0 W16 : padding (n - 2)
      | n `rem` 8 /= 0 = CmmInt 0 W32 : padding (n - 4)
      | otherwise      = CmmInt 0 W64 : padding (n - 8)
224
225 -----------------------------------------------------------
226 -- Heap overflow checking
227 -----------------------------------------------------------
228
229 {- Note [Heap checks]
230 ~~~~~~~~~~~~~~~~~~
231 Heap checks come in various forms. We provide the following entry
232 points to the runtime system, all of which use the native C-- entry
233 convention.
234
235 * gc() performs garbage collection and returns
236 nothing to its caller
237
238 * A series of canned entry points like
239 r = gc_1p( r )
240 where r is a pointer. This performs gc, and
241 then returns its argument r to its caller.
242
243 * A series of canned entry points like
244 gcfun_2p( f, x, y )
245 where f is a function closure of arity 2
246 This performs garbage collection, keeping alive the
247 three argument ptrs, and then tail-calls f(x,y)
248
249 These are used in the following circumstances
250
251 * entryHeapCheck: Function entry
252 (a) With a canned GC entry sequence
253 f( f_clo, x:ptr, y:ptr ) {
254 Hp = Hp+8
255 if Hp > HpLim goto L
256 ...
257 L: HpAlloc = 8
258 jump gcfun_2p( f_clo, x, y ) }
259 Note the tail call to the garbage collector;
260 it should do no register shuffling
261
262 (b) No canned sequence
263 f( f_clo, x:ptr, y:ptr, ...etc... ) {
264 T: Hp = Hp+8
265 if Hp > HpLim goto L
266 ...
267 L: HpAlloc = 8
268 call gc() -- Needs an info table
269 goto T }
270
271 * altHeapCheck: Immediately following an eval
272 Started as
273 case f x y of r { (p,q) -> rhs }
274 (a) With a canned sequence for the results of f
275 (which is the very common case since
276 all boxed cases return just one pointer
277 ...
278 r = f( x, y )
279 K: -- K needs an info table
280 Hp = Hp+8
281 if Hp > HpLim goto L
282 ...code for rhs...
283
284 L: r = gc_1p( r )
285 goto K }
286
287 Here, the info table needed by the call
288 to gc_1p should be the *same* as the
289 one for the call to f; the C-- optimiser
290 spots this sharing opportunity)
291
292 (b) No canned sequence for results of f
293 Note second info table
294 ...
295 (r1,r2,r3) = call f( x, y )
296 K:
297 Hp = Hp+8
298 if Hp > HpLim goto L
299 ...code for rhs...
300
301 L: call gc() -- Extra info table here
302 goto K
303
304 * generalHeapCheck: Anywhere else
305 e.g. entry to thunk
306 case branch *not* following eval,
307 or let-no-escape
308 Exactly the same as the previous case:
309
310 K: -- K needs an info table
311 Hp = Hp+8
312 if Hp > HpLim goto L
313 ...
314
315 L: call gc()
316 goto K
317 -}
318
319 --------------------------------------------------------------
320 -- A heap/stack check at a function or thunk entry point.
321
-- | Emit the heap/stack check for a function or thunk entry point.
-- On failure, R1 (node) is loaded with the closure (either from the
-- given register or from the closure's static label) and control
-- jumps to an RTS GC entry point: GCEnter1 for thunks, GCFun for
-- fast-entry functions, or a slow foreign jump to GCFun passing the
-- arguments for generic-argument functions.
322 entryHeapCheck :: ClosureInfo
323 -> Int -- Arg Offset
324 -> Maybe LocalReg -- Function (closure environment)
325 -> Int -- Arity -- not same as len args b/c of voids
326 -> [LocalReg] -- Non-void args (empty for thunk)
327 -> FCode ()
328 -> FCode ()
329
330 entryHeapCheck cl_info offset nodeSet arity args code
331 = do dflags <- getDynFlags
332
333 let platform = targetPlatform dflags
334
335 is_thunk = arity == 0
-- A function is "fast" unless its calling convention is ArgGen
-- (generic argument pattern).
336 is_fastf = case closureFunInfo cl_info of
337 Just (_, ArgGen _) -> False
338 _otherwise -> True
339
340 args' = map (CmmReg . CmmLocal) args
-- setN loads R1 (node) before entering the GC: from the given
-- local register if we have one, otherwise from the closure's
-- static label.
341 setN = case nodeSet of
342 Just n -> mkAssign nodeReg (CmmReg $ CmmLocal n)
343 Nothing -> mkAssign nodeReg $
344 CmmLit (CmmLabel $ staticClosureLabel platform cl_info)
345
346 {- Thunks: Set R1 = node, jump GCEnter1
347 Function (fast): Set R1 = node, jump GCFun
348 Function (slow): Set R1 = node, call generic_gc -}
349 gc_call upd = setN <*> gc_lbl upd
350 gc_lbl upd
351 | is_thunk = mkDirectJump (CmmReg $ CmmGlobal GCEnter1) [] sp
352 | is_fastf = mkDirectJump (CmmReg $ CmmGlobal GCFun) [] sp
353 | otherwise = mkForeignJump Slow (CmmReg $ CmmGlobal GCFun) args' upd
-- NOTE(review): sp mixes the argument offset with the update-frame
-- offset; see the "fishy" remark below — confirm before changing.
354 where sp = max offset upd
355 {- DT (12/08/10) This is a little fishy, mainly the sp fix up amount.
356 - This is since the ncg inserts spills before the stack/heap check.
357 - This should be fixed up and then we won't need to fix up the Sp on
358 - GC calls, but until then this fishy code works -}
359
360 updfr_sz <- getUpdFrameOff
361 heapCheck True (gc_call updfr_sz) code
362
363 {-
364 -- This code is slightly outdated now and we could easily keep the above
365 -- GC methods. However, there may be some performance gains to be made by
366 -- using more specialised GC entry points. Since the semi generic GCFun
367 -- entry needs to check the node and figure out what registers to save...
368 -- if we provided and used more specialised GC entry points then these
369 -- runtime decisions could be turned into compile time decisions.
370
371 args' = case fun of Just f -> f : args
372 Nothing -> args
373 arg_exprs = map (CmmReg . CmmLocal) args'
374 gc_call updfr_sz
375 | arity == 0 = mkJumpGC (CmmReg (CmmGlobal GCEnter1)) arg_exprs updfr_sz
376 | otherwise =
377 case gc_lbl args' of
378 Just _lbl -> panic "StgCmmHeap.entryHeapCheck: not finished"
379 -- mkJumpGC (CmmLit (CmmLabel (mkRtsCodeLabel lbl)))
380 -- arg_exprs updfr_sz
381 Nothing -> mkCall generic_gc (GC, GC) [] [] updfr_sz
382
383 gc_lbl :: [LocalReg] -> Maybe FastString
384 gc_lbl [reg]
385 | isGcPtrType ty = Just (sLit "stg_gc_unpt_r1") -- "stg_gc_fun_1p"
386 | isFloatType ty = case width of
387 W32 -> Just (sLit "stg_gc_f1")
388 W64 -> Just (sLit "stg_gc_d1")
389 _other -> Nothing
390 | width == wordWidth = Just (mkGcLabel "stg_gc_unbx_r1")
391 | width == W64 = Just (mkGcLabel "stg_gc_l1")
392 | otherwise = Nothing
393 where
394 ty = localRegType reg
395 width = typeWidth ty
396
397 gc_lbl regs = gc_lbl_ptrs (map (isGcPtrType . localRegType) regs)
398
399 gc_lbl_ptrs :: [Bool] -> Maybe FastString
400 -- JD: TEMPORARY -- UNTIL THESE FUNCTIONS EXIST...
401 --gc_lbl_ptrs [True,True] = Just (sLit "stg_gc_fun_2p")
402 --gc_lbl_ptrs [True,True,True] = Just (sLit "stg_gc_fun_3p")
403 gc_lbl_ptrs _ = Nothing
404 -}
405
406
407 --------------------------------------------------------------
408 -- A heap/stack check in a case alternative
409
altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck regs code
  = do updfr_sz <- getUpdFrameOff
       heapCheck False (gc_call updfr_sz) code
  where
    reg_exprs = map (CmmReg . CmmLocal) regs

    -- Prefer a canned GC entry point that preserves exactly the
    -- returned registers; otherwise fall back to the generic one,
    -- which saves and restores nothing.
    gc_call sp =
      case rts_label regs of
        Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp
        Nothing -> mkCall generic_gc (GC, GC) [] [] sp

    -- Canned entry points exist only for a single returned value:
    -- a GC pointer, a float/double, a word-sized unboxed value, or
    -- a 64-bit "long".
    rts_label [reg]
      | isGcPtrType ty     = Just (mkGcLabel "stg_gc_unpt_r1")
      | isFloatType ty     = case width of
                               W32 -> Just (mkGcLabel "stg_gc_f1")
                               W64 -> Just (mkGcLabel "stg_gc_d1")
                               _   -> Nothing
      | width == wordWidth = Just (mkGcLabel "stg_gc_unbx_r1")
      | width == W64       = Just (mkGcLabel "stg_gc_l1")
      | otherwise          = Nothing
      where
        ty    = localRegType reg
        width = typeWidth ty
    rts_label _ = Nothing
438
439
-- | The generic GC procedure; no params, no results.
generic_gc :: CmmExpr
generic_gc = CmmLit (mkGcLabel "stg_gc_noregs")
443
-- | Create a CLabel for calling a garbage collector entry point
-- that lives in the RTS package.
mkGcLabel :: String -> CmmLit
mkGcLabel s = CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit s))
447
448 -------------------------------
heapCheck :: Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack do_gc code
  = getHeapUsage $ \ hpHw -> do
      -- Be lazy in hpHw here: the conditionals inside do_checks
      -- scrutinise it, and forcing it eagerly would create a black
      -- hole in the knot tied by getHeapUsage.
      emit (do_checks checkStack hpHw do_gc)
      tickyAllocHeap hpHw
      doGranAllocate hpHw
      setRealHp hpHw
      code
459
-- | Build the heap (and optionally stack) check graph.  Shape:
--
--      loop: [if Sp overflows goto gc]       (only when checkStack)
--            Hp = Hp + alloc
--            if Hp > HpLim goto gc
--            <fall through to the checked code>
--      gc:   HpAlloc = alloc; do_gc; goto loop   (placed out of line)
--
-- When alloc == 0 the Hp bump-and-test is omitted entirely.
460 do_checks :: Bool -- Should we check the stack?
461 -> WordOff -- Heap headroom
462 -> CmmAGraph -- What to do on failure
463 -> CmmAGraph
464 do_checks checkStack alloc do_gc
465 = withFreshLabel "gc" $ \ loop_id ->
466 withFreshLabel "gc" $ \ gc_id ->
467 mkLabel loop_id
468 <*> (let hpCheck = if alloc == 0 then mkNop
469 else mkAssign hpReg bump_hp <*>
470 mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
471 in if checkStack
472 then mkCmmIfThenElse sp_oflo (mkBranch gc_id) hpCheck
473 else hpCheck)
474 <*> mkComment (mkFastString "outOfLine should follow:")
475 <*> outOfLine (mkLabel gc_id
476 <*> mkComment (mkFastString "outOfLine here")
477 <*> do_gc
478 <*> mkBranch loop_id)
479 -- Test for stack pointer exhaustion, then
480 -- bump heap pointer, and test for heap exhaustion
481 -- Note that we don't move the heap pointer unless the
482 -- stack check succeeds. Otherwise we might end up
483 -- with slop at the end of the current block, which can
484 -- confuse the LDV profiler.
485 where
486 alloc_lit = CmmLit (mkIntCLit (alloc*wORD_SIZE)) -- Bytes
487 bump_hp = cmmOffsetExprB (CmmReg hpReg) alloc_lit
488
489 -- Sp overflow if (Sp - CmmHighStack < SpLim)
490 sp_oflo = CmmMachOp mo_wordULt
491 [CmmMachOp (MO_Sub (typeWidth (cmmRegType spReg)))
492 [CmmReg spReg, CmmLit CmmHighStackMark],
493 CmmReg spLimReg]
494
495 -- Hp overflow if (Hp > HpLim)
496 -- (Hp has been incremented by now)
497 -- HpLim points to the LAST WORD of valid allocation space.
498 hp_oflo = CmmMachOp mo_wordUGt
499 [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]
500
-- On failure we record the requested allocation in HpAlloc so the
-- RTS knows how much headroom was being asked for.
501 alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit
502
503 {-
504
505 {- Unboxed tuple alternatives and let-no-escapes (the two most annoying
506 constructs to generate code for!) For unboxed tuple returns, there
507 are an arbitrary number of possibly unboxed return values, some of
508 which will be in registers, and the others will be on the stack. We
509 always organise the stack-resident fields into pointers &
510 non-pointers, and pass the number of each to the heap check code. -}
511
512 unbxTupleHeapCheck
513 :: [(Id, GlobalReg)] -- Live registers
514 -> WordOff -- no. of stack slots containing ptrs
515 -> WordOff -- no. of stack slots containing nonptrs
516 -> CmmAGraph -- code to insert in the failure path
517 -> FCode ()
518 -> FCode ()
519
520 unbxTupleHeapCheck regs ptrs nptrs fail_code code
521 -- We can't manage more than 255 pointers/non-pointers
522 -- in a generic heap check.
523 | ptrs > 255 || nptrs > 255 = panic "altHeapCheck"
524 | otherwise
525 = initHeapUsage $ \ hpHw -> do
526 { codeOnly $ do { do_checks 0 {- no stack check -} hpHw
527 full_fail_code rts_label
528 ; tickyAllocHeap hpHw }
529 ; setRealHp hpHw
530 ; code }
531 where
532 full_fail_code = fail_code `plusStmts` oneStmt assign_liveness
533 assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9)) -- Ho ho ho!
534 (CmmLit (mkWordCLit liveness))
535 liveness = mkRegLiveness regs ptrs nptrs
536 rts_label = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_ut")))
537
538
539 {- Old Gransim com -- I have no idea whether it still makes sense (SLPJ Sep07)
540 For GrAnSim the code for doing a heap check and doing a context switch
541 has been separated. Especially, the HEAP_CHK macro only performs a
542 heap check. THREAD_CONTEXT_SWITCH should be used for doing a context
543 switch. GRAN_FETCH_AND_RESCHEDULE must be put at the beginning of
544 every slow entry code in order to simulate the fetching of
545 closures. If fetching is necessary (i.e. current closure is not local)
546 then an automatic context switch is done. -}
547
548
549 When failing a check, we save a return address on the stack and
550 jump to a pre-compiled code fragment that saves the live registers
551 and returns to the scheduler.
552
553 The return address in most cases will be the beginning of the basic
554 block in which the check resides, since we need to perform the check
555 again on re-entry because someone else might have stolen the resource
556 in the meantime.
557
558 %************************************************************************
559 %* *
560 Generic Heap/Stack Checks - used in the RTS
561 %* *
562 %************************************************************************
563
564 \begin{code}
565 hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
566 hpChkGen bytes liveness reentry
567 = do_checks' bytes True assigns stg_gc_gen
568 where
569 assigns = mkStmts [
570 CmmAssign (CmmGlobal (VanillaReg 9)) liveness,
571 CmmAssign (CmmGlobal (VanillaReg 10)) reentry
572 ]
573
574 -- a heap check where R1 points to the closure to enter on return, and
575 -- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP).
576 hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> FCode ()
577 hpChkNodePointsAssignSp0 bytes sp0
578 = do_checks' bytes True assign stg_gc_enter1
579 where assign = oneStmt (CmmStore (CmmReg spReg) sp0)
580
581 stg_gc_gen = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_gen")))
582 \end{code}
583
584 -}