0015da1cacc5c803a6f36c055a2c7eb0f31cb4ee
[ghc.git] / compiler / codeGen / StgCmmHeap.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C--: heap management functions
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmHeap (
10 getVirtHp, setVirtHp, setRealHp,
11 getHpRelOffset, hpRel,
12
13 entryHeapCheck, altHeapCheck,
14
15 layOutDynConstr, layOutStaticConstr,
16 mkVirtHeapOffsets, mkStaticClosureFields, mkStaticClosure,
17
18 allocDynClosure, allocDynClosureCmm, emitSetDynHdr
19 ) where
20
21 #include "HsVersions.h"
22
23 import CmmType
24 import StgSyn
25 import CLabel
26 import StgCmmLayout
27 import StgCmmUtils
28 import StgCmmMonad
29 import StgCmmProf
30 import StgCmmTicky
31 import StgCmmGran
32 import StgCmmClosure
33 import StgCmmEnv
34
35 import MkGraph
36
37 import SMRep
38 import CmmExpr
39 import CmmUtils
40 import DataCon
41 import TyCon
42 import CostCentre
43 import Outputable
44 import Module
45 import FastString( mkFastString, fsLit )
46 import Constants
47
48 -----------------------------------------------------------
49 -- Layout of heap objects
50 -----------------------------------------------------------
51
layOutDynConstr, layOutStaticConstr
        :: DataCon -> [(PrimRep, a)]
        -> (ClosureInfo, [(NonVoid a, VirtualHpOffset)])
        -- Void arguments do not appear in the result

layOutDynConstr    = layOutConstr False
layOutStaticConstr = layOutConstr True

-- Shared worker for the two entry points above; the Bool says whether
-- the constructor closure will be allocated statically.
layOutConstr :: Bool -> DataCon -> [(PrimRep, a)]
             -> (ClosureInfo, [(NonVoid a, VirtualHpOffset)])
layOutConstr is_static data_con args
  = (con_info, fields_w_offsets)
  where
    con_info = mkConInfo is_static data_con tot_wds ptr_wds
    (tot_wds,           -- #ptr_wds + #nonptr_wds
     ptr_wds,           -- #ptr_wds
     fields_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} args
69
70
71 -----------------------------------------------------------
72 -- Initialise dynamic heap objects
73 -----------------------------------------------------------
74
allocDynClosure
        :: ClosureInfo
        -> CmmExpr        -- Cost Centre to store in the object
        -> CmmExpr        -- Cost Centre to blame for this alloc
                          -- (usually the same; sometimes "OVERHEAD")
        -> [(NonVoid StgArg, VirtualHpOffset)]
                          -- Payload, as offsets from the start of the
                          -- object (the info pointer is at offset zero);
                          -- no void args in here
        -> FCode (LocalReg, CmmAGraph)

allocDynClosureCmm
        :: ClosureInfo -> CmmExpr -> CmmExpr
        -> [(CmmExpr, VirtualHpOffset)]
        -> FCode (LocalReg, CmmAGraph)

-- allocDynClosure allocates the object in the heap and bumps the
-- virtual Hp to account for it.  The second component of the result is
-- the graph that assigns the returned LocalReg, which points at the
-- closure once that graph has executed.

-- Note [Return a LocalReg]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- allocDynClosure returns a LocalReg, not a (Hp+8) CmmExpr, because an
-- Hp-relative expression goes stale as soon as further allocation occurs:
--      ...allocate object...
--      obj = Hp + 8
--      y = f(z)
--      ...here obj is still valid,
--      but Hp+8 means something quite different...

allocDynClosure cl_info use_cc _blame_cc args_w_offsets
  = do { amodes_w_offsets <- mapM to_amode args_w_offsets
       ; allocDynClosureCmm cl_info use_cc _blame_cc amodes_w_offsets }
  where
    -- Translate each (non-void) argument to its Cmm amode, carrying
    -- its heap offset along unchanged
    to_amode (arg, off) = do { amode <- getArgAmode arg
                             ; return (amode, off) }
113
-- | Emit code that allocates a dynamic closure described by @cl_info@:
-- writes the header and payload words at the current virtual Hp, bumps
-- the virtual heap pointer by the closure size, and returns a LocalReg
-- pointing at the new object (see Note [Return a LocalReg]).
114 allocDynClosureCmm cl_info use_cc _blame_cc amodes_w_offsets
115 = do { virt_hp <- getVirtHp
116
117 -- SAY WHAT WE ARE ABOUT TO DO
118 ; tickyDynAlloc cl_info
119 ; profDynAlloc cl_info use_cc
120 -- ToDo: This is almost certainly wrong
121 -- We're ignoring blame_cc. But until we've
122 -- fixed the boxing hack in chooseDynCostCentres etc,
123 -- we're worried about making things worse by "fixing"
124 -- this part to use blame_cc!
125
126 -- FIND THE OFFSET OF THE INFO-PTR WORD
127 ; let info_offset = virt_hp + 1
128 -- info_offset is the VirtualHpOffset of the first
129 -- word of the new object
130 -- Remember, virtHp points to last allocated word,
131 -- ie 1 *before* the info-ptr word of new object.
132
133 info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info))
134
135 -- ALLOCATE THE OBJECT
136 ; base <- getHpRelOffset info_offset
137 ; emit (mkComment $ mkFastString "allocDynClosure")
138 ; emitSetDynHdr base info_ptr use_cc
139 ; let (cmm_args, offsets) = unzip amodes_w_offsets
140 ; hpStore base cmm_args offsets
141
142 -- BUMP THE VIRTUAL HEAP POINTER
143 ; setVirtHp (virt_hp + closureSize cl_info)
144
145 -- Assign to a temporary and return
146 -- Note [Return a LocalReg]
147 ; hp_rel <- getHpRelOffset info_offset
148 ; getCodeR $ assignTemp hp_rel }
149
-- | Write the standard dynamic-object header -- the info pointer
-- followed by the profiling header words -- starting at @base@.
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs = hpStore base header_words [0..]
  where
    header_words :: [CmmExpr]
    header_words = info_ptr : dynProfHdr ccs
        -- ToDo: Gransim stuff
        -- ToDo: Parallel stuff
        -- No ticky header
159
-- | Store each value at its paired word offset from @base@,
-- i.e. emit base[off] := val for every (val, off).
hpStore :: CmmExpr -> [CmmExpr] -> [VirtualHpOffset] -> FCode ()
hpStore base vals offs
  = emit $ catAGraphs
      [ mkStore (cmmOffsetW base off) val | (val, off) <- zip vals offs ]
166
167
168 -----------------------------------------------------------
169 -- Layout of static closures
170 -----------------------------------------------------------
171
172 -- Make a static closure, adding on any extra padding needed for CAFs,
173 -- and adding a static link field if necessary.
174
-- | Compute the complete list of words for a static closure: header and
-- payload via 'mkStaticClosure', plus the CAF padding, static-link and
-- saved-info fields where the closure's layout requires them (see the
-- CAF layout comment below).
175 mkStaticClosureFields
176 :: ClosureInfo
177 -> CostCentreStack
178 -> Bool -- Has CAF refs
179 -> [CmmLit] -- Payload
180 -> [CmmLit] -- The full closure
181 mkStaticClosureFields cl_info ccs caf_refs payload
182 = mkStaticClosure info_lbl ccs payload padding
183 static_link_field saved_info_field
184 where
185 info_lbl = infoTableLabelFromCI cl_info
186
187 -- CAFs must have consistent layout, regardless of whether they
188 -- are actually updatable or not. The layout of a CAF is:
189 --
190 -- 3 saved_info
191 -- 2 static_link
192 -- 1 indirectee
193 -- 0 info ptr
194 --
195 -- the static_link and saved_info fields must always be in the same
196 -- place. So we use closureNeedsUpdSpace rather than
197 -- closureUpdReqd here:
198
199 is_caf = closureNeedsUpdSpace cl_info
200
201 padding
202 | not is_caf = []
203 | otherwise = ASSERT(null payload) [mkIntCLit 0]
204
205 static_link_field
206 | is_caf || staticClosureNeedsLink cl_info = [static_link_value]
207 | otherwise = []
208
209 saved_info_field
210 | is_caf = [mkIntCLit 0]
211 | otherwise = []
212
213 -- for a static constructor which has NoCafRefs, we set the
214 -- static link field to a non-zero value so the garbage
215 -- collector will ignore it.
216 static_link_value
217 | caf_refs = mkIntCLit 0
218 | otherwise = mkIntCLit 1
220
-- | Assemble the word list of a static closure: the info-table label,
-- any variable header words (GranSim, parallel, profiling, ticky), the
-- payload with every literal padded to a full word, then the padding,
-- static-link and saved-info fields supplied by the caller.
mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit]
                -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure info_lbl ccs payload padding static_link_field saved_info_field
  = concat [ [CmmLabel info_lbl]
           , variable_header_words
           , concatMap padLitToWord payload
           , padding
           , static_link_field
           , saved_info_field
           ]
  where
    variable_header_words
      = concat [ staticGranHdr
               , staticParHdr
               , staticProfHdr ccs
               , staticTickyHdr
               ]
236
-- JD: Simon had elided this padding, but without it the C back end
-- asserts failure. Maybe it's a bad assertion, and this padding is
-- indeed unnecessary?
-- | Pad a literal out to a full word by appending zero literals, using
-- the largest naturally-aligned zero that fits the remaining gap.
padLitToWord :: CmmLit -> [CmmLit]
padLitToWord lit = lit : zeros pad_bytes
  where
    pad_bytes = wORD_SIZE - widthInBytes (typeWidth (cmmLitType lit)) :: Int

    -- Emit zero literals totalling n bytes, respecting alignment
    zeros n | n <= 0         = []
            | n `rem` 2 /= 0 = CmmInt 0 W8  : zeros (n-1)
            | n `rem` 4 /= 0 = CmmInt 0 W16 : zeros (n-2)
            | n `rem` 8 /= 0 = CmmInt 0 W32 : zeros (n-4)
            | otherwise      = CmmInt 0 W64 : zeros (n-8)
249
250 -----------------------------------------------------------
251 -- Heap overflow checking
252 -----------------------------------------------------------
253
254 {- Note [Heap checks]
255 ~~~~~~~~~~~~~~~~~~
256 Heap checks come in various forms. We provide the following entry
257 points to the runtime system, all of which use the native C-- entry
258 convention.
259
260 * gc() performs garbage collection and returns
261 nothing to its caller
262
263 * A series of canned entry points like
264 r = gc_1p( r )
265 where r is a pointer. This performs gc, and
266 then returns its argument r to its caller.
267
268 * A series of canned entry points like
269 gcfun_2p( f, x, y )
270 where f is a function closure of arity 2
271 This performs garbage collection, keeping alive the
272 three argument ptrs, and then tail-calls f(x,y)
273
274 These are used in the following circumstances
275
276 * entryHeapCheck: Function entry
277 (a) With a canned GC entry sequence
278 f( f_clo, x:ptr, y:ptr ) {
279 Hp = Hp+8
280 if Hp > HpLim goto L
281 ...
282 L: HpAlloc = 8
283 jump gcfun_2p( f_clo, x, y ) }
284 Note the tail call to the garbage collector;
285 it should do no register shuffling
286
287 (b) No canned sequence
288 f( f_clo, x:ptr, y:ptr, ...etc... ) {
289 T: Hp = Hp+8
290 if Hp > HpLim goto L
291 ...
292 L: HpAlloc = 8
293 call gc() -- Needs an info table
294 goto T }
295
296 * altHeapCheck: Immediately following an eval
297 Started as
298 case f x y of r { (p,q) -> rhs }
299 (a) With a canned sequence for the results of f
300 (which is the very common case since
301 all boxed cases return just one pointer
302 ...
303 r = f( x, y )
304 K: -- K needs an info table
305 Hp = Hp+8
306 if Hp > HpLim goto L
307 ...code for rhs...
308
309 L: r = gc_1p( r )
310 goto K }
311
312 Here, the info table needed by the call
313 to gc_1p should be the *same* as the
314 one for the call to f; the C-- optimiser
315 spots this sharing opportunity)
316
317 (b) No canned sequence for results of f
318 Note second info table
319 ...
320 (r1,r2,r3) = call f( x, y )
321 K:
322 Hp = Hp+8
323 if Hp > HpLim goto L
324 ...code for rhs...
325
326 L: call gc() -- Extra info table here
327 goto K
328
329 * generalHeapCheck: Anywhere else
330 e.g. entry to thunk
331 case branch *not* following eval,
332 or let-no-escape
333 Exactly the same as the previous case:
334
335 K: -- K needs an info table
336 Hp = Hp+8
337 if Hp > HpLim goto L
338 ...
339
340 L: call gc()
341 goto K
342 -}
343
344 --------------------------------------------------------------
345 -- A heap/stack check at a function or thunk entry point.
346
-- | Emit the heap (and stack) check for a function or thunk entry
-- point.  On failure the generated code loads R1 with the closure
-- (from @nodeSet@, or the closure's static label) and jumps to, or
-- calls, the appropriate GC entry, which re-enters after collection.
-- See Note [Heap checks] above for the overall scheme.
347 entryHeapCheck :: ClosureInfo
348 -> Int -- Arg Offset
349 -> Maybe LocalReg -- Function (closure environment)
350 -> Int -- Arity -- not same as len args b/c of voids
351 -> [LocalReg] -- Non-void args (empty for thunk)
352 -> FCode ()
353 -> FCode ()
354
355 entryHeapCheck cl_info offset nodeSet arity args code
356 = do updfr_sz <- getUpdFrameOff
357 heapCheck True (gc_call updfr_sz) code
358
359 where
360 is_thunk = arity == 0
-- A function is "fast" unless its calling convention is ArgGen
361 is_fastf = case closureFunInfo cl_info of
362 Just (_, ArgGen _) -> False
363 _otherwise -> True
364
365 args' = map (CmmReg . CmmLocal) args
-- setN loads R1 with the closure: either the environment register we
-- were given, or the closure's own static label when there is none
366 setN = case nodeSet of
367 Just n -> mkAssign nodeReg (CmmReg $ CmmLocal n)
368 Nothing -> mkAssign nodeReg $
369 CmmLit (CmmLabel $ closureLabelFromCI cl_info)
370
371 {- Thunks: Set R1 = node, jump GCEnter1
372 Function (fast): Set R1 = node, jump GCFun
373 Function (slow): Set R1 = node, call generic_gc -}
374 gc_call upd = setN <*> gc_lbl upd
375 gc_lbl upd
376 | is_thunk = mkDirectJump (CmmReg $ CmmGlobal GCEnter1) [] sp
377 | is_fastf = mkDirectJump (CmmReg $ CmmGlobal GCFun) [] sp
378 | otherwise = mkForeignJump Slow (CmmReg $ CmmGlobal GCFun) args' upd
379 where sp = max offset upd
380 {- DT (12/08/10) This is a little fishy, mainly the sp fix up amount.
381 - This is since the ncg inserts spills before the stack/heap check.
382 - This should be fixed up and then we won't need to fix up the Sp on
383 - GC calls, but until then this fishy code works -}
384
385 {-
386 -- This code is slightly outdated now and we could easily keep the above
387 -- GC methods. However, there may be some performance gains to be made by
388 -- using more specialised GC entry points. Since the semi generic GCFun
389 -- entry needs to check the node and figure out what registers to save...
390 -- if we provided and used more specialised GC entry points then these
391 -- runtime decisions could be turned into compile time decisions.
392
393 args' = case fun of Just f -> f : args
394 Nothing -> args
395 arg_exprs = map (CmmReg . CmmLocal) args'
396 gc_call updfr_sz
397 | arity == 0 = mkJumpGC (CmmReg (CmmGlobal GCEnter1)) arg_exprs updfr_sz
398 | otherwise =
399 case gc_lbl args' of
400 Just _lbl -> panic "StgCmmHeap.entryHeapCheck: not finished"
401 -- mkJumpGC (CmmLit (CmmLabel (mkRtsCodeLabel lbl)))
402 -- arg_exprs updfr_sz
403 Nothing -> mkCall generic_gc (GC, GC) [] [] updfr_sz
404
405 gc_lbl :: [LocalReg] -> Maybe FastString
406 gc_lbl [reg]
407 | isGcPtrType ty = Just (sLit "stg_gc_unpt_r1") -- "stg_gc_fun_1p"
408 | isFloatType ty = case width of
409 W32 -> Just (sLit "stg_gc_f1")
410 W64 -> Just (sLit "stg_gc_d1")
411 _other -> Nothing
412 | width == wordWidth = Just (mkGcLabel "stg_gc_unbx_r1")
413 | width == W64 = Just (mkGcLabel "stg_gc_l1")
414 | otherwise = Nothing
415 where
416 ty = localRegType reg
417 width = typeWidth ty
418
419 gc_lbl regs = gc_lbl_ptrs (map (isGcPtrType . localRegType) regs)
420
421 gc_lbl_ptrs :: [Bool] -> Maybe FastString
422 -- JD: TEMPORARY -- UNTIL THESE FUNCTIONS EXIST...
423 --gc_lbl_ptrs [True,True] = Just (sLit "stg_gc_fun_2p")
424 --gc_lbl_ptrs [True,True,True] = Just (sLit "stg_gc_fun_3p")
425 gc_lbl_ptrs _ = Nothing
426 -}
427
428
429 --------------------------------------------------------------
430 -- A heap/stack check in a case alternative
431
-- | Emit a heap check for code immediately following an eval (a case
-- alternative).  Uses a canned RTS GC entry point when the live
-- registers match one of the specialised patterns (a single pointer,
-- float, double, word or long); otherwise falls back to the generic
-- GC entry, which is called with no registers passed.
432 altHeapCheck :: [LocalReg] -> FCode a -> FCode a
433 altHeapCheck regs code
434 = do updfr_sz <- getUpdFrameOff
435 heapCheck False (gc_call updfr_sz) code
436
437 where
438 reg_exprs = map (CmmReg . CmmLocal) regs
439
-- Pick the canned entry if one matches, else the generic one
440 gc_call sp =
441 case rts_label regs of
442 Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp
443 Nothing -> mkCall generic_gc (GC, GC) [] [] sp
444
445 rts_label [reg]
446 | isGcPtrType ty = Just (mkGcLabel "stg_gc_unpt_r1")
447 | isFloatType ty = case width of
448 W32 -> Just (mkGcLabel "stg_gc_f1")
449 W64 -> Just (mkGcLabel "stg_gc_d1")
450 _ -> Nothing
451
452 | width == wordWidth = Just (mkGcLabel "stg_gc_unbx_r1")
453 | width == W64 = Just (mkGcLabel "stg_gc_l1")
454 | otherwise = Nothing
455 where
456 ty = localRegType reg
457 width = typeWidth ty
458
-- No canned entry for zero or multiple live registers
459 rts_label _ = Nothing
460
461
-- | The generic GC procedure; takes no parameters and returns no
-- results, so nothing needs to be saved across the call.
generic_gc :: CmmExpr
generic_gc = CmmLit (mkGcLabel "stg_gc_noregs")
465
-- | Create a CmmLit label for a garbage-collector entry point in the
-- RTS package, given its name.
mkGcLabel :: String -> CmmLit
mkGcLabel name = CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit name))
469
470 -------------------------------
-- | Compute the heap usage of @code@ and emit the corresponding
-- heap (and, if the Bool is True, stack) check in front of it, with
-- @do_gc@ as the failure path.
471 heapCheck :: Bool -> CmmAGraph -> FCode a -> FCode a
472 heapCheck checkStack do_gc code
473 = getHeapUsage $ \ hpHw ->
474 -- Emit heap checks, but be sure to do it lazily so
475 -- that the conditionals on hpHw don't cause a black hole
476 do { emit $ do_checks checkStack hpHw do_gc
477 ; tickyAllocHeap hpHw
478 ; doGranAllocate hpHw
479 ; setRealHp hpHw
480 ; code }
481
-- | Build the check-and-retry graph for a heap/stack check: a loop
-- label, an optional stack-overflow test, a heap-pointer bump plus
-- overflow test (skipped when nothing is allocated), and an
-- out-of-line failure path that runs @do_gc@ and branches back to
-- retry the whole check.
482 do_checks :: Bool -- Should we check the stack?
483 -> WordOff -- Heap headroom
484 -> CmmAGraph -- What to do on failure
485 -> CmmAGraph
486 do_checks checkStack alloc do_gc
487 = withFreshLabel "gc" $ \ loop_id ->
488 withFreshLabel "gc" $ \ gc_id ->
489 mkLabel loop_id
490 <*> (let hpCheck = if alloc == 0 then mkNop
491 else mkAssign hpReg bump_hp <*>
492 mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
493 in if checkStack
494 then mkCmmIfThenElse sp_oflo (mkBranch gc_id) hpCheck
495 else hpCheck)
496 <*> mkComment (mkFastString "outOfLine should follow:")
497 <*> outOfLine (mkLabel gc_id
498 <*> mkComment (mkFastString "outOfLine here")
499 <*> do_gc
500 <*> mkBranch loop_id)
501 -- Test for stack pointer exhaustion, then
502 -- bump heap pointer, and test for heap exhaustion
503 -- Note that we don't move the heap pointer unless the
504 -- stack check succeeds. Otherwise we might end up
505 -- with slop at the end of the current block, which can
506 -- confuse the LDV profiler.
507 where
508 alloc_lit = CmmLit (mkIntCLit (alloc*wORD_SIZE)) -- Bytes
509 bump_hp = cmmOffsetExprB (CmmReg hpReg) alloc_lit
510
511 -- Sp overflow if (Sp - CmmHighStack < SpLim)
512 sp_oflo = CmmMachOp mo_wordULt
513 [CmmMachOp (MO_Sub (typeWidth (cmmRegType spReg)))
514 [CmmReg spReg, CmmLit CmmHighStackMark],
515 CmmReg spLimReg]
516
517 -- Hp overflow if (Hp > HpLim)
518 -- (Hp has been incremented by now)
519 -- HpLim points to the LAST WORD of valid allocation space.
520 hp_oflo = CmmMachOp mo_wordUGt
521 [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]
522
523 alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit
524
525 {-
526
527 {- Unboxed tuple alternatives and let-no-escapes (the two most annoying
528 constructs to generate code for!) For unboxed tuple returns, there
529 are an arbitrary number of possibly unboxed return values, some of
530 which will be in registers, and the others will be on the stack. We
531 always organise the stack-resident fields into pointers &
532 non-pointers, and pass the number of each to the heap check code. -}
533
534 unbxTupleHeapCheck
535 :: [(Id, GlobalReg)] -- Live registers
536 -> WordOff -- no. of stack slots containing ptrs
537 -> WordOff -- no. of stack slots containing nonptrs
538 -> CmmAGraph -- code to insert in the failure path
539 -> FCode ()
540 -> FCode ()
541
542 unbxTupleHeapCheck regs ptrs nptrs fail_code code
543 -- We can't manage more than 255 pointers/non-pointers
544 -- in a generic heap check.
545 | ptrs > 255 || nptrs > 255 = panic "altHeapCheck"
546 | otherwise
547 = initHeapUsage $ \ hpHw -> do
548 { codeOnly $ do { do_checks 0 {- no stack check -} hpHw
549 full_fail_code rts_label
550 ; tickyAllocHeap hpHw }
551 ; setRealHp hpHw
552 ; code }
553 where
554 full_fail_code = fail_code `plusStmts` oneStmt assign_liveness
555 assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9)) -- Ho ho ho!
556 (CmmLit (mkWordCLit liveness))
557 liveness = mkRegLiveness regs ptrs nptrs
558 rts_label = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_ut")))
559
560
561 {- Old Gransim com -- I have no idea whether it still makes sense (SLPJ Sep07)
562 For GrAnSim the code for doing a heap check and doing a context switch
563 has been separated. Especially, the HEAP_CHK macro only performs a
564 heap check. THREAD_CONTEXT_SWITCH should be used for doing a context
565 switch. GRAN_FETCH_AND_RESCHEDULE must be put at the beginning of
566 every slow entry code in order to simulate the fetching of
567 closures. If fetching is necessary (i.e. current closure is not local)
568 then an automatic context switch is done. -}
569
570
571 When failing a check, we save a return address on the stack and
572 jump to a pre-compiled code fragment that saves the live registers
573 and returns to the scheduler.
574
575 The return address in most cases will be the beginning of the basic
576 block in which the check resides, since we need to perform the check
577 again on re-entry because someone else might have stolen the resource
578 in the meantime.
579
580 %************************************************************************
581 %* *
582 Generic Heap/Stack Checks - used in the RTS
583 %* *
584 %************************************************************************
585
586 \begin{code}
587 hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
588 hpChkGen bytes liveness reentry
589 = do_checks' bytes True assigns stg_gc_gen
590 where
591 assigns = mkStmts [
592 CmmAssign (CmmGlobal (VanillaReg 9)) liveness,
593 CmmAssign (CmmGlobal (VanillaReg 10)) reentry
594 ]
595
596 -- a heap check where R1 points to the closure to enter on return, and
597 -- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP).
598 hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> FCode ()
599 hpChkNodePointsAssignSp0 bytes sp0
600 = do_checks' bytes True assign stg_gc_enter1
601 where assign = oneStmt (CmmStore (CmmReg spReg) sp0)
602
603 stg_gc_gen = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_gen")))
604 \end{code}
605
606 -}