-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmHeap (
        getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset, hpRel,

        entryHeapCheck, altHeapCheck,

        mkVirtHeapOffsets, mkVirtConstrOffsets,
        mkStaticClosureFields, mkStaticClosure,

        allocDynClosure, allocDynClosureCmm, emitSetDynHdr
    ) where

#include "HsVersions.h"

import CmmType
import StgSyn
import CLabel
import StgCmmLayout
import StgCmmUtils
import StgCmmMonad
import StgCmmProf
import StgCmmTicky
import StgCmmGran
import StgCmmClosure
import StgCmmEnv

import MkGraph

import SMRep
import Cmm
import CmmUtils
import CostCentre
import Outputable
import IdInfo( CafInfo(..), mayHaveCafRefs )
import Module
import FastString( mkFastString, fsLit )
import Constants

-----------------------------------------------------------
-- Initialise dynamic heap objects
-----------------------------------------------------------

allocDynClosure
        :: ClosureInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
        -> FCode (LocalReg, CmmAGraph)

allocDynClosureCmm
        :: ClosureInfo -> CmmExpr -> CmmExpr
        -> [(CmmExpr, VirtualHpOffset)]
        -> FCode (LocalReg, CmmAGraph)

-- allocDynClosure allocates the thing in the heap,
-- and modifies the virtual Hp to account for this.
-- The second return value is the graph that sets the value of the
-- returned LocalReg, which should point to the closure after executing
-- the graph.

-- Note [Return a LocalReg]
-- ~~~~~~~~~~~~~~~~~~~~~~~~
-- allocDynClosure returns a LocalReg, not a (Hp+8) CmmExpr.
-- Reason:
--      ...allocate object...
--      obj = Hp + 8
--      y = f(z)
--      ...here obj is still valid,
--      but Hp+8 means something quite different...

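-- A minimal usage sketch (hypothetical caller; `cl_info', `use_cc' and
-- `args_w_offsets' are assumed to be in scope):
--
--      do { (obj, init) <- allocDynClosure cl_info use_cc use_cc args_w_offsets
--         ; emit init
--         ; {- CmmReg (CmmLocal obj) now points to the new closure -} ... }
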
allocDynClosure cl_info use_cc _blame_cc args_w_offsets
  = do  { let (args, offsets) = unzip args_w_offsets
        ; cmm_args <- mapM getArgAmode args     -- No void args
        ; allocDynClosureCmm cl_info use_cc _blame_cc (zip cmm_args offsets)
        }

allocDynClosureCmm cl_info use_cc _blame_cc amodes_w_offsets
  = do  { virt_hp <- getVirtHp

        -- SAY WHAT WE ARE ABOUT TO DO
        ; tickyDynAlloc cl_info
        ; profDynAlloc cl_info use_cc
                -- ToDo: This is almost certainly wrong
                -- We're ignoring blame_cc. But until we've
                -- fixed the boxing hack in chooseDynCostCentres etc,
                -- we're worried about making things worse by "fixing"
                -- this part to use blame_cc!

        -- FIND THE OFFSET OF THE INFO-PTR WORD
        ; let   info_offset = virt_hp + 1
                -- info_offset is the VirtualHpOffset of the first
                -- word of the new object
                -- Remember, virtHp points to the last allocated word,
                -- ie 1 *before* the info-ptr word of the new object.

                info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info))

        -- ALLOCATE THE OBJECT
        ; base <- getHpRelOffset info_offset
        ; emit (mkComment $ mkFastString "allocDynClosure")
        ; emitSetDynHdr base info_ptr use_cc
        ; let (cmm_args, offsets) = unzip amodes_w_offsets
        ; hpStore base cmm_args offsets

        -- BUMP THE VIRTUAL HEAP POINTER
        ; setVirtHp (virt_hp + closureSize cl_info)

        -- Assign to a temporary and return
        -- Note [Return a LocalReg]
        ; hp_rel <- getHpRelOffset info_offset
        ; getCodeR $ assignTemp hp_rel }

emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs
  = hpStore base header [0..]
  where
    header :: [CmmExpr]
    header = [info_ptr] ++ dynProfHdr ccs
        -- ToDo: Gransim stuff
        -- ToDo: Parallel stuff
        -- No ticky header

hpStore :: CmmExpr -> [CmmExpr] -> [VirtualHpOffset] -> FCode ()
-- For each pair (val, off), store val into base[off]
hpStore base vals offs
  = emit (catAGraphs (zipWith mk_store vals offs))
  where
    mk_store val off = mkStore (cmmOffsetW base off) val

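-- A worked example (illustrative only): with base pointing at the start
-- of the object, `hpStore base [info_ptr, cc] [0,1]' emits the stores
--
--      base[0] := info_ptr
--      base[1] := cc
--
-- which is exactly how emitSetDynHdr lays down the header words.
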

-----------------------------------------------------------
-- Layout of static closures
-----------------------------------------------------------

-- Make a static closure, adding on any extra padding needed for CAFs,
-- and adding a static link field if necessary.

mkStaticClosureFields
        :: ClosureInfo
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields cl_info ccs caf_refs payload
  = mkStaticClosure info_lbl ccs payload padding
        static_link_field saved_info_field
  where
    info_lbl = infoTableLabelFromCI cl_info

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- The static_link and saved_info fields must always be in the same
    -- place.  So we use closureNeedsUpdSpace rather than
    -- closureUpdReqd here:

    is_caf = closureNeedsUpdSpace cl_info

    padding
        | not is_caf = []
        | otherwise  = ASSERT(null payload) [mkIntCLit 0]

    static_link_field
        | is_caf || staticClosureNeedsLink cl_info = [static_link_value]
        | otherwise                                = []

    saved_info_field
        | is_caf    = [mkIntCLit 0]
        | otherwise = []

    -- For a static constructor which has NoCafRefs, we set the
    -- static link field to a non-zero value so the garbage
    -- collector will ignore it.
    static_link_value
        | mayHaveCafRefs caf_refs = mkIntCLit 0
        | otherwise               = mkIntCLit 1  -- No CAF refs

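-- For example (a sketch; field order per mkStaticClosure below): a CAF's
-- static closure comes out as
--
--      [info_lbl, <profiling header if on>, 0 {indirectee slot},
--       static_link_value, 0 {saved_info}]
--
-- while a static constructor with NoCafRefs gets just the info label,
-- its payload, and (if it needs one) a static link of 1 for the GC
-- to ignore.
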

mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure info_lbl ccs payload padding static_link_field saved_info_field
  =  [CmmLabel info_lbl]
  ++ variable_header_words
  ++ concatMap padLitToWord payload
  ++ padding
  ++ static_link_field
  ++ saved_info_field
  where
    variable_header_words
        =  staticGranHdr
        ++ staticParHdr
        ++ staticProfHdr ccs
        ++ staticTickyHdr

-- JD: Simon had elided this padding, but without it the C back end fails
-- an assertion.  Maybe it's a bad assertion, and this padding is indeed
-- unnecessary?
padLitToWord :: CmmLit -> [CmmLit]
padLitToWord lit = lit : padding pad_length
  where width = typeWidth (cmmLitType lit)
        pad_length = wORD_SIZE - widthInBytes width :: Int

        padding n | n <= 0         = []
                  | n `rem` 2 /= 0 = CmmInt 0 W8  : padding (n-1)
                  | n `rem` 4 /= 0 = CmmInt 0 W16 : padding (n-2)
                  | n `rem` 8 /= 0 = CmmInt 0 W32 : padding (n-4)
                  | otherwise      = CmmInt 0 W64 : padding (n-8)

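-- A worked example (assuming a 64-bit target, so wORD_SIZE = 8): a W32
-- literal has widthInBytes 4, so pad_length = 4 and the literal is
-- followed by a single CmmInt 0 W32, rounding the payload entry up to
-- one full word.
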
-----------------------------------------------------------
-- Heap overflow checking
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
        r = gc_1p( r )
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.

  * A series of canned entry points like
        gcfun_2p( f, x, y )
    where f is a function closure of arity 2.
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y)

These are used in the following circumstances:

* entryHeapCheck: Function entry
    (a) With a canned GC entry sequence
        f( f_clo, x:ptr, y:ptr ) {
             Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             jump gcfun_2p( f_clo, x, y ) }
        Note the tail call to the garbage collector;
        it should do no register shuffling

    (b) No canned sequence
        f( f_clo, x:ptr, y:ptr, ...etc... ) {
          T: Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             call gc()  -- Needs an info table
             goto T }

* altHeapCheck: Immediately following an eval
  Started as
        case f x y of r { (p,q) -> rhs }
  (a) With a canned sequence for the results of f
      (which is the very common case, since
      all boxed cases return just one pointer)
        ...
        r = f( x, y )
     K: -- K needs an info table
        Hp = Hp+8
        if Hp > HpLim goto L
        ...code for rhs...

     L: r = gc_1p( r )
        goto K

     Here, the info table needed by the call
     to gc_1p should be the *same* as the
     one for the call to f; the C-- optimiser
     spots this sharing opportunity.

  (b) No canned sequence for the results of f
      (note the second info table)
        ...
        (r1,r2,r3) = call f( x, y )
     K:
        Hp = Hp+8
        if Hp > HpLim goto L
        ...code for rhs...

     L: call gc()  -- Extra info table here
        goto K

* generalHeapCheck: Anywhere else
  e.g. entry to thunk
       case branch *not* following eval,
       or let-no-escape
  Exactly the same as the previous case:

     K: -- K needs an info table
        Hp = Hp+8
        if Hp > HpLim goto L
        ...

     L: call gc()
        goto K
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.

entryHeapCheck :: ClosureInfo
               -> Int            -- Arg Offset
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not the same as the length
                                 -- of args, because of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()

entryHeapCheck cl_info offset nodeSet arity args code
  = do updfr_sz <- getUpdFrameOff
       heapCheck True (gc_call updfr_sz) code

  where
    is_thunk = arity == 0
    is_fastf = case closureFunInfo cl_info of
                 Just (_, ArgGen _) -> False
                 _otherwise         -> True

    args' = map (CmmReg . CmmLocal) args
    setN = case nodeSet of
             Just n  -> mkAssign nodeReg (CmmReg $ CmmLocal n)
             Nothing -> mkAssign nodeReg $
                 CmmLit (CmmLabel $ closureLabelFromCI cl_info)

    {- Thunks:          Set R1 = node, jump GCEnter1
       Function (fast): Set R1 = node, jump GCFun
       Function (slow): Set R1 = node, call generic_gc -}
    gc_call upd = setN <*> gc_lbl upd
    gc_lbl upd
        | is_thunk  = mkDirectJump (CmmReg $ CmmGlobal GCEnter1) [] sp
        | is_fastf  = mkDirectJump (CmmReg $ CmmGlobal GCFun) [] sp
        | otherwise = mkForeignJump Slow (CmmReg $ CmmGlobal GCFun) args' upd
        where sp = max offset upd
    {- DT (12/08/10): This is a little fishy, mainly the Sp fix-up amount.
     - It is needed because the NCG inserts spills before the stack/heap
     - check.  Once that is fixed we won't need to fix up Sp on GC calls,
     - but until then this fishy code works. -}

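-- For instance (a sketch of the failure path only): entering a thunk with
-- no node register set generates, roughly,
--
--      R1 = <closure label>;
--      jump GCEnter1;    -- with Sp fixed up to max offset updfr_sz
--
-- per setN and gc_lbl above.
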
{-
-- This code is slightly outdated now, and we could easily keep the above
-- GC methods.  However, there may be some performance gains to be made by
-- using more specialised GC entry points.  Since the semi-generic GCFun
-- entry needs to check the node and figure out which registers to save,
-- providing and using more specialised GC entry points would turn these
-- runtime decisions into compile-time decisions.

args' = case fun of Just f  -> f : args
                    Nothing -> args
arg_exprs = map (CmmReg . CmmLocal) args'
gc_call updfr_sz
    | arity == 0 = mkJumpGC (CmmReg (CmmGlobal GCEnter1)) arg_exprs updfr_sz
    | otherwise =
        case gc_lbl args' of
          Just _lbl -> panic "StgCmmHeap.entryHeapCheck: not finished"
                       -- mkJumpGC (CmmLit (CmmLabel (mkRtsCodeLabel lbl)))
                       --          arg_exprs updfr_sz
          Nothing   -> mkCall generic_gc (GC, GC) [] [] updfr_sz

gc_lbl :: [LocalReg] -> Maybe FastString
gc_lbl [reg]
    | isGcPtrType ty     = Just (sLit "stg_gc_unpt_r1") -- "stg_gc_fun_1p"
    | isFloatType ty     = case width of
                             W32    -> Just (sLit "stg_gc_f1")
                             W64    -> Just (sLit "stg_gc_d1")
                             _other -> Nothing
    | width == wordWidth = Just (sLit "stg_gc_unbx_r1")
    | width == W64       = Just (sLit "stg_gc_l1")
    | otherwise          = Nothing
    where
      ty = localRegType reg
      width = typeWidth ty

gc_lbl regs = gc_lbl_ptrs (map (isGcPtrType . localRegType) regs)

gc_lbl_ptrs :: [Bool] -> Maybe FastString
-- JD: TEMPORARY -- UNTIL THESE FUNCTIONS EXIST...
--gc_lbl_ptrs [True,True]      = Just (sLit "stg_gc_fun_2p")
--gc_lbl_ptrs [True,True,True] = Just (sLit "stg_gc_fun_3p")
gc_lbl_ptrs _ = Nothing
-}


--------------------------------------------------------------
-- A heap/stack check in a case alternative

altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck regs code
  = do updfr_sz <- getUpdFrameOff
       heapCheck False (gc_call updfr_sz) code

  where
    reg_exprs = map (CmmReg . CmmLocal) regs

    gc_call sp =
        case rts_label regs of
          Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp
          Nothing -> mkCall generic_gc (GC, GC) [] [] sp

    rts_label [reg]
        | isGcPtrType ty = Just (mkGcLabel "stg_gc_unpt_r1")
        | isFloatType ty = case width of
                             W32 -> Just (mkGcLabel "stg_gc_f1")
                             W64 -> Just (mkGcLabel "stg_gc_d1")
                             _   -> Nothing

        | width == wordWidth = Just (mkGcLabel "stg_gc_unbx_r1")
        | width == W64       = Just (mkGcLabel "stg_gc_l1")
        | otherwise          = Nothing
        where
          ty = localRegType reg
          width = typeWidth ty

    rts_label _ = Nothing

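-- For example (illustrative): `altHeapCheck [r] rhs_code', where r holds
-- a GC pointer, makes the failure branch a call to the canned entry point
-- stg_gc_unpt_r1, saving and reloading r around the collection; with
-- several live registers we fall back on generic_gc, which saves nothing.
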

-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
generic_gc = CmmLit $ mkGcLabel "stg_gc_noregs"

-- | Make a CmmLit label for an RTS garbage-collector entry point
mkGcLabel :: String -> CmmLit
mkGcLabel = CmmLabel . mkCmmCodeLabel rtsPackageId . fsLit

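-- e.g. `mkGcLabel "stg_gc_noregs"' (as used by generic_gc above) names
-- the no-register GC entry point in the rts package.
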
-------------------------------
heapCheck :: Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack do_gc code
  = getHeapUsage $ \ hpHw ->
    -- Emit heap checks, but be sure to do it lazily so
    -- that the conditionals on hpHw don't cause a black hole
    do  { emit $ do_checks checkStack hpHw do_gc
        ; tickyAllocHeap hpHw
        ; doGranAllocate hpHw
        ; setRealHp hpHw
        ; code }

do_checks :: Bool       -- Should we check the stack?
          -> WordOff    -- Heap headroom
          -> CmmAGraph  -- What to do on failure
          -> CmmAGraph
do_checks checkStack alloc do_gc
  = withFreshLabel "gc" $ \ loop_id ->
    withFreshLabel "gc" $ \ gc_id   ->
      mkLabel loop_id
      <*> (let hpCheck = if alloc == 0 then mkNop
                         else mkAssign hpReg bump_hp <*>
                              mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
           in if checkStack
                then mkCmmIfThenElse sp_oflo (mkBranch gc_id) hpCheck
                else hpCheck)
      <*> mkComment (mkFastString "outOfLine should follow:")
      <*> outOfLine (mkLabel gc_id
                     <*> mkComment (mkFastString "outOfLine here")
                     <*> do_gc
                     <*> mkBranch loop_id)
              -- Test for stack pointer exhaustion, then
              -- bump heap pointer, and test for heap exhaustion
              -- Note that we don't move the heap pointer unless the
              -- stack check succeeds.  Otherwise we might end up
              -- with slop at the end of the current block, which can
              -- confuse the LDV profiler.
  where
    alloc_lit = CmmLit (mkIntCLit (alloc*wORD_SIZE)) -- Bytes
    bump_hp   = cmmOffsetExprB (CmmReg hpReg) alloc_lit

    -- Sp overflow if (Sp - CmmHighStack < SpLim)
    sp_oflo = CmmMachOp mo_wordULt
                  [CmmMachOp (MO_Sub (typeWidth (cmmRegType spReg)))
                             [CmmReg spReg, CmmLit CmmHighStackMark],
                   CmmReg spLimReg]

    -- Hp overflow if (Hp > HpLim)
    -- (Hp has been incremented by now)
    -- HpLim points to the LAST WORD of valid allocation space.
    hp_oflo = CmmMachOp mo_wordUGt
                  [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]

    alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit

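-- A sketch of the code do_checks generates when both checks are on
-- (schematic C--; label names illustrative):
--
--      loop:
--        if (Sp - <max stack usage> < SpLim) goto gc;
--        Hp = Hp + <alloc bytes>;
--        if (Hp > HpLim) { HpAlloc = <alloc bytes>; goto gc; }
--        <code>
--      gc:            -- out of line
--        <do_gc>; goto loop;
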
{-

{- Unboxed tuple alternatives and let-no-escapes (the two most annoying
constructs to generate code for!)  For unboxed tuple returns, there
are an arbitrary number of possibly unboxed return values, some of
which will be in registers, and the others will be on the stack.  We
always organise the stack-resident fields into pointers &
non-pointers, and pass the number of each to the heap check code. -}

unbxTupleHeapCheck
        :: [(Id, GlobalReg)]    -- Live registers
        -> WordOff              -- no. of stack slots containing ptrs
        -> WordOff              -- no. of stack slots containing nonptrs
        -> CmmAGraph            -- code to insert in the failure path
        -> FCode ()
        -> FCode ()

unbxTupleHeapCheck regs ptrs nptrs fail_code code
  -- We can't manage more than 255 pointers/non-pointers
  -- in a generic heap check.
  | ptrs > 255 || nptrs > 255 = panic "altHeapCheck"
  | otherwise
  = initHeapUsage $ \ hpHw -> do
        { codeOnly $ do { do_checks 0 {- no stack check -} hpHw
                                    full_fail_code rts_label
                        ; tickyAllocHeap hpHw }
        ; setRealHp hpHw
        ; code }
  where
    full_fail_code  = fail_code `plusStmts` oneStmt assign_liveness
    assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9))  -- Ho ho ho!
                                (CmmLit (mkWordCLit liveness))
    liveness        = mkRegLiveness regs ptrs nptrs
    rts_label       = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_ut")))


{- Old GranSim comment -- I have no idea whether it still makes sense (SLPJ Sep07)
For GrAnSim the code for doing a heap check and doing a context switch
has been separated.  In particular, the HEAP_CHK macro only performs a
heap check.  THREAD_CONTEXT_SWITCH should be used for doing a context
switch.  GRAN_FETCH_AND_RESCHEDULE must be put at the beginning of
every slow entry code in order to simulate the fetching of
closures.  If fetching is necessary (i.e. the current closure is not
local) then an automatic context switch is done. -}


When failing a check, we save a return address on the stack and
jump to a pre-compiled code fragment that saves the live registers
and returns to the scheduler.

The return address in most cases will be the beginning of the basic
block in which the check resides, since we need to perform the check
again on re-entry because someone else might have stolen the resource
in the meantime.

%************************************************************************
%*                                                                      *
        Generic Heap/Stack Checks - used in the RTS
%*                                                                      *
%************************************************************************

\begin{code}
hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
hpChkGen bytes liveness reentry
  = do_checks' bytes True assigns stg_gc_gen
  where
    assigns = mkStmts [
                CmmAssign (CmmGlobal (VanillaReg 9))  liveness,
                CmmAssign (CmmGlobal (VanillaReg 10)) reentry
              ]

-- a heap check where R1 points to the closure to enter on return, and
-- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP)
hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> FCode ()
hpChkNodePointsAssignSp0 bytes sp0
  = do_checks' bytes True assign stg_gc_enter1
  where assign = oneStmt (CmmStore (CmmReg spReg) sp0)

stg_gc_gen = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_gen")))
\end{code}

-}