{-# LANGUAGE CPP #-}

-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmHeap (
        getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset,

        entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
        heapStackCheckGen,
        entryHeapCheck',

        mkStaticClosureFields, mkStaticClosure,

        allocDynClosure, allocDynClosureCmm, allocHeapClosure,
        emitSetDynHdr
    ) where

#include "HsVersions.h"

import StgSyn
import CLabel
import StgCmmLayout
import StgCmmUtils
import StgCmmMonad
import StgCmmProf (profDynAlloc, dynProfHdr, staticProfHdr)
import StgCmmTicky
import StgCmmClosure
import StgCmmEnv

import MkGraph

import Hoopl
import SMRep
import Cmm
import CmmUtils
import CostCentre
import IdInfo( CafInfo(..), mayHaveCafRefs )
import Id ( Id )
import Module
import DynFlags
import FastString( mkFastString, fsLit )
import Panic( sorry )

#if __GLASGOW_HASKELL__ >= 709
import Prelude hiding ((<*>))
#endif

import Control.Monad (when)
import Data.Maybe (isJust)

-----------------------------------------------------------
--      Initialise dynamic heap objects
-----------------------------------------------------------

allocDynClosure
        :: Maybe Id
        -> CmmInfoTable
        -> LambdaFormInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
        -> FCode CmmExpr        -- returns Hp+n

allocDynClosureCmm
        :: Maybe Id -> CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
        -> [(CmmExpr, ByteOff)]
        -> FCode CmmExpr        -- returns Hp+n

-- allocDynClosure allocates the thing in the heap,
-- and modifies the virtual Hp to account for this.
-- The returned CmmExpr gives the address of the newly-allocated closure.

-- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is
-- only valid until Hp is changed.  The caller should assign the
-- result to a LocalReg if it is required to remain live.
--
-- The reason we don't assign it to a LocalReg here is that the caller
-- is often about to call regIdInfo, which immediately assigns the
-- result of allocDynClosure to a new temp in order to add the tag.
-- So by not generating a LocalReg here we avoid a common source of
-- new temporaries and save some compile time.  This can be quite
-- significant - see test T4801.

allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
  let (args, offsets) = unzip args_w_offsets
  cmm_args <- mapM getArgAmode args     -- No void args
  allocDynClosureCmm mb_id info_tbl lf_info
                     use_cc _blame_cc (zip cmm_args offsets)


allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
  -- SAY WHAT WE ARE ABOUT TO DO
  let rep = cit_rep info_tbl
  tickyDynAlloc mb_id rep lf_info
  let info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
  allocHeapClosure rep info_ptr use_cc amodes_w_offsets

-- | Low-level heap object allocation.
allocHeapClosure
  :: SMRep                  -- ^ representation of the object
  -> CmmExpr                -- ^ info pointer
  -> CmmExpr                -- ^ cost centre
  -> [(CmmExpr,ByteOff)]    -- ^ payload
  -> FCode CmmExpr          -- ^ returns the address of the object
allocHeapClosure rep info_ptr use_cc payload = do
  profDynAlloc rep use_cc

  virt_hp <- getVirtHp

  -- Find the offset of the info-ptr word
  let info_offset = virt_hp + 1
            -- info_offset is the VirtualHpOffset of the first
            -- word of the new object
            -- Remember, virtHp points to last allocated word,
            -- ie 1 *before* the info-ptr word of new object.

  base <- getHpRelOffset info_offset
  emitComment $ mkFastString "allocHeapClosure"
  emitSetDynHdr base info_ptr use_cc

  -- Fill in the fields
  hpStore base payload

  -- Bump the virtual heap pointer
  dflags <- getDynFlags
  setVirtHp (virt_hp + heapClosureSizeW dflags rep)

  return base

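-- Added commentary, sketching the object that allocHeapClosure and
-- emitSetDynHdr build for a simple constructor-like closure (offsets are
-- illustrative byte offsets on a 64-bit, non-profiling target):
--
--      base + 0 :  info pointer           (written by emitSetDynHdr)
--      base + 8 :  first payload field    (written by hpStore)
--      base + 16:  second payload field
--      ...
--
-- When profiling, the dynamic profiling header sits between the info
-- pointer and the payload.  'base' is an Hp-relative expression, so it is
-- only valid until Hp changes (see the allocDynClosure comment above).
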
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs
  = do dflags <- getDynFlags
       hpStore base (zip (header dflags) [0, wORD_SIZE dflags ..])
  where
    header :: DynFlags -> [CmmExpr]
    header dflags = [info_ptr] ++ dynProfHdr dflags ccs
        -- ToDo: Parallel stuff
        -- No ticky header

-- Store the item (expr,off) in base[off]
hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode ()
hpStore base vals = do
  dflags <- getDynFlags
  sequence_ $
    [ emitStore (cmmOffsetB dflags base off) val | (val,off) <- vals ]

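-- An illustrative example (added commentary; names and offsets are made
-- up): on a 64-bit target,
--
--      hpStore base [(x, 8), (y, 16)]
--
-- emits two stores, roughly  [base + 8] = x  and  [base + 16] = y,
-- with the width of each store taken from the type of the stored
-- expression.
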
-----------------------------------------------------------
--              Layout of static closures
-----------------------------------------------------------

-- Make a static closure, adding on any extra padding needed for CAFs,
-- and adding a static link field if necessary.

mkStaticClosureFields
        :: DynFlags
        -> CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields dflags info_tbl ccs caf_refs payload
  = mkStaticClosure dflags info_lbl ccs payload padding
        static_link_field saved_info_field
  where
    info_lbl = cit_lbl info_tbl

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- the static_link and saved_info fields must always be in the
    -- same place.  So we use isThunkRep rather than closureUpdReqd
    -- here:

    is_caf = isThunkRep (cit_rep info_tbl)

    padding
        | is_caf && null payload = [mkIntCLit dflags 0]
        | otherwise = []

    static_link_field
        | is_caf || staticClosureNeedsLink (mayHaveCafRefs caf_refs) info_tbl
        = [static_link_value]
        | otherwise
        = []

    saved_info_field
        | is_caf     = [mkIntCLit dflags 0]
        | otherwise  = []

        -- For a static constructor which has NoCafRefs, we set the
        -- static link field to a non-zero value so the garbage
        -- collector will ignore it.
    static_link_value
        | mayHaveCafRefs caf_refs  = mkIntCLit dflags 0
        | otherwise                = mkIntCLit dflags 3  -- No CAF refs
                                        -- See Note [STATIC_LINK fields]
                                        -- in rts/sm/Storage.h

mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
  =  [CmmLabel info_lbl]
  ++ staticProfHdr dflags ccs
  ++ concatMap (padLitToWord dflags) payload
  ++ padding
  ++ static_link_field
  ++ saved_info_field

-- JD: Simon had elided this padding, but without it an assertion in the C
-- back end fails. Maybe it's a bad assertion, and this padding is indeed
-- unnecessary?
padLitToWord :: DynFlags -> CmmLit -> [CmmLit]
padLitToWord dflags lit = lit : padding pad_length
  where width = typeWidth (cmmLitType dflags lit)
        pad_length = wORD_SIZE dflags - widthInBytes width :: Int

        padding n | n <= 0         = []
                  | n `rem` 2 /= 0 = CmmInt 0 W8  : padding (n-1)
                  | n `rem` 4 /= 0 = CmmInt 0 W16 : padding (n-2)
                  | n `rem` 8 /= 0 = CmmInt 0 W32 : padding (n-4)
                  | otherwise      = CmmInt 0 W64 : padding (n-8)

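-- A worked example, added as commentary (the word size is assumed to be
-- 8 bytes): padding a W32 literal gives pad_length = 8 - 4 = 4, and
-- padding 4 produces a single (CmmInt 0 W32), so the literal is padded
-- out to exactly one word.
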
-----------------------------------------------------------
--              Heap overflow checking
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
        r = gc_1p( r )
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.

  * A series of canned entry points like
        gcfun_2p( f, x, y )
    where f is a function closure of arity 2
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y)

These are used in the following circumstances

* entryHeapCheck: Function entry
    (a) With a canned GC entry sequence
        f( f_clo, x:ptr, y:ptr ) {
             Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             jump gcfun_2p( f_clo, x, y ) }
     Note the tail call to the garbage collector;
     it should do no register shuffling

    (b) No canned sequence
        f( f_clo, x:ptr, y:ptr, ...etc... ) {
          T: Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             call gc()  -- Needs an info table
             goto T }

* altHeapCheck: Immediately following an eval
  Started as
        case f x y of r { (p,q) -> rhs }
  (a) With a canned sequence for the results of f
      (which is the very common case since
       all boxed cases return just one pointer
         ...
        r = f( x, y )
     K:      -- K needs an info table
        Hp = Hp+8
        if Hp > HpLim goto L
        ...code for rhs...

     L: r = gc_1p( r )
        goto K }

        Here, the info table needed by the call
        to gc_1p should be the *same* as the
        one for the call to f; the C-- optimiser
        spots this sharing opportunity)

   (b) No canned sequence for results of f
       Note second info table
          ...
          (r1,r2,r3) = call f( x, y )
       K:
          Hp = Hp+8
          if Hp > HpLim goto L
          ...code for rhs...

       L: call gc()    -- Extra info table here
          goto K

* generalHeapCheck: Anywhere else
  e.g. entry to thunk
       case branch *not* following eval,
       or let-no-escape
  Exactly the same as the previous case:

  K:    -- K needs an info table
     Hp = Hp+8
     if Hp > HpLim goto L
     ...

  L: call gc()
     goto K
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.

entryHeapCheck :: ClosureInfo
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not same as len args b/c of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()

entryHeapCheck cl_info nodeSet arity args code
  = entryHeapCheck' is_fastf node arity args code
  where
    node = case nodeSet of
              Just r  -> CmmReg (CmmLocal r)
              Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info)

    is_fastf = case closureFunInfo cl_info of
                 Just (_, ArgGen _) -> False
                 _otherwise         -> True

-- | lower-level version for CmmParse
entryHeapCheck' :: Bool           -- is a known function pattern
                -> CmmExpr        -- expression for the closure pointer
                -> Int            -- Arity -- not same as len args b/c of voids
                -> [LocalReg]     -- Non-void args (empty for thunk)
                -> FCode ()
                -> FCode ()
entryHeapCheck' is_fastf node arity args code
  = do dflags <- getDynFlags
       let is_thunk = arity == 0

           args' = map (CmmReg . CmmLocal) args
           stg_gc_fun    = CmmReg (CmmGlobal GCFun)
           stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)

           {- Thunks:          jump stg_gc_enter_1

              Function (fast): call (NativeNode) stg_gc_fun(fun, args)

              Function (slow): call (slow) stg_gc_fun(fun, args)
           -}
           gc_call upd
               | is_thunk
                 = mkJump dflags NativeNodeCall stg_gc_enter1 [node] upd

               | is_fastf
                 = mkJump dflags NativeNodeCall stg_gc_fun (node : args') upd

               | otherwise
                 = mkJump dflags Slow stg_gc_fun (node : args') upd

       updfr_sz <- getUpdFrameOff

       loop_id <- newLabelC
       emitLabel loop_id
       heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code

-- ------------------------------------------------------------
-- A heap/stack check in a case alternative


-- If there are multiple alts and we need to GC, but don't have a
-- continuation already (the scrut was simple), then we should
-- pre-generate the continuation.  (if there are multiple alts it is
-- always a canned GC point).

-- altHeapCheck:
-- If we have a return continuation,
--   then if it is a canned GC pattern,
--           then we do mkJumpReturnsTo
--           else we do a normal call to stg_gc_noregs
--   else if it is a canned GC pattern,
--           then generate the continuation and do mkCallReturnsTo
--           else we do a normal call to stg_gc_noregs

altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck regs code = altOrNoEscapeHeapCheck False regs code

altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
altOrNoEscapeHeapCheck checkYield regs code = do
    dflags <- getDynFlags
    case cannedGCEntryPoint dflags regs of
      Nothing -> genericGC checkYield code
      Just gc -> do
        lret <- newLabelC
        let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) regs []
        lcont <- newLabelC
        tscope <- getTickScope
        emitOutOfLine lret (copyin <*> mkBranch lcont, tscope)
        emitLabel lcont
        cannedGCReturnsTo checkYield False gc regs lret off code

altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
altHeapCheckReturnsTo regs lret off code
  = do dflags <- getDynFlags
       case cannedGCEntryPoint dflags regs of
           Nothing -> genericGC False code
           Just gc -> cannedGCReturnsTo False True gc regs lret off code

-- noEscapeHeapCheck is implemented identically to altHeapCheck (which
-- is more efficient), but cannot be optimized away in the non-allocating
-- case because it may occur in a loop
noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code

cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
                  -> FCode a
                  -> FCode a
cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code
  = do dflags <- getDynFlags
       updfr_sz <- getUpdFrameOff
       heapCheck False checkYield (gc_call dflags gc updfr_sz) code
  where
    reg_exprs = map (CmmReg . CmmLocal) regs
      -- Note [stg_gc arguments]

      -- NB. we use the NativeReturn convention for passing arguments
      -- to the canned heap-check routines, because we are in a case
      -- alternative and hence the [LocalReg] was passed to us in the
      -- NativeReturn convention.
    gc_call dflags label sp
      | cont_on_stack
      = mkJumpReturnsTo dflags label NativeReturn reg_exprs lret off sp
      | otherwise
      = mkCallReturnsTo dflags label NativeReturn reg_exprs lret off sp []

genericGC :: Bool -> FCode a -> FCode a
genericGC checkYield code
  = do updfr_sz <- getUpdFrameOff
       lretry <- newLabelC
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       heapCheck False checkYield (call <*> mkBranch lretry) code

cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint dflags regs
  = case map localRegType regs of
      []  -> Just (mkGcLabel "stg_gc_noregs")
      [ty]
          | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1")
          | isFloatType ty -> case width of
                                  W32 -> Just (mkGcLabel "stg_gc_f1")
                                  W64 -> Just (mkGcLabel "stg_gc_d1")
                                  _   -> Nothing

          | width == wordWidth dflags -> Just (mkGcLabel "stg_gc_unbx_r1")
          | width == W64              -> Just (mkGcLabel "stg_gc_l1")
          | otherwise                 -> Nothing
          where
              width = typeWidth ty
      [ty1,ty2]
          |  isGcPtrType ty1
          && isGcPtrType ty2 -> Just (mkGcLabel "stg_gc_pp")
      [ty1,ty2,ty3]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3 -> Just (mkGcLabel "stg_gc_ppp")
      [ty1,ty2,ty3,ty4]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3
          && isGcPtrType ty4 -> Just (mkGcLabel "stg_gc_pppp")
      _otherwise -> Nothing

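-- A few illustrative cases (added commentary): an alternative returning a
-- single boxed value gets the canned entry point stg_gc_unpt_r1, one
-- returning two boxed values gets stg_gc_pp, while one returning, say, an
-- Int# together with a Double# matches none of the patterns above, so
-- cannedGCEntryPoint yields Nothing and we fall back to genericGC.
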
-- Note [stg_gc arguments]
-- It might seem that we could avoid passing the arguments to the
-- stg_gc function, because they are already in the right registers.
-- While this is usually the case, it isn't always.  Sometimes the
-- code generator has cleverly avoided the eval in a case, e.g. in
-- ffi/should_run/4221.hs we found
--
--   case a_r1mb of z
--     FunPtr x y -> ...
--
-- where a_r1mb is bound to a top-level constructor, and is known to be
-- evaluated.  The codegen just assigns x, y and z, and continues;
-- R1 is never assigned.
--
-- So we'll have to rely on optimisations to eliminate these
-- assignments where possible.


-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
generic_gc = mkGcLabel "stg_gc_noregs"

-- | Create a CLabel for calling a garbage collector entry point
mkGcLabel :: String -> CmmExpr
mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit s)))

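-- Added commentary: the stg_gc_* symbols referenced here are entry points
-- provided by the RTS (at the time of writing they are defined in the
-- runtime's Cmm sources, e.g. rts/HeapStackCheck.cmm), which is why the
-- label is built in rtsUnitId.
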
-------------------------------
heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack checkYield do_gc code
  = getHeapUsage $ \ hpHw ->
    -- Emit heap checks, but be sure to do it lazily so
    -- that the conditionals on hpHw don't cause a black hole
    do  { dflags <- getDynFlags
        ; let mb_alloc_bytes
                 | hpHw > mBLOCK_SIZE = sorry $ unlines
                    [" Trying to allocate more than "++show mBLOCK_SIZE++" bytes.",
                     "",
                     "This is currently not possible due to a limitation of GHC's code generator.",
                     "See http://hackage.haskell.org/trac/ghc/ticket/4505 for details.",
                     "Suggestion: read data from a file instead of having large static data",
                     "structures in code."]
                 | hpHw > 0  = Just (mkIntExpr dflags (hpHw * (wORD_SIZE dflags)))
                 | otherwise = Nothing
                 where mBLOCK_SIZE = bLOCKS_PER_MBLOCK dflags * bLOCK_SIZE_W dflags
              stk_hwm | checkStack = Just (CmmLit CmmHighStackMark)
                      | otherwise  = Nothing
        ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc
        ; tickyAllocHeap True hpHw
        ; setRealHp hpHw
        ; code }

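-- Added commentary on heapCheck above: hpHw (the virtual heap usage from
-- getHeapUsage) and mBLOCK_SIZE as computed here are word counts
-- (bLOCK_SIZE_W follows the usual convention of a _W suffix for sizes in
-- words); the allocation amount handed to do_checks is converted to bytes
-- via hpHw * wORD_SIZE dflags.
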
heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
heapStackCheckGen stk_hwm mb_bytes
  = do updfr_sz <- getUpdFrameOff
       lretry <- newLabelC
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry)

-- Note [Single stack check]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~
-- When compiling a function we can determine how much stack space it
-- will use. We therefore need to perform only a single stack check at
-- the beginning of a function to see if we have enough stack space.
--
-- The check boils down to comparing Sp-N with SpLim, where N is the
-- amount of stack space needed (see Note [Stack usage] below).  *BUT*
-- at this stage of the pipeline we are not supposed to refer to Sp
-- itself, because the stack is not yet manifest, so we don't quite
-- know where Sp is pointing.
--
-- So instead of referring directly to Sp - as we used to do in the
-- past - the code generator uses (old + 0) in the stack check. That
-- is the address of the first word of the old area, so if we add N
-- we'll get the address of the highest used word.
--
-- This makes the check robust.  For example, while we need to perform
-- only one stack check for each function, we could in theory place
-- more stack checks later in the function. They would be redundant,
-- but not incorrect (in the sense that they should not change program
-- behaviour).  We need to make sure however that a stack check
-- inserted after incrementing the stack pointer checks for a
-- correspondingly smaller stack space.  This would not be the case if
-- the code generator produced direct references to Sp.  By referencing
-- (old + 0) we make sure that we always check for a correct amount of
-- stack: when converting (old + 0) to Sp the stack layout phase takes
-- into account changes already made to the stack pointer.  The idea for
-- this change came from observations made while debugging #8275.

-- Note [Stack usage]
-- ~~~~~~~~~~~~~~~~~~
-- At the moment we convert from STG to Cmm we don't know N, the
-- number of bytes of stack that the function will use, so we use a
-- special late-bound CmmLit, namely
--       CmmHighStackMark
-- to stand for the number of bytes needed. When the stack is made
-- manifest, the number of bytes needed is calculated, and used to
-- replace occurrences of CmmHighStackMark
--
-- The (Maybe CmmExpr) passed to do_checks is usually
--     Just (CmmLit CmmHighStackMark)
-- but can also be (in certain hand-written RTS functions)
--     Just (CmmLit 8)  or some other fixed value
-- If it is Nothing, we don't generate a stack check at all.

do_checks :: Maybe CmmExpr    -- Should we check the stack?
                              -- See Note [Stack usage]
          -> Bool             -- Should we check for preemption?
          -> Maybe CmmExpr    -- Heap headroom (bytes)
          -> CmmAGraph        -- What to do on failure
          -> FCode ()
do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
  dflags <- getDynFlags
  gc_id <- newLabelC

  let
    Just alloc_lit = mb_alloc_lit

    bump_hp = cmmOffsetExprB dflags (CmmReg hpReg) alloc_lit

    -- Sp overflow if ((old + 0) - CmmHighStack < SpLim)
    -- At the beginning of a function old + 0 = Sp
    -- See Note [Single stack check]
    sp_oflo sp_hwm =
         CmmMachOp (mo_wordULt dflags)
                   [CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
                              [CmmStackSlot Old 0, sp_hwm],
                    CmmReg spLimReg]

    -- Hp overflow if (Hp > HpLim)
    -- (Hp has been incremented by now)
    -- HpLim points to the LAST WORD of valid allocation space.
    hp_oflo = CmmMachOp (mo_wordUGt dflags)
                        [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]

    alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit

  case mb_stk_hwm of
    Nothing -> return ()
    Just stk_hwm -> tickyStackCheck >> (emit =<< mkCmmIfGoto (sp_oflo stk_hwm) gc_id)

  -- Emit new label that might potentially be a header
  -- of a self-recursive tail call.
  -- See Note [Self-recursive loop header].
  self_loop_info <- getSelfLoop
  case self_loop_info of
    Just (_, loop_header_id, _)
        | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
    _otherwise -> return ()

  if (isJust mb_alloc_lit)
    then do
      tickyHeapCheck
      emitAssign hpReg bump_hp
      emit =<< mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
    else do
      when (checkYield && not (gopt Opt_OmitYields dflags)) $ do
         -- Yielding if HpLim == 0
         let yielding = CmmMachOp (mo_wordEq dflags)
                                  [CmmReg (CmmGlobal HpLim),
                                   CmmLit (zeroCLit dflags)]
         emit =<< mkCmmIfGoto yielding gc_id

  tscope <- getTickScope
  emitOutOfLine gc_id
    (do_gc, tscope) -- this is expected to jump back somewhere

                -- Test for stack pointer exhaustion, then
                -- bump heap pointer, and test for heap exhaustion
                -- Note that we don't move the heap pointer unless the
                -- stack check succeeds.  Otherwise we might end up
                -- with slop at the end of the current block, which can
                -- confuse the LDV profiler.

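-- Added commentary: for a call with both a stack high-water mark and a
-- non-zero allocation, the code emitted above has roughly this shape
-- (label names are made up):
--
--        if ((old + 0) - stk_hwm < SpLim) goto gc
--        Hp = Hp + alloc
--        if (Hp > HpLim) { HpAlloc = alloc; goto gc }
--        <code>
--    gc: <do_gc>      -- out of line; expected to jump back
--
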
-- Note [Self-recursive loop header]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- A self-recursive loop header is required by the loopification optimisation
-- (see Note [Self-recursive tail calls] in StgCmmExpr). We emit it if:
--
--  1. There is information about the self-loop in the FCode environment. We
--     don't check the binder (first component of the self_loop_info) because
--     we are certain that if the self-loop info is present then we are
--     compiling the binder body. Reason: the only possible way to get here
--     with the self_loop_info present is from closureCodeBody.
--
--  2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is
--     possible to preempt the heap check (see #367 for motivation behind
--     this check). It is True for heap checks placed at the entry to a
--     function and let-no-escape heap checks, but False for other heap
--     checks (e.g. in case alternatives or created from hand-written
--     high-level Cmm). The second check (isJust mb_stk_hwm) is True for heap
--     checks at the entry to a function and some heap checks created in
--     hand-written Cmm; otherwise mb_stk_hwm is Nothing. In other words the
--     only situation when both conditions are true is when compiling stack
--     and heap checks at the entry to a function. This is the only situation
--     when we want to emit a self-loop label.