{-# LANGUAGE CPP #-}

-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmHeap (
        getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset,

        entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
        heapStackCheckGen,
        entryHeapCheck',

        mkStaticClosureFields, mkStaticClosure,

        allocDynClosure, allocDynClosureCmm, allocHeapClosure,
        emitSetDynHdr
    ) where

#include "HsVersions.h"

import StgSyn
import CLabel
import StgCmmLayout
import StgCmmUtils
import StgCmmMonad
import StgCmmProf (profDynAlloc, dynProfHdr, staticProfHdr)
import StgCmmTicky
import StgCmmClosure
import StgCmmEnv

import MkGraph

import Hoopl
import SMRep
import Cmm
import CmmUtils
import CostCentre
import IdInfo( CafInfo(..), mayHaveCafRefs )
import Id ( Id )
import Module
import DynFlags
import FastString( mkFastString, fsLit )
import Panic( sorry )

import Prelude hiding ((<*>))

import Control.Monad (when)
import Data.Maybe (isJust)

-----------------------------------------------------------
-- Initialise dynamic heap objects
-----------------------------------------------------------

allocDynClosure
        :: Maybe Id
        -> CmmInfoTable
        -> LambdaFormInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
        -> FCode CmmExpr -- returns Hp+n

allocDynClosureCmm
        :: Maybe Id -> CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
        -> [(CmmArg, ByteOff)]
        -> FCode CmmExpr -- returns Hp+n

-- allocDynClosure allocates the thing in the heap,
-- and modifies the virtual Hp to account for this.
-- The returned CmmExpr is the address of the newly allocated
-- closure (see also the comment below).

-- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is
-- only valid until Hp is changed.  The caller should assign the
-- result to a LocalReg if it is required to remain live.
--
-- The reason we don't assign it to a LocalReg here is that the caller
-- is often about to call regIdInfo, which immediately assigns the
-- result of allocDynClosure to a new temp in order to add the tag.
-- So by not generating a LocalReg here we avoid a common source of
-- new temporaries and save some compile time.  This can be quite
-- significant - see test T4801.

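-- A sketch of a typical call site (hypothetical names, not taken from
-- this module): allocating a constructor closure with two non-void
-- fields might look like
--
--   hp_expr <- allocDynClosure (Just bndr) con_info_tbl lf_info
--                              use_cc use_cc fields_w_offsets
--
-- where fields_w_offsets pairs each field with its VirtualHpOffset from
-- the start of the object.  The result is Hp-relative, so bind or tag it
-- before anything else changes Hp (see the comment above).
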
allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
  let (args, offsets) = unzip args_w_offsets
  cmm_args <- mapM getArgAmode args     -- No void args
  allocDynClosureCmm mb_id info_tbl lf_info
                     use_cc _blame_cc (zip cmm_args offsets)


allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
  -- SAY WHAT WE ARE ABOUT TO DO
  let rep = cit_rep info_tbl
  tickyDynAlloc mb_id rep lf_info
  let info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
  allocHeapClosure rep info_ptr use_cc amodes_w_offsets


-- | Low-level heap object allocation.
allocHeapClosure
  :: SMRep                            -- ^ representation of the object
  -> CmmExpr                          -- ^ info pointer
  -> CmmExpr                          -- ^ cost centre
  -> [(CmmArg,ByteOff)]               -- ^ payload
  -> FCode CmmExpr                    -- ^ returns the address of the object
allocHeapClosure rep info_ptr use_cc payload = do
  profDynAlloc rep use_cc

  virt_hp <- getVirtHp

  -- Find the offset of the info-ptr word
  let info_offset = virt_hp + 1
            -- info_offset is the VirtualHpOffset of the first
            -- word of the new object
            -- Remember, virtHp points to last allocated word,
            -- ie 1 *before* the info-ptr word of new object.

  base <- getHpRelOffset info_offset
  emitComment $ mkFastString "allocHeapClosure"
  emitSetDynHdr base info_ptr use_cc

  -- Fill in the fields
  hpStore base payload

  -- Bump the virtual heap pointer
  dflags <- getDynFlags
  setVirtHp (virt_hp + heapClosureSizeW dflags rep)

  return base

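-- A sketch of the object laid out by allocHeapClosure (assuming
-- profiling is off, so there are no profiling header words):
--
--   base + 0           : info_ptr
--   base + wORD_SIZE.. : payload fields, at the byte offsets supplied
--                        in the payload list
--
-- The virtual Hp is then advanced by heapClosureSizeW words, so the next
-- allocation starts just after this object.
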

emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs
  = do dflags <- getDynFlags
       hpStore base (zip (map CmmExprArg (header dflags)) [0, wORD_SIZE dflags ..])
  where
    header :: DynFlags -> [CmmExpr]
    header dflags = [info_ptr] ++ dynProfHdr dflags ccs
        -- ToDo: Parallel stuff
        -- No ticky header

-- Store the item (expr,off) in base[off]
hpStore :: CmmExpr -> [(CmmArg, ByteOff)] -> FCode ()
hpStore base vals = do
  dflags <- getDynFlags
  sequence_ $
    [ emitStore (cmmOffsetB dflags base off) val | (CmmExprArg val, off) <- vals ]

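-- For example (a sketch): given some base address 'base',
--
--   hpStore base [(CmmExprArg e0, 0), (CmmExprArg e1, 8)]
--
-- emits the stores base[0] := e0 and base[8] := e1.  Entries whose
-- argument is not a CmmExprArg fall through the pattern match above and
-- produce no store.
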
-----------------------------------------------------------
-- Layout of static closures
-----------------------------------------------------------

-- Make a static closure, adding on any extra padding needed for CAFs,
-- and adding a static link field if necessary.

mkStaticClosureFields
        :: DynFlags
        -> CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields dflags info_tbl ccs caf_refs payload
  = mkStaticClosure dflags info_lbl ccs payload padding
        static_link_field saved_info_field
  where
    info_lbl = cit_lbl info_tbl

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- the static_link and saved_info fields must always be in the
    -- same place.  So we use isThunkRep rather than closureUpdReqd
    -- here:

    is_caf = isThunkRep (cit_rep info_tbl)

    padding
        | is_caf && null payload = [mkIntCLit dflags 0]
        | otherwise = []

    static_link_field
        | is_caf || staticClosureNeedsLink (mayHaveCafRefs caf_refs) info_tbl
        = [static_link_value]
        | otherwise
        = []

    saved_info_field
        | is_caf    = [mkIntCLit dflags 0]
        | otherwise = []

        -- For a static constructor which has NoCafRefs, we set the
        -- static link field to a non-zero value so the garbage
        -- collector will ignore it.
    static_link_value
        | mayHaveCafRefs caf_refs = mkIntCLit dflags 0
        | otherwise               = mkIntCLit dflags 3  -- No CAF refs
                                    -- See Note [STATIC_LINK fields]
                                    -- in rts/sm/Storage.h

mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
  =  [CmmLabel info_lbl]
  ++ staticProfHdr dflags ccs
  ++ concatMap (padLitToWord dflags) payload
  ++ padding
  ++ static_link_field
  ++ saved_info_field

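-- For example (a sketch, assuming profiling is off so staticProfHdr is
-- empty): a static CAF, whose payload is empty, is laid out as
--
--   [ CmmLabel info_lbl
--   , mkIntCLit dflags 0      -- padding (the indirectee slot)
--   , static_link_value
--   , mkIntCLit dflags 0 ]    -- saved_info
--
-- which matches the CAF layout pictured in the comment above.
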
-- JD: Simon had elided this padding, but without it the C back end asserts
-- failure. Maybe it's a bad assertion, and this padding is indeed unnecessary?
padLitToWord :: DynFlags -> CmmLit -> [CmmLit]
padLitToWord dflags lit = lit : padding pad_length
  where width = typeWidth (cmmLitType dflags lit)
        pad_length = wORD_SIZE dflags - widthInBytes width :: Int

        padding n | n <= 0         = []
                  | n `rem` 2 /= 0 = CmmInt 0 W8  : padding (n-1)
                  | n `rem` 4 /= 0 = CmmInt 0 W16 : padding (n-2)
                  | n `rem` 8 /= 0 = CmmInt 0 W32 : padding (n-4)
                  | otherwise      = CmmInt 0 W64 : padding (n-8)

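-- For example (a sketch, assuming a 64-bit target, i.e. wORD_SIZE = 8):
-- an 8-bit literal is padded up to a full word,
--
--   padLitToWord dflags (CmmInt 3 W8)
--     == [CmmInt 3 W8, CmmInt 0 W8, CmmInt 0 W16, CmmInt 0 W32]
--
-- so the emitted static data still occupies exactly one word.
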
-----------------------------------------------------------
-- Heap overflow checking
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
        r = gc_1p( r )
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.

  * A series of canned entry points like
        gcfun_2p( f, x, y )
    where f is a function closure of arity 2
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y)

These are used in the following circumstances

* entryHeapCheck: Function entry
    (a) With a canned GC entry sequence
        f( f_clo, x:ptr, y:ptr ) {
             Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             jump gcfun_2p( f_clo, x, y ) }
     Note the tail call to the garbage collector;
     it should do no register shuffling

    (b) No canned sequence
        f( f_clo, x:ptr, y:ptr, ...etc... ) {
          T: Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             call gc()  -- Needs an info table
             goto T }

* altHeapCheck: Immediately following an eval
  Started as
        case f x y of r { (p,q) -> rhs }
  (a) With a canned sequence for the results of f
      (which is the very common case since
      all boxed cases return just one pointer
           ...
           r = f( x, y )
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...

        L: r = gc_1p( r )
           goto K }

        Here, the info table needed by the call
        to gc_1p should be the *same* as the
        one for the call to f; the C-- optimiser
        spots this sharing opportunity)

  (b) No canned sequence for results of f
      Note second info table
           ...
           (r1,r2,r3) = call f( x, y )
        K:
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...

        L: call gc()    -- Extra info table here
           goto K

* generalHeapCheck: Anywhere else
  e.g. entry to thunk
       case branch *not* following eval,
       or let-no-escape
  Exactly the same as the previous case:

        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...

        L: call gc()
           goto K
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.

entryHeapCheck :: ClosureInfo
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not same as len args b/c of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()

entryHeapCheck cl_info nodeSet arity args code
  = entryHeapCheck' is_fastf node arity args code
  where
    node = case nodeSet of
              Just r  -> CmmReg (CmmLocal r)
              Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info)

    is_fastf = case closureFunInfo cl_info of
                 Just (_, ArgGen _) -> False
                 _otherwise         -> True

-- | lower-level version for CmmParse
entryHeapCheck' :: Bool           -- is a known function pattern
                -> CmmExpr        -- expression for the closure pointer
                -> Int            -- Arity -- not same as len args b/c of voids
                -> [LocalReg]     -- Non-void args (empty for thunk)
                -> FCode ()
                -> FCode ()
entryHeapCheck' is_fastf node arity args code
  = do dflags <- getDynFlags
       let is_thunk = arity == 0

           args' = map (CmmExprArg . CmmReg . CmmLocal) args
           stg_gc_fun    = CmmReg (CmmGlobal GCFun)
           stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)

           {- Thunks:          jump stg_gc_enter_1

              Function (fast): call (NativeNode) stg_gc_fun(fun, args)

              Function (slow): call (slow) stg_gc_fun(fun, args)
           -}
           gc_call upd
               | is_thunk
                 = mkJump dflags NativeNodeCall stg_gc_enter1 [CmmExprArg node] upd

               | is_fastf
                 = mkJump dflags NativeNodeCall stg_gc_fun (CmmExprArg node : args') upd

               | otherwise
                 = mkJump dflags Slow stg_gc_fun (CmmExprArg node : args') upd

       updfr_sz <- getUpdFrameOff

       loop_id <- newLabelC
       emitLabel loop_id
       heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code

-- ------------------------------------------------------------
-- A heap/stack check in a case alternative


-- If there are multiple alts and we need to GC, but don't have a
-- continuation already (the scrut was simple), then we should
-- pre-generate the continuation.  (if there are multiple alts it is
-- always a canned GC point).

-- altHeapCheck:
-- If we have a return continuation,
--   then if it is a canned GC pattern,
--           then we do mkJumpReturnsTo
--           else we do a normal call to stg_gc_noregs
--   else if it is a canned GC pattern,
--           then generate the continuation and do mkCallReturnsTo
--           else we do a normal call to stg_gc_noregs

altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck regs code = altOrNoEscapeHeapCheck False regs code

altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
altOrNoEscapeHeapCheck checkYield regs code = do
    dflags <- getDynFlags
    case cannedGCEntryPoint dflags regs of
      Nothing -> genericGC checkYield code
      Just gc -> do
        lret <- newLabelC
        let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) regs []
        lcont <- newLabelC
        tscope <- getTickScope
        emitOutOfLine lret (copyin <*> mkBranch lcont, tscope)
        emitLabel lcont
        cannedGCReturnsTo checkYield False gc regs lret off code

altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
altHeapCheckReturnsTo regs lret off code
  = do dflags <- getDynFlags
       case cannedGCEntryPoint dflags regs of
           Nothing -> genericGC False code
           Just gc -> cannedGCReturnsTo False True gc regs lret off code

-- noEscapeHeapCheck is implemented identically to altHeapCheck (which
-- is more efficient), but cannot be optimized away in the non-allocating
-- case because it may occur in a loop
noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code

cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
                  -> FCode a
                  -> FCode a
cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code
  = do dflags <- getDynFlags
       updfr_sz <- getUpdFrameOff
       heapCheck False checkYield (gc_call dflags gc updfr_sz) code
  where
    reg_exprs = map (CmmExprArg . CmmReg . CmmLocal) regs
      -- Note [stg_gc arguments]

      -- NB. we use the NativeReturn convention for passing arguments
      -- to the canned heap-check routines, because we are in a case
      -- alternative and hence the [LocalReg] was passed to us in the
      -- NativeReturn convention.
    gc_call dflags label sp
      | cont_on_stack
      = mkJumpReturnsTo dflags label NativeReturn reg_exprs lret off sp
      | otherwise
      = mkCallReturnsTo dflags label NativeReturn reg_exprs lret off sp []

genericGC :: Bool -> FCode a -> FCode a
genericGC checkYield code
  = do updfr_sz <- getUpdFrameOff
       lretry <- newLabelC
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       heapCheck False checkYield (call <*> mkBranch lretry) code

cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint dflags regs
  = case map localRegType regs of
      []  -> Just (mkGcLabel "stg_gc_noregs")
      [ty]
          | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1")
          | isFloatType ty -> case width of
                                  W32 -> Just (mkGcLabel "stg_gc_f1")
                                  W64 -> Just (mkGcLabel "stg_gc_d1")
                                  _   -> Nothing

          | width == wordWidth dflags -> Just (mkGcLabel "stg_gc_unbx_r1")
          | width == W64              -> Just (mkGcLabel "stg_gc_l1")
          | otherwise                 -> Nothing
          where
              width = typeWidth ty
      [ty1,ty2]
          |  isGcPtrType ty1
          && isGcPtrType ty2 -> Just (mkGcLabel "stg_gc_pp")
      [ty1,ty2,ty3]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3 -> Just (mkGcLabel "stg_gc_ppp")
      [ty1,ty2,ty3,ty4]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3
          && isGcPtrType ty4 -> Just (mkGcLabel "stg_gc_pppp")
      _otherwise -> Nothing

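-- For example (a sketch): a case alternative that returns two GC
-- pointers, say
--
--   case f x of (# p, q #) -> rhs
--
-- gives regs whose types are both GC pointers, so cannedGCEntryPoint
-- picks "stg_gc_pp"; a single Double# result would pick "stg_gc_d1",
-- and any combination without a canned entry point returns Nothing and
-- falls back to the generic stg_gc_noregs route (genericGC).
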
-- Note [stg_gc arguments]
-- It might seem that we could avoid passing the arguments to the
-- stg_gc function, because they are already in the right registers.
-- While this is usually the case, it isn't always.  Sometimes the
-- code generator has cleverly avoided the eval in a case, e.g. in
-- ffi/should_run/4221.hs we found
--
--   case a_r1mb of z
--     FunPtr x y -> ...
--
-- where a_r1mb is bound to a top-level constructor, and is known to be
-- evaluated.  The codegen just assigns x, y and z, and continues;
-- R1 is never assigned.
--
-- So we'll have to rely on optimisations to eliminate these
-- assignments where possible.


-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
generic_gc = mkGcLabel "stg_gc_noregs"

-- | Create a CLabel for calling a garbage collector entry point
mkGcLabel :: String -> CmmExpr
mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit s)))

-------------------------------
heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack checkYield do_gc code
  = getHeapUsage $ \ hpHw ->
    -- Emit heap checks, but be sure to do it lazily so
    -- that the conditionals on hpHw don't cause a black hole
    do  { dflags <- getDynFlags
        ; let mb_alloc_bytes
                 | hpHw > mBLOCK_SIZE = sorry $ unlines
                    [" Trying to allocate more than "++show mBLOCK_SIZE++" bytes.",
                     "",
                     "This is currently not possible due to a limitation of GHC's code generator.",
                     "See http://hackage.haskell.org/trac/ghc/ticket/4505 for details.",
                     "Suggestion: read data from a file instead of having large static data",
                     "structures in code."]
                 | hpHw > 0  = Just (mkIntExpr dflags (hpHw * (wORD_SIZE dflags)))
                 | otherwise = Nothing
                 where mBLOCK_SIZE = bLOCKS_PER_MBLOCK dflags * bLOCK_SIZE_W dflags
              stk_hwm | checkStack = Just (CmmLit CmmHighStackMark)
                      | otherwise  = Nothing
        ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc
        ; tickyAllocHeap True hpHw
        ; setRealHp hpHw
        ; code }

heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
heapStackCheckGen stk_hwm mb_bytes
  = do updfr_sz <- getUpdFrameOff
       lretry <- newLabelC
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry)

-- Note [Single stack check]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~
-- When compiling a function we can determine how much stack space it
-- will use. We therefore need to perform only a single stack check at
-- the beginning of a function to see if we have enough stack space.
--
-- The check boils down to comparing Sp-N with SpLim, where N is the
-- amount of stack space needed (see Note [Stack usage] below). *BUT*
-- at this stage of the pipeline we are not supposed to refer to Sp
-- itself, because the stack is not yet manifest, so we don't quite
-- know where Sp is pointing.

-- So instead of referring directly to Sp - as we used to do in the
-- past - the code generator uses (old + 0) in the stack check. That
-- is the address of the first word of the old area, so if we add N
-- we'll get the address of the highest used word.
--
-- This makes the check robust. For example, while we need to perform
-- only one stack check for each function, we could in theory place
-- more stack checks later in the function. They would be redundant,
-- but not incorrect (in a sense that they should not change program
-- behaviour). We need to make sure however that a stack check
-- inserted after incrementing the stack pointer checks for a
-- respectively smaller stack space. This would not be the case if the
-- code generator produced direct references to Sp. By referencing
-- (old + 0) we make sure that we always check for a correct amount of
-- stack: when converting (old + 0) to Sp the stack layout phase takes
-- into account changes already made to the stack pointer. The idea for
-- this change came from observations made while debugging #8275.

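-- As a concrete sketch of the note above: for a function that needs N
-- bytes of stack, the emitted check is conceptually
--
--   if ((old + 0) - N < SpLim) { goto gc; }
--
-- and the stack layout pass later rewrites (old + 0) to the appropriate
-- Sp-relative address (see sp_oflo in do_checks below).
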
-- Note [Stack usage]
-- ~~~~~~~~~~~~~~~~~~
-- At the moment we convert from STG to Cmm we don't know N, the
-- number of bytes of stack that the function will use, so we use a
-- special late-bound CmmLit, namely
--       CmmHighStackMark
-- to stand for the number of bytes needed. When the stack is made
-- manifest, the number of bytes needed is calculated, and used to
-- replace occurrences of CmmHighStackMark
--
-- The (Maybe CmmExpr) passed to do_checks is usually
--     Just (CmmLit CmmHighStackMark)
-- but can also be (in certain hand-written RTS functions)
--     Just (CmmLit 8)  or some other fixed value
-- If it is Nothing, we don't generate a stack check at all.

do_checks :: Maybe CmmExpr    -- Should we check the stack?
                              -- See Note [Stack usage]
          -> Bool             -- Should we check for preemption?
          -> Maybe CmmExpr    -- Heap headroom (bytes)
          -> CmmAGraph        -- What to do on failure
          -> FCode ()
do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
  dflags <- getDynFlags
  gc_id <- newLabelC

  let
    Just alloc_lit = mb_alloc_lit

    bump_hp = cmmOffsetExprB dflags (CmmReg hpReg) alloc_lit

    -- Sp overflow if ((old + 0) - CmmHighStack < SpLim)
    -- At the beginning of a function old + 0 = Sp
    -- See Note [Single stack check]
    sp_oflo sp_hwm =
         CmmMachOp (mo_wordULt dflags)
                   [CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
                              [CmmStackSlot Old 0, sp_hwm],
                    CmmReg spLimReg]

    -- Hp overflow if (Hp > HpLim)
    -- (Hp has been incremented by now)
    -- HpLim points to the LAST WORD of valid allocation space.
    hp_oflo = CmmMachOp (mo_wordUGt dflags)
                        [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]

    alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit

  case mb_stk_hwm of
    Nothing -> return ()
    Just stk_hwm -> tickyStackCheck >> (emit =<< mkCmmIfGoto (sp_oflo stk_hwm) gc_id)

  -- Emit new label that might potentially be a header
  -- of a self-recursive tail call.
  -- See Note [Self-recursive loop header].
  self_loop_info <- getSelfLoop
  case self_loop_info of
    Just (_, loop_header_id, _)
        | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
    _otherwise -> return ()

  if (isJust mb_alloc_lit)
    then do
      tickyHeapCheck
      emitAssign hpReg bump_hp
      emit =<< mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
    else do
      when (checkYield && not (gopt Opt_OmitYields dflags)) $ do
         -- Yielding if HpLim == 0
         let yielding = CmmMachOp (mo_wordEq dflags)
                                  [CmmReg (CmmGlobal HpLim),
                                   CmmLit (zeroCLit dflags)]
         emit =<< mkCmmIfGoto yielding gc_id

  tscope <- getTickScope
  emitOutOfLine gc_id
    (do_gc, tscope) -- this is expected to jump back somewhere

                -- Test for stack pointer exhaustion, then
                -- bump heap pointer, and test for heap exhaustion
                -- Note that we don't move the heap pointer unless the
                -- stack check succeeds.  Otherwise we might end up
                -- with slop at the end of the current block, which can
                -- confuse the LDV profiler.

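-- A sketch (in C-- pseudo-code, register names as in the notes above) of
-- what do_checks emits for a combined stack+heap check:
--
--      if ((old + 0) - stk_hwm < SpLim) goto gc;   -- only if mb_stk_hwm is Just _
--      Hp = Hp + alloc;                            -- only if mb_alloc_lit is Just _
--      if (Hp > HpLim) { HpAlloc = alloc; goto gc; }
--      ...
--   gc: <do_gc>      -- out of line; expected to jump back and retry
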
-- Note [Self-recursive loop header]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Self-recursive loop header is required by loopification optimization (See
-- Note [Self-recursive tail calls] in StgCmmExpr). We emit it if:
--
-- 1. There is information about self-loop in the FCode environment. We don't
--    check the binder (first component of the self_loop_info) because we are
--    certain that if the self-loop info is present then we are compiling the
--    binder body. Reason: the only possible way to get here with the
--    self_loop_info present is from closureCodeBody.
--
-- 2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible
--    to preempt the heap check (see #367 for motivation behind this check). It
--    is True for heap checks placed at the entry to a function and
--    let-no-escape heap checks but false for other heap checks (eg. in case
--    alternatives or created from hand-written high-level Cmm). The second
--    check (isJust mb_stk_hwm) is true for heap checks at the entry to a
--    function and some heap checks created in hand-written Cmm. Otherwise it
--    is Nothing. In other words the only situation when both conditions are
--    true is when compiling stack and heap checks at the entry to a
--    function. This is the only situation when we want to emit a self-loop
--    label.