-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmHeap (
        getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset, hpRel,

        entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
        heapStackCheckGen,
        entryHeapCheck',

        mkStaticClosureFields, mkStaticClosure,

        allocDynClosure, allocDynClosureCmm, allocHeapClosure,
        emitSetDynHdr
    ) where

#include "HsVersions.h"

import StgSyn
import CLabel
import StgCmmLayout
import StgCmmUtils
import StgCmmMonad
import StgCmmProf (profDynAlloc, dynProfHdr, staticProfHdr)
import StgCmmTicky
import StgCmmClosure
import StgCmmEnv

import MkGraph

import Hoopl
import SMRep
import Cmm
import CmmUtils
import CostCentre
import IdInfo( CafInfo(..), mayHaveCafRefs )
import Id ( Id )
import Module
import DynFlags
import FastString( mkFastString, fsLit )

import Control.Monad (when)
import Data.Maybe (isJust)

-----------------------------------------------------------
-- Initialise dynamic heap objects
-----------------------------------------------------------

allocDynClosure
        :: Maybe Id
        -> CmmInfoTable
        -> LambdaFormInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
        -> FCode CmmExpr        -- returns Hp+n

allocDynClosureCmm
        :: Maybe Id -> CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
        -> [(CmmExpr, ByteOff)]
        -> FCode CmmExpr        -- returns Hp+n
-- allocDynClosure allocates the thing in the heap,
-- and modifies the virtual Hp to account for this.
-- The returned CmmExpr is the address of the newly allocated closure.

-- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is
-- only valid until Hp is changed.  The caller should assign the
-- result to a LocalReg if it is required to remain live.
--
-- The reason we don't assign it to a LocalReg here is that the caller
-- is often about to call regIdInfo, which immediately assigns the
-- result of allocDynClosure to a new temp in order to add the tag.
-- So by not generating a LocalReg here we avoid a common source of
-- new temporaries and save some compile time.  This can be quite
-- significant - see test T4801.

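-- By way of illustration only (this is not code from this module), a
-- caller in the binding code generator might do something like
--
--     do { hp_plus_n <- allocDynClosure (Just id) info_tbl lf_info
--                                       use_cc blame_cc args_w_offsets
--        ; ... use hp_plus_n straight away, e.g. tag it and store it
--              in a LocalReg of the caller's choosing ... }
--
-- where id, info_tbl, lf_info, use_cc, blame_cc and args_w_offsets are
-- whatever the caller has in hand; the point is that hp_plus_n must be
-- consumed before anything else bumps Hp.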
allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
  let (args, offsets) = unzip args_w_offsets
  cmm_args <- mapM getArgAmode args     -- No void args
  allocDynClosureCmm mb_id info_tbl lf_info
                     use_cc _blame_cc (zip cmm_args offsets)


allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
  -- SAY WHAT WE ARE ABOUT TO DO
  let rep = cit_rep info_tbl
  tickyDynAlloc mb_id rep lf_info
  profDynAlloc rep use_cc
  let info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
  allocHeapClosure rep info_ptr use_cc amodes_w_offsets


-- | Low-level heap object allocation.
allocHeapClosure
  :: SMRep                    -- ^ representation of the object
  -> CmmExpr                  -- ^ info pointer
  -> CmmExpr                  -- ^ cost centre
  -> [(CmmExpr,ByteOff)]      -- ^ payload
  -> FCode CmmExpr            -- ^ returns the address of the object
allocHeapClosure rep info_ptr use_cc payload = do
  virt_hp <- getVirtHp

  -- Find the offset of the info-ptr word
  let info_offset = virt_hp + 1
            -- info_offset is the VirtualHpOffset of the first
            -- word of the new object
            -- Remember, virtHp points to last allocated word,
            -- ie 1 *before* the info-ptr word of new object.

  base <- getHpRelOffset info_offset
  emitComment $ mkFastString "allocDynClosure"
  emitSetDynHdr base info_ptr use_cc

  -- Fill in the fields
  hpStore base payload

  -- Bump the virtual heap pointer
  dflags <- getDynFlags
  setVirtHp (virt_hp + heapClosureSizeW dflags rep)

  return base

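-- For intuition, on a 64-bit target without profiling, allocating a
-- two-field constructor whose fields are x and y comes out as Cmm
-- along these lines (schematic; the concrete offsets depend on what
-- else the block allocates):
--
--     I64[Hp - 16] = Foo_con_info;   -- emitSetDynHdr: info pointer
--     I64[Hp - 8]  = x;              -- hpStore: the payload
--     I64[Hp]      = y;
--
-- with the returned CmmExpr denoting Hp - 16, the address of the
-- info-pointer word.  (Foo_con_info is a made-up label.)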

emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs
  = do dflags <- getDynFlags
       hpStore base (zip (header dflags) [0, wORD_SIZE dflags ..])
  where
    header :: DynFlags -> [CmmExpr]
    header dflags = [info_ptr] ++ dynProfHdr dflags ccs
        -- ToDo: Parallel stuff
        -- No ticky header

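-- In other words, the dynamic header written here is
--
--     header dflags = [info_ptr]                          -- plain build
--     header dflags = [info_ptr, ccs, <initial LDV word>] -- profiling build
--
-- laid out by hpStore at byte offsets 0, 8, 16, ... on a 64-bit target.
-- (The profiling case is a sketch; dynProfHdr is the authority on its
-- exact contents.)
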
-- Store the item (expr,off) in base[off]
hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode ()
hpStore base vals = do
  dflags <- getDynFlags
  sequence_ $
    [ emitStore (cmmOffsetB dflags base off) val | (val,off) <- vals ]

-----------------------------------------------------------
-- Layout of static closures
-----------------------------------------------------------

-- Make a static closure, adding on any extra padding needed for CAFs,
-- and adding a static link field if necessary.

mkStaticClosureFields
        :: DynFlags
        -> CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields dflags info_tbl ccs caf_refs payload
  = mkStaticClosure dflags info_lbl ccs payload padding
        static_link_field saved_info_field
  where
    info_lbl = cit_lbl info_tbl

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- the static_link and saved_info fields must always be in the
    -- same place.  So we use isThunkRep rather than closureUpdReqd
    -- here:

    is_caf = isThunkRep (cit_rep info_tbl)

    padding
        | is_caf && null payload = [mkIntCLit dflags 0]
        | otherwise = []

    static_link_field
        | is_caf || staticClosureNeedsLink (mayHaveCafRefs caf_refs) info_tbl
        = [static_link_value]
        | otherwise
        = []

    saved_info_field
        | is_caf    = [mkIntCLit dflags 0]
        | otherwise = []

        -- For a static constructor which has NoCafRefs, we set the
        -- static link field to a non-zero value so the garbage
        -- collector will ignore it.
    static_link_value
        | mayHaveCafRefs caf_refs = mkIntCLit dflags 0
        | otherwise               = mkIntCLit dflags 1  -- No CAF refs

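-- For example, on a non-profiling build, a top-level updatable thunk
-- that may refer to CAFs has no payload and is a CAF, so the word list
-- produced above is
--
--     [ CmmLabel its_info_lbl   -- info ptr
--     , 0                       -- padding (room for the indirectee)
--     , 0                       -- static_link: zero, the GC must scavenge it
--     , 0 ]                     -- saved_info
--
-- while a static constructor with NoCafRefs gets its info pointer and
-- payload, plus a static link of 1 only if one is needed at all.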

mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
  =  [CmmLabel info_lbl]
  ++ staticProfHdr dflags ccs
  ++ concatMap (padLitToWord dflags) payload
  ++ padding
  ++ static_link_field
  ++ saved_info_field

-- JD: Simon had elided this padding, but without it the C back end asserts
-- failure. Maybe it's a bad assertion, and this padding is indeed unnecessary?
padLitToWord :: DynFlags -> CmmLit -> [CmmLit]
padLitToWord dflags lit = lit : padding pad_length
  where width = typeWidth (cmmLitType dflags lit)
        pad_length = wORD_SIZE dflags - widthInBytes width :: Int

        padding n | n <= 0         = []
                  | n `rem` 2 /= 0 = CmmInt 0 W8  : padding (n-1)
                  | n `rem` 4 /= 0 = CmmInt 0 W16 : padding (n-2)
                  | n `rem` 8 /= 0 = CmmInt 0 W32 : padding (n-4)
                  | otherwise      = CmmInt 0 W64 : padding (n-8)
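
-- To illustrate the padding arithmetic: a W8 literal on a 64-bit
-- target has pad_length = 8 - 1 = 7, giving
--
--     padding 7 = [CmmInt 0 W8, CmmInt 0 W16, CmmInt 0 W32]
--
-- i.e. 1 + 2 + 4 = 7 bytes of zeroes, so the literal plus its padding
-- fill exactly one word.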

-----------------------------------------------------------
-- Heap overflow checking
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
        r = gc_1p( r )
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.

  * A series of canned entry points like
        gcfun_2p( f, x, y )
    where f is a function closure of arity 2
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y)

These are used in the following circumstances

* entryHeapCheck: Function entry
    (a) With a canned GC entry sequence
        f( f_clo, x:ptr, y:ptr ) {
             Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             jump gcfun_2p( f_clo, x, y ) }
     Note the tail call to the garbage collector;
     it should do no register shuffling

    (b) No canned sequence
        f( f_clo, x:ptr, y:ptr, ...etc... ) {
          T: Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             call gc()  -- Needs an info table
             goto T }

* altHeapCheck: Immediately following an eval
  Started as
        case f x y of r { (p,q) -> rhs }
  (a) With a canned sequence for the results of f
      (which is the very common case since
      all boxed cases return just one pointer
         ...
        r = f( x, y )
     K:      -- K needs an info table
        Hp = Hp+8
        if Hp > HpLim goto L
        ...code for rhs...

     L: r = gc_1p( r )
        goto K }

        Here, the info table needed by the call
        to gc_1p should be the *same* as the
        one for the call to f; the C-- optimiser
        spots this sharing opportunity)

   (b) No canned sequence for results of f
       Note second info table
         ...
        (r1,r2,r3) = call f( x, y )
     K:
        Hp = Hp+8
        if Hp > HpLim goto L
        ...code for rhs...

     L: call gc()    -- Extra info table here
        goto K

* generalHeapCheck: Anywhere else
  e.g. entry to thunk
       case branch *not* following eval,
       or let-no-escape
  Exactly the same as the previous case:

     K:    -- K needs an info table
        Hp = Hp+8
        if Hp > HpLim goto L
        ...

     L: call gc()
        goto K
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.

entryHeapCheck :: ClosureInfo
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not same as len args b/c of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()

entryHeapCheck cl_info nodeSet arity args code
  = entryHeapCheck' is_fastf node arity args code
  where
    node = case nodeSet of
              Just r  -> CmmReg (CmmLocal r)
              Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info)

    is_fastf = case closureFunInfo cl_info of
                 Just (_, ArgGen _) -> False
                 _otherwise         -> True

-- | lower-level version for CmmParse
entryHeapCheck' :: Bool           -- is a known function pattern
                -> CmmExpr        -- expression for the closure pointer
                -> Int            -- Arity -- not same as len args b/c of voids
                -> [LocalReg]     -- Non-void args (empty for thunk)
                -> FCode ()
                -> FCode ()
entryHeapCheck' is_fastf node arity args code
  = do dflags <- getDynFlags
       let is_thunk = arity == 0

           args' = map (CmmReg . CmmLocal) args
           stg_gc_fun    = CmmReg (CmmGlobal GCFun)
           stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)

           {- Thunks:          jump stg_gc_enter_1

              Function (fast): call (NativeNode) stg_gc_fun(fun, args)

              Function (slow): call (slow) stg_gc_fun(fun, args)
           -}
           gc_call upd
               | is_thunk
                 = mkJump dflags NativeNodeCall stg_gc_enter1 [node] upd

               | is_fastf
                 = mkJump dflags NativeNodeCall stg_gc_fun (node : args') upd

               | otherwise
                 = mkJump dflags Slow stg_gc_fun (node : args') upd

       updfr_sz <- getUpdFrameOff

       loop_id <- newLabelC
       emitLabel loop_id
       heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code

-- ------------------------------------------------------------
-- A heap/stack check in a case alternative


-- If there are multiple alts and we need to GC, but don't have a
-- continuation already (the scrut was simple), then we should
-- pre-generate the continuation.  (if there are multiple alts it is
-- always a canned GC point).

-- altHeapCheck:
-- If we have a return continuation,
--   then if it is a canned GC pattern,
--           then we do mkJumpReturnsTo
--           else we do a normal call to stg_gc_noregs
--   else if it is a canned GC pattern,
--           then generate the continuation and do mkCallReturnsTo
--           else we do a normal call to stg_gc_noregs

altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck regs code = altOrNoEscapeHeapCheck False regs code

altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
altOrNoEscapeHeapCheck checkYield regs code = do
  dflags <- getDynFlags
  case cannedGCEntryPoint dflags regs of
      Nothing -> genericGC checkYield code
      Just gc -> do
        lret <- newLabelC
        let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) regs []
        lcont <- newLabelC
        emitOutOfLine lret (copyin <*> mkBranch lcont)
        emitLabel lcont
        cannedGCReturnsTo checkYield False gc regs lret off code

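-- Schematically (a sketch, not the exact Cmm), for
--
--     case f x of (# p, q #) -> rhs
--
-- where p and q are both GC pointers and there is no pre-existing
-- return continuation, the canned-GC branch above produces:
--
--     lcont:
--         Hp = Hp + N
--         if Hp > HpLim  { HpAlloc = N; goto Lgc }
--         ...code for rhs...
--     Lgc:                              -- out of line
--         call stg_gc_pp( p, q ) returns to lret
--     lret:                             -- out of line, has an info table
--         (p, q) = <values returned by the GC>
--         goto lcont
--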
altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
altHeapCheckReturnsTo regs lret off code
  = do dflags <- getDynFlags
       case cannedGCEntryPoint dflags regs of
           Nothing -> genericGC False code
           Just gc -> cannedGCReturnsTo False True gc regs lret off code

-- noEscapeHeapCheck is implemented identically to altHeapCheck (which
-- is more efficient), but cannot be optimized away in the non-allocating
-- case because it may occur in a loop
noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code

cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
                  -> FCode a
                  -> FCode a
cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code
  = do dflags <- getDynFlags
       updfr_sz <- getUpdFrameOff
       heapCheck False checkYield (gc_call dflags gc updfr_sz) code
  where
    reg_exprs = map (CmmReg . CmmLocal) regs
      -- Note [stg_gc arguments]

      -- NB. we use the NativeReturn convention for passing arguments
      -- to the canned heap-check routines, because we are in a case
      -- alternative and hence the [LocalReg] was passed to us in the
      -- NativeReturn convention.
    gc_call dflags label sp
      | cont_on_stack
        = mkJumpReturnsTo dflags label NativeReturn reg_exprs lret off sp
      | otherwise
        = mkCallReturnsTo dflags label NativeReturn reg_exprs lret off sp []

genericGC :: Bool -> FCode a -> FCode a
genericGC checkYield code
  = do updfr_sz <- getUpdFrameOff
       lretry <- newLabelC
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       heapCheck False checkYield (call <*> mkBranch lretry) code

cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint dflags regs
  = case map localRegType regs of
      []  -> Just (mkGcLabel "stg_gc_noregs")
      [ty]
          | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1")
          | isFloatType ty -> case width of
                                W32 -> Just (mkGcLabel "stg_gc_f1")
                                W64 -> Just (mkGcLabel "stg_gc_d1")
                                _   -> Nothing

          | width == wordWidth dflags -> Just (mkGcLabel "stg_gc_unbx_r1")
          | width == W64              -> Just (mkGcLabel "stg_gc_l1")
          | otherwise                 -> Nothing
          where
              width = typeWidth ty
      [ty1,ty2]
          |  isGcPtrType ty1
          && isGcPtrType ty2 -> Just (mkGcLabel "stg_gc_pp")
      [ty1,ty2,ty3]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3 -> Just (mkGcLabel "stg_gc_ppp")
      [ty1,ty2,ty3,ty4]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3
          && isGcPtrType ty4 -> Just (mkGcLabel "stg_gc_pppp")
      _otherwise -> Nothing

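-- A few concrete instances, for orientation: an alternative binding a
-- single boxed result uses stg_gc_unpt_r1; a single Double# uses
-- stg_gc_d1; an unboxed pair of pointers uses stg_gc_pp; anything not
-- matched above (say, a pointer together with an Int#) gets Nothing
-- and falls back to the generic stg_gc_noregs path via genericGC.
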
-- Note [stg_gc arguments]
-- It might seem that we could avoid passing the arguments to the
-- stg_gc function, because they are already in the right registers.
-- While this is usually the case, it isn't always.  Sometimes the
-- code generator has cleverly avoided the eval in a case, e.g. in
-- ffi/should_run/4221.hs we found
--
--   case a_r1mb of z
--     FunPtr x y -> ...
--
-- where a_r1mb is bound to a top-level constructor, and is known to be
-- evaluated.  The codegen just assigns x, y and z, and continues;
-- R1 is never assigned.
--
-- So we'll have to rely on optimisations to eliminate these
-- assignments where possible.


-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
generic_gc = mkGcLabel "stg_gc_noregs"

-- | Create a CLabel for calling a garbage collector entry point
mkGcLabel :: String -> CmmExpr
mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit s)))

-------------------------------
heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack checkYield do_gc code
  = getHeapUsage $ \ hpHw ->
    -- Emit heap checks, but be sure to do it lazily so
    -- that the conditionals on hpHw don't cause a black hole
    do  { dflags <- getDynFlags
        ; let mb_alloc_bytes
                 | hpHw > 0  = Just (mkIntExpr dflags (hpHw * (wORD_SIZE dflags)))
                 | otherwise = Nothing
              stk_hwm | checkStack = Just (CmmLit CmmHighStackMark)
                      | otherwise  = Nothing
        ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc
        ; tickyAllocHeap True hpHw
        ; setRealHp hpHw
        ; code }

heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
heapStackCheckGen stk_hwm mb_bytes
  = do updfr_sz <- getUpdFrameOff
       lretry <- newLabelC
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry)

-- Note [Single stack check]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~
-- When compiling a function we can determine how much stack space it
-- will use.  We therefore need to perform only a single stack check at
-- the beginning of a function to see if we have enough stack space.
--
-- The check boils down to comparing Sp-N with SpLim, where N is the
-- amount of stack space needed (see Note [Stack usage] below).  *BUT*
-- at this stage of the pipeline we are not supposed to refer to Sp
-- itself, because the stack is not yet manifest, so we don't quite
-- know where Sp is pointing.
--
-- So instead of referring directly to Sp - as we used to do in the
-- past - the code generator uses (old + 0) in the stack check.  That
-- is the address of the first word of the old area, so if we add N
-- we'll get the address of the highest used word.
--
-- This makes the check robust.  For example, while we need to perform
-- only one stack check for each function, we could in theory place
-- more stack checks later in the function.  They would be redundant,
-- but not incorrect (in the sense that they should not change program
-- behaviour).  We need to make sure however that a stack check
-- inserted after incrementing the stack pointer checks for a
-- correspondingly smaller amount of stack space.  This would not be
-- the case if the code generator produced direct references to Sp.
-- By referencing (old + 0) we make sure that we always check for the
-- correct amount of stack: when converting (old + 0) to Sp the stack
-- layout phase takes into account changes already made to the stack
-- pointer.  The idea for this change came from observations made
-- while debugging #8275.

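-- As a schematic example: at code-generation time the check emitted
-- for a function entry looks like
--
--     if ((old + 0) - <high-water mark> < SpLim) goto Lgc
--
-- where <high-water mark> is the late-bound CmmHighStackMark literal
-- (see Note [Stack usage] below); the stack layout phase later fills
-- in the byte count and rewrites (old + 0) to an Sp-relative address.
--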
-- Note [Stack usage]
-- ~~~~~~~~~~~~~~~~~~
-- At the moment we convert from STG to Cmm we don't know N, the
-- number of bytes of stack that the function will use, so we use a
-- special late-bound CmmLit, namely
--       CmmHighStackMark
-- to stand for the number of bytes needed.  When the stack is made
-- manifest, the number of bytes needed is calculated, and used to
-- replace occurrences of CmmHighStackMark
--
-- The (Maybe CmmExpr) passed to do_checks is usually
--     Just (CmmLit CmmHighStackMark)
-- but can also be (in certain hand-written RTS functions)
--     Just (CmmLit 8) or some other fixed value
-- If it is Nothing, we don't generate a stack check at all.

do_checks :: Maybe CmmExpr    -- Should we check the stack?
                              -- See Note [Stack usage]
          -> Bool             -- Should we check for preemption?
          -> Maybe CmmExpr    -- Heap headroom (bytes)
          -> CmmAGraph        -- What to do on failure
          -> FCode ()
do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
  dflags <- getDynFlags
  gc_id <- newLabelC

  let
    Just alloc_lit = mb_alloc_lit

    bump_hp = cmmOffsetExprB dflags (CmmReg hpReg) alloc_lit

    -- Sp overflow if ((old + 0) - CmmHighStackMark < SpLim)
    -- At the beginning of a function old + 0 = Sp
    -- See Note [Single stack check]
    sp_oflo sp_hwm =
         CmmMachOp (mo_wordULt dflags)
                   [CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
                              [CmmStackSlot Old 0, sp_hwm],
                    CmmReg spLimReg]

    -- Hp overflow if (Hp > HpLim)
    -- (Hp has been incremented by now)
    -- HpLim points to the LAST WORD of valid allocation space.
    hp_oflo = CmmMachOp (mo_wordUGt dflags)
                        [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]

    alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit

  case mb_stk_hwm of
    Nothing -> return ()
    Just stk_hwm -> tickyStackCheck >> (emit =<< mkCmmIfGoto (sp_oflo stk_hwm) gc_id)

  -- Emit new label that might potentially be a header
  -- of a self-recursive tail call.
  -- See Note [Self-recursive loop header].
  self_loop_info <- getSelfLoop
  case self_loop_info of
    Just (_, loop_header_id, _)
        | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
    _otherwise -> return ()

  if (isJust mb_alloc_lit)
    then do
      tickyHeapCheck
      emitAssign hpReg bump_hp
      emit =<< mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
    else do
      when (checkYield && not (gopt Opt_OmitYields dflags)) $ do
         -- Yielding if HpLim == 0
         let yielding = CmmMachOp (mo_wordEq dflags)
                                  [CmmReg (CmmGlobal HpLim),
                                   CmmLit (zeroCLit dflags)]
         emit =<< mkCmmIfGoto yielding gc_id

  emitOutOfLine gc_id $
     do_gc -- this is expected to jump back somewhere

                -- Test for stack pointer exhaustion, then
                -- bump the heap pointer, and test for heap exhaustion
                -- Note that we don't move the heap pointer unless the
                -- stack check succeeds.  Otherwise we might end up
                -- with slop at the end of the current block, which can
                -- confuse the LDV profiler.
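
-- Putting the pieces together, a function entry that needs both checks
-- emits roughly (schematic; N is the allocation in bytes):
--
--     if ((old + 0) - <high-water mark> < SpLim) goto Lgc
--     Hp = Hp + N
--     if (Hp > HpLim) { HpAlloc = N; goto Lgc }
--     ...
--   Lgc:                         -- out of line
--     <do_gc graph>              -- jumps back, e.g. to retry the checks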

-- Note [Self-recursive loop header]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- A self-recursive loop header is required by the loopification
-- optimisation (see Note [Self-recursive tail calls] in StgCmmExpr).
-- We emit it if:
--
-- 1. There is information about a self-loop in the FCode environment. We don't
--    check the binder (first component of the self_loop_info) because we are
--    certain that if the self-loop info is present then we are compiling the
--    binder body. Reason: the only possible way to get here with the
--    self_loop_info present is from closureCodeBody.
--
-- 2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible
--    to preempt the heap check (see #367 for motivation behind this check). It
--    is True for heap checks placed at the entry to a function and
--    let-no-escape heap checks, but False for other heap checks (e.g. in case
--    alternatives or created from hand-written high-level Cmm). The second
--    check (isJust mb_stk_hwm) is true for heap checks at the entry to a
--    function and some heap checks created in hand-written Cmm; otherwise
--    mb_stk_hwm is Nothing. In other words the only situation when both
--    conditions are true is when compiling stack and heap checks at the entry
--    to a function. This is the only situation when we want to emit a
--    self-loop label.