-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmHeap (
        getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset,

        entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
        heapStackCheckGen,
        entryHeapCheck',

        mkStaticClosureFields, mkStaticClosure,

        allocDynClosure, allocDynClosureCmm, allocHeapClosure,
        emitSetDynHdr
    ) where

import GhcPrelude hiding ((<*>))

import StgSyn
import CLabel
import StgCmmLayout
import StgCmmUtils
import StgCmmMonad
import StgCmmProf (profDynAlloc, dynProfHdr, staticProfHdr)
import StgCmmTicky
import StgCmmClosure
import StgCmmEnv

import MkGraph

import Hoopl.Label
import SMRep
import BlockId
import Cmm
import CmmUtils
import CostCentre
import IdInfo( CafInfo(..), mayHaveCafRefs )
import Id ( Id )
import Module
import DynFlags
import FastString( mkFastString, fsLit )
import Panic( sorry )

import Control.Monad (when)
import Data.Maybe (isJust)

-----------------------------------------------------------
--      Initialise dynamic heap objects
-----------------------------------------------------------

allocDynClosure
        :: Maybe Id
        -> CmmInfoTable
        -> LambdaFormInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
        -> FCode CmmExpr        -- returns Hp+n

allocDynClosureCmm
        :: Maybe Id -> CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
        -> [(CmmExpr, ByteOff)]
        -> FCode CmmExpr        -- returns Hp+n
-- allocDynClosure allocates the thing in the heap,
-- and modifies the virtual Hp to account for this.
-- It returns an expression for the address of the new object;
-- see the next comment about its limited lifetime.

-- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is
-- only valid until Hp is changed.  The caller should assign the
-- result to a LocalReg if it is required to remain live.
--
-- The reason we don't assign it to a LocalReg here is that the caller
-- is often about to call regIdInfo, which immediately assigns the
-- result of allocDynClosure to a new temp in order to add the tag.
-- So by not generating a LocalReg here we avoid a common source of
-- new temporaries and save some compile time.  This can be quite
-- significant - see test T4801.

allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
  let (args, offsets) = unzip args_w_offsets
  cmm_args <- mapM getArgAmode args     -- No void args
  allocDynClosureCmm mb_id info_tbl lf_info
                     use_cc _blame_cc (zip cmm_args offsets)

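-- A rough usage sketch (illustrative only, not code from this module): a
-- caller such as StgCmmBind first computes the payload offsets with the
-- layout machinery in StgCmmLayout and then allocates the object.  The
-- names 'bndr', 'ccs_expr', 'args_w_offsets' and 'reg' below are
-- hypothetical:
--
--      do { ...
--         ; hp_rel <- allocDynClosure (Just bndr) info_tbl lf_info
--                                     ccs_expr ccs_expr args_w_offsets
--           -- hp_rel is only an offset from Hp, so bind it to a
--           -- LocalReg before Hp can move again:
--         ; emitAssign (CmmLocal reg) hp_rel
--         ; ... }
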
allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
  -- SAY WHAT WE ARE ABOUT TO DO
  let rep = cit_rep info_tbl
  tickyDynAlloc mb_id rep lf_info
  let info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
  allocHeapClosure rep info_ptr use_cc amodes_w_offsets


-- | Low-level heap object allocation.
allocHeapClosure
  :: SMRep                            -- ^ representation of the object
  -> CmmExpr                          -- ^ info pointer
  -> CmmExpr                          -- ^ cost centre
  -> [(CmmExpr,ByteOff)]              -- ^ payload
  -> FCode CmmExpr                    -- ^ returns the address of the object
allocHeapClosure rep info_ptr use_cc payload = do
  profDynAlloc rep use_cc

  virt_hp <- getVirtHp

  -- Find the offset of the info-ptr word
  let info_offset = virt_hp + 1
            -- info_offset is the VirtualHpOffset of the first
            -- word of the new object
            -- Remember, virtHp points to last allocated word,
            -- ie 1 *before* the info-ptr word of new object.

  base <- getHpRelOffset info_offset
  emitComment $ mkFastString "allocHeapClosure"
  emitSetDynHdr base info_ptr use_cc

  -- Fill in the fields
  hpStore base payload

  -- Bump the virtual heap pointer
  dflags <- getDynFlags
  setVirtHp (virt_hp + heapClosureSizeW dflags rep)

  return base

emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs
  = do dflags <- getDynFlags
       hpStore base (zip (header dflags) [0, wORD_SIZE dflags ..])
  where
    header :: DynFlags -> [CmmExpr]
    header dflags = [info_ptr] ++ dynProfHdr dflags ccs
        -- ToDo: Parallel stuff
        -- No ticky header

-- Store the item (expr,off) in base[off]
hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode ()
hpStore base vals = do
  dflags <- getDynFlags
  sequence_ $
    [ emitStore (cmmOffsetB dflags base off) val | (val,off) <- vals ]

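-- A small worked example (illustrative, assuming a 64-bit target): for a
-- hypothetical two-field payload 'x' and 'y',
--
--      hpStore base [(x, 0), (y, 8)]
--
-- emits the stores  base[0] := x  and  base[8] := y,  one emitStore per
-- (value, byte offset) pair, in list order.
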
-----------------------------------------------------------
--              Layout of static closures
-----------------------------------------------------------

-- Make a static closure, adding on any extra padding needed for CAFs,
-- and adding a static link field if necessary.

mkStaticClosureFields
        :: DynFlags
        -> CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields dflags info_tbl ccs caf_refs payload
  = mkStaticClosure dflags info_lbl ccs payload padding
        static_link_field saved_info_field
  where
    info_lbl = cit_lbl info_tbl

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- the static_link and saved_info fields must always be in the
    -- same place.  So we use isThunkRep rather than closureUpdReqd
    -- here:

    is_caf = isThunkRep (cit_rep info_tbl)

    padding
        | is_caf && null payload = [mkIntCLit dflags 0]
        | otherwise = []

    static_link_field
        | is_caf || staticClosureNeedsLink (mayHaveCafRefs caf_refs) info_tbl
        = [static_link_value]
        | otherwise
        = []

    saved_info_field
        | is_caf     = [mkIntCLit dflags 0]
        | otherwise  = []

        -- For a static constructor which has NoCafRefs, we set the
        -- static link field to a non-zero value so the garbage
        -- collector will ignore it.
    static_link_value
        | mayHaveCafRefs caf_refs  = mkIntCLit dflags 0
        | otherwise                = mkIntCLit dflags 3  -- No CAF refs
                                        -- See Note [STATIC_LINK fields]
                                        -- in rts/sm/Storage.h

mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
  =  [CmmLabel info_lbl]
  ++ staticProfHdr dflags ccs
  ++ payload
  ++ padding
  ++ static_link_field
  ++ saved_info_field

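-- An illustrative sketch (hypothetical labels, no profiling header): a CAF,
-- i.e. is_caf with an empty payload, is laid out by mkStaticClosure as
--
--      [ CmmLabel caf_info_lbl      -- info pointer
--      , mkIntCLit dflags 0         -- padding (room for the indirectee)
--      , static_link_value          -- static_link (0 or 3, see above)
--      , mkIntCLit dflags 0 ]       -- saved_info
--
-- while a static constructor that may have CAF references gets its payload
-- followed by a static link field of 0, with no padding or saved_info words.
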
-----------------------------------------------------------
--              Heap overflow checking
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
        r = gc_1p( r )
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.

  * A series of canned entry points like
        gcfun_2p( f, x, y )
    where f is a function closure of arity 2.
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y)

These are used in the following circumstances

* entryHeapCheck: Function entry
    (a) With a canned GC entry sequence
        f( f_clo, x:ptr, y:ptr ) {
             Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             jump gcfun_2p( f_clo, x, y ) }
     Note the tail call to the garbage collector;
     it should do no register shuffling

    (b) No canned sequence
        f( f_clo, x:ptr, y:ptr, ...etc... ) {
          T: Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             call gc()  -- Needs an info table
             goto T }

* altHeapCheck: Immediately following an eval
  Started as
        case f x y of r { (p,q) -> rhs }
  (a) With a canned sequence for the results of f
      (which is the very common case since
      all boxed cases return just one pointer)
           ...
           r = f( x, y )
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...

        L: r = gc_1p( r )
           goto K }

        Here, the info table needed by the call
        to gc_1p should be the *same* as the
        one for the call to f; the C-- optimiser
        spots this sharing opportunity

   (b) No canned sequence for results of f
       Note second info table
           ...
           (r1,r2,r3) = call f( x, y )
        K:
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...

        L: call gc()    -- Extra info table here
           goto K

* generalHeapCheck: Anywhere else
  e.g. entry to thunk
       case branch *not* following eval,
       or let-no-escape
  Exactly the same as the previous case:

        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...

        L: call gc()
           goto K
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.

entryHeapCheck :: ClosureInfo
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not same as len args b/c of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()

entryHeapCheck cl_info nodeSet arity args code
  = entryHeapCheck' is_fastf node arity args code
  where
    node = case nodeSet of
              Just r  -> CmmReg (CmmLocal r)
              Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info)

    is_fastf = case closureFunInfo cl_info of
                 Just (_, ArgGen _) -> False
                 _otherwise         -> True

-- | lower-level version for CmmParse
entryHeapCheck' :: Bool           -- is a known function pattern
                -> CmmExpr        -- expression for the closure pointer
                -> Int            -- Arity -- not same as len args b/c of voids
                -> [LocalReg]     -- Non-void args (empty for thunk)
                -> FCode ()
                -> FCode ()
entryHeapCheck' is_fastf node arity args code
  = do dflags <- getDynFlags
       let is_thunk = arity == 0

           args' = map (CmmReg . CmmLocal) args
           stg_gc_fun    = CmmReg (CmmGlobal GCFun)
           stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)

           {- Thunks:          jump stg_gc_enter_1

              Function (fast): call (NativeNode) stg_gc_fun(fun, args)

              Function (slow): call (slow) stg_gc_fun(fun, args)
           -}
           gc_call upd
               | is_thunk
                 = mkJump dflags NativeNodeCall stg_gc_enter1 [node] upd

               | is_fastf
                 = mkJump dflags NativeNodeCall stg_gc_fun (node : args') upd

               | otherwise
                 = mkJump dflags Slow stg_gc_fun (node : args') upd

       updfr_sz <- getUpdFrameOff

       loop_id <- newBlockId
       emitLabel loop_id
       heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code

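-- A rough picture of how this is used (hypothetical names, not code from
-- this module): when the entry code of a function or thunk is compiled
-- (closureCodeBody in StgCmmBind), the body is wrapped in an entry check,
-- e.g.
--
--      entryHeapCheck cl_info mb_node_reg arity arg_regs $
--        do { ... emit the closure body ... }
--
-- so the generated entry code first bumps Hp, compares it against HpLim
-- and, on failure, tail-calls stg_gc_fun / stg_gc_enter_1 before looping
-- back to the start (the loop_id label above).
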
-- ------------------------------------------------------------
-- A heap/stack check in a case alternative


-- If there are multiple alts and we need to GC, but don't have a
-- continuation already (the scrut was simple), then we should
-- pre-generate the continuation.  (if there are multiple alts it is
-- always a canned GC point).

-- altHeapCheck:
-- If we have a return continuation,
--   then if it is a canned GC pattern,
--          then we do mkJumpReturnsTo
--          else we do a normal call to stg_gc_noregs
--   else if it is a canned GC pattern,
--          then generate the continuation and do mkCallReturnsTo
--          else we do a normal call to stg_gc_noregs

altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck regs code = altOrNoEscapeHeapCheck False regs code

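-- A sketch of the intended use (hypothetical names): after compiling the
-- scrutinee of a case, StgCmmExpr wraps the code for the alternatives in a
-- heap check over the result registers, e.g.
--
--      altHeapCheck result_regs $
--        do { ... code for the selected alternative ... }
--
-- With a single pointer result this picks the canned stg_gc_unpt_r1 entry
-- point via cannedGCEntryPoint below; otherwise it falls back to a generic
-- call to stg_gc_noregs.
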
altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
altOrNoEscapeHeapCheck checkYield regs code = do
    dflags <- getDynFlags
    case cannedGCEntryPoint dflags regs of
      Nothing -> genericGC checkYield code
      Just gc -> do
        lret <- newBlockId
        let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) regs []
        lcont <- newBlockId
        tscope <- getTickScope
        emitOutOfLine lret (copyin <*> mkBranch lcont, tscope)
        emitLabel lcont
        cannedGCReturnsTo checkYield False gc regs lret off code

altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
altHeapCheckReturnsTo regs lret off code
  = do dflags <- getDynFlags
       case cannedGCEntryPoint dflags regs of
           Nothing -> genericGC False code
           Just gc -> cannedGCReturnsTo False True gc regs lret off code

-- noEscapeHeapCheck is implemented identically to altHeapCheck (which
-- is more efficient), but cannot be optimized away in the non-allocating
-- case because it may occur in a loop
noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code

cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
                  -> FCode a
                  -> FCode a
cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code
  = do dflags <- getDynFlags
       updfr_sz <- getUpdFrameOff
       heapCheck False checkYield (gc_call dflags gc updfr_sz) code
  where
    reg_exprs = map (CmmReg . CmmLocal) regs
      -- Note [stg_gc arguments]

      -- NB. we use the NativeReturn convention for passing arguments
      -- to the canned heap-check routines, because we are in a case
      -- alternative and hence the [LocalReg] was passed to us in the
      -- NativeReturn convention.
    gc_call dflags label sp
      | cont_on_stack
      = mkJumpReturnsTo dflags label NativeReturn reg_exprs lret off sp
      | otherwise
      = mkCallReturnsTo dflags label NativeReturn reg_exprs lret off sp []

genericGC :: Bool -> FCode a -> FCode a
genericGC checkYield code
  = do updfr_sz <- getUpdFrameOff
       lretry <- newBlockId
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       heapCheck False checkYield (call <*> mkBranch lretry) code

cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint dflags regs
  = case map localRegType regs of
      []  -> Just (mkGcLabel "stg_gc_noregs")
      [ty]
          | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1")
          | isFloatType ty -> case width of
                                W32       -> Just (mkGcLabel "stg_gc_f1")
                                W64       -> Just (mkGcLabel "stg_gc_d1")
                                _         -> Nothing

          | width == wordWidth dflags -> Just (mkGcLabel "stg_gc_unbx_r1")
          | width == W64              -> Just (mkGcLabel "stg_gc_l1")
          | otherwise                 -> Nothing
          where
              width = typeWidth ty
      [ty1,ty2]
          |  isGcPtrType ty1
          && isGcPtrType ty2 -> Just (mkGcLabel "stg_gc_pp")
      [ty1,ty2,ty3]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3 -> Just (mkGcLabel "stg_gc_ppp")
      [ty1,ty2,ty3,ty4]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3
          && isGcPtrType ty4 -> Just (mkGcLabel "stg_gc_pppp")
      _otherwise -> Nothing

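-- A few illustrative cases (informal; register names are made up):
--
--      regs                          canned entry point chosen
--      --------------------------    -------------------------
--      []                            stg_gc_noregs
--      [r]   where r is a GC ptr     stg_gc_unpt_r1
--      [d]   where d is a W64 float  stg_gc_d1
--      [p1,p2,p3]  all GC ptrs       stg_gc_ppp
--
-- Anything not covered by the patterns above (e.g. a mixture of pointers
-- and unboxed words, or more than four pointers) yields Nothing, and the
-- caller falls back to a generic call to stg_gc_noregs via genericGC.
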
-- Note [stg_gc arguments]
-- It might seem that we could avoid passing the arguments to the
-- stg_gc function, because they are already in the right registers.
-- While this is usually the case, it isn't always.  Sometimes the
-- code generator has cleverly avoided the eval in a case, e.g. in
-- ffi/should_run/4221.hs we found
--
--   case a_r1mb of z
--     FunPtr x y -> ...
--
-- where a_r1mb is bound to a top-level constructor, and is known to be
-- evaluated.  The codegen just assigns x, y and z, and continues;
-- R1 is never assigned.
--
-- So we'll have to rely on optimisations to eliminate these
-- assignments where possible.


-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
generic_gc = mkGcLabel "stg_gc_noregs"

-- | Create a CLabel for calling a garbage collector entry point
mkGcLabel :: String -> CmmExpr
mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit s)))

-------------------------------
heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack checkYield do_gc code
  = getHeapUsage $ \ hpHw ->
    -- Emit heap checks, but be sure to do it lazily so
    -- that the conditionals on hpHw don't cause a black hole
    do  { dflags <- getDynFlags
        ; let mb_alloc_bytes
                 | hpHw > mBLOCK_SIZE = sorry $ unlines
                    [" Trying to allocate more than "++show mBLOCK_SIZE++" bytes.",
                     "",
                     "This is currently not possible due to a limitation of GHC's code generator.",
                     "See https://gitlab.haskell.org/ghc/ghc/issues/4505 for details.",
                     "Suggestion: read data from a file instead of having large static data",
                     "structures in code."]
                 | hpHw > 0  = Just (mkIntExpr dflags (hpHw * (wORD_SIZE dflags)))
                 | otherwise = Nothing
                 where mBLOCK_SIZE = bLOCKS_PER_MBLOCK dflags * bLOCK_SIZE_W dflags
              stk_hwm | checkStack = Just (CmmLit CmmHighStackMark)
                      | otherwise  = Nothing
        ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc
        ; tickyAllocHeap True hpHw
        ; setRealHp hpHw
        ; code }

heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
heapStackCheckGen stk_hwm mb_bytes
  = do updfr_sz <- getUpdFrameOff
       lretry <- newBlockId
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry)

-- Note [Single stack check]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~
-- When compiling a function we can determine how much stack space it
-- will use. We therefore need to perform only a single stack check at
-- the beginning of a function to see if we have enough stack space.
--
-- The check boils down to comparing Sp-N with SpLim, where N is the
-- amount of stack space needed (see Note [Stack usage] below). *BUT*
-- at this stage of the pipeline we are not supposed to refer to Sp
-- itself, because the stack is not yet manifest, so we don't quite
-- know where Sp is pointing.

-- So instead of referring directly to Sp - as we used to do in the
-- past - the code generator uses (old + 0) in the stack check. That
-- is the address of the first word of the old area, so if we add N
-- we'll get the address of the highest used word.
--
-- This makes the check robust. For example, while we need to perform
-- only one stack check for each function, we could in theory place
-- more stack checks later in the function. They would be redundant,
-- but not incorrect (in a sense that they should not change program
-- behaviour). We need to make sure however that a stack check
-- inserted after incrementing the stack pointer checks for a
-- correspondingly smaller amount of stack space. This would not be the
-- case if the code generator produced direct references to Sp. By
-- referencing (old + 0) we make sure that we always check for a
-- correct amount of stack: when converting (old + 0) to Sp the stack
-- layout phase takes into account changes already made to the stack
-- pointer. The idea for this change came from observations made while
-- debugging #8275.

-- Note [Stack usage]
-- ~~~~~~~~~~~~~~~~~~
-- At the moment we convert from STG to Cmm we don't know N, the
-- number of bytes of stack that the function will use, so we use a
-- special late-bound CmmLit, namely
--       CmmHighStackMark
-- to stand for the number of bytes needed. When the stack is made
-- manifest, the number of bytes needed is calculated, and used to
-- replace occurrences of CmmHighStackMark
--
-- The (Maybe CmmExpr) passed to do_checks is usually
--     Just (CmmLit CmmHighStackMark)
-- but can also (in certain hand-written RTS functions) be
--     Just (CmmLit 8) or some other fixed value
-- If it is Nothing, we don't generate a stack check at all.

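-- For illustration (a hypothetical call, e.g. from code handling
-- hand-written .cmm): a combined check for a fixed 8 bytes of stack and
-- 16 bytes of heap headroom could be emitted with
--
--      heapStackCheckGen (Just (mkIntExpr dflags 8))
--                        (Just (mkIntExpr dflags 16))
--
-- Passing Nothing for either argument omits the corresponding check.
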
do_checks :: Maybe CmmExpr    -- Should we check the stack?
                              -- See Note [Stack usage]
          -> Bool             -- Should we check for preemption?
          -> Maybe CmmExpr    -- Heap headroom (bytes)
          -> CmmAGraph        -- What to do on failure
          -> FCode ()
do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
  dflags <- getDynFlags
  gc_id <- newBlockId

  let
    Just alloc_lit = mb_alloc_lit

    bump_hp   = cmmOffsetExprB dflags hpExpr alloc_lit

    -- Sp overflow if ((old + 0) - CmmHighStack < SpLim)
    -- At the beginning of a function old + 0 = Sp
    -- See Note [Single stack check]
    sp_oflo sp_hwm =
         CmmMachOp (mo_wordULt dflags)
                  [CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
                             [CmmStackSlot Old 0, sp_hwm],
                   CmmReg spLimReg]

    -- Hp overflow if (Hp > HpLim)
    -- (Hp has been incremented by now)
    -- HpLim points to the LAST WORD of valid allocation space.
    hp_oflo = CmmMachOp (mo_wordUGt dflags) [hpExpr, hpLimExpr]

    alloc_n = mkAssign hpAllocReg alloc_lit

  case mb_stk_hwm of
    Nothing -> return ()
    Just stk_hwm -> tickyStackCheck
      >> (emit =<< mkCmmIfGoto' (sp_oflo stk_hwm) gc_id (Just False) )

  -- Emit new label that might potentially be a header
  -- of a self-recursive tail call.
  -- See Note [Self-recursive loop header].
  self_loop_info <- getSelfLoop
  case self_loop_info of
    Just (_, loop_header_id, _)
        | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
    _otherwise -> return ()

  if (isJust mb_alloc_lit)
    then do
     tickyHeapCheck
     emitAssign hpReg bump_hp
     emit =<< mkCmmIfThen' hp_oflo (alloc_n <*> mkBranch gc_id) (Just False)
    else do
      when (checkYield && not (gopt Opt_OmitYields dflags)) $ do
         -- Yielding if HpLim == 0
         let yielding = CmmMachOp (mo_wordEq dflags)
                                  [CmmReg hpLimReg,
                                   CmmLit (zeroCLit dflags)]
         emit =<< mkCmmIfGoto' yielding gc_id (Just False)

  tscope <- getTickScope
  emitOutOfLine gc_id
   (do_gc, tscope) -- this is expected to jump back somewhere

                -- Test for stack pointer exhaustion, then
                -- bump heap pointer, and test for heap exhaustion
                -- Note that we don't move the heap pointer unless the
                -- stack check succeeds.  Otherwise we might end up
                -- with slop at the end of the current block, which can
                -- confuse the LDV profiler.

-- Note [Self-recursive loop header]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Self-recursive loop header is required by loopification optimization (See
-- Note [Self-recursive tail calls] in StgCmmExpr). We emit it if:
--
--  1. There is information about self-loop in the FCode environment. We don't
--     check the binder (first component of the self_loop_info) because we are
--     certain that if the self-loop info is present then we are compiling the
--     binder body. Reason: the only possible way to get here with the
--     self_loop_info present is from closureCodeBody.
--
--  2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible
--     to preempt the heap check (see #367 for motivation behind this check). It
--     is True for heap checks placed at the entry to a function and
--     let-no-escape heap checks but false for other heap checks (eg. in case
--     alternatives or created from hand-written high-level Cmm). The second
--     check (isJust mb_stk_hwm) is true for heap checks at the entry to a
--     function and some heap checks created in hand-written Cmm. Otherwise it
--     is Nothing. In other words the only situation when both conditions are
--     true is when compiling stack and heap checks at the entry to a
--     function. This is the only situation when we want to emit a self-loop
--     label.