-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmHeap (
        getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset, hpRel,

        entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
        heapStackCheckGen,
        entryHeapCheck',

        mkStaticClosureFields, mkStaticClosure,

        allocDynClosure, allocDynClosureCmm,
        emitSetDynHdr
    ) where

#include "HsVersions.h"

import StgSyn
import CLabel
import StgCmmLayout
import StgCmmUtils
import StgCmmMonad
import StgCmmProf (profDynAlloc, dynProfHdr, staticProfHdr)
import StgCmmTicky
import StgCmmClosure
import StgCmmEnv

import MkGraph

import Hoopl
import SMRep
import Cmm
import CmmUtils
import CostCentre
import IdInfo( CafInfo(..), mayHaveCafRefs )
import Id ( Id )
import Module
import DynFlags
import FastString( mkFastString, fsLit )

import Control.Monad (when)
import Data.Maybe (isJust)

-----------------------------------------------------------
-- Initialise dynamic heap objects
-----------------------------------------------------------

allocDynClosure
        :: Maybe Id
        -> CmmInfoTable
        -> LambdaFormInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
        -> FCode CmmExpr        -- returns Hp+n

allocDynClosureCmm
        :: Maybe Id -> CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
        -> [(CmmExpr, ByteOff)]
        -> FCode CmmExpr        -- returns Hp+n
-- allocDynClosure allocates the object in the heap,
-- and modifies the virtual Hp to account for this.
-- The returned CmmExpr points to the start of the
-- newly allocated object.

-- allocDynClosure returns an Hp-relative CmmExpr (Hp+n), and hence the
-- result is only valid until Hp is changed.  The caller should assign
-- the result to a LocalReg if it is required to remain live.
--
-- The reason we don't assign it to a LocalReg here is that the caller
-- is often about to call regIdInfo, which immediately assigns the
-- result of allocDynClosure to a new temp in order to add the tag.
-- So by not generating a LocalReg here we avoid a common source of
-- new temporaries and save some compile time.  This can be quite
-- significant - see test T4801.
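--
-- A rough sketch of a typical call site (bndr, dest_reg and tag are
-- illustrative names, not taken from the real callers):
--
--      hp_rel <- allocDynClosure (Just bndr) info_tbl lf_info
--                                use_cc blame_cc args_w_offsets
--      -- bank the (possibly tagged) pointer in a register of the
--      -- caller's choosing, so it stays valid once Hp moves on
--      emitAssign (CmmLocal dest_reg) (cmmOffsetB dflags hp_rel tag)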


allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets
  = do  { let (args, offsets) = unzip args_w_offsets
        ; cmm_args <- mapM getArgAmode args     -- No void args
        ; allocDynClosureCmm mb_id info_tbl lf_info
                             use_cc _blame_cc (zip cmm_args offsets)
        }

allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets
  = do  { virt_hp <- getVirtHp

        -- SAY WHAT WE ARE ABOUT TO DO
        ; let rep = cit_rep info_tbl
        ; tickyDynAlloc mb_id rep lf_info
        ; profDynAlloc rep use_cc

        -- FIND THE OFFSET OF THE INFO-PTR WORD
        ; let info_offset = virt_hp + 1
                -- info_offset is the VirtualHpOffset of the first
                -- word of the new object
                -- Remember, virtHp points to last allocated word,
                -- ie 1 *before* the info-ptr word of new object.

              info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))

        -- ALLOCATE THE OBJECT
        ; base <- getHpRelOffset info_offset
        ; emitComment $ mkFastString "allocDynClosure"
        ; emitSetDynHdr base info_ptr use_cc
        ; let (cmm_args, offsets) = unzip amodes_w_offsets
        ; hpStore base cmm_args offsets

        -- BUMP THE VIRTUAL HEAP POINTER
        ; dflags <- getDynFlags
        ; setVirtHp (virt_hp + heapClosureSize dflags rep)

        ; getHpRelOffset info_offset
        }

emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs
  = do dflags <- getDynFlags
       hpStore base (header dflags) [0, wORD_SIZE dflags ..]
  where
    header :: DynFlags -> [CmmExpr]
    header dflags = [info_ptr] ++ dynProfHdr dflags ccs
        -- ToDo: Parallel stuff
        -- No ticky header

hpStore :: CmmExpr -> [CmmExpr] -> [ByteOff] -> FCode ()
-- For each pair (val, off), store val in base[off]
hpStore base vals offs
  = do dflags <- getDynFlags
       let mk_store val off = mkStore (cmmOffsetB dflags base off) val
       emit (catAGraphs (zipWith mk_store vals offs))

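-- As a concrete (purely illustrative) example, on a 64-bit target
--
--      hpStore base [info_ptr, x, y] [0, 8, 16]
--
-- emits, roughly, the stores
--
--      I64[base +  0] = info_ptr;
--      I64[base +  8] = x;
--      I64[base + 16] = y;
--
-- which is the pattern that emitSetDynHdr and allocDynClosureCmm rely on.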

-----------------------------------------------------------
-- Layout of static closures
-----------------------------------------------------------

-- Make a static closure, adding on any extra padding needed for CAFs,
-- and adding a static link field if necessary.

mkStaticClosureFields
        :: DynFlags
        -> CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields dflags info_tbl ccs caf_refs payload
  = mkStaticClosure dflags info_lbl ccs payload padding
        static_link_field saved_info_field
  where
    info_lbl = cit_lbl info_tbl

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- the static_link and saved_info fields must always be in the
    -- same place.  So we use isThunkRep rather than closureUpdReqd
    -- here:

    is_caf = isThunkRep (cit_rep info_tbl)

    padding
        | is_caf && null payload = [mkIntCLit dflags 0]
        | otherwise = []

    static_link_field
        | is_caf || staticClosureNeedsLink (mayHaveCafRefs caf_refs) info_tbl
        = [static_link_value]
        | otherwise
        = []

    saved_info_field
        | is_caf     = [mkIntCLit dflags 0]
        | otherwise  = []

    -- For a static constructor which has NoCafRefs, we set the
    -- static link field to a non-zero value so the garbage
    -- collector will ignore it.
    static_link_value
        | mayHaveCafRefs caf_refs  = mkIntCLit dflags 0
        | otherwise                = mkIntCLit dflags 1  -- No CAF refs

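-- As an illustration (a sketch rather than actual generated output): on a
-- 64-bit, non-profiling build a top-level CAF therefore consists of the
-- four words
--
--      [ CmmLabel info_lbl    -- info ptr
--      , 0                    -- padding, leaving room for the indirectee
--      , 0                    -- static link (a CAF may have CAF refs)
--      , 0 ]                  -- saved info
--
-- plus the static profiling header when profiling is enabled.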

mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
  =  [CmmLabel info_lbl]
  ++ staticProfHdr dflags ccs
  ++ concatMap (padLitToWord dflags) payload
  ++ padding
  ++ static_link_field
  ++ saved_info_field

-- JD: Simon had elided this padding, but without it the C back end fails
-- an assertion.  Maybe it's a bad assertion, and this padding is indeed
-- unnecessary?
padLitToWord :: DynFlags -> CmmLit -> [CmmLit]
padLitToWord dflags lit = lit : padding pad_length
  where width = typeWidth (cmmLitType dflags lit)
        pad_length = wORD_SIZE dflags - widthInBytes width :: Int

        padding n | n <= 0 = []
                  | n `rem` 2 /= 0 = CmmInt 0 W8  : padding (n-1)
                  | n `rem` 4 /= 0 = CmmInt 0 W16 : padding (n-2)
                  | n `rem` 8 /= 0 = CmmInt 0 W32 : padding (n-4)
                  | otherwise      = CmmInt 0 W64 : padding (n-8)
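
-- For instance, on a 64-bit target (wORD_SIZE dflags == 8) a W32 literal
-- needs 4 bytes of padding, so (an illustrative equation, not a real test)
--
--      padLitToWord dflags (CmmInt 3 W32)  ==  [CmmInt 3 W32, CmmInt 0 W32]
--
-- keeping every payload entry word-aligned within the static closure.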

-----------------------------------------------------------
-- Heap overflow checking
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
        r = gc_1p( r )
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.

  * A series of canned entry points like
        gcfun_2p( f, x, y )
    where f is a function closure of arity 2
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y)

These are used in the following circumstances

  * entryHeapCheck: Function entry
      (a) With a canned GC entry sequence
          f( f_clo, x:ptr, y:ptr ) {
              Hp = Hp+8
              if Hp > HpLim goto L
              ...
            L: HpAlloc = 8
               jump gcfun_2p( f_clo, x, y ) }
          Note the tail call to the garbage collector;
          it should do no register shuffling

      (b) No canned sequence
          f( f_clo, x:ptr, y:ptr, ...etc... ) {
            T: Hp = Hp+8
               if Hp > HpLim goto L
               ...
            L: HpAlloc = 8
               call gc()   -- Needs an info table
               goto T }

  * altHeapCheck: Immediately following an eval
    Started as
        case f x y of r { (p,q) -> rhs }
      (a) With a canned sequence for the results of f
          (which is the very common case, since
          all boxed cases return just one pointer)
              ...
              r = f( x, y )
           K:      -- K needs an info table
              Hp = Hp+8
              if Hp > HpLim goto L
              ...code for rhs...

           L: r = gc_1p( r )
              goto K }

          Here, the info table needed by the call
          to gc_1p should be the *same* as the
          one for the call to f; the C-- optimiser
          spots this sharing opportunity.

      (b) No canned sequence for results of f
          Note second info table
              ...
              (r1,r2,r3) = call f( x, y )
           K:
              Hp = Hp+8
              if Hp > HpLim goto L
              ...code for rhs...

           L: call gc()    -- Extra info table here
              goto K

  * generalHeapCheck: Anywhere else
       e.g. entry to thunk
            case branch *not* following eval,
            or let-no-escape
    Exactly the same as the previous case:

     K:      -- K needs an info table
        Hp = Hp+8
        if Hp > HpLim goto L
        ...

     L: call gc()
        goto K
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.

entryHeapCheck :: ClosureInfo
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not same as len args b/c of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()

entryHeapCheck cl_info nodeSet arity args code
  = entryHeapCheck' is_fastf node arity args code
  where
    node = case nodeSet of
              Just r  -> CmmReg (CmmLocal r)
              Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info)

    is_fastf = case closureFunInfo cl_info of
                 Just (_, ArgGen _) -> False
                 _otherwise         -> True

-- | lower-level version for CmmParse
entryHeapCheck' :: Bool           -- is a known function pattern
                -> CmmExpr        -- expression for the closure pointer
                -> Int            -- Arity -- not same as len args b/c of voids
                -> [LocalReg]     -- Non-void args (empty for thunk)
                -> FCode ()
                -> FCode ()
entryHeapCheck' is_fastf node arity args code
  = do dflags <- getDynFlags
       let is_thunk = arity == 0

           args' = map (CmmReg . CmmLocal) args
           stg_gc_fun    = CmmReg (CmmGlobal GCFun)
           stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)

           {- Thunks:          jump stg_gc_enter_1

              Function (fast): call (NativeNode) stg_gc_fun(fun, args)

              Function (slow): call (slow) stg_gc_fun(fun, args)
           -}
           gc_call upd
               | is_thunk
                 = mkJump dflags NativeNodeCall stg_gc_enter1 [node] upd

               | is_fastf
                 = mkJump dflags NativeNodeCall stg_gc_fun (node : args') upd

               | otherwise
                 = mkJump dflags Slow stg_gc_fun (node : args') upd

       updfr_sz <- getUpdFrameOff

       loop_id <- newLabelC
       emitLabel loop_id
       heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code

-- ------------------------------------------------------------
-- A heap/stack check in a case alternative


-- If there are multiple alts and we need to GC, but don't have a
-- continuation already (the scrut was simple), then we should
-- pre-generate the continuation.  (If there are multiple alts it is
-- always a canned GC point.)

-- altHeapCheck:
-- If we have a return continuation,
--   then if it is a canned GC pattern,
--          then we do mkJumpReturnsTo
--          else we do a normal call to stg_gc_noregs
--   else if it is a canned GC pattern,
--          then generate the continuation and do mkCallReturnsTo
--          else we do a normal call to stg_gc_noregs

altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck regs code = altOrNoEscapeHeapCheck False regs code

altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
altOrNoEscapeHeapCheck checkYield regs code = do
    dflags <- getDynFlags
    case cannedGCEntryPoint dflags regs of
      Nothing -> genericGC checkYield code
      Just gc -> do
        lret <- newLabelC
        let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) regs []
        lcont <- newLabelC
        emitOutOfLine lret (copyin <*> mkBranch lcont)
        emitLabel lcont
        cannedGCReturnsTo checkYield False gc regs lret off code

altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
altHeapCheckReturnsTo regs lret off code
  = do dflags <- getDynFlags
       case cannedGCEntryPoint dflags regs of
           Nothing -> genericGC False code
           Just gc -> cannedGCReturnsTo False True gc regs lret off code

-- noEscapeHeapCheck is implemented identically to altHeapCheck (which
-- is more efficient), but cannot be optimized away in the non-allocating
-- case because it may occur in a loop
noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code

cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
                  -> FCode a
                  -> FCode a
cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code
  = do dflags <- getDynFlags
       updfr_sz <- getUpdFrameOff
       heapCheck False checkYield (gc_call dflags gc updfr_sz) code
  where
    reg_exprs = map (CmmReg . CmmLocal) regs
      -- Note [stg_gc arguments]

      -- NB. we use the NativeReturn convention for passing arguments
      -- to the canned heap-check routines, because we are in a case
      -- alternative and hence the [LocalReg] was passed to us in the
      -- NativeReturn convention.
    gc_call dflags label sp
      | cont_on_stack
        = mkJumpReturnsTo dflags label NativeReturn reg_exprs lret off sp
      | otherwise
        = mkCallReturnsTo dflags label NativeReturn reg_exprs lret off sp []

genericGC :: Bool -> FCode a -> FCode a
genericGC checkYield code
  = do updfr_sz <- getUpdFrameOff
       lretry <- newLabelC
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       heapCheck False checkYield (call <*> mkBranch lretry) code

cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint dflags regs
  = case map localRegType regs of
      []  -> Just (mkGcLabel "stg_gc_noregs")
      [ty]
          | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1")
          | isFloatType ty -> case width of
                                W32       -> Just (mkGcLabel "stg_gc_f1")
                                W64       -> Just (mkGcLabel "stg_gc_d1")
                                _         -> Nothing

          | width == wordWidth dflags -> Just (mkGcLabel "stg_gc_unbx_r1")
          | width == W64              -> Just (mkGcLabel "stg_gc_l1")
          | otherwise                 -> Nothing
          where
              width = typeWidth ty
      [ty1,ty2]
          |  isGcPtrType ty1
          && isGcPtrType ty2 -> Just (mkGcLabel "stg_gc_pp")
      [ty1,ty2,ty3]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3 -> Just (mkGcLabel "stg_gc_ppp")
      [ty1,ty2,ty3,ty4]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3
          && isGcPtrType ty4 -> Just (mkGcLabel "stg_gc_pppp")
      _otherwise -> Nothing
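
-- So, for example (illustrative, reading off the table above):
--
--   * an eval returning a single pointer (the common boxed case) retries
--     its allocation via stg_gc_unpt_r1;
--   * one returning a single Double# uses stg_gc_d1;
--   * one returning two pointers (e.g. an unboxed pair of boxed values)
--     uses stg_gc_pp;
--
-- and anything without a canned entry point falls back on genericGC.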

-- Note [stg_gc arguments]
-- It might seem that we could avoid passing the arguments to the
-- stg_gc function, because they are already in the right registers.
-- While this is usually the case, it isn't always.  Sometimes the
-- code generator has cleverly avoided the eval in a case, e.g. in
-- ffi/should_run/4221.hs we found
--
--   case a_r1mb of z
--     FunPtr x y -> ...
--
-- where a_r1mb is bound to a top-level constructor, and is known to be
-- evaluated.  The codegen just assigns x, y and z, and continues;
-- R1 is never assigned.
--
-- So we'll have to rely on optimisations to eliminate these
-- assignments where possible.



-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
generic_gc = mkGcLabel "stg_gc_noregs"

-- | Create a CLabel for calling a garbage collector entry point
mkGcLabel :: String -> CmmExpr
mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit s)))

-------------------------------
heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack checkYield do_gc code
  = getHeapUsage $ \ hpHw ->
    -- Emit heap checks, but be sure to do it lazily so
    -- that the conditionals on hpHw don't cause a black hole
    do  { dflags <- getDynFlags
        ; let mb_alloc_bytes
                 | hpHw > 0  = Just (mkIntExpr dflags (hpHw * (wORD_SIZE dflags)))
                 | otherwise = Nothing
              stk_hwm | checkStack = Just (CmmLit CmmHighStackMark)
                      | otherwise  = Nothing
        ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc
        ; tickyAllocHeap True hpHw
        ; setRealHp hpHw
        ; code }

heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
heapStackCheckGen stk_hwm mb_bytes
  = do updfr_sz <- getUpdFrameOff
       lretry <- newLabelC
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry)

-- Note [Single stack check]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~
-- When compiling a function we can determine how much stack space it
-- will use. We therefore need to perform only a single stack check at
-- the beginning of a function to see if we have enough stack space.
--
-- The check boils down to comparing Sp-N with SpLim, where N is the
-- amount of stack space needed (see Note [Stack usage] below). *BUT*
-- at this stage of the pipeline we are not supposed to refer to Sp
-- itself, because the stack is not yet manifest, so we don't quite
-- know where Sp is pointing.
--
-- So instead of referring directly to Sp - as we used to do in the
-- past - the code generator uses (old + 0) in the stack check. That
-- is the address of the first word of the old area, so if we add N
-- we'll get the address of the highest used word.
--
-- This makes the check robust. For example, while we need to perform
-- only one stack check for each function, we could in theory place
-- more stack checks later in the function. They would be redundant,
-- but not incorrect (in the sense that they should not change program
-- behaviour). We need to make sure, however, that a stack check
-- inserted after incrementing the stack pointer checks for a
-- correspondingly smaller amount of stack space. This would not be
-- the case if the code generator produced direct references to Sp.
-- By referencing (old + 0) we make sure that we always check for the
-- correct amount of stack: when converting (old + 0) to Sp, the stack
-- layout phase takes into account the changes already made to the
-- stack pointer. The idea for this change came from observations made
-- while debugging #8275.
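--
-- As a rough sketch (not literal output), the single check emitted at the
-- entry of a function that needs N bytes of stack and A bytes of heap is
--
--      if ((old + 0) - N < SpLim) goto gc;      -- see sp_oflo in do_checks
--      Hp = Hp + A;
--      if (Hp > HpLim) { HpAlloc = A; goto gc; }
--
-- where N is really CmmHighStackMark until the stack layout phase fills it
-- in (see Note [Stack usage] below).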

-- Note [Stack usage]
-- ~~~~~~~~~~~~~~~~~~
-- At the moment we convert from STG to Cmm we don't know N, the
-- number of bytes of stack that the function will use, so we use a
-- special late-bound CmmLit, namely
--       CmmHighStackMark
-- to stand for the number of bytes needed. When the stack is made
-- manifest, the number of bytes needed is calculated, and used to
-- replace occurrences of CmmHighStackMark
--
-- The (Maybe CmmExpr) passed to do_checks is usually
--     Just (CmmLit CmmHighStackMark)
-- but can also be (in certain hand-written RTS functions)
--     Just (CmmLit 8) or some other fixed value.
-- If it is Nothing, we don't generate a stack check at all.

do_checks :: Maybe CmmExpr    -- Should we check the stack?
                              -- See Note [Stack usage]
          -> Bool             -- Should we check for preemption?
          -> Maybe CmmExpr    -- Heap headroom (bytes)
          -> CmmAGraph        -- What to do on failure
          -> FCode ()
do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
  dflags <- getDynFlags
  gc_id <- newLabelC

  let
    Just alloc_lit = mb_alloc_lit

    bump_hp   = cmmOffsetExprB dflags (CmmReg hpReg) alloc_lit

    -- Sp overflow if ((old + 0) - CmmHighStackMark < SpLim)
    -- At the beginning of a function old + 0 = Sp
    -- See Note [Single stack check]
    sp_oflo sp_hwm =
         CmmMachOp (mo_wordULt dflags)
                   [CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
                              [CmmStackSlot Old 0, sp_hwm],
                    CmmReg spLimReg]

    -- Hp overflow if (Hp > HpLim)
    -- (Hp has been incremented by now)
    -- HpLim points to the LAST WORD of valid allocation space.
    hp_oflo = CmmMachOp (mo_wordUGt dflags)
                        [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]

    alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit

  case mb_stk_hwm of
    Nothing -> return ()
    Just stk_hwm -> tickyStackCheck >> (emit =<< mkCmmIfGoto (sp_oflo stk_hwm) gc_id)

  -- Emit new label that might potentially be a header
  -- of a self-recursive tail call.
  -- See Note [Self-recursive loop header].
  self_loop_info <- getSelfLoop
  case self_loop_info of
    Just (_, loop_header_id, _)
        | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
    _otherwise -> return ()

  if (isJust mb_alloc_lit)
    then do
     tickyHeapCheck
     emitAssign hpReg bump_hp
     emit =<< mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
    else do
      when (checkYield && not (gopt Opt_OmitYields dflags)) $ do
         -- Yielding if HpLim == 0
         let yielding = CmmMachOp (mo_wordEq dflags)
                                  [CmmReg (CmmGlobal HpLim),
                                   CmmLit (zeroCLit dflags)]
         emit =<< mkCmmIfGoto yielding gc_id

  emitOutOfLine gc_id $
     do_gc -- this is expected to jump back somewhere

  -- Test for stack pointer exhaustion, then
  -- bump heap pointer, and test for heap exhaustion
  -- Note that we don't move the heap pointer unless the
  -- stack check succeeds.  Otherwise we might end up
  -- with slop at the end of the current block, which can
  -- confuse the LDV profiler.
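
-- In the non-allocating case with checkYield set, the generated test is
-- instead just (a sketch):
--
--      if (HpLim == 0) goto gc;
--
-- the RTS sets HpLim to zero when it wants the thread to yield at its
-- next heap-check point.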

-- Note [Self-recursive loop header]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- A self-recursive loop header is required by the loopification
-- optimisation (see Note [Self-recursive tail calls] in StgCmmExpr).
-- We emit it if:
--
--  1. There is information about the self-loop in the FCode environment.
--     We don't check the binder (first component of the self_loop_info)
--     because we are certain that if the self-loop info is present then
--     we are compiling the binder body.  Reason: the only possible way
--     to get here with the self_loop_info present is from closureCodeBody.
--
--  2. checkYield && isJust mb_stk_hwm.  checkYield tells us that it is
--     possible to preempt the heap check (see #367 for motivation behind
--     this check).  It is True for heap checks placed at the entry to a
--     function and for let-no-escape heap checks, but False for other
--     heap checks (e.g. in case alternatives, or those created from
--     hand-written high-level Cmm).  The second condition
--     (isJust mb_stk_hwm) is True for heap checks at the entry to a
--     function and for some heap checks created in hand-written Cmm;
--     otherwise mb_stk_hwm is Nothing.  In other words, the only
--     situation in which both conditions hold is when compiling the
--     stack and heap checks at the entry to a function.  This is the
--     only situation in which we want to emit a self-loop label.
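--
-- A sketch of the resulting shape of a loopified function f (illustrative
-- only; cf. Note [Self-recursive tail calls] in StgCmmExpr):
--
--      f_entry:
--          if ((old + 0) - <stack usage> < SpLim) goto gc;
--      loop_header:                  -- the label emitted above
--          Hp = Hp + <bytes>;
--          if (Hp > HpLim) { HpAlloc = <bytes>; goto gc; }
--          ...body; self-recursive tail calls jump back to loop_header,
--          repeating the heap check but not the stack check...
--      gc: ...call into the GC, then retry...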