Code-size optimisation for top-level indirections (#7308)
[ghc.git] / compiler / codeGen / StgCmmBind.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C-- code generation: bindings
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmBind (
10 cgTopRhsClosure,
11 cgBind,
12 emitBlackHoleCode,
13 pushUpdateFrame, emitUpdateFrame
14 ) where
15
16 #include "HsVersions.h"
17
18 import StgCmmExpr
19 import StgCmmMonad
20 import StgCmmEnv
21 import StgCmmCon
22 import StgCmmHeap
23 import StgCmmProf
24 import StgCmmTicky
25 import StgCmmGran
26 import StgCmmLayout
27 import StgCmmUtils
28 import StgCmmClosure
29 import StgCmmForeign (emitPrimCall)
30
31 import MkGraph
32 import CoreSyn ( AltCon(..) )
33 import SMRep
34 import Cmm
35 import CmmUtils
36 import CLabel
37 import StgSyn
38 import CostCentre
39 import Id
40 import IdInfo
41 import Name
42 import Module
43 import ListSetOps
44 import Util
45 import BasicTypes
46 import Outputable
47 import FastString
48 import Maybes
49 import DynFlags
50
51 import Control.Monad
52
53 ------------------------------------------------------------------------
54 -- Top-level bindings
55 ------------------------------------------------------------------------
56
57 -- For closures bound at top level, allocate in static space.
58 -- They should have no free variables.
59
60 cgTopRhsClosure :: RecFlag -- member of a recursive group?
61 -> Id
62 -> CostCentreStack -- Optional cost centre annotation
63 -> StgBinderInfo
64 -> UpdateFlag
65 -> [Id] -- Args
66 -> StgExpr
67 -> FCode (CgIdInfo, FCode ())
68
69 cgTopRhsClosure rec id ccs _ upd_flag args body
70 = do { dflags <- getDynFlags
71 ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args
72 ; let closure_label = mkLocalClosureLabel (idName id) (idCafInfo id)
73 cg_id_info = litIdInfo dflags id lf_info (CmmLabel closure_label)
74 ; return (cg_id_info, gen_code dflags lf_info closure_label)
75 }
76 where
77 -- special case for a indirection (f = g). We create an IND_STATIC
78 -- closure pointing directly to the indirectee. This is exactly
79 -- what the CAF will eventually evaluate to anyway, we're just
80 -- shortcutting the whole process, and generating a lot less code
81 -- (#7308)
82 --
83 -- Note: we omit the optimisation when this binding is part of a
84 -- recursive group, because the optimisation would inhibit the black
85 -- hole detection from working in that case. Test
86 -- concurrent/should_run/4030 fails, for instance.
87 --
88 gen_code dflags _ closure_label
89 | StgApp f [] <- body, null args, isNonRec rec
90 = do
91 cg_info <- getCgIdInfo f
92 let closure_rep = mkStaticClosureFields dflags
93 indStaticInfoTable ccs MayHaveCafRefs
94 [unLit (idInfoToAmode cg_info)]
95 emitDataLits closure_label closure_rep
96 return ()
97
98 gen_code dflags lf_info closure_label
99 = do { -- LAY OUT THE OBJECT
100 let name = idName id
101 ; mod_name <- getModuleName
102 ; let descr = closureDescription dflags mod_name name
103 closure_info = mkClosureInfo dflags True id lf_info 0 0 descr
104
105 caffy = idCafInfo id
106 info_tbl = mkCmmInfo closure_info -- XXX short-cut
107 closure_rep = mkStaticClosureFields dflags info_tbl ccs caffy []
108
109 -- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY)
110 ; emitDataLits closure_label closure_rep
111 ; let fv_details :: [(NonVoid Id, VirtualHpOffset)]
112 (_, _, fv_details) = mkVirtHeapOffsets dflags (isLFThunk lf_info)
113 (addIdReps [])
114 -- Don't drop the non-void args until the closure info has been made
115 ; forkClosureBody (closureCodeBody True id closure_info ccs
116 (nonVoidIds args) (length args) body fv_details)
117
118 ; return () }
119
120 unLit (CmmLit l) = l
121 unLit _ = panic "unLit"
122
123 ------------------------------------------------------------------------
124 -- Non-top-level bindings
125 ------------------------------------------------------------------------
126
127 cgBind :: StgBinding -> FCode ()
128 cgBind (StgNonRec name rhs)
129 = do { (info, fcode) <- cgRhs name rhs
130 ; addBindC (cg_id info) info
131 ; init <- fcode
132 ; emit init
133 }
134 -- init cannot be used in body, so slightly better to sink it eagerly
135
136 cgBind (StgRec pairs)
137 = do { r <- sequence $ unzipWith cgRhs pairs
138 ; let (id_infos, fcodes) = unzip r
139 ; addBindsC id_infos
140 ; (inits, body) <- getCodeR $ sequence fcodes
141 ; emit (catAGraphs inits <*> body) }
142
143 {- Note [cgBind rec]
144
145 Recursive let-bindings are tricky.
146 Consider the following pseudocode:
147
148 let x = \_ -> ... y ...
149 y = \_ -> ... z ...
150 z = \_ -> ... x ...
151 in ...
152
153 For each binding, we need to allocate a closure, and each closure must
154 capture the address of the other closures.
155 We want to generate the following C-- code:
156 // Initialization Code
157 x = hp - 24; // heap address of x's closure
158 y = hp - 40; // heap address of x's closure
159 z = hp - 64; // heap address of x's closure
160 // allocate and initialize x
161 m[hp-8] = ...
162 m[hp-16] = y // the closure for x captures y
163 m[hp-24] = x_info;
164 // allocate and initialize y
165 m[hp-32] = z; // the closure for y captures z
166 m[hp-40] = y_info;
167 // allocate and initialize z
168 ...
169
170 For each closure, we must generate not only the code to allocate and
171 initialize the closure itself, but also some initialization Code that
172 sets a variable holding the closure pointer.
173
174 We could generate a pair of the (init code, body code), but since
175 the bindings are recursive we also have to initialise the
176 environment with the CgIdInfo for all the bindings before compiling
177 anything. So we do this in 3 stages:
178
179 1. collect all the CgIdInfos and initialise the environment
180 2. compile each binding into (init, body) code
181 3. emit all the inits, and then all the bodies
182
183 We'd rather not have separate functions to do steps 1 and 2 for
184 each binding, since in pratice they share a lot of code. So we
185 have just one function, cgRhs, that returns a pair of the CgIdInfo
186 for step 1, and a monadic computation to generate the code in step
187 2.
188
189 The alternative to separating things in this way is to use a
190 fixpoint. That's what we used to do, but it introduces a
191 maintenance nightmare because there is a subtle dependency on not
192 being too strict everywhere. Doing things this way means that the
193 FCode monad can be strict, for example.
194 -}
195
196 cgRhs :: Id
197 -> StgRhs
198 -> FCode (
199 CgIdInfo -- The info for this binding
200 , FCode CmmAGraph -- A computation which will generate the
201 -- code for the binding, and return an
202 -- assignent of the form "x = Hp - n"
203 -- (see above)
204 )
205
206 cgRhs name (StgRhsCon cc con args)
207 = buildDynCon name cc con args
208
209 cgRhs name (StgRhsClosure cc bi fvs upd_flag _srt args body)
210 = do dflags <- getDynFlags
211 mkRhsClosure dflags name cc bi (nonVoidIds fvs) upd_flag args body
212
213 ------------------------------------------------------------------------
214 -- Non-constructor right hand sides
215 ------------------------------------------------------------------------
216
217 mkRhsClosure :: DynFlags -> Id -> CostCentreStack -> StgBinderInfo
218 -> [NonVoid Id] -- Free vars
219 -> UpdateFlag
220 -> [Id] -- Args
221 -> StgExpr
222 -> FCode (CgIdInfo, FCode CmmAGraph)
223
224 {- mkRhsClosure looks for two special forms of the right-hand side:
225 a) selector thunks
226 b) AP thunks
227
228 If neither happens, it just calls mkClosureLFInfo. You might think
229 that mkClosureLFInfo should do all this, but it seems wrong for the
230 latter to look at the structure of an expression
231
232 Note [Selectors]
233 ~~~~~~~~~~~~~~~~
234 We look at the body of the closure to see if it's a selector---turgid,
235 but nothing deep. We are looking for a closure of {\em exactly} the
236 form:
237
238 ... = [the_fv] \ u [] ->
239 case the_fv of
240 con a_1 ... a_n -> a_i
241
242 Note [Ap thunks]
243 ~~~~~~~~~~~~~~~~
244 A more generic AP thunk of the form
245
246 x = [ x_1...x_n ] \.. [] -> x_1 ... x_n
247
248 A set of these is compiled statically into the RTS, so we just use
249 those. We could extend the idea to thunks where some of the x_i are
250 global ids (and hence not free variables), but this would entail
251 generating a larger thunk. It might be an option for non-optimising
252 compilation, though.
253
254 We only generate an Ap thunk if all the free variables are pointers,
255 for semi-obvious reasons.
256
257 -}
258
259 ---------- Note [Selectors] ------------------
260 mkRhsClosure dflags bndr _cc _bi
261 [NonVoid the_fv] -- Just one free var
262 upd_flag -- Updatable thunk
263 [] -- A thunk
264 (StgCase (StgApp scrutinee [{-no args-}])
265 _ _ _ _ -- ignore uniq, etc.
266 (AlgAlt _)
267 [(DataAlt _, params, _use_mask,
268 (StgApp selectee [{-no args-}]))])
269 | the_fv == scrutinee -- Scrutinee is the only free variable
270 && maybeToBool maybe_offset -- Selectee is a component of the tuple
271 && offset_into_int <= mAX_SPEC_SELECTEE_SIZE dflags -- Offset is small enough
272 = -- NOT TRUE: ASSERT(is_single_constructor)
273 -- The simplifier may have statically determined that the single alternative
274 -- is the only possible case and eliminated the others, even if there are
275 -- other constructors in the datatype. It's still ok to make a selector
276 -- thunk in this case, because we *know* which constructor the scrutinee
277 -- will evaluate to.
278 --
279 -- srt is discarded; it must be empty
280 cgRhsStdThunk bndr lf_info [StgVarArg the_fv]
281 where
282 lf_info = mkSelectorLFInfo bndr offset_into_int
283 (isUpdatable upd_flag)
284 (_, _, params_w_offsets) = mkVirtConstrOffsets dflags (addIdReps params)
285 -- Just want the layout
286 maybe_offset = assocMaybe params_w_offsets (NonVoid selectee)
287 Just the_offset = maybe_offset
288 offset_into_int = the_offset - fixedHdrSize dflags
289
290 ---------- Note [Ap thunks] ------------------
291 mkRhsClosure dflags bndr _cc _bi
292 fvs
293 upd_flag
294 [] -- No args; a thunk
295 (StgApp fun_id args)
296
297 | args `lengthIs` (arity-1)
298 && all (isGcPtrRep . idPrimRep . stripNV) fvs
299 && isUpdatable upd_flag
300 && arity <= mAX_SPEC_AP_SIZE dflags
301 && not (gopt Opt_SccProfilingOn dflags)
302 -- not when profiling: we don't want to
303 -- lose information about this particular
304 -- thunk (e.g. its type) (#949)
305
306 -- Ha! an Ap thunk
307 = cgRhsStdThunk bndr lf_info payload
308
309 where
310 lf_info = mkApLFInfo bndr upd_flag arity
311 -- the payload has to be in the correct order, hence we can't
312 -- just use the fvs.
313 payload = StgVarArg fun_id : args
314 arity = length fvs
315
316 ---------- Default case ------------------
317 mkRhsClosure _ bndr cc _ fvs upd_flag args body
318 = do { lf_info <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args
319 ; (id_info, reg) <- rhsIdInfo bndr lf_info
320 ; return (id_info, gen_code lf_info reg) }
321 where
322 gen_code lf_info reg
323 = do { -- LAY OUT THE OBJECT
324 -- If the binder is itself a free variable, then don't store
325 -- it in the closure. Instead, just bind it to Node on entry.
326 -- NB we can be sure that Node will point to it, because we
327 -- haven't told mkClosureLFInfo about this; so if the binder
328 -- _was_ a free var of its RHS, mkClosureLFInfo thinks it *is*
329 -- stored in the closure itself, so it will make sure that
330 -- Node points to it...
331 ; let
332 is_elem = isIn "cgRhsClosure"
333 bndr_is_a_fv = (NonVoid bndr) `is_elem` fvs
334 reduced_fvs | bndr_is_a_fv = fvs `minusList` [NonVoid bndr]
335 | otherwise = fvs
336
337
338 -- MAKE CLOSURE INFO FOR THIS CLOSURE
339 ; mod_name <- getModuleName
340 ; dflags <- getDynFlags
341 ; let name = idName bndr
342 descr = closureDescription dflags mod_name name
343 fv_details :: [(NonVoid Id, VirtualHpOffset)]
344 (tot_wds, ptr_wds, fv_details)
345 = mkVirtHeapOffsets dflags (isLFThunk lf_info)
346 (addIdReps (map stripNV reduced_fvs))
347 closure_info = mkClosureInfo dflags False -- Not static
348 bndr lf_info tot_wds ptr_wds
349 descr
350
351 -- BUILD ITS INFO TABLE AND CODE
352 ; forkClosureBody $
353 -- forkClosureBody: (a) ensure that bindings in here are not seen elsewhere
354 -- (b) ignore Sequel from context; use empty Sequel
355 -- And compile the body
356 closureCodeBody False bndr closure_info cc (nonVoidIds args)
357 (length args) body fv_details
358
359 -- BUILD THE OBJECT
360 -- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
361 ; let use_cc = curCCS; blame_cc = curCCS
362 ; emit (mkComment $ mkFastString "calling allocDynClosure")
363 ; let toVarArg (NonVoid a, off) = (NonVoid (StgVarArg a), off)
364 ; let info_tbl = mkCmmInfo closure_info
365 ; hp_plus_n <- allocDynClosure info_tbl lf_info use_cc blame_cc
366 (map toVarArg fv_details)
367
368 -- RETURN
369 ; return (mkRhsInit dflags reg lf_info hp_plus_n) }
370
371
372 -- Use with care; if used inappropriately, it could break invariants.
373 stripNV :: NonVoid a -> a
374 stripNV (NonVoid a) = a
375
376 -------------------------
377 cgRhsStdThunk
378 :: Id
379 -> LambdaFormInfo
380 -> [StgArg] -- payload
381 -> FCode (CgIdInfo, FCode CmmAGraph)
382
383 cgRhsStdThunk bndr lf_info payload
384 = do { (id_info, reg) <- rhsIdInfo bndr lf_info
385 ; return (id_info, gen_code reg)
386 }
387 where
388 gen_code reg
389 = do -- AHA! A STANDARD-FORM THUNK
390 { -- LAY OUT THE OBJECT
391 mod_name <- getModuleName
392 ; dflags <- getDynFlags
393 ; let (tot_wds, ptr_wds, payload_w_offsets)
394 = mkVirtHeapOffsets dflags (isLFThunk lf_info) (addArgReps payload)
395
396 descr = closureDescription dflags mod_name (idName bndr)
397 closure_info = mkClosureInfo dflags False -- Not static
398 bndr lf_info tot_wds ptr_wds
399 descr
400
401 -- ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body
402 ; let use_cc = curCCS; blame_cc = curCCS
403
404 -- BUILD THE OBJECT
405 ; let info_tbl = mkCmmInfo closure_info
406 ; hp_plus_n <- allocDynClosure info_tbl lf_info
407 use_cc blame_cc payload_w_offsets
408
409 -- RETURN
410 ; return (mkRhsInit dflags reg lf_info hp_plus_n) }
411
412
413 mkClosureLFInfo :: Id -- The binder
414 -> TopLevelFlag -- True of top level
415 -> [NonVoid Id] -- Free vars
416 -> UpdateFlag -- Update flag
417 -> [Id] -- Args
418 -> FCode LambdaFormInfo
419 mkClosureLFInfo bndr top fvs upd_flag args
420 | null args = return (mkLFThunk (idType bndr) top (map stripNV fvs) upd_flag)
421 | otherwise =
422 do { arg_descr <- mkArgDescr (idName bndr) args
423 ; return (mkLFReEntrant top (map stripNV fvs) args arg_descr) }
424
425
426 ------------------------------------------------------------------------
427 -- The code for closures}
428 ------------------------------------------------------------------------
429
430 closureCodeBody :: Bool -- whether this is a top-level binding
431 -> Id -- the closure's name
432 -> ClosureInfo -- Lots of information about this closure
433 -> CostCentreStack -- Optional cost centre attached to closure
434 -> [NonVoid Id] -- incoming args to the closure
435 -> Int -- arity, including void args
436 -> StgExpr
437 -> [(NonVoid Id, VirtualHpOffset)] -- the closure's free vars
438 -> FCode ()
439
440 {- There are two main cases for the code for closures.
441
442 * If there are *no arguments*, then the closure is a thunk, and not in
443 normal form. So it should set up an update frame (if it is
444 shared). NB: Thunks cannot have a primitive type!
445
446 * If there is *at least one* argument, then this closure is in
447 normal form, so there is no need to set up an update frame.
448
449 The Macros for GrAnSim are produced at the beginning of the
450 argSatisfactionCheck (by calling fetchAndReschedule).
451 There info if Node points to closure is available. -- HWL -}
452
453 closureCodeBody top_lvl bndr cl_info cc _args arity body fv_details
454 | arity == 0 -- No args i.e. thunk
455 = emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $
456 \(_, node, _) -> thunkCode cl_info fv_details cc node arity body
457 where
458 lf_info = closureLFInfo cl_info
459 info_tbl = mkCmmInfo cl_info
460
461 closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
462 = -- Note: args may be [], if all args are Void
463 do { -- Allocate the global ticky counter,
464 -- and establish the ticky-counter
465 -- label for this block
466 let ticky_ctr_lbl = closureRednCountsLabel cl_info
467 ; emitTickyCounter cl_info (map stripNV args)
468 ; setTickyCtrLabel ticky_ctr_lbl $ do
469
470 ; let
471 lf_info = closureLFInfo cl_info
472 info_tbl = mkCmmInfo cl_info
473
474 -- Emit the main entry code
475 ; emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args $
476 \(_offset, node, arg_regs) -> do
477 -- Emit slow-entry code (for entering a closure through a PAP)
478 { mkSlowEntryCode bndr cl_info arg_regs
479
480 ; dflags <- getDynFlags
481 ; let lf_info = closureLFInfo cl_info
482 node_points = nodeMustPointToIt dflags lf_info
483 node' = if node_points then Just node else Nothing
484 ; tickyEnterFun cl_info
485 ; enterCostCentreFun cc
486 (CmmMachOp (mo_wordSub dflags)
487 [ CmmReg nodeReg
488 , mkIntExpr dflags (funTag dflags cl_info) ])
489 ; when node_points (ldvEnterClosure cl_info)
490 ; granYield arg_regs node_points
491
492 -- Main payload
493 ; entryHeapCheck cl_info node' arity arg_regs $ do
494 { fv_bindings <- mapM bind_fv fv_details
495 -- Load free vars out of closure *after*
496 -- heap check, to reduce live vars over check
497 ; when node_points $ load_fvs node lf_info fv_bindings
498 ; void $ cgExpr body
499 }}
500 }
501
502 -- A function closure pointer may be tagged, so we
503 -- must take it into account when accessing the free variables.
504 bind_fv :: (NonVoid Id, VirtualHpOffset) -> FCode (LocalReg, WordOff)
505 bind_fv (id, off) = do { reg <- rebindToReg id; return (reg, off) }
506
507 load_fvs :: LocalReg -> LambdaFormInfo -> [(LocalReg, WordOff)] -> FCode ()
508 load_fvs node lf_info = mapM_ (\ (reg, off) ->
509 do dflags <- getDynFlags
510 let tag = lfDynTag dflags lf_info
511 emit $ mkTaggedObjectLoad dflags reg node off tag)
512
513 -----------------------------------------
514 -- The "slow entry" code for a function. This entry point takes its
515 -- arguments on the stack. It loads the arguments into registers
516 -- according to the calling convention, and jumps to the function's
517 -- normal entry point. The function's closure is assumed to be in
518 -- R1/node.
519 --
520 -- The slow entry point is used for unknown calls: eg. stg_PAP_entry
521
522 mkSlowEntryCode :: Id -> ClosureInfo -> [LocalReg] -> FCode ()
523 -- If this function doesn't have a specialised ArgDescr, we need
524 -- to generate the function's arg bitmap and slow-entry code.
525 -- Here, we emit the slow-entry code.
526 mkSlowEntryCode bndr cl_info arg_regs -- function closure is already in `Node'
527 | Just (_, ArgGen _) <- closureFunInfo cl_info
528 = do dflags <- getDynFlags
529 let node = idToReg dflags (NonVoid bndr)
530 slow_lbl = closureSlowEntryLabel cl_info
531 fast_lbl = closureLocalEntryLabel dflags cl_info
532 -- mkDirectJump does not clobber `Node' containing function closure
533 jump = mkJump dflags NativeNodeCall
534 (mkLblExpr fast_lbl)
535 (map (CmmReg . CmmLocal) (node : arg_regs))
536 (initUpdFrameOff dflags)
537 emitProcWithConvention Slow Nothing slow_lbl (node : arg_regs) jump
538 | otherwise = return ()
539
540 -----------------------------------------
541 thunkCode :: ClosureInfo -> [(NonVoid Id, VirtualHpOffset)] -> CostCentreStack
542 -> LocalReg -> Int -> StgExpr -> FCode ()
543 thunkCode cl_info fv_details _cc node arity body
544 = do { dflags <- getDynFlags
545 ; let node_points = nodeMustPointToIt dflags (closureLFInfo cl_info)
546 node' = if node_points then Just node else Nothing
547 ; tickyEnterThunk cl_info
548 ; ldvEnterClosure cl_info -- NB: Node always points when profiling
549 ; granThunk node_points
550
551 -- Heap overflow check
552 ; entryHeapCheck cl_info node' arity [] $ do
553 { -- Overwrite with black hole if necessary
554 -- but *after* the heap-overflow check
555 ; when (blackHoleOnEntry cl_info && node_points)
556 (blackHoleIt cl_info node)
557
558 -- Push update frame
559 ; setupUpdate cl_info node $
560 -- We only enter cc after setting up update so
561 -- that cc of enclosing scope will be recorded
562 -- in update frame CAF/DICT functions will be
563 -- subsumed by this enclosing cc
564 do { enterCostCentreThunk (CmmReg nodeReg)
565 ; let lf_info = closureLFInfo cl_info
566 ; fv_bindings <- mapM bind_fv fv_details
567 ; load_fvs node lf_info fv_bindings
568 ; void $ cgExpr body }}}
569
570
571 ------------------------------------------------------------------------
572 -- Update and black-hole wrappers
573 ------------------------------------------------------------------------
574
575 blackHoleIt :: ClosureInfo -> LocalReg -> FCode ()
576 -- Only called for closures with no args
577 -- Node points to the closure
578 blackHoleIt closure_info node
579 = emitBlackHoleCode (closureSingleEntry closure_info) (CmmReg (CmmLocal node))
580
581 emitBlackHoleCode :: Bool -> CmmExpr -> FCode ()
582 emitBlackHoleCode is_single_entry node = do
583 dflags <- getDynFlags
584
585 -- Eager blackholing is normally disabled, but can be turned on with
586 -- -feager-blackholing. When it is on, we replace the info pointer
587 -- of the thunk with stg_EAGER_BLACKHOLE_info on entry.
588
589 -- If we wanted to do eager blackholing with slop filling, we'd need
590 -- to do it at the *end* of a basic block, otherwise we overwrite
591 -- the free variables in the thunk that we still need. We have a
592 -- patch for this from Andy Cheadle, but not incorporated yet. --SDM
593 -- [6/2004]
594 --
595 -- Previously, eager blackholing was enabled when ticky-ticky was
596 -- on. But it didn't work, and it wasn't strictly necessary to bring
597 -- back minimal ticky-ticky, so now EAGER_BLACKHOLING is
598 -- unconditionally disabled. -- krc 1/2007
599
600 -- Note the eager-blackholing check is here rather than in blackHoleOnEntry,
601 -- because emitBlackHoleCode is called from CmmParse.
602
603 let eager_blackholing = not (gopt Opt_SccProfilingOn dflags)
604 && gopt Opt_EagerBlackHoling dflags
605 -- Profiling needs slop filling (to support LDV
606 -- profiling), so currently eager blackholing doesn't
607 -- work with profiling.
608
609 when eager_blackholing $ do
610 tickyBlackHole (not is_single_entry)
611 emitStore (cmmOffsetW dflags node (fixedHdrSize dflags))
612 (CmmReg (CmmGlobal CurrentTSO))
613 emitPrimCall [] MO_WriteBarrier []
614 emitStore node (CmmReg (CmmGlobal EagerBlackholeInfo))
615
616 setupUpdate :: ClosureInfo -> LocalReg -> FCode () -> FCode ()
617 -- Nota Bene: this function does not change Node (even if it's a CAF),
618 -- so that the cost centre in the original closure can still be
619 -- extracted by a subsequent enterCostCentre
620 setupUpdate closure_info node body
621 | closureReEntrant closure_info
622 = body
623
624 | not (isStaticClosure closure_info)
625 = if not (closureUpdReqd closure_info)
626 then do tickyUpdateFrameOmitted; body
627 else do
628 tickyPushUpdateFrame
629 dflags <- getDynFlags
630 let
631 bh = blackHoleOnEntry closure_info &&
632 not (gopt Opt_SccProfilingOn dflags) &&
633 gopt Opt_EagerBlackHoling dflags
634
635 lbl | bh = mkBHUpdInfoLabel
636 | otherwise = mkUpdInfoLabel
637
638 pushUpdateFrame lbl (CmmReg (CmmLocal node)) body
639
640 | otherwise -- A static closure
641 = do { tickyUpdateBhCaf closure_info
642
643 ; if closureUpdReqd closure_info
644 then do -- Blackhole the (updatable) CAF:
645 { upd_closure <- link_caf node True
646 ; pushUpdateFrame mkBHUpdInfoLabel upd_closure body }
647 else do {tickyUpdateFrameOmitted; body}
648 }
649
650 -----------------------------------------------------------------------------
651 -- Setting up update frames
652
653 -- Push the update frame on the stack in the Entry area,
654 -- leaving room for the return address that is already
655 -- at the old end of the area.
656 --
657 pushUpdateFrame :: CLabel -> CmmExpr -> FCode () -> FCode ()
658 pushUpdateFrame lbl updatee body
659 = do
660 updfr <- getUpdFrameOff
661 dflags <- getDynFlags
662 let
663 hdr = fixedHdrSize dflags * wORD_SIZE dflags
664 frame = updfr + hdr + sIZEOF_StgUpdateFrame_NoHdr dflags
665 --
666 emitUpdateFrame dflags (CmmStackSlot Old frame) lbl updatee
667 withUpdFrameOff frame body
668
669 emitUpdateFrame :: DynFlags -> CmmExpr -> CLabel -> CmmExpr -> FCode ()
670 emitUpdateFrame dflags frame lbl updatee = do
671 let
672 hdr = fixedHdrSize dflags * wORD_SIZE dflags
673 off_updatee = hdr + oFFSET_StgUpdateFrame_updatee dflags
674 --
675 emitStore frame (mkLblExpr lbl)
676 emitStore (cmmOffset dflags frame off_updatee) updatee
677 initUpdFrameProf frame
678
679 -----------------------------------------------------------------------------
680 -- Entering a CAF
681 --
682 -- When a CAF is first entered, it creates a black hole in the heap,
683 -- and updates itself with an indirection to this new black hole.
684 --
685 -- We update the CAF with an indirection to a newly-allocated black
686 -- hole in the heap. We also set the blocking queue on the newly
687 -- allocated black hole to be empty.
688 --
689 -- Why do we make a black hole in the heap when we enter a CAF?
690 --
691 -- - for a generational garbage collector, which needs a fast
692 -- test for whether an updatee is in an old generation or not
693 --
694 -- - for the parallel system, which can implement updates more
695 -- easily if the updatee is always in the heap. (allegedly).
696 --
697 -- When debugging, we maintain a separate CAF list so we can tell when
698 -- a CAF has been garbage collected.
699
700 -- newCAF must be called before the itbl ptr is overwritten, since
701 -- newCAF records the old itbl ptr in order to do CAF reverting
702 -- (which Hugs needs to do in order that combined mode works right.)
703 --
704
705 -- ToDo [Feb 04] This entire link_caf nonsense could all be moved
706 -- into the "newCAF" RTS procedure, which we call anyway, including
707 -- the allocation of the black-hole indirection closure.
708 -- That way, code size would fall, the CAF-handling code would
709 -- be closer together, and the compiler wouldn't need to know
710 -- about off_indirectee etc.
711
712 link_caf :: LocalReg -- pointer to the closure
713 -> Bool -- True <=> updatable, False <=> single-entry
714 -> FCode CmmExpr -- Returns amode for closure to be updated
715 -- To update a CAF we must allocate a black hole, link the CAF onto the
716 -- CAF list, then update the CAF to point to the fresh black hole.
717 -- This function returns the address of the black hole, so it can be
718 -- updated with the new value when available. The reason for all of this
719 -- is that we only want to update dynamic heap objects, not static ones,
720 -- so that generational GC is easier.
721 link_caf node _is_upd = do
722 { dflags <- getDynFlags
723 -- Alloc black hole specifying CC_HDR(Node) as the cost centre
724 ; let use_cc = costCentreFrom dflags (CmmReg nodeReg)
725 blame_cc = use_cc
726 tso = CmmReg (CmmGlobal CurrentTSO)
727
728 ; hp_rel <- allocDynClosureCmm cafBlackHoleInfoTable mkLFBlackHole
729 use_cc blame_cc [(tso,fixedHdrSize dflags)]
730 -- small optimisation: we duplicate the hp_rel expression in
731 -- both the newCAF call and the value returned below.
732 -- If we instead used allocDynClosureReg which assigns it to a reg,
733 -- then the reg is live across the newCAF call and gets spilled,
734 -- which is stupid. Really we should have an optimisation pass to
735 -- fix this, but we don't yet. --SDM
736
737 -- Call the RTS function newCAF to add the CAF to the CafList
738 -- so that the garbage collector can find them
739 -- This must be done *before* the info table pointer is overwritten,
740 -- because the old info table ptr is needed for reversion
741 ; ret <- newTemp (bWord dflags)
742 ; emitRtsCallGen [(ret,NoHint)] rtsPackageId (fsLit "newCAF")
743 [ (CmmReg (CmmGlobal BaseReg), AddrHint),
744 (CmmReg (CmmLocal node), AddrHint),
745 (hp_rel, AddrHint) ]
746 False
747
748 -- see Note [atomic CAF entry] in rts/sm/Storage.c
749 ; updfr <- getUpdFrameOff
750 ; emit =<< mkCmmIfThen
751 (CmmMachOp (mo_wordEq dflags) [ CmmReg (CmmLocal ret), CmmLit (zeroCLit dflags)])
752 -- re-enter the CAF
753 (let target = entryCode dflags (closureInfoPtr dflags (CmmReg (CmmLocal node))) in
754 mkJump dflags NativeNodeCall target [] updfr)
755
756 ; return hp_rel }
757
758 ------------------------------------------------------------------------
759 -- Profiling
760 ------------------------------------------------------------------------
761
762 -- For "global" data constructors the description is simply occurrence
763 -- name of the data constructor itself. Otherwise it is determined by
764 -- @closureDescription@ from the let binding information.
765
766 closureDescription :: DynFlags
767 -> Module -- Module
768 -> Name -- Id of closure binding
769 -> String
770 -- Not called for StgRhsCon which have global info tables built in
771 -- CgConTbls.lhs with a description generated from the data constructor
772 closureDescription dflags mod_name name
773 = showSDocDump dflags (char '<' <>
774 (if isExternalName name
775 then ppr name -- ppr will include the module name prefix
776 else pprModule mod_name <> char '.' <> ppr name) <>
777 char '>')
778 -- showSDocDump, because we want to see the unique on the Name.
779