Improve code generation for conditionals
[ghc.git] / compiler / codeGen / StgCmmClosure.hs
1 {-# LANGUAGE CPP, RecordWildCards #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Stg to C-- code generation:
6 --
7 -- The types LambdaFormInfo
8 -- ClosureInfo
9 --
10 -- Nothing monadic in here!
11 --
12 -----------------------------------------------------------------------------
13
14 module StgCmmClosure (
15 DynTag, tagForCon, isSmallFamily,
16
17 idPrimRep, isVoidRep, isGcPtrRep, addIdReps, addArgReps,
18 argPrimRep,
19
20 NonVoid(..), fromNonVoid, nonVoidIds, nonVoidStgArgs,
21 assertNonVoidIds, assertNonVoidStgArgs,
22
23 -- * LambdaFormInfo
24 LambdaFormInfo, -- Abstract
25 StandardFormInfo, -- ...ditto...
26 mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo,
27 mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape,
28 mkLFStringLit,
29 lfDynTag,
30 maybeIsLFCon, isLFThunk, isLFReEntrant, lfUpdatable,
31
32 -- * Used by other modules
33 CgLoc(..), SelfLoopInfo, CallMethod(..),
34 nodeMustPointToIt, isKnownFun, funTag, tagForArity, getCallMethod,
35
36 -- * ClosureInfo
37 ClosureInfo,
38 mkClosureInfo,
39 mkCmmInfo,
40
41 -- ** Inspection
42 closureLFInfo, closureName,
43
44 -- ** Labels
45 -- These just need the info table label
46 closureInfoLabel, staticClosureLabel,
47 closureSlowEntryLabel, closureLocalEntryLabel,
48
49 -- ** Predicates
50 -- These are really just functions on LambdaFormInfo
51 closureUpdReqd, closureSingleEntry,
52 closureReEntrant, closureFunInfo,
53 isToplevClosure,
54
55 blackHoleOnEntry, -- Needs LambdaFormInfo and SMRep
56 isStaticClosure, -- Needs SMPre
57
58 -- * InfoTables
59 mkDataConInfoTable,
60 cafBlackHoleInfoTable,
61 indStaticInfoTable,
62 staticClosureNeedsLink,
63 ) where
64
65 #include "../includes/MachDeps.h"
66
67 #include "HsVersions.h"
68
69 import StgSyn
70 import SMRep
71 import Cmm
72 import PprCmmExpr()
73
74 import BlockId
75 import CLabel
76 import Id
77 import IdInfo
78 import DataCon
79 import Name
80 import Type
81 import TyCoRep
82 import TcType
83 import TyCon
84 import RepType
85 import BasicTypes
86 import Outputable
87 import DynFlags
88 import Util
89
90 import Data.Coerce (coerce)
91
92 -----------------------------------------------------------------------------
93 -- Data types and synonyms
94 -----------------------------------------------------------------------------
95
96 -- These data types are mostly used by other modules, especially StgCmmMonad,
97 -- but we define them here because some functions in this module need to
98 -- have access to them as well
99
100 data CgLoc
101 = CmmLoc CmmExpr -- A stable CmmExpr; that is, one not mentioning
102 -- Hp, so that it remains valid across calls
103
104 | LneLoc BlockId [LocalReg] -- A join point
105 -- A join point (= let-no-escape) should only
106 -- be tail-called, and in a saturated way.
107 -- To tail-call it, assign to these locals,
108 -- and branch to the block id
109
110 instance Outputable CgLoc where
111 ppr (CmmLoc e) = text "cmm" <+> ppr e
112 ppr (LneLoc b rs) = text "lne" <+> ppr b <+> ppr rs
113
114 type SelfLoopInfo = (Id, BlockId, [LocalReg])
115
116 -- used by ticky profiling
117 isKnownFun :: LambdaFormInfo -> Bool
118 isKnownFun LFReEntrant{} = True
119 isKnownFun LFLetNoEscape = True
120 isKnownFun _ = False
121
122
123 -------------------------------------
124 -- Non-void types
125 -------------------------------------
126 -- We frequently need the invariant that an Id or a an argument
127 -- is of a non-void type. This type is a witness to the invariant.
128
129 newtype NonVoid a = NonVoid a
130 deriving (Eq, Show)
131
132 fromNonVoid :: NonVoid a -> a
133 fromNonVoid (NonVoid a) = a
134
135 instance (Outputable a) => Outputable (NonVoid a) where
136 ppr (NonVoid a) = ppr a
137
138 nonVoidIds :: [Id] -> [NonVoid Id]
139 nonVoidIds ids = [NonVoid id | id <- ids, not (isVoidTy (idType id))]
140
141 -- | Used in places where some invariant ensures that all these Ids are
142 -- non-void; e.g. constructor field binders in case expressions.
143 -- See Note [Post-unarisation invariants] in UnariseStg.
144 assertNonVoidIds :: [Id] -> [NonVoid Id]
145 assertNonVoidIds ids = ASSERT(not (any (isVoidTy . idType) ids))
146 coerce ids
147
148 nonVoidStgArgs :: [StgArg] -> [NonVoid StgArg]
149 nonVoidStgArgs args = [NonVoid arg | arg <- args, not (isVoidTy (stgArgType arg))]
150
151 -- | Used in places where some invariant ensures that all these arguments are
152 -- non-void; e.g. constructor arguments.
153 -- See Note [Post-unarisation invariants] in UnariseStg.
154 assertNonVoidStgArgs :: [StgArg] -> [NonVoid StgArg]
155 assertNonVoidStgArgs args = ASSERT(not (any (isVoidTy . stgArgType) args))
156 coerce args
157
158
159 -----------------------------------------------------------------------------
160 -- Representations
161 -----------------------------------------------------------------------------
162
163 -- Why are these here?
164
165 idPrimRep :: Id -> PrimRep
166 idPrimRep id = typePrimRep1 (idType id)
167 -- NB: typePrimRep1 fails on unboxed tuples,
168 -- but by StgCmm no Ids have unboxed tuple type
169
170 addIdReps :: [NonVoid Id] -> [NonVoid (PrimRep, Id)]
171 addIdReps = map (\id -> let id' = fromNonVoid id
172 in NonVoid (idPrimRep id', id'))
173
174 addArgReps :: [NonVoid StgArg] -> [NonVoid (PrimRep, StgArg)]
175 addArgReps = map (\arg -> let arg' = fromNonVoid arg
176 in NonVoid (argPrimRep arg', arg'))
177
178 argPrimRep :: StgArg -> PrimRep
179 argPrimRep arg = typePrimRep1 (stgArgType arg)
180
181
182 -----------------------------------------------------------------------------
183 -- LambdaFormInfo
184 -----------------------------------------------------------------------------
185
186 -- Information about an identifier, from the code generator's point of
187 -- view. Every identifier is bound to a LambdaFormInfo in the
188 -- environment, which gives the code generator enough info to be able to
189 -- tail call or return that identifier.
190
191 data LambdaFormInfo
192 = LFReEntrant -- Reentrant closure (a function)
193 TopLevelFlag -- True if top level
194 OneShotInfo
195 !RepArity -- Arity. Invariant: always > 0
196 !Bool -- True <=> no fvs
197 ArgDescr -- Argument descriptor (should really be in ClosureInfo)
198
199 | LFThunk -- Thunk (zero arity)
200 TopLevelFlag
201 !Bool -- True <=> no free vars
202 !Bool -- True <=> updatable (i.e., *not* single-entry)
203 StandardFormInfo
204 !Bool -- True <=> *might* be a function type
205
206 | LFCon -- A saturated constructor application
207 DataCon -- The constructor
208
209 | LFUnknown -- Used for function arguments and imported things.
210 -- We know nothing about this closure.
211 -- Treat like updatable "LFThunk"...
212 -- Imported things which we *do* know something about use
213 -- one of the other LF constructors (eg LFReEntrant for
214 -- known functions)
215 !Bool -- True <=> *might* be a function type
216 -- The False case is good when we want to enter it,
217 -- because then we know the entry code will do
218 -- For a function, the entry code is the fast entry point
219
220 | LFUnlifted -- A value of unboxed type;
221 -- always a value, needs evaluation
222
223 | LFLetNoEscape -- See LetNoEscape module for precise description
224
225
226 -------------------------
227 -- StandardFormInfo tells whether this thunk has one of
228 -- a small number of standard forms
229
230 data StandardFormInfo
231 = NonStandardThunk
232 -- The usual case: not of the standard forms
233
234 | SelectorThunk
235 -- A SelectorThunk is of form
236 -- case x of
237 -- con a1,..,an -> ak
238 -- and the constructor is from a single-constr type.
239 WordOff -- 0-origin offset of ak within the "goods" of
240 -- constructor (Recall that the a1,...,an may be laid
241 -- out in the heap in a non-obvious order.)
242
243 | ApThunk
244 -- An ApThunk is of form
245 -- x1 ... xn
246 -- The code for the thunk just pushes x2..xn on the stack and enters x1.
247 -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled
248 -- in the RTS to save space.
249 RepArity -- Arity, n
250
251
252 ------------------------------------------------------
253 -- Building LambdaFormInfo
254 ------------------------------------------------------
255
256 mkLFArgument :: Id -> LambdaFormInfo
257 mkLFArgument id
258 | isUnliftedType ty = LFUnlifted
259 | might_be_a_function ty = LFUnknown True
260 | otherwise = LFUnknown False
261 where
262 ty = idType id
263
264 -------------
265 mkLFLetNoEscape :: LambdaFormInfo
266 mkLFLetNoEscape = LFLetNoEscape
267
268 -------------
269 mkLFReEntrant :: TopLevelFlag -- True of top level
270 -> [Id] -- Free vars
271 -> [Id] -- Args
272 -> ArgDescr -- Argument descriptor
273 -> LambdaFormInfo
274
275 mkLFReEntrant _ _ [] _
276 = pprPanic "mkLFReEntrant" empty
277 mkLFReEntrant top fvs args arg_descr
278 = LFReEntrant top os_info (length args) (null fvs) arg_descr
279 where os_info = idOneShotInfo (head args)
280
281 -------------
282 mkLFThunk :: Type -> TopLevelFlag -> [Id] -> UpdateFlag -> LambdaFormInfo
283 mkLFThunk thunk_ty top fvs upd_flag
284 = ASSERT( not (isUpdatable upd_flag) || not (isUnliftedType thunk_ty) )
285 LFThunk top (null fvs)
286 (isUpdatable upd_flag)
287 NonStandardThunk
288 (might_be_a_function thunk_ty)
289
290 --------------
291 might_be_a_function :: Type -> Bool
292 -- Return False only if we are *sure* it's a data type
293 -- Look through newtypes etc as much as poss
294 might_be_a_function ty
295 | [LiftedRep] <- typePrimRep ty
296 , Just tc <- tyConAppTyCon_maybe (unwrapType ty)
297 , isDataTyCon tc
298 = False
299 | otherwise
300 = True
301
302 -------------
303 mkConLFInfo :: DataCon -> LambdaFormInfo
304 mkConLFInfo con = LFCon con
305
306 -------------
307 mkSelectorLFInfo :: Id -> Int -> Bool -> LambdaFormInfo
308 mkSelectorLFInfo id offset updatable
309 = LFThunk NotTopLevel False updatable (SelectorThunk offset)
310 (might_be_a_function (idType id))
311
312 -------------
313 mkApLFInfo :: Id -> UpdateFlag -> Arity -> LambdaFormInfo
314 mkApLFInfo id upd_flag arity
315 = LFThunk NotTopLevel (arity == 0) (isUpdatable upd_flag) (ApThunk arity)
316 (might_be_a_function (idType id))
317
318 -------------
319 mkLFImported :: Id -> LambdaFormInfo
320 mkLFImported id
321 | Just con <- isDataConWorkId_maybe id
322 , isNullaryRepDataCon con
323 = LFCon con -- An imported nullary constructor
324 -- We assume that the constructor is evaluated so that
325 -- the id really does point directly to the constructor
326
327 | arity > 0
328 = LFReEntrant TopLevel noOneShotInfo arity True (panic "arg_descr")
329
330 | otherwise
331 = mkLFArgument id -- Not sure of exact arity
332 where
333 arity = idFunRepArity id
334
335 -------------
336 mkLFStringLit :: LambdaFormInfo
337 mkLFStringLit = LFUnlifted
338
339 -----------------------------------------------------
340 -- Dynamic pointer tagging
341 -----------------------------------------------------
342
343 type DynTag = Int -- The tag on a *pointer*
344 -- (from the dynamic-tagging paper)
345
346 -- Note [Data constructor dynamic tags]
347 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
348 --
349 -- The family size of a data type (the number of constructors
350 -- or the arity of a function) can be either:
351 -- * small, if the family size < 2**tag_bits
352 -- * big, otherwise.
353 --
354 -- Small families can have the constructor tag in the tag bits.
355 -- Big families only use the tag value 1 to represent evaluatedness.
356 -- We don't have very many tag bits: for example, we have 2 bits on
357 -- x86-32 and 3 bits on x86-64.
358
359 isSmallFamily :: DynFlags -> Int -> Bool
360 isSmallFamily dflags fam_size = fam_size <= mAX_PTR_TAG dflags
361
362 tagForCon :: DynFlags -> DataCon -> DynTag
363 tagForCon dflags con
364 | isSmallFamily dflags fam_size = con_tag
365 | otherwise = 1
366 where
367 con_tag = dataConTag con -- NB: 1-indexed
368 fam_size = tyConFamilySize (dataConTyCon con)
369
370 tagForArity :: DynFlags -> RepArity -> DynTag
371 tagForArity dflags arity
372 | isSmallFamily dflags arity = arity
373 | otherwise = 0
374
375 lfDynTag :: DynFlags -> LambdaFormInfo -> DynTag
376 -- Return the tag in the low order bits of a variable bound
377 -- to this LambdaForm
378 lfDynTag dflags (LFCon con) = tagForCon dflags con
379 lfDynTag dflags (LFReEntrant _ _ arity _ _) = tagForArity dflags arity
380 lfDynTag _ _other = 0
381
382
383 -----------------------------------------------------------------------------
384 -- Observing LambdaFormInfo
385 -----------------------------------------------------------------------------
386
387 -------------
388 maybeIsLFCon :: LambdaFormInfo -> Maybe DataCon
389 maybeIsLFCon (LFCon con) = Just con
390 maybeIsLFCon _ = Nothing
391
392 ------------
393 isLFThunk :: LambdaFormInfo -> Bool
394 isLFThunk (LFThunk {}) = True
395 isLFThunk _ = False
396
397 isLFReEntrant :: LambdaFormInfo -> Bool
398 isLFReEntrant (LFReEntrant {}) = True
399 isLFReEntrant _ = False
400
401 -----------------------------------------------------------------------------
402 -- Choosing SM reps
403 -----------------------------------------------------------------------------
404
405 lfClosureType :: LambdaFormInfo -> ClosureTypeInfo
406 lfClosureType (LFReEntrant _ _ arity _ argd) = Fun arity argd
407 lfClosureType (LFCon con) = Constr (dataConTagZ con)
408 (dataConIdentity con)
409 lfClosureType (LFThunk _ _ _ is_sel _) = thunkClosureType is_sel
410 lfClosureType _ = panic "lfClosureType"
411
412 thunkClosureType :: StandardFormInfo -> ClosureTypeInfo
413 thunkClosureType (SelectorThunk off) = ThunkSelector off
414 thunkClosureType _ = Thunk
415
416 -- We *do* get non-updatable top-level thunks sometimes. eg. f = g
417 -- gets compiled to a jump to g (if g has non-zero arity), instead of
418 -- messing around with update frames and PAPs. We set the closure type
419 -- to FUN_STATIC in this case.
420
421 -----------------------------------------------------------------------------
422 -- nodeMustPointToIt
423 -----------------------------------------------------------------------------
424
425 nodeMustPointToIt :: DynFlags -> LambdaFormInfo -> Bool
426 -- If nodeMustPointToIt is true, then the entry convention for
427 -- this closure has R1 (the "Node" register) pointing to the
428 -- closure itself --- the "self" argument
429
430 nodeMustPointToIt _ (LFReEntrant top _ _ no_fvs _)
431 = not no_fvs -- Certainly if it has fvs we need to point to it
432 || isNotTopLevel top -- See Note [GC recovery]
433 -- For lex_profiling we also access the cost centre for a
434 -- non-inherited (i.e. non-top-level) function.
435 -- The isNotTopLevel test above ensures this is ok.
436
437 nodeMustPointToIt dflags (LFThunk top no_fvs updatable NonStandardThunk _)
438 = not no_fvs -- Self parameter
439 || isNotTopLevel top -- Note [GC recovery]
440 || updatable -- Need to push update frame
441 || gopt Opt_SccProfilingOn dflags
442 -- For the non-updatable (single-entry case):
443 --
444 -- True if has fvs (in which case we need access to them, and we
445 -- should black-hole it)
446 -- or profiling (in which case we need to recover the cost centre
447 -- from inside it) ToDo: do we need this even for
448 -- top-level thunks? If not,
449 -- isNotTopLevel subsumes this
450
451 nodeMustPointToIt _ (LFThunk {}) -- Node must point to a standard-form thunk
452 = True
453
454 nodeMustPointToIt _ (LFCon _) = True
455
456 -- Strictly speaking, the above two don't need Node to point
457 -- to it if the arity = 0. But this is a *really* unlikely
458 -- situation. If we know it's nil (say) and we are entering
459 -- it. Eg: let x = [] in x then we will certainly have inlined
460 -- x, since nil is a simple atom. So we gain little by not
461 -- having Node point to known zero-arity things. On the other
462 -- hand, we do lose something; Patrick's code for figuring out
463 -- when something has been updated but not entered relies on
464 -- having Node point to the result of an update. SLPJ
465 -- 27/11/92.
466
467 nodeMustPointToIt _ (LFUnknown _) = True
468 nodeMustPointToIt _ LFUnlifted = False
469 nodeMustPointToIt _ LFLetNoEscape = False
470
471 {- Note [GC recovery]
472 ~~~~~~~~~~~~~~~~~~~~~
473 If we a have a local let-binding (function or thunk)
474 let f = <body> in ...
475 AND <body> allocates, then the heap-overflow check needs to know how
476 to re-start the evaluation. It uses the "self" pointer to do this.
477 So even if there are no free variables in <body>, we still make
478 nodeMustPointToIt be True for non-top-level bindings.
479
480 Why do any such bindings exist? After all, let-floating should have
481 floated them out. Well, a clever optimiser might leave one there to
482 avoid a space leak, deliberately recomputing a thunk. Also (and this
483 really does happen occasionally) let-floating may make a function f smaller
484 so it can be inlined, so now (f True) may generate a local no-fv closure.
485 This actually happened during bootstrapping GHC itself, with f=mkRdrFunBind
486 in TcGenDeriv.) -}
487
488 -----------------------------------------------------------------------------
489 -- getCallMethod
490 -----------------------------------------------------------------------------
491
492 {- The entry conventions depend on the type of closure being entered,
493 whether or not it has free variables, and whether we're running
494 sequentially or in parallel.
495
496 Closure Node Argument Enter
497 Characteristics Par Req'd Passing Via
498 ---------------------------------------------------------------------------
499 Unknown & no & yes & stack & node
500 Known fun (>1 arg), no fvs & no & no & registers & fast entry (enough args)
501 & slow entry (otherwise)
502 Known fun (>1 arg), fvs & no & yes & registers & fast entry (enough args)
503 0 arg, no fvs \r,\s & no & no & n/a & direct entry
504 0 arg, no fvs \u & no & yes & n/a & node
505 0 arg, fvs \r,\s,selector & no & yes & n/a & node
506 0 arg, fvs \r,\s & no & yes & n/a & direct entry
507 0 arg, fvs \u & no & yes & n/a & node
508 Unknown & yes & yes & stack & node
509 Known fun (>1 arg), no fvs & yes & no & registers & fast entry (enough args)
510 & slow entry (otherwise)
511 Known fun (>1 arg), fvs & yes & yes & registers & node
512 0 arg, fvs \r,\s,selector & yes & yes & n/a & node
513 0 arg, no fvs \r,\s & yes & no & n/a & direct entry
514 0 arg, no fvs \u & yes & yes & n/a & node
515 0 arg, fvs \r,\s & yes & yes & n/a & node
516 0 arg, fvs \u & yes & yes & n/a & node
517
518 When black-holing, single-entry closures could also be entered via node
519 (rather than directly) to catch double-entry. -}
520
521 data CallMethod
522 = EnterIt -- No args, not a function
523
524 | JumpToIt BlockId [LocalReg] -- A join point or a header of a local loop
525
526 | ReturnIt -- It's a value (function, unboxed value,
527 -- or constructor), so just return it.
528
529 | SlowCall -- Unknown fun, or known fun with
530 -- too few args.
531
532 | DirectEntry -- Jump directly, with args in regs
533 CLabel -- The code label
534 RepArity -- Its arity
535
536 getCallMethod :: DynFlags
537 -> Name -- Function being applied
538 -> Id -- Function Id used to chech if it can refer to
539 -- CAF's and whether the function is tail-calling
540 -- itself
541 -> LambdaFormInfo -- Its info
542 -> RepArity -- Number of available arguments
543 -> RepArity -- Number of them being void arguments
544 -> CgLoc -- Passed in from cgIdApp so that we can
545 -- handle let-no-escape bindings and self-recursive
546 -- tail calls using the same data constructor,
547 -- JumpToIt. This saves us one case branch in
548 -- cgIdApp
549 -> Maybe SelfLoopInfo -- can we perform a self-recursive tail call?
550 -> CallMethod
551
552 getCallMethod dflags _ id _ n_args v_args _cg_loc
553 (Just (self_loop_id, block_id, args))
554 | gopt Opt_Loopification dflags
555 , id == self_loop_id
556 , n_args - v_args == length args
557 -- If these patterns match then we know that:
558 -- * loopification optimisation is turned on
559 -- * function is performing a self-recursive call in a tail position
560 -- * number of non-void parameters of the function matches functions arity.
561 -- See Note [Self-recursive tail calls] and Note [Void arguments in
562 -- self-recursive tail calls] in StgCmmExpr for more details
563 = JumpToIt block_id args
564
565 getCallMethod dflags name id (LFReEntrant _ _ arity _ _) n_args _v_args _cg_loc
566 _self_loop_info
567 | n_args == 0 -- No args at all
568 && not (gopt Opt_SccProfilingOn dflags)
569 -- See Note [Evaluating functions with profiling] in rts/Apply.cmm
570 = ASSERT( arity /= 0 ) ReturnIt
571 | n_args < arity = SlowCall -- Not enough args
572 | otherwise = DirectEntry (enterIdLabel dflags name (idCafInfo id)) arity
573
574 getCallMethod _ _name _ LFUnlifted n_args _v_args _cg_loc _self_loop_info
575 = ASSERT( n_args == 0 ) ReturnIt
576
577 getCallMethod _ _name _ (LFCon _) n_args _v_args _cg_loc _self_loop_info
578 = ASSERT( n_args == 0 ) ReturnIt
579 -- n_args=0 because it'd be ill-typed to apply a saturated
580 -- constructor application to anything
581
582 getCallMethod dflags name id (LFThunk _ _ updatable std_form_info is_fun)
583 n_args _v_args _cg_loc _self_loop_info
584 | is_fun -- it *might* be a function, so we must "call" it (which is always safe)
585 = SlowCall -- We cannot just enter it [in eval/apply, the entry code
586 -- is the fast-entry code]
587
588 -- Since is_fun is False, we are *definitely* looking at a data value
589 | updatable || gopt Opt_Ticky dflags -- to catch double entry
590 {- OLD: || opt_SMP
591 I decided to remove this, because in SMP mode it doesn't matter
592 if we enter the same thunk multiple times, so the optimisation
593 of jumping directly to the entry code is still valid. --SDM
594 -}
595 = EnterIt
596
597 -- even a non-updatable selector thunk can be updated by the garbage
598 -- collector, so we must enter it. (#8817)
599 | SelectorThunk{} <- std_form_info
600 = EnterIt
601
602 -- We used to have ASSERT( n_args == 0 ), but actually it is
603 -- possible for the optimiser to generate
604 -- let bot :: Int = error Int "urk"
605 -- in (bot `cast` unsafeCoerce Int (Int -> Int)) 3
606 -- This happens as a result of the case-of-error transformation
607 -- So the right thing to do is just to enter the thing
608
609 | otherwise -- Jump direct to code for single-entry thunks
610 = ASSERT( n_args == 0 )
611 DirectEntry (thunkEntryLabel dflags name (idCafInfo id) std_form_info
612 updatable) 0
613
614 getCallMethod _ _name _ (LFUnknown True) _n_arg _v_args _cg_locs _self_loop_info
615 = SlowCall -- might be a function
616
617 getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info
618 = ASSERT2( n_args == 0, ppr name <+> ppr n_args )
619 EnterIt -- Not a function
620
621 getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs)
622 _self_loop_info
623 = JumpToIt blk_id lne_regs
624
625 getCallMethod _ _ _ _ _ _ _ _ = panic "Unknown call method"
626
627 -----------------------------------------------------------------------------
628 -- staticClosureRequired
629 -----------------------------------------------------------------------------
630
631 {- staticClosureRequired is never called (hence commented out)
632
633 SimonMar writes (Sept 07) It's an optimisation we used to apply at
634 one time, I believe, but it got lost probably in the rewrite of
635 the RTS/code generator. I left that code there to remind me to
636 look into whether it was worth doing sometime
637
638 {- Avoiding generating entries and info tables
639 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
640 At present, for every function we generate all of the following,
641 just in case. But they aren't always all needed, as noted below:
642
643 [NB1: all of this applies only to *functions*. Thunks always
644 have closure, info table, and entry code.]
645
646 [NB2: All are needed if the function is *exported*, just to play safe.]
647
648 * Fast-entry code ALWAYS NEEDED
649
650 * Slow-entry code
651 Needed iff (a) we have any un-saturated calls to the function
652 OR (b) the function is passed as an arg
653 OR (c) we're in the parallel world and the function has free vars
654 [Reason: in parallel world, we always enter functions
655 with free vars via the closure.]
656
657 * The function closure
658 Needed iff (a) we have any un-saturated calls to the function
659 OR (b) the function is passed as an arg
660 OR (c) if the function has free vars (ie not top level)
661
662 Why case (a) here? Because if the arg-satis check fails,
663 UpdatePAP stuffs a pointer to the function closure in the PAP.
664 [Could be changed; UpdatePAP could stuff in a code ptr instead,
665 but doesn't seem worth it.]
666
667 [NB: these conditions imply that we might need the closure
668 without the slow-entry code. Here's how.
669
670 f x y = let g w = ...x..y..w...
671 in
672 ...(g t)...
673
674 Here we need a closure for g which contains x and y,
675 but since the calls are all saturated we just jump to the
676 fast entry point for g, with R1 pointing to the closure for g.]
677
678
679 * Standard info table
680 Needed iff (a) we have any un-saturated calls to the function
681 OR (b) the function is passed as an arg
682 OR (c) the function has free vars (ie not top level)
683
684 NB. In the sequential world, (c) is only required so that the function closure has
685 an info table to point to, to keep the storage manager happy.
686 If (c) alone is true we could fake up an info table by choosing
687 one of a standard family of info tables, whose entry code just
688 bombs out.
689
690 [NB In the parallel world (c) is needed regardless because
691 we enter functions with free vars via the closure.]
692
693 If (c) is retained, then we'll sometimes generate an info table
694 (for storage mgr purposes) without slow-entry code. Then we need
695 to use an error label in the info table to substitute for the absent
696 slow entry code.
697 -}
698
699 staticClosureRequired
700 :: Name
701 -> StgBinderInfo
702 -> LambdaFormInfo
703 -> Bool
704 staticClosureRequired binder bndr_info
705 (LFReEntrant top_level _ _ _ _) -- It's a function
706 = ASSERT( isTopLevel top_level )
707 -- Assumption: it's a top-level, no-free-var binding
708 not (satCallsOnly bndr_info)
709
710 staticClosureRequired binder other_binder_info other_lf_info = True
711 -}
712
713 -----------------------------------------------------------------------------
714 -- Data types for closure information
715 -----------------------------------------------------------------------------
716
717
718 {- ClosureInfo: information about a binding
719
720 We make a ClosureInfo for each let binding (both top level and not),
721 but not bindings for data constructors: for those we build a CmmInfoTable
722 directly (see mkDataConInfoTable).
723
724 To a first approximation:
725 ClosureInfo = (LambdaFormInfo, CmmInfoTable)
726
727 A ClosureInfo has enough information
728 a) to construct the info table itself, and build other things
729 related to the binding (e.g. slow entry points for a function)
730 b) to allocate a closure containing that info pointer (i.e.
731 it knows the info table label)
732 -}
733
734 data ClosureInfo
735 = ClosureInfo {
736 closureName :: !Name, -- The thing bound to this closure
737 -- we don't really need this field: it's only used in generating
738 -- code for ticky and profiling, and we could pass the information
739 -- around separately, but it doesn't do much harm to keep it here.
740
741 closureLFInfo :: !LambdaFormInfo, -- NOTE: not an LFCon
742 -- this tells us about what the closure contains: it's right-hand-side.
743
744 -- the rest is just an unpacked CmmInfoTable.
745 closureInfoLabel :: !CLabel,
746 closureSMRep :: !SMRep, -- representation used by storage mgr
747 closureProf :: !ProfilingInfo
748 }
749
750 -- | Convert from 'ClosureInfo' to 'CmmInfoTable'.
751 mkCmmInfo :: ClosureInfo -> CmmInfoTable
752 mkCmmInfo ClosureInfo {..}
753 = CmmInfoTable { cit_lbl = closureInfoLabel
754 , cit_rep = closureSMRep
755 , cit_prof = closureProf
756 , cit_srt = NoC_SRT }
757
758 --------------------------------------
759 -- Building ClosureInfos
760 --------------------------------------
761
762 mkClosureInfo :: DynFlags
763 -> Bool -- Is static
764 -> Id
765 -> LambdaFormInfo
766 -> Int -> Int -- Total and pointer words
767 -> String -- String descriptor
768 -> ClosureInfo
769 mkClosureInfo dflags is_static id lf_info tot_wds ptr_wds val_descr
770 = ClosureInfo { closureName = name
771 , closureLFInfo = lf_info
772 , closureInfoLabel = info_lbl -- These three fields are
773 , closureSMRep = sm_rep -- (almost) an info table
774 , closureProf = prof } -- (we don't have an SRT yet)
775 where
776 name = idName id
777 sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds (lfClosureType lf_info)
778 prof = mkProfilingInfo dflags id val_descr
779 nonptr_wds = tot_wds - ptr_wds
780
781 info_lbl = mkClosureInfoTableLabel id lf_info
782
783 --------------------------------------
784 -- Other functions over ClosureInfo
785 --------------------------------------
786
787 -- Eager blackholing is normally disabled, but can be turned on with
788 -- -feager-blackholing. When it is on, we replace the info pointer of
789 -- the thunk with stg_EAGER_BLACKHOLE_info on entry.
790
791 -- If we wanted to do eager blackholing with slop filling,
792 -- we'd need to do it at the *end* of a basic block, otherwise
793 -- we overwrite the free variables in the thunk that we still
794 -- need. We have a patch for this from Andy Cheadle, but not
795 -- incorporated yet. --SDM [6/2004]
796 --
797 -- Previously, eager blackholing was enabled when ticky-ticky
798 -- was on. But it didn't work, and it wasn't strictly necessary
799 -- to bring back minimal ticky-ticky, so now EAGER_BLACKHOLING
800 -- is unconditionally disabled. -- krc 1/2007
801 --
802 -- Static closures are never themselves black-holed.
803
804 blackHoleOnEntry :: ClosureInfo -> Bool
805 blackHoleOnEntry cl_info
806 | isStaticRep (closureSMRep cl_info)
807 = False -- Never black-hole a static closure
808
809 | otherwise
810 = case closureLFInfo cl_info of
811 LFReEntrant {} -> False
812 LFLetNoEscape -> False
813 LFThunk _ _no_fvs upd _ _ -> upd -- See Note [Black-holing non-updatable thunks]
814 _other -> panic "blackHoleOnEntry"
815
816 {- Note [Black-holing non-updatable thunks]
817 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
818 We must not black-hole non-updatable (single-entry) thunks otherwise
819 we run into issues like Trac #10414. Specifically:
820
821 * There is no reason to black-hole a non-updatable thunk: it should
822 not be competed for by multiple threads
823
824 * It could, conceivably, cause a space leak if we don't black-hole
825 it, if there was a live but never-followed pointer pointing to it.
826 Let's hope that doesn't happen.
827
828 * It is dangerous to black-hole a non-updatable thunk because
829 - is not updated (of course)
830 - hence, if it is black-holed and another thread tries to evaluate
831 it, that thread will block forever
832 This actually happened in Trac #10414. So we do not black-hole
833 non-updatable thunks.
834
835 * How could two threads evaluate the same non-updatable (single-entry)
836 thunk? See Reid Barton's example below.
837
838 * Only eager blackholing could possibly black-hole a non-updatable
839 thunk, because lazy black-holing only affects thunks with an
840 update frame on the stack.
841
842 Here is and example due to Reid Barton (Trac #10414):
843 x = \u [] concat [[1], []]
844 with the following definitions,
845
846 concat x = case x of
847 [] -> []
848 (:) x xs -> (++) x (concat xs)
849
850 (++) xs ys = case xs of
851 [] -> ys
852 (:) x rest -> (:) x ((++) rest ys)
853
854 Where we use the syntax @\u []@ to denote an updatable thunk and @\s []@ to
855 denote a single-entry (i.e. non-updatable) thunk. After a thread evaluates @x@
856 to WHNF and calls @(++)@ the heap will contain the following thunks,
857
858 x = 1 : y
859 y = \u [] (++) [] z
860 z = \s [] concat []
861
862 Now that the stage is set, consider the follow evaluations by two racing threads
863 A and B,
864
865 1. Both threads enter @y@ before either is able to replace it with an
866 indirection
867
868 2. Thread A does the case analysis in @(++)@ and consequently enters @z@,
869 replacing it with a black-hole
870
871 3. At some later point thread B does the same case analysis and also attempts
872 to enter @z@. However, it finds that it has been replaced with a black-hole
873 so it blocks.
874
875 4. Thread A eventually finishes evaluating @z@ (to @[]@) and updates @y@
876 accordingly. It does *not* update @z@, however, as it is single-entry. This
877 leaves Thread B blocked forever on a black-hole which will never be
878 updated.
879
880 To avoid this sort of condition we never black-hole non-updatable thunks.
881 -}
882
883 isStaticClosure :: ClosureInfo -> Bool
884 isStaticClosure cl_info = isStaticRep (closureSMRep cl_info)
885
886 closureUpdReqd :: ClosureInfo -> Bool
887 closureUpdReqd ClosureInfo{ closureLFInfo = lf_info } = lfUpdatable lf_info
888
889 lfUpdatable :: LambdaFormInfo -> Bool
890 lfUpdatable (LFThunk _ _ upd _ _) = upd
891 lfUpdatable _ = False
892
893 closureSingleEntry :: ClosureInfo -> Bool
894 closureSingleEntry (ClosureInfo { closureLFInfo = LFThunk _ _ upd _ _}) = not upd
895 closureSingleEntry (ClosureInfo { closureLFInfo = LFReEntrant _ OneShotLam _ _ _}) = True
896 closureSingleEntry _ = False
897
898 closureReEntrant :: ClosureInfo -> Bool
899 closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant {} }) = True
900 closureReEntrant _ = False
901
902 closureFunInfo :: ClosureInfo -> Maybe (RepArity, ArgDescr)
903 closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info
904
905 lfFunInfo :: LambdaFormInfo -> Maybe (RepArity, ArgDescr)
906 lfFunInfo (LFReEntrant _ _ arity _ arg_desc) = Just (arity, arg_desc)
907 lfFunInfo _ = Nothing
908
909 funTag :: DynFlags -> ClosureInfo -> DynTag
910 funTag dflags (ClosureInfo { closureLFInfo = lf_info })
911 = lfDynTag dflags lf_info
912
913 isToplevClosure :: ClosureInfo -> Bool
914 isToplevClosure (ClosureInfo { closureLFInfo = lf_info })
915 = case lf_info of
916 LFReEntrant TopLevel _ _ _ _ -> True
917 LFThunk TopLevel _ _ _ _ -> True
918 _other -> False
919
920 --------------------------------------
921 -- Label generation
922 --------------------------------------
923
924 staticClosureLabel :: ClosureInfo -> CLabel
925 staticClosureLabel = toClosureLbl . closureInfoLabel
926
927 closureSlowEntryLabel :: ClosureInfo -> CLabel
928 closureSlowEntryLabel = toSlowEntryLbl . closureInfoLabel
929
930 closureLocalEntryLabel :: DynFlags -> ClosureInfo -> CLabel
931 closureLocalEntryLabel dflags
932 | tablesNextToCode dflags = toInfoLbl . closureInfoLabel
933 | otherwise = toEntryLbl . closureInfoLabel
934
935 mkClosureInfoTableLabel :: Id -> LambdaFormInfo -> CLabel
936 mkClosureInfoTableLabel id lf_info
937 = case lf_info of
938 LFThunk _ _ upd_flag (SelectorThunk offset) _
939 -> mkSelectorInfoLabel upd_flag offset
940
941 LFThunk _ _ upd_flag (ApThunk arity) _
942 -> mkApInfoTableLabel upd_flag arity
943
944 LFThunk{} -> std_mk_lbl name cafs
945 LFReEntrant{} -> std_mk_lbl name cafs
946 _other -> panic "closureInfoTableLabel"
947
948 where
949 name = idName id
950
951 std_mk_lbl | is_local = mkLocalInfoTableLabel
952 | otherwise = mkInfoTableLabel
953
954 cafs = idCafInfo id
955 is_local = isDataConWorkId id
956 -- Make the _info pointer for the implicit datacon worker
957 -- binding local. The reason we can do this is that importing
958 -- code always either uses the _closure or _con_info. By the
959 -- invariants in CorePrep anything else gets eta expanded.
960
961
962 thunkEntryLabel :: DynFlags -> Name -> CafInfo -> StandardFormInfo -> Bool -> CLabel
963 -- thunkEntryLabel is a local help function, not exported. It's used from
964 -- getCallMethod.
965 thunkEntryLabel dflags _thunk_id _ (ApThunk arity) upd_flag
966 = enterApLabel dflags upd_flag arity
967 thunkEntryLabel dflags _thunk_id _ (SelectorThunk offset) upd_flag
968 = enterSelectorLabel dflags upd_flag offset
969 thunkEntryLabel dflags thunk_id c _ _
970 = enterIdLabel dflags thunk_id c
971
972 enterApLabel :: DynFlags -> Bool -> Arity -> CLabel
973 enterApLabel dflags is_updatable arity
974 | tablesNextToCode dflags = mkApInfoTableLabel is_updatable arity
975 | otherwise = mkApEntryLabel is_updatable arity
976
977 enterSelectorLabel :: DynFlags -> Bool -> WordOff -> CLabel
978 enterSelectorLabel dflags upd_flag offset
979 | tablesNextToCode dflags = mkSelectorInfoLabel upd_flag offset
980 | otherwise = mkSelectorEntryLabel upd_flag offset
981
982 enterIdLabel :: DynFlags -> Name -> CafInfo -> CLabel
983 enterIdLabel dflags id c
984 | tablesNextToCode dflags = mkInfoTableLabel id c
985 | otherwise = mkEntryLabel id c
986
987
988 --------------------------------------
989 -- Profiling
990 --------------------------------------
991
992 -- Profiling requires two pieces of information to be determined for
993 -- each closure's info table --- description and type.
994
995 -- The description is stored directly in the @CClosureInfoTable@ when the
996 -- info table is built.
997
998 -- The type is determined from the type information stored with the @Id@
999 -- in the closure info using @closureTypeDescr@.
1000
1001 mkProfilingInfo :: DynFlags -> Id -> String -> ProfilingInfo
1002 mkProfilingInfo dflags id val_descr
1003 | not (gopt Opt_SccProfilingOn dflags) = NoProfilingInfo
1004 | otherwise = ProfilingInfo ty_descr_w8 val_descr_w8
1005 where
1006 ty_descr_w8 = stringToWord8s (getTyDescription (idType id))
1007 val_descr_w8 = stringToWord8s val_descr
1008
1009 getTyDescription :: Type -> String
1010 getTyDescription ty
1011 = case (tcSplitSigmaTy ty) of { (_, _, tau_ty) ->
1012 case tau_ty of
1013 TyVarTy _ -> "*"
1014 AppTy fun _ -> getTyDescription fun
1015 TyConApp tycon _ -> getOccString tycon
1016 FunTy _ res -> '-' : '>' : fun_result res
1017 ForAllTy _ ty -> getTyDescription ty
1018 LitTy n -> getTyLitDescription n
1019 CastTy ty _ -> getTyDescription ty
1020 CoercionTy co -> pprPanic "getTyDescription" (ppr co)
1021 }
1022 where
1023 fun_result (FunTy _ res) = '>' : fun_result res
1024 fun_result other = getTyDescription other
1025
1026 getTyLitDescription :: TyLit -> String
1027 getTyLitDescription l =
1028 case l of
1029 NumTyLit n -> show n
1030 StrTyLit n -> show n
1031
1032 --------------------------------------
1033 -- CmmInfoTable-related things
1034 --------------------------------------
1035
1036 mkDataConInfoTable :: DynFlags -> DataCon -> Bool -> Int -> Int -> CmmInfoTable
1037 mkDataConInfoTable dflags data_con is_static ptr_wds nonptr_wds
1038 = CmmInfoTable { cit_lbl = info_lbl
1039 , cit_rep = sm_rep
1040 , cit_prof = prof
1041 , cit_srt = NoC_SRT }
1042 where
1043 name = dataConName data_con
1044 info_lbl = mkConInfoTableLabel name NoCafRefs
1045 sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds cl_type
1046 cl_type = Constr (dataConTagZ data_con) (dataConIdentity data_con)
1047 -- We keep the *zero-indexed* tag in the srt_len field
1048 -- of the info table of a data constructor.
1049
1050 prof | not (gopt Opt_SccProfilingOn dflags) = NoProfilingInfo
1051 | otherwise = ProfilingInfo ty_descr val_descr
1052
1053 ty_descr = stringToWord8s $ occNameString $ getOccName $ dataConTyCon data_con
1054 val_descr = stringToWord8s $ occNameString $ getOccName data_con
1055
1056 -- We need a black-hole closure info to pass to @allocDynClosure@ when we
1057 -- want to allocate the black hole on entry to a CAF.
1058
1059 cafBlackHoleInfoTable :: CmmInfoTable
1060 cafBlackHoleInfoTable
1061 = CmmInfoTable { cit_lbl = mkCAFBlackHoleInfoTableLabel
1062 , cit_rep = blackHoleRep
1063 , cit_prof = NoProfilingInfo
1064 , cit_srt = NoC_SRT }
1065
1066 indStaticInfoTable :: CmmInfoTable
1067 indStaticInfoTable
1068 = CmmInfoTable { cit_lbl = mkIndStaticInfoLabel
1069 , cit_rep = indStaticRep
1070 , cit_prof = NoProfilingInfo
1071 , cit_srt = NoC_SRT }
1072
1073 staticClosureNeedsLink :: Bool -> CmmInfoTable -> Bool
1074 -- A static closure needs a link field to aid the GC when traversing
1075 -- the static closure graph. But it only needs such a field if either
1076 -- a) it has an SRT
1077 -- b) it's a constructor with one or more pointer fields
1078 -- In case (b), the constructor's fields themselves play the role
1079 -- of the SRT.
1080 staticClosureNeedsLink has_srt CmmInfoTable{ cit_rep = smrep }
1081 | isConRep smrep = not (isStaticNoCafCon smrep)
1082 | otherwise = has_srt -- needsSRT (cit_srt info_tbl)