Fix "CPP directive" in comment
[ghc.git] / compiler / codeGen / StgCmmClosure.hs
1 {-# LANGUAGE CPP, RecordWildCards #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Stg to C-- code generation:
6 --
7 -- The types LambdaFormInfo
8 -- ClosureInfo
9 --
10 -- Nothing monadic in here!
11 --
12 -----------------------------------------------------------------------------
13
14 module StgCmmClosure (
15 DynTag, tagForCon, isSmallFamily,
16 ConTagZ, dataConTagZ,
17
18 idPrimRep, isVoidRep, isGcPtrRep, addIdReps, addArgReps,
19 argPrimRep,
20
21 -- * LambdaFormInfo
22 LambdaFormInfo, -- Abstract
23 StandardFormInfo, -- ...ditto...
24 mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo,
25 mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape,
26 lfDynTag,
27 maybeIsLFCon, isLFThunk, isLFReEntrant, lfUpdatable,
28
29 -- * Used by other modules
30 CgLoc(..), SelfLoopInfo, CallMethod(..),
31 nodeMustPointToIt, isKnownFun, funTag, tagForArity, getCallMethod,
32
33 -- * ClosureInfo
34 ClosureInfo,
35 mkClosureInfo,
36 mkCmmInfo,
37
38 -- ** Inspection
39 closureLFInfo, closureName,
40
41 -- ** Labels
42 -- These just need the info table label
43 closureInfoLabel, staticClosureLabel,
44 closureSlowEntryLabel, closureLocalEntryLabel,
45
46 -- ** Predicates
47 -- These are really just functions on LambdaFormInfo
48 closureUpdReqd, closureSingleEntry,
49 closureReEntrant, closureFunInfo,
50 isToplevClosure,
51
52 blackHoleOnEntry, -- Needs LambdaFormInfo and SMRep
53 isStaticClosure, -- Needs SMPre
54
55 -- * InfoTables
56 mkDataConInfoTable,
57 cafBlackHoleInfoTable,
58 indStaticInfoTable,
59 staticClosureNeedsLink,
60 ) where
61
62 #include "../includes/MachDeps.h"
63
64 #define FAST_STRING_NOT_NEEDED
65 #include "HsVersions.h"
66
67 import StgSyn
68 import SMRep
69 import Cmm
70 import PprCmmExpr()
71
72 import BlockId
73 import CLabel
74 import Id
75 import IdInfo
76 import DataCon
77 import FastString
78 import Name
79 import Type
80 import TypeRep
81 import TcType
82 import TyCon
83 import BasicTypes
84 import Outputable
85 import DynFlags
86 import Util
87
88 -----------------------------------------------------------------------------
89 -- Data types and synonyms
90 -----------------------------------------------------------------------------
91
92 -- These data types are mostly used by other modules, especially StgCmmMonad,
93 -- but we define them here because some functions in this module need to
94 -- have access to them as well
95
96 data CgLoc
97 = CmmLoc CmmExpr -- A stable CmmExpr; that is, one not mentioning
98 -- Hp, so that it remains valid across calls
99
100 | LneLoc BlockId [LocalReg] -- A join point
101 -- A join point (= let-no-escape) should only
102 -- be tail-called, and in a saturated way.
103 -- To tail-call it, assign to these locals,
104 -- and branch to the block id
105
106 instance Outputable CgLoc where
107 ppr (CmmLoc e) = ptext (sLit "cmm") <+> ppr e
108 ppr (LneLoc b rs) = ptext (sLit "lne") <+> ppr b <+> ppr rs
109
110 type SelfLoopInfo = (Id, BlockId, [LocalReg])
111
112 -- used by ticky profiling
113 isKnownFun :: LambdaFormInfo -> Bool
114 isKnownFun (LFReEntrant _ _ _ _) = True
115 isKnownFun LFLetNoEscape = True
116 isKnownFun _ = False
117
118
119 -----------------------------------------------------------------------------
120 -- Representations
121 -----------------------------------------------------------------------------
122
123 -- Why are these here?
124
125 idPrimRep :: Id -> PrimRep
126 idPrimRep id = typePrimRep (idType id)
127 -- NB: typePrimRep fails on unboxed tuples,
128 -- but by StgCmm no Ids have unboxed tuple type
129
130 addIdReps :: [Id] -> [(PrimRep, Id)]
131 addIdReps ids = [(idPrimRep id, id) | id <- ids]
132
133 addArgReps :: [StgArg] -> [(PrimRep, StgArg)]
134 addArgReps args = [(argPrimRep arg, arg) | arg <- args]
135
136 argPrimRep :: StgArg -> PrimRep
137 argPrimRep arg = typePrimRep (stgArgType arg)
138
139
140 -----------------------------------------------------------------------------
141 -- LambdaFormInfo
142 -----------------------------------------------------------------------------
143
144 -- Information about an identifier, from the code generator's point of
145 -- view. Every identifier is bound to a LambdaFormInfo in the
146 -- environment, which gives the code generator enough info to be able to
147 -- tail call or return that identifier.
148
149 data LambdaFormInfo
150 = LFReEntrant -- Reentrant closure (a function)
151 TopLevelFlag -- True if top level
152 !RepArity -- Arity. Invariant: always > 0
153 !Bool -- True <=> no fvs
154 ArgDescr -- Argument descriptor (should really be in ClosureInfo)
155
156 | LFThunk -- Thunk (zero arity)
157 TopLevelFlag
158 !Bool -- True <=> no free vars
159 !Bool -- True <=> updatable (i.e., *not* single-entry)
160 StandardFormInfo
161 !Bool -- True <=> *might* be a function type
162
163 | LFCon -- A saturated constructor application
164 DataCon -- The constructor
165
166 | LFUnknown -- Used for function arguments and imported things.
167 -- We know nothing about this closure.
168 -- Treat like updatable "LFThunk"...
169 -- Imported things which we *do* know something about use
170 -- one of the other LF constructors (eg LFReEntrant for
171 -- known functions)
172 !Bool -- True <=> *might* be a function type
173 -- The False case is good when we want to enter it,
174 -- because then we know the entry code will do
175 -- For a function, the entry code is the fast entry point
176
177 | LFUnLifted -- A value of unboxed type;
178 -- always a value, needs evaluation
179
180 | LFLetNoEscape -- See LetNoEscape module for precise description
181
182
183 -------------------------
184 -- StandardFormInfo tells whether this thunk has one of
185 -- a small number of standard forms
186
187 data StandardFormInfo
188 = NonStandardThunk
189 -- The usual case: not of the standard forms
190
191 | SelectorThunk
192 -- A SelectorThunk is of form
193 -- case x of
194 -- con a1,..,an -> ak
195 -- and the constructor is from a single-constr type.
196 WordOff -- 0-origin offset of ak within the "goods" of
197 -- constructor (Recall that the a1,...,an may be laid
198 -- out in the heap in a non-obvious order.)
199
200 | ApThunk
201 -- An ApThunk is of form
202 -- x1 ... xn
203 -- The code for the thunk just pushes x2..xn on the stack and enters x1.
204 -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled
205 -- in the RTS to save space.
206 RepArity -- Arity, n
207
208
209 ------------------------------------------------------
210 -- Building LambdaFormInfo
211 ------------------------------------------------------
212
213 mkLFArgument :: Id -> LambdaFormInfo
214 mkLFArgument id
215 | isUnLiftedType ty = LFUnLifted
216 | might_be_a_function ty = LFUnknown True
217 | otherwise = LFUnknown False
218 where
219 ty = idType id
220
221 -------------
222 mkLFLetNoEscape :: LambdaFormInfo
223 mkLFLetNoEscape = LFLetNoEscape
224
225 -------------
226 mkLFReEntrant :: TopLevelFlag -- True of top level
227 -> [Id] -- Free vars
228 -> [Id] -- Args
229 -> ArgDescr -- Argument descriptor
230 -> LambdaFormInfo
231
232 mkLFReEntrant top fvs args arg_descr
233 = LFReEntrant top (length args) (null fvs) arg_descr
234
235 -------------
236 mkLFThunk :: Type -> TopLevelFlag -> [Id] -> UpdateFlag -> LambdaFormInfo
237 mkLFThunk thunk_ty top fvs upd_flag
238 = ASSERT( not (isUpdatable upd_flag) || not (isUnLiftedType thunk_ty) )
239 LFThunk top (null fvs)
240 (isUpdatable upd_flag)
241 NonStandardThunk
242 (might_be_a_function thunk_ty)
243
244 --------------
245 might_be_a_function :: Type -> Bool
246 -- Return False only if we are *sure* it's a data type
247 -- Look through newtypes etc as much as poss
248 might_be_a_function ty
249 | UnaryRep rep <- repType ty
250 , Just tc <- tyConAppTyCon_maybe rep
251 , isDataTyCon tc
252 = False
253 | otherwise
254 = True
255
256 -------------
257 mkConLFInfo :: DataCon -> LambdaFormInfo
258 mkConLFInfo con = LFCon con
259
260 -------------
261 mkSelectorLFInfo :: Id -> Int -> Bool -> LambdaFormInfo
262 mkSelectorLFInfo id offset updatable
263 = LFThunk NotTopLevel False updatable (SelectorThunk offset)
264 (might_be_a_function (idType id))
265
266 -------------
267 mkApLFInfo :: Id -> UpdateFlag -> Arity -> LambdaFormInfo
268 mkApLFInfo id upd_flag arity
269 = LFThunk NotTopLevel (arity == 0) (isUpdatable upd_flag) (ApThunk arity)
270 (might_be_a_function (idType id))
271
272 -------------
273 mkLFImported :: Id -> LambdaFormInfo
274 mkLFImported id
275 | Just con <- isDataConWorkId_maybe id
276 , isNullaryRepDataCon con
277 = LFCon con -- An imported nullary constructor
278 -- We assume that the constructor is evaluated so that
279 -- the id really does point directly to the constructor
280
281 | arity > 0
282 = LFReEntrant TopLevel arity True (panic "arg_descr")
283
284 | otherwise
285 = mkLFArgument id -- Not sure of exact arity
286 where
287 arity = idRepArity id
288
289 -----------------------------------------------------
290 -- Dynamic pointer tagging
291 -----------------------------------------------------
292
293 type ConTagZ = Int -- A *zero-indexed* contructor tag
294
295 type DynTag = Int -- The tag on a *pointer*
296 -- (from the dynamic-tagging paper)
297
298 -- Note [Data constructor dynamic tags]
299 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
300 --
301 -- The family size of a data type (the number of constructors
302 -- or the arity of a function) can be either:
303 -- * small, if the family size < 2**tag_bits
304 -- * big, otherwise.
305 --
306 -- Small families can have the constructor tag in the tag bits.
307 -- Big families only use the tag value 1 to represent evaluatedness.
308 -- We don't have very many tag bits: for example, we have 2 bits on
309 -- x86-32 and 3 bits on x86-64.
310
311 isSmallFamily :: DynFlags -> Int -> Bool
312 isSmallFamily dflags fam_size = fam_size <= mAX_PTR_TAG dflags
313
314 -- We keep the *zero-indexed* tag in the srt_len field of the info
315 -- table of a data constructor.
316 dataConTagZ :: DataCon -> ConTagZ
317 dataConTagZ con = dataConTag con - fIRST_TAG
318
319 tagForCon :: DynFlags -> DataCon -> DynTag
320 tagForCon dflags con
321 | isSmallFamily dflags fam_size = con_tag + 1
322 | otherwise = 1
323 where
324 con_tag = dataConTagZ con
325 fam_size = tyConFamilySize (dataConTyCon con)
326
327 tagForArity :: DynFlags -> RepArity -> DynTag
328 tagForArity dflags arity
329 | isSmallFamily dflags arity = arity
330 | otherwise = 0
331
332 lfDynTag :: DynFlags -> LambdaFormInfo -> DynTag
333 -- Return the tag in the low order bits of a variable bound
334 -- to this LambdaForm
335 lfDynTag dflags (LFCon con) = tagForCon dflags con
336 lfDynTag dflags (LFReEntrant _ arity _ _) = tagForArity dflags arity
337 lfDynTag _ _other = 0
338
339
340 -----------------------------------------------------------------------------
341 -- Observing LambdaFormInfo
342 -----------------------------------------------------------------------------
343
344 -------------
345 maybeIsLFCon :: LambdaFormInfo -> Maybe DataCon
346 maybeIsLFCon (LFCon con) = Just con
347 maybeIsLFCon _ = Nothing
348
349 ------------
350 isLFThunk :: LambdaFormInfo -> Bool
351 isLFThunk (LFThunk {}) = True
352 isLFThunk _ = False
353
354 isLFReEntrant :: LambdaFormInfo -> Bool
355 isLFReEntrant (LFReEntrant {}) = True
356 isLFReEntrant _ = False
357
358 -----------------------------------------------------------------------------
359 -- Choosing SM reps
360 -----------------------------------------------------------------------------
361
362 lfClosureType :: LambdaFormInfo -> ClosureTypeInfo
363 lfClosureType (LFReEntrant _ arity _ argd) = Fun arity argd
364 lfClosureType (LFCon con) = Constr (dataConTagZ con)
365 (dataConIdentity con)
366 lfClosureType (LFThunk _ _ _ is_sel _) = thunkClosureType is_sel
367 lfClosureType _ = panic "lfClosureType"
368
369 thunkClosureType :: StandardFormInfo -> ClosureTypeInfo
370 thunkClosureType (SelectorThunk off) = ThunkSelector off
371 thunkClosureType _ = Thunk
372
373 -- We *do* get non-updatable top-level thunks sometimes. eg. f = g
374 -- gets compiled to a jump to g (if g has non-zero arity), instead of
375 -- messing around with update frames and PAPs. We set the closure type
376 -- to FUN_STATIC in this case.
377
378 -----------------------------------------------------------------------------
379 -- nodeMustPointToIt
380 -----------------------------------------------------------------------------
381
382 nodeMustPointToIt :: DynFlags -> LambdaFormInfo -> Bool
383 -- If nodeMustPointToIt is true, then the entry convention for
384 -- this closure has R1 (the "Node" register) pointing to the
385 -- closure itself --- the "self" argument
386
387 nodeMustPointToIt _ (LFReEntrant top _ no_fvs _)
388 = not no_fvs -- Certainly if it has fvs we need to point to it
389 || isNotTopLevel top -- See Note [GC recovery]
390 -- For lex_profiling we also access the cost centre for a
391 -- non-inherited (i.e. non-top-level) function.
392 -- The isNotTopLevel test above ensures this is ok.
393
394 nodeMustPointToIt dflags (LFThunk top no_fvs updatable NonStandardThunk _)
395 = not no_fvs -- Self parameter
396 || isNotTopLevel top -- Note [GC recovery]
397 || updatable -- Need to push update frame
398 || gopt Opt_SccProfilingOn dflags
399 -- For the non-updatable (single-entry case):
400 --
401 -- True if has fvs (in which case we need access to them, and we
402 -- should black-hole it)
403 -- or profiling (in which case we need to recover the cost centre
404 -- from inside it) ToDo: do we need this even for
405 -- top-level thunks? If not,
406 -- isNotTopLevel subsumes this
407
408 nodeMustPointToIt _ (LFThunk {}) -- Node must point to a standard-form thunk
409 = True
410
411 nodeMustPointToIt _ (LFCon _) = True
412
413 -- Strictly speaking, the above two don't need Node to point
414 -- to it if the arity = 0. But this is a *really* unlikely
415 -- situation. If we know it's nil (say) and we are entering
416 -- it. Eg: let x = [] in x then we will certainly have inlined
417 -- x, since nil is a simple atom. So we gain little by not
418 -- having Node point to known zero-arity things. On the other
419 -- hand, we do lose something; Patrick's code for figuring out
420 -- when something has been updated but not entered relies on
421 -- having Node point to the result of an update. SLPJ
422 -- 27/11/92.
423
424 nodeMustPointToIt _ (LFUnknown _) = True
425 nodeMustPointToIt _ LFUnLifted = False
426 nodeMustPointToIt _ LFLetNoEscape = False
427
428 {- Note [GC recovery]
429 ~~~~~~~~~~~~~~~~~~~~~
430 If we a have a local let-binding (function or thunk)
431 let f = <body> in ...
432 AND <body> allocates, then the heap-overflow check needs to know how
433 to re-start the evaluation. It uses the "self" pointer to do this.
434 So even if there are no free variables in <body>, we still make
435 nodeMustPointToIt be True for non-top-level bindings.
436
437 Why do any such bindings exist? After all, let-floating should have
438 floated them out. Well, a clever optimiser might leave one there to
439 avoid a space leak, deliberately recomputing a thunk. Also (and this
440 really does happen occasionally) let-floating may make a function f smaller
441 so it can be inlined, so now (f True) may generate a local no-fv closure.
442 This actually happened during bootsrapping GHC itself, with f=mkRdrFunBind
443 in TcGenDeriv.) -}
444
445 -----------------------------------------------------------------------------
446 -- getCallMethod
447 -----------------------------------------------------------------------------
448
449 {- The entry conventions depend on the type of closure being entered,
450 whether or not it has free variables, and whether we're running
451 sequentially or in parallel.
452
453 Closure Node Argument Enter
454 Characteristics Par Req'd Passing Via
455 ---------------------------------------------------------------------------
456 Unknown & no & yes & stack & node
457 Known fun (>1 arg), no fvs & no & no & registers & fast entry (enough args)
458 & slow entry (otherwise)
459 Known fun (>1 arg), fvs & no & yes & registers & fast entry (enough args)
460 0 arg, no fvs \r,\s & no & no & n/a & direct entry
461 0 arg, no fvs \u & no & yes & n/a & node
462 0 arg, fvs \r,\s,selector & no & yes & n/a & node
463 0 arg, fvs \r,\s & no & yes & n/a & direct entry
464 0 arg, fvs \u & no & yes & n/a & node
465 Unknown & yes & yes & stack & node
466 Known fun (>1 arg), no fvs & yes & no & registers & fast entry (enough args)
467 & slow entry (otherwise)
468 Known fun (>1 arg), fvs & yes & yes & registers & node
469 0 arg, fvs \r,\s,selector & yes & yes & n/a & node
470 0 arg, no fvs \r,\s & yes & no & n/a & direct entry
471 0 arg, no fvs \u & yes & yes & n/a & node
472 0 arg, fvs \r,\s & yes & yes & n/a & node
473 0 arg, fvs \u & yes & yes & n/a & node
474
475 When black-holing, single-entry closures could also be entered via node
476 (rather than directly) to catch double-entry. -}
477
478 data CallMethod
479 = EnterIt -- No args, not a function
480
481 | JumpToIt BlockId [LocalReg] -- A join point or a header of a local loop
482
483 | ReturnIt -- It's a value (function, unboxed value,
484 -- or constructor), so just return it.
485
486 | SlowCall -- Unknown fun, or known fun with
487 -- too few args.
488
489 | DirectEntry -- Jump directly, with args in regs
490 CLabel -- The code label
491 RepArity -- Its arity
492
493 getCallMethod :: DynFlags
494 -> Name -- Function being applied
495 -> Id -- Function Id used to chech if it can refer to
496 -- CAF's and whether the function is tail-calling
497 -- itself
498 -> LambdaFormInfo -- Its info
499 -> RepArity -- Number of available arguments
500 -> CgLoc -- Passed in from cgIdApp so that we can
501 -- handle let-no-escape bindings and self-recursive
502 -- tail calls using the same data constructor,
503 -- JumpToIt. This saves us one case branch in
504 -- cgIdApp
505 -> Maybe SelfLoopInfo -- can we perform a self-recursive tail call?
506 -> CallMethod
507
508 getCallMethod dflags _ id _ n_args _cg_loc (Just (self_loop_id, block_id, args))
509 | gopt Opt_Loopification dflags, id == self_loop_id, n_args == length args
510 -- If these patterns match then we know that:
511 -- * loopification optimisation is turned on
512 -- * function is performing a self-recursive call in a tail position
513 -- * number of parameters of the function matches functions arity.
514 -- See Note [Self-recursive tail calls] in StgCmmExpr for more details
515 = JumpToIt block_id args
516
517 getCallMethod dflags _name _ lf_info _n_args _cg_loc _self_loop_info
518 | nodeMustPointToIt dflags lf_info && gopt Opt_Parallel dflags
519 = -- If we're parallel, then we must always enter via node.
520 -- The reason is that the closure may have been
521 -- fetched since we allocated it.
522 EnterIt
523
524 getCallMethod dflags name id (LFReEntrant _ arity _ _) n_args _cg_loc
525 _self_loop_info
526 | n_args == 0 = ASSERT( arity /= 0 )
527 ReturnIt -- No args at all
528 | n_args < arity = SlowCall -- Not enough args
529 | otherwise = DirectEntry (enterIdLabel dflags name (idCafInfo id)) arity
530
531 getCallMethod _ _name _ LFUnLifted n_args _cg_loc _self_loop_info
532 = ASSERT( n_args == 0 ) ReturnIt
533
534 getCallMethod _ _name _ (LFCon _) n_args _cg_loc _self_loop_info
535 = ASSERT( n_args == 0 ) ReturnIt
536
537 getCallMethod dflags name id (LFThunk _ _ updatable std_form_info is_fun)
538 n_args _cg_loc _self_loop_info
539 | is_fun -- it *might* be a function, so we must "call" it (which is always safe)
540 = SlowCall -- We cannot just enter it [in eval/apply, the entry code
541 -- is the fast-entry code]
542
543 -- Since is_fun is False, we are *definitely* looking at a data value
544 | updatable || gopt Opt_Ticky dflags -- to catch double entry
545 {- OLD: || opt_SMP
546 I decided to remove this, because in SMP mode it doesn't matter
547 if we enter the same thunk multiple times, so the optimisation
548 of jumping directly to the entry code is still valid. --SDM
549 -}
550 = EnterIt
551
552 -- even a non-updatable selector thunk can be updated by the garbage
553 -- collector, so we must enter it. (#8817)
554 | SelectorThunk{} <- std_form_info
555 = EnterIt
556
557 -- We used to have ASSERT( n_args == 0 ), but actually it is
558 -- possible for the optimiser to generate
559 -- let bot :: Int = error Int "urk"
560 -- in (bot `cast` unsafeCoerce Int (Int -> Int)) 3
561 -- This happens as a result of the case-of-error transformation
562 -- So the right thing to do is just to enter the thing
563
564 | otherwise -- Jump direct to code for single-entry thunks
565 = ASSERT( n_args == 0 )
566 DirectEntry (thunkEntryLabel dflags name (idCafInfo id) std_form_info
567 updatable) 0
568
569 getCallMethod _ _name _ (LFUnknown True) _n_arg _cg_locs _self_loop_info
570 = SlowCall -- might be a function
571
572 getCallMethod _ name _ (LFUnknown False) n_args _cg_loc _self_loop_info
573 = ASSERT2( n_args == 0, ppr name <+> ppr n_args )
574 EnterIt -- Not a function
575
576 getCallMethod _ _name _ LFLetNoEscape _n_args (LneLoc blk_id lne_regs)
577 _self_loop_info
578 = JumpToIt blk_id lne_regs
579
580 getCallMethod _ _ _ _ _ _ _ = panic "Unknown call method"
581
582 -----------------------------------------------------------------------------
583 -- staticClosureRequired
584 -----------------------------------------------------------------------------
585
586 {- staticClosureRequired is never called (hence commented out)
587
588 SimonMar writes (Sept 07) It's an optimisation we used to apply at
589 one time, I believe, but it got lost probably in the rewrite of
590 the RTS/code generator. I left that code there to remind me to
591 look into whether it was worth doing sometime
592
593 {- Avoiding generating entries and info tables
594 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
595 At present, for every function we generate all of the following,
596 just in case. But they aren't always all needed, as noted below:
597
598 [NB1: all of this applies only to *functions*. Thunks always
599 have closure, info table, and entry code.]
600
601 [NB2: All are needed if the function is *exported*, just to play safe.]
602
603 * Fast-entry code ALWAYS NEEDED
604
605 * Slow-entry code
606 Needed iff (a) we have any un-saturated calls to the function
607 OR (b) the function is passed as an arg
608 OR (c) we're in the parallel world and the function has free vars
609 [Reason: in parallel world, we always enter functions
610 with free vars via the closure.]
611
612 * The function closure
613 Needed iff (a) we have any un-saturated calls to the function
614 OR (b) the function is passed as an arg
615 OR (c) if the function has free vars (ie not top level)
616
617 Why case (a) here? Because if the arg-satis check fails,
618 UpdatePAP stuffs a pointer to the function closure in the PAP.
619 [Could be changed; UpdatePAP could stuff in a code ptr instead,
620 but doesn't seem worth it.]
621
622 [NB: these conditions imply that we might need the closure
623 without the slow-entry code. Here's how.
624
625 f x y = let g w = ...x..y..w...
626 in
627 ...(g t)...
628
629 Here we need a closure for g which contains x and y,
630 but since the calls are all saturated we just jump to the
631 fast entry point for g, with R1 pointing to the closure for g.]
632
633
634 * Standard info table
635 Needed iff (a) we have any un-saturated calls to the function
636 OR (b) the function is passed as an arg
637 OR (c) the function has free vars (ie not top level)
638
639 NB. In the sequential world, (c) is only required so that the function closure has
640 an info table to point to, to keep the storage manager happy.
641 If (c) alone is true we could fake up an info table by choosing
642 one of a standard family of info tables, whose entry code just
643 bombs out.
644
645 [NB In the parallel world (c) is needed regardless because
646 we enter functions with free vars via the closure.]
647
648 If (c) is retained, then we'll sometimes generate an info table
649 (for storage mgr purposes) without slow-entry code. Then we need
650 to use an error label in the info table to substitute for the absent
651 slow entry code.
652 -}
653
654 staticClosureRequired
655 :: Name
656 -> StgBinderInfo
657 -> LambdaFormInfo
658 -> Bool
659 staticClosureRequired binder bndr_info
660 (LFReEntrant top_level _ _ _) -- It's a function
661 = ASSERT( isTopLevel top_level )
662 -- Assumption: it's a top-level, no-free-var binding
663 not (satCallsOnly bndr_info)
664
665 staticClosureRequired binder other_binder_info other_lf_info = True
666 -}
667
668 -----------------------------------------------------------------------------
669 -- Data types for closure information
670 -----------------------------------------------------------------------------
671
672
673 {- ClosureInfo: information about a binding
674
675 We make a ClosureInfo for each let binding (both top level and not),
676 but not bindings for data constructors: for those we build a CmmInfoTable
677 directly (see mkDataConInfoTable).
678
679 To a first approximation:
680 ClosureInfo = (LambdaFormInfo, CmmInfoTable)
681
682 A ClosureInfo has enough information
683 a) to construct the info table itself, and build other things
684 related to the binding (e.g. slow entry points for a function)
685 b) to allocate a closure containing that info pointer (i.e.
686 it knows the info table label)
687 -}
688
689 data ClosureInfo
690 = ClosureInfo {
691 closureName :: !Name, -- The thing bound to this closure
692 -- we don't really need this field: it's only used in generating
693 -- code for ticky and profiling, and we could pass the information
694 -- around separately, but it doesn't do much harm to keep it here.
695
696 closureLFInfo :: !LambdaFormInfo, -- NOTE: not an LFCon
697 -- this tells us about what the closure contains: it's right-hand-side.
698
699 -- the rest is just an unpacked CmmInfoTable.
700 closureInfoLabel :: !CLabel,
701 closureSMRep :: !SMRep, -- representation used by storage mgr
702 closureProf :: !ProfilingInfo
703 }
704
705 -- | Convert from 'ClosureInfo' to 'CmmInfoTable'.
706 mkCmmInfo :: ClosureInfo -> CmmInfoTable
707 mkCmmInfo ClosureInfo {..}
708 = CmmInfoTable { cit_lbl = closureInfoLabel
709 , cit_rep = closureSMRep
710 , cit_prof = closureProf
711 , cit_srt = NoC_SRT }
712
713 --------------------------------------
714 -- Building ClosureInfos
715 --------------------------------------
716
717 mkClosureInfo :: DynFlags
718 -> Bool -- Is static
719 -> Id
720 -> LambdaFormInfo
721 -> Int -> Int -- Total and pointer words
722 -> String -- String descriptor
723 -> ClosureInfo
724 mkClosureInfo dflags is_static id lf_info tot_wds ptr_wds val_descr
725 = ClosureInfo { closureName = name
726 , closureLFInfo = lf_info
727 , closureInfoLabel = info_lbl -- These three fields are
728 , closureSMRep = sm_rep -- (almost) an info table
729 , closureProf = prof } -- (we don't have an SRT yet)
730 where
731 name = idName id
732 sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds (lfClosureType lf_info)
733 prof = mkProfilingInfo dflags id val_descr
734 nonptr_wds = tot_wds - ptr_wds
735
736 info_lbl = mkClosureInfoTableLabel id lf_info
737
738 --------------------------------------
739 -- Other functions over ClosureInfo
740 --------------------------------------
741
742 -- Eager blackholing is normally disabled, but can be turned on with
743 -- -feager-blackholing. When it is on, we replace the info pointer of
744 -- the thunk with stg_EAGER_BLACKHOLE_info on entry.
745
746 -- If we wanted to do eager blackholing with slop filling,
747 -- we'd need to do it at the *end* of a basic block, otherwise
748 -- we overwrite the free variables in the thunk that we still
749 -- need. We have a patch for this from Andy Cheadle, but not
750 -- incorporated yet. --SDM [6/2004]
751 --
752 --
753 -- Previously, eager blackholing was enabled when ticky-ticky
754 -- was on. But it didn't work, and it wasn't strictly necessary
755 -- to bring back minimal ticky-ticky, so now EAGER_BLACKHOLING
756 -- is unconditionally disabled. -- krc 1/2007
757 --
758 --
759 -- Static closures are never themselves black-holed.
760 --
761 -- We also never black-hole non-updatable thunks.
762 -- See Note [Black-holing non-updatable thunks]
763
764 blackHoleOnEntry :: ClosureInfo -> Bool
765 blackHoleOnEntry cl_info
766 | isStaticRep (closureSMRep cl_info)
767 = False -- Never black-hole a static closure
768
769 | otherwise
770 = case closureLFInfo cl_info of
771 LFReEntrant _ _ _ _ -> False
772 LFLetNoEscape -> False
773 LFThunk _ _no_fvs updatable _ _ -> updatable
774 _other -> panic "blackHoleOnEntry" -- Should never happen
775
776 {-
777 Note [Black-holing non-updatable thunks]
778 =========================================
779
780 We cannot black-hole non-updatable thunks otherwise we run into issues like
781 Trac #10414. A single-entry (non-updatable) thunk can actually be entered more
782 than once in a parallel program, if work is duplicated by two threads both
783 entering the same updatable thunk before the other has blackholed it. So, we
784 must not eagerly blackhole non-updatable thunks, or the second thread to enter
785 one will become blocked indefinitely. (They are not blackholed by lazy
786 blackholing either, since they have no associated update frame.)
787
788 For instance, let's consider the following value (in pseudo-Core, example due to
789 Reid Barton),
790
791 x = \u [] concat [[1], []]
792
793 with the following definitions,
794
795 concat x = case x of
796 [] -> []
797 (:) x xs -> (++) x (concat xs)
798
799 (++) xs ys = case xs of
800 [] -> ys
801 (:) x rest -> (:) x ((++) rest ys)
802
803 Where we use the syntax @\u []@ to denote an updatable thunk and @\s []@ to
804 denote a single-entry (i.e. non-updatable) thunk. After a thread evaluates @x@
805 to WHNF and calls @(++)@ the heap will contain the following thunks,
806
807 x = 1 : y
808 y = \u [] (++) [] z
809 z = \s [] concat []
810
811 Now that the stage is set, consider the follow evaluations by two racing threads
812 A and B,
813
814 1. Both threads enter @y@ before either is able to replace it with an
815 indirection
816
817 2. Thread A does the case analysis in @(++)@ and consequently enters @z@,
818 replacing it with a black-hole
819
820 3. At some later point thread B does the same case analysis and also attempts
821 to enter @z@. However, it finds that it has been replaced with a black-hole
822 so it blocks.
823
824 4. Thread A eventually finishes evaluating @z@ (to @[]@) and updates @y@
825 accordingly. It does *not* update @z@, however, as it is single-entry. This
826 leaves Thread B blocked forever on a black-hole which will never be
827 updated.
828
829 To avoid this sort of condition we never black-hole non-updatable thunks.
830 -}
831
832 isStaticClosure :: ClosureInfo -> Bool
833 isStaticClosure cl_info = isStaticRep (closureSMRep cl_info)
834
835 closureUpdReqd :: ClosureInfo -> Bool
836 closureUpdReqd ClosureInfo{ closureLFInfo = lf_info } = lfUpdatable lf_info
837
838 lfUpdatable :: LambdaFormInfo -> Bool
839 lfUpdatable (LFThunk _ _ upd _ _) = upd
840 lfUpdatable _ = False
841
842 closureSingleEntry :: ClosureInfo -> Bool
843 closureSingleEntry (ClosureInfo { closureLFInfo = LFThunk _ _ upd _ _}) = not upd
844 closureSingleEntry _ = False
845
846 closureReEntrant :: ClosureInfo -> Bool
847 closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant _ _ _ _ }) = True
848 closureReEntrant _ = False
849
850 closureFunInfo :: ClosureInfo -> Maybe (RepArity, ArgDescr)
851 closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info
852
853 lfFunInfo :: LambdaFormInfo -> Maybe (RepArity, ArgDescr)
854 lfFunInfo (LFReEntrant _ arity _ arg_desc) = Just (arity, arg_desc)
855 lfFunInfo _ = Nothing
856
857 funTag :: DynFlags -> ClosureInfo -> DynTag
858 funTag dflags (ClosureInfo { closureLFInfo = lf_info })
859 = lfDynTag dflags lf_info
860
861 isToplevClosure :: ClosureInfo -> Bool
862 isToplevClosure (ClosureInfo { closureLFInfo = lf_info })
863 = case lf_info of
864 LFReEntrant TopLevel _ _ _ -> True
865 LFThunk TopLevel _ _ _ _ -> True
866 _other -> False
867
868 --------------------------------------
869 -- Label generation
870 --------------------------------------
871
872 staticClosureLabel :: ClosureInfo -> CLabel
873 staticClosureLabel = toClosureLbl . closureInfoLabel
874
875 closureSlowEntryLabel :: ClosureInfo -> CLabel
876 closureSlowEntryLabel = toSlowEntryLbl . closureInfoLabel
877
878 closureLocalEntryLabel :: DynFlags -> ClosureInfo -> CLabel
879 closureLocalEntryLabel dflags
880 | tablesNextToCode dflags = toInfoLbl . closureInfoLabel
881 | otherwise = toEntryLbl . closureInfoLabel
882
883 mkClosureInfoTableLabel :: Id -> LambdaFormInfo -> CLabel
884 mkClosureInfoTableLabel id lf_info
885 = case lf_info of
886 LFThunk _ _ upd_flag (SelectorThunk offset) _
887 -> mkSelectorInfoLabel upd_flag offset
888
889 LFThunk _ _ upd_flag (ApThunk arity) _
890 -> mkApInfoTableLabel upd_flag arity
891
892 LFThunk{} -> std_mk_lbl name cafs
893 LFReEntrant{} -> std_mk_lbl name cafs
894 _other -> panic "closureInfoTableLabel"
895
896 where
897 name = idName id
898
899 std_mk_lbl | is_local = mkLocalInfoTableLabel
900 | otherwise = mkInfoTableLabel
901
902 cafs = idCafInfo id
903 is_local = isDataConWorkId id
904 -- Make the _info pointer for the implicit datacon worker
905 -- binding local. The reason we can do this is that importing
906 -- code always either uses the _closure or _con_info. By the
907 -- invariants in CorePrep anything else gets eta expanded.
908
909
910 thunkEntryLabel :: DynFlags -> Name -> CafInfo -> StandardFormInfo -> Bool -> CLabel
911 -- thunkEntryLabel is a local help function, not exported. It's used from
912 -- getCallMethod.
913 thunkEntryLabel dflags _thunk_id _ (ApThunk arity) upd_flag
914 = enterApLabel dflags upd_flag arity
915 thunkEntryLabel dflags _thunk_id _ (SelectorThunk offset) upd_flag
916 = enterSelectorLabel dflags upd_flag offset
917 thunkEntryLabel dflags thunk_id c _ _
918 = enterIdLabel dflags thunk_id c
919
920 enterApLabel :: DynFlags -> Bool -> Arity -> CLabel
921 enterApLabel dflags is_updatable arity
922 | tablesNextToCode dflags = mkApInfoTableLabel is_updatable arity
923 | otherwise = mkApEntryLabel is_updatable arity
924
925 enterSelectorLabel :: DynFlags -> Bool -> WordOff -> CLabel
926 enterSelectorLabel dflags upd_flag offset
927 | tablesNextToCode dflags = mkSelectorInfoLabel upd_flag offset
928 | otherwise = mkSelectorEntryLabel upd_flag offset
929
930 enterIdLabel :: DynFlags -> Name -> CafInfo -> CLabel
931 enterIdLabel dflags id c
932 | tablesNextToCode dflags = mkInfoTableLabel id c
933 | otherwise = mkEntryLabel id c
934
935
936 --------------------------------------
937 -- Profiling
938 --------------------------------------
939
940 -- Profiling requires two pieces of information to be determined for
941 -- each closure's info table --- description and type.
942
943 -- The description is stored directly in the @CClosureInfoTable@ when the
944 -- info table is built.
945
946 -- The type is determined from the type information stored with the @Id@
947 -- in the closure info using @closureTypeDescr@.
948
949 mkProfilingInfo :: DynFlags -> Id -> String -> ProfilingInfo
950 mkProfilingInfo dflags id val_descr
951 | not (gopt Opt_SccProfilingOn dflags) = NoProfilingInfo
952 | otherwise = ProfilingInfo ty_descr_w8 val_descr_w8
953 where
954 ty_descr_w8 = stringToWord8s (getTyDescription (idType id))
955 val_descr_w8 = stringToWord8s val_descr
956
957 getTyDescription :: Type -> String
958 getTyDescription ty
959 = case (tcSplitSigmaTy ty) of { (_, _, tau_ty) ->
960 case tau_ty of
961 TyVarTy _ -> "*"
962 AppTy fun _ -> getTyDescription fun
963 FunTy _ res -> '-' : '>' : fun_result res
964 TyConApp tycon _ -> getOccString tycon
965 ForAllTy _ ty -> getTyDescription ty
966 LitTy n -> getTyLitDescription n
967 }
968 where
969 fun_result (FunTy _ res) = '>' : fun_result res
970 fun_result other = getTyDescription other
971
972 getTyLitDescription :: TyLit -> String
973 getTyLitDescription l =
974 case l of
975 NumTyLit n -> show n
976 StrTyLit n -> show n
977
978 --------------------------------------
979 -- CmmInfoTable-related things
980 --------------------------------------
981
982 mkDataConInfoTable :: DynFlags -> DataCon -> Bool -> Int -> Int -> CmmInfoTable
983 mkDataConInfoTable dflags data_con is_static ptr_wds nonptr_wds
984 = CmmInfoTable { cit_lbl = info_lbl
985 , cit_rep = sm_rep
986 , cit_prof = prof
987 , cit_srt = NoC_SRT }
988 where
989 name = dataConName data_con
990
991 info_lbl | is_static = mkStaticInfoTableLabel name NoCafRefs
992 | otherwise = mkConInfoTableLabel name NoCafRefs
993
994 sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds cl_type
995
996 cl_type = Constr (dataConTagZ data_con) (dataConIdentity data_con)
997
998 prof | not (gopt Opt_SccProfilingOn dflags) = NoProfilingInfo
999 | otherwise = ProfilingInfo ty_descr val_descr
1000
1001 ty_descr = stringToWord8s $ occNameString $ getOccName $ dataConTyCon data_con
1002 val_descr = stringToWord8s $ occNameString $ getOccName data_con
1003
1004 -- We need a black-hole closure info to pass to @allocDynClosure@ when we
1005 -- want to allocate the black hole on entry to a CAF.
1006
1007 cafBlackHoleInfoTable :: CmmInfoTable
1008 cafBlackHoleInfoTable
1009 = CmmInfoTable { cit_lbl = mkCAFBlackHoleInfoTableLabel
1010 , cit_rep = blackHoleRep
1011 , cit_prof = NoProfilingInfo
1012 , cit_srt = NoC_SRT }
1013
1014 indStaticInfoTable :: CmmInfoTable
1015 indStaticInfoTable
1016 = CmmInfoTable { cit_lbl = mkIndStaticInfoLabel
1017 , cit_rep = indStaticRep
1018 , cit_prof = NoProfilingInfo
1019 , cit_srt = NoC_SRT }
1020
1021 staticClosureNeedsLink :: Bool -> CmmInfoTable -> Bool
1022 -- A static closure needs a link field to aid the GC when traversing
1023 -- the static closure graph. But it only needs such a field if either
1024 -- a) it has an SRT
1025 -- b) it's a constructor with one or more pointer fields
1026 -- In case (b), the constructor's fields themselves play the role
1027 -- of the SRT.
1028 --
1029 -- At this point, the cit_srt field has not been calculated (that
1030 -- happens right at the end of the Cmm pipeline), but we do have the
1031 -- VarSet of CAFs that CoreToStg attached, and if that is empty there
1032 -- will definitely not be an SRT.
1033 --
1034 staticClosureNeedsLink has_srt CmmInfoTable{ cit_rep = smrep }
1035 | isConRep smrep = not (isStaticNoCafCon smrep)
1036 | otherwise = has_srt -- needsSRT (cit_srt info_tbl)