Update Trac ticket URLs to point to GitLab
[ghc.git] / compiler / coreSyn / CoreSyn.hs
1 {-
2 (c) The University of Glasgow 2006
3 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
4 -}
5
6 {-# LANGUAGE CPP, DeriveDataTypeable, FlexibleContexts #-}
7 {-# LANGUAGE NamedFieldPuns #-}
8 {-# LANGUAGE BangPatterns #-}
9
10 -- | CoreSyn holds all the main data types for use by for the Glasgow Haskell Compiler midsection
11 module CoreSyn (
12 -- * Main data types
13 Expr(..), Alt, Bind(..), AltCon(..), Arg,
14 Tickish(..), TickishScoping(..), TickishPlacement(..),
15 CoreProgram, CoreExpr, CoreAlt, CoreBind, CoreArg, CoreBndr,
16 TaggedExpr, TaggedAlt, TaggedBind, TaggedArg, TaggedBndr(..), deTagExpr,
17
18 -- * In/Out type synonyms
19 InId, InBind, InExpr, InAlt, InArg, InType, InKind,
20 InBndr, InVar, InCoercion, InTyVar, InCoVar,
21 OutId, OutBind, OutExpr, OutAlt, OutArg, OutType, OutKind,
22 OutBndr, OutVar, OutCoercion, OutTyVar, OutCoVar, MOutCoercion,
23
24 -- ** 'Expr' construction
25 mkLet, mkLets, mkLetNonRec, mkLetRec, mkLams,
26 mkApps, mkTyApps, mkCoApps, mkVarApps, mkTyArg,
27
28 mkIntLit, mkIntLitInt,
29 mkWordLit, mkWordLitWord,
30 mkWord64LitWord64, mkInt64LitInt64,
31 mkCharLit, mkStringLit,
32 mkFloatLit, mkFloatLitFloat,
33 mkDoubleLit, mkDoubleLitDouble,
34
35 mkConApp, mkConApp2, mkTyBind, mkCoBind,
36 varToCoreExpr, varsToCoreExprs,
37
38 isId, cmpAltCon, cmpAlt, ltAlt,
39
40 -- ** Simple 'Expr' access functions and predicates
41 bindersOf, bindersOfBinds, rhssOfBind, rhssOfAlts,
42 collectBinders, collectTyBinders, collectTyAndValBinders,
43 collectNBinders,
44 collectArgs, stripNArgs, collectArgsTicks, flattenBinds,
45
46 exprToType, exprToCoercion_maybe,
47 applyTypeToArg,
48
49 isValArg, isTypeArg, isCoArg, isTyCoArg, valArgCount, valBndrCount,
50 isRuntimeArg, isRuntimeVar,
51
52 -- * Tick-related functions
53 tickishCounts, tickishScoped, tickishScopesLike, tickishFloatable,
54 tickishCanSplit, mkNoCount, mkNoScope,
55 tickishIsCode, tickishPlace,
56 tickishContains,
57
58 -- * Unfolding data types
59 Unfolding(..), UnfoldingGuidance(..), UnfoldingSource(..),
60
61 -- ** Constructing 'Unfolding's
62 noUnfolding, bootUnfolding, evaldUnfolding, mkOtherCon,
63 unSaturatedOk, needSaturated, boringCxtOk, boringCxtNotOk,
64
65 -- ** Predicates and deconstruction on 'Unfolding'
66 unfoldingTemplate, expandUnfolding_maybe,
67 maybeUnfoldingTemplate, otherCons,
68 isValueUnfolding, isEvaldUnfolding, isCheapUnfolding,
69 isExpandableUnfolding, isConLikeUnfolding, isCompulsoryUnfolding,
70 isStableUnfolding, isFragileUnfolding, hasSomeUnfolding,
71 isBootUnfolding,
72 canUnfold, neverUnfoldGuidance, isStableSource,
73
74 -- * Annotated expression data types
75 AnnExpr, AnnExpr'(..), AnnBind(..), AnnAlt,
76
77 -- ** Operations on annotated expressions
78 collectAnnArgs, collectAnnArgsTicks,
79
80 -- ** Operations on annotations
81 deAnnotate, deAnnotate', deAnnAlt, deAnnBind,
82 collectAnnBndrs, collectNAnnBndrs,
83
84 -- * Orphanhood
85 IsOrphan(..), isOrphan, notOrphan, chooseOrphanAnchor,
86
87 -- * Core rule data types
88 CoreRule(..), RuleBase,
89 RuleName, RuleFun, IdUnfoldingFun, InScopeEnv,
90 RuleEnv(..), mkRuleEnv, emptyRuleEnv,
91
92 -- ** Operations on 'CoreRule's
93 ruleArity, ruleName, ruleIdName, ruleActivation,
94 setRuleIdName, ruleModule,
95 isBuiltinRule, isLocalRule, isAutoRule,
96 ) where
97
98 #include "HsVersions.h"
99
100 import GhcPrelude
101
102 import CostCentre
103 import VarEnv( InScopeSet )
104 import Var
105 import Type
106 import Coercion
107 import Name
108 import NameSet
109 import NameEnv( NameEnv, emptyNameEnv )
110 import Literal
111 import DataCon
112 import Module
113 import BasicTypes
114 import DynFlags
115 import Outputable
116 import Util
117 import UniqSet
118 import SrcLoc ( RealSrcSpan, containsSpan )
119 import Binary
120
121 import Data.Data hiding (TyCon)
122 import Data.Int
123 import Data.Word
124
125 infixl 4 `mkApps`, `mkTyApps`, `mkVarApps`, `App`, `mkCoApps`
126 -- Left associative, so that we can say (f `mkTyApps` xs `mkVarApps` ys)
127
128 {-
129 ************************************************************************
130 * *
131 \subsection{The main data types}
132 * *
133 ************************************************************************
134
135 These data types are the heart of the compiler
136 -}
137
138 -- | This is the data type that represents GHCs core intermediate language. Currently
139 -- GHC uses System FC <https://www.microsoft.com/en-us/research/publication/system-f-with-type-equality-coercions/> for this purpose,
140 -- which is closely related to the simpler and better known System F <http://en.wikipedia.org/wiki/System_F>.
141 --
142 -- We get from Haskell source to this Core language in a number of stages:
143 --
144 -- 1. The source code is parsed into an abstract syntax tree, which is represented
145 -- by the data type 'HsExpr.HsExpr' with the names being 'RdrName.RdrNames'
146 --
147 -- 2. This syntax tree is /renamed/, which attaches a 'Unique.Unique' to every 'RdrName.RdrName'
148 -- (yielding a 'Name.Name') to disambiguate identifiers which are lexically identical.
149 -- For example, this program:
150 --
151 -- @
152 -- f x = let f x = x + 1
153 -- in f (x - 2)
154 -- @
155 --
156 -- Would be renamed by having 'Unique's attached so it looked something like this:
157 --
158 -- @
159 -- f_1 x_2 = let f_3 x_4 = x_4 + 1
160 -- in f_3 (x_2 - 2)
161 -- @
162 -- But see Note [Shadowing] below.
163 --
164 -- 3. The resulting syntax tree undergoes type checking (which also deals with instantiating
165 -- type class arguments) to yield a 'HsExpr.HsExpr' type that has 'Id.Id' as it's names.
166 --
167 -- 4. Finally the syntax tree is /desugared/ from the expressive 'HsExpr.HsExpr' type into
168 -- this 'Expr' type, which has far fewer constructors and hence is easier to perform
169 -- optimization, analysis and code generation on.
170 --
171 -- The type parameter @b@ is for the type of binders in the expression tree.
172 --
173 -- The language consists of the following elements:
174 --
175 -- * Variables
176 -- See Note [Variable occurrences in Core]
177 --
178 -- * Primitive literals
179 --
180 -- * Applications: note that the argument may be a 'Type'.
181 -- See Note [CoreSyn let/app invariant]
182 -- See Note [Levity polymorphism invariants]
183 --
184 -- * Lambda abstraction
185 -- See Note [Levity polymorphism invariants]
186 --
187 -- * Recursive and non recursive @let@s. Operationally
188 -- this corresponds to allocating a thunk for the things
189 -- bound and then executing the sub-expression.
190 --
191 -- See Note [CoreSyn letrec invariant]
192 -- See Note [CoreSyn let/app invariant]
193 -- See Note [Levity polymorphism invariants]
194 -- See Note [CoreSyn type and coercion invariant]
195 --
196 -- * Case expression. Operationally this corresponds to evaluating
197 -- the scrutinee (expression examined) to weak head normal form
198 -- and then examining at most one level of resulting constructor (i.e. you
199 -- cannot do nested pattern matching directly with this).
200 --
201 -- The binder gets bound to the value of the scrutinee,
202 -- and the 'Type' must be that of all the case alternatives
203 --
204 -- #case_invariants#
205 -- This is one of the more complicated elements of the Core language,
206 -- and comes with a number of restrictions:
207 --
208 -- 1. The list of alternatives may be empty;
209 -- See Note [Empty case alternatives]
210 --
211 -- 2. The 'DEFAULT' case alternative must be first in the list,
212 -- if it occurs at all.
213 --
214 -- 3. The remaining cases are in order of increasing
215 -- tag (for 'DataAlts') or
216 -- lit (for 'LitAlts').
217 -- This makes finding the relevant constructor easy,
218 -- and makes comparison easier too.
219 --
220 -- 4. The list of alternatives must be exhaustive. An /exhaustive/ case
221 -- does not necessarily mention all constructors:
222 --
223 -- @
224 -- data Foo = Red | Green | Blue
225 -- ... case x of
226 -- Red -> True
227 -- other -> f (case x of
228 -- Green -> ...
229 -- Blue -> ... ) ...
230 -- @
231 --
232 -- The inner case does not need a @Red@ alternative, because @x@
233 -- can't be @Red@ at that program point.
234 --
235 -- 5. Floating-point values must not be scrutinised against literals.
236 -- See #9238 and Note [Rules for floating-point comparisons]
237 -- in PrelRules for rationale.
238 --
239 -- * Cast an expression to a particular type.
240 -- This is used to implement @newtype@s (a @newtype@ constructor or
241 -- destructor just becomes a 'Cast' in Core) and GADTs.
242 --
243 -- * Notes. These allow general information to be added to expressions
244 -- in the syntax tree
245 --
246 -- * A type: this should only show up at the top level of an Arg
247 --
248 -- * A coercion
249
250 -- If you edit this type, you may need to update the GHC formalism
251 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
252 data Expr b
253 = Var Id
254 | Lit Literal
255 | App (Expr b) (Arg b)
256 | Lam b (Expr b)
257 | Let (Bind b) (Expr b)
258 | Case (Expr b) b Type [Alt b] -- See #case_invariants#
259 | Cast (Expr b) Coercion
260 | Tick (Tickish Id) (Expr b)
261 | Type Type
262 | Coercion Coercion
263 deriving Data
264
265 -- | Type synonym for expressions that occur in function argument positions.
266 -- Only 'Arg' should contain a 'Type' at top level, general 'Expr' should not
267 type Arg b = Expr b
268
269 -- | A case split alternative. Consists of the constructor leading to the alternative,
270 -- the variables bound from the constructor, and the expression to be executed given that binding.
271 -- The default alternative is @(DEFAULT, [], rhs)@
272
273 -- If you edit this type, you may need to update the GHC formalism
274 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
275 type Alt b = (AltCon, [b], Expr b)
276
277 -- | A case alternative constructor (i.e. pattern match)
278
279 -- If you edit this type, you may need to update the GHC formalism
280 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
281 data AltCon
282 = DataAlt DataCon -- ^ A plain data constructor: @case e of { Foo x -> ... }@.
283 -- Invariant: the 'DataCon' is always from a @data@ type, and never from a @newtype@
284
285 | LitAlt Literal -- ^ A literal: @case e of { 1 -> ... }@
286 -- Invariant: always an *unlifted* literal
287 -- See Note [Literal alternatives]
288
289 | DEFAULT -- ^ Trivial alternative: @case e of { _ -> ... }@
290 deriving (Eq, Data)
291
292 -- This instance is a bit shady. It can only be used to compare AltCons for
293 -- a single type constructor. Fortunately, it seems quite unlikely that we'll
294 -- ever need to compare AltCons for different type constructors.
295 -- The instance adheres to the order described in [CoreSyn case invariants]
296 instance Ord AltCon where
297 compare (DataAlt con1) (DataAlt con2) =
298 ASSERT( dataConTyCon con1 == dataConTyCon con2 )
299 compare (dataConTag con1) (dataConTag con2)
300 compare (DataAlt _) _ = GT
301 compare _ (DataAlt _) = LT
302 compare (LitAlt l1) (LitAlt l2) = compare l1 l2
303 compare (LitAlt _) DEFAULT = GT
304 compare DEFAULT DEFAULT = EQ
305 compare DEFAULT _ = LT
306
307 -- | Binding, used for top level bindings in a module and local bindings in a @let@.
308
309 -- If you edit this type, you may need to update the GHC formalism
310 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
311 data Bind b = NonRec b (Expr b)
312 | Rec [(b, (Expr b))]
313 deriving Data
314
315 {-
316 Note [Shadowing]
317 ~~~~~~~~~~~~~~~~
318 While various passes attempt to rename on-the-fly in a manner that
319 avoids "shadowing" (thereby simplifying downstream optimizations),
320 neither the simplifier nor any other pass GUARANTEES that shadowing is
321 avoided. Thus, all passes SHOULD work fine even in the presence of
322 arbitrary shadowing in their inputs.
323
324 In particular, scrutinee variables `x` in expressions of the form
325 `Case e x t` are often renamed to variables with a prefix
326 "wild_". These "wild" variables may appear in the body of the
327 case-expression, and further, may be shadowed within the body.
328
329 So the Unique in a Var is not really unique at all. Still, it's very
330 useful to give a constant-time equality/ordering for Vars, and to give
331 a key that can be used to make sets of Vars (VarSet), or mappings from
332 Vars to other things (VarEnv). Moreover, if you do want to eliminate
333 shadowing, you can give a new Unique to an Id without changing its
334 printable name, which makes debugging easier.
335
336 Note [Literal alternatives]
337 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
338 Literal alternatives (LitAlt lit) are always for *un-lifted* literals.
339 We have one literal, a literal Integer, that is lifted, and we don't
340 allow in a LitAlt, because LitAlt cases don't do any evaluation. Also
341 (see #5603) if you say
342 case 3 of
343 S# x -> ...
344 J# _ _ -> ...
345 (where S#, J# are the constructors for Integer) we don't want the
346 simplifier calling findAlt with argument (LitAlt 3). No no. Integer
347 literals are an opaque encoding of an algebraic data type, not of
348 an unlifted literal, like all the others.
349
350 Also, we do not permit case analysis with literal patterns on floating-point
351 types. See #9238 and Note [Rules for floating-point comparisons] in
352 PrelRules for the rationale for this restriction.
353
354 -------------------------- CoreSyn INVARIANTS ---------------------------
355
356 Note [Variable occurrences in Core]
357 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
358 Variable /occurrences/ are never CoVars, though /bindings/ can be.
359 All CoVars appear in Coercions.
360
361 For example
362 \(c :: Age~#Int) (d::Int). d |> (sym c)
363 Here 'c' is a CoVar, which is lambda-bound, but it /occurs/ in
364 a Coercion, (sym c).
365
366 Note [CoreSyn letrec invariant]
367 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
368 The right hand sides of all top-level and recursive @let@s
369 /must/ be of lifted type (see "Type#type_classification" for
370 the meaning of /lifted/ vs. /unlifted/).
371
372 There is one exception to this rule, top-level @let@s are
373 allowed to bind primitive string literals: see
374 Note [CoreSyn top-level string literals].
375
376 Note [CoreSyn top-level string literals]
377 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
378 As an exception to the usual rule that top-level binders must be lifted,
379 we allow binding primitive string literals (of type Addr#) of type Addr# at the
380 top level. This allows us to share string literals earlier in the pipeline and
381 crucially allows other optimizations in the Core2Core pipeline to fire.
382 Consider,
383
384 f n = let a::Addr# = "foo"#
385 in \x -> blah
386
387 In order to be able to inline `f`, we would like to float `a` to the top.
388 Another option would be to inline `a`, but that would lead to duplicating string
389 literals, which we want to avoid. See #8472.
390
391 The solution is simply to allow top-level unlifted binders. We can't allow
392 arbitrary unlifted expression at the top-level though, unlifted binders cannot
393 be thunks, so we just allow string literals.
394
395 We allow the top-level primitive string literals to be wrapped in Ticks
396 in the same way they can be wrapped when nested in an expression.
397 CoreToSTG currently discards Ticks around top-level primitive string literals.
398 See #14779.
399
400 Also see Note [Compilation plan for top-level string literals].
401
402 Note [Compilation plan for top-level string literals]
403 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
404 Here is a summary on how top-level string literals are handled by various
405 parts of the compilation pipeline.
406
407 * In the source language, there is no way to bind a primitive string literal
408 at the top level.
409
410 * In Core, we have a special rule that permits top-level Addr# bindings. See
411 Note [CoreSyn top-level string literals]. Core-to-core passes may introduce
412 new top-level string literals.
413
414 * In STG, top-level string literals are explicitly represented in the syntax
415 tree.
416
417 * A top-level string literal may end up exported from a module. In this case,
418 in the object file, the content of the exported literal is given a label with
419 the _bytes suffix.
420
421 Note [CoreSyn let/app invariant]
422 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
423 The let/app invariant
424 the right hand side of a non-recursive 'Let', and
425 the argument of an 'App',
426 /may/ be of unlifted type, but only if
427 the expression is ok-for-speculation
428 or the 'Let' is for a join point.
429
430 This means that the let can be floated around
431 without difficulty. For example, this is OK:
432
433 y::Int# = x +# 1#
434
435 But this is not, as it may affect termination if the
436 expression is floated out:
437
438 y::Int# = fac 4#
439
440 In this situation you should use @case@ rather than a @let@. The function
441 'CoreUtils.needsCaseBinding' can help you determine which to generate, or
442 alternatively use 'MkCore.mkCoreLet' rather than this constructor directly,
443 which will generate a @case@ if necessary
444
445 The let/app invariant is initially enforced by mkCoreLet and mkCoreApp in
446 coreSyn/MkCore.
447
448 Note [CoreSyn type and coercion invariant]
449 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
450 We allow a /non-recursive/, /non-top-level/ let to bind type and
451 coercion variables. These can be very convenient for postponing type
452 substitutions until the next run of the simplifier.
453
454 * A type variable binding must have a RHS of (Type ty)
455
456 * A coercion variable binding must have a RHS of (Coercion co)
457
458 It is possible to have terms that return a coercion, but we use
459 case-binding for those; e.g.
460 case (eq_sel d) of (co :: a ~# b) -> blah
461 where eq_sel :: (a~b) -> (a~#b)
462
463 Or even even
464 case (df @Int) of (co :: a ~# b) -> blah
465 Which is very exotic, and I think never encountered; but see
466 Note [Equality superclasses in quantified constraints]
467 in TcCanonical
468
469 Note [CoreSyn case invariants]
470 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
471 See #case_invariants#
472
473 Note [Levity polymorphism invariants]
474 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
475 The levity-polymorphism invariants are these (as per "Levity Polymorphism",
476 PLDI '17):
477
478 * The type of a term-binder must not be levity-polymorphic,
479 unless it is a let(rec)-bound join point
480 (see Note [Invariants on join points])
481
482 * The type of the argument of an App must not be levity-polymorphic.
483
484 A type (t::TYPE r) is "levity polymorphic" if 'r' has any free variables.
485
486 For example
487 \(r::RuntimeRep). \(a::TYPE r). \(x::a). e
488 is illegal because x's type has kind (TYPE r), which has 'r' free.
489
490 See Note [Levity polymorphism checking] in DsMonad to see where these
491 invariants are established for user-written code.
492
493 Note [CoreSyn let goal]
494 ~~~~~~~~~~~~~~~~~~~~~~~
495 * The simplifier tries to ensure that if the RHS of a let is a constructor
496 application, its arguments are trivial, so that the constructor can be
497 inlined vigorously.
498
499 Note [Type let]
500 ~~~~~~~~~~~~~~~
501 See #type_let#
502
503 Note [Empty case alternatives]
504 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
505 The alternatives of a case expression should be exhaustive. But
506 this exhaustive list can be empty!
507
508 * A case expression can have empty alternatives if (and only if) the
509 scrutinee is bound to raise an exception or diverge. When do we know
510 this? See Note [Bottoming expressions] in CoreUtils.
511
512 * The possibility of empty alternatives is one reason we need a type on
513 the case expression: if the alternatives are empty we can't get the
514 type from the alternatives!
515
516 * In the case of empty types (see Note [Bottoming expressions]), say
517 data T
518 we do NOT want to replace
519 case (x::T) of Bool {} --> error Bool "Inaccessible case"
520 because x might raise an exception, and *that*'s what we want to see!
521 (#6067 is an example.) To preserve semantics we'd have to say
522 x `seq` error Bool "Inaccessible case"
523 but the 'seq' is just a case, so we are back to square 1. Or I suppose
524 we could say
525 x |> UnsafeCoerce T Bool
526 but that loses all trace of the fact that this originated with an empty
527 set of alternatives.
528
529 * We can use the empty-alternative construct to coerce error values from
530 one type to another. For example
531
532 f :: Int -> Int
533 f n = error "urk"
534
535 g :: Int -> (# Char, Bool #)
536 g x = case f x of { 0 -> ..., n -> ... }
537
538 Then if we inline f in g's RHS we get
539 case (error Int "urk") of (# Char, Bool #) { ... }
540 and we can discard the alternatives since the scrutinee is bottom to give
541 case (error Int "urk") of (# Char, Bool #) {}
542
543 This is nicer than using an unsafe coerce between Int ~ (# Char,Bool #),
544 if for no other reason that we don't need to instantiate the (~) at an
545 unboxed type.
546
547 * We treat a case expression with empty alternatives as trivial iff
548 its scrutinee is (see CoreUtils.exprIsTrivial). This is actually
549 important; see Note [Empty case is trivial] in CoreUtils
550
551 * An empty case is replaced by its scrutinee during the CoreToStg
552 conversion; remember STG is un-typed, so there is no need for
553 the empty case to do the type conversion.
554
555 Note [Join points]
556 ~~~~~~~~~~~~~~~~~~
557 In Core, a *join point* is a specially tagged function whose only occurrences
558 are saturated tail calls. A tail call can appear in these places:
559
560 1. In the branches (not the scrutinee) of a case
561 2. Underneath a let (value or join point)
562 3. Inside another join point
563
564 We write a join-point declaration as
565 join j @a @b x y = e1 in e2,
566 like a let binding but with "join" instead (or "join rec" for "let rec"). Note
567 that we put the parameters before the = rather than using lambdas; this is
568 because it's relevant how many parameters the join point takes *as a join
569 point.* This number is called the *join arity,* distinct from arity because it
570 counts types as well as values. Note that a join point may return a lambda! So
571 join j x = x + 1
572 is different from
573 join j = \x -> x + 1
574 The former has join arity 1, while the latter has join arity 0.
575
576 The identifier for a join point is called a join id or a *label.* An invocation
577 is called a *jump.* We write a jump using the jump keyword:
578
579 jump j 3
580
581 The words *label* and *jump* are evocative of assembly code (or Cmm) for a
582 reason: join points are indeed compiled as labeled blocks, and jumps become
583 actual jumps (plus argument passing and stack adjustment). There is no closure
584 allocated and only a fraction of the function-call overhead. Hence we would
585 like as many functions as possible to become join points (see OccurAnal) and
586 the type rules for join points ensure we preserve the properties that make them
587 efficient.
588
589 In the actual AST, a join point is indicated by the IdDetails of the binder: a
590 local value binding gets 'VanillaId' but a join point gets a 'JoinId' with its
591 join arity.
592
593 For more details, see the paper:
594
595 Luke Maurer, Paul Downen, Zena Ariola, and Simon Peyton Jones. "Compiling
596 without continuations." Submitted to PLDI'17.
597
598 https://www.microsoft.com/en-us/research/publication/compiling-without-continuations/
599
600 Note [Invariants on join points]
601 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
602 Join points must follow these invariants:
603
604 1. All occurrences must be tail calls. Each of these tail calls must pass the
605 same number of arguments, counting both types and values; we call this the
606 "join arity" (to distinguish from regular arity, which only counts values).
607
608 2. For join arity n, the right-hand side must begin with at least n lambdas.
609 No ticks, no casts, just lambdas! C.f. CoreUtils.joinRhsArity.
610
611 2a. Moreover, this same constraint applies to any unfolding of the binder.
612 Reason: if we want to push a continuation into the RHS we must push it
613 into the unfolding as well.
614
615 3. If the binding is recursive, then all other bindings in the recursive group
616 must also be join points.
617
618 4. The binding's type must not be polymorphic in its return type (as defined
619 in Note [The polymorphism rule of join points]).
620
621 However, join points have simpler invariants in other ways
622
623 5. A join point can have an unboxed type without the RHS being
624 ok-for-speculation (i.e. drop the let/app invariant)
625 e.g. let j :: Int# = factorial x in ...
626
627 6. A join point can have a levity-polymorphic RHS
628 e.g. let j :: r :: TYPE l = fail void# in ...
629 This happened in an intermediate program #13394
630
631 Examples:
632
633 join j1 x = 1 + x in jump j (jump j x) -- Fails 1: non-tail call
634 join j1' x = 1 + x in if even a
635 then jump j1 a
636 else jump j1 a b -- Fails 1: inconsistent calls
637 join j2 x = flip (+) x in j2 1 2 -- Fails 2: not enough lambdas
638 join j2' x = \y -> x + y in j3 1 -- Passes: extra lams ok
639 join j @a (x :: a) = x -- Fails 4: polymorphic in ret type
640
641 Invariant 1 applies to left-hand sides of rewrite rules, so a rule for a join
642 point must have an exact call as its LHS.
643
644 Strictly speaking, invariant 3 is redundant, since a call from inside a lazy
645 binding isn't a tail call. Since a let-bound value can't invoke a free join
646 point, then, they can't be mutually recursive. (A Core binding group *can*
647 include spurious extra bindings if the occurrence analyser hasn't run, so
648 invariant 3 does still need to be checked.) For the rigorous definition of
649 "tail call", see Section 3 of the paper (Note [Join points]).
650
651 Invariant 4 is subtle; see Note [The polymorphism rule of join points].
652
653 Core Lint will check these invariants, anticipating that any binder whose
654 OccInfo is marked AlwaysTailCalled will become a join point as soon as the
655 simplifier (or simpleOptPgm) runs.
656
657 Note [The type of a join point]
658 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
659 A join point has the same type it would have as a function. That is, if it takes
660 an Int and a Bool and its body produces a String, its type is `Int -> Bool ->
661 String`. Natural as this may seem, it can be awkward. A join point shouldn't be
662 thought to "return" in the same sense a function does---a jump is one-way. This
663 is crucial for understanding how case-of-case interacts with join points:
664
665 case (join
666 j :: Int -> Bool -> String
667 j x y = ...
668 in
669 jump j z w) of
670 "" -> True
671 _ -> False
672
673 The simplifier will pull the case into the join point (see Note [Case-of-case
674 and join points] in Simplify):
675
676 join
677 j :: Int -> Bool -> Bool -- changed!
678 j x y = case ... of "" -> True
679 _ -> False
680 in
681 jump j z w
682
683 The body of the join point now returns a Bool, so the label `j` has to have its
684 type updated accordingly. Inconvenient though this may be, it has the advantage
685 that 'CoreUtils.exprType' can still return a type for any expression, including
686 a jump.
687
688 This differs from the paper (see Note [Invariants on join points]). In the
689 paper, we instead give j the type `Int -> Bool -> forall a. a`. Then each jump
690 carries the "return type" as a parameter, exactly the way other non-returning
691 functions like `error` work:
692
693 case (join
694 j :: Int -> Bool -> forall a. a
695 j x y = ...
696 in
697 jump j z w @String) of
698 "" -> True
699 _ -> False
700
701 Now we can move the case inward and we only have to change the jump:
702
703 join
704 j :: Int -> Bool -> forall a. a
705 j x y = case ... of "" -> True
706 _ -> False
707 in
708 jump j z w @Bool
709
710 (Core Lint would still check that the body of the join point has the right type;
711 that type would simply not be reflected in the join id.)
712
713 Note [The polymorphism rule of join points]
714 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
715 Invariant 4 of Note [Invariants on join points] forbids a join point to be
716 polymorphic in its return type. That is, if its type is
717
718 forall a1 ... ak. t1 -> ... -> tn -> r
719
720 where its join arity is k+n, none of the type parameters ai may occur free in r.
721
722 In some way, this falls out of the fact that given
723
724 join
725 j @a1 ... @ak x1 ... xn = e1
726 in e2
727
728 then all calls to `j` are in tail-call positions of `e`, and expressions in
729 tail-call positions in `e` have the same type as `e`.
730 Therefore the type of `e1` -- the return type of the join point -- must be the
731 same as the type of e2.
732 Since the type variables aren't bound in `e2`, its type can't include them, and
733 thus neither can the type of `e1`.
734
735 This unfortunately prevents the `go` in the following code from being a
736 join-point:
737
738 iter :: forall a. Int -> (a -> a) -> a -> a
739 iter @a n f x = go @a n f x
740 where
741 go :: forall a. Int -> (a -> a) -> a -> a
742 go @a 0 _ x = x
743 go @a n f x = go @a (n-1) f (f x)
744
745 In this case, a static argument transformation would fix that (see
746 ticket #14620):
747
748 iter :: forall a. Int -> (a -> a) -> a -> a
749 iter @a n f x = go' @a n f x
750 where
751 go' :: Int -> (a -> a) -> a -> a
752 go' 0 _ x = x
753 go' n f x = go' (n-1) f (f x)
754
755 In general, loopification could be employed to do that (see #14068.)
756
757 Can we simply drop the requirement, and allow `go` to be a join-point? We
758 could, and it would work. But we could not longer apply the case-of-join-point
759 transformation universally. This transformation would do:
760
761 case (join go @a n f x = case n of 0 -> x
762 n -> go @a (n-1) f (f x)
763 in go @Bool n neg True) of
764 True -> e1; False -> e2
765
766 ===>
767
768 join go @a n f x = case n of 0 -> case x of True -> e1; False -> e2
769 n -> go @a (n-1) f (f x)
770 in go @Bool n neg True
771
772 but that is ill-typed, as `x` is type `a`, not `Bool`.
773
774
775 This also justifies why we do not consider the `e` in `e |> co` to be in
776 tail position: A cast changes the type, but the type must be the same. But
777 operationally, casts are vacuous, so this is a bit unfortunate! See #14610 for
778 ideas how to fix this.
779
780 ************************************************************************
781 * *
782 In/Out type synonyms
783 * *
784 ********************************************************************* -}
785
786 {- Many passes apply a substitution, and it's very handy to have type
787 synonyms to remind us whether or not the substitution has been applied -}
788
789 -- Pre-cloning or substitution
790 type InBndr = CoreBndr
791 type InType = Type
792 type InKind = Kind
793 type InBind = CoreBind
794 type InExpr = CoreExpr
795 type InAlt = CoreAlt
796 type InArg = CoreArg
797 type InCoercion = Coercion
798
799 -- Post-cloning or substitution
800 type OutBndr = CoreBndr
801 type OutType = Type
802 type OutKind = Kind
803 type OutCoercion = Coercion
804 type OutBind = CoreBind
805 type OutExpr = CoreExpr
806 type OutAlt = CoreAlt
807 type OutArg = CoreArg
808 type MOutCoercion = MCoercion
809
810
811 {- *********************************************************************
812 * *
813 Ticks
814 * *
815 ************************************************************************
816 -}
817
818 -- | Allows attaching extra information to points in expressions
819
820 -- If you edit this type, you may need to update the GHC formalism
821 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
822 data Tickish id =
823 -- | An @{-# SCC #-}@ profiling annotation, either automatically
824 -- added by the desugarer as a result of -auto-all, or added by
825 -- the user.
826 ProfNote {
827 profNoteCC :: CostCentre, -- ^ the cost centre
828 profNoteCount :: !Bool, -- ^ bump the entry count?
829 profNoteScope :: !Bool -- ^ scopes over the enclosed expression
830 -- (i.e. not just a tick)
831 }
832
833 -- | A "tick" used by HPC to track the execution of each
834 -- subexpression in the original source code.
835 | HpcTick {
836 tickModule :: Module,
837 tickId :: !Int
838 }
839
840 -- | A breakpoint for the GHCi debugger. This behaves like an HPC
841 -- tick, but has a list of free variables which will be available
842 -- for inspection in GHCi when the program stops at the breakpoint.
843 --
844 -- NB. we must take account of these Ids when (a) counting free variables,
845 -- and (b) substituting (don't substitute for them)
846 | Breakpoint
847 { breakpointId :: !Int
848 , breakpointFVs :: [id] -- ^ the order of this list is important:
849 -- it matches the order of the lists in the
850 -- appropriate entry in HscTypes.ModBreaks.
851 --
852 -- Careful about substitution! See
853 -- Note [substTickish] in CoreSubst.
854 }
855
856 -- | A source note.
857 --
858 -- Source notes are pure annotations: Their presence should neither
859 -- influence compilation nor execution. The semantics are given by
860 -- causality: The presence of a source note means that a local
861 -- change in the referenced source code span will possibly provoke
862 -- the generated code to change. On the flip-side, the functionality
863 -- of annotated code *must* be invariant against changes to all
864 -- source code *except* the spans referenced in the source notes
865 -- (see "Causality of optimized Haskell" paper for details).
866 --
867 -- Therefore extending the scope of any given source note is always
868 -- valid. Note that it is still undesirable though, as this reduces
869 -- their usefulness for debugging and profiling. Therefore we will
870 -- generally try only to make use of this property where it is
871 -- necessary to enable optimizations.
872 | SourceNote
873 { sourceSpan :: RealSrcSpan -- ^ Source covered
874 , sourceName :: String -- ^ Name for source location
875 -- (uses same names as CCs)
876 }
877
878 deriving (Eq, Ord, Data)
879
880 -- | A "counting tick" (where tickishCounts is True) is one that
881 -- counts evaluations in some way. We cannot discard a counting tick,
882 -- and the compiler should preserve the number of counting ticks as
883 -- far as possible.
884 --
885 -- However, we still allow the simplifier to increase or decrease
886 -- sharing, so in practice the actual number of ticks may vary, except
887 -- that we never change the value from zero to non-zero or vice versa.
888 tickishCounts :: Tickish id -> Bool
889 tickishCounts n@ProfNote{} = profNoteCount n
890 tickishCounts HpcTick{} = True
891 tickishCounts Breakpoint{} = True
892 tickishCounts _ = False
893
894
895 -- | Specifies the scoping behaviour of ticks. This governs the
896 -- behaviour of ticks that care about the covered code and the cost
897 -- associated with it. Important for ticks relating to profiling.
898 data TickishScoping =
899 -- | No scoping: The tick does not care about what code it
900 -- covers. Transformations can freely move code inside as well as
901 -- outside without any additional annotation obligations
902 NoScope
903
904 -- | Soft scoping: We want all code that is covered to stay
905 -- covered. Note that this scope type does not forbid
906 -- transformations from happening, as long as all results of
907 -- the transformations are still covered by this tick or a copy of
908 -- it. For example
909 --
910 -- let x = tick<...> (let y = foo in bar) in baz
911 -- ===>
912 -- let x = tick<...> bar; y = tick<...> foo in baz
913 --
914 -- Is a valid transformation as far as "bar" and "foo" is
915 -- concerned, because both still are scoped over by the tick.
916 --
917 -- Note though that one might object to the "let" not being
918 -- covered by the tick any more. However, we are generally lax
919 -- with this - constant costs don't matter too much, and given
920 -- that the "let" was effectively merged we can view it as having
921 -- lost its identity anyway.
922 --
923 -- Also note that this scoping behaviour allows floating a tick
924 -- "upwards" in pretty much any situation. For example:
925 --
926 -- case foo of x -> tick<...> bar
927 -- ==>
928 -- tick<...> case foo of x -> bar
929 --
930 -- While this is always leagl, we want to make a best effort to
931 -- only make us of this where it exposes transformation
932 -- opportunities.
933 | SoftScope
934
935 -- | Cost centre scoping: We don't want any costs to move to other
936 -- cost-centre stacks. This means we not only want no code or cost
937 -- to get moved out of their cost centres, but we also object to
938 -- code getting associated with new cost-centre ticks - or
939 -- changing the order in which they get applied.
940 --
941 -- A rule of thumb is that we don't want any code to gain new
942 -- annotations. However, there are notable exceptions, for
943 -- example:
944 --
945 -- let f = \y -> foo in tick<...> ... (f x) ...
946 -- ==>
947 -- tick<...> ... foo[x/y] ...
948 --
949 -- In-lining lambdas like this is always legal, because inlining a
950 -- function does not change the cost-centre stack when the
951 -- function is called.
952 | CostCentreScope
953
954 deriving (Eq)
955
956 -- | Returns the intended scoping rule for a Tickish
957 tickishScoped :: Tickish id -> TickishScoping
958 tickishScoped n@ProfNote{}
959 | profNoteScope n = CostCentreScope
960 | otherwise = NoScope
961 tickishScoped HpcTick{} = NoScope
962 tickishScoped Breakpoint{} = CostCentreScope
963 -- Breakpoints are scoped: eventually we're going to do call
964 -- stacks, but also this helps prevent the simplifier from moving
965 -- breakpoints around and changing their result type (see #1531).
966 tickishScoped SourceNote{} = SoftScope
967
968 -- | Returns whether the tick scoping rule is at least as permissive
969 -- as the given scoping rule.
970 tickishScopesLike :: Tickish id -> TickishScoping -> Bool
971 tickishScopesLike t scope = tickishScoped t `like` scope
972 where NoScope `like` _ = True
973 _ `like` NoScope = False
974 SoftScope `like` _ = True
975 _ `like` SoftScope = False
976 CostCentreScope `like` _ = True
977
978 -- | Returns @True@ for ticks that can be floated upwards easily even
979 -- where it might change execution counts, such as:
980 --
981 -- Just (tick<...> foo)
982 -- ==>
983 -- tick<...> (Just foo)
984 --
985 -- This is a combination of @tickishSoftScope@ and
986 -- @tickishCounts@. Note that in principle splittable ticks can become
987 -- floatable using @mkNoTick@ -- even though there's currently no
988 -- tickish for which that is the case.
989 tickishFloatable :: Tickish id -> Bool
990 tickishFloatable t = t `tickishScopesLike` SoftScope && not (tickishCounts t)
991
992 -- | Returns @True@ for a tick that is both counting /and/ scoping and
993 -- can be split into its (tick, scope) parts using 'mkNoScope' and
994 -- 'mkNoTick' respectively.
995 tickishCanSplit :: Tickish id -> Bool
996 tickishCanSplit ProfNote{profNoteScope = True, profNoteCount = True}
997 = True
998 tickishCanSplit _ = False
999
1000 mkNoCount :: Tickish id -> Tickish id
1001 mkNoCount n | not (tickishCounts n) = n
1002 | not (tickishCanSplit n) = panic "mkNoCount: Cannot split!"
1003 mkNoCount n@ProfNote{} = n {profNoteCount = False}
1004 mkNoCount _ = panic "mkNoCount: Undefined split!"
1005
1006 mkNoScope :: Tickish id -> Tickish id
1007 mkNoScope n | tickishScoped n == NoScope = n
1008 | not (tickishCanSplit n) = panic "mkNoScope: Cannot split!"
1009 mkNoScope n@ProfNote{} = n {profNoteScope = False}
1010 mkNoScope _ = panic "mkNoScope: Undefined split!"
1011
1012 -- | Return @True@ if this source annotation compiles to some backend
1013 -- code. Without this flag, the tickish is seen as a simple annotation
1014 -- that does not have any associated evaluation code.
1015 --
1016 -- What this means that we are allowed to disregard the tick if doing
1017 -- so means that we can skip generating any code in the first place. A
1018 -- typical example is top-level bindings:
1019 --
1020 -- foo = tick<...> \y -> ...
1021 -- ==>
1022 -- foo = \y -> tick<...> ...
1023 --
1024 -- Here there is just no operational difference between the first and
1025 -- the second version. Therefore code generation should simply
1026 -- translate the code as if it found the latter.
1027 tickishIsCode :: Tickish id -> Bool
1028 tickishIsCode SourceNote{} = False
1029 tickishIsCode _tickish = True -- all the rest for now
1030
1031
1032 -- | Governs the kind of expression that the tick gets placed on when
1033 -- annotating for example using @mkTick@. If we find that we want to
1034 -- put a tickish on an expression ruled out here, we try to float it
1035 -- inwards until we find a suitable expression.
1036 data TickishPlacement =
1037
1038 -- | Place ticks exactly on run-time expressions. We can still
1039 -- move the tick through pure compile-time constructs such as
1040 -- other ticks, casts or type lambdas. This is the most
1041 -- restrictive placement rule for ticks, as all tickishs have in
1042 -- common that they want to track runtime processes. The only
1043 -- legal placement rule for counting ticks.
1044 PlaceRuntime
1045
1046 -- | As @PlaceRuntime@, but we float the tick through all
1047 -- lambdas. This makes sense where there is little difference
1048 -- between annotating the lambda and annotating the lambda's code.
1049 | PlaceNonLam
1050
1051 -- | In addition to floating through lambdas, cost-centre style
1052 -- tickishs can also be moved from constructors, non-function
1053 -- variables and literals. For example:
1054 --
1055 -- let x = scc<...> C (scc<...> y) (scc<...> 3) in ...
1056 --
1057 -- Neither the constructor application, the variable or the
1058 -- literal are likely to have any cost worth mentioning. And even
1059 -- if y names a thunk, the call would not care about the
1060 -- evaluation context. Therefore removing all annotations in the
1061 -- above example is safe.
1062 | PlaceCostCentre
1063
1064 deriving (Eq)
1065
1066 -- | Placement behaviour we want for the ticks
1067 tickishPlace :: Tickish id -> TickishPlacement
1068 tickishPlace n@ProfNote{}
1069 | profNoteCount n = PlaceRuntime
1070 | otherwise = PlaceCostCentre
1071 tickishPlace HpcTick{} = PlaceRuntime
1072 tickishPlace Breakpoint{} = PlaceRuntime
1073 tickishPlace SourceNote{} = PlaceNonLam
1074
1075 -- | Returns whether one tick "contains" the other one, therefore
1076 -- making the second tick redundant.
1077 tickishContains :: Eq b => Tickish b -> Tickish b -> Bool
1078 tickishContains (SourceNote sp1 n1) (SourceNote sp2 n2)
1079 = containsSpan sp1 sp2 && n1 == n2
1080 -- compare the String last
1081 tickishContains t1 t2
1082 = t1 == t2
1083
1084 {-
1085 ************************************************************************
1086 * *
1087 Orphans
1088 * *
1089 ************************************************************************
1090 -}
1091
1092 -- | Is this instance an orphan? If it is not an orphan, contains an 'OccName'
1093 -- witnessing the instance's non-orphanhood.
1094 -- See Note [Orphans]
1095 data IsOrphan
1096 = IsOrphan
1097 | NotOrphan OccName -- The OccName 'n' witnesses the instance's non-orphanhood
1098 -- In that case, the instance is fingerprinted as part
1099 -- of the definition of 'n's definition
1100 deriving Data
1101
1102 -- | Returns true if 'IsOrphan' is orphan.
1103 isOrphan :: IsOrphan -> Bool
1104 isOrphan IsOrphan = True
1105 isOrphan _ = False
1106
1107 -- | Returns true if 'IsOrphan' is not an orphan.
1108 notOrphan :: IsOrphan -> Bool
1109 notOrphan NotOrphan{} = True
1110 notOrphan _ = False
1111
1112 chooseOrphanAnchor :: NameSet -> IsOrphan
1113 -- Something (rule, instance) is relate to all the Names in this
1114 -- list. Choose one of them to be an "anchor" for the orphan. We make
1115 -- the choice deterministic to avoid gratuitious changes in the ABI
1116 -- hash (#4012). Specifically, use lexicographic comparison of
1117 -- OccName rather than comparing Uniques
1118 --
1119 -- NB: 'minimum' use Ord, and (Ord OccName) works lexicographically
1120 --
1121 chooseOrphanAnchor local_names
1122 | isEmptyNameSet local_names = IsOrphan
1123 | otherwise = NotOrphan (minimum occs)
1124 where
1125 occs = map nameOccName $ nonDetEltsUniqSet local_names
1126 -- It's OK to use nonDetEltsUFM here, see comments above
1127
1128 instance Binary IsOrphan where
1129 put_ bh IsOrphan = putByte bh 0
1130 put_ bh (NotOrphan n) = do
1131 putByte bh 1
1132 put_ bh n
1133 get bh = do
1134 h <- getByte bh
1135 case h of
1136 0 -> return IsOrphan
1137 _ -> do
1138 n <- get bh
1139 return $ NotOrphan n
1140
1141 {-
1142 Note [Orphans]
1143 ~~~~~~~~~~~~~~
1144 Class instances, rules, and family instances are divided into orphans
1145 and non-orphans. Roughly speaking, an instance/rule is an orphan if
1146 its left hand side mentions nothing defined in this module. Orphan-hood
1147 has two major consequences
1148
1149 * A module that contains orphans is called an "orphan module". If
1150 the module being compiled depends (transitively) on an oprhan
1151 module M, then M.hi is read in regardless of whether M is oherwise
1152 needed. This is to ensure that we don't miss any instance decls in
1153 M. But it's painful, because it means we need to keep track of all
1154 the orphan modules below us.
1155
1156 * A non-orphan is not finger-printed separately. Instead, for
1157 fingerprinting purposes it is treated as part of the entity it
1158 mentions on the LHS. For example
1159 data T = T1 | T2
1160 instance Eq T where ....
1161 The instance (Eq T) is incorprated as part of T's fingerprint.
1162
1163 In contrast, orphans are all fingerprinted together in the
1164 mi_orph_hash field of the ModIface.
1165
1166 See MkIface.addFingerprints.
1167
1168 Orphan-hood is computed
1169 * For class instances:
1170 when we make a ClsInst
1171 (because it is needed during instance lookup)
1172
1173 * For rules and family instances:
1174 when we generate an IfaceRule (MkIface.coreRuleToIfaceRule)
1175 or IfaceFamInst (MkIface.instanceToIfaceInst)
1176 -}
1177
1178 {-
1179 ************************************************************************
1180 * *
1181 \subsection{Transformation rules}
1182 * *
1183 ************************************************************************
1184
1185 The CoreRule type and its friends are dealt with mainly in CoreRules,
1186 but CoreFVs, Subst, PprCore, CoreTidy also inspect the representation.
1187 -}
1188
1189 -- | Gathers a collection of 'CoreRule's. Maps (the name of) an 'Id' to its rules
1190 type RuleBase = NameEnv [CoreRule]
1191 -- The rules are unordered;
1192 -- we sort out any overlaps on lookup
1193
1194 -- | A full rule environment which we can apply rules from. Like a 'RuleBase',
1195 -- but it also includes the set of visible orphans we use to filter out orphan
1196 -- rules which are not visible (even though we can see them...)
1197 data RuleEnv
1198 = RuleEnv { re_base :: RuleBase
1199 , re_visible_orphs :: ModuleSet
1200 }
1201
1202 mkRuleEnv :: RuleBase -> [Module] -> RuleEnv
1203 mkRuleEnv rules vis_orphs = RuleEnv rules (mkModuleSet vis_orphs)
1204
1205 emptyRuleEnv :: RuleEnv
1206 emptyRuleEnv = RuleEnv emptyNameEnv emptyModuleSet
1207
1208 -- | A 'CoreRule' is:
1209 --
1210 -- * \"Local\" if the function it is a rule for is defined in the
1211 -- same module as the rule itself.
1212 --
1213 -- * \"Orphan\" if nothing on the LHS is defined in the same module
1214 -- as the rule itself
1215 data CoreRule
1216 = Rule {
1217 ru_name :: RuleName, -- ^ Name of the rule, for communication with the user
1218 ru_act :: Activation, -- ^ When the rule is active
1219
1220 -- Rough-matching stuff
1221 -- see comments with InstEnv.ClsInst( is_cls, is_rough )
1222 ru_fn :: Name, -- ^ Name of the 'Id.Id' at the head of this rule
1223 ru_rough :: [Maybe Name], -- ^ Name at the head of each argument to the left hand side
1224
1225 -- Proper-matching stuff
1226 -- see comments with InstEnv.ClsInst( is_tvs, is_tys )
1227 ru_bndrs :: [CoreBndr], -- ^ Variables quantified over
1228 ru_args :: [CoreExpr], -- ^ Left hand side arguments
1229
1230 -- And the right-hand side
1231 ru_rhs :: CoreExpr, -- ^ Right hand side of the rule
1232 -- Occurrence info is guaranteed correct
1233 -- See Note [OccInfo in unfoldings and rules]
1234
1235 -- Locality
1236 ru_auto :: Bool, -- ^ @True@ <=> this rule is auto-generated
1237 -- (notably by Specialise or SpecConstr)
1238 -- @False@ <=> generated at the user's behest
1239 -- See Note [Trimming auto-rules] in TidyPgm
1240 -- for the sole purpose of this field.
1241
1242 ru_origin :: !Module, -- ^ 'Module' the rule was defined in, used
1243 -- to test if we should see an orphan rule.
1244
1245 ru_orphan :: !IsOrphan, -- ^ Whether or not the rule is an orphan.
1246
1247 ru_local :: Bool -- ^ @True@ iff the fn at the head of the rule is
1248 -- defined in the same module as the rule
1249 -- and is not an implicit 'Id' (like a record selector,
1250 -- class operation, or data constructor). This
1251 -- is different from 'ru_orphan', where a rule
1252 -- can avoid being an orphan if *any* Name in
1253 -- LHS of the rule was defined in the same
1254 -- module as the rule.
1255 }
1256
1257 -- | Built-in rules are used for constant folding
1258 -- and suchlike. They have no free variables.
1259 -- A built-in rule is always visible (there is no such thing as
1260 -- an orphan built-in rule.)
1261 | BuiltinRule {
1262 ru_name :: RuleName, -- ^ As above
1263 ru_fn :: Name, -- ^ As above
1264 ru_nargs :: Int, -- ^ Number of arguments that 'ru_try' consumes,
1265 -- if it fires, including type arguments
1266 ru_try :: RuleFun
1267 -- ^ This function does the rewrite. It given too many
1268 -- arguments, it simply discards them; the returned 'CoreExpr'
1269 -- is just the rewrite of 'ru_fn' applied to the first 'ru_nargs' args
1270 }
1271 -- See Note [Extra args in rule matching] in Rules.hs
1272
1273 type RuleFun = DynFlags -> InScopeEnv -> Id -> [CoreExpr] -> Maybe CoreExpr
1274 type InScopeEnv = (InScopeSet, IdUnfoldingFun)
1275
1276 type IdUnfoldingFun = Id -> Unfolding
1277 -- A function that embodies how to unfold an Id if you need
1278 -- to do that in the Rule. The reason we need to pass this info in
1279 -- is that whether an Id is unfoldable depends on the simplifier phase
1280
1281 isBuiltinRule :: CoreRule -> Bool
1282 isBuiltinRule (BuiltinRule {}) = True
1283 isBuiltinRule _ = False
1284
1285 isAutoRule :: CoreRule -> Bool
1286 isAutoRule (BuiltinRule {}) = False
1287 isAutoRule (Rule { ru_auto = is_auto }) = is_auto
1288
1289 -- | The number of arguments the 'ru_fn' must be applied
1290 -- to before the rule can match on it
1291 ruleArity :: CoreRule -> Int
1292 ruleArity (BuiltinRule {ru_nargs = n}) = n
1293 ruleArity (Rule {ru_args = args}) = length args
1294
1295 ruleName :: CoreRule -> RuleName
1296 ruleName = ru_name
1297
1298 ruleModule :: CoreRule -> Maybe Module
1299 ruleModule Rule { ru_origin } = Just ru_origin
1300 ruleModule BuiltinRule {} = Nothing
1301
1302 ruleActivation :: CoreRule -> Activation
1303 ruleActivation (BuiltinRule { }) = AlwaysActive
1304 ruleActivation (Rule { ru_act = act }) = act
1305
1306 -- | The 'Name' of the 'Id.Id' at the head of the rule left hand side
1307 ruleIdName :: CoreRule -> Name
1308 ruleIdName = ru_fn
1309
1310 isLocalRule :: CoreRule -> Bool
1311 isLocalRule = ru_local
1312
1313 -- | Set the 'Name' of the 'Id.Id' at the head of the rule left hand side
1314 setRuleIdName :: Name -> CoreRule -> CoreRule
1315 setRuleIdName nm ru = ru { ru_fn = nm }
1316
1317 {-
1318 ************************************************************************
1319 * *
1320 Unfoldings
1321 * *
1322 ************************************************************************
1323
1324 The @Unfolding@ type is declared here to avoid numerous loops
1325 -}
1326
1327 -- | Records the /unfolding/ of an identifier, which is approximately the form the
1328 -- identifier would have if we substituted its definition in for the identifier.
1329 -- This type should be treated as abstract everywhere except in "CoreUnfold"
1330 data Unfolding
1331 = NoUnfolding -- ^ We have no information about the unfolding.
1332
1333 | BootUnfolding -- ^ We have no information about the unfolding, because
1334 -- this 'Id' came from an @hi-boot@ file.
1335 -- See Note [Inlining and hs-boot files] in ToIface
1336 -- for what this is used for.
1337
1338 | OtherCon [AltCon] -- ^ It ain't one of these constructors.
1339 -- @OtherCon xs@ also indicates that something has been evaluated
1340 -- and hence there's no point in re-evaluating it.
1341 -- @OtherCon []@ is used even for non-data-type values
1342 -- to indicated evaluated-ness. Notably:
1343 --
1344 -- > data C = C !(Int -> Int)
1345 -- > case x of { C f -> ... }
1346 --
1347 -- Here, @f@ gets an @OtherCon []@ unfolding.
1348
1349 | DFunUnfolding { -- The Unfolding of a DFunId
1350 -- See Note [DFun unfoldings]
1351 -- df = /\a1..am. \d1..dn. MkD t1 .. tk
1352 -- (op1 a1..am d1..dn)
1353 -- (op2 a1..am d1..dn)
1354 df_bndrs :: [Var], -- The bound variables [a1..m],[d1..dn]
1355 df_con :: DataCon, -- The dictionary data constructor (never a newtype datacon)
1356 df_args :: [CoreExpr] -- Args of the data con: types, superclasses and methods,
1357 } -- in positional order
1358
1359 | CoreUnfolding { -- An unfolding for an Id with no pragma,
1360 -- or perhaps a NOINLINE pragma
1361 -- (For NOINLINE, the phase, if any, is in the
1362 -- InlinePragInfo for this Id.)
1363 uf_tmpl :: CoreExpr, -- Template; occurrence info is correct
1364 uf_src :: UnfoldingSource, -- Where the unfolding came from
1365 uf_is_top :: Bool, -- True <=> top level binding
1366 uf_is_value :: Bool, -- exprIsHNF template (cached); it is ok to discard
1367 -- a `seq` on this variable
1368 uf_is_conlike :: Bool, -- True <=> applicn of constructor or CONLIKE function
1369 -- Cached version of exprIsConLike
1370 uf_is_work_free :: Bool, -- True <=> doesn't waste (much) work to expand
1371 -- inside an inlining
1372 -- Cached version of exprIsCheap
1373 uf_expandable :: Bool, -- True <=> can expand in RULE matching
1374 -- Cached version of exprIsExpandable
1375 uf_guidance :: UnfoldingGuidance -- Tells about the *size* of the template.
1376 }
1377 -- ^ An unfolding with redundant cached information. Parameters:
1378 --
1379 -- uf_tmpl: Template used to perform unfolding;
1380 -- NB: Occurrence info is guaranteed correct:
1381 -- see Note [OccInfo in unfoldings and rules]
1382 --
1383 -- uf_is_top: Is this a top level binding?
1384 --
1385 -- uf_is_value: 'exprIsHNF' template (cached); it is ok to discard a 'seq' on
1386 -- this variable
1387 --
1388 -- uf_is_work_free: Does this waste only a little work if we expand it inside an inlining?
1389 -- Basically this is a cached version of 'exprIsWorkFree'
1390 --
1391 -- uf_guidance: Tells us about the /size/ of the unfolding template
1392
1393
1394 ------------------------------------------------
1395 data UnfoldingSource
1396 = -- See also Note [Historical note: unfoldings for wrappers]
1397
1398 InlineRhs -- The current rhs of the function
1399 -- Replace uf_tmpl each time around
1400
1401 | InlineStable -- From an INLINE or INLINABLE pragma
1402 -- INLINE if guidance is UnfWhen
1403 -- INLINABLE if guidance is UnfIfGoodArgs/UnfoldNever
1404 -- (well, technically an INLINABLE might be made
1405 -- UnfWhen if it was small enough, and then
1406 -- it will behave like INLINE outside the current
1407 -- module, but that is the way automatic unfoldings
1408 -- work so it is consistent with the intended
1409 -- meaning of INLINABLE).
1410 --
1411 -- uf_tmpl may change, but only as a result of
1412 -- gentle simplification, it doesn't get updated
1413 -- to the current RHS during compilation as with
1414 -- InlineRhs.
1415 --
1416 -- See Note [InlineStable]
1417
1418 | InlineCompulsory -- Something that *has* no binding, so you *must* inline it
1419 -- Only a few primop-like things have this property
1420 -- (see MkId.hs, calls to mkCompulsoryUnfolding).
1421 -- Inline absolutely always, however boring the context.
1422
1423
1424
1425 -- | 'UnfoldingGuidance' says when unfolding should take place
1426 data UnfoldingGuidance
1427 = UnfWhen { -- Inline without thinking about the *size* of the uf_tmpl
1428 -- Used (a) for small *and* cheap unfoldings
1429 -- (b) for INLINE functions
1430 -- See Note [INLINE for small functions] in CoreUnfold
1431 ug_arity :: Arity, -- Number of value arguments expected
1432
1433 ug_unsat_ok :: Bool, -- True <=> ok to inline even if unsaturated
1434 ug_boring_ok :: Bool -- True <=> ok to inline even if the context is boring
1435 -- So True,True means "always"
1436 }
1437
1438 | UnfIfGoodArgs { -- Arose from a normal Id; the info here is the
1439 -- result of a simple analysis of the RHS
1440
1441 ug_args :: [Int], -- Discount if the argument is evaluated.
1442 -- (i.e., a simplification will definitely
1443 -- be possible). One elt of the list per *value* arg.
1444
1445 ug_size :: Int, -- The "size" of the unfolding.
1446
1447 ug_res :: Int -- Scrutinee discount: the discount to substract if the thing is in
1448 } -- a context (case (thing args) of ...),
1449 -- (where there are the right number of arguments.)
1450
1451 | UnfNever -- The RHS is big, so don't inline it
1452 deriving (Eq)
1453
1454 {-
1455 Note [Historical note: unfoldings for wrappers]
1456 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1457 We used to have a nice clever scheme in interface files for
1458 wrappers. A wrapper's unfolding can be reconstructed from its worker's
1459 id and its strictness. This decreased .hi file size (sometimes
1460 significantly, for modules like GHC.Classes with many high-arity w/w
1461 splits) and had a slight corresponding effect on compile times.
1462
1463 However, when we added the second demand analysis, this scheme lead to
1464 some Core lint errors. The second analysis could change the strictness
1465 signatures, which sometimes resulted in a wrapper's regenerated
1466 unfolding applying the wrapper to too many arguments.
1467
1468 Instead of repairing the clever .hi scheme, we abandoned it in favor
1469 of simplicity. The .hi sizes are usually insignificant (excluding the
1470 +1M for base libraries), and compile time barely increases (~+1% for
1471 nofib). The nicer upshot is that the UnfoldingSource no longer mentions
1472 an Id, so, eg, substitutions need not traverse them.
1473
1474
1475 Note [DFun unfoldings]
1476 ~~~~~~~~~~~~~~~~~~~~~~
1477 The Arity in a DFunUnfolding is total number of args (type and value)
1478 that the DFun needs to produce a dictionary. That's not necessarily
1479 related to the ordinary arity of the dfun Id, esp if the class has
1480 one method, so the dictionary is represented by a newtype. Example
1481
1482 class C a where { op :: a -> Int }
1483 instance C a -> C [a] where op xs = op (head xs)
1484
1485 The instance translates to
1486
1487 $dfCList :: forall a. C a => C [a] -- Arity 2!
1488 $dfCList = /\a.\d. $copList {a} d |> co
1489
1490 $copList :: forall a. C a => [a] -> Int -- Arity 2!
1491 $copList = /\a.\d.\xs. op {a} d (head xs)
1492
1493 Now we might encounter (op (dfCList {ty} d) a1 a2)
1494 and we want the (op (dfList {ty} d)) rule to fire, because $dfCList
1495 has all its arguments, even though its (value) arity is 2. That's
1496 why we record the number of expected arguments in the DFunUnfolding.
1497
1498 Note that although it's an Arity, it's most convenient for it to give
1499 the *total* number of arguments, both type and value. See the use
1500 site in exprIsConApp_maybe.
1501 -}
1502
1503 -- Constants for the UnfWhen constructor
1504 needSaturated, unSaturatedOk :: Bool
1505 needSaturated = False
1506 unSaturatedOk = True
1507
1508 boringCxtNotOk, boringCxtOk :: Bool
1509 boringCxtOk = True
1510 boringCxtNotOk = False
1511
1512 ------------------------------------------------
1513 noUnfolding :: Unfolding
1514 -- ^ There is no known 'Unfolding'
1515 evaldUnfolding :: Unfolding
1516 -- ^ This unfolding marks the associated thing as being evaluated
1517
1518 noUnfolding = NoUnfolding
1519 evaldUnfolding = OtherCon []
1520
1521 -- | There is no known 'Unfolding', because this came from an
1522 -- hi-boot file.
1523 bootUnfolding :: Unfolding
1524 bootUnfolding = BootUnfolding
1525
1526 mkOtherCon :: [AltCon] -> Unfolding
1527 mkOtherCon = OtherCon
1528
1529 isStableSource :: UnfoldingSource -> Bool
1530 -- Keep the unfolding template
1531 isStableSource InlineCompulsory = True
1532 isStableSource InlineStable = True
1533 isStableSource InlineRhs = False
1534
1535 -- | Retrieves the template of an unfolding: panics if none is known
1536 unfoldingTemplate :: Unfolding -> CoreExpr
1537 unfoldingTemplate = uf_tmpl
1538
1539 -- | Retrieves the template of an unfolding if possible
1540 -- maybeUnfoldingTemplate is used mainly wnen specialising, and we do
1541 -- want to specialise DFuns, so it's important to return a template
1542 -- for DFunUnfoldings
1543 maybeUnfoldingTemplate :: Unfolding -> Maybe CoreExpr
1544 maybeUnfoldingTemplate (CoreUnfolding { uf_tmpl = expr })
1545 = Just expr
1546 maybeUnfoldingTemplate (DFunUnfolding { df_bndrs = bndrs, df_con = con, df_args = args })
1547 = Just (mkLams bndrs (mkApps (Var (dataConWorkId con)) args))
1548 maybeUnfoldingTemplate _
1549 = Nothing
1550
1551 -- | The constructors that the unfolding could never be:
1552 -- returns @[]@ if no information is available
1553 otherCons :: Unfolding -> [AltCon]
1554 otherCons (OtherCon cons) = cons
1555 otherCons _ = []
1556
1557 -- | Determines if it is certainly the case that the unfolding will
1558 -- yield a value (something in HNF): returns @False@ if unsure
1559 isValueUnfolding :: Unfolding -> Bool
1560 -- Returns False for OtherCon
1561 isValueUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald
1562 isValueUnfolding _ = False
1563
1564 -- | Determines if it possibly the case that the unfolding will
1565 -- yield a value. Unlike 'isValueUnfolding' it returns @True@
1566 -- for 'OtherCon'
1567 isEvaldUnfolding :: Unfolding -> Bool
1568 -- Returns True for OtherCon
1569 isEvaldUnfolding (OtherCon _) = True
1570 isEvaldUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald
1571 isEvaldUnfolding _ = False
1572
1573 -- | @True@ if the unfolding is a constructor application, the application
1574 -- of a CONLIKE function or 'OtherCon'
1575 isConLikeUnfolding :: Unfolding -> Bool
1576 isConLikeUnfolding (OtherCon _) = True
1577 isConLikeUnfolding (CoreUnfolding { uf_is_conlike = con }) = con
1578 isConLikeUnfolding _ = False
1579
1580 -- | Is the thing we will unfold into certainly cheap?
1581 isCheapUnfolding :: Unfolding -> Bool
1582 isCheapUnfolding (CoreUnfolding { uf_is_work_free = is_wf }) = is_wf
1583 isCheapUnfolding _ = False
1584
1585 isExpandableUnfolding :: Unfolding -> Bool
1586 isExpandableUnfolding (CoreUnfolding { uf_expandable = is_expable }) = is_expable
1587 isExpandableUnfolding _ = False
1588
1589 expandUnfolding_maybe :: Unfolding -> Maybe CoreExpr
1590 -- Expand an expandable unfolding; this is used in rule matching
1591 -- See Note [Expanding variables] in Rules.hs
1592 -- The key point here is that CONLIKE things can be expanded
1593 expandUnfolding_maybe (CoreUnfolding { uf_expandable = True, uf_tmpl = rhs }) = Just rhs
1594 expandUnfolding_maybe _ = Nothing
1595
1596 isCompulsoryUnfolding :: Unfolding -> Bool
1597 isCompulsoryUnfolding (CoreUnfolding { uf_src = InlineCompulsory }) = True
1598 isCompulsoryUnfolding _ = False
1599
1600 isStableUnfolding :: Unfolding -> Bool
1601 -- True of unfoldings that should not be overwritten
1602 -- by a CoreUnfolding for the RHS of a let-binding
1603 isStableUnfolding (CoreUnfolding { uf_src = src }) = isStableSource src
1604 isStableUnfolding (DFunUnfolding {}) = True
1605 isStableUnfolding _ = False
1606
1607 -- | Only returns False if there is no unfolding information available at all
1608 hasSomeUnfolding :: Unfolding -> Bool
1609 hasSomeUnfolding NoUnfolding = False
1610 hasSomeUnfolding BootUnfolding = False
1611 hasSomeUnfolding _ = True
1612
1613 isBootUnfolding :: Unfolding -> Bool
1614 isBootUnfolding BootUnfolding = True
1615 isBootUnfolding _ = False
1616
1617 neverUnfoldGuidance :: UnfoldingGuidance -> Bool
1618 neverUnfoldGuidance UnfNever = True
1619 neverUnfoldGuidance _ = False
1620
1621 isFragileUnfolding :: Unfolding -> Bool
1622 -- An unfolding is fragile if it mentions free variables or
1623 -- is otherwise subject to change. A robust one can be kept.
1624 -- See Note [Fragile unfoldings]
1625 isFragileUnfolding (CoreUnfolding {}) = True
1626 isFragileUnfolding (DFunUnfolding {}) = True
1627 isFragileUnfolding _ = False
1628 -- NoUnfolding, BootUnfolding, OtherCon are all non-fragile
1629
1630 canUnfold :: Unfolding -> Bool
1631 canUnfold (CoreUnfolding { uf_guidance = g }) = not (neverUnfoldGuidance g)
1632 canUnfold _ = False
1633
1634 {- Note [Fragile unfoldings]
1635 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1636 An unfolding is "fragile" if it mentions free variables (and hence would
1637 need substitution) or might be affected by optimisation. The non-fragile
1638 ones are
1639
1640 NoUnfolding, BootUnfolding
1641
1642 OtherCon {} If we know this binder (say a lambda binder) will be
1643 bound to an evaluated thing, we want to retain that
1644 info in simpleOptExpr; see #13077.
1645
1646 We consider even a StableUnfolding as fragile, because it needs substitution.
1647
1648 Note [InlineStable]
1649 ~~~~~~~~~~~~~~~~~
1650 When you say
1651 {-# INLINE f #-}
1652 f x = <rhs>
1653 you intend that calls (f e) are replaced by <rhs>[e/x] So we
1654 should capture (\x.<rhs>) in the Unfolding of 'f', and never meddle
1655 with it. Meanwhile, we can optimise <rhs> to our heart's content,
1656 leaving the original unfolding intact in Unfolding of 'f'. For example
1657 all xs = foldr (&&) True xs
1658 any p = all . map p {-# INLINE any #-}
1659 We optimise any's RHS fully, but leave the InlineRule saying "all . map p",
1660 which deforests well at the call site.
1661
1662 So INLINE pragma gives rise to an InlineRule, which captures the original RHS.
1663
1664 Moreover, it's only used when 'f' is applied to the
1665 specified number of arguments; that is, the number of argument on
1666 the LHS of the '=' sign in the original source definition.
1667 For example, (.) is now defined in the libraries like this
1668 {-# INLINE (.) #-}
1669 (.) f g = \x -> f (g x)
1670 so that it'll inline when applied to two arguments. If 'x' appeared
1671 on the left, thus
1672 (.) f g x = f (g x)
1673 it'd only inline when applied to three arguments. This slightly-experimental
1674 change was requested by Roman, but it seems to make sense.
1675
1676 See also Note [Inlining an InlineRule] in CoreUnfold.
1677
1678
1679 Note [OccInfo in unfoldings and rules]
1680 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1681 In unfoldings and rules, we guarantee that the template is occ-analysed,
1682 so that the occurrence info on the binders is correct. This is important,
1683 because the Simplifier does not re-analyse the template when using it. If
1684 the occurrence info is wrong
1685 - We may get more simplifier iterations than necessary, because
1686 once-occ info isn't there
1687 - More seriously, we may get an infinite loop if there's a Rec
1688 without a loop breaker marked
1689
1690
1691 ************************************************************************
1692 * *
1693 AltCon
1694 * *
1695 ************************************************************************
1696 -}
1697
1698 -- The Ord is needed for the FiniteMap used in the lookForConstructor
1699 -- in SimplEnv. If you declared that lookForConstructor *ignores*
1700 -- constructor-applications with LitArg args, then you could get
1701 -- rid of this Ord.
1702
1703 instance Outputable AltCon where
1704 ppr (DataAlt dc) = ppr dc
1705 ppr (LitAlt lit) = ppr lit
1706 ppr DEFAULT = text "__DEFAULT"
1707
1708 cmpAlt :: (AltCon, a, b) -> (AltCon, a, b) -> Ordering
1709 cmpAlt (con1, _, _) (con2, _, _) = con1 `cmpAltCon` con2
1710
1711 ltAlt :: (AltCon, a, b) -> (AltCon, a, b) -> Bool
1712 ltAlt a1 a2 = (a1 `cmpAlt` a2) == LT
1713
1714 cmpAltCon :: AltCon -> AltCon -> Ordering
1715 -- ^ Compares 'AltCon's within a single list of alternatives
1716 -- DEFAULT comes out smallest, so that sorting by AltCon
1717 -- puts alternatives in the order required by #case_invariants#
1718 cmpAltCon DEFAULT DEFAULT = EQ
1719 cmpAltCon DEFAULT _ = LT
1720
1721 cmpAltCon (DataAlt d1) (DataAlt d2) = dataConTag d1 `compare` dataConTag d2
1722 cmpAltCon (DataAlt _) DEFAULT = GT
1723 cmpAltCon (LitAlt l1) (LitAlt l2) = l1 `compare` l2
1724 cmpAltCon (LitAlt _) DEFAULT = GT
1725
1726 cmpAltCon con1 con2 = WARN( True, text "Comparing incomparable AltCons" <+>
1727 ppr con1 <+> ppr con2 )
1728 LT
1729
1730 {-
1731 ************************************************************************
1732 * *
1733 \subsection{Useful synonyms}
1734 * *
1735 ************************************************************************
1736
1737 Note [CoreProgram]
1738 ~~~~~~~~~~~~~~~~~~
1739 The top level bindings of a program, a CoreProgram, are represented as
1740 a list of CoreBind
1741
1742 * Later bindings in the list can refer to earlier ones, but not vice
1743 versa. So this is OK
1744 NonRec { x = 4 }
1745 Rec { p = ...q...x...
1746 ; q = ...p...x }
1747 Rec { f = ...p..x..f.. }
1748 NonRec { g = ..f..q...x.. }
1749 But it would NOT be ok for 'f' to refer to 'g'.
1750
1751 * The occurrence analyser does strongly-connected component analysis
1752 on each Rec binding, and splits it into a sequence of smaller
1753 bindings where possible. So the program typically starts life as a
1754 single giant Rec, which is then dependency-analysed into smaller
1755 chunks.
1756 -}
1757
1758 -- If you edit this type, you may need to update the GHC formalism
1759 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
1760 type CoreProgram = [CoreBind] -- See Note [CoreProgram]
1761
1762 -- | The common case for the type of binders and variables when
1763 -- we are manipulating the Core language within GHC
1764 type CoreBndr = Var
1765 -- | Expressions where binders are 'CoreBndr's
1766 type CoreExpr = Expr CoreBndr
1767 -- | Argument expressions where binders are 'CoreBndr's
1768 type CoreArg = Arg CoreBndr
1769 -- | Binding groups where binders are 'CoreBndr's
1770 type CoreBind = Bind CoreBndr
1771 -- | Case alternatives where binders are 'CoreBndr's
1772 type CoreAlt = Alt CoreBndr
1773
1774 {-
1775 ************************************************************************
1776 * *
1777 \subsection{Tagging}
1778 * *
1779 ************************************************************************
1780 -}
1781
1782 -- | Binders are /tagged/ with a t
1783 data TaggedBndr t = TB CoreBndr t -- TB for "tagged binder"
1784
1785 type TaggedBind t = Bind (TaggedBndr t)
1786 type TaggedExpr t = Expr (TaggedBndr t)
1787 type TaggedArg t = Arg (TaggedBndr t)
1788 type TaggedAlt t = Alt (TaggedBndr t)
1789
1790 instance Outputable b => Outputable (TaggedBndr b) where
1791 ppr (TB b l) = char '<' <> ppr b <> comma <> ppr l <> char '>'
1792
1793 deTagExpr :: TaggedExpr t -> CoreExpr
1794 deTagExpr (Var v) = Var v
1795 deTagExpr (Lit l) = Lit l
1796 deTagExpr (Type ty) = Type ty
1797 deTagExpr (Coercion co) = Coercion co
1798 deTagExpr (App e1 e2) = App (deTagExpr e1) (deTagExpr e2)
1799 deTagExpr (Lam (TB b _) e) = Lam b (deTagExpr e)
1800 deTagExpr (Let bind body) = Let (deTagBind bind) (deTagExpr body)
1801 deTagExpr (Case e (TB b _) ty alts) = Case (deTagExpr e) b ty (map deTagAlt alts)
1802 deTagExpr (Tick t e) = Tick t (deTagExpr e)
1803 deTagExpr (Cast e co) = Cast (deTagExpr e) co
1804
1805 deTagBind :: TaggedBind t -> CoreBind
1806 deTagBind (NonRec (TB b _) rhs) = NonRec b (deTagExpr rhs)
1807 deTagBind (Rec prs) = Rec [(b, deTagExpr rhs) | (TB b _, rhs) <- prs]
1808
1809 deTagAlt :: TaggedAlt t -> CoreAlt
1810 deTagAlt (con, bndrs, rhs) = (con, [b | TB b _ <- bndrs], deTagExpr rhs)
1811
1812 {-
1813 ************************************************************************
1814 * *
1815 \subsection{Core-constructing functions with checking}
1816 * *
1817 ************************************************************************
1818 -}
1819
1820 -- | Apply a list of argument expressions to a function expression in a nested fashion. Prefer to
1821 -- use 'MkCore.mkCoreApps' if possible
1822 mkApps :: Expr b -> [Arg b] -> Expr b
1823 -- | Apply a list of type argument expressions to a function expression in a nested fashion
1824 mkTyApps :: Expr b -> [Type] -> Expr b
1825 -- | Apply a list of coercion argument expressions to a function expression in a nested fashion
1826 mkCoApps :: Expr b -> [Coercion] -> Expr b
1827 -- | Apply a list of type or value variables to a function expression in a nested fashion
1828 mkVarApps :: Expr b -> [Var] -> Expr b
1829 -- | Apply a list of argument expressions to a data constructor in a nested fashion. Prefer to
1830 -- use 'MkCore.mkCoreConApps' if possible
1831 mkConApp :: DataCon -> [Arg b] -> Expr b
1832
1833 mkApps f args = foldl' App f args
1834 mkCoApps f args = foldl' (\ e a -> App e (Coercion a)) f args
1835 mkVarApps f vars = foldl' (\ e a -> App e (varToCoreExpr a)) f vars
1836 mkConApp con args = mkApps (Var (dataConWorkId con)) args
1837
1838 mkTyApps f args = foldl' (\ e a -> App e (mkTyArg a)) f args
1839
1840 mkConApp2 :: DataCon -> [Type] -> [Var] -> Expr b
1841 mkConApp2 con tys arg_ids = Var (dataConWorkId con)
1842 `mkApps` map Type tys
1843 `mkApps` map varToCoreExpr arg_ids
1844
1845 mkTyArg :: Type -> Expr b
1846 mkTyArg ty
1847 | Just co <- isCoercionTy_maybe ty = Coercion co
1848 | otherwise = Type ty
1849
1850 -- | Create a machine integer literal expression of type @Int#@ from an @Integer@.
1851 -- If you want an expression of type @Int@ use 'MkCore.mkIntExpr'
1852 mkIntLit :: DynFlags -> Integer -> Expr b
1853 -- | Create a machine integer literal expression of type @Int#@ from an @Int@.
1854 -- If you want an expression of type @Int@ use 'MkCore.mkIntExpr'
1855 mkIntLitInt :: DynFlags -> Int -> Expr b
1856
1857 mkIntLit dflags n = Lit (mkLitInt dflags n)
1858 mkIntLitInt dflags n = Lit (mkLitInt dflags (toInteger n))
1859
1860 -- | Create a machine word literal expression of type @Word#@ from an @Integer@.
1861 -- If you want an expression of type @Word@ use 'MkCore.mkWordExpr'
1862 mkWordLit :: DynFlags -> Integer -> Expr b
1863 -- | Create a machine word literal expression of type @Word#@ from a @Word@.
1864 -- If you want an expression of type @Word@ use 'MkCore.mkWordExpr'
1865 mkWordLitWord :: DynFlags -> Word -> Expr b
1866
1867 mkWordLit dflags w = Lit (mkLitWord dflags w)
1868 mkWordLitWord dflags w = Lit (mkLitWord dflags (toInteger w))
1869
1870 mkWord64LitWord64 :: Word64 -> Expr b
1871 mkWord64LitWord64 w = Lit (mkLitWord64 (toInteger w))
1872
1873 mkInt64LitInt64 :: Int64 -> Expr b
1874 mkInt64LitInt64 w = Lit (mkLitInt64 (toInteger w))
1875
1876 -- | Create a machine character literal expression of type @Char#@.
1877 -- If you want an expression of type @Char@ use 'MkCore.mkCharExpr'
1878 mkCharLit :: Char -> Expr b
1879 -- | Create a machine string literal expression of type @Addr#@.
1880 -- If you want an expression of type @String@ use 'MkCore.mkStringExpr'
1881 mkStringLit :: String -> Expr b
1882
1883 mkCharLit c = Lit (mkLitChar c)
1884 mkStringLit s = Lit (mkLitString s)
1885
1886 -- | Create a machine single precision literal expression of type @Float#@ from a @Rational@.
1887 -- If you want an expression of type @Float@ use 'MkCore.mkFloatExpr'
1888 mkFloatLit :: Rational -> Expr b
1889 -- | Create a machine single precision literal expression of type @Float#@ from a @Float@.
1890 -- If you want an expression of type @Float@ use 'MkCore.mkFloatExpr'
1891 mkFloatLitFloat :: Float -> Expr b
1892
1893 mkFloatLit f = Lit (mkLitFloat f)
1894 mkFloatLitFloat f = Lit (mkLitFloat (toRational f))
1895
1896 -- | Create a machine double precision literal expression of type @Double#@ from a @Rational@.
1897 -- If you want an expression of type @Double@ use 'MkCore.mkDoubleExpr'
1898 mkDoubleLit :: Rational -> Expr b
1899 -- | Create a machine double precision literal expression of type @Double#@ from a @Double@.
1900 -- If you want an expression of type @Double@ use 'MkCore.mkDoubleExpr'
1901 mkDoubleLitDouble :: Double -> Expr b
1902
1903 mkDoubleLit d = Lit (mkLitDouble d)
1904 mkDoubleLitDouble d = Lit (mkLitDouble (toRational d))
1905
1906 -- | Bind all supplied binding groups over an expression in a nested let expression. Assumes
1907 -- that the rhs satisfies the let/app invariant. Prefer to use 'MkCore.mkCoreLets' if
1908 -- possible, which does guarantee the invariant
1909 mkLets :: [Bind b] -> Expr b -> Expr b
1910 -- | Bind all supplied binders over an expression in a nested lambda expression. Prefer to
1911 -- use 'MkCore.mkCoreLams' if possible
1912 mkLams :: [b] -> Expr b -> Expr b
1913
1914 mkLams binders body = foldr Lam body binders
1915 mkLets binds body = foldr mkLet body binds
1916
1917 mkLet :: Bind b -> Expr b -> Expr b
1918 -- The desugarer sometimes generates an empty Rec group
1919 -- which Lint rejects, so we kill it off right away
1920 mkLet (Rec []) body = body
1921 mkLet bind body = Let bind body
1922
1923 -- | @mkLetNonRec bndr rhs body@ wraps @body@ in a @let@ binding @bndr@.
1924 mkLetNonRec :: b -> Expr b -> Expr b -> Expr b
1925 mkLetNonRec b rhs body = Let (NonRec b rhs) body
1926
1927 -- | @mkLetRec binds body@ wraps @body@ in a @let rec@ with the given set of
1928 -- @binds@ if binds is non-empty.
1929 mkLetRec :: [(b, Expr b)] -> Expr b -> Expr b
1930 mkLetRec [] body = body
1931 mkLetRec bs body = Let (Rec bs) body
1932
1933 -- | Create a binding group where a type variable is bound to a type. Per "CoreSyn#type_let",
1934 -- this can only be used to bind something in a non-recursive @let@ expression
1935 mkTyBind :: TyVar -> Type -> CoreBind
1936 mkTyBind tv ty = NonRec tv (Type ty)
1937
1938 -- | Create a binding group where a type variable is bound to a type. Per "CoreSyn#type_let",
1939 -- this can only be used to bind something in a non-recursive @let@ expression
1940 mkCoBind :: CoVar -> Coercion -> CoreBind
1941 mkCoBind cv co = NonRec cv (Coercion co)
1942
1943 -- | Convert a binder into either a 'Var' or 'Type' 'Expr' appropriately
1944 varToCoreExpr :: CoreBndr -> Expr b
1945 varToCoreExpr v | isTyVar v = Type (mkTyVarTy v)
1946 | isCoVar v = Coercion (mkCoVarCo v)
1947 | otherwise = ASSERT( isId v ) Var v
1948
1949 varsToCoreExprs :: [CoreBndr] -> [Expr b]
1950 varsToCoreExprs vs = map varToCoreExpr vs
1951
1952 {-
1953 ************************************************************************
1954 * *
1955 Getting a result type
1956 * *
1957 ************************************************************************
1958
1959 These are defined here to avoid a module loop between CoreUtils and CoreFVs
1960
1961 -}
1962
1963 applyTypeToArg :: Type -> CoreExpr -> Type
1964 -- ^ Determines the type resulting from applying an expression with given type
1965 -- to a given argument expression
1966 applyTypeToArg fun_ty arg = piResultTy fun_ty (exprToType arg)
1967
1968 -- | If the expression is a 'Type', converts. Otherwise,
1969 -- panics. NB: This does /not/ convert 'Coercion' to 'CoercionTy'.
1970 exprToType :: CoreExpr -> Type
1971 exprToType (Type ty) = ty
1972 exprToType _bad = pprPanic "exprToType" empty
1973
1974 -- | If the expression is a 'Coercion', converts.
1975 exprToCoercion_maybe :: CoreExpr -> Maybe Coercion
1976 exprToCoercion_maybe (Coercion co) = Just co
1977 exprToCoercion_maybe _ = Nothing
1978
1979 {-
1980 ************************************************************************
1981 * *
1982 \subsection{Simple access functions}
1983 * *
1984 ************************************************************************
1985 -}
1986
1987 -- | Extract every variable by this group
1988 bindersOf :: Bind b -> [b]
1989 -- If you edit this function, you may need to update the GHC formalism
1990 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
1991 bindersOf (NonRec binder _) = [binder]
1992 bindersOf (Rec pairs) = [binder | (binder, _) <- pairs]
1993
1994 -- | 'bindersOf' applied to a list of binding groups
1995 bindersOfBinds :: [Bind b] -> [b]
1996 bindersOfBinds binds = foldr ((++) . bindersOf) [] binds
1997
1998 rhssOfBind :: Bind b -> [Expr b]
1999 rhssOfBind (NonRec _ rhs) = [rhs]
2000 rhssOfBind (Rec pairs) = [rhs | (_,rhs) <- pairs]
2001
2002 rhssOfAlts :: [Alt b] -> [Expr b]
2003 rhssOfAlts alts = [e | (_,_,e) <- alts]
2004
2005 -- | Collapse all the bindings in the supplied groups into a single
2006 -- list of lhs\/rhs pairs suitable for binding in a 'Rec' binding group
2007 flattenBinds :: [Bind b] -> [(b, Expr b)]
2008 flattenBinds (NonRec b r : binds) = (b,r) : flattenBinds binds
2009 flattenBinds (Rec prs1 : binds) = prs1 ++ flattenBinds binds
2010 flattenBinds [] = []
2011
2012 -- | We often want to strip off leading lambdas before getting down to
2013 -- business. Variants are 'collectTyBinders', 'collectValBinders',
2014 -- and 'collectTyAndValBinders'
2015 collectBinders :: Expr b -> ([b], Expr b)
2016 collectTyBinders :: CoreExpr -> ([TyVar], CoreExpr)
2017 collectValBinders :: CoreExpr -> ([Id], CoreExpr)
2018 collectTyAndValBinders :: CoreExpr -> ([TyVar], [Id], CoreExpr)
2019 -- | Strip off exactly N leading lambdas (type or value). Good for use with
2020 -- join points.
2021 collectNBinders :: Int -> Expr b -> ([b], Expr b)
2022
2023 collectBinders expr
2024 = go [] expr
2025 where
2026 go bs (Lam b e) = go (b:bs) e
2027 go bs e = (reverse bs, e)
2028
2029 collectTyBinders expr
2030 = go [] expr
2031 where
2032 go tvs (Lam b e) | isTyVar b = go (b:tvs) e
2033 go tvs e = (reverse tvs, e)
2034
2035 collectValBinders expr
2036 = go [] expr
2037 where
2038 go ids (Lam b e) | isId b = go (b:ids) e
2039 go ids body = (reverse ids, body)
2040
2041 collectTyAndValBinders expr
2042 = (tvs, ids, body)
2043 where
2044 (tvs, body1) = collectTyBinders expr
2045 (ids, body) = collectValBinders body1
2046
2047 collectNBinders orig_n orig_expr
2048 = go orig_n [] orig_expr
2049 where
2050 go 0 bs expr = (reverse bs, expr)
2051 go n bs (Lam b e) = go (n-1) (b:bs) e
2052 go _ _ _ = pprPanic "collectNBinders" $ int orig_n
2053
2054 -- | Takes a nested application expression and returns the function
2055 -- being applied and the arguments to which it is applied
2056 collectArgs :: Expr b -> (Expr b, [Arg b])
2057 collectArgs expr
2058 = go expr []
2059 where
2060 go (App f a) as = go f (a:as)
2061 go e as = (e, as)
2062
2063 -- | Attempt to remove the last N arguments of a function call.
2064 -- Strip off any ticks or coercions encountered along the way and any
2065 -- at the end.
2066 stripNArgs :: Word -> Expr a -> Maybe (Expr a)
2067 stripNArgs !n (Tick _ e) = stripNArgs n e
2068 stripNArgs n (Cast f _) = stripNArgs n f
2069 stripNArgs 0 e = Just e
2070 stripNArgs n (App f _) = stripNArgs (n - 1) f
2071 stripNArgs _ _ = Nothing
2072
2073 -- | Like @collectArgs@, but also collects looks through floatable
2074 -- ticks if it means that we can find more arguments.
2075 collectArgsTicks :: (Tickish Id -> Bool) -> Expr b
2076 -> (Expr b, [Arg b], [Tickish Id])
2077 collectArgsTicks skipTick expr
2078 = go expr [] []
2079 where
2080 go (App f a) as ts = go f (a:as) ts
2081 go (Tick t e) as ts
2082 | skipTick t = go e as (t:ts)
2083 go e as ts = (e, as, reverse ts)
2084
2085
2086 {-
2087 ************************************************************************
2088 * *
2089 \subsection{Predicates}
2090 * *
2091 ************************************************************************
2092
2093 At one time we optionally carried type arguments through to runtime.
2094 @isRuntimeVar v@ returns if (Lam v _) really becomes a lambda at runtime,
2095 i.e. if type applications are actual lambdas because types are kept around
2096 at runtime. Similarly isRuntimeArg.
2097 -}
2098
2099 -- | Will this variable exist at runtime?
2100 isRuntimeVar :: Var -> Bool
2101 isRuntimeVar = isId
2102
2103 -- | Will this argument expression exist at runtime?
2104 isRuntimeArg :: CoreExpr -> Bool
2105 isRuntimeArg = isValArg
2106
2107 -- | Returns @True@ for value arguments, false for type args
2108 -- NB: coercions are value arguments (zero width, to be sure,
2109 -- like State#, but still value args).
2110 isValArg :: Expr b -> Bool
2111 isValArg e = not (isTypeArg e)
2112
2113 -- | Returns @True@ iff the expression is a 'Type' or 'Coercion'
2114 -- expression at its top level
2115 isTyCoArg :: Expr b -> Bool
2116 isTyCoArg (Type {}) = True
2117 isTyCoArg (Coercion {}) = True
2118 isTyCoArg _ = False
2119
2120 -- | Returns @True@ iff the expression is a 'Coercion'
2121 -- expression at its top level
2122 isCoArg :: Expr b -> Bool
2123 isCoArg (Coercion {}) = True
2124 isCoArg _ = False
2125
2126 -- | Returns @True@ iff the expression is a 'Type' expression at its
2127 -- top level. Note this does NOT include 'Coercion's.
2128 isTypeArg :: Expr b -> Bool
2129 isTypeArg (Type {}) = True
2130 isTypeArg _ = False
2131
2132 -- | The number of binders that bind values rather than types
2133 valBndrCount :: [CoreBndr] -> Int
2134 valBndrCount = count isId
2135
2136 -- | The number of argument expressions that are values rather than types at their top level
2137 valArgCount :: [Arg b] -> Int
2138 valArgCount = count isValArg
2139
2140 {-
2141 ************************************************************************
2142 * *
2143 \subsection{Annotated core}
2144 * *
2145 ************************************************************************
2146 -}
2147
2148 -- | Annotated core: allows annotation at every node in the tree
2149 type AnnExpr bndr annot = (annot, AnnExpr' bndr annot)
2150
2151 -- | A clone of the 'Expr' type but allowing annotation at every tree node
2152 data AnnExpr' bndr annot
2153 = AnnVar Id
2154 | AnnLit Literal
2155 | AnnLam bndr (AnnExpr bndr annot)
2156 | AnnApp (AnnExpr bndr annot) (AnnExpr bndr annot)
2157 | AnnCase (AnnExpr bndr annot) bndr Type [AnnAlt bndr annot]
2158 | AnnLet (AnnBind bndr annot) (AnnExpr bndr annot)
2159 | AnnCast (AnnExpr bndr annot) (annot, Coercion)
2160 -- Put an annotation on the (root of) the coercion
2161 | AnnTick (Tickish Id) (AnnExpr bndr annot)
2162 | AnnType Type
2163 | AnnCoercion Coercion
2164
2165 -- | A clone of the 'Alt' type but allowing annotation at every tree node
2166 type AnnAlt bndr annot = (AltCon, [bndr], AnnExpr bndr annot)
2167
2168 -- | A clone of the 'Bind' type but allowing annotation at every tree node
2169 data AnnBind bndr annot
2170 = AnnNonRec bndr (AnnExpr bndr annot)
2171 | AnnRec [(bndr, AnnExpr bndr annot)]
2172
2173 -- | Takes a nested application expression and returns the function
2174 -- being applied and the arguments to which it is applied
2175 collectAnnArgs :: AnnExpr b a -> (AnnExpr b a, [AnnExpr b a])
2176 collectAnnArgs expr
2177 = go expr []
2178 where
2179 go (_, AnnApp f a) as = go f (a:as)
2180 go e as = (e, as)
2181
2182 collectAnnArgsTicks :: (Tickish Var -> Bool) -> AnnExpr b a
2183 -> (AnnExpr b a, [AnnExpr b a], [Tickish Var])
2184 collectAnnArgsTicks tickishOk expr
2185 = go expr [] []
2186 where
2187 go (_, AnnApp f a) as ts = go f (a:as) ts
2188 go (_, AnnTick t e) as ts | tickishOk t
2189 = go e as (t:ts)
2190 go e as ts = (e, as, reverse ts)
2191
2192 deAnnotate :: AnnExpr bndr annot -> Expr bndr
2193 deAnnotate (_, e) = deAnnotate' e
2194
2195 deAnnotate' :: AnnExpr' bndr annot -> Expr bndr
2196 deAnnotate' (AnnType t) = Type t
2197 deAnnotate' (AnnCoercion co) = Coercion co
2198 deAnnotate' (AnnVar v) = Var v
2199 deAnnotate' (AnnLit lit) = Lit lit
2200 deAnnotate' (AnnLam binder body) = Lam binder (deAnnotate body)
2201 deAnnotate' (AnnApp fun arg) = App (deAnnotate fun) (deAnnotate arg)
2202 deAnnotate' (AnnCast e (_,co)) = Cast (deAnnotate e) co
2203 deAnnotate' (AnnTick tick body) = Tick tick (deAnnotate body)
2204
2205 deAnnotate' (AnnLet bind body)
2206 = Let (deAnnBind bind) (deAnnotate body)
2207 deAnnotate' (AnnCase scrut v t alts)
2208 = Case (deAnnotate scrut) v t (map deAnnAlt alts)
2209
2210 deAnnAlt :: AnnAlt bndr annot -> Alt bndr
2211 deAnnAlt (con,args,rhs) = (con,args,deAnnotate rhs)
2212
2213 deAnnBind :: AnnBind b annot -> Bind b
2214 deAnnBind (AnnNonRec var rhs) = NonRec var (deAnnotate rhs)
2215 deAnnBind (AnnRec pairs) = Rec [(v,deAnnotate rhs) | (v,rhs) <- pairs]
2216
2217 -- | As 'collectBinders' but for 'AnnExpr' rather than 'Expr'
2218 collectAnnBndrs :: AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot)
2219 collectAnnBndrs e
2220 = collect [] e
2221 where
2222 collect bs (_, AnnLam b body) = collect (b:bs) body
2223 collect bs body = (reverse bs, body)
2224
2225 -- | As 'collectNBinders' but for 'AnnExpr' rather than 'Expr'
2226 collectNAnnBndrs :: Int -> AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot)
2227 collectNAnnBndrs orig_n e
2228 = collect orig_n [] e
2229 where
2230 collect 0 bs body = (reverse bs, body)
2231 collect n bs (_, AnnLam b body) = collect (n-1) (b:bs) body
2232 collect _ _ _ = pprPanic "collectNBinders" $ int orig_n