Join points can be levity-polymorphic
[ghc.git] / compiler / coreSyn / CoreSyn.hs
1 {-
2 (c) The University of Glasgow 2006
3 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
4 -}
5
6 {-# LANGUAGE CPP, DeriveDataTypeable, FlexibleContexts #-}
7
8 -- | CoreSyn holds all the main data types for use by for the Glasgow Haskell Compiler midsection
9 module CoreSyn (
10 -- * Main data types
11 Expr(..), Alt, Bind(..), AltCon(..), Arg,
12 Tickish(..), TickishScoping(..), TickishPlacement(..),
13 CoreProgram, CoreExpr, CoreAlt, CoreBind, CoreArg, CoreBndr,
14 TaggedExpr, TaggedAlt, TaggedBind, TaggedArg, TaggedBndr(..), deTagExpr,
15
16 -- * In/Out type synonyms
17 InId, InBind, InExpr, InAlt, InArg, InType, InKind,
18 InBndr, InVar, InCoercion, InTyVar, InCoVar,
19 OutId, OutBind, OutExpr, OutAlt, OutArg, OutType, OutKind,
20 OutBndr, OutVar, OutCoercion, OutTyVar, OutCoVar,
21
22 -- ** 'Expr' construction
23 mkLet, mkLets, mkLams,
24 mkApps, mkTyApps, mkCoApps, mkVarApps, mkTyArg,
25
26 mkIntLit, mkIntLitInt,
27 mkWordLit, mkWordLitWord,
28 mkWord64LitWord64, mkInt64LitInt64,
29 mkCharLit, mkStringLit,
30 mkFloatLit, mkFloatLitFloat,
31 mkDoubleLit, mkDoubleLitDouble,
32
33 mkConApp, mkConApp2, mkTyBind, mkCoBind,
34 varToCoreExpr, varsToCoreExprs,
35
36 isId, cmpAltCon, cmpAlt, ltAlt,
37
38 -- ** Simple 'Expr' access functions and predicates
39 bindersOf, bindersOfBinds, rhssOfBind, rhssOfAlts,
40 collectBinders, collectTyBinders, collectTyAndValBinders,
41 collectNBinders,
42 collectArgs, collectArgsTicks, flattenBinds,
43
44 exprToType, exprToCoercion_maybe,
45 applyTypeToArg,
46
47 isValArg, isTypeArg, isTyCoArg, valArgCount, valBndrCount,
48 isRuntimeArg, isRuntimeVar,
49
50 -- * Tick-related functions
51 tickishCounts, tickishScoped, tickishScopesLike, tickishFloatable,
52 tickishCanSplit, mkNoCount, mkNoScope,
53 tickishIsCode, tickishPlace,
54 tickishContains,
55
56 -- * Unfolding data types
57 Unfolding(..), UnfoldingGuidance(..), UnfoldingSource(..),
58
59 -- ** Constructing 'Unfolding's
60 noUnfolding, bootUnfolding, evaldUnfolding, mkOtherCon,
61 unSaturatedOk, needSaturated, boringCxtOk, boringCxtNotOk,
62
63 -- ** Predicates and deconstruction on 'Unfolding'
64 unfoldingTemplate, expandUnfolding_maybe,
65 maybeUnfoldingTemplate, otherCons,
66 isValueUnfolding, isEvaldUnfolding, isCheapUnfolding,
67 isExpandableUnfolding, isConLikeUnfolding, isCompulsoryUnfolding,
68 isStableUnfolding, isFragileUnfolding, hasSomeUnfolding,
69 isBootUnfolding,
70 canUnfold, neverUnfoldGuidance, isStableSource,
71
72 -- * Annotated expression data types
73 AnnExpr, AnnExpr'(..), AnnBind(..), AnnAlt,
74
75 -- ** Operations on annotated expressions
76 collectAnnArgs, collectAnnArgsTicks,
77
78 -- ** Operations on annotations
79 deAnnotate, deAnnotate', deAnnAlt,
80 collectAnnBndrs, collectNAnnBndrs,
81
82 -- * Orphanhood
83 IsOrphan(..), isOrphan, notOrphan, chooseOrphanAnchor,
84
85 -- * Core rule data types
86 CoreRule(..), RuleBase,
87 RuleName, RuleFun, IdUnfoldingFun, InScopeEnv,
88 RuleEnv(..), mkRuleEnv, emptyRuleEnv,
89
90 -- ** Operations on 'CoreRule's
91 ruleArity, ruleName, ruleIdName, ruleActivation,
92 setRuleIdName,
93 isBuiltinRule, isLocalRule, isAutoRule,
94
95 -- * Core vectorisation declarations data type
96 CoreVect(..)
97 ) where
98
99 #include "HsVersions.h"
100
101 import CostCentre
102 import VarEnv( InScopeSet )
103 import Var
104 import Type
105 import Coercion
106 import Name
107 import NameSet
108 import NameEnv( NameEnv, emptyNameEnv )
109 import Literal
110 import DataCon
111 import Module
112 import TyCon
113 import BasicTypes
114 import DynFlags
115 import Outputable
116 import Util
117 import UniqSet
118 import SrcLoc ( RealSrcSpan, containsSpan )
119 import Binary
120
121 import Data.Data hiding (TyCon)
122 import Data.Int
123 import Data.Word
124
125 infixl 4 `mkApps`, `mkTyApps`, `mkVarApps`, `App`, `mkCoApps`
126 -- Left associative, so that we can say (f `mkTyApps` xs `mkVarApps` ys)
127
128 {-
129 ************************************************************************
130 * *
131 \subsection{The main data types}
132 * *
133 ************************************************************************
134
135 These data types are the heart of the compiler
136 -}
137
138 -- | This is the data type that represents GHCs core intermediate language. Currently
139 -- GHC uses System FC <http://research.microsoft.com/~simonpj/papers/ext-f/> for this purpose,
140 -- which is closely related to the simpler and better known System F <http://en.wikipedia.org/wiki/System_F>.
141 --
142 -- We get from Haskell source to this Core language in a number of stages:
143 --
144 -- 1. The source code is parsed into an abstract syntax tree, which is represented
145 -- by the data type 'HsExpr.HsExpr' with the names being 'RdrName.RdrNames'
146 --
147 -- 2. This syntax tree is /renamed/, which attaches a 'Unique.Unique' to every 'RdrName.RdrName'
148 -- (yielding a 'Name.Name') to disambiguate identifiers which are lexically identical.
149 -- For example, this program:
150 --
151 -- @
152 -- f x = let f x = x + 1
153 -- in f (x - 2)
154 -- @
155 --
156 -- Would be renamed by having 'Unique's attached so it looked something like this:
157 --
158 -- @
159 -- f_1 x_2 = let f_3 x_4 = x_4 + 1
160 -- in f_3 (x_2 - 2)
161 -- @
162 -- But see Note [Shadowing] below.
163 --
164 -- 3. The resulting syntax tree undergoes type checking (which also deals with instantiating
165 -- type class arguments) to yield a 'HsExpr.HsExpr' type that has 'Id.Id' as it's names.
166 --
167 -- 4. Finally the syntax tree is /desugared/ from the expressive 'HsExpr.HsExpr' type into
168 -- this 'Expr' type, which has far fewer constructors and hence is easier to perform
169 -- optimization, analysis and code generation on.
170 --
171 -- The type parameter @b@ is for the type of binders in the expression tree.
172 --
173 -- The language consists of the following elements:
174 --
175 -- * Variables
176 --
177 -- * Primitive literals
178 --
179 -- * Applications: note that the argument may be a 'Type'.
180 -- See Note [CoreSyn let/app invariant]
181 -- See Note [Levity polymorphism invariants]
182 --
183 -- * Lambda abstraction
184 -- See Note [Levity polymorphism invariants]
185 --
186 -- * Recursive and non recursive @let@s. Operationally
187 -- this corresponds to allocating a thunk for the things
188 -- bound and then executing the sub-expression.
189 --
190 -- #top_level_invariant#
191 -- #letrec_invariant#
192 --
193 -- The right hand sides of all top-level and recursive @let@s
194 -- /must/ be of lifted type (see "Type#type_classification" for
195 -- the meaning of /lifted/ vs. /unlifted/). There is one exception
196 -- to this rule, top-level @let@s are allowed to bind primitive
197 -- string literals, see Note [CoreSyn top-level string literals].
198 --
199 -- See Note [CoreSyn let/app invariant]
200 -- See Note [Levity polymorphism invariants]
201 --
202 -- #type_let#
203 -- We allow a /non-recursive/ let to bind a type variable, thus:
204 --
205 -- > Let (NonRec tv (Type ty)) body
206 --
207 -- This can be very convenient for postponing type substitutions until
208 -- the next run of the simplifier.
209 --
210 -- At the moment, the rest of the compiler only deals with type-let
211 -- in a Let expression, rather than at top level. We may want to revist
212 -- this choice.
213 --
214 -- * Case expression. Operationally this corresponds to evaluating
215 -- the scrutinee (expression examined) to weak head normal form
216 -- and then examining at most one level of resulting constructor (i.e. you
217 -- cannot do nested pattern matching directly with this).
218 --
219 -- The binder gets bound to the value of the scrutinee,
220 -- and the 'Type' must be that of all the case alternatives
221 --
222 -- #case_invariants#
223 -- This is one of the more complicated elements of the Core language,
224 -- and comes with a number of restrictions:
225 --
226 -- 1. The list of alternatives may be empty;
227 -- See Note [Empty case alternatives]
228 --
229 -- 2. The 'DEFAULT' case alternative must be first in the list,
230 -- if it occurs at all.
231 --
232 -- 3. The remaining cases are in order of increasing
233 -- tag (for 'DataAlts') or
234 -- lit (for 'LitAlts').
235 -- This makes finding the relevant constructor easy,
236 -- and makes comparison easier too.
237 --
238 -- 4. The list of alternatives must be exhaustive. An /exhaustive/ case
239 -- does not necessarily mention all constructors:
240 --
241 -- @
242 -- data Foo = Red | Green | Blue
243 -- ... case x of
244 -- Red -> True
245 -- other -> f (case x of
246 -- Green -> ...
247 -- Blue -> ... ) ...
248 -- @
249 --
250 -- The inner case does not need a @Red@ alternative, because @x@
251 -- can't be @Red@ at that program point.
252 --
253 -- 5. Floating-point values must not be scrutinised against literals.
254 -- See Trac #9238 and Note [Rules for floating-point comparisons]
255 -- in PrelRules for rationale.
256 --
257 -- * Cast an expression to a particular type.
258 -- This is used to implement @newtype@s (a @newtype@ constructor or
259 -- destructor just becomes a 'Cast' in Core) and GADTs.
260 --
261 -- * Notes. These allow general information to be added to expressions
262 -- in the syntax tree
263 --
264 -- * A type: this should only show up at the top level of an Arg
265 --
266 -- * A coercion
267
268 -- If you edit this type, you may need to update the GHC formalism
269 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
270 data Expr b
271 = Var Id
272 | Lit Literal
273 | App (Expr b) (Arg b)
274 | Lam b (Expr b)
275 | Let (Bind b) (Expr b)
276 | Case (Expr b) b Type [Alt b] -- See #case_invariants#
277 | Cast (Expr b) Coercion
278 | Tick (Tickish Id) (Expr b)
279 | Type Type
280 | Coercion Coercion
281 deriving Data
282
283 -- | Type synonym for expressions that occur in function argument positions.
284 -- Only 'Arg' should contain a 'Type' at top level, general 'Expr' should not
285 type Arg b = Expr b
286
287 -- | A case split alternative. Consists of the constructor leading to the alternative,
288 -- the variables bound from the constructor, and the expression to be executed given that binding.
289 -- The default alternative is @(DEFAULT, [], rhs)@
290
291 -- If you edit this type, you may need to update the GHC formalism
292 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
293 type Alt b = (AltCon, [b], Expr b)
294
295 -- | A case alternative constructor (i.e. pattern match)
296
297 -- If you edit this type, you may need to update the GHC formalism
298 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
299 data AltCon
300 = DataAlt DataCon -- ^ A plain data constructor: @case e of { Foo x -> ... }@.
301 -- Invariant: the 'DataCon' is always from a @data@ type, and never from a @newtype@
302
303 | LitAlt Literal -- ^ A literal: @case e of { 1 -> ... }@
304 -- Invariant: always an *unlifted* literal
305 -- See Note [Literal alternatives]
306
307 | DEFAULT -- ^ Trivial alternative: @case e of { _ -> ... }@
308 deriving (Eq, Data)
309
310 -- | Binding, used for top level bindings in a module and local bindings in a @let@.
311
312 -- If you edit this type, you may need to update the GHC formalism
313 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
314 data Bind b = NonRec b (Expr b)
315 | Rec [(b, (Expr b))]
316 deriving Data
317
318 {-
319 Note [Shadowing]
320 ~~~~~~~~~~~~~~~~
321 While various passes attempt to rename on-the-fly in a manner that
322 avoids "shadowing" (thereby simplifying downstream optimizations),
323 neither the simplifier nor any other pass GUARANTEES that shadowing is
324 avoided. Thus, all passes SHOULD work fine even in the presence of
325 arbitrary shadowing in their inputs.
326
327 In particular, scrutinee variables `x` in expressions of the form
328 `Case e x t` are often renamed to variables with a prefix
329 "wild_". These "wild" variables may appear in the body of the
330 case-expression, and further, may be shadowed within the body.
331
332 So the Unique in an Var is not really unique at all. Still, it's very
333 useful to give a constant-time equality/ordering for Vars, and to give
334 a key that can be used to make sets of Vars (VarSet), or mappings from
335 Vars to other things (VarEnv). Moreover, if you do want to eliminate
336 shadowing, you can give a new Unique to an Id without changing its
337 printable name, which makes debugging easier.
338
339 Note [Literal alternatives]
340 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
341 Literal alternatives (LitAlt lit) are always for *un-lifted* literals.
342 We have one literal, a literal Integer, that is lifted, and we don't
343 allow in a LitAlt, because LitAlt cases don't do any evaluation. Also
344 (see Trac #5603) if you say
345 case 3 of
346 S# x -> ...
347 J# _ _ -> ...
348 (where S#, J# are the constructors for Integer) we don't want the
349 simplifier calling findAlt with argument (LitAlt 3). No no. Integer
350 literals are an opaque encoding of an algebraic data type, not of
351 an unlifted literal, like all the others.
352
353 Also, we do not permit case analysis with literal patterns on floating-point
354 types. See Trac #9238 and Note [Rules for floating-point comparisons] in
355 PrelRules for the rationale for this restriction.
356
357 -------------------------- CoreSyn INVARIANTS ---------------------------
358
359 Note [CoreSyn top-level invariant]
360 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
361 See #toplevel_invariant#
362
363 Note [CoreSyn letrec invariant]
364 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
365 See #letrec_invariant#
366
367 Note [CoreSyn top-level string literals]
368 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
369 As an exception to the usual rule that top-level binders must be lifted,
370 we allow binding primitive string literals (of type Addr#) of type Addr# at the
371 top level. This allows us to share string literals earlier in the pipeline and
372 crucially allows other optimizations in the Core2Core pipeline to fire.
373 Consider,
374
375 f n = let a::Addr# = "foo"#
376 in \x -> blah
377
378 In order to be able to inline `f`, we would like to float `a` to the top.
379 Another option would be to inline `a`, but that would lead to duplicating string
380 literals, which we want to avoid. See Trac #8472.
381
382 The solution is simply to allow top-level unlifted binders. We can't allow
383 arbitrary unlifted expression at the top-level though, unlifted binders cannot
384 be thunks, so we just allow string literals.
385
386 It is important to note that top-level primitive string literals cannot be
387 wrapped in Ticks, as is otherwise done with lifted bindings. CoreToStg expects
388 to see just a plain (Lit (MachStr ...)) expression on the RHS of primitive
389 string bindings; anything else and things break. CoreLint checks this invariant.
390
391 Also see Note [Compilation plan for top-level string literals].
392
393 Note [Compilation plan for top-level string literals]
394 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
395 Here is a summary on how top-level string literals are handled by various
396 parts of the compilation pipeline.
397
398 * In the source language, there is no way to bind a primitive string literal
399 at the top leve.
400
401 * In Core, we have a special rule that permits top-level Addr# bindings. See
402 Note [CoreSyn top-level string literals]. Core-to-core passes may introduce
403 new top-level string literals.
404
405 * In STG, top-level string literals are explicitly represented in the syntax
406 tree.
407
408 * A top-level string literal may end up exported from a module. In this case,
409 in the object file, the content of the exported literal is given a label with
410 the _bytes suffix.
411
412 Note [CoreSyn let/app invariant]
413 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
414 The let/app invariant
415 the right hand side of a non-recursive 'Let', and
416 the argument of an 'App',
417 /may/ be of unlifted type, but only if
418 the expression is ok-for-speculation
419 or the 'Let' is for a join point.
420
421 This means that the let can be floated around
422 without difficulty. For example, this is OK:
423
424 y::Int# = x +# 1#
425
426 But this is not, as it may affect termination if the
427 expression is floated out:
428
429 y::Int# = fac 4#
430
431 In this situation you should use @case@ rather than a @let@. The function
432 'CoreUtils.needsCaseBinding' can help you determine which to generate, or
433 alternatively use 'MkCore.mkCoreLet' rather than this constructor directly,
434 which will generate a @case@ if necessary
435
436 The let/app invariant is initially enforced by mkCoreLet and mkCoreApp in
437 coreSyn/MkCore.
438
439 Note [CoreSyn case invariants]
440 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
441 See #case_invariants#
442
443 Note [Levity polymorphism invariants]
444 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
445 The levity-polymorphism invariants are these:
446
447 * The type of a term-binder must not be levity-polymorphic,
448 unless it is a let(rec)-bound join point
449 (see Note [Invariants on join points])
450
451 * The type of the argument of an App must not be levity-polymorphic.
452
453 A type (t::TYPE r) is "levity polymorphic" if 'r' has any free variables.
454
455 For example
456 \(r::RuntimeRep). \(a::TYPE r). \(x::a). e
457 is illegal because x's type has kind (TYPE r), which has 'r' free.
458
459 See Note [Levity polymorphism checking] in DsMonad to see where these
460 invariants are established for user-written code.
461
462 Note [CoreSyn let goal]
463 ~~~~~~~~~~~~~~~~~~~~~~~
464 * The simplifier tries to ensure that if the RHS of a let is a constructor
465 application, its arguments are trivial, so that the constructor can be
466 inlined vigorously.
467
468 Note [Type let]
469 ~~~~~~~~~~~~~~~
470 See #type_let#
471
472 Note [Empty case alternatives]
473 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
474 The alternatives of a case expression should be exhaustive. But
475 this exhaustive list can be empty!
476
477 * A case expression can have empty alternatives if (and only if) the
478 scrutinee is bound to raise an exception or diverge. When do we know
479 this? See Note [Bottoming expressions] in CoreUtils.
480
481 * The possiblity of empty alternatives is one reason we need a type on
482 the case expression: if the alternatives are empty we can't get the
483 type from the alternatives!
484
485 * In the case of empty types (see Note [Bottoming expressions]), say
486 data T
487 we do NOT want to replace
488 case (x::T) of Bool {} --> error Bool "Inaccessible case"
489 because x might raise an exception, and *that*'s what we want to see!
490 (Trac #6067 is an example.) To preserve semantics we'd have to say
491 x `seq` error Bool "Inaccessible case"
492 but the 'seq' is just a case, so we are back to square 1. Or I suppose
493 we could say
494 x |> UnsafeCoerce T Bool
495 but that loses all trace of the fact that this originated with an empty
496 set of alternatives.
497
498 * We can use the empty-alternative construct to coerce error values from
499 one type to another. For example
500
501 f :: Int -> Int
502 f n = error "urk"
503
504 g :: Int -> (# Char, Bool #)
505 g x = case f x of { 0 -> ..., n -> ... }
506
507 Then if we inline f in g's RHS we get
508 case (error Int "urk") of (# Char, Bool #) { ... }
509 and we can discard the alternatives since the scrutinee is bottom to give
510 case (error Int "urk") of (# Char, Bool #) {}
511
512 This is nicer than using an unsafe coerce between Int ~ (# Char,Bool #),
513 if for no other reason that we don't need to instantiate the (~) at an
514 unboxed type.
515
516 * We treat a case expression with empty alternatives as trivial iff
517 its scrutinee is (see CoreUtils.exprIsTrivial). This is actually
518 important; see Note [Empty case is trivial] in CoreUtils
519
520 * An empty case is replaced by its scrutinee during the CoreToStg
521 conversion; remember STG is un-typed, so there is no need for
522 the empty case to do the type conversion.
523
524 Note [Join points]
525 ~~~~~~~~~~~~~~~~~~
526 In Core, a *join point* is a specially tagged function whose only occurrences
527 are saturated tail calls. A tail call can appear in these places:
528
529 1. In the branches (not the scrutinee) of a case
530 2. Underneath a let (value or join point)
531 3. Inside another join point
532
533 We write a join-point declaration as
534 join j @a @b x y = e1 in e2,
535 like a let binding but with "join" instead (or "join rec" for "let rec"). Note
536 that we put the parameters before the = rather than using lambdas; this is
537 because it's relevant how many parameters the join point takes *as a join
538 point.* This number is called the *join arity,* distinct from arity because it
539 counts types as well as values. Note that a join point may return a lambda! So
540 join j x = x + 1
541 is different from
542 join j = \x -> x + 1
543 The former has join arity 1, while the latter has join arity 0.
544
545 The identifier for a join point is called a join id or a *label.* An invocation
546 is called a *jump.* We write a jump using the jump keyword:
547
548 jump j 3
549
550 The words *label* and *jump* are evocative of assembly code (or Cmm) for a
551 reason: join points are indeed compiled as labeled blocks, and jumps become
552 actual jumps (plus argument passing and stack adjustment). There is no closure
553 allocated and only a fraction of the function-call overhead. Hence we would
554 like as many functions as possible to become join points (see OccurAnal) and
555 the type rules for join points ensure we preserve the properties that make them
556 efficient.
557
558 In the actual AST, a join point is indicated by the IdDetails of the binder: a
559 local value binding gets 'VanillaId' but a join point gets a 'JoinId' with its
560 join arity.
561
562 For more details, see the paper:
563
564 Luke Maurer, Paul Downen, Zena Ariola, and Simon Peyton Jones. "Compiling
565 without continuations." Submitted to PLDI'17.
566
567 https://www.microsoft.com/en-us/research/publication/compiling-without-continuations/
568
569 Note [Invariants on join points]
570 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
571 Join points must follow these invariants:
572
573 1. All occurrences must be tail calls. Each of these tail calls must pass the
574 same number of arguments, counting both types and values; we call this the
575 "join arity" (to distinguish from regular arity, which only counts values).
576
577 2. For join arity n, the right-hand side must begin with at least n lambdas.
578
579 3. If the binding is recursive, then all other bindings in the recursive group
580 must also be join points.
581
582 4. The binding's type must not be polymorphic in its return type (as defined
583 in Note [The polymorphism rule of join points]).
584
585 However, join points have simpler invariants in other ways
586
587 5. A join point can have an unboxed type without the RHS being
588 ok-for-speculation (i.e. drop the let/app invariant)
589 e.g. let j :: Int# = factorial x in ...
590
591 6. A join point can have a levity-polymorphic RHS
592 e.g. let j :: r :: TYPE l = fail void# in ...
593 This happened in an intermediate program Trac #13394
594
595 Examples:
596
597 join j1 x = 1 + x in jump j (jump j x) -- Fails 1: non-tail call
598 join j1' x = 1 + x in if even a
599 then jump j1 a
600 else jump j1 a b -- Fails 1: inconsistent calls
601 join j2 x = flip (+) x in j2 1 2 -- Fails 2: not enough lambdas
602 join j2' x = \y -> x + y in j3 1 -- Passes: extra lams ok
603 join j @a (x :: a) = x -- Fails 4: polymorphic in ret type
604
605 Invariant 1 applies to left-hand sides of rewrite rules, so a rule for a join
606 point must have an exact call as its LHS.
607
608 Strictly speaking, invariant 3 is redundant, since a call from inside a lazy
609 binding isn't a tail call. Since a let-bound value can't invoke a free join
610 point, then, they can't be mutually recursive. (A Core binding group *can*
611 include spurious extra bindings if the occurrence analyser hasn't run, so
612 invariant 3 does still need to be checked.) For the rigorous definition of
613 "tail call", see Section 3 of the paper (Note [Join points]).
614
615 Invariant 4 is subtle; see Note [The polymorphism rule of join points].
616
617 Core Lint will check these invariants, anticipating that any binder whose
618 OccInfo is marked AlwaysTailCalled will become a join point as soon as the
619 simplifier (or simpleOptPgm) runs.
620
621 Note [The type of a join point]
622 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
623 A join point has the same type it would have as a function. That is, if it takes
624 an Int and a Bool and its body produces a String, its type is `Int -> Bool ->
625 String`. Natural as this may seem, it can be awkward. A join point shouldn't be
626 thought to "return" in the same sense a function does---a jump is one-way. This
627 is crucial for understanding how case-of-case interacts with join points:
628
629 case (join
630 j :: Int -> Bool -> String
631 j x y = ...
632 in
633 jump j z w) of
634 "" -> True
635 _ -> False
636
637 The simplifier will pull the case into the join point (see Note [Case-of-case
638 and join points] in Simplify):
639
640 join
641 j :: Int -> Bool -> Bool -- changed!
642 j x y = case ... of "" -> True
643 _ -> False
644 in
645 jump j z w
646
647 The body of the join point now returns a Bool, so the label `j` has to have its
648 type updated accordingly. Inconvenient though this may be, it has the advantage
649 that 'CoreUtils.exprType' can still return a type for any expression, including
650 a jump.
651
652 This differs from the paper (see Note [Invariants on join points]). In the
653 paper, we instead give j the type `Int -> Bool -> forall a. a`. Then each jump
654 carries the "return type" as a parameter, exactly the way other non-returning
655 functions like `error` work:
656
657 case (join
658 j :: Int -> Bool -> forall a. a
659 j x y = ...
660 in
661 jump j z w @String) of
662 "" -> True
663 _ -> False
664
665 Now we can move the case inward and we only have to change the jump:
666
667 join
668 j :: Int -> Bool -> forall a. a
669 j x y = case ... of "" -> True
670 _ -> False
671 in
672 jump j z w @Bool
673
674 (Core Lint would still check that the body of the join point has the right type;
675 that type would simply not be reflected in the join id.)
676
677 Note [The polymorphism rule of join points]
678 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
679 Invariant 4 of Note [Invariants on join points] forbids a join point to be
680 polymorphic in its return type. That is, if its type is
681
682 forall a1 ... ak. t1 -> ... -> tn -> r
683
684 where its join arity is k+n, none of the type parameters ai may occur free in r.
685 The most direct explanation is that given
686
687 join j @a1 ... @ak x1 ... xn = e1 in e2
688
689 our typing rules require `e1` and `e2` to have the same type. Therefore the type
690 of `e1`---the return type of the join point---must be the same as the type of
691 e2. Since the type variables aren't bound in `e2`, its type can't include them,
692 and thus neither can the type of `e1`.
693
694 There's a deeper explanation in terms of the sequent calculus in Section 5.3 of
695 a previous paper:
696
697 Paul Downen, Luke Maurer, Zena Ariola, and Simon Peyton Jones. "Sequent
698 calculus as a compiler intermediate language." ICFP'16.
699
700 https://www.microsoft.com/en-us/research/wp-content/uploads/2016/04/sequent-calculus-icfp16.pdf
701
702 The quick version: Consider the CPS term (the paper uses the sequent calculus,
703 but we can translate readily):
704
705 \k -> join j @a1 ... @ak x1 ... xn = e1 k in e2 k
706
707 Since `j` is a join point, it doesn't bind a continuation variable but reuses
708 the variable `k` from the context. But the parameters `ai` are not in `k`'s
709 scope, and `k`'s type determines the return type of `j`; thus the `ai`s don't
710 appear in the return type of `j`. (Also, since `e1` and `e2` are passed the same
711 continuation, they must have the same type; hence the direct explanation above.)
712
713 ************************************************************************
714 * *
715 In/Out type synonyms
716 * *
717 ********************************************************************* -}
718
719 {- Many passes apply a substitution, and it's very handy to have type
720 synonyms to remind us whether or not the substitution has been applied -}
721
722 -- Pre-cloning or substitution
723 type InBndr = CoreBndr
724 type InType = Type
725 type InKind = Kind
726 type InBind = CoreBind
727 type InExpr = CoreExpr
728 type InAlt = CoreAlt
729 type InArg = CoreArg
730 type InCoercion = Coercion
731
732 -- Post-cloning or substitution
733 type OutBndr = CoreBndr
734 type OutType = Type
735 type OutKind = Kind
736 type OutCoercion = Coercion
737 type OutBind = CoreBind
738 type OutExpr = CoreExpr
739 type OutAlt = CoreAlt
740 type OutArg = CoreArg
741
742
743 {- *********************************************************************
744 * *
745 Ticks
746 * *
747 ************************************************************************
748 -}
749
750 -- | Allows attaching extra information to points in expressions
751
752 -- If you edit this type, you may need to update the GHC formalism
753 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
754 data Tickish id =
755 -- | An @{-# SCC #-}@ profiling annotation, either automatically
756 -- added by the desugarer as a result of -auto-all, or added by
757 -- the user.
758 ProfNote {
759 profNoteCC :: CostCentre, -- ^ the cost centre
760 profNoteCount :: !Bool, -- ^ bump the entry count?
761 profNoteScope :: !Bool -- ^ scopes over the enclosed expression
762 -- (i.e. not just a tick)
763 }
764
765 -- | A "tick" used by HPC to track the execution of each
766 -- subexpression in the original source code.
767 | HpcTick {
768 tickModule :: Module,
769 tickId :: !Int
770 }
771
772 -- | A breakpoint for the GHCi debugger. This behaves like an HPC
773 -- tick, but has a list of free variables which will be available
774 -- for inspection in GHCi when the program stops at the breakpoint.
775 --
776 -- NB. we must take account of these Ids when (a) counting free variables,
777 -- and (b) substituting (don't substitute for them)
778 | Breakpoint
779 { breakpointId :: !Int
780 , breakpointFVs :: [id] -- ^ the order of this list is important:
781 -- it matches the order of the lists in the
782 -- appropriate entry in HscTypes.ModBreaks.
783 --
784 -- Careful about substitution! See
785 -- Note [substTickish] in CoreSubst.
786 }
787
788 -- | A source note.
789 --
790 -- Source notes are pure annotations: Their presence should neither
791 -- influence compilation nor execution. The semantics are given by
792 -- causality: The presence of a source note means that a local
793 -- change in the referenced source code span will possibly provoke
794 -- the generated code to change. On the flip-side, the functionality
795 -- of annotated code *must* be invariant against changes to all
796 -- source code *except* the spans referenced in the source notes
797 -- (see "Causality of optimized Haskell" paper for details).
798 --
799 -- Therefore extending the scope of any given source note is always
800 -- valid. Note that it is still undesirable though, as this reduces
801 -- their usefulness for debugging and profiling. Therefore we will
802 -- generally try only to make use of this property where it is
803 -- necessary to enable optimizations.
804 | SourceNote
805 { sourceSpan :: RealSrcSpan -- ^ Source covered
806 , sourceName :: String -- ^ Name for source location
807 -- (uses same names as CCs)
808 }
809
810 deriving (Eq, Ord, Data)
811
812 -- | A "counting tick" (where tickishCounts is True) is one that
813 -- counts evaluations in some way. We cannot discard a counting tick,
814 -- and the compiler should preserve the number of counting ticks as
815 -- far as possible.
816 --
817 -- However, we still allow the simplifier to increase or decrease
818 -- sharing, so in practice the actual number of ticks may vary, except
819 -- that we never change the value from zero to non-zero or vice versa.
820 tickishCounts :: Tickish id -> Bool
821 tickishCounts n@ProfNote{} = profNoteCount n
822 tickishCounts HpcTick{} = True
823 tickishCounts Breakpoint{} = True
824 tickishCounts _ = False
825
826
827 -- | Specifies the scoping behaviour of ticks. This governs the
828 -- behaviour of ticks that care about the covered code and the cost
829 -- associated with it. Important for ticks relating to profiling.
830 data TickishScoping =
831 -- | No scoping: The tick does not care about what code it
832 -- covers. Transformations can freely move code inside as well as
833 -- outside without any additional annotation obligations
834 NoScope
835
836 -- | Soft scoping: We want all code that is covered to stay
837 -- covered. Note that this scope type does not forbid
838 -- transformations from happening, as as long as all results of
839 -- the transformations are still covered by this tick or a copy of
840 -- it. For example
841 --
842 -- let x = tick<...> (let y = foo in bar) in baz
843 -- ===>
844 -- let x = tick<...> bar; y = tick<...> foo in baz
845 --
846 -- Is a valid transformation as far as "bar" and "foo" is
847 -- concerned, because both still are scoped over by the tick.
848 --
849 -- Note though that one might object to the "let" not being
850 -- covered by the tick any more. However, we are generally lax
851 -- with this - constant costs don't matter too much, and given
852 -- that the "let" was effectively merged we can view it as having
853 -- lost its identity anyway.
854 --
855 -- Also note that this scoping behaviour allows floating a tick
856 -- "upwards" in pretty much any situation. For example:
857 --
858 -- case foo of x -> tick<...> bar
859 -- ==>
860 -- tick<...> case foo of x -> bar
861 --
862 -- While this is always leagl, we want to make a best effort to
863 -- only make us of this where it exposes transformation
864 -- opportunities.
865 | SoftScope
866
867 -- | Cost centre scoping: We don't want any costs to move to other
868 -- cost-centre stacks. This means we not only want no code or cost
869 -- to get moved out of their cost centres, but we also object to
870 -- code getting associated with new cost-centre ticks - or
871 -- changing the order in which they get applied.
872 --
873 -- A rule of thumb is that we don't want any code to gain new
874 -- annotations. However, there are notable exceptions, for
875 -- example:
876 --
877 -- let f = \y -> foo in tick<...> ... (f x) ...
878 -- ==>
879 -- tick<...> ... foo[x/y] ...
880 --
881 -- In-lining lambdas like this is always legal, because inlining a
882 -- function does not change the cost-centre stack when the
883 -- function is called.
884 | CostCentreScope
885
886 deriving (Eq)
887
888 -- | Returns the intended scoping rule for a Tickish
889 tickishScoped :: Tickish id -> TickishScoping
890 tickishScoped n@ProfNote{}
891 | profNoteScope n = CostCentreScope
892 | otherwise = NoScope
893 tickishScoped HpcTick{} = NoScope
894 tickishScoped Breakpoint{} = CostCentreScope
895 -- Breakpoints are scoped: eventually we're going to do call
896 -- stacks, but also this helps prevent the simplifier from moving
897 -- breakpoints around and changing their result type (see #1531).
898 tickishScoped SourceNote{} = SoftScope
899
900 -- | Returns whether the tick scoping rule is at least as permissive
901 -- as the given scoping rule.
902 tickishScopesLike :: Tickish id -> TickishScoping -> Bool
903 tickishScopesLike t scope = tickishScoped t `like` scope
904 where NoScope `like` _ = True
905 _ `like` NoScope = False
906 SoftScope `like` _ = True
907 _ `like` SoftScope = False
908 CostCentreScope `like` _ = True
909
910 -- | Returns @True@ for ticks that can be floated upwards easily even
911 -- where it might change execution counts, such as:
912 --
913 -- Just (tick<...> foo)
914 -- ==>
915 -- tick<...> (Just foo)
916 --
917 -- This is a combination of @tickishSoftScope@ and
918 -- @tickishCounts@. Note that in principle splittable ticks can become
919 -- floatable using @mkNoTick@ -- even though there's currently no
920 -- tickish for which that is the case.
921 tickishFloatable :: Tickish id -> Bool
922 tickishFloatable t = t `tickishScopesLike` SoftScope && not (tickishCounts t)
923
924 -- | Returns @True@ for a tick that is both counting /and/ scoping and
925 -- can be split into its (tick, scope) parts using 'mkNoScope' and
926 -- 'mkNoTick' respectively.
927 tickishCanSplit :: Tickish id -> Bool
928 tickishCanSplit ProfNote{profNoteScope = True, profNoteCount = True}
929 = True
930 tickishCanSplit _ = False
931
932 mkNoCount :: Tickish id -> Tickish id
933 mkNoCount n | not (tickishCounts n) = n
934 | not (tickishCanSplit n) = panic "mkNoCount: Cannot split!"
935 mkNoCount n@ProfNote{} = n {profNoteCount = False}
936 mkNoCount _ = panic "mkNoCount: Undefined split!"
937
938 mkNoScope :: Tickish id -> Tickish id
939 mkNoScope n | tickishScoped n == NoScope = n
940 | not (tickishCanSplit n) = panic "mkNoScope: Cannot split!"
941 mkNoScope n@ProfNote{} = n {profNoteScope = False}
942 mkNoScope _ = panic "mkNoScope: Undefined split!"
943
944 -- | Return @True@ if this source annotation compiles to some backend
945 -- code. Without this flag, the tickish is seen as a simple annotation
946 -- that does not have any associated evaluation code.
947 --
948 -- What this means that we are allowed to disregard the tick if doing
949 -- so means that we can skip generating any code in the first place. A
950 -- typical example is top-level bindings:
951 --
952 -- foo = tick<...> \y -> ...
953 -- ==>
954 -- foo = \y -> tick<...> ...
955 --
956 -- Here there is just no operational difference between the first and
957 -- the second version. Therefore code generation should simply
958 -- translate the code as if it found the latter.
959 tickishIsCode :: Tickish id -> Bool
960 tickishIsCode SourceNote{} = False
961 tickishIsCode _tickish = True -- all the rest for now
962
963
964 -- | Governs the kind of expression that the tick gets placed on when
965 -- annotating for example using @mkTick@. If we find that we want to
966 -- put a tickish on an expression ruled out here, we try to float it
967 -- inwards until we find a suitable expression.
968 data TickishPlacement =
969
970 -- | Place ticks exactly on run-time expressions. We can still
971 -- move the tick through pure compile-time constructs such as
972 -- other ticks, casts or type lambdas. This is the most
973 -- restrictive placement rule for ticks, as all tickishs have in
974 -- common that they want to track runtime processes. The only
975 -- legal placement rule for counting ticks.
976 PlaceRuntime
977
978 -- | As @PlaceRuntime@, but we float the tick through all
979 -- lambdas. This makes sense where there is little difference
980 -- between annotating the lambda and annotating the lambda's code.
981 | PlaceNonLam
982
983 -- | In addition to floating through lambdas, cost-centre style
984 -- tickishs can also be moved from constructors, non-function
985 -- variables and literals. For example:
986 --
987 -- let x = scc<...> C (scc<...> y) (scc<...> 3) in ...
988 --
989 -- Neither the constructor application, the variable or the
990 -- literal are likely to have any cost worth mentioning. And even
991 -- if y names a thunk, the call would not care about the
992 -- evaluation context. Therefore removing all annotations in the
993 -- above example is safe.
994 | PlaceCostCentre
995
996 deriving (Eq)
997
998 -- | Placement behaviour we want for the ticks
999 tickishPlace :: Tickish id -> TickishPlacement
1000 tickishPlace n@ProfNote{}
1001 | profNoteCount n = PlaceRuntime
1002 | otherwise = PlaceCostCentre
1003 tickishPlace HpcTick{} = PlaceRuntime
1004 tickishPlace Breakpoint{} = PlaceRuntime
1005 tickishPlace SourceNote{} = PlaceNonLam
1006
1007 -- | Returns whether one tick "contains" the other one, therefore
1008 -- making the second tick redundant.
1009 tickishContains :: Eq b => Tickish b -> Tickish b -> Bool
1010 tickishContains (SourceNote sp1 n1) (SourceNote sp2 n2)
1011 = containsSpan sp1 sp2 && n1 == n2
1012 -- compare the String last
1013 tickishContains t1 t2
1014 = t1 == t2
1015
1016 {-
1017 ************************************************************************
1018 * *
1019 Orphans
1020 * *
1021 ************************************************************************
1022 -}
1023
1024 -- | Is this instance an orphan? If it is not an orphan, contains an 'OccName'
1025 -- witnessing the instance's non-orphanhood.
1026 -- See Note [Orphans]
1027 data IsOrphan
1028 = IsOrphan
1029 | NotOrphan OccName -- The OccName 'n' witnesses the instance's non-orphanhood
1030 -- In that case, the instance is fingerprinted as part
1031 -- of the definition of 'n's definition
1032 deriving Data
1033
1034 -- | Returns true if 'IsOrphan' is orphan.
1035 isOrphan :: IsOrphan -> Bool
1036 isOrphan IsOrphan = True
1037 isOrphan _ = False
1038
1039 -- | Returns true if 'IsOrphan' is not an orphan.
1040 notOrphan :: IsOrphan -> Bool
1041 notOrphan NotOrphan{} = True
1042 notOrphan _ = False
1043
1044 chooseOrphanAnchor :: NameSet -> IsOrphan
1045 -- Something (rule, instance) is relate to all the Names in this
1046 -- list. Choose one of them to be an "anchor" for the orphan. We make
1047 -- the choice deterministic to avoid gratuitious changes in the ABI
1048 -- hash (Trac #4012). Specifically, use lexicographic comparison of
1049 -- OccName rather than comparing Uniques
1050 --
1051 -- NB: 'minimum' use Ord, and (Ord OccName) works lexicographically
1052 --
1053 chooseOrphanAnchor local_names
1054 | isEmptyNameSet local_names = IsOrphan
1055 | otherwise = NotOrphan (minimum occs)
1056 where
1057 occs = map nameOccName $ nonDetEltsUniqSet local_names
1058 -- It's OK to use nonDetEltsUFM here, see comments above
1059
1060 instance Binary IsOrphan where
1061 put_ bh IsOrphan = putByte bh 0
1062 put_ bh (NotOrphan n) = do
1063 putByte bh 1
1064 put_ bh n
1065 get bh = do
1066 h <- getByte bh
1067 case h of
1068 0 -> return IsOrphan
1069 _ -> do
1070 n <- get bh
1071 return $ NotOrphan n
1072
1073 {-
1074 Note [Orphans]
1075 ~~~~~~~~~~~~~~
1076 Class instances, rules, and family instances are divided into orphans
1077 and non-orphans. Roughly speaking, an instance/rule is an orphan if
1078 its left hand side mentions nothing defined in this module. Orphan-hood
1079 has two major consequences
1080
1081 * A module that contains orphans is called an "orphan module". If
1082 the module being compiled depends (transitively) on an oprhan
1083 module M, then M.hi is read in regardless of whether M is oherwise
1084 needed. This is to ensure that we don't miss any instance decls in
1085 M. But it's painful, because it means we need to keep track of all
1086 the orphan modules below us.
1087
1088 * A non-orphan is not finger-printed separately. Instead, for
1089 fingerprinting purposes it is treated as part of the entity it
1090 mentions on the LHS. For example
1091 data T = T1 | T2
1092 instance Eq T where ....
1093 The instance (Eq T) is incorprated as part of T's fingerprint.
1094
1095 In contrast, orphans are all fingerprinted together in the
1096 mi_orph_hash field of the ModIface.
1097
1098 See MkIface.addFingerprints.
1099
1100 Orphan-hood is computed
1101 * For class instances:
1102 when we make a ClsInst
1103 (because it is needed during instance lookup)
1104
1105 * For rules and family instances:
1106 when we generate an IfaceRule (MkIface.coreRuleToIfaceRule)
1107 or IfaceFamInst (MkIface.instanceToIfaceInst)
1108 -}
1109
1110 {-
1111 ************************************************************************
1112 * *
1113 \subsection{Transformation rules}
1114 * *
1115 ************************************************************************
1116
1117 The CoreRule type and its friends are dealt with mainly in CoreRules,
1118 but CoreFVs, Subst, PprCore, CoreTidy also inspect the representation.
1119 -}
1120
1121 -- | Gathers a collection of 'CoreRule's. Maps (the name of) an 'Id' to its rules
1122 type RuleBase = NameEnv [CoreRule]
1123 -- The rules are unordered;
1124 -- we sort out any overlaps on lookup
1125
1126 -- | A full rule environment which we can apply rules from. Like a 'RuleBase',
1127 -- but it also includes the set of visible orphans we use to filter out orphan
1128 -- rules which are not visible (even though we can see them...)
1129 data RuleEnv
1130 = RuleEnv { re_base :: RuleBase
1131 , re_visible_orphs :: ModuleSet
1132 }
1133
1134 mkRuleEnv :: RuleBase -> [Module] -> RuleEnv
1135 mkRuleEnv rules vis_orphs = RuleEnv rules (mkModuleSet vis_orphs)
1136
1137 emptyRuleEnv :: RuleEnv
1138 emptyRuleEnv = RuleEnv emptyNameEnv emptyModuleSet
1139
1140 -- | A 'CoreRule' is:
1141 --
1142 -- * \"Local\" if the function it is a rule for is defined in the
1143 -- same module as the rule itself.
1144 --
1145 -- * \"Orphan\" if nothing on the LHS is defined in the same module
1146 -- as the rule itself
1147 data CoreRule
1148 = Rule {
1149 ru_name :: RuleName, -- ^ Name of the rule, for communication with the user
1150 ru_act :: Activation, -- ^ When the rule is active
1151
1152 -- Rough-matching stuff
1153 -- see comments with InstEnv.ClsInst( is_cls, is_rough )
1154 ru_fn :: Name, -- ^ Name of the 'Id.Id' at the head of this rule
1155 ru_rough :: [Maybe Name], -- ^ Name at the head of each argument to the left hand side
1156
1157 -- Proper-matching stuff
1158 -- see comments with InstEnv.ClsInst( is_tvs, is_tys )
1159 ru_bndrs :: [CoreBndr], -- ^ Variables quantified over
1160 ru_args :: [CoreExpr], -- ^ Left hand side arguments
1161
1162 -- And the right-hand side
1163 ru_rhs :: CoreExpr, -- ^ Right hand side of the rule
1164 -- Occurrence info is guaranteed correct
1165 -- See Note [OccInfo in unfoldings and rules]
1166
1167 -- Locality
1168 ru_auto :: Bool, -- ^ @True@ <=> this rule is auto-generated
1169 -- (notably by Specialise or SpecConstr)
1170 -- @False@ <=> generated at the user's behest
1171 -- See Note [Trimming auto-rules] in TidyPgm
1172 -- for the sole purpose of this field.
1173
1174 ru_origin :: !Module, -- ^ 'Module' the rule was defined in, used
1175 -- to test if we should see an orphan rule.
1176
1177 ru_orphan :: !IsOrphan, -- ^ Whether or not the rule is an orphan.
1178
1179 ru_local :: Bool -- ^ @True@ iff the fn at the head of the rule is
1180 -- defined in the same module as the rule
1181 -- and is not an implicit 'Id' (like a record selector,
1182 -- class operation, or data constructor). This
1183 -- is different from 'ru_orphan', where a rule
1184 -- can avoid being an orphan if *any* Name in
1185 -- LHS of the rule was defined in the same
1186 -- module as the rule.
1187 }
1188
1189 -- | Built-in rules are used for constant folding
1190 -- and suchlike. They have no free variables.
1191 -- A built-in rule is always visible (there is no such thing as
1192 -- an orphan built-in rule.)
1193 | BuiltinRule {
1194 ru_name :: RuleName, -- ^ As above
1195 ru_fn :: Name, -- ^ As above
1196 ru_nargs :: Int, -- ^ Number of arguments that 'ru_try' consumes,
1197 -- if it fires, including type arguments
1198 ru_try :: RuleFun
1199 -- ^ This function does the rewrite. It given too many
1200 -- arguments, it simply discards them; the returned 'CoreExpr'
1201 -- is just the rewrite of 'ru_fn' applied to the first 'ru_nargs' args
1202 }
1203 -- See Note [Extra args in rule matching] in Rules.hs
1204
1205 type RuleFun = DynFlags -> InScopeEnv -> Id -> [CoreExpr] -> Maybe CoreExpr
1206 type InScopeEnv = (InScopeSet, IdUnfoldingFun)
1207
1208 type IdUnfoldingFun = Id -> Unfolding
1209 -- A function that embodies how to unfold an Id if you need
1210 -- to do that in the Rule. The reason we need to pass this info in
1211 -- is that whether an Id is unfoldable depends on the simplifier phase
1212
1213 isBuiltinRule :: CoreRule -> Bool
1214 isBuiltinRule (BuiltinRule {}) = True
1215 isBuiltinRule _ = False
1216
1217 isAutoRule :: CoreRule -> Bool
1218 isAutoRule (BuiltinRule {}) = False
1219 isAutoRule (Rule { ru_auto = is_auto }) = is_auto
1220
1221 -- | The number of arguments the 'ru_fn' must be applied
1222 -- to before the rule can match on it
1223 ruleArity :: CoreRule -> Int
1224 ruleArity (BuiltinRule {ru_nargs = n}) = n
1225 ruleArity (Rule {ru_args = args}) = length args
1226
1227 ruleName :: CoreRule -> RuleName
1228 ruleName = ru_name
1229
1230 ruleActivation :: CoreRule -> Activation
1231 ruleActivation (BuiltinRule { }) = AlwaysActive
1232 ruleActivation (Rule { ru_act = act }) = act
1233
1234 -- | The 'Name' of the 'Id.Id' at the head of the rule left hand side
1235 ruleIdName :: CoreRule -> Name
1236 ruleIdName = ru_fn
1237
1238 isLocalRule :: CoreRule -> Bool
1239 isLocalRule = ru_local
1240
1241 -- | Set the 'Name' of the 'Id.Id' at the head of the rule left hand side
1242 setRuleIdName :: Name -> CoreRule -> CoreRule
1243 setRuleIdName nm ru = ru { ru_fn = nm }
1244
1245 {-
1246 ************************************************************************
1247 * *
1248 \subsection{Vectorisation declarations}
1249 * *
1250 ************************************************************************
1251
1252 Representation of desugared vectorisation declarations that are fed to the vectoriser (via
1253 'ModGuts').
1254 -}
1255
1256 data CoreVect = Vect Id CoreExpr
1257 | NoVect Id
1258 | VectType Bool TyCon (Maybe TyCon)
1259 | VectClass TyCon -- class tycon
1260 | VectInst Id -- instance dfun (always SCALAR) !!!FIXME: should be superfluous now
1261
1262 {-
1263 ************************************************************************
1264 * *
1265 Unfoldings
1266 * *
1267 ************************************************************************
1268
1269 The @Unfolding@ type is declared here to avoid numerous loops
1270 -}
1271
1272 -- | Records the /unfolding/ of an identifier, which is approximately the form the
1273 -- identifier would have if we substituted its definition in for the identifier.
1274 -- This type should be treated as abstract everywhere except in "CoreUnfold"
1275 data Unfolding
1276 = NoUnfolding -- ^ We have no information about the unfolding.
1277
1278 | BootUnfolding -- ^ We have no information about the unfolding, because
1279 -- this 'Id' came from an @hi-boot@ file.
1280 -- See Note [Inlining and hs-boot files] in ToIface
1281 -- for what this is used for.
1282
1283 | OtherCon [AltCon] -- ^ It ain't one of these constructors.
1284 -- @OtherCon xs@ also indicates that something has been evaluated
1285 -- and hence there's no point in re-evaluating it.
1286 -- @OtherCon []@ is used even for non-data-type values
1287 -- to indicated evaluated-ness. Notably:
1288 --
1289 -- > data C = C !(Int -> Int)
1290 -- > case x of { C f -> ... }
1291 --
1292 -- Here, @f@ gets an @OtherCon []@ unfolding.
1293
1294 | DFunUnfolding { -- The Unfolding of a DFunId
1295 -- See Note [DFun unfoldings]
1296 -- df = /\a1..am. \d1..dn. MkD t1 .. tk
1297 -- (op1 a1..am d1..dn)
1298 -- (op2 a1..am d1..dn)
1299 df_bndrs :: [Var], -- The bound variables [a1..m],[d1..dn]
1300 df_con :: DataCon, -- The dictionary data constructor (never a newtype datacon)
1301 df_args :: [CoreExpr] -- Args of the data con: types, superclasses and methods,
1302 } -- in positional order
1303
1304 | CoreUnfolding { -- An unfolding for an Id with no pragma,
1305 -- or perhaps a NOINLINE pragma
1306 -- (For NOINLINE, the phase, if any, is in the
1307 -- InlinePragInfo for this Id.)
1308 uf_tmpl :: CoreExpr, -- Template; occurrence info is correct
1309 uf_src :: UnfoldingSource, -- Where the unfolding came from
1310 uf_is_top :: Bool, -- True <=> top level binding
1311 uf_is_value :: Bool, -- exprIsHNF template (cached); it is ok to discard
1312 -- a `seq` on this variable
1313 uf_is_conlike :: Bool, -- True <=> applicn of constructor or CONLIKE function
1314 -- Cached version of exprIsConLike
1315 uf_is_work_free :: Bool, -- True <=> doesn't waste (much) work to expand
1316 -- inside an inlining
1317 -- Cached version of exprIsCheap
1318 uf_expandable :: Bool, -- True <=> can expand in RULE matching
1319 -- Cached version of exprIsExpandable
1320 uf_guidance :: UnfoldingGuidance -- Tells about the *size* of the template.
1321 }
1322 -- ^ An unfolding with redundant cached information. Parameters:
1323 --
1324 -- uf_tmpl: Template used to perform unfolding;
1325 -- NB: Occurrence info is guaranteed correct:
1326 -- see Note [OccInfo in unfoldings and rules]
1327 --
1328 -- uf_is_top: Is this a top level binding?
1329 --
1330 -- uf_is_value: 'exprIsHNF' template (cached); it is ok to discard a 'seq' on
1331 -- this variable
1332 --
1333 -- uf_is_work_free: Does this waste only a little work if we expand it inside an inlining?
1334 -- Basically this is a cached version of 'exprIsWorkFree'
1335 --
1336 -- uf_guidance: Tells us about the /size/ of the unfolding template
1337
1338
1339 ------------------------------------------------
1340 data UnfoldingSource
1341 = -- See also Note [Historical note: unfoldings for wrappers]
1342
1343 InlineRhs -- The current rhs of the function
1344 -- Replace uf_tmpl each time around
1345
1346 | InlineStable -- From an INLINE or INLINABLE pragma
1347 -- INLINE if guidance is UnfWhen
1348 -- INLINABLE if guidance is UnfIfGoodArgs/UnfoldNever
1349 -- (well, technically an INLINABLE might be made
1350 -- UnfWhen if it was small enough, and then
1351 -- it will behave like INLINE outside the current
1352 -- module, but that is the way automatic unfoldings
1353 -- work so it is consistent with the intended
1354 -- meaning of INLINABLE).
1355 --
1356 -- uf_tmpl may change, but only as a result of
1357 -- gentle simplification, it doesn't get updated
1358 -- to the current RHS during compilation as with
1359 -- InlineRhs.
1360 --
1361 -- See Note [InlineStable]
1362
1363 | InlineCompulsory -- Something that *has* no binding, so you *must* inline it
1364 -- Only a few primop-like things have this property
1365 -- (see MkId.hs, calls to mkCompulsoryUnfolding).
1366 -- Inline absolutely always, however boring the context.
1367
1368
1369
1370 -- | 'UnfoldingGuidance' says when unfolding should take place
1371 data UnfoldingGuidance
1372 = UnfWhen { -- Inline without thinking about the *size* of the uf_tmpl
1373 -- Used (a) for small *and* cheap unfoldings
1374 -- (b) for INLINE functions
1375 -- See Note [INLINE for small functions] in CoreUnfold
1376 ug_arity :: Arity, -- Number of value arguments expected
1377
1378 ug_unsat_ok :: Bool, -- True <=> ok to inline even if unsaturated
1379 ug_boring_ok :: Bool -- True <=> ok to inline even if the context is boring
1380 -- So True,True means "always"
1381 }
1382
1383 | UnfIfGoodArgs { -- Arose from a normal Id; the info here is the
1384 -- result of a simple analysis of the RHS
1385
1386 ug_args :: [Int], -- Discount if the argument is evaluated.
1387 -- (i.e., a simplification will definitely
1388 -- be possible). One elt of the list per *value* arg.
1389
1390 ug_size :: Int, -- The "size" of the unfolding.
1391
1392 ug_res :: Int -- Scrutinee discount: the discount to substract if the thing is in
1393 } -- a context (case (thing args) of ...),
1394 -- (where there are the right number of arguments.)
1395
1396 | UnfNever -- The RHS is big, so don't inline it
1397 deriving (Eq)
1398
1399 {-
1400 Note [Historical note: unfoldings for wrappers]
1401 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1402 We used to have a nice clever scheme in interface files for
1403 wrappers. A wrapper's unfolding can be reconstructed from its worker's
1404 id and its strictness. This decreased .hi file size (sometimes
1405 significantly, for modules like GHC.Classes with many high-arity w/w
1406 splits) and had a slight corresponding effect on compile times.
1407
1408 However, when we added the second demand analysis, this scheme lead to
1409 some Core lint errors. The second analysis could change the strictness
1410 signatures, which sometimes resulted in a wrapper's regenerated
1411 unfolding applying the wrapper to too many arguments.
1412
1413 Instead of repairing the clever .hi scheme, we abandoned it in favor
1414 of simplicity. The .hi sizes are usually insignificant (excluding the
1415 +1M for base libraries), and compile time barely increases (~+1% for
1416 nofib). The nicer upshot is that the UnfoldingSource no longer mentions
1417 an Id, so, eg, substitutions need not traverse them.
1418
1419
1420 Note [DFun unfoldings]
1421 ~~~~~~~~~~~~~~~~~~~~~~
1422 The Arity in a DFunUnfolding is total number of args (type and value)
1423 that the DFun needs to produce a dictionary. That's not necessarily
1424 related to the ordinary arity of the dfun Id, esp if the class has
1425 one method, so the dictionary is represented by a newtype. Example
1426
1427 class C a where { op :: a -> Int }
1428 instance C a -> C [a] where op xs = op (head xs)
1429
1430 The instance translates to
1431
1432 $dfCList :: forall a. C a => C [a] -- Arity 2!
1433 $dfCList = /\a.\d. $copList {a} d |> co
1434
1435 $copList :: forall a. C a => [a] -> Int -- Arity 2!
1436 $copList = /\a.\d.\xs. op {a} d (head xs)
1437
1438 Now we might encounter (op (dfCList {ty} d) a1 a2)
1439 and we want the (op (dfList {ty} d)) rule to fire, because $dfCList
1440 has all its arguments, even though its (value) arity is 2. That's
1441 why we record the number of expected arguments in the DFunUnfolding.
1442
1443 Note that although it's an Arity, it's most convenient for it to give
1444 the *total* number of arguments, both type and value. See the use
1445 site in exprIsConApp_maybe.
1446 -}
1447
1448 -- Constants for the UnfWhen constructor
1449 needSaturated, unSaturatedOk :: Bool
1450 needSaturated = False
1451 unSaturatedOk = True
1452
1453 boringCxtNotOk, boringCxtOk :: Bool
1454 boringCxtOk = True
1455 boringCxtNotOk = False
1456
1457 ------------------------------------------------
1458 noUnfolding :: Unfolding
1459 -- ^ There is no known 'Unfolding'
1460 evaldUnfolding :: Unfolding
1461 -- ^ This unfolding marks the associated thing as being evaluated
1462
1463 noUnfolding = NoUnfolding
1464 evaldUnfolding = OtherCon []
1465
1466 -- | There is no known 'Unfolding', because this came from an
1467 -- hi-boot file.
1468 bootUnfolding :: Unfolding
1469 bootUnfolding = BootUnfolding
1470
1471 mkOtherCon :: [AltCon] -> Unfolding
1472 mkOtherCon = OtherCon
1473
1474 isStableSource :: UnfoldingSource -> Bool
1475 -- Keep the unfolding template
1476 isStableSource InlineCompulsory = True
1477 isStableSource InlineStable = True
1478 isStableSource InlineRhs = False
1479
1480 -- | Retrieves the template of an unfolding: panics if none is known
1481 unfoldingTemplate :: Unfolding -> CoreExpr
1482 unfoldingTemplate = uf_tmpl
1483
1484 -- | Retrieves the template of an unfolding if possible
1485 -- maybeUnfoldingTemplate is used mainly wnen specialising, and we do
1486 -- want to specialise DFuns, so it's important to return a template
1487 -- for DFunUnfoldings
1488 maybeUnfoldingTemplate :: Unfolding -> Maybe CoreExpr
1489 maybeUnfoldingTemplate (CoreUnfolding { uf_tmpl = expr })
1490 = Just expr
1491 maybeUnfoldingTemplate (DFunUnfolding { df_bndrs = bndrs, df_con = con, df_args = args })
1492 = Just (mkLams bndrs (mkApps (Var (dataConWorkId con)) args))
1493 maybeUnfoldingTemplate _
1494 = Nothing
1495
1496 -- | The constructors that the unfolding could never be:
1497 -- returns @[]@ if no information is available
1498 otherCons :: Unfolding -> [AltCon]
1499 otherCons (OtherCon cons) = cons
1500 otherCons _ = []
1501
1502 -- | Determines if it is certainly the case that the unfolding will
1503 -- yield a value (something in HNF): returns @False@ if unsure
1504 isValueUnfolding :: Unfolding -> Bool
1505 -- Returns False for OtherCon
1506 isValueUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald
1507 isValueUnfolding _ = False
1508
1509 -- | Determines if it possibly the case that the unfolding will
1510 -- yield a value. Unlike 'isValueUnfolding' it returns @True@
1511 -- for 'OtherCon'
1512 isEvaldUnfolding :: Unfolding -> Bool
1513 -- Returns True for OtherCon
1514 isEvaldUnfolding (OtherCon _) = True
1515 isEvaldUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald
1516 isEvaldUnfolding _ = False
1517
1518 -- | @True@ if the unfolding is a constructor application, the application
1519 -- of a CONLIKE function or 'OtherCon'
1520 isConLikeUnfolding :: Unfolding -> Bool
1521 isConLikeUnfolding (OtherCon _) = True
1522 isConLikeUnfolding (CoreUnfolding { uf_is_conlike = con }) = con
1523 isConLikeUnfolding _ = False
1524
1525 -- | Is the thing we will unfold into certainly cheap?
1526 isCheapUnfolding :: Unfolding -> Bool
1527 isCheapUnfolding (CoreUnfolding { uf_is_work_free = is_wf }) = is_wf
1528 isCheapUnfolding _ = False
1529
1530 isExpandableUnfolding :: Unfolding -> Bool
1531 isExpandableUnfolding (CoreUnfolding { uf_expandable = is_expable }) = is_expable
1532 isExpandableUnfolding _ = False
1533
1534 expandUnfolding_maybe :: Unfolding -> Maybe CoreExpr
1535 -- Expand an expandable unfolding; this is used in rule matching
1536 -- See Note [Expanding variables] in Rules.hs
1537 -- The key point here is that CONLIKE things can be expanded
1538 expandUnfolding_maybe (CoreUnfolding { uf_expandable = True, uf_tmpl = rhs }) = Just rhs
1539 expandUnfolding_maybe _ = Nothing
1540
1541 isCompulsoryUnfolding :: Unfolding -> Bool
1542 isCompulsoryUnfolding (CoreUnfolding { uf_src = InlineCompulsory }) = True
1543 isCompulsoryUnfolding _ = False
1544
1545 isStableUnfolding :: Unfolding -> Bool
1546 -- True of unfoldings that should not be overwritten
1547 -- by a CoreUnfolding for the RHS of a let-binding
1548 isStableUnfolding (CoreUnfolding { uf_src = src }) = isStableSource src
1549 isStableUnfolding (DFunUnfolding {}) = True
1550 isStableUnfolding _ = False
1551
1552 -- | Only returns False if there is no unfolding information available at all
1553 hasSomeUnfolding :: Unfolding -> Bool
1554 hasSomeUnfolding NoUnfolding = False
1555 hasSomeUnfolding BootUnfolding = False
1556 hasSomeUnfolding _ = True
1557
1558 isBootUnfolding :: Unfolding -> Bool
1559 isBootUnfolding BootUnfolding = True
1560 isBootUnfolding _ = False
1561
1562 neverUnfoldGuidance :: UnfoldingGuidance -> Bool
1563 neverUnfoldGuidance UnfNever = True
1564 neverUnfoldGuidance _ = False
1565
1566 isFragileUnfolding :: Unfolding -> Bool
1567 -- An unfolding is fragile if it mentions free variables or
1568 -- is otherwise subject to change. A robust one can be kept.
1569 -- See Note [Fragile unfoldings]
1570 isFragileUnfolding (CoreUnfolding {}) = True
1571 isFragileUnfolding (DFunUnfolding {}) = True
1572 isFragileUnfolding _ = False
1573 -- NoUnfolding, BootUnfolding, OtherCon are all non-fragile
1574
1575 canUnfold :: Unfolding -> Bool
1576 canUnfold (CoreUnfolding { uf_guidance = g }) = not (neverUnfoldGuidance g)
1577 canUnfold _ = False
1578
1579 {- Note [Fragile unfoldings]
1580 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1581 An unfolding is "fragile" if it mentions free variables (and hence would
1582 need substitution) or might be affected by optimisation. The non-fragile
1583 ones are
1584
1585 NoUnfolding, BootUnfolding
1586
1587 OtherCon {} If we know this binder (say a lambda binder) will be
1588 bound to an evaluated thing, we want to retain that
1589 info in simpleOptExpr; see Trac #13077.
1590
1591 We consider even a StableUnfolding as fragile, because it needs substitution.
1592
1593 Note [InlineStable]
1594 ~~~~~~~~~~~~~~~~~
1595 When you say
1596 {-# INLINE f #-}
1597 f x = <rhs>
1598 you intend that calls (f e) are replaced by <rhs>[e/x] So we
1599 should capture (\x.<rhs>) in the Unfolding of 'f', and never meddle
1600 with it. Meanwhile, we can optimise <rhs> to our heart's content,
1601 leaving the original unfolding intact in Unfolding of 'f'. For example
1602 all xs = foldr (&&) True xs
1603 any p = all . map p {-# INLINE any #-}
1604 We optimise any's RHS fully, but leave the InlineRule saying "all . map p",
1605 which deforests well at the call site.
1606
1607 So INLINE pragma gives rise to an InlineRule, which captures the original RHS.
1608
1609 Moreover, it's only used when 'f' is applied to the
1610 specified number of arguments; that is, the number of argument on
1611 the LHS of the '=' sign in the original source definition.
1612 For example, (.) is now defined in the libraries like this
1613 {-# INLINE (.) #-}
1614 (.) f g = \x -> f (g x)
1615 so that it'll inline when applied to two arguments. If 'x' appeared
1616 on the left, thus
1617 (.) f g x = f (g x)
1618 it'd only inline when applied to three arguments. This slightly-experimental
1619 change was requested by Roman, but it seems to make sense.
1620
1621 See also Note [Inlining an InlineRule] in CoreUnfold.
1622
1623
1624 Note [OccInfo in unfoldings and rules]
1625 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1626 In unfoldings and rules, we guarantee that the template is occ-analysed,
1627 so that the occurrence info on the binders is correct. This is important,
1628 because the Simplifier does not re-analyse the template when using it. If
1629 the occurrence info is wrong
1630 - We may get more simpifier iterations than necessary, because
1631 once-occ info isn't there
1632 - More seriously, we may get an infinite loop if there's a Rec
1633 without a loop breaker marked
1634
1635
1636 ************************************************************************
1637 * *
1638 AltCon
1639 * *
1640 ************************************************************************
1641 -}
1642
1643 -- The Ord is needed for the FiniteMap used in the lookForConstructor
1644 -- in SimplEnv. If you declared that lookForConstructor *ignores*
1645 -- constructor-applications with LitArg args, then you could get
1646 -- rid of this Ord.
1647
1648 instance Outputable AltCon where
1649 ppr (DataAlt dc) = ppr dc
1650 ppr (LitAlt lit) = ppr lit
1651 ppr DEFAULT = text "__DEFAULT"
1652
1653 cmpAlt :: (AltCon, a, b) -> (AltCon, a, b) -> Ordering
1654 cmpAlt (con1, _, _) (con2, _, _) = con1 `cmpAltCon` con2
1655
1656 ltAlt :: (AltCon, a, b) -> (AltCon, a, b) -> Bool
1657 ltAlt a1 a2 = (a1 `cmpAlt` a2) == LT
1658
1659 cmpAltCon :: AltCon -> AltCon -> Ordering
1660 -- ^ Compares 'AltCon's within a single list of alternatives
1661 cmpAltCon DEFAULT DEFAULT = EQ
1662 cmpAltCon DEFAULT _ = LT
1663
1664 cmpAltCon (DataAlt d1) (DataAlt d2) = dataConTag d1 `compare` dataConTag d2
1665 cmpAltCon (DataAlt _) DEFAULT = GT
1666 cmpAltCon (LitAlt l1) (LitAlt l2) = l1 `compare` l2
1667 cmpAltCon (LitAlt _) DEFAULT = GT
1668
1669 cmpAltCon con1 con2 = WARN( True, text "Comparing incomparable AltCons" <+>
1670 ppr con1 <+> ppr con2 )
1671 LT
1672
1673 {-
1674 ************************************************************************
1675 * *
1676 \subsection{Useful synonyms}
1677 * *
1678 ************************************************************************
1679
1680 Note [CoreProgram]
1681 ~~~~~~~~~~~~~~~~~~
1682 The top level bindings of a program, a CoreProgram, are represented as
1683 a list of CoreBind
1684
1685 * Later bindings in the list can refer to earlier ones, but not vice
1686 versa. So this is OK
1687 NonRec { x = 4 }
1688 Rec { p = ...q...x...
1689 ; q = ...p...x }
1690 Rec { f = ...p..x..f.. }
1691 NonRec { g = ..f..q...x.. }
1692 But it would NOT be ok for 'f' to refer to 'g'.
1693
1694 * The occurrence analyser does strongly-connected component analysis
1695 on each Rec binding, and splits it into a sequence of smaller
1696 bindings where possible. So the program typically starts life as a
1697 single giant Rec, which is then dependency-analysed into smaller
1698 chunks.
1699 -}
1700
1701 -- If you edit this type, you may need to update the GHC formalism
1702 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
1703 type CoreProgram = [CoreBind] -- See Note [CoreProgram]
1704
1705 -- | The common case for the type of binders and variables when
1706 -- we are manipulating the Core language within GHC
1707 type CoreBndr = Var
1708 -- | Expressions where binders are 'CoreBndr's
1709 type CoreExpr = Expr CoreBndr
1710 -- | Argument expressions where binders are 'CoreBndr's
1711 type CoreArg = Arg CoreBndr
1712 -- | Binding groups where binders are 'CoreBndr's
1713 type CoreBind = Bind CoreBndr
1714 -- | Case alternatives where binders are 'CoreBndr's
1715 type CoreAlt = Alt CoreBndr
1716
1717 {-
1718 ************************************************************************
1719 * *
1720 \subsection{Tagging}
1721 * *
1722 ************************************************************************
1723 -}
1724
1725 -- | Binders are /tagged/ with a t
1726 data TaggedBndr t = TB CoreBndr t -- TB for "tagged binder"
1727
1728 type TaggedBind t = Bind (TaggedBndr t)
1729 type TaggedExpr t = Expr (TaggedBndr t)
1730 type TaggedArg t = Arg (TaggedBndr t)
1731 type TaggedAlt t = Alt (TaggedBndr t)
1732
1733 instance Outputable b => Outputable (TaggedBndr b) where
1734 ppr (TB b l) = char '<' <> ppr b <> comma <> ppr l <> char '>'
1735
1736 deTagExpr :: TaggedExpr t -> CoreExpr
1737 deTagExpr (Var v) = Var v
1738 deTagExpr (Lit l) = Lit l
1739 deTagExpr (Type ty) = Type ty
1740 deTagExpr (Coercion co) = Coercion co
1741 deTagExpr (App e1 e2) = App (deTagExpr e1) (deTagExpr e2)
1742 deTagExpr (Lam (TB b _) e) = Lam b (deTagExpr e)
1743 deTagExpr (Let bind body) = Let (deTagBind bind) (deTagExpr body)
1744 deTagExpr (Case e (TB b _) ty alts) = Case (deTagExpr e) b ty (map deTagAlt alts)
1745 deTagExpr (Tick t e) = Tick t (deTagExpr e)
1746 deTagExpr (Cast e co) = Cast (deTagExpr e) co
1747
1748 deTagBind :: TaggedBind t -> CoreBind
1749 deTagBind (NonRec (TB b _) rhs) = NonRec b (deTagExpr rhs)
1750 deTagBind (Rec prs) = Rec [(b, deTagExpr rhs) | (TB b _, rhs) <- prs]
1751
1752 deTagAlt :: TaggedAlt t -> CoreAlt
1753 deTagAlt (con, bndrs, rhs) = (con, [b | TB b _ <- bndrs], deTagExpr rhs)
1754
1755 {-
1756 ************************************************************************
1757 * *
1758 \subsection{Core-constructing functions with checking}
1759 * *
1760 ************************************************************************
1761 -}
1762
1763 -- | Apply a list of argument expressions to a function expression in a nested fashion. Prefer to
1764 -- use 'MkCore.mkCoreApps' if possible
1765 mkApps :: Expr b -> [Arg b] -> Expr b
1766 -- | Apply a list of type argument expressions to a function expression in a nested fashion
1767 mkTyApps :: Expr b -> [Type] -> Expr b
1768 -- | Apply a list of coercion argument expressions to a function expression in a nested fashion
1769 mkCoApps :: Expr b -> [Coercion] -> Expr b
1770 -- | Apply a list of type or value variables to a function expression in a nested fashion
1771 mkVarApps :: Expr b -> [Var] -> Expr b
1772 -- | Apply a list of argument expressions to a data constructor in a nested fashion. Prefer to
1773 -- use 'MkCore.mkCoreConApps' if possible
1774 mkConApp :: DataCon -> [Arg b] -> Expr b
1775
1776 mkApps f args = foldl App f args
1777 mkCoApps f args = foldl (\ e a -> App e (Coercion a)) f args
1778 mkVarApps f vars = foldl (\ e a -> App e (varToCoreExpr a)) f vars
1779 mkConApp con args = mkApps (Var (dataConWorkId con)) args
1780
1781 mkTyApps f args = foldl (\ e a -> App e (mkTyArg a)) f args
1782
1783 mkConApp2 :: DataCon -> [Type] -> [Var] -> Expr b
1784 mkConApp2 con tys arg_ids = Var (dataConWorkId con)
1785 `mkApps` map Type tys
1786 `mkApps` map varToCoreExpr arg_ids
1787
1788 mkTyArg :: Type -> Expr b
1789 mkTyArg ty
1790 | Just co <- isCoercionTy_maybe ty = Coercion co
1791 | otherwise = Type ty
1792
1793 -- | Create a machine integer literal expression of type @Int#@ from an @Integer@.
1794 -- If you want an expression of type @Int@ use 'MkCore.mkIntExpr'
1795 mkIntLit :: DynFlags -> Integer -> Expr b
1796 -- | Create a machine integer literal expression of type @Int#@ from an @Int@.
1797 -- If you want an expression of type @Int@ use 'MkCore.mkIntExpr'
1798 mkIntLitInt :: DynFlags -> Int -> Expr b
1799
1800 mkIntLit dflags n = Lit (mkMachInt dflags n)
1801 mkIntLitInt dflags n = Lit (mkMachInt dflags (toInteger n))
1802
1803 -- | Create a machine word literal expression of type @Word#@ from an @Integer@.
1804 -- If you want an expression of type @Word@ use 'MkCore.mkWordExpr'
1805 mkWordLit :: DynFlags -> Integer -> Expr b
1806 -- | Create a machine word literal expression of type @Word#@ from a @Word@.
1807 -- If you want an expression of type @Word@ use 'MkCore.mkWordExpr'
1808 mkWordLitWord :: DynFlags -> Word -> Expr b
1809
1810 mkWordLit dflags w = Lit (mkMachWord dflags w)
1811 mkWordLitWord dflags w = Lit (mkMachWord dflags (toInteger w))
1812
1813 mkWord64LitWord64 :: Word64 -> Expr b
1814 mkWord64LitWord64 w = Lit (mkMachWord64 (toInteger w))
1815
1816 mkInt64LitInt64 :: Int64 -> Expr b
1817 mkInt64LitInt64 w = Lit (mkMachInt64 (toInteger w))
1818
1819 -- | Create a machine character literal expression of type @Char#@.
1820 -- If you want an expression of type @Char@ use 'MkCore.mkCharExpr'
1821 mkCharLit :: Char -> Expr b
1822 -- | Create a machine string literal expression of type @Addr#@.
1823 -- If you want an expression of type @String@ use 'MkCore.mkStringExpr'
1824 mkStringLit :: String -> Expr b
1825
1826 mkCharLit c = Lit (mkMachChar c)
1827 mkStringLit s = Lit (mkMachString s)
1828
1829 -- | Create a machine single precision literal expression of type @Float#@ from a @Rational@.
1830 -- If you want an expression of type @Float@ use 'MkCore.mkFloatExpr'
1831 mkFloatLit :: Rational -> Expr b
1832 -- | Create a machine single precision literal expression of type @Float#@ from a @Float@.
1833 -- If you want an expression of type @Float@ use 'MkCore.mkFloatExpr'
1834 mkFloatLitFloat :: Float -> Expr b
1835
1836 mkFloatLit f = Lit (mkMachFloat f)
1837 mkFloatLitFloat f = Lit (mkMachFloat (toRational f))
1838
1839 -- | Create a machine double precision literal expression of type @Double#@ from a @Rational@.
1840 -- If you want an expression of type @Double@ use 'MkCore.mkDoubleExpr'
1841 mkDoubleLit :: Rational -> Expr b
1842 -- | Create a machine double precision literal expression of type @Double#@ from a @Double@.
1843 -- If you want an expression of type @Double@ use 'MkCore.mkDoubleExpr'
1844 mkDoubleLitDouble :: Double -> Expr b
1845
1846 mkDoubleLit d = Lit (mkMachDouble d)
1847 mkDoubleLitDouble d = Lit (mkMachDouble (toRational d))
1848
1849 -- | Bind all supplied binding groups over an expression in a nested let expression. Assumes
1850 -- that the rhs satisfies the let/app invariant. Prefer to use 'MkCore.mkCoreLets' if
1851 -- possible, which does guarantee the invariant
1852 mkLets :: [Bind b] -> Expr b -> Expr b
1853 -- | Bind all supplied binders over an expression in a nested lambda expression. Prefer to
1854 -- use 'MkCore.mkCoreLams' if possible
1855 mkLams :: [b] -> Expr b -> Expr b
1856
1857 mkLams binders body = foldr Lam body binders
1858 mkLets binds body = foldr mkLet body binds
1859
1860 mkLet :: Bind b -> Expr b -> Expr b
1861 -- The desugarer sometimes generates an empty Rec group
1862 -- which Lint rejects, so we kill it off right away
1863 mkLet (Rec []) body = body
1864 mkLet bind body = Let bind body
1865
1866 -- | Create a binding group where a type variable is bound to a type. Per "CoreSyn#type_let",
1867 -- this can only be used to bind something in a non-recursive @let@ expression
1868 mkTyBind :: TyVar -> Type -> CoreBind
1869 mkTyBind tv ty = NonRec tv (Type ty)
1870
1871 -- | Create a binding group where a type variable is bound to a type. Per "CoreSyn#type_let",
1872 -- this can only be used to bind something in a non-recursive @let@ expression
1873 mkCoBind :: CoVar -> Coercion -> CoreBind
1874 mkCoBind cv co = NonRec cv (Coercion co)
1875
1876 -- | Convert a binder into either a 'Var' or 'Type' 'Expr' appropriately
1877 varToCoreExpr :: CoreBndr -> Expr b
1878 varToCoreExpr v | isTyVar v = Type (mkTyVarTy v)
1879 | isCoVar v = Coercion (mkCoVarCo v)
1880 | otherwise = ASSERT( isId v ) Var v
1881
1882 varsToCoreExprs :: [CoreBndr] -> [Expr b]
1883 varsToCoreExprs vs = map varToCoreExpr vs
1884
1885 {-
1886 ************************************************************************
1887 * *
1888 Getting a result type
1889 * *
1890 ************************************************************************
1891
1892 These are defined here to avoid a module loop between CoreUtils and CoreFVs
1893
1894 -}
1895
1896 applyTypeToArg :: Type -> CoreExpr -> Type
1897 -- ^ Determines the type resulting from applying an expression with given type
1898 -- to a given argument expression
1899 applyTypeToArg fun_ty arg = piResultTy fun_ty (exprToType arg)
1900
1901 -- | If the expression is a 'Type', converts. Otherwise,
1902 -- panics. NB: This does /not/ convert 'Coercion' to 'CoercionTy'.
1903 exprToType :: CoreExpr -> Type
1904 exprToType (Type ty) = ty
1905 exprToType _bad = pprPanic "exprToType" empty
1906
1907 -- | If the expression is a 'Coercion', converts.
1908 exprToCoercion_maybe :: CoreExpr -> Maybe Coercion
1909 exprToCoercion_maybe (Coercion co) = Just co
1910 exprToCoercion_maybe _ = Nothing
1911
1912 {-
1913 ************************************************************************
1914 * *
1915 \subsection{Simple access functions}
1916 * *
1917 ************************************************************************
1918 -}
1919
1920 -- | Extract every variable by this group
1921 bindersOf :: Bind b -> [b]
1922 -- If you edit this function, you may need to update the GHC formalism
1923 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
1924 bindersOf (NonRec binder _) = [binder]
1925 bindersOf (Rec pairs) = [binder | (binder, _) <- pairs]
1926
1927 -- | 'bindersOf' applied to a list of binding groups
1928 bindersOfBinds :: [Bind b] -> [b]
1929 bindersOfBinds binds = foldr ((++) . bindersOf) [] binds
1930
1931 rhssOfBind :: Bind b -> [Expr b]
1932 rhssOfBind (NonRec _ rhs) = [rhs]
1933 rhssOfBind (Rec pairs) = [rhs | (_,rhs) <- pairs]
1934
1935 rhssOfAlts :: [Alt b] -> [Expr b]
1936 rhssOfAlts alts = [e | (_,_,e) <- alts]
1937
1938 -- | Collapse all the bindings in the supplied groups into a single
1939 -- list of lhs\/rhs pairs suitable for binding in a 'Rec' binding group
1940 flattenBinds :: [Bind b] -> [(b, Expr b)]
1941 flattenBinds (NonRec b r : binds) = (b,r) : flattenBinds binds
1942 flattenBinds (Rec prs1 : binds) = prs1 ++ flattenBinds binds
1943 flattenBinds [] = []
1944
1945 -- | We often want to strip off leading lambdas before getting down to
1946 -- business. Variants are 'collectTyBinders', 'collectValBinders',
1947 -- and 'collectTyAndValBinders'
1948 collectBinders :: Expr b -> ([b], Expr b)
1949 collectTyBinders :: CoreExpr -> ([TyVar], CoreExpr)
1950 collectValBinders :: CoreExpr -> ([Id], CoreExpr)
1951 collectTyAndValBinders :: CoreExpr -> ([TyVar], [Id], CoreExpr)
1952 -- | Strip off exactly N leading lambdas (type or value). Good for use with
1953 -- join points.
1954 collectNBinders :: Int -> Expr b -> ([b], Expr b)
1955
1956 collectBinders expr
1957 = go [] expr
1958 where
1959 go bs (Lam b e) = go (b:bs) e
1960 go bs e = (reverse bs, e)
1961
1962 collectTyBinders expr
1963 = go [] expr
1964 where
1965 go tvs (Lam b e) | isTyVar b = go (b:tvs) e
1966 go tvs e = (reverse tvs, e)
1967
1968 collectValBinders expr
1969 = go [] expr
1970 where
1971 go ids (Lam b e) | isId b = go (b:ids) e
1972 go ids body = (reverse ids, body)
1973
1974 collectTyAndValBinders expr
1975 = (tvs, ids, body)
1976 where
1977 (tvs, body1) = collectTyBinders expr
1978 (ids, body) = collectValBinders body1
1979
1980 collectNBinders orig_n orig_expr
1981 = go orig_n [] orig_expr
1982 where
1983 go 0 bs expr = (reverse bs, expr)
1984 go n bs (Lam b e) = go (n-1) (b:bs) e
1985 go _ _ _ = pprPanic "collectNBinders" $ int orig_n
1986
1987 -- | Takes a nested application expression and returns the the function
1988 -- being applied and the arguments to which it is applied
1989 collectArgs :: Expr b -> (Expr b, [Arg b])
1990 collectArgs expr
1991 = go expr []
1992 where
1993 go (App f a) as = go f (a:as)
1994 go e as = (e, as)
1995
1996 -- | Like @collectArgs@, but also collects looks through floatable
1997 -- ticks if it means that we can find more arguments.
1998 collectArgsTicks :: (Tickish Id -> Bool) -> Expr b
1999 -> (Expr b, [Arg b], [Tickish Id])
2000 collectArgsTicks skipTick expr
2001 = go expr [] []
2002 where
2003 go (App f a) as ts = go f (a:as) ts
2004 go (Tick t e) as ts
2005 | skipTick t = go e as (t:ts)
2006 go e as ts = (e, as, reverse ts)
2007
2008
2009 {-
2010 ************************************************************************
2011 * *
2012 \subsection{Predicates}
2013 * *
2014 ************************************************************************
2015
2016 At one time we optionally carried type arguments through to runtime.
2017 @isRuntimeVar v@ returns if (Lam v _) really becomes a lambda at runtime,
2018 i.e. if type applications are actual lambdas because types are kept around
2019 at runtime. Similarly isRuntimeArg.
2020 -}
2021
2022 -- | Will this variable exist at runtime?
2023 isRuntimeVar :: Var -> Bool
2024 isRuntimeVar = isId
2025
2026 -- | Will this argument expression exist at runtime?
2027 isRuntimeArg :: CoreExpr -> Bool
2028 isRuntimeArg = isValArg
2029
2030 -- | Returns @True@ for value arguments, false for type args
2031 -- NB: coercions are value arguments (zero width, to be sure,
2032 -- like State#, but still value args).
2033 isValArg :: Expr b -> Bool
2034 isValArg e = not (isTypeArg e)
2035
2036 -- | Returns @True@ iff the expression is a 'Type' or 'Coercion'
2037 -- expression at its top level
2038 isTyCoArg :: Expr b -> Bool
2039 isTyCoArg (Type {}) = True
2040 isTyCoArg (Coercion {}) = True
2041 isTyCoArg _ = False
2042
2043 -- | Returns @True@ iff the expression is a 'Type' expression at its
2044 -- top level. Note this does NOT include 'Coercion's.
2045 isTypeArg :: Expr b -> Bool
2046 isTypeArg (Type {}) = True
2047 isTypeArg _ = False
2048
2049 -- | The number of binders that bind values rather than types
2050 valBndrCount :: [CoreBndr] -> Int
2051 valBndrCount = count isId
2052
2053 -- | The number of argument expressions that are values rather than types at their top level
2054 valArgCount :: [Arg b] -> Int
2055 valArgCount = count isValArg
2056
2057 {-
2058 ************************************************************************
2059 * *
2060 \subsection{Annotated core}
2061 * *
2062 ************************************************************************
2063 -}
2064
2065 -- | Annotated core: allows annotation at every node in the tree
2066 type AnnExpr bndr annot = (annot, AnnExpr' bndr annot)
2067
2068 -- | A clone of the 'Expr' type but allowing annotation at every tree node
2069 data AnnExpr' bndr annot
2070 = AnnVar Id
2071 | AnnLit Literal
2072 | AnnLam bndr (AnnExpr bndr annot)
2073 | AnnApp (AnnExpr bndr annot) (AnnExpr bndr annot)
2074 | AnnCase (AnnExpr bndr annot) bndr Type [AnnAlt bndr annot]
2075 | AnnLet (AnnBind bndr annot) (AnnExpr bndr annot)
2076 | AnnCast (AnnExpr bndr annot) (annot, Coercion)
2077 -- Put an annotation on the (root of) the coercion
2078 | AnnTick (Tickish Id) (AnnExpr bndr annot)
2079 | AnnType Type
2080 | AnnCoercion Coercion
2081
2082 -- | A clone of the 'Alt' type but allowing annotation at every tree node
2083 type AnnAlt bndr annot = (AltCon, [bndr], AnnExpr bndr annot)
2084
2085 -- | A clone of the 'Bind' type but allowing annotation at every tree node
2086 data AnnBind bndr annot
2087 = AnnNonRec bndr (AnnExpr bndr annot)
2088 | AnnRec [(bndr, AnnExpr bndr annot)]
2089
2090 -- | Takes a nested application expression and returns the the function
2091 -- being applied and the arguments to which it is applied
2092 collectAnnArgs :: AnnExpr b a -> (AnnExpr b a, [AnnExpr b a])
2093 collectAnnArgs expr
2094 = go expr []
2095 where
2096 go (_, AnnApp f a) as = go f (a:as)
2097 go e as = (e, as)
2098
2099 collectAnnArgsTicks :: (Tickish Var -> Bool) -> AnnExpr b a
2100 -> (AnnExpr b a, [AnnExpr b a], [Tickish Var])
2101 collectAnnArgsTicks tickishOk expr
2102 = go expr [] []
2103 where
2104 go (_, AnnApp f a) as ts = go f (a:as) ts
2105 go (_, AnnTick t e) as ts | tickishOk t
2106 = go e as (t:ts)
2107 go e as ts = (e, as, reverse ts)
2108
2109 deAnnotate :: AnnExpr bndr annot -> Expr bndr
2110 deAnnotate (_, e) = deAnnotate' e
2111
2112 deAnnotate' :: AnnExpr' bndr annot -> Expr bndr
2113 deAnnotate' (AnnType t) = Type t
2114 deAnnotate' (AnnCoercion co) = Coercion co
2115 deAnnotate' (AnnVar v) = Var v
2116 deAnnotate' (AnnLit lit) = Lit lit
2117 deAnnotate' (AnnLam binder body) = Lam binder (deAnnotate body)
2118 deAnnotate' (AnnApp fun arg) = App (deAnnotate fun) (deAnnotate arg)
2119 deAnnotate' (AnnCast e (_,co)) = Cast (deAnnotate e) co
2120 deAnnotate' (AnnTick tick body) = Tick tick (deAnnotate body)
2121
2122 deAnnotate' (AnnLet bind body)
2123 = Let (deAnnBind bind) (deAnnotate body)
2124 where
2125 deAnnBind (AnnNonRec var rhs) = NonRec var (deAnnotate rhs)
2126 deAnnBind (AnnRec pairs) = Rec [(v,deAnnotate rhs) | (v,rhs) <- pairs]
2127
2128 deAnnotate' (AnnCase scrut v t alts)
2129 = Case (deAnnotate scrut) v t (map deAnnAlt alts)
2130
2131 deAnnAlt :: AnnAlt bndr annot -> Alt bndr
2132 deAnnAlt (con,args,rhs) = (con,args,deAnnotate rhs)
2133
2134 -- | As 'collectBinders' but for 'AnnExpr' rather than 'Expr'
2135 collectAnnBndrs :: AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot)
2136 collectAnnBndrs e
2137 = collect [] e
2138 where
2139 collect bs (_, AnnLam b body) = collect (b:bs) body
2140 collect bs body = (reverse bs, body)
2141
2142 -- | As 'collectNBinders' but for 'AnnExpr' rather than 'Expr'
2143 collectNAnnBndrs :: Int -> AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot)
2144 collectNAnnBndrs orig_n e
2145 = collect orig_n [] e
2146 where
2147 collect 0 bs body = (reverse bs, body)
2148 collect n bs (_, AnnLam b body) = collect (n-1) (b:bs) body
2149 collect _ _ _ = pprPanic "collectNBinders" $ int orig_n