Kill off the remaining Rec []
[ghc.git] / compiler / coreSyn / CoreSyn.hs
1 {-
2 (c) The University of Glasgow 2006
3 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
4 -}
5
6 {-# LANGUAGE CPP, DeriveDataTypeable, FlexibleContexts #-}
7
8 -- | CoreSyn holds all the main data types for use by for the Glasgow Haskell Compiler midsection
9 module CoreSyn (
10 -- * Main data types
11 Expr(..), Alt, Bind(..), AltCon(..), Arg,
12 Tickish(..), TickishScoping(..), TickishPlacement(..),
13 CoreProgram, CoreExpr, CoreAlt, CoreBind, CoreArg, CoreBndr,
14 TaggedExpr, TaggedAlt, TaggedBind, TaggedArg, TaggedBndr(..), deTagExpr,
15
16 -- * In/Out type synonyms
17 InId, InBind, InExpr, InAlt, InArg, InType, InKind,
18 InBndr, InVar, InCoercion, InTyVar, InCoVar,
19 OutId, OutBind, OutExpr, OutAlt, OutArg, OutType, OutKind,
20 OutBndr, OutVar, OutCoercion, OutTyVar, OutCoVar,
21
22 -- ** 'Expr' construction
23 mkLet, mkLets, mkLams,
24 mkApps, mkTyApps, mkCoApps, mkVarApps, mkTyArg,
25
26 mkIntLit, mkIntLitInt,
27 mkWordLit, mkWordLitWord,
28 mkWord64LitWord64, mkInt64LitInt64,
29 mkCharLit, mkStringLit,
30 mkFloatLit, mkFloatLitFloat,
31 mkDoubleLit, mkDoubleLitDouble,
32
33 mkConApp, mkConApp2, mkTyBind, mkCoBind,
34 varToCoreExpr, varsToCoreExprs,
35
36 isId, cmpAltCon, cmpAlt, ltAlt,
37
38 -- ** Simple 'Expr' access functions and predicates
39 bindersOf, bindersOfBinds, rhssOfBind, rhssOfAlts,
40 collectBinders, collectTyBinders, collectTyAndValBinders,
41 collectNBinders,
42 collectArgs, collectArgsTicks, flattenBinds,
43
44 exprToType, exprToCoercion_maybe,
45 applyTypeToArg,
46
47 isValArg, isTypeArg, isTyCoArg, valArgCount, valBndrCount,
48 isRuntimeArg, isRuntimeVar,
49
50 -- * Tick-related functions
51 tickishCounts, tickishScoped, tickishScopesLike, tickishFloatable,
52 tickishCanSplit, mkNoCount, mkNoScope,
53 tickishIsCode, tickishPlace,
54 tickishContains,
55
56 -- * Unfolding data types
57 Unfolding(..), UnfoldingGuidance(..), UnfoldingSource(..),
58
59 -- ** Constructing 'Unfolding's
60 noUnfolding, bootUnfolding, evaldUnfolding, mkOtherCon,
61 unSaturatedOk, needSaturated, boringCxtOk, boringCxtNotOk,
62
63 -- ** Predicates and deconstruction on 'Unfolding'
64 unfoldingTemplate, expandUnfolding_maybe,
65 maybeUnfoldingTemplate, otherCons,
66 isValueUnfolding, isEvaldUnfolding, isCheapUnfolding,
67 isExpandableUnfolding, isConLikeUnfolding, isCompulsoryUnfolding,
68 isStableUnfolding, isFragileUnfolding, hasSomeUnfolding,
69 isBootUnfolding,
70 canUnfold, neverUnfoldGuidance, isStableSource,
71
72 -- * Annotated expression data types
73 AnnExpr, AnnExpr'(..), AnnBind(..), AnnAlt,
74
75 -- ** Operations on annotated expressions
76 collectAnnArgs, collectAnnArgsTicks,
77
78 -- ** Operations on annotations
79 deAnnotate, deAnnotate', deAnnAlt,
80 collectAnnBndrs, collectNAnnBndrs,
81
82 -- * Orphanhood
83 IsOrphan(..), isOrphan, notOrphan, chooseOrphanAnchor,
84
85 -- * Core rule data types
86 CoreRule(..), RuleBase,
87 RuleName, RuleFun, IdUnfoldingFun, InScopeEnv,
88 RuleEnv(..), mkRuleEnv, emptyRuleEnv,
89
90 -- ** Operations on 'CoreRule's
91 ruleArity, ruleName, ruleIdName, ruleActivation,
92 setRuleIdName,
93 isBuiltinRule, isLocalRule, isAutoRule,
94
95 -- * Core vectorisation declarations data type
96 CoreVect(..)
97 ) where
98
99 #include "HsVersions.h"
100
101 import CostCentre
102 import VarEnv( InScopeSet )
103 import Var
104 import Type
105 import Coercion
106 import Name
107 import NameSet
108 import NameEnv( NameEnv, emptyNameEnv )
109 import Literal
110 import DataCon
111 import Module
112 import TyCon
113 import BasicTypes
114 import DynFlags
115 import Outputable
116 import Util
117 import UniqFM
118 import SrcLoc ( RealSrcSpan, containsSpan )
119 import Binary
120
121 import Data.Data hiding (TyCon)
122 import Data.Int
123 import Data.Word
124
125 infixl 4 `mkApps`, `mkTyApps`, `mkVarApps`, `App`, `mkCoApps`
126 -- Left associative, so that we can say (f `mkTyApps` xs `mkVarApps` ys)
127
128 {-
129 ************************************************************************
130 * *
131 \subsection{The main data types}
132 * *
133 ************************************************************************
134
135 These data types are the heart of the compiler
136 -}
137
138 -- | This is the data type that represents GHCs core intermediate language. Currently
139 -- GHC uses System FC <http://research.microsoft.com/~simonpj/papers/ext-f/> for this purpose,
140 -- which is closely related to the simpler and better known System F <http://en.wikipedia.org/wiki/System_F>.
141 --
142 -- We get from Haskell source to this Core language in a number of stages:
143 --
144 -- 1. The source code is parsed into an abstract syntax tree, which is represented
145 -- by the data type 'HsExpr.HsExpr' with the names being 'RdrName.RdrNames'
146 --
147 -- 2. This syntax tree is /renamed/, which attaches a 'Unique.Unique' to every 'RdrName.RdrName'
148 -- (yielding a 'Name.Name') to disambiguate identifiers which are lexically identical.
149 -- For example, this program:
150 --
151 -- @
152 -- f x = let f x = x + 1
153 -- in f (x - 2)
154 -- @
155 --
156 -- Would be renamed by having 'Unique's attached so it looked something like this:
157 --
158 -- @
159 -- f_1 x_2 = let f_3 x_4 = x_4 + 1
160 -- in f_3 (x_2 - 2)
161 -- @
162 -- But see Note [Shadowing] below.
163 --
164 -- 3. The resulting syntax tree undergoes type checking (which also deals with instantiating
165 -- type class arguments) to yield a 'HsExpr.HsExpr' type that has 'Id.Id' as it's names.
166 --
167 -- 4. Finally the syntax tree is /desugared/ from the expressive 'HsExpr.HsExpr' type into
168 -- this 'Expr' type, which has far fewer constructors and hence is easier to perform
169 -- optimization, analysis and code generation on.
170 --
171 -- The type parameter @b@ is for the type of binders in the expression tree.
172 --
173 -- The language consists of the following elements:
174 --
175 -- * Variables
176 --
177 -- * Primitive literals
178 --
179 -- * Applications: note that the argument may be a 'Type'.
180 -- See Note [CoreSyn let/app invariant]
181 -- See Note [Levity polymorphism invariants]
182 --
183 -- * Lambda abstraction
184 -- See Note [Levity polymorphism invariants]
185 --
186 -- * Recursive and non recursive @let@s. Operationally
187 -- this corresponds to allocating a thunk for the things
188 -- bound and then executing the sub-expression.
189 --
190 -- #top_level_invariant#
191 -- #letrec_invariant#
192 --
193 -- The right hand sides of all top-level and recursive @let@s
194 -- /must/ be of lifted type (see "Type#type_classification" for
195 -- the meaning of /lifted/ vs. /unlifted/). There is one exception
196 -- to this rule, top-level @let@s are allowed to bind primitive
197 -- string literals, see Note [CoreSyn top-level string literals].
198 --
199 -- See Note [CoreSyn let/app invariant]
200 -- See Note [Levity polymorphism invariants]
201 --
202 -- #type_let#
203 -- We allow a /non-recursive/ let to bind a type variable, thus:
204 --
205 -- > Let (NonRec tv (Type ty)) body
206 --
207 -- This can be very convenient for postponing type substitutions until
208 -- the next run of the simplifier.
209 --
210 -- At the moment, the rest of the compiler only deals with type-let
211 -- in a Let expression, rather than at top level. We may want to revist
212 -- this choice.
213 --
214 -- * Case expression. Operationally this corresponds to evaluating
215 -- the scrutinee (expression examined) to weak head normal form
216 -- and then examining at most one level of resulting constructor (i.e. you
217 -- cannot do nested pattern matching directly with this).
218 --
219 -- The binder gets bound to the value of the scrutinee,
220 -- and the 'Type' must be that of all the case alternatives
221 --
222 -- #case_invariants#
223 -- This is one of the more complicated elements of the Core language,
224 -- and comes with a number of restrictions:
225 --
226 -- 1. The list of alternatives may be empty;
227 -- See Note [Empty case alternatives]
228 --
229 -- 2. The 'DEFAULT' case alternative must be first in the list,
230 -- if it occurs at all.
231 --
232 -- 3. The remaining cases are in order of increasing
233 -- tag (for 'DataAlts') or
234 -- lit (for 'LitAlts').
235 -- This makes finding the relevant constructor easy,
236 -- and makes comparison easier too.
237 --
238 -- 4. The list of alternatives must be exhaustive. An /exhaustive/ case
239 -- does not necessarily mention all constructors:
240 --
241 -- @
242 -- data Foo = Red | Green | Blue
243 -- ... case x of
244 -- Red -> True
245 -- other -> f (case x of
246 -- Green -> ...
247 -- Blue -> ... ) ...
248 -- @
249 --
250 -- The inner case does not need a @Red@ alternative, because @x@
251 -- can't be @Red@ at that program point.
252 --
253 -- 5. Floating-point values must not be scrutinised against literals.
254 -- See Trac #9238 and Note [Rules for floating-point comparisons]
255 -- in PrelRules for rationale.
256 --
257 -- * Cast an expression to a particular type.
258 -- This is used to implement @newtype@s (a @newtype@ constructor or
259 -- destructor just becomes a 'Cast' in Core) and GADTs.
260 --
261 -- * Notes. These allow general information to be added to expressions
262 -- in the syntax tree
263 --
264 -- * A type: this should only show up at the top level of an Arg
265 --
266 -- * A coercion
267
268 -- If you edit this type, you may need to update the GHC formalism
269 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
270 data Expr b
271 = Var Id
272 | Lit Literal
273 | App (Expr b) (Arg b)
274 | Lam b (Expr b)
275 | Let (Bind b) (Expr b)
276 | Case (Expr b) b Type [Alt b] -- See #case_invariants#
277 | Cast (Expr b) Coercion
278 | Tick (Tickish Id) (Expr b)
279 | Type Type
280 | Coercion Coercion
281 deriving Data
282
283 -- | Type synonym for expressions that occur in function argument positions.
284 -- Only 'Arg' should contain a 'Type' at top level, general 'Expr' should not
285 type Arg b = Expr b
286
287 -- | A case split alternative. Consists of the constructor leading to the alternative,
288 -- the variables bound from the constructor, and the expression to be executed given that binding.
289 -- The default alternative is @(DEFAULT, [], rhs)@
290
291 -- If you edit this type, you may need to update the GHC formalism
292 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
293 type Alt b = (AltCon, [b], Expr b)
294
295 -- | A case alternative constructor (i.e. pattern match)
296
297 -- If you edit this type, you may need to update the GHC formalism
298 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
299 data AltCon
300 = DataAlt DataCon -- ^ A plain data constructor: @case e of { Foo x -> ... }@.
301 -- Invariant: the 'DataCon' is always from a @data@ type, and never from a @newtype@
302
303 | LitAlt Literal -- ^ A literal: @case e of { 1 -> ... }@
304 -- Invariant: always an *unlifted* literal
305 -- See Note [Literal alternatives]
306
307 | DEFAULT -- ^ Trivial alternative: @case e of { _ -> ... }@
308 deriving (Eq, Data)
309
310 -- | Binding, used for top level bindings in a module and local bindings in a @let@.
311
312 -- If you edit this type, you may need to update the GHC formalism
313 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
314 data Bind b = NonRec b (Expr b)
315 | Rec [(b, (Expr b))]
316 deriving Data
317
318 {-
319 Note [Shadowing]
320 ~~~~~~~~~~~~~~~~
321 While various passes attempt to rename on-the-fly in a manner that
322 avoids "shadowing" (thereby simplifying downstream optimizations),
323 neither the simplifier nor any other pass GUARANTEES that shadowing is
324 avoided. Thus, all passes SHOULD work fine even in the presence of
325 arbitrary shadowing in their inputs.
326
327 In particular, scrutinee variables `x` in expressions of the form
328 `Case e x t` are often renamed to variables with a prefix
329 "wild_". These "wild" variables may appear in the body of the
330 case-expression, and further, may be shadowed within the body.
331
332 So the Unique in an Var is not really unique at all. Still, it's very
333 useful to give a constant-time equality/ordering for Vars, and to give
334 a key that can be used to make sets of Vars (VarSet), or mappings from
335 Vars to other things (VarEnv). Moreover, if you do want to eliminate
336 shadowing, you can give a new Unique to an Id without changing its
337 printable name, which makes debugging easier.
338
339 Note [Literal alternatives]
340 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
341 Literal alternatives (LitAlt lit) are always for *un-lifted* literals.
342 We have one literal, a literal Integer, that is lifted, and we don't
343 allow in a LitAlt, because LitAlt cases don't do any evaluation. Also
344 (see Trac #5603) if you say
345 case 3 of
346 S# x -> ...
347 J# _ _ -> ...
348 (where S#, J# are the constructors for Integer) we don't want the
349 simplifier calling findAlt with argument (LitAlt 3). No no. Integer
350 literals are an opaque encoding of an algebraic data type, not of
351 an unlifted literal, like all the others.
352
353 Also, we do not permit case analysis with literal patterns on floating-point
354 types. See Trac #9238 and Note [Rules for floating-point comparisons] in
355 PrelRules for the rationale for this restriction.
356
357 -------------------------- CoreSyn INVARIANTS ---------------------------
358
359 Note [CoreSyn top-level invariant]
360 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
361 See #toplevel_invariant#
362
363 Note [CoreSyn letrec invariant]
364 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
365 See #letrec_invariant#
366
367 Note [CoreSyn top-level string literals]
368 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
369 As an exception to the usual rule that top-level binders must be lifted,
370 we allow binding primitive string literals (of type Addr#) of type Addr# at the
371 top level. This allows us to share string literals earlier in the pipeline and
372 crucially allows other optimizations in the Core2Core pipeline to fire.
373 Consider,
374
375 f n = let a::Addr# = "foo"#
376 in \x -> blah
377
378 In order to be able to inline `f`, we would like to float `a` to the top.
379 Another option would be to inline `a`, but that would lead to duplicating string
380 literals, which we want to avoid. See Trac #8472.
381
382 The solution is simply to allow top-level unlifted binders. We can't allow
383 arbitrary unlifted expression at the top-level though, unlifted binders cannot
384 be thunks, so we just allow string literals.
385
386 It is important to note that top-level primitive string literals cannot be
387 wrapped in Ticks, as is otherwise done with lifted bindings. CoreToStg expects
388 to see just a plain (Lit (MachStr ...)) expression on the RHS of primitive
389 string bindings; anything else and things break. CoreLint checks this invariant.
390
391 Also see Note [Compilation plan for top-level string literals].
392
393 Note [Compilation plan for top-level string literals]
394 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
395 Here is a summary on how top-level string literals are handled by various
396 parts of the compilation pipeline.
397
398 * In the source language, there is no way to bind a primitive string literal
399 at the top leve.
400
401 * In Core, we have a special rule that permits top-level Addr# bindings. See
402 Note [CoreSyn top-level string literals]. Core-to-core passes may introduce
403 new top-level string literals.
404
405 * In STG, top-level string literals are explicitly represented in the syntax
406 tree.
407
408 * A top-level string literal may end up exported from a module. In this case,
409 in the object file, the content of the exported literal is given a label with
410 the _bytes suffix.
411
412 Note [CoreSyn let/app invariant]
413 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
414 The let/app invariant
415 the right hand side of a non-recursive 'Let', and
416 the argument of an 'App',
417 /may/ be of unlifted type, but only if
418 the expression is ok-for-speculation
419 or the 'Let' is for a join point.
420
421 This means that the let can be floated around
422 without difficulty. For example, this is OK:
423
424 y::Int# = x +# 1#
425
426 But this is not, as it may affect termination if the
427 expression is floated out:
428
429 y::Int# = fac 4#
430
431 In this situation you should use @case@ rather than a @let@. The function
432 'CoreUtils.needsCaseBinding' can help you determine which to generate, or
433 alternatively use 'MkCore.mkCoreLet' rather than this constructor directly,
434 which will generate a @case@ if necessary
435
436 The let/app invariant is initially enforced by mkCoreLet and mkCoreApp in
437 coreSyn/MkCore.
438
439 Note [CoreSyn case invariants]
440 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
441 See #case_invariants#
442
443 Note [Levity polymorphism invariants]
444 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
445 The levity-polymorphism invariants are these:
446
447 * The type of a term-binder must not be levity-polymorphic
448 * The type of the argument of an App must not be levity-polymorphic.
449
450 A type (t::TYPE r) is "levity polymorphic" if 'r' has any free variables.
451
452 For example
453 \(r::RuntimeRep). \(a::TYPE r). \(x::a). e
454 is illegal because x's type has kind (TYPE r), which has 'r' free.
455
456 See Note [Levity polymorphism checking] in DsMonad to see where these
457 invariants are established for user-written code.
458
459 Note [CoreSyn let goal]
460 ~~~~~~~~~~~~~~~~~~~~~~~
461 * The simplifier tries to ensure that if the RHS of a let is a constructor
462 application, its arguments are trivial, so that the constructor can be
463 inlined vigorously.
464
465 Note [Type let]
466 ~~~~~~~~~~~~~~~
467 See #type_let#
468
469 Note [Empty case alternatives]
470 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
471 The alternatives of a case expression should be exhaustive. But
472 this exhaustive list can be empty!
473
474 * A case expression can have empty alternatives if (and only if) the
475 scrutinee is bound to raise an exception or diverge. When do we know
476 this? See Note [Bottoming expressions] in CoreUtils.
477
478 * The possiblity of empty alternatives is one reason we need a type on
479 the case expression: if the alternatives are empty we can't get the
480 type from the alternatives!
481
482 * In the case of empty types (see Note [Bottoming expressions]), say
483 data T
484 we do NOT want to replace
485 case (x::T) of Bool {} --> error Bool "Inaccessible case"
486 because x might raise an exception, and *that*'s what we want to see!
487 (Trac #6067 is an example.) To preserve semantics we'd have to say
488 x `seq` error Bool "Inaccessible case"
489 but the 'seq' is just a case, so we are back to square 1. Or I suppose
490 we could say
491 x |> UnsafeCoerce T Bool
492 but that loses all trace of the fact that this originated with an empty
493 set of alternatives.
494
495 * We can use the empty-alternative construct to coerce error values from
496 one type to another. For example
497
498 f :: Int -> Int
499 f n = error "urk"
500
501 g :: Int -> (# Char, Bool #)
502 g x = case f x of { 0 -> ..., n -> ... }
503
504 Then if we inline f in g's RHS we get
505 case (error Int "urk") of (# Char, Bool #) { ... }
506 and we can discard the alternatives since the scrutinee is bottom to give
507 case (error Int "urk") of (# Char, Bool #) {}
508
509 This is nicer than using an unsafe coerce between Int ~ (# Char,Bool #),
510 if for no other reason that we don't need to instantiate the (~) at an
511 unboxed type.
512
513 * We treat a case expression with empty alternatives as trivial iff
514 its scrutinee is (see CoreUtils.exprIsTrivial). This is actually
515 important; see Note [Empty case is trivial] in CoreUtils
516
517 * An empty case is replaced by its scrutinee during the CoreToStg
518 conversion; remember STG is un-typed, so there is no need for
519 the empty case to do the type conversion.
520
521 Note [Join points]
522 ~~~~~~~~~~~~~~~~~~
523 In Core, a *join point* is a specially tagged function whose only occurrences
524 are saturated tail calls. A tail call can appear in these places:
525
526 1. In the branches (not the scrutinee) of a case
527 2. Underneath a let (value or join point)
528 3. Inside another join point
529
530 We write a join-point declaration as
531 join j @a @b x y = e1 in e2,
532 like a let binding but with "join" instead (or "join rec" for "let rec"). Note
533 that we put the parameters before the = rather than using lambdas; this is
534 because it's relevant how many parameters the join point takes *as a join
535 point.* This number is called the *join arity,* distinct from arity because it
536 counts types as well as values. Note that a join point may return a lambda! So
537 join j x = x + 1
538 is different from
539 join j = \x -> x + 1
540 The former has join arity 1, while the latter has join arity 0.
541
542 The identifier for a join point is called a join id or a *label.* An invocation
543 is called a *jump.* We write a jump using the jump keyword:
544
545 jump j 3
546
547 The words *label* and *jump* are evocative of assembly code (or Cmm) for a
548 reason: join points are indeed compiled as labeled blocks, and jumps become
549 actual jumps (plus argument passing and stack adjustment). There is no closure
550 allocated and only a fraction of the function-call overhead. Hence we would
551 like as many functions as possible to become join points (see OccurAnal) and
552 the type rules for join points ensure we preserve the properties that make them
553 efficient.
554
555 In the actual AST, a join point is indicated by the IdDetails of the binder: a
556 local value binding gets 'VanillaId' but a join point gets a 'JoinId' with its
557 join arity.
558
559 For more details, see the paper:
560
561 Luke Maurer, Paul Downen, Zena Ariola, and Simon Peyton Jones. "Compiling
562 without continuations." Submitted to PLDI'17.
563
564 https://www.microsoft.com/en-us/research/publication/compiling-without-continuations/
565
566 Note [Invariants on join points]
567 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
568 Join points must follow these invariants:
569
570 1. All occurrences must be tail calls. Each of these tail calls must pass the
571 same number of arguments, counting both types and values; we call this the
572 "join arity" (to distinguish from regular arity, which only counts values).
573 2. For join arity n, the right-hand side must begin with at least n lambdas.
574 3. If the binding is recursive, then all other bindings in the recursive group
575 must also be join points.
576 4. The binding's type must not be polymorphic in its return type (as defined
577 in Note [The polymorphism rule of join points]).
578
579 Examples:
580
581 join j1 x = 1 + x in jump j (jump j x) -- Fails 1: non-tail call
582 join j1' x = 1 + x in if even a
583 then jump j1 a
584 else jump j1 a b -- Fails 1: inconsistent calls
585 join j2 x = flip (+) x in j2 1 2 -- Fails 2: not enough lambdas
586 join j2' x = \y -> x + y in j3 1 -- Passes: extra lams ok
587 join j @a (x :: a) = x -- Fails 4: polymorphic in ret type
588
589 Invariant 1 applies to left-hand sides of rewrite rules, so a rule for a join
590 point must have an exact call as its LHS.
591
592 Strictly speaking, invariant 3 is redundant, since a call from inside a lazy
593 binding isn't a tail call. Since a let-bound value can't invoke a free join
594 point, then, they can't be mutually recursive. (A Core binding group *can*
595 include spurious extra bindings if the occurrence analyser hasn't run, so
596 invariant 3 does still need to be checked.) For the rigorous definition of
597 "tail call", see Section 3 of the paper (Note [Join points]).
598
599 Invariant 4 is subtle; see Note [The polymorphism rule of join points].
600
601 Core Lint will check these invariants, anticipating that any binder whose
602 OccInfo is marked AlwaysTailCalled will become a join point as soon as the
603 simplifier (or simpleOptPgm) runs.
604
605 Note [The type of a join point]
606 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
607 A join point has the same type it would have as a function. That is, if it takes
608 an Int and a Bool and its body produces a String, its type is `Int -> Bool ->
609 String`. Natural as this may seem, it can be awkward. A join point shouldn't be
610 thought to "return" in the same sense a function does---a jump is one-way. This
611 is crucial for understanding how case-of-case interacts with join points:
612
613 case (join
614 j :: Int -> Bool -> String
615 j x y = ...
616 in
617 jump j z w) of
618 "" -> True
619 _ -> False
620
621 The simplifier will pull the case into the join point (see Note [Case-of-case
622 and join points] in Simplify):
623
624 join
625 j :: Int -> Bool -> Bool -- changed!
626 j x y = case ... of "" -> True
627 _ -> False
628 in
629 jump j z w
630
631 The body of the join point now returns a Bool, so the label `j` has to have its
632 type updated accordingly. Inconvenient though this may be, it has the advantage
633 that 'CoreUtils.exprType' can still return a type for any expression, including
634 a jump.
635
636 This differs from the paper (see Note [Invariants on join points]). In the
637 paper, we instead give j the type `Int -> Bool -> forall a. a`. Then each jump
638 carries the "return type" as a parameter, exactly the way other non-returning
639 functions like `error` work:
640
641 case (join
642 j :: Int -> Bool -> forall a. a
643 j x y = ...
644 in
645 jump j z w @String) of
646 "" -> True
647 _ -> False
648
649 Now we can move the case inward and we only have to change the jump:
650
651 join
652 j :: Int -> Bool -> forall a. a
653 j x y = case ... of "" -> True
654 _ -> False
655 in
656 jump j z w @Bool
657
658 (Core Lint would still check that the body of the join point has the right type;
659 that type would simply not be reflected in the join id.)
660
661 Note [The polymorphism rule of join points]
662 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
663 Invariant 4 of Note [Invariants on join points] forbids a join point to be
664 polymorphic in its return type. That is, if its type is
665
666 forall a1 ... ak. t1 -> ... -> tn -> r
667
668 where its join arity is k+n, none of the type parameters ai may occur free in r.
669 The most direct explanation is that given
670
671 join j @a1 ... @ak x1 ... xn = e1 in e2
672
673 our typing rules require `e1` and `e2` to have the same type. Therefore the type
674 of `e1`---the return type of the join point---must be the same as the type of
675 e2. Since the type variables aren't bound in `e2`, its type can't include them,
676 and thus neither can the type of `e1`.
677
678 There's a deeper explanation in terms of the sequent calculus in Section 5.3 of
679 a previous paper:
680
681 Paul Downen, Luke Maurer, Zena Ariola, and Simon Peyton Jones. "Sequent
682 calculus as a compiler intermediate language." ICFP'16.
683
684 https://www.microsoft.com/en-us/research/wp-content/uploads/2016/04/sequent-calculus-icfp16.pdf
685
686 The quick version: Consider the CPS term (the paper uses the sequent calculus,
687 but we can translate readily):
688
689 \k -> join j @a1 ... @ak x1 ... xn = e1 k in e2 k
690
691 Since `j` is a join point, it doesn't bind a continuation variable but reuses
692 the variable `k` from the context. But the parameters `ai` are not in `k`'s
693 scope, and `k`'s type determines the return type of `j`; thus the `ai`s don't
694 appear in the return type of `j`. (Also, since `e1` and `e2` are passed the same
695 continuation, they must have the same type; hence the direct explanation above.)
696
697 ************************************************************************
698 * *
699 In/Out type synonyms
700 * *
701 ********************************************************************* -}
702
703 {- Many passes apply a substitution, and it's very handy to have type
704 synonyms to remind us whether or not the substitution has been applied -}
705
706 -- Pre-cloning or substitution
707 type InBndr = CoreBndr
708 type InType = Type
709 type InKind = Kind
710 type InBind = CoreBind
711 type InExpr = CoreExpr
712 type InAlt = CoreAlt
713 type InArg = CoreArg
714 type InCoercion = Coercion
715
716 -- Post-cloning or substitution
717 type OutBndr = CoreBndr
718 type OutType = Type
719 type OutKind = Kind
720 type OutCoercion = Coercion
721 type OutBind = CoreBind
722 type OutExpr = CoreExpr
723 type OutAlt = CoreAlt
724 type OutArg = CoreArg
725
726
727 {- *********************************************************************
728 * *
729 Ticks
730 * *
731 ************************************************************************
732 -}
733
734 -- | Allows attaching extra information to points in expressions
735
736 -- If you edit this type, you may need to update the GHC formalism
737 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
738 data Tickish id =
739 -- | An @{-# SCC #-}@ profiling annotation, either automatically
740 -- added by the desugarer as a result of -auto-all, or added by
741 -- the user.
742 ProfNote {
743 profNoteCC :: CostCentre, -- ^ the cost centre
744 profNoteCount :: !Bool, -- ^ bump the entry count?
745 profNoteScope :: !Bool -- ^ scopes over the enclosed expression
746 -- (i.e. not just a tick)
747 }
748
749 -- | A "tick" used by HPC to track the execution of each
750 -- subexpression in the original source code.
751 | HpcTick {
752 tickModule :: Module,
753 tickId :: !Int
754 }
755
756 -- | A breakpoint for the GHCi debugger. This behaves like an HPC
757 -- tick, but has a list of free variables which will be available
758 -- for inspection in GHCi when the program stops at the breakpoint.
759 --
760 -- NB. we must take account of these Ids when (a) counting free variables,
761 -- and (b) substituting (don't substitute for them)
762 | Breakpoint
763 { breakpointId :: !Int
764 , breakpointFVs :: [id] -- ^ the order of this list is important:
765 -- it matches the order of the lists in the
766 -- appropriate entry in HscTypes.ModBreaks.
767 --
768 -- Careful about substitution! See
769 -- Note [substTickish] in CoreSubst.
770 }
771
772 -- | A source note.
773 --
774 -- Source notes are pure annotations: Their presence should neither
775 -- influence compilation nor execution. The semantics are given by
776 -- causality: The presence of a source note means that a local
777 -- change in the referenced source code span will possibly provoke
778 -- the generated code to change. On the flip-side, the functionality
779 -- of annotated code *must* be invariant against changes to all
780 -- source code *except* the spans referenced in the source notes
781 -- (see "Causality of optimized Haskell" paper for details).
782 --
783 -- Therefore extending the scope of any given source note is always
784 -- valid. Note that it is still undesirable though, as this reduces
785 -- their usefulness for debugging and profiling. Therefore we will
786 -- generally try only to make use of this property where it is
787 -- necessary to enable optimizations.
788 | SourceNote
789 { sourceSpan :: RealSrcSpan -- ^ Source covered
790 , sourceName :: String -- ^ Name for source location
791 -- (uses same names as CCs)
792 }
793
794 deriving (Eq, Ord, Data)
795
796 -- | A "counting tick" (where tickishCounts is True) is one that
797 -- counts evaluations in some way. We cannot discard a counting tick,
798 -- and the compiler should preserve the number of counting ticks as
799 -- far as possible.
800 --
801 -- However, we still allow the simplifier to increase or decrease
802 -- sharing, so in practice the actual number of ticks may vary, except
803 -- that we never change the value from zero to non-zero or vice versa.
804 tickishCounts :: Tickish id -> Bool
805 tickishCounts n@ProfNote{} = profNoteCount n
806 tickishCounts HpcTick{} = True
807 tickishCounts Breakpoint{} = True
808 tickishCounts _ = False
809
810
811 -- | Specifies the scoping behaviour of ticks. This governs the
812 -- behaviour of ticks that care about the covered code and the cost
813 -- associated with it. Important for ticks relating to profiling.
814 data TickishScoping =
815 -- | No scoping: The tick does not care about what code it
816 -- covers. Transformations can freely move code inside as well as
817 -- outside without any additional annotation obligations
818 NoScope
819
820 -- | Soft scoping: We want all code that is covered to stay
821 -- covered. Note that this scope type does not forbid
822 -- transformations from happening, as as long as all results of
823 -- the transformations are still covered by this tick or a copy of
824 -- it. For example
825 --
826 -- let x = tick<...> (let y = foo in bar) in baz
827 -- ===>
828 -- let x = tick<...> bar; y = tick<...> foo in baz
829 --
830 -- Is a valid transformation as far as "bar" and "foo" is
831 -- concerned, because both still are scoped over by the tick.
832 --
833 -- Note though that one might object to the "let" not being
834 -- covered by the tick any more. However, we are generally lax
835 -- with this - constant costs don't matter too much, and given
836 -- that the "let" was effectively merged we can view it as having
837 -- lost its identity anyway.
838 --
839 -- Also note that this scoping behaviour allows floating a tick
840 -- "upwards" in pretty much any situation. For example:
841 --
842 -- case foo of x -> tick<...> bar
843 -- ==>
844 -- tick<...> case foo of x -> bar
845 --
846 -- While this is always leagl, we want to make a best effort to
847 -- only make us of this where it exposes transformation
848 -- opportunities.
849 | SoftScope
850
851 -- | Cost centre scoping: We don't want any costs to move to other
852 -- cost-centre stacks. This means we not only want no code or cost
853 -- to get moved out of their cost centres, but we also object to
854 -- code getting associated with new cost-centre ticks - or
855 -- changing the order in which they get applied.
856 --
857 -- A rule of thumb is that we don't want any code to gain new
858 -- annotations. However, there are notable exceptions, for
859 -- example:
860 --
861 -- let f = \y -> foo in tick<...> ... (f x) ...
862 -- ==>
863 -- tick<...> ... foo[x/y] ...
864 --
865 -- In-lining lambdas like this is always legal, because inlining a
866 -- function does not change the cost-centre stack when the
867 -- function is called.
868 | CostCentreScope
869
870 deriving (Eq)
871
872 -- | Returns the intended scoping rule for a Tickish
873 tickishScoped :: Tickish id -> TickishScoping
874 tickishScoped n@ProfNote{}
875 | profNoteScope n = CostCentreScope
876 | otherwise = NoScope
877 tickishScoped HpcTick{} = NoScope
878 tickishScoped Breakpoint{} = CostCentreScope
879 -- Breakpoints are scoped: eventually we're going to do call
880 -- stacks, but also this helps prevent the simplifier from moving
881 -- breakpoints around and changing their result type (see #1531).
882 tickishScoped SourceNote{} = SoftScope
883
884 -- | Returns whether the tick scoping rule is at least as permissive
885 -- as the given scoping rule.
886 tickishScopesLike :: Tickish id -> TickishScoping -> Bool
887 tickishScopesLike t scope = tickishScoped t `like` scope
888 where NoScope `like` _ = True
889 _ `like` NoScope = False
890 SoftScope `like` _ = True
891 _ `like` SoftScope = False
892 CostCentreScope `like` _ = True
893
894 -- | Returns @True@ for ticks that can be floated upwards easily even
895 -- where it might change execution counts, such as:
896 --
897 -- Just (tick<...> foo)
898 -- ==>
899 -- tick<...> (Just foo)
900 --
901 -- This is a combination of @tickishSoftScope@ and
902 -- @tickishCounts@. Note that in principle splittable ticks can become
903 -- floatable using @mkNoTick@ -- even though there's currently no
904 -- tickish for which that is the case.
905 tickishFloatable :: Tickish id -> Bool
906 tickishFloatable t = t `tickishScopesLike` SoftScope && not (tickishCounts t)
907
908 -- | Returns @True@ for a tick that is both counting /and/ scoping and
909 -- can be split into its (tick, scope) parts using 'mkNoScope' and
910 -- 'mkNoTick' respectively.
911 tickishCanSplit :: Tickish id -> Bool
912 tickishCanSplit ProfNote{profNoteScope = True, profNoteCount = True}
913 = True
914 tickishCanSplit _ = False
915
916 mkNoCount :: Tickish id -> Tickish id
917 mkNoCount n | not (tickishCounts n) = n
918 | not (tickishCanSplit n) = panic "mkNoCount: Cannot split!"
919 mkNoCount n@ProfNote{} = n {profNoteCount = False}
920 mkNoCount _ = panic "mkNoCount: Undefined split!"
921
922 mkNoScope :: Tickish id -> Tickish id
923 mkNoScope n | tickishScoped n == NoScope = n
924 | not (tickishCanSplit n) = panic "mkNoScope: Cannot split!"
925 mkNoScope n@ProfNote{} = n {profNoteScope = False}
926 mkNoScope _ = panic "mkNoScope: Undefined split!"
927
928 -- | Return @True@ if this source annotation compiles to some backend
929 -- code. Without this flag, the tickish is seen as a simple annotation
930 -- that does not have any associated evaluation code.
931 --
932 -- What this means that we are allowed to disregard the tick if doing
933 -- so means that we can skip generating any code in the first place. A
934 -- typical example is top-level bindings:
935 --
936 -- foo = tick<...> \y -> ...
937 -- ==>
938 -- foo = \y -> tick<...> ...
939 --
940 -- Here there is just no operational difference between the first and
941 -- the second version. Therefore code generation should simply
942 -- translate the code as if it found the latter.
943 tickishIsCode :: Tickish id -> Bool
944 tickishIsCode SourceNote{} = False
945 tickishIsCode _tickish = True -- all the rest for now
946
947
948 -- | Governs the kind of expression that the tick gets placed on when
949 -- annotating for example using @mkTick@. If we find that we want to
950 -- put a tickish on an expression ruled out here, we try to float it
951 -- inwards until we find a suitable expression.
952 data TickishPlacement =
953
954 -- | Place ticks exactly on run-time expressions. We can still
955 -- move the tick through pure compile-time constructs such as
956 -- other ticks, casts or type lambdas. This is the most
957 -- restrictive placement rule for ticks, as all tickishs have in
958 -- common that they want to track runtime processes. The only
959 -- legal placement rule for counting ticks.
960 PlaceRuntime
961
962 -- | As @PlaceRuntime@, but we float the tick through all
963 -- lambdas. This makes sense where there is little difference
964 -- between annotating the lambda and annotating the lambda's code.
965 | PlaceNonLam
966
967 -- | In addition to floating through lambdas, cost-centre style
968 -- tickishs can also be moved from constructors, non-function
969 -- variables and literals. For example:
970 --
971 -- let x = scc<...> C (scc<...> y) (scc<...> 3) in ...
972 --
973 -- Neither the constructor application, the variable or the
974 -- literal are likely to have any cost worth mentioning. And even
975 -- if y names a thunk, the call would not care about the
976 -- evaluation context. Therefore removing all annotations in the
977 -- above example is safe.
978 | PlaceCostCentre
979
980 deriving (Eq)
981
982 -- | Placement behaviour we want for the ticks
983 tickishPlace :: Tickish id -> TickishPlacement
984 tickishPlace n@ProfNote{}
985 | profNoteCount n = PlaceRuntime
986 | otherwise = PlaceCostCentre
987 tickishPlace HpcTick{} = PlaceRuntime
988 tickishPlace Breakpoint{} = PlaceRuntime
989 tickishPlace SourceNote{} = PlaceNonLam
990
991 -- | Returns whether one tick "contains" the other one, therefore
992 -- making the second tick redundant.
993 tickishContains :: Eq b => Tickish b -> Tickish b -> Bool
994 tickishContains (SourceNote sp1 n1) (SourceNote sp2 n2)
995 = containsSpan sp1 sp2 && n1 == n2
996 -- compare the String last
997 tickishContains t1 t2
998 = t1 == t2
999
1000 {-
1001 ************************************************************************
1002 * *
1003 Orphans
1004 * *
1005 ************************************************************************
1006 -}
1007
1008 -- | Is this instance an orphan? If it is not an orphan, contains an 'OccName'
1009 -- witnessing the instance's non-orphanhood.
1010 -- See Note [Orphans]
1011 data IsOrphan
1012 = IsOrphan
1013 | NotOrphan OccName -- The OccName 'n' witnesses the instance's non-orphanhood
1014 -- In that case, the instance is fingerprinted as part
1015 -- of the definition of 'n's definition
1016 deriving Data
1017
1018 -- | Returns true if 'IsOrphan' is orphan.
1019 isOrphan :: IsOrphan -> Bool
1020 isOrphan IsOrphan = True
1021 isOrphan _ = False
1022
1023 -- | Returns true if 'IsOrphan' is not an orphan.
1024 notOrphan :: IsOrphan -> Bool
1025 notOrphan NotOrphan{} = True
1026 notOrphan _ = False
1027
1028 chooseOrphanAnchor :: NameSet -> IsOrphan
1029 -- Something (rule, instance) is relate to all the Names in this
1030 -- list. Choose one of them to be an "anchor" for the orphan. We make
1031 -- the choice deterministic to avoid gratuitious changes in the ABI
1032 -- hash (Trac #4012). Specifically, use lexicographic comparison of
1033 -- OccName rather than comparing Uniques
1034 --
1035 -- NB: 'minimum' use Ord, and (Ord OccName) works lexicographically
1036 --
1037 chooseOrphanAnchor local_names
1038 | isEmptyNameSet local_names = IsOrphan
1039 | otherwise = NotOrphan (minimum occs)
1040 where
1041 occs = map nameOccName $ nonDetEltsUFM local_names
1042 -- It's OK to use nonDetEltsUFM here, see comments above
1043
1044 instance Binary IsOrphan where
1045 put_ bh IsOrphan = putByte bh 0
1046 put_ bh (NotOrphan n) = do
1047 putByte bh 1
1048 put_ bh n
1049 get bh = do
1050 h <- getByte bh
1051 case h of
1052 0 -> return IsOrphan
1053 _ -> do
1054 n <- get bh
1055 return $ NotOrphan n
1056
1057 {-
1058 Note [Orphans]
1059 ~~~~~~~~~~~~~~
1060 Class instances, rules, and family instances are divided into orphans
1061 and non-orphans. Roughly speaking, an instance/rule is an orphan if
1062 its left hand side mentions nothing defined in this module. Orphan-hood
1063 has two major consequences
1064
1065 * A module that contains orphans is called an "orphan module". If
1066 the module being compiled depends (transitively) on an oprhan
1067 module M, then M.hi is read in regardless of whether M is oherwise
1068 needed. This is to ensure that we don't miss any instance decls in
1069 M. But it's painful, because it means we need to keep track of all
1070 the orphan modules below us.
1071
1072 * A non-orphan is not finger-printed separately. Instead, for
1073 fingerprinting purposes it is treated as part of the entity it
1074 mentions on the LHS. For example
1075 data T = T1 | T2
1076 instance Eq T where ....
1077 The instance (Eq T) is incorprated as part of T's fingerprint.
1078
1079 In contrast, orphans are all fingerprinted together in the
1080 mi_orph_hash field of the ModIface.
1081
1082 See MkIface.addFingerprints.
1083
1084 Orphan-hood is computed
1085 * For class instances:
1086 when we make a ClsInst
1087 (because it is needed during instance lookup)
1088
1089 * For rules and family instances:
1090 when we generate an IfaceRule (MkIface.coreRuleToIfaceRule)
1091 or IfaceFamInst (MkIface.instanceToIfaceInst)
1092 -}
1093
1094 {-
1095 ************************************************************************
1096 * *
1097 \subsection{Transformation rules}
1098 * *
1099 ************************************************************************
1100
1101 The CoreRule type and its friends are dealt with mainly in CoreRules,
1102 but CoreFVs, Subst, PprCore, CoreTidy also inspect the representation.
1103 -}
1104
1105 -- | Gathers a collection of 'CoreRule's. Maps (the name of) an 'Id' to its rules
1106 type RuleBase = NameEnv [CoreRule]
1107 -- The rules are unordered;
1108 -- we sort out any overlaps on lookup
1109
1110 -- | A full rule environment which we can apply rules from. Like a 'RuleBase',
1111 -- but it also includes the set of visible orphans we use to filter out orphan
1112 -- rules which are not visible (even though we can see them...)
1113 data RuleEnv
1114 = RuleEnv { re_base :: RuleBase
1115 , re_visible_orphs :: ModuleSet
1116 }
1117
1118 mkRuleEnv :: RuleBase -> [Module] -> RuleEnv
1119 mkRuleEnv rules vis_orphs = RuleEnv rules (mkModuleSet vis_orphs)
1120
1121 emptyRuleEnv :: RuleEnv
1122 emptyRuleEnv = RuleEnv emptyNameEnv emptyModuleSet
1123
1124 -- | A 'CoreRule' is:
1125 --
1126 -- * \"Local\" if the function it is a rule for is defined in the
1127 -- same module as the rule itself.
1128 --
1129 -- * \"Orphan\" if nothing on the LHS is defined in the same module
1130 -- as the rule itself
1131 data CoreRule
1132 = Rule {
1133 ru_name :: RuleName, -- ^ Name of the rule, for communication with the user
1134 ru_act :: Activation, -- ^ When the rule is active
1135
1136 -- Rough-matching stuff
1137 -- see comments with InstEnv.ClsInst( is_cls, is_rough )
1138 ru_fn :: Name, -- ^ Name of the 'Id.Id' at the head of this rule
1139 ru_rough :: [Maybe Name], -- ^ Name at the head of each argument to the left hand side
1140
1141 -- Proper-matching stuff
1142 -- see comments with InstEnv.ClsInst( is_tvs, is_tys )
1143 ru_bndrs :: [CoreBndr], -- ^ Variables quantified over
1144 ru_args :: [CoreExpr], -- ^ Left hand side arguments
1145
1146 -- And the right-hand side
1147 ru_rhs :: CoreExpr, -- ^ Right hand side of the rule
1148 -- Occurrence info is guaranteed correct
1149 -- See Note [OccInfo in unfoldings and rules]
1150
1151 -- Locality
1152 ru_auto :: Bool, -- ^ @True@ <=> this rule is auto-generated
1153 -- (notably by Specialise or SpecConstr)
1154 -- @False@ <=> generated at the user's behest
1155 -- See Note [Trimming auto-rules] in TidyPgm
1156 -- for the sole purpose of this field.
1157
1158 ru_origin :: !Module, -- ^ 'Module' the rule was defined in, used
1159 -- to test if we should see an orphan rule.
1160
1161 ru_orphan :: !IsOrphan, -- ^ Whether or not the rule is an orphan.
1162
1163 ru_local :: Bool -- ^ @True@ iff the fn at the head of the rule is
1164 -- defined in the same module as the rule
1165 -- and is not an implicit 'Id' (like a record selector,
1166 -- class operation, or data constructor). This
1167 -- is different from 'ru_orphan', where a rule
1168 -- can avoid being an orphan if *any* Name in
1169 -- LHS of the rule was defined in the same
1170 -- module as the rule.
1171 }
1172
1173 -- | Built-in rules are used for constant folding
1174 -- and suchlike. They have no free variables.
1175 -- A built-in rule is always visible (there is no such thing as
1176 -- an orphan built-in rule.)
1177 | BuiltinRule {
1178 ru_name :: RuleName, -- ^ As above
1179 ru_fn :: Name, -- ^ As above
1180 ru_nargs :: Int, -- ^ Number of arguments that 'ru_try' consumes,
1181 -- if it fires, including type arguments
1182 ru_try :: RuleFun
1183 -- ^ This function does the rewrite. It given too many
1184 -- arguments, it simply discards them; the returned 'CoreExpr'
1185 -- is just the rewrite of 'ru_fn' applied to the first 'ru_nargs' args
1186 }
1187 -- See Note [Extra args in rule matching] in Rules.hs
1188
1189 type RuleFun = DynFlags -> InScopeEnv -> Id -> [CoreExpr] -> Maybe CoreExpr
1190 type InScopeEnv = (InScopeSet, IdUnfoldingFun)
1191
1192 type IdUnfoldingFun = Id -> Unfolding
1193 -- A function that embodies how to unfold an Id if you need
1194 -- to do that in the Rule. The reason we need to pass this info in
1195 -- is that whether an Id is unfoldable depends on the simplifier phase
1196
1197 isBuiltinRule :: CoreRule -> Bool
1198 isBuiltinRule (BuiltinRule {}) = True
1199 isBuiltinRule _ = False
1200
1201 isAutoRule :: CoreRule -> Bool
1202 isAutoRule (BuiltinRule {}) = False
1203 isAutoRule (Rule { ru_auto = is_auto }) = is_auto
1204
1205 -- | The number of arguments the 'ru_fn' must be applied
1206 -- to before the rule can match on it
1207 ruleArity :: CoreRule -> Int
1208 ruleArity (BuiltinRule {ru_nargs = n}) = n
1209 ruleArity (Rule {ru_args = args}) = length args
1210
1211 ruleName :: CoreRule -> RuleName
1212 ruleName = ru_name
1213
1214 ruleActivation :: CoreRule -> Activation
1215 ruleActivation (BuiltinRule { }) = AlwaysActive
1216 ruleActivation (Rule { ru_act = act }) = act
1217
1218 -- | The 'Name' of the 'Id.Id' at the head of the rule left hand side
1219 ruleIdName :: CoreRule -> Name
1220 ruleIdName = ru_fn
1221
1222 isLocalRule :: CoreRule -> Bool
1223 isLocalRule = ru_local
1224
1225 -- | Set the 'Name' of the 'Id.Id' at the head of the rule left hand side
1226 setRuleIdName :: Name -> CoreRule -> CoreRule
1227 setRuleIdName nm ru = ru { ru_fn = nm }
1228
1229 {-
1230 ************************************************************************
1231 * *
1232 \subsection{Vectorisation declarations}
1233 * *
1234 ************************************************************************
1235
1236 Representation of desugared vectorisation declarations that are fed to the vectoriser (via
1237 'ModGuts').
1238 -}
1239
1240 data CoreVect = Vect Id CoreExpr
1241 | NoVect Id
1242 | VectType Bool TyCon (Maybe TyCon)
1243 | VectClass TyCon -- class tycon
1244 | VectInst Id -- instance dfun (always SCALAR) !!!FIXME: should be superfluous now
1245
1246 {-
1247 ************************************************************************
1248 * *
1249 Unfoldings
1250 * *
1251 ************************************************************************
1252
1253 The @Unfolding@ type is declared here to avoid numerous loops
1254 -}
1255
1256 -- | Records the /unfolding/ of an identifier, which is approximately the form the
1257 -- identifier would have if we substituted its definition in for the identifier.
1258 -- This type should be treated as abstract everywhere except in "CoreUnfold"
1259 data Unfolding
1260 = NoUnfolding -- ^ We have no information about the unfolding.
1261
1262 | BootUnfolding -- ^ We have no information about the unfolding, because
1263 -- this 'Id' came from an @hi-boot@ file.
1264 -- See Note [Inlining and hs-boot files] in ToIface
1265 -- for what this is used for.
1266
1267 | OtherCon [AltCon] -- ^ It ain't one of these constructors.
1268 -- @OtherCon xs@ also indicates that something has been evaluated
1269 -- and hence there's no point in re-evaluating it.
1270 -- @OtherCon []@ is used even for non-data-type values
1271 -- to indicated evaluated-ness. Notably:
1272 --
1273 -- > data C = C !(Int -> Int)
1274 -- > case x of { C f -> ... }
1275 --
1276 -- Here, @f@ gets an @OtherCon []@ unfolding.
1277
1278 | DFunUnfolding { -- The Unfolding of a DFunId
1279 -- See Note [DFun unfoldings]
1280 -- df = /\a1..am. \d1..dn. MkD t1 .. tk
1281 -- (op1 a1..am d1..dn)
1282 -- (op2 a1..am d1..dn)
1283 df_bndrs :: [Var], -- The bound variables [a1..m],[d1..dn]
1284 df_con :: DataCon, -- The dictionary data constructor (never a newtype datacon)
1285 df_args :: [CoreExpr] -- Args of the data con: types, superclasses and methods,
1286 } -- in positional order
1287
1288 | CoreUnfolding { -- An unfolding for an Id with no pragma,
1289 -- or perhaps a NOINLINE pragma
1290 -- (For NOINLINE, the phase, if any, is in the
1291 -- InlinePragInfo for this Id.)
1292 uf_tmpl :: CoreExpr, -- Template; occurrence info is correct
1293 uf_src :: UnfoldingSource, -- Where the unfolding came from
1294 uf_is_top :: Bool, -- True <=> top level binding
1295 uf_is_value :: Bool, -- exprIsHNF template (cached); it is ok to discard
1296 -- a `seq` on this variable
1297 uf_is_conlike :: Bool, -- True <=> applicn of constructor or CONLIKE function
1298 -- Cached version of exprIsConLike
1299 uf_is_work_free :: Bool, -- True <=> doesn't waste (much) work to expand
1300 -- inside an inlining
1301 -- Cached version of exprIsCheap
1302 uf_expandable :: Bool, -- True <=> can expand in RULE matching
1303 -- Cached version of exprIsExpandable
1304 uf_guidance :: UnfoldingGuidance -- Tells about the *size* of the template.
1305 }
1306 -- ^ An unfolding with redundant cached information. Parameters:
1307 --
1308 -- uf_tmpl: Template used to perform unfolding;
1309 -- NB: Occurrence info is guaranteed correct:
1310 -- see Note [OccInfo in unfoldings and rules]
1311 --
1312 -- uf_is_top: Is this a top level binding?
1313 --
1314 -- uf_is_value: 'exprIsHNF' template (cached); it is ok to discard a 'seq' on
1315 -- this variable
1316 --
1317 -- uf_is_work_free: Does this waste only a little work if we expand it inside an inlining?
1318 -- Basically this is a cached version of 'exprIsWorkFree'
1319 --
1320 -- uf_guidance: Tells us about the /size/ of the unfolding template
1321
1322
1323 ------------------------------------------------
1324 data UnfoldingSource
1325 = -- See also Note [Historical note: unfoldings for wrappers]
1326
1327 InlineRhs -- The current rhs of the function
1328 -- Replace uf_tmpl each time around
1329
1330 | InlineStable -- From an INLINE or INLINABLE pragma
1331 -- INLINE if guidance is UnfWhen
1332 -- INLINABLE if guidance is UnfIfGoodArgs/UnfoldNever
1333 -- (well, technically an INLINABLE might be made
1334 -- UnfWhen if it was small enough, and then
1335 -- it will behave like INLINE outside the current
1336 -- module, but that is the way automatic unfoldings
1337 -- work so it is consistent with the intended
1338 -- meaning of INLINABLE).
1339 --
1340 -- uf_tmpl may change, but only as a result of
1341 -- gentle simplification, it doesn't get updated
1342 -- to the current RHS during compilation as with
1343 -- InlineRhs.
1344 --
1345 -- See Note [InlineStable]
1346
1347 | InlineCompulsory -- Something that *has* no binding, so you *must* inline it
1348 -- Only a few primop-like things have this property
1349 -- (see MkId.hs, calls to mkCompulsoryUnfolding).
1350 -- Inline absolutely always, however boring the context.
1351
1352
1353
1354 -- | 'UnfoldingGuidance' says when unfolding should take place
1355 data UnfoldingGuidance
1356 = UnfWhen { -- Inline without thinking about the *size* of the uf_tmpl
1357 -- Used (a) for small *and* cheap unfoldings
1358 -- (b) for INLINE functions
1359 -- See Note [INLINE for small functions] in CoreUnfold
1360 ug_arity :: Arity, -- Number of value arguments expected
1361
1362 ug_unsat_ok :: Bool, -- True <=> ok to inline even if unsaturated
1363 ug_boring_ok :: Bool -- True <=> ok to inline even if the context is boring
1364 -- So True,True means "always"
1365 }
1366
1367 | UnfIfGoodArgs { -- Arose from a normal Id; the info here is the
1368 -- result of a simple analysis of the RHS
1369
1370 ug_args :: [Int], -- Discount if the argument is evaluated.
1371 -- (i.e., a simplification will definitely
1372 -- be possible). One elt of the list per *value* arg.
1373
1374 ug_size :: Int, -- The "size" of the unfolding.
1375
1376 ug_res :: Int -- Scrutinee discount: the discount to substract if the thing is in
1377 } -- a context (case (thing args) of ...),
1378 -- (where there are the right number of arguments.)
1379
1380 | UnfNever -- The RHS is big, so don't inline it
1381 deriving (Eq)
1382
1383 {-
1384 Note [Historical note: unfoldings for wrappers]
1385 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1386 We used to have a nice clever scheme in interface files for
1387 wrappers. A wrapper's unfolding can be reconstructed from its worker's
1388 id and its strictness. This decreased .hi file size (sometimes
1389 significantly, for modules like GHC.Classes with many high-arity w/w
1390 splits) and had a slight corresponding effect on compile times.
1391
1392 However, when we added the second demand analysis, this scheme lead to
1393 some Core lint errors. The second analysis could change the strictness
1394 signatures, which sometimes resulted in a wrapper's regenerated
1395 unfolding applying the wrapper to too many arguments.
1396
1397 Instead of repairing the clever .hi scheme, we abandoned it in favor
1398 of simplicity. The .hi sizes are usually insignificant (excluding the
1399 +1M for base libraries), and compile time barely increases (~+1% for
1400 nofib). The nicer upshot is that the UnfoldingSource no longer mentions
1401 an Id, so, eg, substitutions need not traverse them.
1402
1403
1404 Note [DFun unfoldings]
1405 ~~~~~~~~~~~~~~~~~~~~~~
1406 The Arity in a DFunUnfolding is total number of args (type and value)
1407 that the DFun needs to produce a dictionary. That's not necessarily
1408 related to the ordinary arity of the dfun Id, esp if the class has
1409 one method, so the dictionary is represented by a newtype. Example
1410
1411 class C a where { op :: a -> Int }
1412 instance C a -> C [a] where op xs = op (head xs)
1413
1414 The instance translates to
1415
1416 $dfCList :: forall a. C a => C [a] -- Arity 2!
1417 $dfCList = /\a.\d. $copList {a} d |> co
1418
1419 $copList :: forall a. C a => [a] -> Int -- Arity 2!
1420 $copList = /\a.\d.\xs. op {a} d (head xs)
1421
1422 Now we might encounter (op (dfCList {ty} d) a1 a2)
1423 and we want the (op (dfList {ty} d)) rule to fire, because $dfCList
1424 has all its arguments, even though its (value) arity is 2. That's
1425 why we record the number of expected arguments in the DFunUnfolding.
1426
1427 Note that although it's an Arity, it's most convenient for it to give
1428 the *total* number of arguments, both type and value. See the use
1429 site in exprIsConApp_maybe.
1430 -}
1431
1432 -- Constants for the UnfWhen constructor
1433 needSaturated, unSaturatedOk :: Bool
1434 needSaturated = False
1435 unSaturatedOk = True
1436
1437 boringCxtNotOk, boringCxtOk :: Bool
1438 boringCxtOk = True
1439 boringCxtNotOk = False
1440
1441 ------------------------------------------------
1442 noUnfolding :: Unfolding
1443 -- ^ There is no known 'Unfolding'
1444 evaldUnfolding :: Unfolding
1445 -- ^ This unfolding marks the associated thing as being evaluated
1446
1447 noUnfolding = NoUnfolding
1448 evaldUnfolding = OtherCon []
1449
1450 -- | There is no known 'Unfolding', because this came from an
1451 -- hi-boot file.
1452 bootUnfolding :: Unfolding
1453 bootUnfolding = BootUnfolding
1454
1455 mkOtherCon :: [AltCon] -> Unfolding
1456 mkOtherCon = OtherCon
1457
1458 isStableSource :: UnfoldingSource -> Bool
1459 -- Keep the unfolding template
1460 isStableSource InlineCompulsory = True
1461 isStableSource InlineStable = True
1462 isStableSource InlineRhs = False
1463
1464 -- | Retrieves the template of an unfolding: panics if none is known
1465 unfoldingTemplate :: Unfolding -> CoreExpr
1466 unfoldingTemplate = uf_tmpl
1467
1468 -- | Retrieves the template of an unfolding if possible
1469 -- maybeUnfoldingTemplate is used mainly wnen specialising, and we do
1470 -- want to specialise DFuns, so it's important to return a template
1471 -- for DFunUnfoldings
1472 maybeUnfoldingTemplate :: Unfolding -> Maybe CoreExpr
1473 maybeUnfoldingTemplate (CoreUnfolding { uf_tmpl = expr })
1474 = Just expr
1475 maybeUnfoldingTemplate (DFunUnfolding { df_bndrs = bndrs, df_con = con, df_args = args })
1476 = Just (mkLams bndrs (mkApps (Var (dataConWorkId con)) args))
1477 maybeUnfoldingTemplate _
1478 = Nothing
1479
1480 -- | The constructors that the unfolding could never be:
1481 -- returns @[]@ if no information is available
1482 otherCons :: Unfolding -> [AltCon]
1483 otherCons (OtherCon cons) = cons
1484 otherCons _ = []
1485
1486 -- | Determines if it is certainly the case that the unfolding will
1487 -- yield a value (something in HNF): returns @False@ if unsure
1488 isValueUnfolding :: Unfolding -> Bool
1489 -- Returns False for OtherCon
1490 isValueUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald
1491 isValueUnfolding _ = False
1492
1493 -- | Determines if it possibly the case that the unfolding will
1494 -- yield a value. Unlike 'isValueUnfolding' it returns @True@
1495 -- for 'OtherCon'
1496 isEvaldUnfolding :: Unfolding -> Bool
1497 -- Returns True for OtherCon
1498 isEvaldUnfolding (OtherCon _) = True
1499 isEvaldUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald
1500 isEvaldUnfolding _ = False
1501
1502 -- | @True@ if the unfolding is a constructor application, the application
1503 -- of a CONLIKE function or 'OtherCon'
1504 isConLikeUnfolding :: Unfolding -> Bool
1505 isConLikeUnfolding (OtherCon _) = True
1506 isConLikeUnfolding (CoreUnfolding { uf_is_conlike = con }) = con
1507 isConLikeUnfolding _ = False
1508
1509 -- | Is the thing we will unfold into certainly cheap?
1510 isCheapUnfolding :: Unfolding -> Bool
1511 isCheapUnfolding (CoreUnfolding { uf_is_work_free = is_wf }) = is_wf
1512 isCheapUnfolding _ = False
1513
1514 isExpandableUnfolding :: Unfolding -> Bool
1515 isExpandableUnfolding (CoreUnfolding { uf_expandable = is_expable }) = is_expable
1516 isExpandableUnfolding _ = False
1517
1518 expandUnfolding_maybe :: Unfolding -> Maybe CoreExpr
1519 -- Expand an expandable unfolding; this is used in rule matching
1520 -- See Note [Expanding variables] in Rules.hs
1521 -- The key point here is that CONLIKE things can be expanded
1522 expandUnfolding_maybe (CoreUnfolding { uf_expandable = True, uf_tmpl = rhs }) = Just rhs
1523 expandUnfolding_maybe _ = Nothing
1524
1525 isCompulsoryUnfolding :: Unfolding -> Bool
1526 isCompulsoryUnfolding (CoreUnfolding { uf_src = InlineCompulsory }) = True
1527 isCompulsoryUnfolding _ = False
1528
1529 isStableUnfolding :: Unfolding -> Bool
1530 -- True of unfoldings that should not be overwritten
1531 -- by a CoreUnfolding for the RHS of a let-binding
1532 isStableUnfolding (CoreUnfolding { uf_src = src }) = isStableSource src
1533 isStableUnfolding (DFunUnfolding {}) = True
1534 isStableUnfolding _ = False
1535
1536 -- | Only returns False if there is no unfolding information available at all
1537 hasSomeUnfolding :: Unfolding -> Bool
1538 hasSomeUnfolding NoUnfolding = False
1539 hasSomeUnfolding BootUnfolding = False
1540 hasSomeUnfolding _ = True
1541
1542 isBootUnfolding :: Unfolding -> Bool
1543 isBootUnfolding BootUnfolding = True
1544 isBootUnfolding _ = False
1545
1546 neverUnfoldGuidance :: UnfoldingGuidance -> Bool
1547 neverUnfoldGuidance UnfNever = True
1548 neverUnfoldGuidance _ = False
1549
1550 isFragileUnfolding :: Unfolding -> Bool
1551 -- An unfolding is fragile if it mentions free variables or
1552 -- is otherwise subject to change. A robust one can be kept.
1553 -- See Note [Fragile unfoldings]
1554 isFragileUnfolding (CoreUnfolding {}) = True
1555 isFragileUnfolding (DFunUnfolding {}) = True
1556 isFragileUnfolding _ = False
1557 -- NoUnfolding, BootUnfolding, OtherCon are all non-fragile
1558
1559 canUnfold :: Unfolding -> Bool
1560 canUnfold (CoreUnfolding { uf_guidance = g }) = not (neverUnfoldGuidance g)
1561 canUnfold _ = False
1562
1563 {- Note [Fragile unfoldings]
1564 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1565 An unfolding is "fragile" if it mentions free variables (and hence would
1566 need substitution) or might be affected by optimisation. The non-fragile
1567 ones are
1568
1569 NoUnfolding, BootUnfolding
1570
1571 OtherCon {} If we know this binder (say a lambda binder) will be
1572 bound to an evaluated thing, we want to retain that
1573 info in simpleOptExpr; see Trac #13077.
1574
1575 We consider even a StableUnfolding as fragile, because it needs substitution.
1576
1577 Note [InlineStable]
1578 ~~~~~~~~~~~~~~~~~
1579 When you say
1580 {-# INLINE f #-}
1581 f x = <rhs>
1582 you intend that calls (f e) are replaced by <rhs>[e/x] So we
1583 should capture (\x.<rhs>) in the Unfolding of 'f', and never meddle
1584 with it. Meanwhile, we can optimise <rhs> to our heart's content,
1585 leaving the original unfolding intact in Unfolding of 'f'. For example
1586 all xs = foldr (&&) True xs
1587 any p = all . map p {-# INLINE any #-}
1588 We optimise any's RHS fully, but leave the InlineRule saying "all . map p",
1589 which deforests well at the call site.
1590
1591 So INLINE pragma gives rise to an InlineRule, which captures the original RHS.
1592
1593 Moreover, it's only used when 'f' is applied to the
1594 specified number of arguments; that is, the number of argument on
1595 the LHS of the '=' sign in the original source definition.
1596 For example, (.) is now defined in the libraries like this
1597 {-# INLINE (.) #-}
1598 (.) f g = \x -> f (g x)
1599 so that it'll inline when applied to two arguments. If 'x' appeared
1600 on the left, thus
1601 (.) f g x = f (g x)
1602 it'd only inline when applied to three arguments. This slightly-experimental
1603 change was requested by Roman, but it seems to make sense.
1604
1605 See also Note [Inlining an InlineRule] in CoreUnfold.
1606
1607
1608 Note [OccInfo in unfoldings and rules]
1609 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1610 In unfoldings and rules, we guarantee that the template is occ-analysed,
1611 so that the occurrence info on the binders is correct. This is important,
1612 because the Simplifier does not re-analyse the template when using it. If
1613 the occurrence info is wrong
1614 - We may get more simpifier iterations than necessary, because
1615 once-occ info isn't there
1616 - More seriously, we may get an infinite loop if there's a Rec
1617 without a loop breaker marked
1618
1619
1620 ************************************************************************
1621 * *
1622 AltCon
1623 * *
1624 ************************************************************************
1625 -}
1626
1627 -- The Ord is needed for the FiniteMap used in the lookForConstructor
1628 -- in SimplEnv. If you declared that lookForConstructor *ignores*
1629 -- constructor-applications with LitArg args, then you could get
1630 -- rid of this Ord.
1631
1632 instance Outputable AltCon where
1633 ppr (DataAlt dc) = ppr dc
1634 ppr (LitAlt lit) = ppr lit
1635 ppr DEFAULT = text "__DEFAULT"
1636
1637 cmpAlt :: (AltCon, a, b) -> (AltCon, a, b) -> Ordering
1638 cmpAlt (con1, _, _) (con2, _, _) = con1 `cmpAltCon` con2
1639
1640 ltAlt :: (AltCon, a, b) -> (AltCon, a, b) -> Bool
1641 ltAlt a1 a2 = (a1 `cmpAlt` a2) == LT
1642
1643 cmpAltCon :: AltCon -> AltCon -> Ordering
1644 -- ^ Compares 'AltCon's within a single list of alternatives
1645 cmpAltCon DEFAULT DEFAULT = EQ
1646 cmpAltCon DEFAULT _ = LT
1647
1648 cmpAltCon (DataAlt d1) (DataAlt d2) = dataConTag d1 `compare` dataConTag d2
1649 cmpAltCon (DataAlt _) DEFAULT = GT
1650 cmpAltCon (LitAlt l1) (LitAlt l2) = l1 `compare` l2
1651 cmpAltCon (LitAlt _) DEFAULT = GT
1652
1653 cmpAltCon con1 con2 = WARN( True, text "Comparing incomparable AltCons" <+>
1654 ppr con1 <+> ppr con2 )
1655 LT
1656
1657 {-
1658 ************************************************************************
1659 * *
1660 \subsection{Useful synonyms}
1661 * *
1662 ************************************************************************
1663
1664 Note [CoreProgram]
1665 ~~~~~~~~~~~~~~~~~~
1666 The top level bindings of a program, a CoreProgram, are represented as
1667 a list of CoreBind
1668
1669 * Later bindings in the list can refer to earlier ones, but not vice
1670 versa. So this is OK
1671 NonRec { x = 4 }
1672 Rec { p = ...q...x...
1673 ; q = ...p...x }
1674 Rec { f = ...p..x..f.. }
1675 NonRec { g = ..f..q...x.. }
1676 But it would NOT be ok for 'f' to refer to 'g'.
1677
1678 * The occurrence analyser does strongly-connected component analysis
1679 on each Rec binding, and splits it into a sequence of smaller
1680 bindings where possible. So the program typically starts life as a
1681 single giant Rec, which is then dependency-analysed into smaller
1682 chunks.
1683 -}
1684
1685 -- If you edit this type, you may need to update the GHC formalism
1686 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
1687 type CoreProgram = [CoreBind] -- See Note [CoreProgram]
1688
1689 -- | The common case for the type of binders and variables when
1690 -- we are manipulating the Core language within GHC
1691 type CoreBndr = Var
1692 -- | Expressions where binders are 'CoreBndr's
1693 type CoreExpr = Expr CoreBndr
1694 -- | Argument expressions where binders are 'CoreBndr's
1695 type CoreArg = Arg CoreBndr
1696 -- | Binding groups where binders are 'CoreBndr's
1697 type CoreBind = Bind CoreBndr
1698 -- | Case alternatives where binders are 'CoreBndr's
1699 type CoreAlt = Alt CoreBndr
1700
1701 {-
1702 ************************************************************************
1703 * *
1704 \subsection{Tagging}
1705 * *
1706 ************************************************************************
1707 -}
1708
1709 -- | Binders are /tagged/ with a t
1710 data TaggedBndr t = TB CoreBndr t -- TB for "tagged binder"
1711
1712 type TaggedBind t = Bind (TaggedBndr t)
1713 type TaggedExpr t = Expr (TaggedBndr t)
1714 type TaggedArg t = Arg (TaggedBndr t)
1715 type TaggedAlt t = Alt (TaggedBndr t)
1716
1717 instance Outputable b => Outputable (TaggedBndr b) where
1718 ppr (TB b l) = char '<' <> ppr b <> comma <> ppr l <> char '>'
1719
1720 -- OutputableBndr Var is declared separately in PprCore; using a FlexibleContext
1721 -- to avoid circularity
1722 instance (OutputableBndr Var, Outputable b) =>
1723 OutputableBndr (TaggedBndr b) where
1724 pprBndr _ b = ppr b -- Simple
1725 pprInfixOcc b = ppr b
1726 pprPrefixOcc b = ppr b
1727 bndrIsJoin_maybe (TB b _) = isJoinId_maybe b
1728
1729 deTagExpr :: TaggedExpr t -> CoreExpr
1730 deTagExpr (Var v) = Var v
1731 deTagExpr (Lit l) = Lit l
1732 deTagExpr (Type ty) = Type ty
1733 deTagExpr (Coercion co) = Coercion co
1734 deTagExpr (App e1 e2) = App (deTagExpr e1) (deTagExpr e2)
1735 deTagExpr (Lam (TB b _) e) = Lam b (deTagExpr e)
1736 deTagExpr (Let bind body) = Let (deTagBind bind) (deTagExpr body)
1737 deTagExpr (Case e (TB b _) ty alts) = Case (deTagExpr e) b ty (map deTagAlt alts)
1738 deTagExpr (Tick t e) = Tick t (deTagExpr e)
1739 deTagExpr (Cast e co) = Cast (deTagExpr e) co
1740
1741 deTagBind :: TaggedBind t -> CoreBind
1742 deTagBind (NonRec (TB b _) rhs) = NonRec b (deTagExpr rhs)
1743 deTagBind (Rec prs) = Rec [(b, deTagExpr rhs) | (TB b _, rhs) <- prs]
1744
1745 deTagAlt :: TaggedAlt t -> CoreAlt
1746 deTagAlt (con, bndrs, rhs) = (con, [b | TB b _ <- bndrs], deTagExpr rhs)
1747
1748 {-
1749 ************************************************************************
1750 * *
1751 \subsection{Core-constructing functions with checking}
1752 * *
1753 ************************************************************************
1754 -}
1755
1756 -- | Apply a list of argument expressions to a function expression in a nested fashion. Prefer to
1757 -- use 'MkCore.mkCoreApps' if possible
1758 mkApps :: Expr b -> [Arg b] -> Expr b
1759 -- | Apply a list of type argument expressions to a function expression in a nested fashion
1760 mkTyApps :: Expr b -> [Type] -> Expr b
1761 -- | Apply a list of coercion argument expressions to a function expression in a nested fashion
1762 mkCoApps :: Expr b -> [Coercion] -> Expr b
1763 -- | Apply a list of type or value variables to a function expression in a nested fashion
1764 mkVarApps :: Expr b -> [Var] -> Expr b
1765 -- | Apply a list of argument expressions to a data constructor in a nested fashion. Prefer to
1766 -- use 'MkCore.mkCoreConApps' if possible
1767 mkConApp :: DataCon -> [Arg b] -> Expr b
1768
1769 mkApps f args = foldl App f args
1770 mkCoApps f args = foldl (\ e a -> App e (Coercion a)) f args
1771 mkVarApps f vars = foldl (\ e a -> App e (varToCoreExpr a)) f vars
1772 mkConApp con args = mkApps (Var (dataConWorkId con)) args
1773
1774 mkTyApps f args = foldl (\ e a -> App e (mkTyArg a)) f args
1775
1776 mkConApp2 :: DataCon -> [Type] -> [Var] -> Expr b
1777 mkConApp2 con tys arg_ids = Var (dataConWorkId con)
1778 `mkApps` map Type tys
1779 `mkApps` map varToCoreExpr arg_ids
1780
1781 mkTyArg :: Type -> Expr b
1782 mkTyArg ty
1783 | Just co <- isCoercionTy_maybe ty = Coercion co
1784 | otherwise = Type ty
1785
1786 -- | Create a machine integer literal expression of type @Int#@ from an @Integer@.
1787 -- If you want an expression of type @Int@ use 'MkCore.mkIntExpr'
1788 mkIntLit :: DynFlags -> Integer -> Expr b
1789 -- | Create a machine integer literal expression of type @Int#@ from an @Int@.
1790 -- If you want an expression of type @Int@ use 'MkCore.mkIntExpr'
1791 mkIntLitInt :: DynFlags -> Int -> Expr b
1792
1793 mkIntLit dflags n = Lit (mkMachInt dflags n)
1794 mkIntLitInt dflags n = Lit (mkMachInt dflags (toInteger n))
1795
1796 -- | Create a machine word literal expression of type @Word#@ from an @Integer@.
1797 -- If you want an expression of type @Word@ use 'MkCore.mkWordExpr'
1798 mkWordLit :: DynFlags -> Integer -> Expr b
1799 -- | Create a machine word literal expression of type @Word#@ from a @Word@.
1800 -- If you want an expression of type @Word@ use 'MkCore.mkWordExpr'
1801 mkWordLitWord :: DynFlags -> Word -> Expr b
1802
1803 mkWordLit dflags w = Lit (mkMachWord dflags w)
1804 mkWordLitWord dflags w = Lit (mkMachWord dflags (toInteger w))
1805
1806 mkWord64LitWord64 :: Word64 -> Expr b
1807 mkWord64LitWord64 w = Lit (mkMachWord64 (toInteger w))
1808
1809 mkInt64LitInt64 :: Int64 -> Expr b
1810 mkInt64LitInt64 w = Lit (mkMachInt64 (toInteger w))
1811
1812 -- | Create a machine character literal expression of type @Char#@.
1813 -- If you want an expression of type @Char@ use 'MkCore.mkCharExpr'
1814 mkCharLit :: Char -> Expr b
1815 -- | Create a machine string literal expression of type @Addr#@.
1816 -- If you want an expression of type @String@ use 'MkCore.mkStringExpr'
1817 mkStringLit :: String -> Expr b
1818
1819 mkCharLit c = Lit (mkMachChar c)
1820 mkStringLit s = Lit (mkMachString s)
1821
1822 -- | Create a machine single precision literal expression of type @Float#@ from a @Rational@.
1823 -- If you want an expression of type @Float@ use 'MkCore.mkFloatExpr'
1824 mkFloatLit :: Rational -> Expr b
1825 -- | Create a machine single precision literal expression of type @Float#@ from a @Float@.
1826 -- If you want an expression of type @Float@ use 'MkCore.mkFloatExpr'
1827 mkFloatLitFloat :: Float -> Expr b
1828
1829 mkFloatLit f = Lit (mkMachFloat f)
1830 mkFloatLitFloat f = Lit (mkMachFloat (toRational f))
1831
1832 -- | Create a machine double precision literal expression of type @Double#@ from a @Rational@.
1833 -- If you want an expression of type @Double@ use 'MkCore.mkDoubleExpr'
1834 mkDoubleLit :: Rational -> Expr b
1835 -- | Create a machine double precision literal expression of type @Double#@ from a @Double@.
1836 -- If you want an expression of type @Double@ use 'MkCore.mkDoubleExpr'
1837 mkDoubleLitDouble :: Double -> Expr b
1838
1839 mkDoubleLit d = Lit (mkMachDouble d)
1840 mkDoubleLitDouble d = Lit (mkMachDouble (toRational d))
1841
1842 -- | Bind all supplied binding groups over an expression in a nested let expression. Assumes
1843 -- that the rhs satisfies the let/app invariant. Prefer to use 'MkCore.mkCoreLets' if
1844 -- possible, which does guarantee the invariant
1845 mkLets :: [Bind b] -> Expr b -> Expr b
1846 -- | Bind all supplied binders over an expression in a nested lambda expression. Prefer to
1847 -- use 'MkCore.mkCoreLams' if possible
1848 mkLams :: [b] -> Expr b -> Expr b
1849
1850 mkLams binders body = foldr Lam body binders
1851 mkLets binds body = foldr mkLet body binds
1852
1853 mkLet :: Bind b -> Expr b -> Expr b
1854 -- The desugarer sometimes generates an empty Rec group
1855 -- which Lint rejects, so we kill it off right away
1856 mkLet (Rec []) body = body
1857 mkLet bind body = Let bind body
1858
1859 -- | Create a binding group where a type variable is bound to a type. Per "CoreSyn#type_let",
1860 -- this can only be used to bind something in a non-recursive @let@ expression
1861 mkTyBind :: TyVar -> Type -> CoreBind
1862 mkTyBind tv ty = NonRec tv (Type ty)
1863
1864 -- | Create a binding group where a type variable is bound to a type. Per "CoreSyn#type_let",
1865 -- this can only be used to bind something in a non-recursive @let@ expression
1866 mkCoBind :: CoVar -> Coercion -> CoreBind
1867 mkCoBind cv co = NonRec cv (Coercion co)
1868
1869 -- | Convert a binder into either a 'Var' or 'Type' 'Expr' appropriately
1870 varToCoreExpr :: CoreBndr -> Expr b
1871 varToCoreExpr v | isTyVar v = Type (mkTyVarTy v)
1872 | isCoVar v = Coercion (mkCoVarCo v)
1873 | otherwise = ASSERT( isId v ) Var v
1874
1875 varsToCoreExprs :: [CoreBndr] -> [Expr b]
1876 varsToCoreExprs vs = map varToCoreExpr vs
1877
1878 {-
1879 ************************************************************************
1880 * *
1881 Getting a result type
1882 * *
1883 ************************************************************************
1884
1885 These are defined here to avoid a module loop between CoreUtils and CoreFVs
1886
1887 -}
1888
1889 applyTypeToArg :: Type -> CoreExpr -> Type
1890 -- ^ Determines the type resulting from applying an expression with given type
1891 -- to a given argument expression
1892 applyTypeToArg fun_ty arg = piResultTy fun_ty (exprToType arg)
1893
1894 -- | If the expression is a 'Type', converts. Otherwise,
1895 -- panics. NB: This does /not/ convert 'Coercion' to 'CoercionTy'.
1896 exprToType :: CoreExpr -> Type
1897 exprToType (Type ty) = ty
1898 exprToType _bad = pprPanic "exprToType" empty
1899
1900 -- | If the expression is a 'Coercion', converts.
1901 exprToCoercion_maybe :: CoreExpr -> Maybe Coercion
1902 exprToCoercion_maybe (Coercion co) = Just co
1903 exprToCoercion_maybe _ = Nothing
1904
1905 {-
1906 ************************************************************************
1907 * *
1908 \subsection{Simple access functions}
1909 * *
1910 ************************************************************************
1911 -}
1912
1913 -- | Extract every variable by this group
1914 bindersOf :: Bind b -> [b]
1915 -- If you edit this function, you may need to update the GHC formalism
1916 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
1917 bindersOf (NonRec binder _) = [binder]
1918 bindersOf (Rec pairs) = [binder | (binder, _) <- pairs]
1919
1920 -- | 'bindersOf' applied to a list of binding groups
1921 bindersOfBinds :: [Bind b] -> [b]
1922 bindersOfBinds binds = foldr ((++) . bindersOf) [] binds
1923
1924 rhssOfBind :: Bind b -> [Expr b]
1925 rhssOfBind (NonRec _ rhs) = [rhs]
1926 rhssOfBind (Rec pairs) = [rhs | (_,rhs) <- pairs]
1927
1928 rhssOfAlts :: [Alt b] -> [Expr b]
1929 rhssOfAlts alts = [e | (_,_,e) <- alts]
1930
1931 -- | Collapse all the bindings in the supplied groups into a single
1932 -- list of lhs\/rhs pairs suitable for binding in a 'Rec' binding group
1933 flattenBinds :: [Bind b] -> [(b, Expr b)]
1934 flattenBinds (NonRec b r : binds) = (b,r) : flattenBinds binds
1935 flattenBinds (Rec prs1 : binds) = prs1 ++ flattenBinds binds
1936 flattenBinds [] = []
1937
1938 -- | We often want to strip off leading lambdas before getting down to
1939 -- business. Variants are 'collectTyBinders', 'collectValBinders',
1940 -- and 'collectTyAndValBinders'
1941 collectBinders :: Expr b -> ([b], Expr b)
1942 collectTyBinders :: CoreExpr -> ([TyVar], CoreExpr)
1943 collectValBinders :: CoreExpr -> ([Id], CoreExpr)
1944 collectTyAndValBinders :: CoreExpr -> ([TyVar], [Id], CoreExpr)
1945 -- | Strip off exactly N leading lambdas (type or value). Good for use with
1946 -- join points.
1947 collectNBinders :: Int -> Expr b -> ([b], Expr b)
1948
1949 collectBinders expr
1950 = go [] expr
1951 where
1952 go bs (Lam b e) = go (b:bs) e
1953 go bs e = (reverse bs, e)
1954
1955 collectTyBinders expr
1956 = go [] expr
1957 where
1958 go tvs (Lam b e) | isTyVar b = go (b:tvs) e
1959 go tvs e = (reverse tvs, e)
1960
1961 collectValBinders expr
1962 = go [] expr
1963 where
1964 go ids (Lam b e) | isId b = go (b:ids) e
1965 go ids body = (reverse ids, body)
1966
1967 collectTyAndValBinders expr
1968 = (tvs, ids, body)
1969 where
1970 (tvs, body1) = collectTyBinders expr
1971 (ids, body) = collectValBinders body1
1972
1973 collectNBinders orig_n orig_expr
1974 = go orig_n [] orig_expr
1975 where
1976 go 0 bs expr = (reverse bs, expr)
1977 go n bs (Lam b e) = go (n-1) (b:bs) e
1978 go _ _ _ = pprPanic "collectNBinders" $ int orig_n
1979
1980 -- | Takes a nested application expression and returns the the function
1981 -- being applied and the arguments to which it is applied
1982 collectArgs :: Expr b -> (Expr b, [Arg b])
1983 collectArgs expr
1984 = go expr []
1985 where
1986 go (App f a) as = go f (a:as)
1987 go e as = (e, as)
1988
1989 -- | Like @collectArgs@, but also collects looks through floatable
1990 -- ticks if it means that we can find more arguments.
1991 collectArgsTicks :: (Tickish Id -> Bool) -> Expr b
1992 -> (Expr b, [Arg b], [Tickish Id])
1993 collectArgsTicks skipTick expr
1994 = go expr [] []
1995 where
1996 go (App f a) as ts = go f (a:as) ts
1997 go (Tick t e) as ts
1998 | skipTick t = go e as (t:ts)
1999 go e as ts = (e, as, reverse ts)
2000
2001
2002 {-
2003 ************************************************************************
2004 * *
2005 \subsection{Predicates}
2006 * *
2007 ************************************************************************
2008
2009 At one time we optionally carried type arguments through to runtime.
2010 @isRuntimeVar v@ returns if (Lam v _) really becomes a lambda at runtime,
2011 i.e. if type applications are actual lambdas because types are kept around
2012 at runtime. Similarly isRuntimeArg.
2013 -}
2014
2015 -- | Will this variable exist at runtime?
2016 isRuntimeVar :: Var -> Bool
2017 isRuntimeVar = isId
2018
2019 -- | Will this argument expression exist at runtime?
2020 isRuntimeArg :: CoreExpr -> Bool
2021 isRuntimeArg = isValArg
2022
2023 -- | Returns @True@ for value arguments, false for type args
2024 -- NB: coercions are value arguments (zero width, to be sure,
2025 -- like State#, but still value args).
2026 isValArg :: Expr b -> Bool
2027 isValArg e = not (isTypeArg e)
2028
2029 -- | Returns @True@ iff the expression is a 'Type' or 'Coercion'
2030 -- expression at its top level
2031 isTyCoArg :: Expr b -> Bool
2032 isTyCoArg (Type {}) = True
2033 isTyCoArg (Coercion {}) = True
2034 isTyCoArg _ = False
2035
2036 -- | Returns @True@ iff the expression is a 'Type' expression at its
2037 -- top level. Note this does NOT include 'Coercion's.
2038 isTypeArg :: Expr b -> Bool
2039 isTypeArg (Type {}) = True
2040 isTypeArg _ = False
2041
2042 -- | The number of binders that bind values rather than types
2043 valBndrCount :: [CoreBndr] -> Int
2044 valBndrCount = count isId
2045
2046 -- | The number of argument expressions that are values rather than types at their top level
2047 valArgCount :: [Arg b] -> Int
2048 valArgCount = count isValArg
2049
2050 {-
2051 ************************************************************************
2052 * *
2053 \subsection{Annotated core}
2054 * *
2055 ************************************************************************
2056 -}
2057
2058 -- | Annotated core: allows annotation at every node in the tree
2059 type AnnExpr bndr annot = (annot, AnnExpr' bndr annot)
2060
2061 -- | A clone of the 'Expr' type but allowing annotation at every tree node
2062 data AnnExpr' bndr annot
2063 = AnnVar Id
2064 | AnnLit Literal
2065 | AnnLam bndr (AnnExpr bndr annot)
2066 | AnnApp (AnnExpr bndr annot) (AnnExpr bndr annot)
2067 | AnnCase (AnnExpr bndr annot) bndr Type [AnnAlt bndr annot]
2068 | AnnLet (AnnBind bndr annot) (AnnExpr bndr annot)
2069 | AnnCast (AnnExpr bndr annot) (annot, Coercion)
2070 -- Put an annotation on the (root of) the coercion
2071 | AnnTick (Tickish Id) (AnnExpr bndr annot)
2072 | AnnType Type
2073 | AnnCoercion Coercion
2074
2075 -- | A clone of the 'Alt' type but allowing annotation at every tree node
2076 type AnnAlt bndr annot = (AltCon, [bndr], AnnExpr bndr annot)
2077
2078 -- | A clone of the 'Bind' type but allowing annotation at every tree node
2079 data AnnBind bndr annot
2080 = AnnNonRec bndr (AnnExpr bndr annot)
2081 | AnnRec [(bndr, AnnExpr bndr annot)]
2082
2083 -- | Takes a nested application expression and returns the the function
2084 -- being applied and the arguments to which it is applied
2085 collectAnnArgs :: AnnExpr b a -> (AnnExpr b a, [AnnExpr b a])
2086 collectAnnArgs expr
2087 = go expr []
2088 where
2089 go (_, AnnApp f a) as = go f (a:as)
2090 go e as = (e, as)
2091
2092 collectAnnArgsTicks :: (Tickish Var -> Bool) -> AnnExpr b a
2093 -> (AnnExpr b a, [AnnExpr b a], [Tickish Var])
2094 collectAnnArgsTicks tickishOk expr
2095 = go expr [] []
2096 where
2097 go (_, AnnApp f a) as ts = go f (a:as) ts
2098 go (_, AnnTick t e) as ts | tickishOk t
2099 = go e as (t:ts)
2100 go e as ts = (e, as, reverse ts)
2101
2102 deAnnotate :: AnnExpr bndr annot -> Expr bndr
2103 deAnnotate (_, e) = deAnnotate' e
2104
2105 deAnnotate' :: AnnExpr' bndr annot -> Expr bndr
2106 deAnnotate' (AnnType t) = Type t
2107 deAnnotate' (AnnCoercion co) = Coercion co
2108 deAnnotate' (AnnVar v) = Var v
2109 deAnnotate' (AnnLit lit) = Lit lit
2110 deAnnotate' (AnnLam binder body) = Lam binder (deAnnotate body)
2111 deAnnotate' (AnnApp fun arg) = App (deAnnotate fun) (deAnnotate arg)
2112 deAnnotate' (AnnCast e (_,co)) = Cast (deAnnotate e) co
2113 deAnnotate' (AnnTick tick body) = Tick tick (deAnnotate body)
2114
2115 deAnnotate' (AnnLet bind body)
2116 = Let (deAnnBind bind) (deAnnotate body)
2117 where
2118 deAnnBind (AnnNonRec var rhs) = NonRec var (deAnnotate rhs)
2119 deAnnBind (AnnRec pairs) = Rec [(v,deAnnotate rhs) | (v,rhs) <- pairs]
2120
2121 deAnnotate' (AnnCase scrut v t alts)
2122 = Case (deAnnotate scrut) v t (map deAnnAlt alts)
2123
2124 deAnnAlt :: AnnAlt bndr annot -> Alt bndr
2125 deAnnAlt (con,args,rhs) = (con,args,deAnnotate rhs)
2126
2127 -- | As 'collectBinders' but for 'AnnExpr' rather than 'Expr'
2128 collectAnnBndrs :: AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot)
2129 collectAnnBndrs e
2130 = collect [] e
2131 where
2132 collect bs (_, AnnLam b body) = collect (b:bs) body
2133 collect bs body = (reverse bs, body)
2134
2135 -- | As 'collectNBinders' but for 'AnnExpr' rather than 'Expr'
2136 collectNAnnBndrs :: Int -> AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot)
2137 collectNAnnBndrs orig_n e
2138 = collect orig_n [] e
2139 where
2140 collect 0 bs body = (reverse bs, body)
2141 collect n bs (_, AnnLam b body) = collect (n-1) (b:bs) body
2142 collect _ _ _ = pprPanic "collectNBinders" $ int orig_n