No join-point from an INLINE function with wrong arity
[ghc.git] / compiler / coreSyn / CoreSyn.hs
1 {-
2 (c) The University of Glasgow 2006
3 (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
4 -}
5
6 {-# LANGUAGE CPP, DeriveDataTypeable, FlexibleContexts #-}
7
8 -- | CoreSyn holds all the main data types for use by for the Glasgow Haskell Compiler midsection
9 module CoreSyn (
10 -- * Main data types
11 Expr(..), Alt, Bind(..), AltCon(..), Arg,
12 Tickish(..), TickishScoping(..), TickishPlacement(..),
13 CoreProgram, CoreExpr, CoreAlt, CoreBind, CoreArg, CoreBndr,
14 TaggedExpr, TaggedAlt, TaggedBind, TaggedArg, TaggedBndr(..), deTagExpr,
15
16 -- * In/Out type synonyms
17 InId, InBind, InExpr, InAlt, InArg, InType, InKind,
18 InBndr, InVar, InCoercion, InTyVar, InCoVar,
19 OutId, OutBind, OutExpr, OutAlt, OutArg, OutType, OutKind,
20 OutBndr, OutVar, OutCoercion, OutTyVar, OutCoVar,
21
22 -- ** 'Expr' construction
23 mkLet, mkLets, mkLetNonRec, mkLetRec, mkLams,
24 mkApps, mkTyApps, mkCoApps, mkVarApps, mkTyArg,
25
26 mkIntLit, mkIntLitInt,
27 mkWordLit, mkWordLitWord,
28 mkWord64LitWord64, mkInt64LitInt64,
29 mkCharLit, mkStringLit,
30 mkFloatLit, mkFloatLitFloat,
31 mkDoubleLit, mkDoubleLitDouble,
32
33 mkConApp, mkConApp2, mkTyBind, mkCoBind,
34 varToCoreExpr, varsToCoreExprs,
35
36 isId, cmpAltCon, cmpAlt, ltAlt,
37
38 -- ** Simple 'Expr' access functions and predicates
39 bindersOf, bindersOfBinds, rhssOfBind, rhssOfAlts,
40 collectBinders, collectTyBinders, collectTyAndValBinders,
41 collectNBinders,
42 collectArgs, collectArgsTicks, flattenBinds,
43
44 exprToType, exprToCoercion_maybe,
45 applyTypeToArg,
46
47 isValArg, isTypeArg, isTyCoArg, valArgCount, valBndrCount,
48 isRuntimeArg, isRuntimeVar,
49
50 -- * Tick-related functions
51 tickishCounts, tickishScoped, tickishScopesLike, tickishFloatable,
52 tickishCanSplit, mkNoCount, mkNoScope,
53 tickishIsCode, tickishPlace,
54 tickishContains,
55
56 -- * Unfolding data types
57 Unfolding(..), UnfoldingGuidance(..), UnfoldingSource(..),
58
59 -- ** Constructing 'Unfolding's
60 noUnfolding, bootUnfolding, evaldUnfolding, mkOtherCon,
61 unSaturatedOk, needSaturated, boringCxtOk, boringCxtNotOk,
62
63 -- ** Predicates and deconstruction on 'Unfolding'
64 unfoldingTemplate, expandUnfolding_maybe,
65 maybeUnfoldingTemplate, otherCons,
66 isValueUnfolding, isEvaldUnfolding, isCheapUnfolding,
67 isExpandableUnfolding, isConLikeUnfolding, isCompulsoryUnfolding,
68 isStableUnfolding, isFragileUnfolding, hasSomeUnfolding,
69 isBootUnfolding,
70 canUnfold, neverUnfoldGuidance, isStableSource,
71
72 -- * Annotated expression data types
73 AnnExpr, AnnExpr'(..), AnnBind(..), AnnAlt,
74
75 -- ** Operations on annotated expressions
76 collectAnnArgs, collectAnnArgsTicks,
77
78 -- ** Operations on annotations
79 deAnnotate, deAnnotate', deAnnAlt,
80 collectAnnBndrs, collectNAnnBndrs,
81
82 -- * Orphanhood
83 IsOrphan(..), isOrphan, notOrphan, chooseOrphanAnchor,
84
85 -- * Core rule data types
86 CoreRule(..), RuleBase,
87 RuleName, RuleFun, IdUnfoldingFun, InScopeEnv,
88 RuleEnv(..), mkRuleEnv, emptyRuleEnv,
89
90 -- ** Operations on 'CoreRule's
91 ruleArity, ruleName, ruleIdName, ruleActivation,
92 setRuleIdName,
93 isBuiltinRule, isLocalRule, isAutoRule,
94
95 -- * Core vectorisation declarations data type
96 CoreVect(..)
97 ) where
98
99 #include "HsVersions.h"
100
101 import CostCentre
102 import VarEnv( InScopeSet )
103 import Var
104 import Type
105 import Coercion
106 import Name
107 import NameSet
108 import NameEnv( NameEnv, emptyNameEnv )
109 import Literal
110 import DataCon
111 import Module
112 import TyCon
113 import BasicTypes
114 import DynFlags
115 import Outputable
116 import Util
117 import UniqSet
118 import SrcLoc ( RealSrcSpan, containsSpan )
119 import Binary
120
121 import Data.Data hiding (TyCon)
122 import Data.Int
123 import Data.Word
124
125 infixl 4 `mkApps`, `mkTyApps`, `mkVarApps`, `App`, `mkCoApps`
126 -- Left associative, so that we can say (f `mkTyApps` xs `mkVarApps` ys)
127
128 {-
129 ************************************************************************
130 * *
131 \subsection{The main data types}
132 * *
133 ************************************************************************
134
135 These data types are the heart of the compiler
136 -}
137
138 -- | This is the data type that represents GHCs core intermediate language. Currently
139 -- GHC uses System FC <http://research.microsoft.com/~simonpj/papers/ext-f/> for this purpose,
140 -- which is closely related to the simpler and better known System F <http://en.wikipedia.org/wiki/System_F>.
141 --
142 -- We get from Haskell source to this Core language in a number of stages:
143 --
144 -- 1. The source code is parsed into an abstract syntax tree, which is represented
145 -- by the data type 'HsExpr.HsExpr' with the names being 'RdrName.RdrNames'
146 --
147 -- 2. This syntax tree is /renamed/, which attaches a 'Unique.Unique' to every 'RdrName.RdrName'
148 -- (yielding a 'Name.Name') to disambiguate identifiers which are lexically identical.
149 -- For example, this program:
150 --
151 -- @
152 -- f x = let f x = x + 1
153 -- in f (x - 2)
154 -- @
155 --
156 -- Would be renamed by having 'Unique's attached so it looked something like this:
157 --
158 -- @
159 -- f_1 x_2 = let f_3 x_4 = x_4 + 1
160 -- in f_3 (x_2 - 2)
161 -- @
162 -- But see Note [Shadowing] below.
163 --
164 -- 3. The resulting syntax tree undergoes type checking (which also deals with instantiating
165 -- type class arguments) to yield a 'HsExpr.HsExpr' type that has 'Id.Id' as it's names.
166 --
167 -- 4. Finally the syntax tree is /desugared/ from the expressive 'HsExpr.HsExpr' type into
168 -- this 'Expr' type, which has far fewer constructors and hence is easier to perform
169 -- optimization, analysis and code generation on.
170 --
171 -- The type parameter @b@ is for the type of binders in the expression tree.
172 --
173 -- The language consists of the following elements:
174 --
175 -- * Variables
176 --
177 -- * Primitive literals
178 --
179 -- * Applications: note that the argument may be a 'Type'.
180 -- See Note [CoreSyn let/app invariant]
181 -- See Note [Levity polymorphism invariants]
182 --
183 -- * Lambda abstraction
184 -- See Note [Levity polymorphism invariants]
185 --
186 -- * Recursive and non recursive @let@s. Operationally
187 -- this corresponds to allocating a thunk for the things
188 -- bound and then executing the sub-expression.
189 --
190 -- #top_level_invariant#
191 -- #letrec_invariant#
192 --
193 -- The right hand sides of all top-level and recursive @let@s
194 -- /must/ be of lifted type (see "Type#type_classification" for
195 -- the meaning of /lifted/ vs. /unlifted/). There is one exception
196 -- to this rule, top-level @let@s are allowed to bind primitive
197 -- string literals, see Note [CoreSyn top-level string literals].
198 --
199 -- See Note [CoreSyn let/app invariant]
200 -- See Note [Levity polymorphism invariants]
201 --
202 -- #type_let#
203 -- We allow a /non-recursive/ let to bind a type variable, thus:
204 --
205 -- > Let (NonRec tv (Type ty)) body
206 --
207 -- This can be very convenient for postponing type substitutions until
208 -- the next run of the simplifier.
209 --
210 -- At the moment, the rest of the compiler only deals with type-let
211 -- in a Let expression, rather than at top level. We may want to revist
212 -- this choice.
213 --
214 -- * Case expression. Operationally this corresponds to evaluating
215 -- the scrutinee (expression examined) to weak head normal form
216 -- and then examining at most one level of resulting constructor (i.e. you
217 -- cannot do nested pattern matching directly with this).
218 --
219 -- The binder gets bound to the value of the scrutinee,
220 -- and the 'Type' must be that of all the case alternatives
221 --
222 -- #case_invariants#
223 -- This is one of the more complicated elements of the Core language,
224 -- and comes with a number of restrictions:
225 --
226 -- 1. The list of alternatives may be empty;
227 -- See Note [Empty case alternatives]
228 --
229 -- 2. The 'DEFAULT' case alternative must be first in the list,
230 -- if it occurs at all.
231 --
232 -- 3. The remaining cases are in order of increasing
233 -- tag (for 'DataAlts') or
234 -- lit (for 'LitAlts').
235 -- This makes finding the relevant constructor easy,
236 -- and makes comparison easier too.
237 --
238 -- 4. The list of alternatives must be exhaustive. An /exhaustive/ case
239 -- does not necessarily mention all constructors:
240 --
241 -- @
242 -- data Foo = Red | Green | Blue
243 -- ... case x of
244 -- Red -> True
245 -- other -> f (case x of
246 -- Green -> ...
247 -- Blue -> ... ) ...
248 -- @
249 --
250 -- The inner case does not need a @Red@ alternative, because @x@
251 -- can't be @Red@ at that program point.
252 --
253 -- 5. Floating-point values must not be scrutinised against literals.
254 -- See Trac #9238 and Note [Rules for floating-point comparisons]
255 -- in PrelRules for rationale.
256 --
257 -- * Cast an expression to a particular type.
258 -- This is used to implement @newtype@s (a @newtype@ constructor or
259 -- destructor just becomes a 'Cast' in Core) and GADTs.
260 --
261 -- * Notes. These allow general information to be added to expressions
262 -- in the syntax tree
263 --
264 -- * A type: this should only show up at the top level of an Arg
265 --
266 -- * A coercion
267
268 -- If you edit this type, you may need to update the GHC formalism
269 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
270 data Expr b
271 = Var Id
272 | Lit Literal
273 | App (Expr b) (Arg b)
274 | Lam b (Expr b)
275 | Let (Bind b) (Expr b)
276 | Case (Expr b) b Type [Alt b] -- See #case_invariants#
277 | Cast (Expr b) Coercion
278 | Tick (Tickish Id) (Expr b)
279 | Type Type
280 | Coercion Coercion
281 deriving Data
282
283 -- | Type synonym for expressions that occur in function argument positions.
284 -- Only 'Arg' should contain a 'Type' at top level, general 'Expr' should not
285 type Arg b = Expr b
286
287 -- | A case split alternative. Consists of the constructor leading to the alternative,
288 -- the variables bound from the constructor, and the expression to be executed given that binding.
289 -- The default alternative is @(DEFAULT, [], rhs)@
290
291 -- If you edit this type, you may need to update the GHC formalism
292 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
293 type Alt b = (AltCon, [b], Expr b)
294
295 -- | A case alternative constructor (i.e. pattern match)
296
297 -- If you edit this type, you may need to update the GHC formalism
298 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
299 data AltCon
300 = DataAlt DataCon -- ^ A plain data constructor: @case e of { Foo x -> ... }@.
301 -- Invariant: the 'DataCon' is always from a @data@ type, and never from a @newtype@
302
303 | LitAlt Literal -- ^ A literal: @case e of { 1 -> ... }@
304 -- Invariant: always an *unlifted* literal
305 -- See Note [Literal alternatives]
306
307 | DEFAULT -- ^ Trivial alternative: @case e of { _ -> ... }@
308 deriving (Eq, Data)
309
310 -- This instance is a bit shady. It can only be used to compare AltCons for
311 -- a single type constructor. Fortunately, it seems quite unlikely that we'll
312 -- ever need to compare AltCons for different type constructors.
313 instance Ord AltCon where
314 compare (DataAlt con1) (DataAlt con2) =
315 ASSERT( dataConTyCon con1 == dataConTyCon con2 )
316 compare (dataConTag con1) (dataConTag con2)
317 compare (DataAlt _) _ = LT
318 compare _ (DataAlt _) = GT
319 compare (LitAlt l1) (LitAlt l2) = compare l1 l2
320 compare (LitAlt _) DEFAULT = LT
321 compare DEFAULT DEFAULT = EQ
322 compare DEFAULT _ = GT
323
324 -- | Binding, used for top level bindings in a module and local bindings in a @let@.
325
326 -- If you edit this type, you may need to update the GHC formalism
327 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
328 data Bind b = NonRec b (Expr b)
329 | Rec [(b, (Expr b))]
330 deriving Data
331
332 {-
333 Note [Shadowing]
334 ~~~~~~~~~~~~~~~~
335 While various passes attempt to rename on-the-fly in a manner that
336 avoids "shadowing" (thereby simplifying downstream optimizations),
337 neither the simplifier nor any other pass GUARANTEES that shadowing is
338 avoided. Thus, all passes SHOULD work fine even in the presence of
339 arbitrary shadowing in their inputs.
340
341 In particular, scrutinee variables `x` in expressions of the form
342 `Case e x t` are often renamed to variables with a prefix
343 "wild_". These "wild" variables may appear in the body of the
344 case-expression, and further, may be shadowed within the body.
345
346 So the Unique in an Var is not really unique at all. Still, it's very
347 useful to give a constant-time equality/ordering for Vars, and to give
348 a key that can be used to make sets of Vars (VarSet), or mappings from
349 Vars to other things (VarEnv). Moreover, if you do want to eliminate
350 shadowing, you can give a new Unique to an Id without changing its
351 printable name, which makes debugging easier.
352
353 Note [Literal alternatives]
354 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
355 Literal alternatives (LitAlt lit) are always for *un-lifted* literals.
356 We have one literal, a literal Integer, that is lifted, and we don't
357 allow in a LitAlt, because LitAlt cases don't do any evaluation. Also
358 (see Trac #5603) if you say
359 case 3 of
360 S# x -> ...
361 J# _ _ -> ...
362 (where S#, J# are the constructors for Integer) we don't want the
363 simplifier calling findAlt with argument (LitAlt 3). No no. Integer
364 literals are an opaque encoding of an algebraic data type, not of
365 an unlifted literal, like all the others.
366
367 Also, we do not permit case analysis with literal patterns on floating-point
368 types. See Trac #9238 and Note [Rules for floating-point comparisons] in
369 PrelRules for the rationale for this restriction.
370
371 -------------------------- CoreSyn INVARIANTS ---------------------------
372
373 Note [CoreSyn top-level invariant]
374 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
375 See #toplevel_invariant#
376
377 Note [CoreSyn letrec invariant]
378 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
379 See #letrec_invariant#
380
381 Note [CoreSyn top-level string literals]
382 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
383 As an exception to the usual rule that top-level binders must be lifted,
384 we allow binding primitive string literals (of type Addr#) of type Addr# at the
385 top level. This allows us to share string literals earlier in the pipeline and
386 crucially allows other optimizations in the Core2Core pipeline to fire.
387 Consider,
388
389 f n = let a::Addr# = "foo"#
390 in \x -> blah
391
392 In order to be able to inline `f`, we would like to float `a` to the top.
393 Another option would be to inline `a`, but that would lead to duplicating string
394 literals, which we want to avoid. See Trac #8472.
395
396 The solution is simply to allow top-level unlifted binders. We can't allow
397 arbitrary unlifted expression at the top-level though, unlifted binders cannot
398 be thunks, so we just allow string literals.
399
400 It is important to note that top-level primitive string literals cannot be
401 wrapped in Ticks, as is otherwise done with lifted bindings. CoreToStg expects
402 to see just a plain (Lit (MachStr ...)) expression on the RHS of primitive
403 string bindings; anything else and things break. CoreLint checks this invariant.
404
405 Also see Note [Compilation plan for top-level string literals].
406
407 Note [Compilation plan for top-level string literals]
408 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
409 Here is a summary on how top-level string literals are handled by various
410 parts of the compilation pipeline.
411
412 * In the source language, there is no way to bind a primitive string literal
413 at the top leve.
414
415 * In Core, we have a special rule that permits top-level Addr# bindings. See
416 Note [CoreSyn top-level string literals]. Core-to-core passes may introduce
417 new top-level string literals.
418
419 * In STG, top-level string literals are explicitly represented in the syntax
420 tree.
421
422 * A top-level string literal may end up exported from a module. In this case,
423 in the object file, the content of the exported literal is given a label with
424 the _bytes suffix.
425
426 Note [CoreSyn let/app invariant]
427 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
428 The let/app invariant
429 the right hand side of a non-recursive 'Let', and
430 the argument of an 'App',
431 /may/ be of unlifted type, but only if
432 the expression is ok-for-speculation
433 or the 'Let' is for a join point.
434
435 This means that the let can be floated around
436 without difficulty. For example, this is OK:
437
438 y::Int# = x +# 1#
439
440 But this is not, as it may affect termination if the
441 expression is floated out:
442
443 y::Int# = fac 4#
444
445 In this situation you should use @case@ rather than a @let@. The function
446 'CoreUtils.needsCaseBinding' can help you determine which to generate, or
447 alternatively use 'MkCore.mkCoreLet' rather than this constructor directly,
448 which will generate a @case@ if necessary
449
450 The let/app invariant is initially enforced by mkCoreLet and mkCoreApp in
451 coreSyn/MkCore.
452
453 Note [CoreSyn case invariants]
454 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
455 See #case_invariants#
456
457 Note [Levity polymorphism invariants]
458 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
459 The levity-polymorphism invariants are these:
460
461 * The type of a term-binder must not be levity-polymorphic,
462 unless it is a let(rec)-bound join point
463 (see Note [Invariants on join points])
464
465 * The type of the argument of an App must not be levity-polymorphic.
466
467 A type (t::TYPE r) is "levity polymorphic" if 'r' has any free variables.
468
469 For example
470 \(r::RuntimeRep). \(a::TYPE r). \(x::a). e
471 is illegal because x's type has kind (TYPE r), which has 'r' free.
472
473 See Note [Levity polymorphism checking] in DsMonad to see where these
474 invariants are established for user-written code.
475
476 Note [CoreSyn let goal]
477 ~~~~~~~~~~~~~~~~~~~~~~~
478 * The simplifier tries to ensure that if the RHS of a let is a constructor
479 application, its arguments are trivial, so that the constructor can be
480 inlined vigorously.
481
482 Note [Type let]
483 ~~~~~~~~~~~~~~~
484 See #type_let#
485
486 Note [Empty case alternatives]
487 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
488 The alternatives of a case expression should be exhaustive. But
489 this exhaustive list can be empty!
490
491 * A case expression can have empty alternatives if (and only if) the
492 scrutinee is bound to raise an exception or diverge. When do we know
493 this? See Note [Bottoming expressions] in CoreUtils.
494
495 * The possiblity of empty alternatives is one reason we need a type on
496 the case expression: if the alternatives are empty we can't get the
497 type from the alternatives!
498
499 * In the case of empty types (see Note [Bottoming expressions]), say
500 data T
501 we do NOT want to replace
502 case (x::T) of Bool {} --> error Bool "Inaccessible case"
503 because x might raise an exception, and *that*'s what we want to see!
504 (Trac #6067 is an example.) To preserve semantics we'd have to say
505 x `seq` error Bool "Inaccessible case"
506 but the 'seq' is just a case, so we are back to square 1. Or I suppose
507 we could say
508 x |> UnsafeCoerce T Bool
509 but that loses all trace of the fact that this originated with an empty
510 set of alternatives.
511
512 * We can use the empty-alternative construct to coerce error values from
513 one type to another. For example
514
515 f :: Int -> Int
516 f n = error "urk"
517
518 g :: Int -> (# Char, Bool #)
519 g x = case f x of { 0 -> ..., n -> ... }
520
521 Then if we inline f in g's RHS we get
522 case (error Int "urk") of (# Char, Bool #) { ... }
523 and we can discard the alternatives since the scrutinee is bottom to give
524 case (error Int "urk") of (# Char, Bool #) {}
525
526 This is nicer than using an unsafe coerce between Int ~ (# Char,Bool #),
527 if for no other reason that we don't need to instantiate the (~) at an
528 unboxed type.
529
530 * We treat a case expression with empty alternatives as trivial iff
531 its scrutinee is (see CoreUtils.exprIsTrivial). This is actually
532 important; see Note [Empty case is trivial] in CoreUtils
533
534 * An empty case is replaced by its scrutinee during the CoreToStg
535 conversion; remember STG is un-typed, so there is no need for
536 the empty case to do the type conversion.
537
538 Note [Join points]
539 ~~~~~~~~~~~~~~~~~~
540 In Core, a *join point* is a specially tagged function whose only occurrences
541 are saturated tail calls. A tail call can appear in these places:
542
543 1. In the branches (not the scrutinee) of a case
544 2. Underneath a let (value or join point)
545 3. Inside another join point
546
547 We write a join-point declaration as
548 join j @a @b x y = e1 in e2,
549 like a let binding but with "join" instead (or "join rec" for "let rec"). Note
550 that we put the parameters before the = rather than using lambdas; this is
551 because it's relevant how many parameters the join point takes *as a join
552 point.* This number is called the *join arity,* distinct from arity because it
553 counts types as well as values. Note that a join point may return a lambda! So
554 join j x = x + 1
555 is different from
556 join j = \x -> x + 1
557 The former has join arity 1, while the latter has join arity 0.
558
559 The identifier for a join point is called a join id or a *label.* An invocation
560 is called a *jump.* We write a jump using the jump keyword:
561
562 jump j 3
563
564 The words *label* and *jump* are evocative of assembly code (or Cmm) for a
565 reason: join points are indeed compiled as labeled blocks, and jumps become
566 actual jumps (plus argument passing and stack adjustment). There is no closure
567 allocated and only a fraction of the function-call overhead. Hence we would
568 like as many functions as possible to become join points (see OccurAnal) and
569 the type rules for join points ensure we preserve the properties that make them
570 efficient.
571
572 In the actual AST, a join point is indicated by the IdDetails of the binder: a
573 local value binding gets 'VanillaId' but a join point gets a 'JoinId' with its
574 join arity.
575
576 For more details, see the paper:
577
578 Luke Maurer, Paul Downen, Zena Ariola, and Simon Peyton Jones. "Compiling
579 without continuations." Submitted to PLDI'17.
580
581 https://www.microsoft.com/en-us/research/publication/compiling-without-continuations/
582
583 Note [Invariants on join points]
584 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
585 Join points must follow these invariants:
586
587 1. All occurrences must be tail calls. Each of these tail calls must pass the
588 same number of arguments, counting both types and values; we call this the
589 "join arity" (to distinguish from regular arity, which only counts values).
590
591 2. For join arity n, the right-hand side must begin with at least n lambdas.
592 No ticks, no casts, just lambdas! C.f. CoreUtils.joinRhsArity.
593
594 2a. Moreover, this same constraint applies to any unfolding of the binder.
595 Reason: if we want to push a continuation into the RHS we must push it
596 into the unfolding as well.
597
598 3. If the binding is recursive, then all other bindings in the recursive group
599 must also be join points.
600
601 4. The binding's type must not be polymorphic in its return type (as defined
602 in Note [The polymorphism rule of join points]).
603
604 However, join points have simpler invariants in other ways
605
606 5. A join point can have an unboxed type without the RHS being
607 ok-for-speculation (i.e. drop the let/app invariant)
608 e.g. let j :: Int# = factorial x in ...
609
610 6. A join point can have a levity-polymorphic RHS
611 e.g. let j :: r :: TYPE l = fail void# in ...
612 This happened in an intermediate program Trac #13394
613
614 Examples:
615
616 join j1 x = 1 + x in jump j (jump j x) -- Fails 1: non-tail call
617 join j1' x = 1 + x in if even a
618 then jump j1 a
619 else jump j1 a b -- Fails 1: inconsistent calls
620 join j2 x = flip (+) x in j2 1 2 -- Fails 2: not enough lambdas
621 join j2' x = \y -> x + y in j3 1 -- Passes: extra lams ok
622 join j @a (x :: a) = x -- Fails 4: polymorphic in ret type
623
624 Invariant 1 applies to left-hand sides of rewrite rules, so a rule for a join
625 point must have an exact call as its LHS.
626
627 Strictly speaking, invariant 3 is redundant, since a call from inside a lazy
628 binding isn't a tail call. Since a let-bound value can't invoke a free join
629 point, then, they can't be mutually recursive. (A Core binding group *can*
630 include spurious extra bindings if the occurrence analyser hasn't run, so
631 invariant 3 does still need to be checked.) For the rigorous definition of
632 "tail call", see Section 3 of the paper (Note [Join points]).
633
634 Invariant 4 is subtle; see Note [The polymorphism rule of join points].
635
636 Core Lint will check these invariants, anticipating that any binder whose
637 OccInfo is marked AlwaysTailCalled will become a join point as soon as the
638 simplifier (or simpleOptPgm) runs.
639
640 Note [The type of a join point]
641 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
642 A join point has the same type it would have as a function. That is, if it takes
643 an Int and a Bool and its body produces a String, its type is `Int -> Bool ->
644 String`. Natural as this may seem, it can be awkward. A join point shouldn't be
645 thought to "return" in the same sense a function does---a jump is one-way. This
646 is crucial for understanding how case-of-case interacts with join points:
647
648 case (join
649 j :: Int -> Bool -> String
650 j x y = ...
651 in
652 jump j z w) of
653 "" -> True
654 _ -> False
655
656 The simplifier will pull the case into the join point (see Note [Case-of-case
657 and join points] in Simplify):
658
659 join
660 j :: Int -> Bool -> Bool -- changed!
661 j x y = case ... of "" -> True
662 _ -> False
663 in
664 jump j z w
665
666 The body of the join point now returns a Bool, so the label `j` has to have its
667 type updated accordingly. Inconvenient though this may be, it has the advantage
668 that 'CoreUtils.exprType' can still return a type for any expression, including
669 a jump.
670
671 This differs from the paper (see Note [Invariants on join points]). In the
672 paper, we instead give j the type `Int -> Bool -> forall a. a`. Then each jump
673 carries the "return type" as a parameter, exactly the way other non-returning
674 functions like `error` work:
675
676 case (join
677 j :: Int -> Bool -> forall a. a
678 j x y = ...
679 in
680 jump j z w @String) of
681 "" -> True
682 _ -> False
683
684 Now we can move the case inward and we only have to change the jump:
685
686 join
687 j :: Int -> Bool -> forall a. a
688 j x y = case ... of "" -> True
689 _ -> False
690 in
691 jump j z w @Bool
692
693 (Core Lint would still check that the body of the join point has the right type;
694 that type would simply not be reflected in the join id.)
695
696 Note [The polymorphism rule of join points]
697 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
698 Invariant 4 of Note [Invariants on join points] forbids a join point to be
699 polymorphic in its return type. That is, if its type is
700
701 forall a1 ... ak. t1 -> ... -> tn -> r
702
703 where its join arity is k+n, none of the type parameters ai may occur free in r.
704 The most direct explanation is that given
705
706 join j @a1 ... @ak x1 ... xn = e1 in e2
707
708 our typing rules require `e1` and `e2` to have the same type. Therefore the type
709 of `e1`---the return type of the join point---must be the same as the type of
710 e2. Since the type variables aren't bound in `e2`, its type can't include them,
711 and thus neither can the type of `e1`.
712
713 There's a deeper explanation in terms of the sequent calculus in Section 5.3 of
714 a previous paper:
715
716 Paul Downen, Luke Maurer, Zena Ariola, and Simon Peyton Jones. "Sequent
717 calculus as a compiler intermediate language." ICFP'16.
718
719 https://www.microsoft.com/en-us/research/wp-content/uploads/2016/04/sequent-calculus-icfp16.pdf
720
721 The quick version: Consider the CPS term (the paper uses the sequent calculus,
722 but we can translate readily):
723
724 \k -> join j @a1 ... @ak x1 ... xn = e1 k in e2 k
725
726 Since `j` is a join point, it doesn't bind a continuation variable but reuses
727 the variable `k` from the context. But the parameters `ai` are not in `k`'s
728 scope, and `k`'s type determines the return type of `j`; thus the `ai`s don't
729 appear in the return type of `j`. (Also, since `e1` and `e2` are passed the same
730 continuation, they must have the same type; hence the direct explanation above.)
731
732 ************************************************************************
733 * *
734 In/Out type synonyms
735 * *
736 ********************************************************************* -}
737
738 {- Many passes apply a substitution, and it's very handy to have type
739 synonyms to remind us whether or not the substitution has been applied -}
740
741 -- Pre-cloning or substitution
742 type InBndr = CoreBndr
743 type InType = Type
744 type InKind = Kind
745 type InBind = CoreBind
746 type InExpr = CoreExpr
747 type InAlt = CoreAlt
748 type InArg = CoreArg
749 type InCoercion = Coercion
750
751 -- Post-cloning or substitution
752 type OutBndr = CoreBndr
753 type OutType = Type
754 type OutKind = Kind
755 type OutCoercion = Coercion
756 type OutBind = CoreBind
757 type OutExpr = CoreExpr
758 type OutAlt = CoreAlt
759 type OutArg = CoreArg
760
761
762 {- *********************************************************************
763 * *
764 Ticks
765 * *
766 ************************************************************************
767 -}
768
769 -- | Allows attaching extra information to points in expressions
770
771 -- If you edit this type, you may need to update the GHC formalism
772 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
773 data Tickish id =
774 -- | An @{-# SCC #-}@ profiling annotation, either automatically
775 -- added by the desugarer as a result of -auto-all, or added by
776 -- the user.
777 ProfNote {
778 profNoteCC :: CostCentre, -- ^ the cost centre
779 profNoteCount :: !Bool, -- ^ bump the entry count?
780 profNoteScope :: !Bool -- ^ scopes over the enclosed expression
781 -- (i.e. not just a tick)
782 }
783
784 -- | A "tick" used by HPC to track the execution of each
785 -- subexpression in the original source code.
786 | HpcTick {
787 tickModule :: Module,
788 tickId :: !Int
789 }
790
791 -- | A breakpoint for the GHCi debugger. This behaves like an HPC
792 -- tick, but has a list of free variables which will be available
793 -- for inspection in GHCi when the program stops at the breakpoint.
794 --
795 -- NB. we must take account of these Ids when (a) counting free variables,
796 -- and (b) substituting (don't substitute for them)
797 | Breakpoint
798 { breakpointId :: !Int
799 , breakpointFVs :: [id] -- ^ the order of this list is important:
800 -- it matches the order of the lists in the
801 -- appropriate entry in HscTypes.ModBreaks.
802 --
803 -- Careful about substitution! See
804 -- Note [substTickish] in CoreSubst.
805 }
806
807 -- | A source note.
808 --
809 -- Source notes are pure annotations: Their presence should neither
810 -- influence compilation nor execution. The semantics are given by
811 -- causality: The presence of a source note means that a local
812 -- change in the referenced source code span will possibly provoke
813 -- the generated code to change. On the flip-side, the functionality
814 -- of annotated code *must* be invariant against changes to all
815 -- source code *except* the spans referenced in the source notes
816 -- (see "Causality of optimized Haskell" paper for details).
817 --
818 -- Therefore extending the scope of any given source note is always
819 -- valid. Note that it is still undesirable though, as this reduces
820 -- their usefulness for debugging and profiling. Therefore we will
821 -- generally try only to make use of this property where it is
822 -- necessary to enable optimizations.
823 | SourceNote
824 { sourceSpan :: RealSrcSpan -- ^ Source covered
825 , sourceName :: String -- ^ Name for source location
826 -- (uses same names as CCs)
827 }
828
829 deriving (Eq, Ord, Data)
830
831 -- | A "counting tick" (where tickishCounts is True) is one that
832 -- counts evaluations in some way. We cannot discard a counting tick,
833 -- and the compiler should preserve the number of counting ticks as
834 -- far as possible.
835 --
836 -- However, we still allow the simplifier to increase or decrease
837 -- sharing, so in practice the actual number of ticks may vary, except
838 -- that we never change the value from zero to non-zero or vice versa.
839 tickishCounts :: Tickish id -> Bool
840 tickishCounts n@ProfNote{} = profNoteCount n
841 tickishCounts HpcTick{} = True
842 tickishCounts Breakpoint{} = True
843 tickishCounts _ = False
844
845
846 -- | Specifies the scoping behaviour of ticks. This governs the
847 -- behaviour of ticks that care about the covered code and the cost
848 -- associated with it. Important for ticks relating to profiling.
849 data TickishScoping =
850 -- | No scoping: The tick does not care about what code it
851 -- covers. Transformations can freely move code inside as well as
852 -- outside without any additional annotation obligations
853 NoScope
854
855 -- | Soft scoping: We want all code that is covered to stay
856 -- covered. Note that this scope type does not forbid
857 -- transformations from happening, as as long as all results of
858 -- the transformations are still covered by this tick or a copy of
859 -- it. For example
860 --
861 -- let x = tick<...> (let y = foo in bar) in baz
862 -- ===>
863 -- let x = tick<...> bar; y = tick<...> foo in baz
864 --
865 -- Is a valid transformation as far as "bar" and "foo" is
866 -- concerned, because both still are scoped over by the tick.
867 --
868 -- Note though that one might object to the "let" not being
869 -- covered by the tick any more. However, we are generally lax
870 -- with this - constant costs don't matter too much, and given
871 -- that the "let" was effectively merged we can view it as having
872 -- lost its identity anyway.
873 --
874 -- Also note that this scoping behaviour allows floating a tick
875 -- "upwards" in pretty much any situation. For example:
876 --
877 -- case foo of x -> tick<...> bar
878 -- ==>
879 -- tick<...> case foo of x -> bar
880 --
881 -- While this is always leagl, we want to make a best effort to
882 -- only make us of this where it exposes transformation
883 -- opportunities.
884 | SoftScope
885
886 -- | Cost centre scoping: We don't want any costs to move to other
887 -- cost-centre stacks. This means we not only want no code or cost
888 -- to get moved out of their cost centres, but we also object to
889 -- code getting associated with new cost-centre ticks - or
890 -- changing the order in which they get applied.
891 --
892 -- A rule of thumb is that we don't want any code to gain new
893 -- annotations. However, there are notable exceptions, for
894 -- example:
895 --
896 -- let f = \y -> foo in tick<...> ... (f x) ...
897 -- ==>
898 -- tick<...> ... foo[x/y] ...
899 --
900 -- In-lining lambdas like this is always legal, because inlining a
901 -- function does not change the cost-centre stack when the
902 -- function is called.
903 | CostCentreScope
904
905 deriving (Eq)
906
907 -- | Returns the intended scoping rule for a Tickish
908 tickishScoped :: Tickish id -> TickishScoping
909 tickishScoped n@ProfNote{}
910 | profNoteScope n = CostCentreScope
911 | otherwise = NoScope
912 tickishScoped HpcTick{} = NoScope
913 tickishScoped Breakpoint{} = CostCentreScope
914 -- Breakpoints are scoped: eventually we're going to do call
915 -- stacks, but also this helps prevent the simplifier from moving
916 -- breakpoints around and changing their result type (see #1531).
917 tickishScoped SourceNote{} = SoftScope
918
919 -- | Returns whether the tick scoping rule is at least as permissive
920 -- as the given scoping rule.
921 tickishScopesLike :: Tickish id -> TickishScoping -> Bool
922 tickishScopesLike t scope = tickishScoped t `like` scope
923 where NoScope `like` _ = True
924 _ `like` NoScope = False
925 SoftScope `like` _ = True
926 _ `like` SoftScope = False
927 CostCentreScope `like` _ = True
928
929 -- | Returns @True@ for ticks that can be floated upwards easily even
930 -- where it might change execution counts, such as:
931 --
932 -- Just (tick<...> foo)
933 -- ==>
934 -- tick<...> (Just foo)
935 --
936 -- This is a combination of @tickishSoftScope@ and
937 -- @tickishCounts@. Note that in principle splittable ticks can become
938 -- floatable using @mkNoTick@ -- even though there's currently no
939 -- tickish for which that is the case.
940 tickishFloatable :: Tickish id -> Bool
941 tickishFloatable t = t `tickishScopesLike` SoftScope && not (tickishCounts t)
942
943 -- | Returns @True@ for a tick that is both counting /and/ scoping and
944 -- can be split into its (tick, scope) parts using 'mkNoScope' and
945 -- 'mkNoTick' respectively.
946 tickishCanSplit :: Tickish id -> Bool
947 tickishCanSplit ProfNote{profNoteScope = True, profNoteCount = True}
948 = True
949 tickishCanSplit _ = False
950
951 mkNoCount :: Tickish id -> Tickish id
952 mkNoCount n | not (tickishCounts n) = n
953 | not (tickishCanSplit n) = panic "mkNoCount: Cannot split!"
954 mkNoCount n@ProfNote{} = n {profNoteCount = False}
955 mkNoCount _ = panic "mkNoCount: Undefined split!"
956
957 mkNoScope :: Tickish id -> Tickish id
958 mkNoScope n | tickishScoped n == NoScope = n
959 | not (tickishCanSplit n) = panic "mkNoScope: Cannot split!"
960 mkNoScope n@ProfNote{} = n {profNoteScope = False}
961 mkNoScope _ = panic "mkNoScope: Undefined split!"
962
963 -- | Return @True@ if this source annotation compiles to some backend
964 -- code. Without this flag, the tickish is seen as a simple annotation
965 -- that does not have any associated evaluation code.
966 --
967 -- What this means that we are allowed to disregard the tick if doing
968 -- so means that we can skip generating any code in the first place. A
969 -- typical example is top-level bindings:
970 --
971 -- foo = tick<...> \y -> ...
972 -- ==>
973 -- foo = \y -> tick<...> ...
974 --
975 -- Here there is just no operational difference between the first and
976 -- the second version. Therefore code generation should simply
977 -- translate the code as if it found the latter.
978 tickishIsCode :: Tickish id -> Bool
979 tickishIsCode SourceNote{} = False
980 tickishIsCode _tickish = True -- all the rest for now
981
982
983 -- | Governs the kind of expression that the tick gets placed on when
984 -- annotating for example using @mkTick@. If we find that we want to
985 -- put a tickish on an expression ruled out here, we try to float it
986 -- inwards until we find a suitable expression.
987 data TickishPlacement =
988
989 -- | Place ticks exactly on run-time expressions. We can still
990 -- move the tick through pure compile-time constructs such as
991 -- other ticks, casts or type lambdas. This is the most
992 -- restrictive placement rule for ticks, as all tickishs have in
993 -- common that they want to track runtime processes. The only
994 -- legal placement rule for counting ticks.
995 PlaceRuntime
996
997 -- | As @PlaceRuntime@, but we float the tick through all
998 -- lambdas. This makes sense where there is little difference
999 -- between annotating the lambda and annotating the lambda's code.
1000 | PlaceNonLam
1001
1002 -- | In addition to floating through lambdas, cost-centre style
1003 -- tickishs can also be moved from constructors, non-function
1004 -- variables and literals. For example:
1005 --
1006 -- let x = scc<...> C (scc<...> y) (scc<...> 3) in ...
1007 --
1008 -- Neither the constructor application, the variable or the
1009 -- literal are likely to have any cost worth mentioning. And even
1010 -- if y names a thunk, the call would not care about the
1011 -- evaluation context. Therefore removing all annotations in the
1012 -- above example is safe.
1013 | PlaceCostCentre
1014
1015 deriving (Eq)
1016
1017 -- | Placement behaviour we want for the ticks
1018 tickishPlace :: Tickish id -> TickishPlacement
1019 tickishPlace n@ProfNote{}
1020 | profNoteCount n = PlaceRuntime
1021 | otherwise = PlaceCostCentre
1022 tickishPlace HpcTick{} = PlaceRuntime
1023 tickishPlace Breakpoint{} = PlaceRuntime
1024 tickishPlace SourceNote{} = PlaceNonLam
1025
1026 -- | Returns whether one tick "contains" the other one, therefore
1027 -- making the second tick redundant.
1028 tickishContains :: Eq b => Tickish b -> Tickish b -> Bool
1029 tickishContains (SourceNote sp1 n1) (SourceNote sp2 n2)
1030 = containsSpan sp1 sp2 && n1 == n2
1031 -- compare the String last
1032 tickishContains t1 t2
1033 = t1 == t2
1034
1035 {-
1036 ************************************************************************
1037 * *
1038 Orphans
1039 * *
1040 ************************************************************************
1041 -}
1042
1043 -- | Is this instance an orphan? If it is not an orphan, contains an 'OccName'
1044 -- witnessing the instance's non-orphanhood.
1045 -- See Note [Orphans]
1046 data IsOrphan
1047 = IsOrphan
1048 | NotOrphan OccName -- The OccName 'n' witnesses the instance's non-orphanhood
1049 -- In that case, the instance is fingerprinted as part
1050 -- of the definition of 'n's definition
1051 deriving Data
1052
1053 -- | Returns true if 'IsOrphan' is orphan.
1054 isOrphan :: IsOrphan -> Bool
1055 isOrphan IsOrphan = True
1056 isOrphan _ = False
1057
1058 -- | Returns true if 'IsOrphan' is not an orphan.
1059 notOrphan :: IsOrphan -> Bool
1060 notOrphan NotOrphan{} = True
1061 notOrphan _ = False
1062
1063 chooseOrphanAnchor :: NameSet -> IsOrphan
1064 -- Something (rule, instance) is relate to all the Names in this
1065 -- list. Choose one of them to be an "anchor" for the orphan. We make
1066 -- the choice deterministic to avoid gratuitious changes in the ABI
1067 -- hash (Trac #4012). Specifically, use lexicographic comparison of
1068 -- OccName rather than comparing Uniques
1069 --
1070 -- NB: 'minimum' use Ord, and (Ord OccName) works lexicographically
1071 --
1072 chooseOrphanAnchor local_names
1073 | isEmptyNameSet local_names = IsOrphan
1074 | otherwise = NotOrphan (minimum occs)
1075 where
1076 occs = map nameOccName $ nonDetEltsUniqSet local_names
1077 -- It's OK to use nonDetEltsUFM here, see comments above
1078
1079 instance Binary IsOrphan where
1080 put_ bh IsOrphan = putByte bh 0
1081 put_ bh (NotOrphan n) = do
1082 putByte bh 1
1083 put_ bh n
1084 get bh = do
1085 h <- getByte bh
1086 case h of
1087 0 -> return IsOrphan
1088 _ -> do
1089 n <- get bh
1090 return $ NotOrphan n
1091
1092 {-
1093 Note [Orphans]
1094 ~~~~~~~~~~~~~~
1095 Class instances, rules, and family instances are divided into orphans
1096 and non-orphans. Roughly speaking, an instance/rule is an orphan if
1097 its left hand side mentions nothing defined in this module. Orphan-hood
1098 has two major consequences
1099
1100 * A module that contains orphans is called an "orphan module". If
1101 the module being compiled depends (transitively) on an oprhan
1102 module M, then M.hi is read in regardless of whether M is oherwise
1103 needed. This is to ensure that we don't miss any instance decls in
1104 M. But it's painful, because it means we need to keep track of all
1105 the orphan modules below us.
1106
1107 * A non-orphan is not finger-printed separately. Instead, for
1108 fingerprinting purposes it is treated as part of the entity it
1109 mentions on the LHS. For example
1110 data T = T1 | T2
1111 instance Eq T where ....
1112 The instance (Eq T) is incorprated as part of T's fingerprint.
1113
1114 In contrast, orphans are all fingerprinted together in the
1115 mi_orph_hash field of the ModIface.
1116
1117 See MkIface.addFingerprints.
1118
1119 Orphan-hood is computed
1120 * For class instances:
1121 when we make a ClsInst
1122 (because it is needed during instance lookup)
1123
1124 * For rules and family instances:
1125 when we generate an IfaceRule (MkIface.coreRuleToIfaceRule)
1126 or IfaceFamInst (MkIface.instanceToIfaceInst)
1127 -}
1128
1129 {-
1130 ************************************************************************
1131 * *
1132 \subsection{Transformation rules}
1133 * *
1134 ************************************************************************
1135
1136 The CoreRule type and its friends are dealt with mainly in CoreRules,
1137 but CoreFVs, Subst, PprCore, CoreTidy also inspect the representation.
1138 -}
1139
1140 -- | Gathers a collection of 'CoreRule's. Maps (the name of) an 'Id' to its rules
1141 type RuleBase = NameEnv [CoreRule]
1142 -- The rules are unordered;
1143 -- we sort out any overlaps on lookup
1144
1145 -- | A full rule environment which we can apply rules from. Like a 'RuleBase',
1146 -- but it also includes the set of visible orphans we use to filter out orphan
1147 -- rules which are not visible (even though we can see them...)
1148 data RuleEnv
1149 = RuleEnv { re_base :: RuleBase
1150 , re_visible_orphs :: ModuleSet
1151 }
1152
1153 mkRuleEnv :: RuleBase -> [Module] -> RuleEnv
1154 mkRuleEnv rules vis_orphs = RuleEnv rules (mkModuleSet vis_orphs)
1155
1156 emptyRuleEnv :: RuleEnv
1157 emptyRuleEnv = RuleEnv emptyNameEnv emptyModuleSet
1158
1159 -- | A 'CoreRule' is:
1160 --
1161 -- * \"Local\" if the function it is a rule for is defined in the
1162 -- same module as the rule itself.
1163 --
1164 -- * \"Orphan\" if nothing on the LHS is defined in the same module
1165 -- as the rule itself
1166 data CoreRule
1167 = Rule {
1168 ru_name :: RuleName, -- ^ Name of the rule, for communication with the user
1169 ru_act :: Activation, -- ^ When the rule is active
1170
1171 -- Rough-matching stuff
1172 -- see comments with InstEnv.ClsInst( is_cls, is_rough )
1173 ru_fn :: Name, -- ^ Name of the 'Id.Id' at the head of this rule
1174 ru_rough :: [Maybe Name], -- ^ Name at the head of each argument to the left hand side
1175
1176 -- Proper-matching stuff
1177 -- see comments with InstEnv.ClsInst( is_tvs, is_tys )
1178 ru_bndrs :: [CoreBndr], -- ^ Variables quantified over
1179 ru_args :: [CoreExpr], -- ^ Left hand side arguments
1180
1181 -- And the right-hand side
1182 ru_rhs :: CoreExpr, -- ^ Right hand side of the rule
1183 -- Occurrence info is guaranteed correct
1184 -- See Note [OccInfo in unfoldings and rules]
1185
1186 -- Locality
1187 ru_auto :: Bool, -- ^ @True@ <=> this rule is auto-generated
1188 -- (notably by Specialise or SpecConstr)
1189 -- @False@ <=> generated at the user's behest
1190 -- See Note [Trimming auto-rules] in TidyPgm
1191 -- for the sole purpose of this field.
1192
1193 ru_origin :: !Module, -- ^ 'Module' the rule was defined in, used
1194 -- to test if we should see an orphan rule.
1195
1196 ru_orphan :: !IsOrphan, -- ^ Whether or not the rule is an orphan.
1197
1198 ru_local :: Bool -- ^ @True@ iff the fn at the head of the rule is
1199 -- defined in the same module as the rule
1200 -- and is not an implicit 'Id' (like a record selector,
1201 -- class operation, or data constructor). This
1202 -- is different from 'ru_orphan', where a rule
1203 -- can avoid being an orphan if *any* Name in
1204 -- LHS of the rule was defined in the same
1205 -- module as the rule.
1206 }
1207
1208 -- | Built-in rules are used for constant folding
1209 -- and suchlike. They have no free variables.
1210 -- A built-in rule is always visible (there is no such thing as
1211 -- an orphan built-in rule.)
1212 | BuiltinRule {
1213 ru_name :: RuleName, -- ^ As above
1214 ru_fn :: Name, -- ^ As above
1215 ru_nargs :: Int, -- ^ Number of arguments that 'ru_try' consumes,
1216 -- if it fires, including type arguments
1217 ru_try :: RuleFun
1218 -- ^ This function does the rewrite. It given too many
1219 -- arguments, it simply discards them; the returned 'CoreExpr'
1220 -- is just the rewrite of 'ru_fn' applied to the first 'ru_nargs' args
1221 }
1222 -- See Note [Extra args in rule matching] in Rules.hs
1223
1224 type RuleFun = DynFlags -> InScopeEnv -> Id -> [CoreExpr] -> Maybe CoreExpr
1225 type InScopeEnv = (InScopeSet, IdUnfoldingFun)
1226
1227 type IdUnfoldingFun = Id -> Unfolding
1228 -- A function that embodies how to unfold an Id if you need
1229 -- to do that in the Rule. The reason we need to pass this info in
1230 -- is that whether an Id is unfoldable depends on the simplifier phase
1231
1232 isBuiltinRule :: CoreRule -> Bool
1233 isBuiltinRule (BuiltinRule {}) = True
1234 isBuiltinRule _ = False
1235
1236 isAutoRule :: CoreRule -> Bool
1237 isAutoRule (BuiltinRule {}) = False
1238 isAutoRule (Rule { ru_auto = is_auto }) = is_auto
1239
1240 -- | The number of arguments the 'ru_fn' must be applied
1241 -- to before the rule can match on it
1242 ruleArity :: CoreRule -> Int
1243 ruleArity (BuiltinRule {ru_nargs = n}) = n
1244 ruleArity (Rule {ru_args = args}) = length args
1245
1246 ruleName :: CoreRule -> RuleName
1247 ruleName = ru_name
1248
1249 ruleActivation :: CoreRule -> Activation
1250 ruleActivation (BuiltinRule { }) = AlwaysActive
1251 ruleActivation (Rule { ru_act = act }) = act
1252
1253 -- | The 'Name' of the 'Id.Id' at the head of the rule left hand side
1254 ruleIdName :: CoreRule -> Name
1255 ruleIdName = ru_fn
1256
1257 isLocalRule :: CoreRule -> Bool
1258 isLocalRule = ru_local
1259
1260 -- | Set the 'Name' of the 'Id.Id' at the head of the rule left hand side
1261 setRuleIdName :: Name -> CoreRule -> CoreRule
1262 setRuleIdName nm ru = ru { ru_fn = nm }
1263
1264 {-
1265 ************************************************************************
1266 * *
1267 \subsection{Vectorisation declarations}
1268 * *
1269 ************************************************************************
1270
1271 Representation of desugared vectorisation declarations that are fed to the vectoriser (via
1272 'ModGuts').
1273 -}
1274
1275 data CoreVect = Vect Id CoreExpr
1276 | NoVect Id
1277 | VectType Bool TyCon (Maybe TyCon)
1278 | VectClass TyCon -- class tycon
1279 | VectInst Id -- instance dfun (always SCALAR) !!!FIXME: should be superfluous now
1280
1281 {-
1282 ************************************************************************
1283 * *
1284 Unfoldings
1285 * *
1286 ************************************************************************
1287
1288 The @Unfolding@ type is declared here to avoid numerous loops
1289 -}
1290
1291 -- | Records the /unfolding/ of an identifier, which is approximately the form the
1292 -- identifier would have if we substituted its definition in for the identifier.
1293 -- This type should be treated as abstract everywhere except in "CoreUnfold"
1294 data Unfolding
1295 = NoUnfolding -- ^ We have no information about the unfolding.
1296
1297 | BootUnfolding -- ^ We have no information about the unfolding, because
1298 -- this 'Id' came from an @hi-boot@ file.
1299 -- See Note [Inlining and hs-boot files] in ToIface
1300 -- for what this is used for.
1301
1302 | OtherCon [AltCon] -- ^ It ain't one of these constructors.
1303 -- @OtherCon xs@ also indicates that something has been evaluated
1304 -- and hence there's no point in re-evaluating it.
1305 -- @OtherCon []@ is used even for non-data-type values
1306 -- to indicated evaluated-ness. Notably:
1307 --
1308 -- > data C = C !(Int -> Int)
1309 -- > case x of { C f -> ... }
1310 --
1311 -- Here, @f@ gets an @OtherCon []@ unfolding.
1312
1313 | DFunUnfolding { -- The Unfolding of a DFunId
1314 -- See Note [DFun unfoldings]
1315 -- df = /\a1..am. \d1..dn. MkD t1 .. tk
1316 -- (op1 a1..am d1..dn)
1317 -- (op2 a1..am d1..dn)
1318 df_bndrs :: [Var], -- The bound variables [a1..m],[d1..dn]
1319 df_con :: DataCon, -- The dictionary data constructor (never a newtype datacon)
1320 df_args :: [CoreExpr] -- Args of the data con: types, superclasses and methods,
1321 } -- in positional order
1322
1323 | CoreUnfolding { -- An unfolding for an Id with no pragma,
1324 -- or perhaps a NOINLINE pragma
1325 -- (For NOINLINE, the phase, if any, is in the
1326 -- InlinePragInfo for this Id.)
1327 uf_tmpl :: CoreExpr, -- Template; occurrence info is correct
1328 uf_src :: UnfoldingSource, -- Where the unfolding came from
1329 uf_is_top :: Bool, -- True <=> top level binding
1330 uf_is_value :: Bool, -- exprIsHNF template (cached); it is ok to discard
1331 -- a `seq` on this variable
1332 uf_is_conlike :: Bool, -- True <=> applicn of constructor or CONLIKE function
1333 -- Cached version of exprIsConLike
1334 uf_is_work_free :: Bool, -- True <=> doesn't waste (much) work to expand
1335 -- inside an inlining
1336 -- Cached version of exprIsCheap
1337 uf_expandable :: Bool, -- True <=> can expand in RULE matching
1338 -- Cached version of exprIsExpandable
1339 uf_guidance :: UnfoldingGuidance -- Tells about the *size* of the template.
1340 }
1341 -- ^ An unfolding with redundant cached information. Parameters:
1342 --
1343 -- uf_tmpl: Template used to perform unfolding;
1344 -- NB: Occurrence info is guaranteed correct:
1345 -- see Note [OccInfo in unfoldings and rules]
1346 --
1347 -- uf_is_top: Is this a top level binding?
1348 --
1349 -- uf_is_value: 'exprIsHNF' template (cached); it is ok to discard a 'seq' on
1350 -- this variable
1351 --
1352 -- uf_is_work_free: Does this waste only a little work if we expand it inside an inlining?
1353 -- Basically this is a cached version of 'exprIsWorkFree'
1354 --
1355 -- uf_guidance: Tells us about the /size/ of the unfolding template
1356
1357
1358 ------------------------------------------------
1359 data UnfoldingSource
1360 = -- See also Note [Historical note: unfoldings for wrappers]
1361
1362 InlineRhs -- The current rhs of the function
1363 -- Replace uf_tmpl each time around
1364
1365 | InlineStable -- From an INLINE or INLINABLE pragma
1366 -- INLINE if guidance is UnfWhen
1367 -- INLINABLE if guidance is UnfIfGoodArgs/UnfoldNever
1368 -- (well, technically an INLINABLE might be made
1369 -- UnfWhen if it was small enough, and then
1370 -- it will behave like INLINE outside the current
1371 -- module, but that is the way automatic unfoldings
1372 -- work so it is consistent with the intended
1373 -- meaning of INLINABLE).
1374 --
1375 -- uf_tmpl may change, but only as a result of
1376 -- gentle simplification, it doesn't get updated
1377 -- to the current RHS during compilation as with
1378 -- InlineRhs.
1379 --
1380 -- See Note [InlineStable]
1381
1382 | InlineCompulsory -- Something that *has* no binding, so you *must* inline it
1383 -- Only a few primop-like things have this property
1384 -- (see MkId.hs, calls to mkCompulsoryUnfolding).
1385 -- Inline absolutely always, however boring the context.
1386
1387
1388
1389 -- | 'UnfoldingGuidance' says when unfolding should take place
1390 data UnfoldingGuidance
1391 = UnfWhen { -- Inline without thinking about the *size* of the uf_tmpl
1392 -- Used (a) for small *and* cheap unfoldings
1393 -- (b) for INLINE functions
1394 -- See Note [INLINE for small functions] in CoreUnfold
1395 ug_arity :: Arity, -- Number of value arguments expected
1396
1397 ug_unsat_ok :: Bool, -- True <=> ok to inline even if unsaturated
1398 ug_boring_ok :: Bool -- True <=> ok to inline even if the context is boring
1399 -- So True,True means "always"
1400 }
1401
1402 | UnfIfGoodArgs { -- Arose from a normal Id; the info here is the
1403 -- result of a simple analysis of the RHS
1404
1405 ug_args :: [Int], -- Discount if the argument is evaluated.
1406 -- (i.e., a simplification will definitely
1407 -- be possible). One elt of the list per *value* arg.
1408
1409 ug_size :: Int, -- The "size" of the unfolding.
1410
1411 ug_res :: Int -- Scrutinee discount: the discount to substract if the thing is in
1412 } -- a context (case (thing args) of ...),
1413 -- (where there are the right number of arguments.)
1414
1415 | UnfNever -- The RHS is big, so don't inline it
1416 deriving (Eq)
1417
1418 {-
1419 Note [Historical note: unfoldings for wrappers]
1420 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1421 We used to have a nice clever scheme in interface files for
1422 wrappers. A wrapper's unfolding can be reconstructed from its worker's
1423 id and its strictness. This decreased .hi file size (sometimes
1424 significantly, for modules like GHC.Classes with many high-arity w/w
1425 splits) and had a slight corresponding effect on compile times.
1426
1427 However, when we added the second demand analysis, this scheme lead to
1428 some Core lint errors. The second analysis could change the strictness
1429 signatures, which sometimes resulted in a wrapper's regenerated
1430 unfolding applying the wrapper to too many arguments.
1431
1432 Instead of repairing the clever .hi scheme, we abandoned it in favor
1433 of simplicity. The .hi sizes are usually insignificant (excluding the
1434 +1M for base libraries), and compile time barely increases (~+1% for
1435 nofib). The nicer upshot is that the UnfoldingSource no longer mentions
1436 an Id, so, eg, substitutions need not traverse them.
1437
1438
1439 Note [DFun unfoldings]
1440 ~~~~~~~~~~~~~~~~~~~~~~
1441 The Arity in a DFunUnfolding is total number of args (type and value)
1442 that the DFun needs to produce a dictionary. That's not necessarily
1443 related to the ordinary arity of the dfun Id, esp if the class has
1444 one method, so the dictionary is represented by a newtype. Example
1445
1446 class C a where { op :: a -> Int }
1447 instance C a -> C [a] where op xs = op (head xs)
1448
1449 The instance translates to
1450
1451 $dfCList :: forall a. C a => C [a] -- Arity 2!
1452 $dfCList = /\a.\d. $copList {a} d |> co
1453
1454 $copList :: forall a. C a => [a] -> Int -- Arity 2!
1455 $copList = /\a.\d.\xs. op {a} d (head xs)
1456
1457 Now we might encounter (op (dfCList {ty} d) a1 a2)
1458 and we want the (op (dfList {ty} d)) rule to fire, because $dfCList
1459 has all its arguments, even though its (value) arity is 2. That's
1460 why we record the number of expected arguments in the DFunUnfolding.
1461
1462 Note that although it's an Arity, it's most convenient for it to give
1463 the *total* number of arguments, both type and value. See the use
1464 site in exprIsConApp_maybe.
1465 -}
1466
1467 -- Constants for the UnfWhen constructor
1468 needSaturated, unSaturatedOk :: Bool
1469 needSaturated = False
1470 unSaturatedOk = True
1471
1472 boringCxtNotOk, boringCxtOk :: Bool
1473 boringCxtOk = True
1474 boringCxtNotOk = False
1475
1476 ------------------------------------------------
1477 noUnfolding :: Unfolding
1478 -- ^ There is no known 'Unfolding'
1479 evaldUnfolding :: Unfolding
1480 -- ^ This unfolding marks the associated thing as being evaluated
1481
1482 noUnfolding = NoUnfolding
1483 evaldUnfolding = OtherCon []
1484
1485 -- | There is no known 'Unfolding', because this came from an
1486 -- hi-boot file.
1487 bootUnfolding :: Unfolding
1488 bootUnfolding = BootUnfolding
1489
1490 mkOtherCon :: [AltCon] -> Unfolding
1491 mkOtherCon = OtherCon
1492
1493 isStableSource :: UnfoldingSource -> Bool
1494 -- Keep the unfolding template
1495 isStableSource InlineCompulsory = True
1496 isStableSource InlineStable = True
1497 isStableSource InlineRhs = False
1498
1499 -- | Retrieves the template of an unfolding: panics if none is known
1500 unfoldingTemplate :: Unfolding -> CoreExpr
1501 unfoldingTemplate = uf_tmpl
1502
1503 -- | Retrieves the template of an unfolding if possible
1504 -- maybeUnfoldingTemplate is used mainly wnen specialising, and we do
1505 -- want to specialise DFuns, so it's important to return a template
1506 -- for DFunUnfoldings
1507 maybeUnfoldingTemplate :: Unfolding -> Maybe CoreExpr
1508 maybeUnfoldingTemplate (CoreUnfolding { uf_tmpl = expr })
1509 = Just expr
1510 maybeUnfoldingTemplate (DFunUnfolding { df_bndrs = bndrs, df_con = con, df_args = args })
1511 = Just (mkLams bndrs (mkApps (Var (dataConWorkId con)) args))
1512 maybeUnfoldingTemplate _
1513 = Nothing
1514
1515 -- | The constructors that the unfolding could never be:
1516 -- returns @[]@ if no information is available
1517 otherCons :: Unfolding -> [AltCon]
1518 otherCons (OtherCon cons) = cons
1519 otherCons _ = []
1520
1521 -- | Determines if it is certainly the case that the unfolding will
1522 -- yield a value (something in HNF): returns @False@ if unsure
1523 isValueUnfolding :: Unfolding -> Bool
1524 -- Returns False for OtherCon
1525 isValueUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald
1526 isValueUnfolding _ = False
1527
1528 -- | Determines if it possibly the case that the unfolding will
1529 -- yield a value. Unlike 'isValueUnfolding' it returns @True@
1530 -- for 'OtherCon'
1531 isEvaldUnfolding :: Unfolding -> Bool
1532 -- Returns True for OtherCon
1533 isEvaldUnfolding (OtherCon _) = True
1534 isEvaldUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald
1535 isEvaldUnfolding _ = False
1536
1537 -- | @True@ if the unfolding is a constructor application, the application
1538 -- of a CONLIKE function or 'OtherCon'
1539 isConLikeUnfolding :: Unfolding -> Bool
1540 isConLikeUnfolding (OtherCon _) = True
1541 isConLikeUnfolding (CoreUnfolding { uf_is_conlike = con }) = con
1542 isConLikeUnfolding _ = False
1543
1544 -- | Is the thing we will unfold into certainly cheap?
1545 isCheapUnfolding :: Unfolding -> Bool
1546 isCheapUnfolding (CoreUnfolding { uf_is_work_free = is_wf }) = is_wf
1547 isCheapUnfolding _ = False
1548
1549 isExpandableUnfolding :: Unfolding -> Bool
1550 isExpandableUnfolding (CoreUnfolding { uf_expandable = is_expable }) = is_expable
1551 isExpandableUnfolding _ = False
1552
1553 expandUnfolding_maybe :: Unfolding -> Maybe CoreExpr
1554 -- Expand an expandable unfolding; this is used in rule matching
1555 -- See Note [Expanding variables] in Rules.hs
1556 -- The key point here is that CONLIKE things can be expanded
1557 expandUnfolding_maybe (CoreUnfolding { uf_expandable = True, uf_tmpl = rhs }) = Just rhs
1558 expandUnfolding_maybe _ = Nothing
1559
1560 isCompulsoryUnfolding :: Unfolding -> Bool
1561 isCompulsoryUnfolding (CoreUnfolding { uf_src = InlineCompulsory }) = True
1562 isCompulsoryUnfolding _ = False
1563
1564 isStableUnfolding :: Unfolding -> Bool
1565 -- True of unfoldings that should not be overwritten
1566 -- by a CoreUnfolding for the RHS of a let-binding
1567 isStableUnfolding (CoreUnfolding { uf_src = src }) = isStableSource src
1568 isStableUnfolding (DFunUnfolding {}) = True
1569 isStableUnfolding _ = False
1570
1571 -- | Only returns False if there is no unfolding information available at all
1572 hasSomeUnfolding :: Unfolding -> Bool
1573 hasSomeUnfolding NoUnfolding = False
1574 hasSomeUnfolding BootUnfolding = False
1575 hasSomeUnfolding _ = True
1576
1577 isBootUnfolding :: Unfolding -> Bool
1578 isBootUnfolding BootUnfolding = True
1579 isBootUnfolding _ = False
1580
1581 neverUnfoldGuidance :: UnfoldingGuidance -> Bool
1582 neverUnfoldGuidance UnfNever = True
1583 neverUnfoldGuidance _ = False
1584
1585 isFragileUnfolding :: Unfolding -> Bool
1586 -- An unfolding is fragile if it mentions free variables or
1587 -- is otherwise subject to change. A robust one can be kept.
1588 -- See Note [Fragile unfoldings]
1589 isFragileUnfolding (CoreUnfolding {}) = True
1590 isFragileUnfolding (DFunUnfolding {}) = True
1591 isFragileUnfolding _ = False
1592 -- NoUnfolding, BootUnfolding, OtherCon are all non-fragile
1593
1594 canUnfold :: Unfolding -> Bool
1595 canUnfold (CoreUnfolding { uf_guidance = g }) = not (neverUnfoldGuidance g)
1596 canUnfold _ = False
1597
1598 {- Note [Fragile unfoldings]
1599 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1600 An unfolding is "fragile" if it mentions free variables (and hence would
1601 need substitution) or might be affected by optimisation. The non-fragile
1602 ones are
1603
1604 NoUnfolding, BootUnfolding
1605
1606 OtherCon {} If we know this binder (say a lambda binder) will be
1607 bound to an evaluated thing, we want to retain that
1608 info in simpleOptExpr; see Trac #13077.
1609
1610 We consider even a StableUnfolding as fragile, because it needs substitution.
1611
1612 Note [InlineStable]
1613 ~~~~~~~~~~~~~~~~~
1614 When you say
1615 {-# INLINE f #-}
1616 f x = <rhs>
1617 you intend that calls (f e) are replaced by <rhs>[e/x] So we
1618 should capture (\x.<rhs>) in the Unfolding of 'f', and never meddle
1619 with it. Meanwhile, we can optimise <rhs> to our heart's content,
1620 leaving the original unfolding intact in Unfolding of 'f'. For example
1621 all xs = foldr (&&) True xs
1622 any p = all . map p {-# INLINE any #-}
1623 We optimise any's RHS fully, but leave the InlineRule saying "all . map p",
1624 which deforests well at the call site.
1625
1626 So INLINE pragma gives rise to an InlineRule, which captures the original RHS.
1627
1628 Moreover, it's only used when 'f' is applied to the
1629 specified number of arguments; that is, the number of argument on
1630 the LHS of the '=' sign in the original source definition.
1631 For example, (.) is now defined in the libraries like this
1632 {-# INLINE (.) #-}
1633 (.) f g = \x -> f (g x)
1634 so that it'll inline when applied to two arguments. If 'x' appeared
1635 on the left, thus
1636 (.) f g x = f (g x)
1637 it'd only inline when applied to three arguments. This slightly-experimental
1638 change was requested by Roman, but it seems to make sense.
1639
1640 See also Note [Inlining an InlineRule] in CoreUnfold.
1641
1642
1643 Note [OccInfo in unfoldings and rules]
1644 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1645 In unfoldings and rules, we guarantee that the template is occ-analysed,
1646 so that the occurrence info on the binders is correct. This is important,
1647 because the Simplifier does not re-analyse the template when using it. If
1648 the occurrence info is wrong
1649 - We may get more simpifier iterations than necessary, because
1650 once-occ info isn't there
1651 - More seriously, we may get an infinite loop if there's a Rec
1652 without a loop breaker marked
1653
1654
1655 ************************************************************************
1656 * *
1657 AltCon
1658 * *
1659 ************************************************************************
1660 -}
1661
1662 -- The Ord is needed for the FiniteMap used in the lookForConstructor
1663 -- in SimplEnv. If you declared that lookForConstructor *ignores*
1664 -- constructor-applications with LitArg args, then you could get
1665 -- rid of this Ord.
1666
1667 instance Outputable AltCon where
1668 ppr (DataAlt dc) = ppr dc
1669 ppr (LitAlt lit) = ppr lit
1670 ppr DEFAULT = text "__DEFAULT"
1671
1672 cmpAlt :: (AltCon, a, b) -> (AltCon, a, b) -> Ordering
1673 cmpAlt (con1, _, _) (con2, _, _) = con1 `cmpAltCon` con2
1674
1675 ltAlt :: (AltCon, a, b) -> (AltCon, a, b) -> Bool
1676 ltAlt a1 a2 = (a1 `cmpAlt` a2) == LT
1677
1678 cmpAltCon :: AltCon -> AltCon -> Ordering
1679 -- ^ Compares 'AltCon's within a single list of alternatives
1680 cmpAltCon DEFAULT DEFAULT = EQ
1681 cmpAltCon DEFAULT _ = LT
1682
1683 cmpAltCon (DataAlt d1) (DataAlt d2) = dataConTag d1 `compare` dataConTag d2
1684 cmpAltCon (DataAlt _) DEFAULT = GT
1685 cmpAltCon (LitAlt l1) (LitAlt l2) = l1 `compare` l2
1686 cmpAltCon (LitAlt _) DEFAULT = GT
1687
1688 cmpAltCon con1 con2 = WARN( True, text "Comparing incomparable AltCons" <+>
1689 ppr con1 <+> ppr con2 )
1690 LT
1691
1692 {-
1693 ************************************************************************
1694 * *
1695 \subsection{Useful synonyms}
1696 * *
1697 ************************************************************************
1698
1699 Note [CoreProgram]
1700 ~~~~~~~~~~~~~~~~~~
1701 The top level bindings of a program, a CoreProgram, are represented as
1702 a list of CoreBind
1703
1704 * Later bindings in the list can refer to earlier ones, but not vice
1705 versa. So this is OK
1706 NonRec { x = 4 }
1707 Rec { p = ...q...x...
1708 ; q = ...p...x }
1709 Rec { f = ...p..x..f.. }
1710 NonRec { g = ..f..q...x.. }
1711 But it would NOT be ok for 'f' to refer to 'g'.
1712
1713 * The occurrence analyser does strongly-connected component analysis
1714 on each Rec binding, and splits it into a sequence of smaller
1715 bindings where possible. So the program typically starts life as a
1716 single giant Rec, which is then dependency-analysed into smaller
1717 chunks.
1718 -}
1719
1720 -- If you edit this type, you may need to update the GHC formalism
1721 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
1722 type CoreProgram = [CoreBind] -- See Note [CoreProgram]
1723
1724 -- | The common case for the type of binders and variables when
1725 -- we are manipulating the Core language within GHC
1726 type CoreBndr = Var
1727 -- | Expressions where binders are 'CoreBndr's
1728 type CoreExpr = Expr CoreBndr
1729 -- | Argument expressions where binders are 'CoreBndr's
1730 type CoreArg = Arg CoreBndr
1731 -- | Binding groups where binders are 'CoreBndr's
1732 type CoreBind = Bind CoreBndr
1733 -- | Case alternatives where binders are 'CoreBndr's
1734 type CoreAlt = Alt CoreBndr
1735
1736 {-
1737 ************************************************************************
1738 * *
1739 \subsection{Tagging}
1740 * *
1741 ************************************************************************
1742 -}
1743
1744 -- | Binders are /tagged/ with a t
1745 data TaggedBndr t = TB CoreBndr t -- TB for "tagged binder"
1746
1747 type TaggedBind t = Bind (TaggedBndr t)
1748 type TaggedExpr t = Expr (TaggedBndr t)
1749 type TaggedArg t = Arg (TaggedBndr t)
1750 type TaggedAlt t = Alt (TaggedBndr t)
1751
1752 instance Outputable b => Outputable (TaggedBndr b) where
1753 ppr (TB b l) = char '<' <> ppr b <> comma <> ppr l <> char '>'
1754
1755 deTagExpr :: TaggedExpr t -> CoreExpr
1756 deTagExpr (Var v) = Var v
1757 deTagExpr (Lit l) = Lit l
1758 deTagExpr (Type ty) = Type ty
1759 deTagExpr (Coercion co) = Coercion co
1760 deTagExpr (App e1 e2) = App (deTagExpr e1) (deTagExpr e2)
1761 deTagExpr (Lam (TB b _) e) = Lam b (deTagExpr e)
1762 deTagExpr (Let bind body) = Let (deTagBind bind) (deTagExpr body)
1763 deTagExpr (Case e (TB b _) ty alts) = Case (deTagExpr e) b ty (map deTagAlt alts)
1764 deTagExpr (Tick t e) = Tick t (deTagExpr e)
1765 deTagExpr (Cast e co) = Cast (deTagExpr e) co
1766
1767 deTagBind :: TaggedBind t -> CoreBind
1768 deTagBind (NonRec (TB b _) rhs) = NonRec b (deTagExpr rhs)
1769 deTagBind (Rec prs) = Rec [(b, deTagExpr rhs) | (TB b _, rhs) <- prs]
1770
1771 deTagAlt :: TaggedAlt t -> CoreAlt
1772 deTagAlt (con, bndrs, rhs) = (con, [b | TB b _ <- bndrs], deTagExpr rhs)
1773
1774 {-
1775 ************************************************************************
1776 * *
1777 \subsection{Core-constructing functions with checking}
1778 * *
1779 ************************************************************************
1780 -}
1781
1782 -- | Apply a list of argument expressions to a function expression in a nested fashion. Prefer to
1783 -- use 'MkCore.mkCoreApps' if possible
1784 mkApps :: Expr b -> [Arg b] -> Expr b
1785 -- | Apply a list of type argument expressions to a function expression in a nested fashion
1786 mkTyApps :: Expr b -> [Type] -> Expr b
1787 -- | Apply a list of coercion argument expressions to a function expression in a nested fashion
1788 mkCoApps :: Expr b -> [Coercion] -> Expr b
1789 -- | Apply a list of type or value variables to a function expression in a nested fashion
1790 mkVarApps :: Expr b -> [Var] -> Expr b
1791 -- | Apply a list of argument expressions to a data constructor in a nested fashion. Prefer to
1792 -- use 'MkCore.mkCoreConApps' if possible
1793 mkConApp :: DataCon -> [Arg b] -> Expr b
1794
1795 mkApps f args = foldl App f args
1796 mkCoApps f args = foldl (\ e a -> App e (Coercion a)) f args
1797 mkVarApps f vars = foldl (\ e a -> App e (varToCoreExpr a)) f vars
1798 mkConApp con args = mkApps (Var (dataConWorkId con)) args
1799
1800 mkTyApps f args = foldl (\ e a -> App e (mkTyArg a)) f args
1801
1802 mkConApp2 :: DataCon -> [Type] -> [Var] -> Expr b
1803 mkConApp2 con tys arg_ids = Var (dataConWorkId con)
1804 `mkApps` map Type tys
1805 `mkApps` map varToCoreExpr arg_ids
1806
1807 mkTyArg :: Type -> Expr b
1808 mkTyArg ty
1809 | Just co <- isCoercionTy_maybe ty = Coercion co
1810 | otherwise = Type ty
1811
1812 -- | Create a machine integer literal expression of type @Int#@ from an @Integer@.
1813 -- If you want an expression of type @Int@ use 'MkCore.mkIntExpr'
1814 mkIntLit :: DynFlags -> Integer -> Expr b
1815 -- | Create a machine integer literal expression of type @Int#@ from an @Int@.
1816 -- If you want an expression of type @Int@ use 'MkCore.mkIntExpr'
1817 mkIntLitInt :: DynFlags -> Int -> Expr b
1818
1819 mkIntLit dflags n = Lit (mkMachInt dflags n)
1820 mkIntLitInt dflags n = Lit (mkMachInt dflags (toInteger n))
1821
1822 -- | Create a machine word literal expression of type @Word#@ from an @Integer@.
1823 -- If you want an expression of type @Word@ use 'MkCore.mkWordExpr'
1824 mkWordLit :: DynFlags -> Integer -> Expr b
1825 -- | Create a machine word literal expression of type @Word#@ from a @Word@.
1826 -- If you want an expression of type @Word@ use 'MkCore.mkWordExpr'
1827 mkWordLitWord :: DynFlags -> Word -> Expr b
1828
1829 mkWordLit dflags w = Lit (mkMachWord dflags w)
1830 mkWordLitWord dflags w = Lit (mkMachWord dflags (toInteger w))
1831
1832 mkWord64LitWord64 :: Word64 -> Expr b
1833 mkWord64LitWord64 w = Lit (mkMachWord64 (toInteger w))
1834
1835 mkInt64LitInt64 :: Int64 -> Expr b
1836 mkInt64LitInt64 w = Lit (mkMachInt64 (toInteger w))
1837
1838 -- | Create a machine character literal expression of type @Char#@.
1839 -- If you want an expression of type @Char@ use 'MkCore.mkCharExpr'
1840 mkCharLit :: Char -> Expr b
1841 -- | Create a machine string literal expression of type @Addr#@.
1842 -- If you want an expression of type @String@ use 'MkCore.mkStringExpr'
1843 mkStringLit :: String -> Expr b
1844
1845 mkCharLit c = Lit (mkMachChar c)
1846 mkStringLit s = Lit (mkMachString s)
1847
1848 -- | Create a machine single precision literal expression of type @Float#@ from a @Rational@.
1849 -- If you want an expression of type @Float@ use 'MkCore.mkFloatExpr'
1850 mkFloatLit :: Rational -> Expr b
1851 -- | Create a machine single precision literal expression of type @Float#@ from a @Float@.
1852 -- If you want an expression of type @Float@ use 'MkCore.mkFloatExpr'
1853 mkFloatLitFloat :: Float -> Expr b
1854
1855 mkFloatLit f = Lit (mkMachFloat f)
1856 mkFloatLitFloat f = Lit (mkMachFloat (toRational f))
1857
1858 -- | Create a machine double precision literal expression of type @Double#@ from a @Rational@.
1859 -- If you want an expression of type @Double@ use 'MkCore.mkDoubleExpr'
1860 mkDoubleLit :: Rational -> Expr b
1861 -- | Create a machine double precision literal expression of type @Double#@ from a @Double@.
1862 -- If you want an expression of type @Double@ use 'MkCore.mkDoubleExpr'
1863 mkDoubleLitDouble :: Double -> Expr b
1864
1865 mkDoubleLit d = Lit (mkMachDouble d)
1866 mkDoubleLitDouble d = Lit (mkMachDouble (toRational d))
1867
1868 -- | Bind all supplied binding groups over an expression in a nested let expression. Assumes
1869 -- that the rhs satisfies the let/app invariant. Prefer to use 'MkCore.mkCoreLets' if
1870 -- possible, which does guarantee the invariant
1871 mkLets :: [Bind b] -> Expr b -> Expr b
1872 -- | Bind all supplied binders over an expression in a nested lambda expression. Prefer to
1873 -- use 'MkCore.mkCoreLams' if possible
1874 mkLams :: [b] -> Expr b -> Expr b
1875
1876 mkLams binders body = foldr Lam body binders
1877 mkLets binds body = foldr mkLet body binds
1878
1879 mkLet :: Bind b -> Expr b -> Expr b
1880 -- The desugarer sometimes generates an empty Rec group
1881 -- which Lint rejects, so we kill it off right away
1882 mkLet (Rec []) body = body
1883 mkLet bind body = Let bind body
1884
1885 -- | @mkLetNonRec bndr rhs body@ wraps @body@ in a @let@ binding @bndr@.
1886 mkLetNonRec :: b -> Expr b -> Expr b -> Expr b
1887 mkLetNonRec b rhs body = Let (NonRec b rhs) body
1888
1889 -- | @mkLetRec binds body@ wraps @body@ in a @let rec@ with the given set of
1890 -- @binds@ if binds is non-empty.
1891 mkLetRec :: [(b, Expr b)] -> Expr b -> Expr b
1892 mkLetRec [] body = body
1893 mkLetRec bs body = Let (Rec bs) body
1894
1895 -- | Create a binding group where a type variable is bound to a type. Per "CoreSyn#type_let",
1896 -- this can only be used to bind something in a non-recursive @let@ expression
1897 mkTyBind :: TyVar -> Type -> CoreBind
1898 mkTyBind tv ty = NonRec tv (Type ty)
1899
1900 -- | Create a binding group where a type variable is bound to a type. Per "CoreSyn#type_let",
1901 -- this can only be used to bind something in a non-recursive @let@ expression
1902 mkCoBind :: CoVar -> Coercion -> CoreBind
1903 mkCoBind cv co = NonRec cv (Coercion co)
1904
1905 -- | Convert a binder into either a 'Var' or 'Type' 'Expr' appropriately
1906 varToCoreExpr :: CoreBndr -> Expr b
1907 varToCoreExpr v | isTyVar v = Type (mkTyVarTy v)
1908 | isCoVar v = Coercion (mkCoVarCo v)
1909 | otherwise = ASSERT( isId v ) Var v
1910
1911 varsToCoreExprs :: [CoreBndr] -> [Expr b]
1912 varsToCoreExprs vs = map varToCoreExpr vs
1913
1914 {-
1915 ************************************************************************
1916 * *
1917 Getting a result type
1918 * *
1919 ************************************************************************
1920
1921 These are defined here to avoid a module loop between CoreUtils and CoreFVs
1922
1923 -}
1924
1925 applyTypeToArg :: Type -> CoreExpr -> Type
1926 -- ^ Determines the type resulting from applying an expression with given type
1927 -- to a given argument expression
1928 applyTypeToArg fun_ty arg = piResultTy fun_ty (exprToType arg)
1929
1930 -- | If the expression is a 'Type', converts. Otherwise,
1931 -- panics. NB: This does /not/ convert 'Coercion' to 'CoercionTy'.
1932 exprToType :: CoreExpr -> Type
1933 exprToType (Type ty) = ty
1934 exprToType _bad = pprPanic "exprToType" empty
1935
1936 -- | If the expression is a 'Coercion', converts.
1937 exprToCoercion_maybe :: CoreExpr -> Maybe Coercion
1938 exprToCoercion_maybe (Coercion co) = Just co
1939 exprToCoercion_maybe _ = Nothing
1940
1941 {-
1942 ************************************************************************
1943 * *
1944 \subsection{Simple access functions}
1945 * *
1946 ************************************************************************
1947 -}
1948
1949 -- | Extract every variable by this group
1950 bindersOf :: Bind b -> [b]
1951 -- If you edit this function, you may need to update the GHC formalism
1952 -- See Note [GHC Formalism] in coreSyn/CoreLint.hs
1953 bindersOf (NonRec binder _) = [binder]
1954 bindersOf (Rec pairs) = [binder | (binder, _) <- pairs]
1955
1956 -- | 'bindersOf' applied to a list of binding groups
1957 bindersOfBinds :: [Bind b] -> [b]
1958 bindersOfBinds binds = foldr ((++) . bindersOf) [] binds
1959
1960 rhssOfBind :: Bind b -> [Expr b]
1961 rhssOfBind (NonRec _ rhs) = [rhs]
1962 rhssOfBind (Rec pairs) = [rhs | (_,rhs) <- pairs]
1963
1964 rhssOfAlts :: [Alt b] -> [Expr b]
1965 rhssOfAlts alts = [e | (_,_,e) <- alts]
1966
1967 -- | Collapse all the bindings in the supplied groups into a single
1968 -- list of lhs\/rhs pairs suitable for binding in a 'Rec' binding group
1969 flattenBinds :: [Bind b] -> [(b, Expr b)]
1970 flattenBinds (NonRec b r : binds) = (b,r) : flattenBinds binds
1971 flattenBinds (Rec prs1 : binds) = prs1 ++ flattenBinds binds
1972 flattenBinds [] = []
1973
1974 -- | We often want to strip off leading lambdas before getting down to
1975 -- business. Variants are 'collectTyBinders', 'collectValBinders',
1976 -- and 'collectTyAndValBinders'
1977 collectBinders :: Expr b -> ([b], Expr b)
1978 collectTyBinders :: CoreExpr -> ([TyVar], CoreExpr)
1979 collectValBinders :: CoreExpr -> ([Id], CoreExpr)
1980 collectTyAndValBinders :: CoreExpr -> ([TyVar], [Id], CoreExpr)
1981 -- | Strip off exactly N leading lambdas (type or value). Good for use with
1982 -- join points.
1983 collectNBinders :: Int -> Expr b -> ([b], Expr b)
1984
1985 collectBinders expr
1986 = go [] expr
1987 where
1988 go bs (Lam b e) = go (b:bs) e
1989 go bs e = (reverse bs, e)
1990
1991 collectTyBinders expr
1992 = go [] expr
1993 where
1994 go tvs (Lam b e) | isTyVar b = go (b:tvs) e
1995 go tvs e = (reverse tvs, e)
1996
1997 collectValBinders expr
1998 = go [] expr
1999 where
2000 go ids (Lam b e) | isId b = go (b:ids) e
2001 go ids body = (reverse ids, body)
2002
2003 collectTyAndValBinders expr
2004 = (tvs, ids, body)
2005 where
2006 (tvs, body1) = collectTyBinders expr
2007 (ids, body) = collectValBinders body1
2008
2009 collectNBinders orig_n orig_expr
2010 = go orig_n [] orig_expr
2011 where
2012 go 0 bs expr = (reverse bs, expr)
2013 go n bs (Lam b e) = go (n-1) (b:bs) e
2014 go _ _ _ = pprPanic "collectNBinders" $ int orig_n
2015
2016 -- | Takes a nested application expression and returns the the function
2017 -- being applied and the arguments to which it is applied
2018 collectArgs :: Expr b -> (Expr b, [Arg b])
2019 collectArgs expr
2020 = go expr []
2021 where
2022 go (App f a) as = go f (a:as)
2023 go e as = (e, as)
2024
2025 -- | Like @collectArgs@, but also collects looks through floatable
2026 -- ticks if it means that we can find more arguments.
2027 collectArgsTicks :: (Tickish Id -> Bool) -> Expr b
2028 -> (Expr b, [Arg b], [Tickish Id])
2029 collectArgsTicks skipTick expr
2030 = go expr [] []
2031 where
2032 go (App f a) as ts = go f (a:as) ts
2033 go (Tick t e) as ts
2034 | skipTick t = go e as (t:ts)
2035 go e as ts = (e, as, reverse ts)
2036
2037
2038 {-
2039 ************************************************************************
2040 * *
2041 \subsection{Predicates}
2042 * *
2043 ************************************************************************
2044
2045 At one time we optionally carried type arguments through to runtime.
2046 @isRuntimeVar v@ returns if (Lam v _) really becomes a lambda at runtime,
2047 i.e. if type applications are actual lambdas because types are kept around
2048 at runtime. Similarly isRuntimeArg.
2049 -}
2050
2051 -- | Will this variable exist at runtime?
2052 isRuntimeVar :: Var -> Bool
2053 isRuntimeVar = isId
2054
2055 -- | Will this argument expression exist at runtime?
2056 isRuntimeArg :: CoreExpr -> Bool
2057 isRuntimeArg = isValArg
2058
2059 -- | Returns @True@ for value arguments, false for type args
2060 -- NB: coercions are value arguments (zero width, to be sure,
2061 -- like State#, but still value args).
2062 isValArg :: Expr b -> Bool
2063 isValArg e = not (isTypeArg e)
2064
2065 -- | Returns @True@ iff the expression is a 'Type' or 'Coercion'
2066 -- expression at its top level
2067 isTyCoArg :: Expr b -> Bool
2068 isTyCoArg (Type {}) = True
2069 isTyCoArg (Coercion {}) = True
2070 isTyCoArg _ = False
2071
2072 -- | Returns @True@ iff the expression is a 'Type' expression at its
2073 -- top level. Note this does NOT include 'Coercion's.
2074 isTypeArg :: Expr b -> Bool
2075 isTypeArg (Type {}) = True
2076 isTypeArg _ = False
2077
2078 -- | The number of binders that bind values rather than types
2079 valBndrCount :: [CoreBndr] -> Int
2080 valBndrCount = count isId
2081
2082 -- | The number of argument expressions that are values rather than types at their top level
2083 valArgCount :: [Arg b] -> Int
2084 valArgCount = count isValArg
2085
2086 {-
2087 ************************************************************************
2088 * *
2089 \subsection{Annotated core}
2090 * *
2091 ************************************************************************
2092 -}
2093
2094 -- | Annotated core: allows annotation at every node in the tree
2095 type AnnExpr bndr annot = (annot, AnnExpr' bndr annot)
2096
2097 -- | A clone of the 'Expr' type but allowing annotation at every tree node
2098 data AnnExpr' bndr annot
2099 = AnnVar Id
2100 | AnnLit Literal
2101 | AnnLam bndr (AnnExpr bndr annot)
2102 | AnnApp (AnnExpr bndr annot) (AnnExpr bndr annot)
2103 | AnnCase (AnnExpr bndr annot) bndr Type [AnnAlt bndr annot]
2104 | AnnLet (AnnBind bndr annot) (AnnExpr bndr annot)
2105 | AnnCast (AnnExpr bndr annot) (annot, Coercion)
2106 -- Put an annotation on the (root of) the coercion
2107 | AnnTick (Tickish Id) (AnnExpr bndr annot)
2108 | AnnType Type
2109 | AnnCoercion Coercion
2110
2111 -- | A clone of the 'Alt' type but allowing annotation at every tree node
2112 type AnnAlt bndr annot = (AltCon, [bndr], AnnExpr bndr annot)
2113
2114 -- | A clone of the 'Bind' type but allowing annotation at every tree node
2115 data AnnBind bndr annot
2116 = AnnNonRec bndr (AnnExpr bndr annot)
2117 | AnnRec [(bndr, AnnExpr bndr annot)]
2118
2119 -- | Takes a nested application expression and returns the the function
2120 -- being applied and the arguments to which it is applied
2121 collectAnnArgs :: AnnExpr b a -> (AnnExpr b a, [AnnExpr b a])
2122 collectAnnArgs expr
2123 = go expr []
2124 where
2125 go (_, AnnApp f a) as = go f (a:as)
2126 go e as = (e, as)
2127
2128 collectAnnArgsTicks :: (Tickish Var -> Bool) -> AnnExpr b a
2129 -> (AnnExpr b a, [AnnExpr b a], [Tickish Var])
2130 collectAnnArgsTicks tickishOk expr
2131 = go expr [] []
2132 where
2133 go (_, AnnApp f a) as ts = go f (a:as) ts
2134 go (_, AnnTick t e) as ts | tickishOk t
2135 = go e as (t:ts)
2136 go e as ts = (e, as, reverse ts)
2137
2138 deAnnotate :: AnnExpr bndr annot -> Expr bndr
2139 deAnnotate (_, e) = deAnnotate' e
2140
2141 deAnnotate' :: AnnExpr' bndr annot -> Expr bndr
2142 deAnnotate' (AnnType t) = Type t
2143 deAnnotate' (AnnCoercion co) = Coercion co
2144 deAnnotate' (AnnVar v) = Var v
2145 deAnnotate' (AnnLit lit) = Lit lit
2146 deAnnotate' (AnnLam binder body) = Lam binder (deAnnotate body)
2147 deAnnotate' (AnnApp fun arg) = App (deAnnotate fun) (deAnnotate arg)
2148 deAnnotate' (AnnCast e (_,co)) = Cast (deAnnotate e) co
2149 deAnnotate' (AnnTick tick body) = Tick tick (deAnnotate body)
2150
2151 deAnnotate' (AnnLet bind body)
2152 = Let (deAnnBind bind) (deAnnotate body)
2153 where
2154 deAnnBind (AnnNonRec var rhs) = NonRec var (deAnnotate rhs)
2155 deAnnBind (AnnRec pairs) = Rec [(v,deAnnotate rhs) | (v,rhs) <- pairs]
2156
2157 deAnnotate' (AnnCase scrut v t alts)
2158 = Case (deAnnotate scrut) v t (map deAnnAlt alts)
2159
2160 deAnnAlt :: AnnAlt bndr annot -> Alt bndr
2161 deAnnAlt (con,args,rhs) = (con,args,deAnnotate rhs)
2162
2163 -- | As 'collectBinders' but for 'AnnExpr' rather than 'Expr'
2164 collectAnnBndrs :: AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot)
2165 collectAnnBndrs e
2166 = collect [] e
2167 where
2168 collect bs (_, AnnLam b body) = collect (b:bs) body
2169 collect bs body = (reverse bs, body)
2170
2171 -- | As 'collectNBinders' but for 'AnnExpr' rather than 'Expr'
2172 collectNAnnBndrs :: Int -> AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot)
2173 collectNAnnBndrs orig_n e
2174 = collect orig_n [] e
2175 where
2176 collect 0 bs body = (reverse bs, body)
2177 collect n bs (_, AnnLam b body) = collect (n-1) (b:bs) body
2178 collect _ _ _ = pprPanic "collectNBinders" $ int orig_n